科技行者

行者学院 转型私董会 科技行者专题报道 网红大战科技行者

知识库

知识库 安全导航

至顶网软件频道创造一种迅速而又随性的XML解释器

创造一种迅速而又随性的XML解释器

  • 扫一扫
    分享文章到微信

  • 扫一扫
    关注官方公众号
    至顶头条

  Xml是一种当前很受欢迎的数据格式, 它的优点在于: 人性化,自述性以及使用的方便性.但是,不幸的是,基于java的xml解释器往往太大了,比如sun的jaxp.jar 和 parser.jar 每个都达到了1.4mb. 如果你要在只有有限的内存

作者:中国IT实验室 来源:中国IT实验室 2007年9月22日

关键字: XMLBeans 编程 java

  • 评论
  • 分享微博
  • 分享邮件

  XML是一种当前很受欢迎的数据格式,它的优点在于:人性化、自述性以及使用的方便性。但不幸的是,基于Java的XML解释器往往太大了,比如Sun的jaxp.jar和parser.jar每个都达到了1.4MB。如果你要在内存容量有限的运行环境(比如J2ME环境)或者带宽很有限的运行环境(比如applet)里运行你的程序,这些庞大的package就不应该成为你的选择对象。
  注意:本篇所需的所有代码都可以通过此链接下载:
  http://www.matrix.org.cn/down_view.asp?id=67
  下面是QDParser的代码:
  package qdxml;
  import java.io.*;
  import java.util.*;
  
  /** Quick and Dirty xml parser. This parser is, like the SAX parser,
  an event based parser, but with much less functionality. */
  public class QDParser {
  /**
   * Restores the parser mode that was saved most recently on the mode stack.
   *
   * @param st stack of saved modes, each pushed as an {@code Integer}
   * @return the mode popped from {@code st}, or {@code PRE} when the stack is empty
   */
  private static int popMode(Stack st) {
    // Nothing saved: fall back to the "outside the root element" mode.
    if (st.empty()) {
      return PRE;
    }
    // Integer exposes intValue() (capital V); "intvalue" does not exist.
    return ((Integer) st.pop()).intValue();
  }
  // Parser modes, listed in numeric order. parse() compares the current mode
  // against these with ==, so every value has to stay distinct.

  // Between tags, collecting character data.
  private static final int TEXT = 1;
  // Inside an entity reference, collecting its name up to the ';'.
  private static final int ENTITY = 2;
  // Inside an opening tag such as <foo ... >, before the tag name is complete.
  private static final int OPEN_TAG = 3;
  // Inside a closing tag such as </foo>.
  private static final int CLOSE_TAG = 4;
  // A '<' was just read; the next character decides what kind of tag this is.
  private static final int START_TAG = 5;
  private static final int ATTRIBUTE_Lvalue = 6;
  // Inside the quoted right-hand side of an attribute.
  private static final int QUOTE = 7;
  // Inside a tag, after the tag name or after a completed attribute value.
  private static final int IN_TAG = 8;
  private static final int ATTRIBUTE_EQUAL = 9;
  // Waiting for the opening quote of an attribute value.
  private static final int ATTRIBUTE_Rvalue = 10;
  // The root element has been closed.
  private static final int DONE = 11;
  // A '/' was read inside an opening tag; the final '>' is expected next.
  private static final int SINGLE_TAG = 12;
  // Inside <!-- ... -->, looking for the closing -->.
  private static final int COMMENT = 13;
  // Inside <? ... ?> or <!DOCTYPE ... >.
  private static final int DOCTYPE = 14;
  // Outside the root element.
  private static final int PRE = 15;
  // Inside a CDATA section, looking for the closing ]]>.
  private static final int CDATA = 16;
  public static void parse(DocHandler doc,Reader r) throws Exception {
  Stack st = new Stack();
  int depth = 0;
  int mode = PRE;
  int c = 0;
  int quotec = '"';
  depth = 0;
  StringBuffer sb = new StringBuffer();
  StringBuffer etag = new StringBuffer();
  String tagName = null;
  String lvalue = null;
  String rvalue = null;
  Hashtable attrs = null;
  st = new Stack();
  doc.startDocument();
  int line=1, col=0;
  boolean eol = false;
  while((c = r.read()) != -1) {
  
  // We need to map \r, \r\n, and \n to \n
  // See XML spec section 2.11
  if(c == '\n' && eol) {
  eol = false;
  continue;
  } else if(eol) {
  eol = false;
  } else if(c == '\n') {
  line++;
  col=0;
  } else if(c == '\r') {
  eol = true;
  c = '\n';
  line++;
  col=0;
  } else {
  col++;
  }
  
  if(mode == DONE) {
  doc.endDocument();
  return;
  
  // We are between tags collecting text.
  } else if(mode == TEXT) {
  if(c == '<') {
  st.push(new Integer(mode));
  mode = START_TAG;
  if(sb.length() > 0) {
  doc.text(sb.toString());
  sb.setLength(0);
  }
  } else if(c == '&') {
  st.push(new Integer(mode));
  mode = ENTITY;
  etag.setLength(0);
  } else
  sb.append((char)c);
  
  // we are processing a closing tag: e.g. </foo>
  } else if(mode == CLOSE_TAG) {
  if(c == '>') {
  mode = popMode(st);
  tagName = sb.toString();
  sb.setLength(0);
  depth--;
  if(depth==0)
  mode = DONE;
  doc.endElement(tagName);
  } else {
  sb.append((char)c);
  }
  
  // we are processing CDATA
  } else if(mode == CDATA) {
  if(c == '>'
  && sb.toString().endsWith("]]")) {
  sb.setLength(sb.length()-2);
  doc.text(sb.toString());
  sb.setLength(0);
  mode = popMode(st);
  } else
  sb.append((char)c);
  
  // we are processing a comment. We are inside
  // the <!-- .... --> looking for the -->.
  } else if(mode == COMMENT) {
  if(c == '>'
  && sb.toString().endsWith("--")) {
  sb.setLength(0);
  mode = popMode(st);
  } else
  sb.append((char)c);
  
  // We are outside the root tag element
  } else if(mode == PRE) {
  if(c == '<') {
  mode = TEXT;
  st.push(new Integer(mode));
  mode = START_TAG;
  }
  
  // We are inside one of these <? ... ?>
  // or one of these <!DOCTYPE ... >
  } else if(mode == DOCTYPE) {
  if(c == '>') {
  mode = popMode(st);
  if(mode == TEXT) mode = PRE;
  }
  
  // we have just seen a < and
  // are wondering what we are looking at
  // <foo>, </foo>, <!-- ... --->, etc.
  } else if(mode == START_TAG) {
  mode = popMode(st);
  if(c == '/') {
  st.push(new Integer(mode));
  mode = CLOSE_TAG;
  } else if (c == '?') {
  mode = DOCTYPE;
  } else {
  st.push(new Integer(mode));
  mode = OPEN_TAG;
  tagName = null;
  attrs = new Hashtable();
  sb.append((char)c);
  }
  
  // we are processing an entity, e.g. &lt;, &raquo;, etc.
  } else if(mode == ENTITY) {
  if(c == ';') {
  mode = popMode(st);
  String cent = etag.toString();
  etag.setLength(0);
  if(cent.equals("lt"))
  sb.append('<');
  else if(cent.equals("gt"))
  sb.append('>');
  else if(cent.equals("amp"))
  sb.append('&');
  else if(cent.equals("quot"))
  sb.append('"');
  else if(cent.equals("apos"))
  sb.append('\'');
  // Could parse hex entities if we wanted to
  //else if(cent.startsWith("#x"))
  //sb.append((char)Integer.parseInt(cent.substring(2),16));
  else if(cent.startsWith("#"))
  sb.append((char)Integer.parseInt(cent.substring(1)));
  // Insert custom entity definitions here
  else
  exc("Unknown entity: &"+cent+";",line,col);
  } else {
  etag.append((char)c);
  }
  
  // we have just seen something like this:
  // <foo a="b"/
  // and are looking for the final >.
  } else if(mode == SINGLE_TAG) {
  if(tagName == null)
  tagName = sb.toString();
  if(c != '>')
  exc("Expected > for tag: <"+tagName+"/>",line,col);
  doc.startElement(tagName,attrs);
  doc.endElement(tagName);
  if(depth==0) {
  doc.endDocument();
  return;
  }
  sb.setLength(0);
  attrs = new Hashtable();
  tagName = null;
  mode = popMode(st);
  
  // we are processing something
  // like this <foo ... >. It could
  // still be a <!-- ... --> or something.
  } else if(mode == OPEN_TAG) {
  if(c == '>') {
  if(tagName == null)
  tagName = sb.toString();
  sb.setLength(0);
  depth++;
  doc.startElement(tagName,attrs);
  tagName = null;
  attrs = new Hashtable();
  mode = popMode(st);
  } else if(c == '/') {
  mode = SINGLE_TAG;
  } else if(c == '-' && sb.toString().equals("!-")) {
  mode = COMMENT;
  } else if(c == '[' && sb.toString().equals("![CDATA")) {
  mode = CDATA;
  sb.setLength(0);
  } else if(c == 'E' && sb.toString().equals("!DOCTYP")) {
  sb.setLength(0);
  mode = DOCTYPE;
  } else if(Character.isWhitespace((char)c)) {
  tagName = sb.toString();
  sb.setLength(0);
  mode = IN_TAG;
  } else {
  sb.append((char)c);
  }
  
  // We are processing the quoted right-hand side
  // of an element's attribute.
  } else if(mode == QUOTE) {
  if(c == quotec) {
  rvalue = sb.toString();
  sb.setLength(0);
  attrs.put(lvalue,rvalue);
  mode = IN_TAG;
  // See the XML spec, section 3.3.3
  // on normalization processing.
  } else if(" \r\n\u0009".indexOf(c)>=0) {
  sb.append(' ');
  } else if(c == '&') {
  st.push(new Integer(mode));
  mode = ENTITY;
  etag.setLength(0);
  } else {
  sb.append((char)c);
  }
  
  } else if(mode == ATTRIBUTE_Rvalue) {
  if(c == '"' || c == '\'') {
  quotec = c;
  mode = QUOTE;
  } else if(Character.isWhitespace((char)c)) {
  
  } else {
  exc("Error in attribute processing",line,col);
  }
  
  } else if(mode == ATTRIBUTE_Lvalue) {
  if(Character.isWhit

查看本文来源

    • 评论
    • 分享微博
    • 分享邮件