// Download | Plain Text | Line Numbers


/*
 * Semistrukturierte Daten - SS 2009
 * Uebungsbeispiel 3
 */
 
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.lang.StringBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Stack;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.stream.StreamResult;

import org.xml.sax.*;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

import org.w3c.dom.*;
 
public class Beispiel3
{
  /* Stream that receives all console output of this class (parser diagnostics, statistics, SAX dump). */
  static java.io.PrintStream out = System.out;
  /*
   * Matriculation number. Its last two characters are appended to renamed
   * attributes; its numeric value is the start value of the depth counter in
   * SAX2DOM and its length is the zero-padded width of the "tiefe" attribute.
   */
  static String matrNr = "0728348";
  /* Statistics of the current pass: [0] counts elements, [1] counts attributes. */
  int[] nodeCounter = new int[2];
 
  public static void main(String[] args)
  {
    // Argumentueberpruefung
    if (args.length != 1)
    {
      System.err.println("Usage: java Beispiel3 <XML-File>");
      System.exit(1);
    }
 
    String xmlInput = args[0];
 
    Beispiel3 beispiel = new Beispiel3();
 
    try
    {
      beispiel.dom(xmlInput);
      beispiel.sax(xmlInput);
      beispiel.domsax(xmlInput);
    }
    catch(Exception ex)
    {
      System.err.println(ex);
      System.exit(1);
    }
  }
 
  /*============================================================================*/
 
  public class MyErrorHandler implements ErrorHandler
  {
    public void warning(SAXParseException e) throws SAXException
    {
      printError("WARNING", e);
    }
 
    public void error(SAXParseException e) throws SAXException
    {
      printError("ERROR", e);
    }
 
    public void fatalError(SAXParseException e) throws SAXException
    {
      printError("FATAL", e);
    }
 
    private void printError(String severity, SAXParseException e)
    {
      out.printf(" --> %s at %3d,%3d: %s\n"
          , severity
          , e.getLineNumber()
          , e.getColumnNumber()
          , e.getMessage());
    }
  }
 
  /*============================================================================*/
 
  /**
   * Vervollstaendigen Sie die Methode. Der Name des XML-Files, welches verarbeitet werden soll,
   * wird mittels Parameter "xmlInput" uebergeben.
   *
   * Verwenden Sie fuer die Loesung dieser Teilaufgabe einen DOM-Baum.
   */
  private void dom(String xmlInput) throws Exception
  {
    /* DOM initialization */
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    //factory.setValidating(true);
    factory.setAttribute("http://java.sun.com/xml/jaxp/properties/schemaLanguage", "http://www.w3.org/2001/XMLSchema");
    factory.setNamespaceAware(true);
    factory.setIgnoringComments(true);
    factory.setIgnoringElementContentWhitespace(true);
 
    /* build dom tree */
    DocumentBuilder builder = factory.newDocumentBuilder();
    builder.setErrorHandler(new MyErrorHandler());
    Document doc = builder.parse(xmlInput);
 
    /* do the work */
    nodeCounter[0] = nodeCounter[1] = 0;
    renameNodes(doc);
    renameNodes(doc.getDocumentElement());
 
    /* write to file */
    TransformerFactory tff = TransformerFactory.newInstance();
    Transformer tf = tff.newTransformer();
    tf.transform(new DOMSource(doc), new StreamResult("beispiel3-dom.xml"));
 
    /* output statistic */
    out.printf("[DOM] Elements:   %d\n", nodeCounter[0]);
    out.printf("[DOM] Attributes: %d\n", nodeCounter[1]);
  }
 
  /*----------------------------------------------------------------------------*/
 
  private void renameNodes(Node node)
  {
    /* remove processing instruction nodes and append them at the end */
    if (node.getNodeType() == node.PROCESSING_INSTRUCTION_NODE)
    {
      /* NOTE: why are PIs from root missing in the reference file? - just do the same */
      //boolean append = (node.getParentNode() == node.getOwnerDocument()) ? false : true;
      boolean append = true;
      node.getParentNode().removeChild(node);
      if (append)
        node.getOwnerDocument().appendChild(node);
    }
    /* rename elements */
    else if (node.getNodeType() == node.ELEMENT_NODE)
    {
      nodeCounter[0]++;
      StringBuffer strbuf = new StringBuffer(node.getLocalName());
      String newnodename = strbuf.reverse().toString();
      if (node.getNamespaceURI() != null)
        newnodename = node.getPrefix() + ":" + newnodename;
      node.getOwnerDocument().renameNode(node, node.getNamespaceURI(), newnodename);
    }
 
    /* rename attributes */
    if (node.hasAttributes())
    {
      NamedNodeMap attrs = node.getAttributes();
 
      /* store attributes in arraylist due to renameNode() may delete and append attributes */
      ArrayList<Node> attrslist = new ArrayList<Node>();
      for(int i = 0; i< attrs.getLength(); i++)
        attrslist.add(attrs.item(i));
 
      for(int i = 0; i < attrslist.size(); i++)
      {
        nodeCounter[1]++;
        Node attr = attrslist.get(i);
 
        /* skip xmlns-namespace */
        if (attr.getPrefix() != null && attr.getPrefix().equals("xmlns"))
          continue;
 
        /* generate and set new nodename */
        String newnodename = attr.getLocalName().toLowerCase() + matrNr.substring(matrNr.length() - 2);
        if (attr.getNamespaceURI() != null)
          newnodename = attr.getPrefix() + ":" + newnodename;
        attr.getOwnerDocument().renameNode(attr, attr.getNamespaceURI(), newnodename);
      }
    }
 
    /* do the same for all childs */
    NodeList childs = node.getChildNodes();
    for(int i = 0; i < childs.getLength(); i++)
    {
      renameNodes(childs.item(i));
    }
  }
 
  /*============================================================================*/
 
  public class SAXHandler extends DefaultHandler
  {
    public StringBuffer output = new StringBuffer();
    private ArrayList<String> pilist = new ArrayList<String>();
 
    public void startDocument() throws SAXException
    {
      output.append("<?xml version=\"1.0\" encoding=\"utf-8\"?>").append(System.getProperty("line.separator"));
    }
 
    public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException
    {
      nodeCounter[0]++;
      String prefix = "";
      if (uri.length() > 0)
        prefix = qName.substring(0, qName.length() - localName.length());
      output.append(String.format("<%s%s", prefix, new StringBuffer(localName).reverse()));
 
      for(int i = 0; i < atts.getLength(); i++)
      {
        nodeCounter[1]++;
        String attrprefix = "";
        if (atts.getQName(i).length() > atts.getLocalName(i).length())
          attrprefix = atts.getQName(i).substring(0, atts.getQName(i).length() - atts.getLocalName(i).length());
 
        /* generate and set new nodename */
        output.append(String.format(" %s%s=\"%s\"",
            attrprefix,
            (atts.getQName(i).startsWith("xmlns:")) ? atts.getLocalName(i) : atts.getLocalName(i).toLowerCase() + matrNr.substring(matrNr.length() - 2),
            atts.getValue(i)));
      }
 
      output.append(">");
    }
 
    public void endElement(String uri, String localName, String qName) throws SAXException
    {
      String prefix = "";
      if (uri.length() > 0)
        prefix = qName.substring(0, qName.length() - localName.length());
      output.append(String.format("</%s%s>", prefix, new StringBuffer(localName).reverse()));
    }
 
    public void processingInstruction(String target, String data) throws SAXException
    {
      pilist.add("<?" + target + " " + data + "?>");
    }
 
    public void endDocument() throws SAXException
    {
      output.append(System.getProperty("line.separator"));
      for (int i = 0; i < pilist.size(); i++)
        output.append(pilist.get(i)).append(System.getProperty("line.separator"));
    }
 
    public void characters(char[] ch, int start, int length) throws SAXException
    {
      output.append(new String(ch, start, length));
    }
  }
 
  /*----------------------------------------------------------------------------*/
 
  /**
   * Vervollstaendigen Sie die Methode. Der Name des XML-Files, welches verarbeitet werden soll,
   * wird mittels Parameter "xmlInput" uebergeben.
   *
   * Verwenden Sie fuer die Loesung dieser Teilaufgabe einen SAX Prozessor.
   */
  private void sax(String xmlInput) throws Exception
  {
    /* SAX initialization + read from file */
    FileInputStream fis = new FileInputStream(xmlInput);
    InputSource is = new InputSource(fis);
 
    XMLReader xr = XMLReaderFactory.createXMLReader();
    xr.setErrorHandler(new MyErrorHandler());
    xr.setFeature("http://xml.org/sax/features/validation", false); // should validation errors be reported?
    xr.setFeature("http://xml.org/sax/features/namespaces", true); // include namespaces and localNames in events
    xr.setFeature("http://xml.org/sax/features/namespace-prefixes", true); // report xmlns:* attributes
 
    /* connet our handler and parse the file */
    SAXHandler handler = new SAXHandler();
    xr.setContentHandler(handler);
    nodeCounter[0] = nodeCounter[1] = 0;
    xr.parse(is);
    fis.close();
 
    /* write output to file for make test(-ns) */
    FileWriter outfile = new FileWriter("beispiel3-sax.xml");
    outfile.write(handler.output.toString());
    outfile.close();
 
    /* output to stdout */
    out.print(handler.output);
    out.printf("[SAX] Elements:   %d\n", nodeCounter[0]);
    out.printf("[SAX] Attributes: %d\n", nodeCounter[1]);
  }
 
  /*============================================================================*/
 
  public class SAX2DOM extends DefaultHandler
  {
    private Document doc = null;
    private Stack<Node> stack = new Stack<Node>();
    private Integer depth = Integer.parseInt(matrNr);
    private String chars = "";
 
    SAX2DOM(Document doc)
    {
      this.doc = doc;
    }
 
    public void startDocument() throws SAXException
    {
      stack.push(doc);
      processChars();
    }
 
    public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException
    {
      if (stack.empty())
        throw new SAXException("Stack is empty. Document element got lost");
      processChars();
 
      Element element = doc.createElementNS(uri, qName);
      for(int i = 0; i < atts.getLength(); i++)
      {
        /*
        String attruri = atts.getURI(i);
        if (atts.getQName(i).startsWith("xmlns"))
          attruri = "http://www.w3.org/2000/xmlns/";
        else if (atts.getQName(i).startsWith("xml"))
          attruri = "http://www.w3.org/XML/1998/namespace";
        element.setAttributeNS(attruri, atts.getQName(i), atts.getValue(i));
        */
 
        out.println(atts.getQName(i));
        element.setAttribute(atts.getQName(i), atts.getValue(i));
      }
      element.setAttribute("tiefe", String.format("%0" + matrNr.length() + "d", depth));
      stack.push(element);
      depth++;
    }
 
    public void endElement(String uri, String localName, String qName) throws SAXException
    {
      if (stack.empty())
        throw new SAXException("endElement() without startElement()?");
      processChars();
 
      /* pop parent, append node and push parent back */
      Node node   = stack.pop();
      Node parent = stack.pop();
      parent.appendChild(node);
      stack.push(parent);
      depth--;
    }
 
    public void processingInstruction(String target, String data) throws SAXException
    {
      if (stack.empty())
        throw new SAXException("Stack is empty. Document element got lost");
      processChars();
 
      /* pop parent and append PI-node */
      Node parent = stack.pop();
      parent.appendChild(doc.createProcessingInstruction(target, data));
      stack.push(parent);
    }
 
    public void endDocument() throws SAXException
    {
      /* only doc should be left on stack! */
      if (stack.empty())
        throw new SAXException("Stack is empty. Document element got lost");
      if (stack.size() != 1)
        throw new SAXException("Unassigned elements on stack?");
      processChars();
      stack.pop();
    }
 
    public void characters(char[] ch, int start, int length) throws SAXException
    {
      /* capture chars */
      chars += new String(ch, start, length);
    }
 
    private void processChars() throws SAXException
    {
      if (stack.empty())
        throw new SAXException("Stack is empty. Document element got lost");
      if (chars.length() <= 0)
        return;
 
      /* pop parent and append textnode */
      Node parent = stack.pop();
      parent.appendChild(doc.createTextNode(chars));
      stack.push(parent);
      chars = "";
    }
  }
 
  /*----------------------------------------------------------------------------*/
 
  /**
   * Vervollstaendigen Sie die Methode. Der Name des XML-Files, welches verarbeitet werden soll,
   * wird mittels Parameter "xmlInput" uebergeben.
   */
  private void domsax (String xmlInput) throws Exception
  {
    MyErrorHandler errorhandler = new MyErrorHandler();
 
    /* SAX initialization + read from file */
    FileInputStream fis = new FileInputStream(xmlInput);
    InputSource is = new InputSource(fis);
 
    XMLReader xr = XMLReaderFactory.createXMLReader();
    xr.setErrorHandler(errorhandler);
    xr.setFeature("http://xml.org/sax/features/validation", false); // should validation errors be reported?
    /*
    xr.setFeature("http://xml.org/sax/features/namespaces", true); // include namespaces and localNames in events
    xr.setFeature("http://xml.org/sax/features/namespace-prefixes", true); // report xmlns:* attributes
    */
    xr.setFeature("http://xml.org/sax/features/namespaces", false); // disable namespaces and localNames in events
    xr.setFeature("http://xml.org/sax/features/namespace-prefixes", false); // don't report xmlns:* attributes
 
    /* DOM initialization */
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    //factory.setValidating(true);
    factory.setAttribute("http://java.sun.com/xml/jaxp/properties/schemaLanguage", "http://www.w3.org/2001/XMLSchema");
    factory.setNamespaceAware(true);
    factory.setIgnoringComments(true);
    factory.setIgnoringElementContentWhitespace(true);
 
    /* create new dom tree */
    DocumentBuilder docbuilder = factory.newDocumentBuilder();
    docbuilder.setErrorHandler(errorhandler);
    Document doc = docbuilder.newDocument();
 
    /* connet our handler and parse the file */
    xr.setContentHandler(new SAX2DOM(doc));
    xr.parse(is);
    fis.close();
 
    /* write to file */
    TransformerFactory tff = TransformerFactory.newInstance();
    Transformer tf = tff.newTransformer();
    tf.setOutputProperty(OutputKeys.INDENT, "yes");
    tf.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
    tf.transform(new DOMSource(doc), new StreamResult("beispiel3-domsax.xml"));
  }
}
 
/* vim: set et sw=2 ts=2: */