/*
 * Semi-structured Data - summer term 2009
 * Exercise 3
 */

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.List;
import java.util.Locale;
import java.util.Stack;
import java.lang.StringBuffer;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.OutputStreamWriter;
import java.io.Writer;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.stream.StreamResult;
import org.xml.sax.*;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
import org.w3c.dom.*;

/**
 * Transforms one XML file in three different ways:
 * <ul>
 *   <li>{@code dom}: renames elements/attributes in a DOM tree and moves processing
 *       instructions to the end of the document,</li>
 *   <li>{@code sax}: performs the same renaming while streaming with SAX,</li>
 *   <li>{@code domsax}: rebuilds a DOM tree from SAX events and tags every element with a
 *       {@code tiefe} (depth) attribute.</li>
 * </ul>
 * Results are written to {@code beispiel3-dom.xml}, {@code beispiel3-sax.xml} and
 * {@code beispiel3-domsax.xml} in the working directory.
 */
public class Beispiel3 {
  static java.io.PrintStream out = System.out;
  static String matrNr = "0728348";

  /** Statistics of the current run: index 0 counts elements, index 1 counts attributes. */
  int[] nodeCounter = new int[2];

  /**
   * Program entry point.
   *
   * @param args exactly one argument: the XML file to process
   */
  public static void main(String[] args) {
    /* argument check */
    if (args.length != 1) {
      System.err.println("Usage: java Beispiel3 <xml-file>");
      System.exit(1);
    }

    String xmlInput = args[0];
    Beispiel3 beispiel = new Beispiel3();
    try {
      beispiel.dom(xmlInput);
      beispiel.sax(xmlInput);
      beispiel.domsax(xmlInput);
    } catch (Exception ex) {
      System.err.println(ex);
      System.exit(1);
    }
  }

  /*============================================================================*/

  /** Error handler that reports every parser diagnostic on {@link #out} and never throws. */
  public class MyErrorHandler implements ErrorHandler {
    @Override
    public void warning(SAXParseException e) throws SAXException {
      printError("WARNING", e);
    }

    @Override
    public void error(SAXParseException e) throws SAXException {
      printError("ERROR", e);
    }

    @Override
    public void fatalError(SAXParseException e) throws SAXException {
      printError("FATAL", e);
    }

    private void printError(String severity, SAXParseException e) {
      out.printf("  --> %s at %3d,%3d: %s\n",
          severity, e.getLineNumber(), e.getColumnNumber(), e.getMessage());
    }
  }

  /*============================================================================*/

  /** Returns the suffix appended to renamed attributes: the last two characters of matrNr. */
  private static String attributeSuffix() {
    return matrNr.substring(matrNr.length() - 2);
  }

  /** Returns {@code text} with its characters in reverse order. */
  private static String reverse(String text) {
    return new StringBuilder(text).reverse().toString();
  }

  /** Returns the local name of a DOM node, falling back to the node name for DOM level 1 nodes. */
  private static String localNameOf(Node node) {
    String localName = node.getLocalName();
    return (localName != null) ? localName : node.getNodeName();
  }

  /** Returns the prefix of a DOM node including the trailing colon, or "" if it has none. */
  private static String prefixWithColon(Node node) {
    String prefix = node.getPrefix();
    return (prefix == null || prefix.length() == 0) ? "" : prefix + ":";
  }

  /** Tells whether a DOM attribute is a namespace declaration (xmlns or xmlns:*). */
  private static boolean isNamespaceDeclaration(Node attr) {
    return "xmlns".equals(attr.getPrefix()) || "xmlns".equals(attr.getNodeName());
  }

  /** Tells whether a SAX attribute qName is a namespace declaration (xmlns or xmlns:*). */
  private static boolean isNamespaceDeclaration(String qName) {
    return qName.equals("xmlns") || qName.startsWith("xmlns:");
  }

  /**
   * Escapes markup characters so that parsed character data can be written back as XML.
   *
   * @param text        the unescaped text as delivered by the parser
   * @param inAttribute true when the text is written inside a double-quoted attribute value
   * @return the escaped text
   */
  private static String escapeXml(String text, boolean inAttribute) {
    StringBuilder escaped = new StringBuilder(text.length() + 16);
    for (int i = 0; i < text.length(); i++) {
      char c = text.charAt(i);
      switch (c) {
        case '&':
          escaped.append("&amp;");
          break;
        case '<':
          escaped.append("&lt;");
          break;
        case '>':
          escaped.append("&gt;");
          break;
        case '"':
          escaped.append(inAttribute ? "&quot;" : "\"");
          break;
        case '\t':
        case '\n':
        case '\r':
          /* literal whitespace in attribute values would be normalized to blanks on re-parse */
          if (inAttribute) {
            escaped.append("&#").append((int) c).append(';');
          } else {
            escaped.append(c);
          }
          break;
        default:
          escaped.append(c);
      }
    }
    return escaped.toString();
  }

  /**
   * Creates the namespace-aware DocumentBuilder shared by the DOM based steps.
   *
   * @param errorHandler receives the diagnostics of the builder
   * @return a configured builder
   * @throws Exception if the JAXP implementation rejects the configuration
   */
  private DocumentBuilder createDocumentBuilder(ErrorHandler errorHandler) throws Exception {
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setAttribute("http://java.sun.com/xml/jaxp/properties/schemaLanguage",
        "http://www.w3.org/2001/XMLSchema");
    factory.setNamespaceAware(true);
    factory.setIgnoringComments(true);
    factory.setIgnoringElementContentWhitespace(true);

    DocumentBuilder builder = factory.newDocumentBuilder();
    builder.setErrorHandler(errorHandler);
    return builder;
  }

  /*============================================================================*/

  /**
   * DOM variant: parses the file into a DOM tree, renames all elements and attributes, moves
   * processing instructions to the end of the document, writes the result to
   * {@code beispiel3-dom.xml} and prints the element/attribute statistics.
   *
   * @param xmlInput name of the XML file to process
   * @throws Exception on any parser, transformer or I/O failure
   */
  private void dom(String xmlInput) throws Exception {
    /* build dom tree */
    DocumentBuilder builder = createDocumentBuilder(new MyErrorHandler());
    Document doc = builder.parse(xmlInput);

    /* do the work; one pass from the document node reaches every node exactly once */
    nodeCounter[0] = nodeCounter[1] = 0;
    renameNodes(doc);

    /* write to file */
    Transformer tf = TransformerFactory.newInstance().newTransformer();
    tf.transform(new DOMSource(doc), new StreamResult("beispiel3-dom.xml"));

    /* output statistic */
    out.printf("[DOM] Elements: %d\n", nodeCounter[0]);
    out.printf("[DOM] Attributes: %d\n", nodeCounter[1]);
  }

  /*----------------------------------------------------------------------------*/

  /**
   * Recursively transforms the subtree rooted at {@code node}:
   * processing instructions are detached and appended to the end of the document, element
   * local names are reversed, and attribute local names are lower-cased and suffixed with the
   * last two characters of {@link #matrNr}. Namespace declarations are counted but not renamed.
   *
   * @param node root of the subtree to transform
   */
  private void renameNodes(Node node) {
    /* remove processing instruction nodes and append them at the end */
    if (node.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) {
      Document owner = node.getOwnerDocument();
      Node parent = node.getParentNode();
      if (parent != null) {
        parent.removeChild(node);
      }
      owner.appendChild(node);
      return;
    }

    /* rename elements */
    if (node.getNodeType() == Node.ELEMENT_NODE) {
      nodeCounter[0]++;
      /* decide on the prefix, not the namespace URI: default-namespace elements have none */
      String newName = prefixWithColon(node) + reverse(localNameOf(node));
      /* renameNode() may return a replacement node, so continue with its result */
      node = node.getOwnerDocument().renameNode(node, node.getNamespaceURI(), newName);
    }

    /* rename attributes */
    if (node.hasAttributes()) {
      NamedNodeMap attrs = node.getAttributes();
      /* snapshot first: renameNode() may remove and re-add attributes of the live map */
      List<Node> attrSnapshot = new ArrayList<Node>(attrs.getLength());
      for (int i = 0; i < attrs.getLength(); i++) {
        attrSnapshot.add(attrs.item(i));
      }

      for (Node attr : attrSnapshot) {
        nodeCounter[1]++;

        /* namespace declarations must keep their name */
        if (isNamespaceDeclaration(attr)) {
          continue;
        }

        String newName = prefixWithColon(attr)
            + localNameOf(attr).toLowerCase(Locale.ROOT) + attributeSuffix();
        attr.getOwnerDocument().renameNode(attr, attr.getNamespaceURI(), newName);
      }
    }

    /* snapshot the children: moving a PI out of the live list would skip its next sibling */
    NodeList liveChildren = node.getChildNodes();
    List<Node> children = new ArrayList<Node>(liveChildren.getLength());
    for (int i = 0; i < liveChildren.getLength(); i++) {
      children.add(liveChildren.item(i));
    }
    for (Node child : children) {
      renameNodes(child);
    }
  }

  /*============================================================================*/

  /**
   * SAX content handler that serializes the renamed document into {@link #output}.
   * Processing instructions are collected and emitted after the document element.
   */
  public class SAXHandler extends DefaultHandler {
    public StringBuffer output = new StringBuffer();
    private final List<String> pilist = new ArrayList<String>();
    private final String newline = System.getProperty("line.separator");

    /** Splits off the prefix (including the colon) of a qualified name; "" if unprefixed. */
    private String prefixPart(String qName) {
      return qName.substring(0, qName.indexOf(':') + 1);
    }

    /** Returns the local part of a qualified name, falling back to the reported local name. */
    private String localPart(String qName, String localName) {
      if (qName == null || qName.length() == 0) {
        return localName;
      }
      return qName.substring(qName.indexOf(':') + 1);
    }

    @Override
    public void startDocument() throws SAXException {
      output.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>").append(newline);
    }

    @Override
    public void startElement(String uri, String localName, String qName, Attributes atts)
        throws SAXException {
      nodeCounter[0]++;
      String name = (qName == null) ? "" : qName;
      output.append('<').append(prefixPart(name)).append(reverse(localPart(name, localName)));

      for (int i = 0; i < atts.getLength(); i++) {
        nodeCounter[1]++;
        String attQName = atts.getQName(i);
        String attLocal = localPart(attQName, atts.getLocalName(i));
        /* namespace declarations keep their name, everything else is renamed */
        String newLocal = isNamespaceDeclaration(attQName)
            ? attLocal
            : attLocal.toLowerCase(Locale.ROOT) + attributeSuffix();

        output.append(' ').append(prefixPart(attQName)).append(newLocal)
            .append("=\"").append(escapeXml(atts.getValue(i), true)).append('"');
      }
      output.append('>');
    }

    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
      String name = (qName == null) ? "" : qName;
      output.append("</").append(prefixPart(name))
          .append(reverse(localPart(name, localName))).append('>');
    }

    @Override
    public void processingInstruction(String target, String data) throws SAXException {
      /* remember the PI; it is written by endDocument() */
      StringBuilder pi = new StringBuilder("<?").append(target);
      if (data != null && data.length() > 0) {
        pi.append(' ').append(data);
      }
      pilist.add(pi.append("?>").toString());
    }

    @Override
    public void endDocument() throws SAXException {
      output.append(newline);
      for (String pi : pilist) {
        output.append(pi).append(newline);
      }
    }

    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
      /* the parser delivers decoded text, so markup characters have to be escaped again */
      output.append(escapeXml(new String(ch, start, length), false));
    }
  }

  /*----------------------------------------------------------------------------*/

  /**
   * SAX variant: streams the file through {@link SAXHandler}, writes the serialized result to
   * {@code beispiel3-sax.xml} (UTF-8), echoes it on {@link #out} and prints the statistics.
   *
   * @param xmlInput name of the XML file to process
   * @throws Exception on any parser or I/O failure
   */
  private void sax(String xmlInput) throws Exception {
    /* SAX initialization */
    XMLReader xr = XMLReaderFactory.createXMLReader();
    xr.setErrorHandler(new MyErrorHandler());
    /* should validation errors be reported? */
    xr.setFeature("http://xml.org/sax/features/validation", false);
    /* include namespaces and localNames in events */
    xr.setFeature("http://xml.org/sax/features/namespaces", true);
    /* report xmlns:* attributes */
    xr.setFeature("http://xml.org/sax/features/namespace-prefixes", true);

    /* connect our handler and parse the file */
    SAXHandler handler = new SAXHandler();
    xr.setContentHandler(handler);
    nodeCounter[0] = nodeCounter[1] = 0;
    FileInputStream fis = new FileInputStream(xmlInput);
    try {
      xr.parse(new InputSource(fis));
    } finally {
      fis.close();
    }

    /* write output to file for make test(-ns); encoding matches the XML declaration */
    Writer outfile = new OutputStreamWriter(new FileOutputStream("beispiel3-sax.xml"), "UTF-8");
    try {
      outfile.write(handler.output.toString());
    } finally {
      outfile.close();
    }

    /* output to stdout */
    out.print(handler.output);
    out.printf("[SAX] Elements: %d\n", nodeCounter[0]);
    out.printf("[SAX] Attributes: %d\n", nodeCounter[1]);
  }

  /*============================================================================*/

  /**
   * SAX content handler that rebuilds the document as a DOM tree inside the supplied
   * {@link Document}. Every element additionally receives a zero-padded {@code tiefe}
   * attribute; the counter starts at the numeric value of {@link #matrNr} and grows by one
   * per nesting level.
   */
  public class SAX2DOM extends DefaultHandler {
    private final Document doc;
    /* open nodes; the head is the node currently receiving children */
    private final Deque<Node> stack = new ArrayDeque<Node>();
    private int depth = Integer.parseInt(matrNr);
    /* text collected since the last structural event */
    private final StringBuilder chars = new StringBuilder();

    SAX2DOM(Document doc) {
      this.doc = doc;
    }

    @Override
    public void startDocument() throws SAXException {
      stack.push(doc);
      processChars();
    }

    @Override
    public void startElement(String uri, String localName, String qName, Attributes atts)
        throws SAXException {
      if (stack.isEmpty()) {
        throw new SAXException("Stack is empty. Document element got lost");
      }
      processChars();

      Element element = doc.createElementNS(uri, qName);
      for (int i = 0; i < atts.getLength(); i++) {
        /* NOTE(review): prints every attribute name to stdout; looks like debug output,
           kept because it is observable program output - confirm before removing */
        out.println(atts.getQName(i));
        element.setAttribute(atts.getQName(i), atts.getValue(i));
      }
      element.setAttribute("tiefe", String.format("%0" + matrNr.length() + "d", depth));

      /* the element is attached to its parent when its end tag is seen */
      stack.push(element);
      depth++;
    }

    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
      /* both the element and its parent have to be on the stack */
      if (stack.size() < 2) {
        throw new SAXException("endElement() without startElement()?");
      }
      processChars();

      Node node = stack.pop();
      stack.peek().appendChild(node);
      depth--;
    }

    @Override
    public void processingInstruction(String target, String data) throws SAXException {
      if (stack.isEmpty()) {
        throw new SAXException("Stack is empty. Document element got lost");
      }
      processChars();

      stack.peek().appendChild(doc.createProcessingInstruction(target, data));
    }

    @Override
    public void endDocument() throws SAXException {
      /* only doc should be left on stack! */
      if (stack.isEmpty()) {
        throw new SAXException("Stack is empty. Document element got lost");
      }
      if (stack.size() != 1) {
        throw new SAXException("Unassigned elements on stack?");
      }
      processChars();
      stack.pop();
    }

    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
      /* capture chars; they are flushed as one text node by processChars() */
      chars.append(ch, start, length);
    }

    /**
     * Appends the text collected so far as a single text node to the currently open node.
     *
     * @throws SAXException if no node is open
     */
    private void processChars() throws SAXException {
      if (stack.isEmpty()) {
        throw new SAXException("Stack is empty. Document element got lost");
      }
      if (chars.length() == 0) {
        return;
      }

      stack.peek().appendChild(doc.createTextNode(chars.toString()));
      chars.setLength(0);
    }
  }

  /*----------------------------------------------------------------------------*/

  /**
   * Combined variant: reads the file with a (non namespace-aware) SAX parser, rebuilds it as a
   * DOM tree via {@link SAX2DOM} and writes the indented result to
   * {@code beispiel3-domsax.xml}.
   *
   * @param xmlInput name of the XML file to process
   * @throws Exception on any parser, transformer or I/O failure
   */
  private void domsax(String xmlInput) throws Exception {
    MyErrorHandler errorhandler = new MyErrorHandler();

    /* SAX initialization */
    XMLReader xr = XMLReaderFactory.createXMLReader();
    xr.setErrorHandler(errorhandler);
    /* should validation errors be reported? */
    xr.setFeature("http://xml.org/sax/features/validation", false);
    /* disable namespaces and localNames in events */
    xr.setFeature("http://xml.org/sax/features/namespaces", false);
    /* don't report xmlns:* attributes */
    xr.setFeature("http://xml.org/sax/features/namespace-prefixes", false);

    /* create new dom tree */
    Document doc = createDocumentBuilder(errorhandler).newDocument();

    /* connect our handler and parse the file */
    xr.setContentHandler(new SAX2DOM(doc));
    FileInputStream fis = new FileInputStream(xmlInput);
    try {
      xr.parse(new InputSource(fis));
    } finally {
      fis.close();
    }

    /* write to file */
    Transformer tf = TransformerFactory.newInstance().newTransformer();
    tf.setOutputProperty(OutputKeys.INDENT, "yes");
    tf.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
    tf.transform(new DOMSource(doc), new StreamResult("beispiel3-domsax.xml"));
  }
}

/* vim: set et sw=2 ts=2: */