Download | Plain Text | No Line Numbers


/*
 * Semistructured Data (Semistrukturierte Daten) - summer semester 2009
 * Exercise 3
 */
  5.  
  6. import java.util.ArrayList;
  7. import java.util.Stack;
  8. import java.lang.StringBuffer;
  9. import java.io.FileInputStream;
  10. import java.io.FileWriter;
  11.  
  12. import javax.xml.parsers.DocumentBuilder;
  13. import javax.xml.parsers.DocumentBuilderFactory;
  14. import javax.xml.transform.Transformer;
  15. import javax.xml.transform.TransformerFactory;
  16. import javax.xml.transform.Result;
  17. import javax.xml.transform.Source;
  18. import javax.xml.transform.OutputKeys;
  19. import javax.xml.transform.dom.DOMSource;
  20. import javax.xml.transform.sax.SAXSource;
  21. import javax.xml.transform.stream.StreamResult;
  22.  
  23. import org.xml.sax.*;
  24. import org.xml.sax.helpers.DefaultHandler;
  25. import org.xml.sax.helpers.XMLReaderFactory;
  26.  
  27. import org.w3c.dom.*;
  28.  
  29. public class Beispiel3
  30. {
  31. static java.io.PrintStream out = System.out;
  32. static String matrNr = "0728348";
  33. int[] nodeCounter = new int[2];
  34.  
  35. public static void main(String[] args)
  36. {
  37. // Argumentueberpruefung
  38. if (args.length != 1)
  39. {
  40. System.err.println("Usage: java Beispiel3 <XML-File>");
  41. System.exit(1);
  42. }
  43.  
  44. String xmlInput = args[0];
  45.  
  46. Beispiel3 beispiel = new Beispiel3();
  47.  
  48. try
  49. {
  50. beispiel.dom(xmlInput);
  51. beispiel.sax(xmlInput);
  52. beispiel.domsax(xmlInput);
  53. }
  54. catch(Exception ex)
  55. {
  56. System.err.println(ex);
  57. System.exit(1);
  58. }
  59. }
  60.  
  61. /*============================================================================*/
  62.  
  63. public class MyErrorHandler implements ErrorHandler
  64. {
  65. public void warning(SAXParseException e) throws SAXException
  66. {
  67. printError("WARNING", e);
  68. }
  69.  
  70. public void error(SAXParseException e) throws SAXException
  71. {
  72. printError("ERROR", e);
  73. }
  74.  
  75. public void fatalError(SAXParseException e) throws SAXException
  76. {
  77. printError("FATAL", e);
  78. }
  79.  
  80. private void printError(String severity, SAXParseException e)
  81. {
  82. out.printf(" --> %s at %3d,%3d: %s\n"
  83. , severity
  84. , e.getLineNumber()
  85. , e.getColumnNumber()
  86. , e.getMessage());
  87. }
  88. }
  89.  
  90. /*============================================================================*/
  91.  
  92. /**
  93.   * Vervollstaendigen Sie die Methode. Der Name des XML-Files, welches verarbeitet werden soll,
  94.   * wird mittels Parameter "xmlInput" uebergeben.
  95.   *
  96.   * Verwenden Sie fuer die Loesung dieser Teilaufgabe einen DOM-Baum.
  97.   */
  98. private void dom(String xmlInput) throws Exception
  99. {
  100. /* DOM initialization */
  101. DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
  102. //factory.setValidating(true);
  103. factory.setAttribute("http://java.sun.com/xml/jaxp/properties/schemaLanguage", "http://www.w3.org/2001/XMLSchema");
  104. factory.setNamespaceAware(true);
  105. factory.setIgnoringComments(true);
  106. factory.setIgnoringElementContentWhitespace(true);
  107.  
  108. /* build dom tree */
  109. DocumentBuilder builder = factory.newDocumentBuilder();
  110. builder.setErrorHandler(new MyErrorHandler());
  111. Document doc = builder.parse(xmlInput);
  112.  
  113. /* do the work */
  114. nodeCounter[0] = nodeCounter[1] = 0;
  115. renameNodes(doc);
  116. renameNodes(doc.getDocumentElement());
  117.  
  118. /* write to file */
  119. TransformerFactory tff = TransformerFactory.newInstance();
  120. Transformer tf = tff.newTransformer();
  121. tf.transform(new DOMSource(doc), new StreamResult("beispiel3-dom.xml"));
  122.  
  123. /* output statistic */
  124. out.printf("[DOM] Elements: %d\n", nodeCounter[0]);
  125. out.printf("[DOM] Attributes: %d\n", nodeCounter[1]);
  126. }
  127.  
  128. /*----------------------------------------------------------------------------*/
  129.  
  130. private void renameNodes(Node node)
  131. {
  132. /* remove processing instruction nodes and append them at the end */
  133. if (node.getNodeType() == node.PROCESSING_INSTRUCTION_NODE)
  134. {
  135. /* NOTE: why are PIs from root missing in the reference file? - just do the same */
  136. //boolean append = (node.getParentNode() == node.getOwnerDocument()) ? false : true;
  137. boolean append = true;
  138. node.getParentNode().removeChild(node);
  139. if (append)
  140. node.getOwnerDocument().appendChild(node);
  141. }
  142. /* rename elements */
  143. else if (node.getNodeType() == node.ELEMENT_NODE)
  144. {
  145. nodeCounter[0]++;
  146. StringBuffer strbuf = new StringBuffer(node.getLocalName());
  147. String newnodename = strbuf.reverse().toString();
  148. if (node.getNamespaceURI() != null)
  149. newnodename = node.getPrefix() + ":" + newnodename;
  150. node.getOwnerDocument().renameNode(node, node.getNamespaceURI(), newnodename);
  151. }
  152.  
  153. /* rename attributes */
  154. if (node.hasAttributes())
  155. {
  156. NamedNodeMap attrs = node.getAttributes();
  157.  
  158. /* store attributes in arraylist due to renameNode() may delete and append attributes */
  159. ArrayList<Node> attrslist = new ArrayList<Node>();
  160. for(int i = 0; i< attrs.getLength(); i++)
  161. attrslist.add(attrs.item(i));
  162.  
  163. for(int i = 0; i < attrslist.size(); i++)
  164. {
  165. nodeCounter[1]++;
  166. Node attr = attrslist.get(i);
  167.  
  168. /* skip xmlns-namespace */
  169. if (attr.getPrefix() != null && attr.getPrefix().equals("xmlns"))
  170. continue;
  171.  
  172. /* generate and set new nodename */
  173. String newnodename = attr.getLocalName().toLowerCase() + matrNr.substring(matrNr.length() - 2);
  174. if (attr.getNamespaceURI() != null)
  175. newnodename = attr.getPrefix() + ":" + newnodename;
  176. attr.getOwnerDocument().renameNode(attr, attr.getNamespaceURI(), newnodename);
  177. }
  178. }
  179.  
  180. /* do the same for all childs */
  181. NodeList childs = node.getChildNodes();
  182. for(int i = 0; i < childs.getLength(); i++)
  183. {
  184. renameNodes(childs.item(i));
  185. }
  186. }
  187.  
  188. /*============================================================================*/
  189.  
  190. public class SAXHandler extends DefaultHandler
  191. {
  192. public StringBuffer output = new StringBuffer();
  193. private ArrayList<String> pilist = new ArrayList<String>();
  194.  
  195. public void startDocument() throws SAXException
  196. {
  197. output.append("<?xml version=\"1.0\" encoding=\"utf-8\"?>").append(System.getProperty("line.separator"));
  198. }
  199.  
  200. public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException
  201. {
  202. nodeCounter[0]++;
  203. String prefix = "";
  204. if (uri.length() > 0)
  205. prefix = qName.substring(0, qName.length() - localName.length());
  206. output.append(String.format("<%s%s", prefix, new StringBuffer(localName).reverse()));
  207.  
  208. for(int i = 0; i < atts.getLength(); i++)
  209. {
  210. nodeCounter[1]++;
  211. String attrprefix = "";
  212. if (atts.getQName(i).length() > atts.getLocalName(i).length())
  213. attrprefix = atts.getQName(i).substring(0, atts.getQName(i).length() - atts.getLocalName(i).length());
  214.  
  215. /* generate and set new nodename */
  216. output.append(String.format(" %s%s=\"%s\"",
  217. attrprefix,
  218. (atts.getQName(i).startsWith("xmlns:")) ? atts.getLocalName(i) : atts.getLocalName(i).toLowerCase() + matrNr.substring(matrNr.length() - 2),
  219. atts.getValue(i)));
  220. }
  221.  
  222. output.append(">");
  223. }
  224.  
  225. public void endElement(String uri, String localName, String qName) throws SAXException
  226. {
  227. String prefix = "";
  228. if (uri.length() > 0)
  229. prefix = qName.substring(0, qName.length() - localName.length());
  230. output.append(String.format("</%s%s>", prefix, new StringBuffer(localName).reverse()));
  231. }
  232.  
  233. public void processingInstruction(String target, String data) throws SAXException
  234. {
  235. pilist.add("<?" + target + " " + data + "?>");
  236. }
  237.  
  238. public void endDocument() throws SAXException
  239. {
  240. output.append(System.getProperty("line.separator"));
  241. for (int i = 0; i < pilist.size(); i++)
  242. output.append(pilist.get(i)).append(System.getProperty("line.separator"));
  243. }
  244.  
  245. public void characters(char[] ch, int start, int length) throws SAXException
  246. {
  247. output.append(new String(ch, start, length));
  248. }
  249. }
  250.  
  251. /*----------------------------------------------------------------------------*/
  252.  
  253. /**
  254.   * Vervollstaendigen Sie die Methode. Der Name des XML-Files, welches verarbeitet werden soll,
  255.   * wird mittels Parameter "xmlInput" uebergeben.
  256.   *
  257.   * Verwenden Sie fuer die Loesung dieser Teilaufgabe einen SAX Prozessor.
  258.   */
  259. private void sax(String xmlInput) throws Exception
  260. {
  261. /* SAX initialization + read from file */
  262. FileInputStream fis = new FileInputStream(xmlInput);
  263. InputSource is = new InputSource(fis);
  264.  
  265. XMLReader xr = XMLReaderFactory.createXMLReader();
  266. xr.setErrorHandler(new MyErrorHandler());
  267. xr.setFeature("http://xml.org/sax/features/validation", false); // should validation errors be reported?
  268. xr.setFeature("http://xml.org/sax/features/namespaces", true); // include namespaces and localNames in events
  269. xr.setFeature("http://xml.org/sax/features/namespace-prefixes", true); // report xmlns:* attributes
  270.  
  271. /* connet our handler and parse the file */
  272. SAXHandler handler = new SAXHandler();
  273. xr.setContentHandler(handler);
  274. nodeCounter[0] = nodeCounter[1] = 0;
  275. xr.parse(is);
  276. fis.close();
  277.  
  278. /* write output to file for make test(-ns) */
  279. FileWriter outfile = new FileWriter("beispiel3-sax.xml");
  280. outfile.write(handler.output.toString());
  281. outfile.close();
  282.  
  283. /* output to stdout */
  284. out.print(handler.output);
  285. out.printf("[SAX] Elements: %d\n", nodeCounter[0]);
  286. out.printf("[SAX] Attributes: %d\n", nodeCounter[1]);
  287. }
  288.  
  289. /*============================================================================*/
  290.  
  291. public class SAX2DOM extends DefaultHandler
  292. {
  293. private Document doc = null;
  294. private Stack<Node> stack = new Stack<Node>();
  295. private Integer depth = Integer.parseInt(matrNr);
  296. private String chars = "";
  297.  
  298. SAX2DOM(Document doc)
  299. {
  300. this.doc = doc;
  301. }
  302.  
  303. public void startDocument() throws SAXException
  304. {
  305. stack.push(doc);
  306. processChars();
  307. }
  308.  
  309. public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException
  310. {
  311. if (stack.empty())
  312. throw new SAXException("Stack is empty. Document element got lost");
  313. processChars();
  314.  
  315. Element element = doc.createElementNS(uri, qName);
  316. for(int i = 0; i < atts.getLength(); i++)
  317. {
  318. /*
  319.   String attruri = atts.getURI(i);
  320.   if (atts.getQName(i).startsWith("xmlns"))
  321.   attruri = "http://www.w3.org/2000/xmlns/";
  322.   else if (atts.getQName(i).startsWith("xml"))
  323.   attruri = "http://www.w3.org/XML/1998/namespace";
  324.   element.setAttributeNS(attruri, atts.getQName(i), atts.getValue(i));
  325.   */
  326.  
  327. out.println(atts.getQName(i));
  328. element.setAttribute(atts.getQName(i), atts.getValue(i));
  329. }
  330. element.setAttribute("tiefe", String.format("%0" + matrNr.length() + "d", depth));
  331. stack.push(element);
  332. depth++;
  333. }
  334.  
  335. public void endElement(String uri, String localName, String qName) throws SAXException
  336. {
  337. if (stack.empty())
  338. throw new SAXException("endElement() without startElement()?");
  339. processChars();
  340.  
  341. /* pop parent, append node and push parent back */
  342. Node node = stack.pop();
  343. Node parent = stack.pop();
  344. parent.appendChild(node);
  345. stack.push(parent);
  346. depth--;
  347. }
  348.  
  349. public void processingInstruction(String target, String data) throws SAXException
  350. {
  351. if (stack.empty())
  352. throw new SAXException("Stack is empty. Document element got lost");
  353. processChars();
  354.  
  355. /* pop parent and append PI-node */
  356. Node parent = stack.pop();
  357. parent.appendChild(doc.createProcessingInstruction(target, data));
  358. stack.push(parent);
  359. }
  360.  
  361. public void endDocument() throws SAXException
  362. {
  363. /* only doc should be left on stack! */
  364. if (stack.empty())
  365. throw new SAXException("Stack is empty. Document element got lost");
  366. if (stack.size() != 1)
  367. throw new SAXException("Unassigned elements on stack?");
  368. processChars();
  369. stack.pop();
  370. }
  371.  
  372. public void characters(char[] ch, int start, int length) throws SAXException
  373. {
  374. /* capture chars */
  375. chars += new String(ch, start, length);
  376. }
  377.  
  378. private void processChars() throws SAXException
  379. {
  380. if (stack.empty())
  381. throw new SAXException("Stack is empty. Document element got lost");
  382. if (chars.length() <= 0)
  383. return;
  384.  
  385. /* pop parent and append textnode */
  386. Node parent = stack.pop();
  387. parent.appendChild(doc.createTextNode(chars));
  388. stack.push(parent);
  389. chars = "";
  390. }
  391. }
  392.  
  393. /*----------------------------------------------------------------------------*/
  394.  
  395. /**
  396.   * Vervollstaendigen Sie die Methode. Der Name des XML-Files, welches verarbeitet werden soll,
  397.   * wird mittels Parameter "xmlInput" uebergeben.
  398.   */
  399. private void domsax (String xmlInput) throws Exception
  400. {
  401. MyErrorHandler errorhandler = new MyErrorHandler();
  402.  
  403. /* SAX initialization + read from file */
  404. FileInputStream fis = new FileInputStream(xmlInput);
  405. InputSource is = new InputSource(fis);
  406.  
  407. XMLReader xr = XMLReaderFactory.createXMLReader();
  408. xr.setErrorHandler(errorhandler);
  409. xr.setFeature("http://xml.org/sax/features/validation", false); // should validation errors be reported?
  410. /*
  411.   xr.setFeature("http://xml.org/sax/features/namespaces", true); // include namespaces and localNames in events
  412.   xr.setFeature("http://xml.org/sax/features/namespace-prefixes", true); // report xmlns:* attributes
  413.   */
  414. xr.setFeature("http://xml.org/sax/features/namespaces", false); // disable namespaces and localNames in events
  415. xr.setFeature("http://xml.org/sax/features/namespace-prefixes", false); // don't report xmlns:* attributes
  416.  
  417. /* DOM initialization */
  418. DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
  419. //factory.setValidating(true);
  420. factory.setAttribute("http://java.sun.com/xml/jaxp/properties/schemaLanguage", "http://www.w3.org/2001/XMLSchema");
  421. factory.setNamespaceAware(true);
  422. factory.setIgnoringComments(true);
  423. factory.setIgnoringElementContentWhitespace(true);
  424.  
  425. /* create new dom tree */
  426. DocumentBuilder docbuilder = factory.newDocumentBuilder();
  427. docbuilder.setErrorHandler(errorhandler);
  428. Document doc = docbuilder.newDocument();
  429.  
  430. /* connet our handler and parse the file */
  431. xr.setContentHandler(new SAX2DOM(doc));
  432. xr.parse(is);
  433. fis.close();
  434.  
  435. /* write to file */
  436. TransformerFactory tff = TransformerFactory.newInstance();
  437. Transformer tf = tff.newTransformer();
  438. tf.setOutputProperty(OutputKeys.INDENT, "yes");
  439. tf.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
  440. tf.transform(new DOMSource(doc), new StreamResult("beispiel3-domsax.xml"));
  441. }
  442. }
  443.  
  444. /* vim: set et sw=2 ts=2: */
  445.