View Javadoc
1   /*
2    * This file is part of Waarp Project (named also Waarp or GG).
3    *
4    *  Copyright (c) 2019, Waarp SAS, and individual contributors by the @author
5    *  tags. See the COPYRIGHT.txt in the distribution for a full listing of
6    * individual contributors.
7    *
8    *  All Waarp Project is free software: you can redistribute it and/or
9    * modify it under the terms of the GNU General Public License as published by
10   * the Free Software Foundation, either version 3 of the License, or (at your
11   * option) any later version.
12   *
13   * Waarp is distributed in the hope that it will be useful, but WITHOUT ANY
14   * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
15   * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
16   *
17   *  You should have received a copy of the GNU General Public License along with
18   * Waarp . If not, see <http://www.gnu.org/licenses/>.
19   */
20  package org.waarp.common.xml;
21  
22  import org.dom4j.Document;
23  import org.dom4j.DocumentException;
24  import org.dom4j.DocumentHelper;
25  import org.dom4j.Element;
26  import org.dom4j.Node;
27  import org.dom4j.io.OutputFormat;
28  import org.dom4j.io.SAXReader;
29  import org.dom4j.io.XMLWriter;
30  import org.waarp.common.exception.InvalidArgumentException;
31  import org.waarp.common.utility.WaarpStringUtils;
32  import org.xml.sax.SAXException;
33  
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InvalidObjectException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.List;
40  
41  /**
42   * XML utility that handles simple cases as:<br>
43   * <ul>
44   * <li>XPath as /x/y/z for a node and referring to root of Document</li>
45   * <li>XPath as x/y/z for a node and referring to current referenced node</li>
46   * <li>Any XPath can be a singleton (unique node referenced by the XPath) or
47   * multiple nodes (same XPath)</li>
48   * <li>Only Element as Node: no Attribute or any other type within XML</li>
49   * <li>Any other path is not supported: //x /x@y ./ ../</li>
50   * <li>Supports special SubXml tree as element (singleton or multiple)</li>
51   * </ul>
52   */
53  public final class XmlUtil {
54  
55    private static final String NODE_NOT_FOUND = "Node not found: ";
56  
  /**
   * Private constructor: this final class only exposes static helpers and is
   * not meant to be instantiated.
   */
  private XmlUtil() {
  }
59  
60    /**
61     * @return the newly created SAXReader
62     */
63    public static SAXReader getNewSaxReader() {
64      final SAXReader saxReader = new SAXReader();
65      try {
66        saxReader.setFeature(
67            "http://apache.org/xml/features/disallow-doctype-decl", true);
68        saxReader.setFeature(
69            "http://apache.org/xml/features/nonvalidating/load-dtd-grammar",
70            false);
71        saxReader.setFeature(
72            "http://apache.org/xml/features/nonvalidating/load-external-dtd",
73            false);
74        saxReader.setFeature("http://xml.org/sax/features/resolve-dtd-uris",
75                             false);
76        saxReader.setFeature(
77            "http://xml.org/sax/features/external-general-entities", false);
78        saxReader.setFeature(
79            "http://xml.org/sax/features/external-parameter-entities", false);
80        saxReader.setFeature(
81            "http://apache.org/xml/features/validation/id-idref-checking", false);
82      } catch (final SAXException e) {
83        //Parse with external resources downloading allowed.
84      }
85      return saxReader;
86    }
87  
88    /**
89     * @param filename
90     *
91     * @return Existing Document from filename
92     *
93     * @throws IOException
94     * @throws DocumentException
95     */
96    public static Document getDocument(final String filename)
97        throws IOException, DocumentException {
98      final File file = new File(filename);
99      if (!file.canRead()) {
100       throw new IOException("File is not readable: " + filename);
101     }
102     return getDocument(file);
103   }
104 
105   /**
106    * @param file
107    *
108    * @return Existing Document from file
109    *
110    * @throws IOException
111    * @throws DocumentException
112    */
113   public static Document getDocument(final File file)
114       throws IOException, DocumentException {
115     if (!file.canRead()) {
116       throw new IOException("File is not readable: " + file.getPath());
117     }
118     final SAXReader reader = getNewSaxReader();
119     return reader.read(file);
120   }
121 
122   /**
123    * Read the document from the string
124    *
125    * @param document as String
126    *
127    * @return the Document
128    *
129    * @throws DocumentException
130    */
131   public static Document readDocument(final String document)
132       throws DocumentException {
133     return DocumentHelper.parseText(document);
134   }
135 
136   /**
137    * @param document
138    *
139    * @return the document as an XML string
140    */
141   public static String writeToString(final Document document) {
142     return document.asXML();
143   }
144 
145   /**
146    * @param element
147    *
148    * @return the element as an XML string
149    */
150   public static String writeToString(final Element element) {
151     return element.asXML();
152   }
153 
154   /**
155    * @return an empty new Document
156    */
157   public static Document createEmptyDocument() {
158     return DocumentHelper.createDocument();
159   }
160 
161   /**
162    * Save the document into the file
163    *
164    * @param filename
165    * @param document
166    *
167    * @throws IOException
168    */
169   public static void saveDocument(final String filename,
170                                   final Document document) throws IOException {
171     final File file = new File(filename);
172     saveDocument(file, document);
173   }
174 
175   /**
176    * Save the document into the file
177    *
178    * @param file
179    * @param document
180    *
181    * @throws IOException
182    */
183   public static void saveDocument(final File file, final Document document)
184       throws IOException {
185     if (file.exists() && !file.canWrite()) {
186       throw new IOException("File is not writable: " + file.getPath());
187     }
188 
189     saveDocument(new FileWriter(file), document);
190   }
191 
192   /**
193    * Save the document into the Writer outWriter
194    *
195    * @param outWriter
196    * @param document
197    *
198    * @throws IOException
199    */
200   public static void saveDocument(final Writer outWriter,
201                                   final Document document) throws IOException {
202     final OutputFormat format = OutputFormat.createPrettyPrint();
203     format.setEncoding(WaarpStringUtils.UTF8.name());
204     final XMLWriter writer = new XMLWriter(outWriter, format);
205     writer.write(document);
206     writer.flush();
207     writer.close();
208   }
209 
210   /**
211    * Save the branch from element into the file
212    *
213    * @param filename
214    * @param element
215    *
216    * @throws IOException
217    */
218   public static void saveElement(final String filename, final Element element)
219       throws IOException {
220     final File file = new File(filename);
221     saveElement(file, element);
222   }
223 
224   /**
225    * Save the branch from element into the file
226    *
227    * @param file
228    * @param element
229    *
230    * @throws IOException
231    */
232   public static void saveElement(final File file, final Element element)
233       throws IOException {
234     if (file.exists() && !file.canWrite()) {
235       throw new IOException("File is not writable: " + file.getPath());
236     }
237     final OutputFormat format = OutputFormat.createPrettyPrint();
238     format.setEncoding(WaarpStringUtils.UTF8.name());
239     final XMLWriter writer = new XMLWriter(new FileWriter(file), format);
240     writer.write(element);
241     writer.flush();
242     writer.close();
243   }
244 
245   /**
246    * Add or Get (if already existing) an element given by the path relative to
247    * the referent element and set the
248    * value
249    *
250    * @param ref
251    * @param path
252    * @param value
253    *
254    * @return the new added or already existing element with new value
255    */
256   public static Element addOrSetElement(final Element ref, final String path,
257                                         final String value) {
258     final Element current = addOrGetElement(ref, path);
259     current.setText(value);
260     return current;
261   }
262 
263   /**
264    * Add or Get (if already existing) an element given by the path relative to
265    * the referent element
266    *
267    * @param ref
268    * @param path
269    *
270    * @return the new added or already existing element
271    */
272   public static Element addOrGetElement(final Element ref, final String path) {
273     final String[] pathes = path.split("/");
274     Element current = ref;
275     for (final String nodename : pathes) {
276       if (!nodename.isEmpty()) {
277         final Element exist = current.element(nodename);
278         if (exist == null) {
279           current = current.addElement(nodename);
280         } else {
281           current = exist;
282         }
283       }
284     }
285     return current;
286   }
287 
288   /**
289    * Add an element given by the path relative to the referent element and set
290    * the value
291    *
292    * @param ref
293    * @param path
294    * @param value
295    *
296    * @return the new added element with value
297    */
298   public static Element addAndSetElementMultiple(final Element ref,
299                                                  final String path,
300                                                  final String value) {
301     final Element current = addAndGetElementMultiple(ref, path);
302     current.setText(value);
303     return current;
304   }
305 
306   /**
307    * Add an element given by the path relative to the referent element
308    *
309    * @param ref
310    * @param path
311    *
312    * @return the new added element
313    */
314   public static Element addAndGetElementMultiple(final Element ref,
315                                                  final String path) {
316     final String[] pathes = path.split("/");
317     Element current = ref;
318     for (int i = 0; i < pathes.length - 1; i++) {
319       final String nodename = pathes[i];
320       if (!nodename.isEmpty()) {
321         final Element exist = current.element(nodename);
322         if (exist == null) {
323           current = current.addElement(nodename);
324         } else {
325           current = exist;
326         }
327       }
328     }
329     final String nodename = pathes[pathes.length - 1];
330     if (!nodename.isEmpty()) {
331       current = current.addElement(nodename);
332     }
333     return current;
334   }
335 
336   /**
337    * @param ref
338    * @param path
339    *
340    * @return the parent element associated with the path relatively to the
341    *     referent element
342    *
343    * @throws DocumentException
344    */
345   public static Element getParentElement(final Element ref, final String path)
346       throws DocumentException {
347     String npath = path;
348     while (npath.charAt(0) == '/') {
349       npath = npath.substring(1);
350     }
351     final Element current = (Element) ref.selectSingleNode(npath);
352     if (current == null) {
353       throw new DocumentException(NODE_NOT_FOUND + path);
354     }
355     return current.getParent();
356   }
357 
358   /**
359    * @param ref
360    * @param path
361    *
362    * @return the element associated with the path relatively to the referent
363    *     element
364    *
365    * @throws DocumentException
366    */
367   public static Element getElement(final Element ref, final String path)
368       throws DocumentException {
369     String npath = path;
370     while (npath.charAt(0) == '/') {
371       npath = npath.substring(1);
372     }
373     final Element current = (Element) ref.selectSingleNode(npath);
374     if (current == null) {
375       throw new DocumentException(NODE_NOT_FOUND + path);
376     }
377     return current;
378   }
379 
380   /**
381    * @param ref
382    * @param path
383    *
384    * @return the element associated with the path relatively to the referent
385    *     element
386    *
387    * @throws DocumentException
388    */
389   public static List<Node> getElementMultiple(final Element ref,
390                                               final String path)
391       throws DocumentException {
392     String npath = path;
393     while (npath.charAt(0) == '/') {
394       npath = npath.substring(1);
395     }
396     final List<Node> list = ref.selectNodes(npath);
397     if (list == null || list.isEmpty()) {
398       throw new DocumentException("Nodes not found: " + path);
399     }
400     return list;
401   }
402 
403   /**
404    * Add or Get (if already existing) an element given by the path relative to
405    * the document and set the value
406    *
407    * @param doc
408    * @param path
409    * @param value
410    *
411    * @return the new added or already existing element with new value
412    */
413   public static Element addOrSetElement(final Document doc, final String path,
414                                         final String value) {
415     final Element current = addOrGetElement(doc, path);
416     if (current != null) {
417       current.setText(value);
418     }
419     return current;
420   }
421 
422   /**
423    * Add or Get (if already existing) an element given by the path relative to
424    * the document
425    *
426    * @param doc
427    * @param path
428    *
429    * @return the new added or already existing element
430    */
431   public static Element addOrGetElement(final Document doc, final String path) {
432     final String[] pathes = path.split("/");
433     int rank;
434     for (rank = 0; rank < pathes.length; rank++) {
435       if (!pathes[rank].isEmpty()) {
436         break; // found
437       }
438     }
439     if (rank >= pathes.length) {
440       return null; // Should not be !
441     }
442     Element current = (Element) doc.selectSingleNode(pathes[rank]);
443     if (current == null) {
444       current = doc.addElement(pathes[rank]);
445     }
446     for (int i = rank + 1; i < pathes.length; i++) {
447       final String nodename = pathes[i];
448       if (!nodename.isEmpty()) {
449         final Element exist = current.element(nodename);
450         if (exist == null) {
451           current = current.addElement(nodename);
452         } else {
453           current = exist;
454         }
455       }
456     }
457     return current;
458   }
459 
460   /**
461    * Add an element given by the path relative to the document and set the
462    * value
463    *
464    * @param doc
465    * @param path
466    * @param value
467    *
468    * @return the new added element with value
469    */
470   public static Element addAndSetElementMultiple(final Document doc,
471                                                  final String path,
472                                                  final String value) {
473     final Element current = addAndGetElementMultiple(doc, path);
474     if (current != null) {
475       current.setText(value);
476     }
477     return current;
478   }
479 
480   /**
481    * Add an element given by the path relative to the document
482    *
483    * @param doc
484    * @param path
485    *
486    * @return the new added element
487    */
488   public static Element addAndGetElementMultiple(final Document doc,
489                                                  final String path) {
490     final String[] pathes = path.split("/");
491     int rank;
492     for (rank = 0; rank < pathes.length; rank++) {
493       if (!pathes[rank].isEmpty()) {
494         break; // found
495       }
496     }
497     if (rank >= pathes.length) {
498       return null; // Should not be !
499     }
500     Element current = (Element) doc.selectSingleNode(pathes[rank]);
501     if (current == null) {
502       current = doc.addElement(pathes[rank]);
503     }
504     if (rank == pathes.length - 1) {
505       // Last level is the root !!! No multiple root is allowed !!!
506       // So just give back the root if it exists
507       return current;
508     }
509     for (int i = rank + 1; i < pathes.length - 1; i++) {
510       final String nodename = pathes[i];
511       if (!nodename.isEmpty()) {
512         final Element exist = current.element(nodename);
513         if (exist == null) {
514           current = current.addElement(nodename);
515         } else {
516           current = exist;
517         }
518       }
519     }
520     final String nodename = pathes[pathes.length - 1];
521     if (!nodename.isEmpty()) {
522       current = current.addElement(nodename);
523     }
524     return current;
525   }
526 
527   /**
528    * @param doc
529    * @param path
530    *
531    * @return the Parent element associated with the path relatively to the
532    *     document
533    *
534    * @throws DocumentException
535    */
536   public static Element getParentElement(final Document doc, final String path)
537       throws DocumentException {
538     final Element current = (Element) doc.selectSingleNode(path);
539     if (current == null) {
540       throw new DocumentException(NODE_NOT_FOUND + path);
541     }
542     return current.getParent();
543   }
544 
545   /**
546    * @param doc
547    * @param path
548    *
549    * @return the element associated with the path relatively to the document
550    *
551    * @throws DocumentException
552    */
553   public static Element getElement(final Document doc, final String path)
554       throws DocumentException {
555     final Element current = (Element) doc.selectSingleNode(path);
556     if (current == null) {
557       throw new DocumentException(NODE_NOT_FOUND + path);
558     }
559     return current;
560   }
561 
562   /**
563    * @param doc
564    * @param path
565    *
566    * @return the element associated with the path relatively to the document
567    *
568    * @throws DocumentException
569    */
570   public static List<Node> getElementMultiple(final Document doc,
571                                               final String path)
572       throws DocumentException {
573     final List<Node> list = doc.selectNodes(path);
574     if (list == null || list.isEmpty()) {
575       throw new DocumentException("Nodes not found: " + path);
576     }
577     return list;
578   }
579 
580   /**
581    * Remove extra space and tab, newline from beginning and end of String
582    *
583    * @param string
584    *
585    * @return the trimed string
586    */
587   public static String getExtraTrimed(final String string) {
588     return string.replaceAll("[\\s]+", " ").trim();
589     // was ("^[\\s]*|[\\s]*$ ", "")
590   }
591 
592   /**
593    * Create the XmlValues from the XmlDevls and the Document
594    *
595    * @param doc
596    * @param decls
597    *
598    * @return XmlValues
599    */
600   public static XmlValue[] read(final Document doc, final XmlDecl[] decls) {
601     final XmlValue[] values;
602     final int len = decls.length;
603     values = new XmlValue[len];
604     for (int i = 0; i < len; i++) {
605       final XmlValue value = new XmlValue(decls[i]);
606       values[i] = value;
607       if (decls[i].isSubXml()) {
608         if (decls[i].isMultiple()) {
609           final List<Node> elts;
610           try {
611             elts = getElementMultiple(doc, decls[i].getXmlPath());
612           } catch (final DocumentException e) {
613             continue;
614           }
615           addValueToNodes(value, elts, decls[i]);
616         } else {
617           final Element element;
618           try {
619             element = getElement(doc, decls[i].getXmlPath());
620           } catch (final DocumentException e) {
621             continue;
622           }
623           setValueToElement(value, read(element, decls[i].getSubXml()));
624         }
625       } else if (decls[i].isMultiple()) {
626         final List<Node> elts;
627         try {
628           elts = getElementMultiple(doc, decls[i].getXmlPath());
629         } catch (final DocumentException e) {
630           continue;
631         }
632         addFromStringToElements(value, elts);
633       } else {
634         final Element element;
635         try {
636           element = getElement(doc, decls[i].getXmlPath());
637         } catch (final DocumentException e) {
638           continue;
639         }
640         final String svalue = element.getText();
641         try {
642           value.setFromString(getExtraTrimed(svalue));
643         } catch (final InvalidArgumentException e) {
644           // nothing
645         }
646       }
647     }
648     return values;
649   }
650 
651   /**
652    * Create the XmlValues from the XmlDevls and the reference Element
653    *
654    * @param ref
655    * @param decls
656    *
657    * @return XmlValues
658    */
659   public static XmlValue[] read(final Element ref, final XmlDecl[] decls) {
660     final XmlValue[] values;
661     final int len = decls.length;
662     values = new XmlValue[len];
663     for (int i = 0; i < len; i++) {
664       final XmlValue value = new XmlValue(decls[i]);
665       values[i] = value;
666       if (decls[i].isSubXml()) {
667         if (decls[i].isMultiple()) {
668           final List<Node> elts;
669           try {
670             elts = getElementMultiple(ref, decls[i].getXmlPath());
671           } catch (final DocumentException e) {
672             continue;
673           }
674           addValueToNodes(value, elts, decls[i]);
675         } else {
676           final Element element;
677           try {
678             element = getElement(ref, decls[i].getXmlPath());
679           } catch (final DocumentException e) {
680             continue;
681           }
682           setValueToElement(value, read(element, decls[i].getSubXml()));
683         }
684       } else if (decls[i].isMultiple()) {
685         final List<Node> elts;
686         try {
687           elts = getElementMultiple(ref, decls[i].getXmlPath());
688         } catch (final DocumentException e) {
689           continue;
690         }
691         addFromStringToElements(value, elts);
692       } else {
693         final Element element;
694         try {
695           element = getElement(ref, decls[i].getXmlPath());
696         } catch (final DocumentException e) {
697           continue;
698         }
699         final String svalue = element.getText();
700         try {
701           value.setFromString(getExtraTrimed(svalue));
702         } catch (final InvalidArgumentException e) {
703           // nothing
704         }
705       }
706     }
707     return values;
708   }
709 
710   private static void addFromStringToElements(final XmlValue value,
711                                               final List<Node> elts) {
712     for (final Node element : elts) {
713       final String svalue = element.getText();
714       try {
715         value.addFromString(getExtraTrimed(svalue));
716       } catch (final InvalidObjectException e) {
717         // nothing
718       } catch (final InvalidArgumentException e) {
719         // nothing
720       }
721     }
722   }
723 
724   private static void setValueToElement(final XmlValue value,
725                                         final XmlValue[] read) {
726     if (read == null) {
727       return;
728     }
729     try {
730       value.setValue(read);
731     } catch (final InvalidObjectException e) {
732       // nothing
733     }
734   }
735 
736   private static void addValueToNodes(final XmlValue value,
737                                       final List<Node> elts,
738                                       final XmlDecl decl) {
739     for (final Node element : elts) {
740       final XmlValue[] newValue = read((Element) element, decl.getSubXml());
741       try {
742         value.addValue(newValue);
743       } catch (final InvalidObjectException e) {
744         // nothing
745       }
746     }
747   }
748 
749   /**
750    * Add all nodes from XmlValues into Document
751    *
752    * @param doc
753    * @param values
754    */
755   @SuppressWarnings("unchecked")
756   public static void write(final Document doc, final XmlValue[] values) {
757     for (final XmlValue value : values) {
758       if (value != null) {
759         if (value.isSubXml()) {
760           if (value.isMultiple()) {
761             final List<XmlValue[]> list = (List<XmlValue[]>) value.getList();
762             for (final XmlValue[] object : list) {
763               final Element ref =
764                   addAndGetElementMultiple(doc, value.getXmlPath());
765               write(ref, object);
766             }
767           } else {
768             final Element ref = addOrGetElement(doc, value.getXmlPath());
769             write(ref, value.getSubXml());
770           }
771         } else if (value.isMultiple()) {
772           final List<?> list = value.getList();
773           for (final Object object : list) {
774             addAndSetElementMultiple(doc, value.getXmlPath(),
775                                      object.toString());
776           }
777         } else {
778           addOrSetElement(doc, value.getXmlPath(), value.getIntoString());
779         }
780       }
781     }
782   }
783 
784   /**
785    * Add all nodes from XmlValues from the referenced Element
786    *
787    * @param ref
788    * @param values
789    */
790   @SuppressWarnings("unchecked")
791   public static void write(final Element ref, final XmlValue[] values) {
792     for (final XmlValue value : values) {
793       if (value != null) {
794         if (value.isSubXml()) {
795           if (value.isMultiple()) {
796             final List<XmlValue[]> list = (List<XmlValue[]>) value.getList();
797             for (final XmlValue[] object : list) {
798               final Element newref =
799                   addAndGetElementMultiple(ref, value.getXmlPath());
800               write(newref, object);
801             }
802           } else {
803             final Element newref = addOrGetElement(ref, value.getXmlPath());
804             write(newref, value.getSubXml());
805           }
806         } else if (value.isMultiple()) {
807           final List<?> list = value.getList();
808           for (final Object object : list) {
809             addAndSetElementMultiple(ref, value.getXmlPath(),
810                                      object.toString());
811           }
812         } else {
813           addOrSetElement(ref, value.getXmlPath(), value.getIntoString());
814         }
815       }
816     }
817   }
818 
819   /**
820    * Write the given XML document to filename using the encoding
821    *
822    * @param filename
823    * @param encoding if null, default encoding UTF-8 will be used
824    * @param document
825    *
826    * @throws IOException
827    */
828   public static void writeXML(final String filename, final String encoding,
829                               final Document document) throws IOException {
830     final OutputFormat format = OutputFormat.createPrettyPrint();
831     if (encoding != null) {
832       format.setEncoding(encoding);
833     } else {
834       format.setEncoding(WaarpStringUtils.UTF8.name());
835     }
836     final XMLWriter writer;
837     writer = new XMLWriter(new FileWriter(filename), format);
838     writer.write(document);
839     try {
840       writer.close();
841     } catch (final IOException ignored) {
842       // nothing
843     }
844   }
845 }