Turning off DTD validation

As soon as you create the DocumentBuilderFactory, call the method setValidating and set it to false.

Try out the following example. It calls setValidating(true) so that you can see the parsing errors; change the argument to false to turn validation off.

customers.dtd (!!remove the space between ? and xml):

<? xml version="1.0" encoding="UTF-8"?>
<!ELEMENT customer (name, addresses+, email)+ >
<!ATTLIST customer custid CDATA #REQUIRED>
 
<!ELEMENT name (#PCDATA)>
 
<!ELEMENT addresses (addline1, addline2, zip, location, state)>
<!ELEMENT addline1 (#PCDATA)>
<!ELEMENT addline2 (#PCDATA)>
<!ELEMENT zip (#PCDATA)>
<!ELEMENT location (#PCDATA)>
<!ELEMENT state (#PCDATA)>
 
<!ELEMENT email (#PCDATA)>

customers.xml (!!remove the space between ? and xml):

<? xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE customers SYSTEM "customers.dtd">
<customers>
   <customer id="C12345" type="prio1">
      <name>Joris Van den Bogaert</name>
      <address>
         <addressline>Handelskaai 3</addressline>
         <zip>1000</zip>
         <location>Brussels</location>
         <country>BELGIUM</country>
      </address>
   </customer>
</customers>

Main.java:

import org.w3c.dom.*;
  
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
 
import java.io.*;
   
/**
 * Parses customers.xml with a DOM parser whose factory has validation
 * switched on. Any exception raised while configuring the parser or
 * parsing the file has its stack trace printed.
 */
public class Main
{
   public static void main(String []args) {
      try {
         DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
         // The validating flag is set on the factory before the builder is obtained from it.
         factory.setValidating(true);

         DocumentBuilder builder = factory.newDocumentBuilder();
         // The resulting Document is not used any further in this example.
         builder.parse(new File("customers.xml"));
      }
      catch(Exception e) {
         e.printStackTrace();
      }
   }
}

Checking whether an XML node has attributes

Use the method hasAttributes defined in the interface Node, a superinterface of Element.

Main.java:

import org.w3c.dom.*;
  
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
 
import java.io.*;
   
/**
 * Parses customers.xml and prints whether the document (root) element
 * carries any attributes.
 */
public class Main
{
   public static void main(String []args) {
      try {
         DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
         Document parsed = builder.parse(new File("customers.xml"));

         Element top = parsed.getDocumentElement();
         // hasAttributes() is declared on Node, so it is available on every Element.
         String verdict = top.hasAttributes() ? " has attributes!" : " has no attributes!";
         System.out.println(top.getTagName() + verdict);
      }
      catch(Exception e) {
         e.printStackTrace();
      }
   }
}

customers.xml (!!remove the space between ? and xml):

<? xml version="1.0" encoding="UTF-8"?>
<customers id="1">
   <customer id="C12345" type="prio1">
      <name>Joris Van den Bogaert</name>
      <address>
         <addressline>Handelskaai 3</addressline>
         <zip>1000</zip>
         <location>Brussels</location>
         <country>BELGIUM</country>
      </address>
   </customer>
</customers>

List out the elements that belong to a particular XML namespace

For more information about namespaces, check out http://www.jclark.com/xml/xmlns.htm.

customers.xml (!!remove the space between ? and xml):

<? xml version="1.0" encoding="UTF-8"?>
<customers xmlns='http://www.esus.com/custns'>
   <customer id="C12345" type="prio1">
      <name>Joris Van den Bogaert</name>
      <address>
         <addressline>Handelskaai 3</addressline>
         <zip>1000</zip>
         <location>Brussels</location>
         <country>BELGIUM</country>
      </address>
   </customer>
   <destination>
      <address xmlns='http://www.esus.com/destns'>123.321.1.20</address>
   </destination>
</customers>

Main.java:

import org.w3c.dom.*;
  
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
  
import java.io.*;
   
/**
 * Lists the elements of customers.xml that belong to each of two XML
 * namespaces, using a namespace-aware DOM parser.
 */
public class Main
{
   public static void main(String []args) {
      Document doc;
  
      try {
         DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
         // getElementsByTagNameNS is only used here with a namespace-aware parser.
         dbf.setNamespaceAware(true);
         DocumentBuilder db = dbf.newDocumentBuilder();
         doc = db.parse(new File("customers.xml"));
 
         Element root = doc.getDocumentElement();
         root.normalize();
 
         // display all custns elements
         NodeList nl = doc.getElementsByTagNameNS("http://www.esus.com/custns", "*");
         System.out.println("Elements that belong to http://www.esus.com/custns namespace:");
         printNodeList(nl);
 
         // display all destns elements
         nl = doc.getElementsByTagNameNS("http://www.esus.com/destns", "*");
         System.out.println("Elements that belong to http://www.esus.com/destns namespace:");
         printNodeList(nl);          
      }
      catch(Exception e) {
         e.printStackTrace();
      }
   } 
 
   /**
    * Prints every node of the list: the node name indented by one tab,
    * then (indented by two tabs) the value of its first child when that
    * value is non-blank, then an empty line.
    *
    * Nodes without children, or whose first child has no node value
    * (for example a nested element), only get their name printed.
    *
    * @param nl the nodes to print
    */
   public static void printNodeList(NodeList nl) {
      for (int i=0; i<nl.getLength(); i++) {
         Node n = nl.item(i);
         System.out.println("\t" + n.getNodeName());

         // The first child may be missing (empty element) and its value
         // may be null (element child), so both are checked before use.
         Node tn = n.getFirstChild();
         String text = (tn == null) ? null : tn.getNodeValue();
         if (text != null && !text.trim().equals("")) {   
            System.out.println("\t\t" + text);
         }
         System.out.println();
      }
   }
}

outputs:

Elements that belong to http://www.esus.com/custns namespace:
	customers
 
	customer
 
	name
		Joris Van den Bogaert
 
	address
 
	addressline
		Handelskaai 3
 
	zip
		1000
 
	location
		Brussels
 
	country
		BELGIUM
 
	destination
 
Elements that belong to http://www.esus.com/destns namespace:
	address
		123.321.1.20

Creating your own DOM error handler

Call the method setErrorHandler and pass in your custom class that implements org.xml.sax.ErrorHandler. Make sure you call the method setValidating(true)!

The following example prints out a bunch of error messages, because the XML doesn’t match the DTD.

customers.dtd (!!remove the space between ? and xml):

<? xml version="1.0" encoding="UTF-8"?>
<!ELEMENT customer (name, addresses+, email)+ >
<!ATTLIST customer custid CDATA #REQUIRED>
 
<!ELEMENT name (#PCDATA)>
 
<!ELEMENT addresses (addline1, addline2, zip, location, state)>
<!ELEMENT addline1 (#PCDATA)>
<!ELEMENT addline2 (#PCDATA)>
<!ELEMENT zip (#PCDATA)>
<!ELEMENT location (#PCDATA)>
<!ELEMENT state (#PCDATA)>
 
<!ELEMENT email (#PCDATA)>

customers.xml (!!remove the space between ? and xml):

<? xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE customers SYSTEM "customers.dtd">
<customers>
   <customer id="C12345">
      <name>Joris Van den Bogaert</name>
      <address>
         <addressline>Handelskaai 3</addressline>
         <zip>1000</zip>
         <location>Brussels</location>
         <country>BELGIUM</country>
      </address>
   </customer>
   <destination>
      <address>123.321.1.20</address>
   </destination>
</customers>

Main.java:

import org.w3c.dom.*;
  
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
 
import javax.xml.parsers.DocumentBuilder;
import org.xml.sax.ErrorHandler;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
 
import java.io.*;
   
public class Main
{
   public static void main(String []args) {
      Document doc;
  
      try {
         DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
         dbf.setValidating(true);
         
         DocumentBuilder db = dbf.newDocumentBuilder();
         MyErrorHandler handler = new MyErrorHandler();
         db.setErrorHandler(handler);
         doc = db.parse(new File("customers.xml")); 
      }
      catch(Exception e) {
         e.printStackTrace();
      }
   } 
}
 
/**
 * Error handler that writes each parser notification to System.err,
 * prefixed with its severity. Warnings and errors are only reported;
 * fatal errors are reported and then rethrown.
 */
class MyErrorHandler implements ErrorHandler {
   /** Writes the exception message to System.err behind the given prefix. */
   private static void report(String prefix, SAXParseException problem) {
      System.err.println(prefix + problem.getMessage());
   }

   public void warning(SAXParseException e) throws SAXException {
      report("[warning] ", e);
   }

   public void error(SAXParseException e) throws SAXException {
      report("[error] ", e);
   }

   public void fatalError(SAXParseException e) throws SAXException {
      report("[fatal error] ", e);
      // Rethrow the very same exception after reporting it.
      throw e;
   }
}

outputs:

[error] Element type "customers" must be declared.
[error] Attribute "custid" is required and must be specified for element type "customer".
[error] Attribute "id" must be declared for element type "customer".
[error] Element type "address" must be declared.
[error] Element type "addressline" must be declared.
[error] Element type "country" must be declared.
[error] The content of element type "customer" must match "(name,addresses+,email)+".
[error] Element type "destination" must be declared.
[error] Element type "address" must be declared.

Using SAX

SAX stands for Simple API for XML. A SAX parser is event-based. Events are reported through callbacks as the parser moves through the XML document.

This example was compiled and run with JDK 1.4, as the APIs and reference implementation are included. If you use a lower JDK version, download JAXP (Java APIs for XML Processing) from http://java.sun.com/xml/jaxp.html.

Main.java:

import org.xml.sax.helpers.*;
import javax.xml.parsers.*;
import org.xml.sax.*;
import java.io.*;
 
/**
 * Parses example.xml with a SAX parser and reports every parsing event
 * through a MySAXHandler. Configuration, parsing and I/O failures have
 * their stack trace printed.
 */
public class Main 
{
   public static void main(String []args) {
      try {
         SAXParserFactory factory = SAXParserFactory.newInstance();
         SAXParser parser = factory.newSAXParser();
 
         // Hand the parser the File itself instead of wrapping a FileReader:
         // this code then leaves no Reader unclosed, and the parser works out
         // the character encoding from the document rather than relying on
         // FileReader's platform-default charset.
         parser.parse(new File("example.xml"), new MySAXHandler());
      }
      catch(ParserConfigurationException e) {
         e.printStackTrace();
      }
      catch(SAXException e) {
         e.printStackTrace();
      }
      catch(IOException e) {
         e.printStackTrace();
      }        
   }
}
 
/**
 * SAX handler that prints a trace of the parsing events to System.out,
 * indenting the output by three spaces per open element.
 */
class MySAXHandler extends DefaultHandler
{
   /** Current indentation width in spaces: +3 on element start, -3 after element end. */
   int indent = 0;
 
   /** Prints the start-of-document marker. */
   public void startDocument() throws SAXException {
      System.out.println("startDocument()");
   }
 
   /** Lowers the indentation by three, then prints the end-of-document marker. */
   public void endDocument() throws SAXException {
      indent(-3);
      System.out.println("endDocument()");
   }
 
   /** Prints the character data at the current indentation, skipping blank chunks. */
   public void characters(char[] ch, int start, int length)  {
      String text = new String(ch, start, length);
      if (text.trim().length() == 0) {
         return;
      }
      indent(0);
      System.out.println(text);
   }
 
   /** Prints a fixed marker for every ignorable-whitespace event. */
   public void ignorableWhitespace(char[] ch, int start, int length) {
      System.out.println("[whitespace]");
   }
 
   /** Prints the element name one level deeper, followed by one line per attribute. */
   public void startElement(String uri, String localName, 
                            String qName, Attributes attributes) {
      indent(3);
      System.out.println("[element " + qName + "]");

      int position = 0;
      while (position < attributes.getLength()) {
         indent(0);
         String name = attributes.getQName(position);
         String value = attributes.getValue(position);
         System.out.println("[Attribute " + name + ", value=" + value + "]");
         position++;
      }
   }
 
   /** Prints the end marker at the element's own level, then steps back one level. */
   public void endElement(String uri, String localName, String qName) {
      indent(0);
      System.out.println("[endelement " + qName + "]");
      indent = indent - 3;
   }
 
   /**
    * Adjusts the indentation by pos and prints that many spaces
    * (nothing when the resulting width is zero or negative).
    */
   public void indent(int pos) {
      indent = indent + pos;
      for (int left = indent; left > 0; left--) {
         System.out.print(" ");
      }
   }
}

example.xml:

<?xml version="1.0"?>
<customer id="C123456">
   <name>Joris Van den Bogaert</name>
   <email>joris1@esus.com</email>
</customer>

output:

startDocument()
   [element customer]
   [Attribute id, value=C123456]
      [element name]
      Joris Van den Bogaert
      [endelement name]
      [element email]
      joris1@esus.com
      [endelement email]
   [endelement customer]
endDocument()

Traversing a DOM tree using a NodeIterator

(DOM Level 2!) A NodeIterator can come in pretty handy to walk over all the nodes, even though you lose the structure of the document. See the questions/answers about TreeWalker to retain the structure.

The following simple example shows you how it works. If you have played around with FileFilters before, it’s a no-brainer. It prints all the zip codes that contain an “8”.

For more information on the NodeIterator, the NodeFilter and their options, check out the API docs: NodeIterator, NodeFilter

customers.xml (!!remove the space between ? and xml):

<? xml version="1.0" encoding="UTF-8"?>
<customers>
   <customer id="C12345">
      <name>Joris Van den Bogaert</name>
      <address>
         <addressline>Handelskaai 3</addressline>
         <zip>1000</zip>
         <location>Brussels</location>
         <country>BELGIUM</country>
      </address>
   </customer>
   <customer id="C23495">
      <name>John Doe</name>
      <address>
         <addressline>5, S 5th Ave.</addressline>
         <zip>59715</zip>
         <location>Bozeman, MT</location>
         <country>US</country>
      </address>
   </customer>
   <customer id="C03429">
      <name>John Babcock</name>
      <address>
         <addressline>73, Broad street</addressline>
         <zip>06418</zip>
         <location>Chester, CT</location>
         <country>US</country>
      </address>
   </customer>
   <customer id="C12345">
      <name>Dominique Bodard</name>
      <address>
         <addressline>21-23, Rue de Madrid</addressline>
         <zip>75008</zip>
         <location>Paris</location>
         <country>FRANCE</country>
      </address>
   </customer>
</customers>

Main.java:

import org.w3c.dom.*;
import org.w3c.dom.traversal.*;
  
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
 
import java.io.*;
   
/**
 * Walks customers.xml with a DOM Level 2 NodeIterator restricted by a
 * ZipFilter and prints the value of the first child of every node the
 * filter accepts.
 */
public class Main
{
   public static void main(String []args) {
      try {
         DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
         Document parsed = builder.parse(new File("customers.xml"));

         // Bail out early when the DOM implementation lacks the Traversal feature.
         boolean traversalAvailable = parsed.isSupported("Traversal", "2.0");
         if (!traversalAvailable) {
            System.out.println("Traversal not supported in your parser version");
            System.exit(1);
         }

         // Iterate from the document node over all node types; the filter decides what is returned.
         NodeIterator matches = ((DocumentTraversal) parsed).createNodeIterator(
               parsed, NodeFilter.SHOW_ALL, new ZipFilter(), true);

         for (Node hit = matches.nextNode(); hit != null; hit = matches.nextNode()) {
            System.out.println(hit.getFirstChild().getNodeValue());
         }
      }
      catch(Exception e) {
         e.printStackTrace();
      }
   }
}
 
/**
 * Node filter that accepts only nodes named "zip" whose first child has
 * a node value containing the character '8'. Every other node is skipped.
 */
class ZipFilter implements NodeFilter
{
   /**
    * @param n the node offered by the iterator
    * @return FILTER_ACCEPT for a matching "zip" node, FILTER_SKIP otherwise
    */
   public short acceptNode(Node n) {
      if (!n.getNodeName().equals("zip")) {
         return FILTER_SKIP;
      }

      // An empty <zip/> has no first child, and a non-text first child has
      // a null node value; both are skipped instead of raising an exception.
      Node first = n.getFirstChild();
      if (first == null) {
         return FILTER_SKIP;
      }
      String value = first.getNodeValue();
      if (value != null && value.indexOf("8") > -1) { 
         return FILTER_ACCEPT;
      }
      return FILTER_SKIP;
   }
}

outputs:

06418
75008