Using SAX

SAX stands for Simple API for XML. A SAX parser is event-based. Events are reported through callbacks as the parser moves through the XML document.

This example was compiled and run with JDK 1.4, which includes the APIs and a reference implementation. If you use an older JDK version, download JAXP (Java APIs for XML Processing) from http://java.sun.com/xml/jaxp.html.

Main.java:

import org.xml.sax.helpers.*;
import javax.xml.parsers.*;
import org.xml.sax.*;
import java.io.*;
 
/**
 * Parses example.xml with the JAXP default SAX parser and reports every
 * parse event through a MySAXHandler.
 */
public class Main 
{
   /**
    * Creates a SAX parser through the JAXP factory and parses example.xml
    * from the current working directory. Any configuration, parse or I/O
    * failure is reported as a stack trace.
    *
    * @param args ignored
    */
   public static void main(String []args) {
      try {
         SAXParserFactory factory = SAXParserFactory.newInstance();
         SAXParser parser = factory.newSAXParser();
 
         // Pass the File itself instead of wrapping a FileReader: a Reader
         // decodes with the platform default charset (ignoring the encoding
         // named in the XML declaration) and was never closed here. With a
         // File the parser opens the stream itself and detects the encoding.
         parser.parse(new File("example.xml"), new MySAXHandler());
      }
      catch(ParserConfigurationException e) {
         e.printStackTrace();
      }
      catch(SAXException e) {
         e.printStackTrace();
      }
      catch(IOException e) {
         e.printStackTrace();
      }        
   }
}
 
/**
 * SAX handler that prints an indented trace of the events it receives to
 * System.out: document start/end, elements with their attributes, and
 * non-blank character data. Nesting is shown with 3 spaces per open element.
 */
class MySAXHandler extends DefaultHandler
{
   /** Current indentation in spaces; grows by 3 for every open element. */
   int indent = 0;
 
   /**
    * Announces the start of the document and resets the indentation, so the
    * same handler instance can be used for several parses in a row.
    */
   public void startDocument() throws SAXException {
      indent = 0;
      System.out.println("startDocument()");
   }
 
   /**
    * Announces the end of the document. Every endElement() has already
    * undone its own indentation, so nothing is subtracted here; the previous
    * indent(-3) left the counter at -3 and skewed any following parse.
    */
   public void endDocument() throws SAXException {
      System.out.println("endDocument()");
   }
 
   /**
    * Prints a chunk of character data at the current indentation. Chunks
    * that are empty after trimming (e.g. line breaks between tags) are skipped.
    *
    * @param ch     buffer holding the characters
    * @param start  offset of the first character of this chunk
    * @param length number of characters in this chunk
    */
   public void characters(char[] ch, int start, int length)  {
      String s = new String(ch, start, length);
      if (!s.trim().equals("")) {
         indent(0);
         System.out.println(s);
      }
   }
 
   /** Prints a fixed marker for each ignorable-whitespace event. */
   public void ignorableWhitespace(char[] ch, int start, int length) {
      System.out.println("[whitespace]");
   }
 
   /**
    * Prints the element's qualified name one level deeper than its parent,
    * followed by one line per attribute at the same depth.
    */
   public void startElement(String uri, String localName, 
                            String qName, Attributes attributes) {
      indent(3);
      System.out.println("[element " + qName + "]");
      for (int i=0; i<attributes.getLength(); i++) {
         indent(0);
         System.out.println("[Attribute " + attributes.getQName(i) +
                            ", value=" + attributes.getValue(i) + "]");
      }
   }
 
   /**
    * Prints the closing marker at the element's own depth, then steps the
    * indentation back to the parent's level.
    */
   public void endElement(String uri, String localName, String qName) {
      indent(0);
      System.out.println("[endelement " + qName + "]");
      indent -= 3;
   }
 
   /**
    * Adjusts the indentation by pos and prints that many spaces (without a
    * line break). Pass 0 to print the current indentation unchanged.
    *
    * @param pos number of spaces to add to the current indentation
    */
   public void indent(int pos) {
      indent += pos;
      for (int i=0; i<indent; i++) {
         System.out.print(" ");
      }
   }
}

example.xml:

<?xml version="1.0"?>
<customer id="C123456">
   <name>Joris Van den Bogaert</name>
   <email>joris1@esus.com</email>
</customer>

output:

startDocument()
   [element customer]
   [Attribute id, value=C123456]
      [element name]
      Joris Van den Bogaert
      [endelement name]
      [element email]
      joris1@esus.com
      [endelement email]
   [endelement customer]
endDocument()

Traversing a DOM tree using a NodeIterator

(DOM Level 2!) A NodeIterator can come in pretty handy to walk over all the nodes, even though you lose the structure of the document. See the questions/answers about TreeWalker to retain the structure.

The following simple example shows you how it works. If you have played around with FileFilters before, it’s a no-brainer. It shows all the zip codes that contain an “8”.

For more information on the NodeIterator, the NodeFilter and their options, check out the API docs: NodeIterator, NodeFilter

customers.xml (!!remove the space between ? and xml):

<? xml version="1.0" encoding="UTF-8"?>
<customers>
   <customer id="C12345">
      <name>Joris Van den Bogaert</name>
      <address>
         <addressline>Handelskaai 3</addressline>
         <zip>1000</zip>
         <location>Brussels</location>
         <country>BELGIUM</country>
      </address>
   </customer>
   <customer id="C23495">
      <name>John Doe</name>
      <address>
         <addressline>5, S 5th Ave.</addressline>
         <zip>59715</zip>
         <location>Bozeman, MT</location>
         <country>US</country>
      </address>
   </customer>
   <customer id="C03429">
      <name>John Babcock</name>
      <address>
         <addressline>73, Broad street</addressline>
         <zip>06418</zip>
         <location>Chester, CT</location>
         <country>US</country>
      </address>
   </customer>
   <customer id="C12345">
      <name>Dominique Bodard</name>
      <address>
         <addressline>21-23, Rue de Madrid</addressline>
         <zip>75008</zip>
         <location>Paris</location>
         <country>FRANCE</country>
      </address>
   </customer>
</customers>

Main.java:

import org.w3c.dom.*;
import org.w3c.dom.traversal.*;
  
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
 
import java.io.*;
   
/**
 * Walks customers.xml with a DOM Level 2 NodeIterator and prints the text
 * of every node that ZipFilter accepts.
 */
public class Main
{
   /**
    * Parses customers.xml into a DOM tree, exits with status 1 when the
    * document does not report the "Traversal" 2.0 feature, and otherwise
    * prints the value of the first child of each accepted node.
    *
    * @param args ignored
    */
   public static void main(String []args) {
      try {
         DocumentBuilder builder =
            DocumentBuilderFactory.newInstance().newDocumentBuilder();
         Document document = builder.parse(new File("customers.xml"));

         boolean traversalAvailable = document.isSupported("Traversal", "2.0");
         if (!traversalAvailable) {
            System.out.println("Traversal not supported in your parser version");
            System.exit(1);
         }

         // The Document object doubles as the factory for iterators
         DocumentTraversal traversal = (DocumentTraversal) document;
         NodeFilter zipFilter = new ZipFilter();
         NodeIterator walker =
            traversal.createNodeIterator(document, NodeFilter.SHOW_ALL, zipFilter, true);

         // nextNode() returns null once the iterator is exhausted
         for (Node hit = walker.nextNode(); hit != null; hit = walker.nextNode()) {
            Node firstChild = hit.getFirstChild();
            System.out.println(firstChild.getNodeValue());
         }
      }
      catch(Exception e) {
         e.printStackTrace();
      }
   } 
}
 
/**
 * NodeFilter that accepts only nodes named "zip" whose first child carries a
 * value containing the digit 8; everything else is skipped.
 */
class ZipFilter implements NodeFilter
{
   /**
    * Decides whether the iterator should report the given node.
    *
    * @param n the node under consideration
    * @return FILTER_ACCEPT for a "zip" node whose first child's value
    *         contains "8", FILTER_SKIP otherwise
    */
   public short acceptNode(Node n) {
      if (!"zip".equals(n.getNodeName())) {
         return FILTER_SKIP;
      }

      // An empty <zip/> has no child at all, and a non-text first child has a
      // null node value; both used to cause a NullPointerException.
      Node firstChild = n.getFirstChild();
      if (firstChild == null) {
         return FILTER_SKIP;
      }
      String value = firstChild.getNodeValue();
      if (value != null && value.indexOf("8") > -1) {
         return FILTER_ACCEPT;
      }
      return FILTER_SKIP;
   }
}

outputs:

06418
75008

Using Apache’s Xerces SAX parser

Download the Xerces Java parser at http://xml.apache.org/xerces2-j/index.html. Adjust your classpath and specify that you want to use the Xerces parser.

Programmatically:

import org.xml.sax.helpers.*;
import javax.xml.parsers.*;
import org.xml.sax.*;
import java.io.*;
 
/**
 * Parses example.xml with the Xerces SAX parser, selected programmatically
 * by instantiating the Xerces JAXP factory directly.
 */
public class Main 
{
   /**
    * Creates a SAX parser from the Xerces factory and parses example.xml
    * from the current working directory with a no-op DefaultHandler. Any
    * configuration, parse or I/O failure is reported as a stack trace.
    *
    * @param args ignored
    */
   public static void main(String []args) {
      try {
         SAXParserFactory factory = new org.apache.xerces.jaxp.SAXParserFactoryImpl();    
         SAXParser parser = factory.newSAXParser();
 
         // Pass the File itself instead of wrapping a FileReader: a Reader
         // decodes with the platform default charset (ignoring the encoding
         // named in the XML declaration) and was never closed here. With a
         // File the parser opens the stream itself and detects the encoding.
         parser.parse(new File("example.xml"), new DefaultHandler());
      }
      catch(ParserConfigurationException e) {
         e.printStackTrace();
      }
      catch(SAXException e) {
         e.printStackTrace();
      }
      catch(IOException e) {
         e.printStackTrace();
      }        
   }
}

Declaratively:

   java -Djavax.xml.parsers.SAXParserFactory=org.apache.xerces.jaxp.SAXParserFactoryImpl Main

Creating an HTML page from an XML template using XMLC

Enhydra’s XMLC allows you to have random access to your XML document by compiling the XML document into a Java class that contains a DOM (document object model) representation. For every XML template that you provide, a Java class equivalent is generated. Using this class, you can not only access your tags with standard DOM methods, but XMLC also provides convenience methods like getters and setters to dynamically alter the elements.

So, with XMLC, you don’t have to include Java code inside your HTML (JSP) nor vice versa (servlets).

For every tag that contains an ID attribute, a getElementXXX() method is generated. For example, if you have the following tag defined in your template:

   <font id="topcolor" color="">test color</font>

The method getElementTopcolor() is generated which returns an object of type org.w3c.dom.html.HTMLFontElement.

It also creates setTextXXX methods for your ID elements, so you can easily change the text between tags.

The following is a simple example that starts from a template HTML, dynamically modifies its element and prints out the resulting HTML.

First follow the instructions on this page to download and install XMLC.

I created the file C:\testing\xml\TestTemplate.html:

<html>
<head>
   <title id="title">test title</title>
</head>
<body>
   <h1 id="topheader">test header</h1>
   <font id="topcolor" color="">test color</font>
   <br>
   <br>
   <span id="maintext">test span</span>
</body>
</html>

To generate the .class file for this HTML:

Enhydra$ //c/javalibs/xmlc2.0.1/bin/xmlc -keep TestTemplate.html

(Note 1: I installed xmlc in c:\javalibs; Note 2: -keep specifies that the .java file should not be deleted after generation. This way, you can inspect the .java file and find out what methods are available!)

The “client”: Main.java:

import org.w3c.dom.html.*;
import org.w3c.dom.*;
 
/**
 * Client for the XMLC-generated TestTemplate class: fills in the elements
 * that carry an id in the template and prints the resulting document.
 */
public class Main
{
   /**
    * Instantiates the compiled template, replaces the text of its id'ed
    * elements, colours the font element red and prints the result.
    *
    * @param args ignored
    */
   public static void main(String []args) {
      TestTemplate page = new TestTemplate();
 
      // element with id "title": fetch it and replace its text
      page.getElementTitle().setText("Generated XML");
 
      // element with id "topheader"
      page.setTextTopheader("Insightful quote");
  
      // element with id "topcolor": new text first, then the color attribute
      page.setTextTopcolor("[I cannot be held responsible]");
      page.getElementTopcolor().setColor("#ff0000");
 
      // element with id "maintext"
      page.setTextMaintext("The shorter you live, the longer you're dead");
 
      // write out the modified HTML
      System.out.print(page.toDocument());
   }
}

Compile and run. Result:

<HTML>
<HEAD>
   <TITLE id="title">Generated XML</TITLE>
</HEAD>
<BODY>
   <H1 id="topheader">Insightful quote</H1>
   <FONT color="#ff0000" id="topcolor">[I cannot be held responsible]</FONT> 
   <BR> 
   <BR> 
   <SPAN>The shorter you live, the longer you're dead</SPAN>
</BODY>
</HTML>

For more information, check the tutorial at http://www.pisoftware.com/publications/xmlc-tutorial/intro.html.

Serializing a Java object into XML

Representing a complex object graph in XML is not easy, and binary serialization is much faster. If you still need to represent your Java objects in XML, you can use the JSX package, downloadable from http://www.freshmeat.net/projects/jsx.

Put JSX0.9.5.0.jar in your classpath and try out the following example.

Customer.java:

import java.util.*;
 
/**
 * Simple customer record used to demonstrate XML serialization: a name, an
 * age, a list of addresses and a table of phone numbers.
 */
class Customer {
   private String name;
   private int age;
   private Vector addresses;
   private Hashtable phones;
 
   /**
    * Creates a customer. The two collections are cloned, so later changes
    * to the caller's Vector or Hashtable do not affect this object.
    *
    * @param name      customer name
    * @param age       customer age
    * @param addresses address entries; must not be null
    * @param phones    phone entries; must not be null
    */
   public Customer(String name, int age, Vector addresses, Hashtable phones) {
      this.name = name;
      this.age = age;
      this.addresses = (Vector) addresses.clone();   
      this.phones = (Hashtable) phones.clone();
   }
 
   /**
    * Returns a four-line summary: name, age, addresses and phones, each on
    * its own line.
    *
    * @return the multi-line description, without a trailing line break
    */
   public String toString() {
      StringBuffer s = new StringBuffer();
      // "\n" is a line break; the listing previously appended a literal "n"
      s.append("Name = ").append(name).append("\n");
      s.append("Age  = ").append(age).append("\n");
      s.append("addresses = ").append(addresses).append("\n");
      s.append("phones    = ").append(phones);
      return s.toString();
   }
}

MainOut.java:

import java.util.*;
import java.io.*;
import JSX.*;
 
/**
 * Builds a sample Customer and serializes it to test.xml with JSX.
 */
public class MainOut
{
   /**
    * Creates a customer with two addresses and two phone entries and writes
    * it to test.xml in the current working directory. I/O failures are
    * reported as a stack trace.
    *
    * @param args ignored
    */
   public static void main(String []args) {
      String name = "Jefke McCann";
      int age = 24;
      Vector addresses = new Vector();
      addresses.add("43, 5th Ave. 59715, Bozeman, MT");
      addresses.add("Handelskaai 3, 1000 Brussels, Belgium");
      Hashtable phones = new Hashtable();
      phones.put("001 (406) 585-2345", "ISDN");
      phones.put("0032 (2) 219.28.39", "FAX");
 
      Customer cust = new Customer(name, age, addresses, phones);
 
      try {
         FileWriter writer = new FileWriter("test.xml");
         try {
            ObjOut out = new ObjOut(true, writer);
            out.writeObject(cust);
            // NOTE(review): if ObjOut buffers internally it may need its own
            // flush()/close() before the writer is closed - confirm in the JSX API.
         }
         finally {
            // Closing flushes the FileWriter; it was previously left open, so
            // the file content depended on buffers being flushed at exit.
            writer.close();
         }
      }
      catch(IOException e) {
         e.printStackTrace();
      }
   }
}

MainIn.java:

import java.util.*;
import java.io.*;
import JSX.*;
 
/**
 * Reads test.xml back with JSX and prints the restored Customer.
 */
public class MainIn
{
   /**
    * Deserializes a Customer from test.xml in the current working directory
    * and prints it. Failures are reported as a stack trace.
    *
    * @param args ignored
    */
   public static void main(String []args) {
      try {
         FileReader reader = new FileReader("test.xml");
         try {
            ObjIn in = new ObjIn(reader);
            Customer cust = (Customer) in.readObject();
            System.out.println(cust);
         }
         finally {
            // Release the file handle even when reading fails; the reader
            // was previously never closed.
            reader.close();
         }
      }
      catch(ClassNotFoundException e) {
         e.printStackTrace();
      }
      catch(IOException e) {
         e.printStackTrace();
      }
   }
}

If you execute java MainOut, the following XML will be written to disk.
test.xml:

<? jsx version="1"?>
<Customer
 name="Jefke McCann"
 age="24">
  <java.util.Vector obj-name="addresses">
    <java.lang.String valueOf="43, 5th Ave. 59715, Bozeman, MT"/>
    <java.lang.String valueOf="Handelskaai 3, 1000 Brussels, Belgium"/>
  </java.util.Vector>
  <java.util.Hashtable obj-name="phones">
    <java.lang.String valueOf="001 (406) 585-2345"/>
    <java.lang.String valueOf="ISDN"/>
 
    <java.lang.String valueOf="0032 (2) 219.28.39"/>
    <java.lang.String valueOf="FAX"/>
  </java.util.Hashtable>
</Customer>

MainIn will read in test.xml and create a Customer object from it.

Using JAXB

JAXB allows for a mapping between Java objects and XML files. The following example goes through a couple steps so you get a feel of this powerful library.

Download the early-access implementation from http://java.sun.com/xml/jaxb, and add the JAR files to your classpath.

First, write a DTD that describes your object.
customer.dtd (!!remove the space between the ? and xml) :

<? xml version="1.0" encoding="UTF-8"?>
<!ELEMENT customer (name, addresses+, email)+ >
<!ATTLIST customer custid CDATA #REQUIRED>
 
<!ELEMENT name (#PCDATA)>
 
<!ELEMENT addresses (addline1, addline2, zip, location, state)>
<!ELEMENT addline1 (#PCDATA)>
<!ELEMENT addline2 (#PCDATA)>
<!ELEMENT zip (#PCDATA)>
<!ELEMENT location (#PCDATA)>
<!ELEMENT state (#PCDATA)>
 
<!ELEMENT email (#PCDATA)>

To map this description of a customer to an object (tree), you can invoke the schema to java compiler:

   java com.sun.tools.xjc.Main customer.dtd -roots customer

output:

   .\Addresses.java
   .\Customer.java
   .\Email.java
   .\Name.java

Four files have been created that together are the Java equivalent of the DTD. You can further customize the mapping by creating a .xjs file. For example, the following customer.xjs file specifies that the attribute custid is mapped onto a long in the Java object (the default was a String). Also, the classes are to be placed in the package
com.esus.jaxbtest.
customer.xjs (!!remove the space between ? and xml):

<? xml version="1.0" encoding="UTF-8"?>
<xml-java-binding-schema version="1.0ea">
  <options package="com.esus.jaxbtest"/>
 
  <element name="customer" type="class" root="true">
    <attribute name="custid" convert="long"/>
  </element>
</xml-java-binding-schema>

Now run the conversion utility again, but specify your customized mapping:

   java com.sun.tools.xjc.Main customer.dtd customer.xjs

output is now:

   .\com\esus\jaxbtest\Addresses.java
   .\com\esus\jaxbtest\Customer.java
   .\com\esus\jaxbtest\Email.java
   .\com\esus\jaxbtest\Name.java

Now, let’s test the mapping and create an XML file data.xml that describes one customer.

data.xml (!!remove the space between ? and xml):

<? xml version="1.0"?>
<customer>
   <name>Jefke McCann</name>
   <addresses>
      <addline1>516, S. 5th Ave.</addline1>
      <zip>59715</zip>
      <location>Bozeman</location>
      <state>MT</state>
   </addresses>
   <email>jefke.mccann@yahoo.com</email>
</customer>

java Main:

C:\myxml\jaxb>java Main
javax.xml.bind.MissingContentException: addline2
        at Addresses.validateThis(Addresses.java:93)
        at javax.xml.bind.Unmarshaller.unmarshal(Unmarshaller.java:209)
        at javax.xml.bind.Unmarshaller.unmarshal(Unmarshaller.java:133)
        at Customer.unmarshal(Customer.java:161)
        at javax.xml.bind.Unmarshaller.unmarshal(Unmarshaller.java:199)
        at javax.xml.bind.Unmarshaller.unmarshalRoot(Unmarshaller.java:222)
        at javax.xml.bind.Dispatcher.unmarshal(Dispatcher.java:350)
        at Customer.unmarshal(Customer.java:198)
        at Customer.unmarshal(Customer.java:192)
        at Customer.unmarshal(Customer.java:186)
        at Main.main(Main.java:10)

Notice that the conversion halts because data.xml does not contain the tag addline2 while it was required in the original DTD customer.dtd. Let’s make addline2 optional, and regenerate.

customer.dtd (!!remove the space between ? and xml):

<? xml version="1.0" encoding="UTF-8"?>
<!ELEMENT customer (name, addresses+, email)+ >
<!ATTLIST customer custid CDATA #REQUIRED>
 
<!ELEMENT name (#PCDATA)>
 
<!ELEMENT addresses (addline1, addline2?, zip, location, state)>
<!ELEMENT addline1 (#PCDATA)>
<!ELEMENT addline2 (#PCDATA)>
<!ELEMENT zip (#PCDATA)>
<!ELEMENT location (#PCDATA)>
<!ELEMENT state (#PCDATA)>
 
<!ELEMENT email (#PCDATA)>
C:\myxml\jaxb>del *.class
 
C:\myxml\jaxb>java com.sun.tools.xjc.Main customer.dtd customer.xjs
.\com\esus\jaxbtest\Addresses.java
.\com\esus\jaxbtest\Customer.java
.\com\esus\jaxbtest\Email.java
.\com\esus\jaxbtest\Name.java
 
C:\myxml\jaxb>javac Main.java
 
C:\myxml\jaxb>java Main
javax.xml.bind.MissingAttributeException: custid
        at com.esus.jaxbtest.Customer.validateThis(Customer.java:82)
        at javax.xml.bind.Unmarshaller.unmarshal(Unmarshaller.java:209)
        at javax.xml.bind.Unmarshaller.unmarshalRoot(Unmarshaller.java:222)
        at javax.xml.bind.Dispatcher.unmarshal(Dispatcher.java:350)
        at com.esus.jaxbtest.Customer.unmarshal(Customer.java:156)
        at com.esus.jaxbtest.Customer.unmarshal(Customer.java:150)
        at com.esus.jaxbtest.Customer.unmarshal(Customer.java:144)
        at Main.main(Main.java:10)

Notice now that the unmarshalling fails because our customer record does not contain the required custid attribute. Add it!
data.xml (!!remove the space between ? and xml):

<? xml version="1.0"?>
<customer custid="12345">
   <name>Jefke McCann</name>
   <addresses>
      <addline1>516, S. 5th Ave.</addline1>
      <zip>59715</zip>
      <location>Bozeman</location>
      <state>MT</state>
   </addresses>
   <email>jefke.mccann@yahoo.com</email>
</customer>

Now run again:

C:\myxml\jaxb>java Main
<<customer custid=12345 content=[<<name content=Jefke McCann>>, <<addresses addl
ine1=516, S. 5th Ave. zip=59715 location=Bozeman state=MT>>, <<email content=jef
ke.mccann@yahoo.com>>]>>

To go the other way:
Main.java:

import com.esus.jaxbtest.*;
import java.util.*;
import java.io.*;
 
/**
 * Goes from Java objects to XML: builds a Customer tree with the generated
 * com.esus.jaxbtest classes, validates it and marshals it to out.xml.
 */
public class Main
{
   /** Root of the tree; created in main() and shared by the steps below. */
   public static Customer cust;
 
   /**
    * Runs the three steps in order: build, validate, marshal. Any failure is
    * reported as a stack trace.
    *
    * @param args ignored
    */
   public static void main(String []args) {
      try {
         cust = new Customer();
 
         buildTree();
         validate();
         marshal();
      }
      catch(Exception e) {
         e.printStackTrace();
      }
   }
 
   /**
    * Fills the customer with its custid attribute, a name, an email and two
    * addresses, appended to the content list in exactly that order.
    */
   public static void buildTree() throws Exception {
      // custid attribute
      cust.setCustid(12345);

      // NOTE(review): customer.dtd declares (name, addresses+, email) while the
      // entries are appended as name, email, addresses - confirm that
      // validate() accepts this order.
      List content = cust.getContent();

      Name customerName = new Name();
      customerName.setContent("Joris Van den Bogaert");
      content.add(customerName);

      Email customerEmail = new Email();
      customerEmail.setContent("joris1@esus.com");
      content.add(customerEmail);

      Addresses firstAddress = newAddress("A. Dewitstraat 50", "3078", "Meerbeek");
      Addresses secondAddress = newAddress("Handelskaai 3", "1000", "Brussel");
      content.add(firstAddress);
      content.add(secondAddress);
   }

   /** Creates an Addresses entry with the given fields and state "BE". */
   private static Addresses newAddress(String line1, String zip, String location)
         throws Exception {
      Addresses entry = new Addresses();
      entry.setAddline1(line1);
      entry.setZip(zip);
      entry.setLocation(location);
      entry.setState("BE");
      return entry;
   }
 
   /** Asks the customer tree to validate itself. */
   public static void validate() throws Exception {
      cust.validate();
   }
 
   /** Marshals the customer tree to out.xml, always closing the stream. */
   public static void marshal() throws Exception {
      FileOutputStream target = new FileOutputStream("out.xml");
      try {
         cust.marshal(target);
      }
      finally {
         target.close();
      }
   }
}

After executing this program, the following file out.xml will be created:

<? xml version="1.0" encoding="UTF-8"?>

<customer custid="12345">
  <name>Joris Van den Bogaert</name>
  <email>joris1@esus.com</email>
  <addresses>
    <addline1>A. Dewitstraat 50</addline1>
    <zip>3078</zip>
    <location>Meerbeek</location>
    <state>BE</state></addresses>
  <addresses>
    <addline1>Handelskaai 3</addline1>
    <zip>1000</zip>
    <location>Brussel</location>
    <state>BE</state></addresses>
</customer>

Converting an XML to an HTML using XSLT with Xalan

Xalan is an Apache implementation of the W3C recommendation for XSL transformations, XSLT. XSLT allows you to transform XML documents into HTML, another XML format, or whatever format you desire.

Download Xalan at http://xml.apache.org/xalan-j/index.html.

You can apply an XSL stylesheet to an XML document from the command line. Make sure xalan.jar is in your classpath before trying out this example. It will create an HTML file (check it out!) from an XML file.

  
  java org.apache.xalan.xslt.Process -in customers.xml -xsl customers.xsl -out customers.html

customers.xml (!!remove the space between ? and xml):

<? xml version="1.0"?>
<!DOCTYPE customers SYSTEM "customers.dtd">
 
<customers>
   <customer id="cust1">
      <name>Joris Van den Bogaert</name>
      <address>
         <addressline1>Handelskaai 3</addressline1>
         <zip>1000</zip>
         <location>Brussels</location>
         <country>BE</country>
      </address>
      <address>
         <addressline1>A. Dewitstraat 50</addressline1>
         <zip>3078</zip>
         <location>Meerbeek</location>
         <country>BE</country>
      </address>
   </customer>
  
   <customer id="cust2">
      <name>Alicia Kolesnikova</name>
      <address>
         <addressline1>Handelskaai 3</addressline1>
         <zip>1000</zip>
         <location>Brussels</location>
         <country>BE</country>
      </address>
   </customer>
</customers>

customers.dtd:

<!-- A customers document holds any number of customer records. -->
<!ELEMENT customers (customer*)>
<!-- Each customer has one name and at least one address. -->
<!ELEMENT customer (name, address+)>
<!-- addressline2 is optional: none of the addresses in customers.xml supply
     it, and customers.xsl only prints it when it is present. -->
<!ELEMENT address (addressline1, addressline2?, zip, location, country)>
 
<!ELEMENT name (#PCDATA)>
 
<!ELEMENT addressline1 (#PCDATA)>
<!ELEMENT addressline2 (#PCDATA)>
<!ELEMENT zip (#PCDATA)>
<!ELEMENT location (#PCDATA)>
<!ELEMENT country (#PCDATA)>
 
<!-- Every customer must carry an id attribute. -->
<!ATTLIST customer
   id NMTOKEN #REQUIRED
>

customers.xsl (!!remove the space between ? and xml):

<? xml version="1.0"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
 
<!-- Root template: builds the HTML page and a table with one row per customer. -->
<xsl:template match="customers">
<html>
   <head><title>Customers</title></head>
   <body>
      <h1>Customers</h1>
      <table border="1">
         <!-- Header cells must sit inside a row; bare th elements directly
              under table are not valid HTML. -->
         <tr>
            <th>Customer ID</th>
            <th>Name</th>
            <th>Addresses</th>
         </tr>
     
         <!-- Select the customer elements only, so the whitespace text
              between them is not copied into the table. -->
         <xsl:apply-templates select="customer"/>
 
      </table>
   </body>
</html>
</xsl:template>
 
<!-- One table row per customer: id, name, and all addresses in a single cell. -->
<xsl:template match="customer">
   <tr>
      <td><xsl:value-of select="@id"/></td>
      <td><xsl:value-of select="name"/></td>
 
      <td><xsl:apply-templates select="address"/></td>
   </tr>
</xsl:template>
 
<!-- One line per address; addressline2 is printed only when it is non-empty. -->
<xsl:template match="address">
   <xsl:value-of select="addressline1"/>
   <xsl:if test="string(addressline2) != string('')">
      &#160;<xsl:value-of select="addressline2"/>
   </xsl:if>,
   <xsl:value-of select="zip"/>&#160;<xsl:value-of select="location"/> 
   [<xsl:value-of select="country"/>]<br/>
</xsl:template>
 
</xsl:stylesheet>