Ebooks

Java SAX tutorial

The Java SAX tutorial shows how to use the Java SAX API to read and validate XML documents.

SAX

SAX (Simple API for XML) is an event-driven algorithm for parsing XML documents. SAX is an alternative to the Document Object Model (DOM). Where the DOM reads the whole document to operate on XML, SAX parsers read XML node by node, issuing parsing events as they step through the input stream. SAX processes documents state-independently (the handling of an element does not depend on the elements that came before). SAX parsers are read-only.

SAX parsers are faster and require less memory. On the other hand, DOM is easier to use and there are tasks, such as sorting elements, rearranging elements or looking up elements, that are faster with DOM.

A SAX parser comes with the JDK, so there is no need to download a dependency.

Java SAX parsing example

In the following example, we read an XML file with a SAX parser.

<!-- Maven build configuration: registers the plugin used to launch the example. -->
<build>
    <plugins>
        <plugin>
            <groupId>org.codehaus.mojo</groupId>
            <artifactId>exec-maven-plugin</artifactId>
            <version>1.6.0</version>
            <configuration>
                <!-- Fully qualified name of the Java class whose main method is executed -->
                <mainClass>com.zetcode.JavaReadXmlSaxEx</mainClass>
            </configuration>
        </plugin>
    </plugins>
</build>   

We use the exec-maven-plugin to execute the Java main class from Maven.

users.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- Sample data: three users, each with an id attribute and firstname, lastname and occupation children. -->
<users>
    <user id="1">
        <firstname>Peter</firstname>
        <lastname>Brown</lastname>
        <occupation>programmer</occupation>
    </user>
    <user id="2">
        <firstname>Martin</firstname>
        <lastname>Smith</lastname>
        <occupation>accountant</occupation>
    </user>
    <user id="3">
        <firstname>Lucy</firstname>
        <lastname>Gordon</lastname>
        <occupation>teacher</occupation>
    </user>    
</users>

We are going to read this XML file.

User.java
package com.zetcode;

/**
 * Simple bean holding the data of one user: a numeric id plus first name,
 * last name and occupation.
 *
 * <p>All fields are private and reachable only through the accessors below.
 */
public class User {

    private int id;
    private String firstName;
    private String lastName;
    private String occupation;

    /** Creates a user with id 0 and all text fields unset ({@code null}). */
    public User() {
    }

    /** @return the user id */
    public int getId() {
        return id;
    }

    /** @param id the user id */
    public void setId(int id) {
        this.id = id;
    }

    /** @return the first name, or {@code null} if it was never set */
    public String getFirstName() {
        return firstName;
    }

    /** @param firstName the first name */
    public void setFirstName(String firstName) {
        this.firstName = firstName;
    }

    /** @return the last name, or {@code null} if it was never set */
    public String getLastName() {
        return lastName;
    }

    /** @param lastName the last name */
    public void setLastName(String lastName) {
        this.lastName = lastName;
    }

    /** @return the occupation, or {@code null} if it was never set */
    public String getOccupation() {
        return occupation;
    }

    /** @param occupation the occupation */
    public void setOccupation(String occupation) {
        this.occupation = occupation;
    }

    /**
     * Renders the user as
     * {@code User{id=.., firstName=.., lastName=.., occupation=..}}.
     *
     * @return the textual form of this user
     */
    @Override
    public String toString() {
        return "User{id=" + id
                + ", firstName=" + firstName
                + ", lastName=" + lastName
                + ", occupation=" + occupation + "}";
    }
}

This is the user bean; it will hold data from XML nodes.

MyRunner.java
package com.zetcode;

import java.io.File;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.SAXException;

/**
 * Creates a SAX parser and uses it to read the users XML file into a list
 * of {@link User} objects.
 */
public class MyRunner {

    private static final Logger LOGGER = Logger.getLogger(MyRunner.class.getName());

    /**
     * Builds a new SAX parser from the default factory.
     *
     * <p>Failures are propagated instead of being logged here, so that the
     * caller never receives a {@code null} parser.
     *
     * @return a freshly created parser, never {@code null}
     * @throws ParserConfigurationException if the factory cannot create a parser
     * @throws SAXException for SAX errors raised while creating the parser
     */
    private SAXParser createSaxParser() throws ParserConfigurationException, SAXException {

        SAXParserFactory factory = SAXParserFactory.newInstance();
        return factory.newSAXParser();
    }

    /**
     * Parses {@code src/main/resources/users.xml} with a {@link MyHandler}.
     *
     * <p>Parser-creation, parsing and I/O errors are logged at SEVERE level
     * and not rethrown; in that case the list holds whatever the handler
     * collected before the failure (possibly nothing).
     *
     * @return the users collected by the handler
     */
    public List<User> parseUsers() {

        MyHandler handler = new MyHandler();
        String fileName = "src/main/resources/users.xml";
        File xmlDocument = Paths.get(fileName).toFile();

        try {

            SAXParser parser = createSaxParser();
            parser.parse(xmlDocument, handler);

        } catch (ParserConfigurationException | SAXException | IOException ex) {

            LOGGER.log(Level.SEVERE, ex.getMessage(), ex);
        }

        return handler.getUsers();
    }
}

MyRunner creates a SAX parser and launches parsing. The parseUsers() method returns the parsed data in a list of User objects.

SAXParserFactory factory = SAXParserFactory.newInstance();
saxParser = factory.newSAXParser();

From the SAXParserFactory, we get the SAXParser.

SAXParser parser = createSaxParser();
parser.parse(xmlDocument, handler);

We parse the document with the parse() method. The second parameter of the method is the handler object, which contains the event handlers.

MyHandler.java
package com.zetcode;

import java.util.ArrayList;
import java.util.List;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * SAX event handler that builds a {@link User} for every {@code <user>}
 * element and collects them in a list.
 *
 * <p>A boolean flag is raised when a {@code firstname}, {@code lastname} or
 * {@code occupation} element starts and lowered when it ends; while a flag is
 * raised, incoming text is stored in the matching field of the current user.
 */
public class MyHandler extends DefaultHandler {

    private List<User> users = new ArrayList<>();
    private User user;

    // Text gathered for the element currently being read. A SAX parser may
    // deliver the text of one element in several characters() calls, so the
    // chunks are accumulated here.
    private final StringBuilder text = new StringBuilder();

    // Flags telling characters() which field of the current user gets the text
    private boolean bfn = false;
    private boolean bln = false;
    private boolean boc = false;

    /**
     * Starts a new user on {@code <user>} and raises the flag of the field
     * element that is being opened.
     *
     * @throws SAXException if the {@code id} attribute of {@code <user>} is
     *         missing or is not an integer
     */
    @Override
    public void startElement(String uri, String localName,
            String qName, Attributes attributes) throws SAXException {

        // Every element starts with an empty text buffer
        text.setLength(0);

        if ("user".equals(qName)) {

            user = new User();

            int id = parseId(attributes.getValue("id"));
            user.setId(id);
        }

        switch (qName) {

            case "firstname":
                bfn = true;
                break;

            case "lastname":
                bln = true;
                break;

            case "occupation":
                boc = true;
                break;
        }
    }

    /**
     * Appends the delivered chunk to the text of the current element and
     * stores the text gathered so far in the user field whose flag is raised.
     * Text outside the three field elements, or outside a user, is ignored.
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {

        if (user == null || !(bfn || bln || boc)) {
            return;
        }

        text.append(ch, start, length);
        String value = text.toString();

        if (bfn) {
            user.setFirstName(value);
        }

        if (bln) {
            user.setLastName(value);
        }

        if (boc) {
            user.setOccupation(value);
        }
    }

    /**
     * Lowers the flag of the field element that is being closed, and adds the
     * finished user to the list when {@code </user>} is reached.
     */
    @Override
    public void endElement(String uri, String localName,
            String qName) throws SAXException {

        switch (qName) {

            case "firstname":
                bfn = false;
                break;

            case "lastname":
                bln = false;
                break;

            case "occupation":
                boc = false;
                break;

            case "user":
                users.add(user);
                // No current user until the next <user> starts
                user = null;
                break;
        }
    }

    /**
     * @return the users collected so far (the handler's own list, not a copy)
     */
    public List<User> getUsers() {

        return users;
    }

    /**
     * Converts the value of the id attribute to an int, reporting a missing or
     * malformed value as a SAXException so that it reaches the caller's
     * regular parse-error handling.
     */
    private static int parseId(String value) throws SAXException {

        try {
            return Integer.parseInt(value);
        } catch (NumberFormatException ex) {
            throw new SAXException("Invalid or missing id attribute on <user>: " + value, ex);
        }
    }
}

In the MyHandler class, we have the implementations of the event handlers.

public class MyHandler extends DefaultHandler {

The handler class must extend DefaultHandler, which provides the event methods.

/**
 * Handles the start of an element: a {@code user} element begins a new
 * User whose id is read from the {@code id} attribute; a {@code firstname},
 * {@code lastname} or {@code occupation} element raises the matching flag.
 */
@Override
public void startElement(String uri, String localName,
        String qName, Attributes attributes) throws SAXException {

    if ("user".equals(qName)) {
    
        user = new User();
        
        // The attribute value is text and is converted to an int here
        int id = Integer.valueOf(attributes.getValue("id"));
        user.setId(id);
    }

    // Remember which field element was opened; characters() checks these flags
    switch (qName) {

        case "firstname":
            bfn = true;
            break;

        case "lastname":
            bln = true;
            break;

        case "occupation":
            boc = true;
            break;
    }
}

The startElement() method is called when the parser starts parsing a new element. We create a new user if the element is <user>. For other types of elements, we set boolean values.

/**
 * Stores the delivered text in the field of the current user whose flag is
 * raised, then lowers that flag.
 *
 * NOTE(review): only the first call after a start tag is stored. The SAX API
 * allows a parser to split the text of one element across several calls —
 * confirm that keeping just the first chunk is acceptable.
 */
@Override
public void characters(char[] ch, int start, int length) throws SAXException {

    if (bfn) {
        user.setFirstName(new String(ch, start, length));
        bfn = false;
    }

    if (bln) {
        user.setLastName(new String(ch, start, length));
        bln = false;
    }

    if (boc) {
        user.setOccupation(new String(ch, start, length));
        boc = false;
    }
}

The characters() method is called when the parser encounters text inside elements. Depending on the boolean variable, we set the user attributes.

/**
 * Handles the end of an element: when a {@code user} element closes, the
 * current user is added to the list of users. Other elements are ignored.
 */
@Override
public void endElement(String uri, String localName,
        String qName) throws SAXException {

    if ("user".equals(qName)) {
        users.add(user);
    }
}

At the end of the <user> element, we add the user object to the list of users.

JavaReadXmlSaxEx.java
package com.zetcode;

import java.util.List;

/**
 * Entry point of the example: lets MyRunner parse the users and prints each
 * one on its own line.
 */
public class JavaReadXmlSaxEx {

    public static void main(String[] args) {

        List<User> users = new MyRunner().parseUsers();

        for (User user : users) {
            System.out.println(user);
        }
    }
}

JavaReadXmlSaxEx starts the application. It delegates the parsing tasks to MyRunner. In the end, the retrieved data is printed to the console.

$ mvn exec:java -q
User{id=1, firstName=Peter, lastName=Brown, occupation=programmer}
User{id=2, firstName=Martin, lastName=Smith, occupation=accountant}
User{id=3, firstName=Lucy, lastName=Gordon, occupation=teacher}

This is the output of the example.

Java SAX validation example

The following example uses the XSD language to validate an XML file. XSD (XML Schema Definition) is the current standard schema language for all XML documents and data. (There are other alternative schema languages such as DTD and RELAX NG.) XSD is a set of rules to which an XML document must conform in order to be considered valid according to the schema.

users.xsd
<?xml version="1.0"?>

<!-- Schema for the users document: a <users> root that contains <user> entries. -->
<xs:schema version="1.0"
           xmlns:xs="http://www.w3.org/2001/XMLSchema"
           elementFormDefault="qualified">
    
    <xs:element name="users">
        <xs:complexType>
            <xs:sequence>
                <!-- Any number of users, including none -->
                <xs:element name="user" maxOccurs="unbounded" minOccurs="0">
                    <xs:complexType>
                        <!-- The three children must appear in this order -->
                        <xs:sequence>
                            <xs:element type="xs:string" name="firstname"/>
                            <xs:element type="xs:string" name="lastname"/>
                            <xs:element type="xs:string" name="occupation"/>
                        </xs:sequence>
                        <!-- The id attribute is mandatory and must be an xs:int -->
                        <xs:attribute name="id" type="xs:int" use="required"/>
                    </xs:complexType>
                </xs:element>
            </xs:sequence>
        </xs:complexType>
    </xs:element>    

</xs:schema>

This is the XSD file for validating users. It declares, for instance, that the <user> element must be within the <users> element and that the id attribute of <user> must be an integer and is mandatory.

JavaXmlSchemaValidationEx.java
package com.zetcode;

import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.XMLConstants;
import javax.xml.transform.sax.SAXSource;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;
import javax.xml.validation.Validator;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * Validates {@code src/main/resources/users.xml} against the XSD schema in
 * {@code src/main/resources/users.xsd} and reports the outcome.
 */
public class JavaXmlSchemaValidationEx {

    /**
     * Runs the validation. On success a confirmation is printed to standard
     * output; validation and I/O failures are logged at SEVERE level.
     *
     * @param args not used
     */
    public static void main(String[] args) {

        File xsdFile = new File("src/main/resources/users.xsd");
        Path xmlPath = Paths.get("src/main/resources/users.xml");

        // try-with-resources closes the reader whether validation succeeds or fails
        try (Reader reader = Files.newBufferedReader(xmlPath)) {

            String schemaLang = XMLConstants.W3C_XML_SCHEMA_NS_URI;
            SchemaFactory factory = SchemaFactory.newInstance(schemaLang);
            Schema schema = factory.newSchema(xsdFile);

            Validator validator = schema.newValidator();

            SAXSource source = new SAXSource(new InputSource(reader));
            validator.validate(source);

            System.out.println("The document was validated OK");

        } catch (SAXException ex) {
            
            Logger lgr = Logger.getLogger(JavaXmlSchemaValidationEx.class.getName());
            lgr.log(Level.SEVERE, "The document failed to validate");
            lgr.log(Level.SEVERE, ex.getMessage(), ex);
        } catch (IOException ex) {
            
            // Also reached when opening or closing the reader fails
            Logger lgr = Logger.getLogger(JavaXmlSchemaValidationEx.class.getName());
            lgr.log(Level.SEVERE, ex.getMessage(), ex);
        }
    }
}

The example uses the users.xsd schema to validate the users.xml file.

String schemaLang = XMLConstants.W3C_XML_SCHEMA_NS_URI;
SchemaFactory factory = SchemaFactory.newInstance(schemaLang);
Schema schema = factory.newSchema(xsdFile);

With the SchemaFactory we choose the W3C XML schema for our schema definition. In other words, our custom schema definition must also adhere to certain rules.

Validator validator = schema.newValidator();

A new validator is generated from the schema.

SAXSource source = new SAXSource(new InputSource(reader));
validator.validate(source);

We validate the XML document against the provided schema.

} catch (SAXException ex) {
    
    Logger lgr = Logger.getLogger(JavaXmlSchemaValidationEx.class.getName());
    lgr.log(Level.SEVERE, "The document failed to validate");
    lgr.log(Level.SEVERE, ex.getMessage(), ex);
} 

By default, if the document is not valid, a SAXException is thrown.

In this tutorial, we have read and validated an XML document with Java SAX. You might also be interested in the related tutorials: Java DOM tutorial, Java Servlet serving XML, and Java tutorial.