Adam Mackler | 25 Jun 2012 09:44
Gravatar

RE: Schematron to validate XML representations

For anyone interested, here is the solution I've come up with.

Again, I am validating the XML documents in HTTP request entities against a W3C 
Schema that also contains embedded Schematron rules.  Notes:

*) In addition to the files referenced in the code, the Schematron skeleton file
   is also necessary (available in the same place as the other schematron files).
*) I had to install SAXON, which, FYI, uses XSLT2
*) I am new to this.  No doubt there are better ways to do this.  I welcome and
   invite all feedback.

First some example content and then the working code below.

The following XML entity when PUT to my server should validate:

<person>
  <firstName>Charles</firstName>
  <lastName>Smith</lastName>
  <gender>male</gender>
</person>

The following XML entity when PUT to my server should NOT validate because (1) the person is missing a first
name and (2) because it is a male with a maiden name.  These rules will be enforced by Schematron.

<person>
  <lastName>Smith</lastName>
  <maidenName>Dudley</maidenName>
  <gender>male</gender>
</person>

The following XML entity when PUT to my server should NOT validate because the document element is not a
person.  This will be enforced by the W3C Schema.

<invalid />

This is the schema file called schema/tron.xsd in my classpath.  It is a W3C Schema with Schematron annotations:

<?xml version="1.0" encoding="UTF-8"?>
<!--
  W3C XML Schema for a "person" document, with Schematron rules embedded in
  xs:appinfo annotations (the "sch" prefix is bound to the Schematron namespace).
  The W3C Schema part describes the document structure; the Schematron part
  carries the two extra rules: every person needs a firstName, and a person
  whose gender is "male" must not have a maidenName.
-->
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
	   elementFormDefault="qualified"
	   xmlns:sch="http://purl.oclc.org/dsdl/schematron">
  <!-- Document element: person -->
  <xs:element name="person">
    <!-- Schematron rule: report an error when a person with gender 'male'
         has a maidenName child (sch:report fires when its test is true). -->
    <xs:annotation>
      <xs:appinfo>
	<sch:pattern name="No men with maiden names">
	  <sch:rule context="person[gender='male']">
	    <sch:report test="maidenName">A person element with a gender element equal to male must not contain a
maidenName sub-element</sch:report>
	  </sch:rule>
	</sch:pattern>
      </xs:appinfo>
    </xs:annotation>
    <!-- Children, in order: firstName?, lastName, maidenName?, gender -->
    <xs:complexType>
      <xs:sequence>
	<!-- firstName is optional as far as the W3C Schema is concerned
	     (minOccurs="0"); the Schematron assert below is what requires it
	     (sch:assert fires when its test is false). -->
	<xs:element name="firstName" type="xs:token" minOccurs="0">
	  <xs:annotation>
	    <xs:appinfo>
	      <sch:pattern name="All persons must have first names">
		<sch:rule context="person">
		  <sch:assert test="firstName">All person elements must have a firstName element as a child.</sch:assert>
		</sch:rule>
	      </sch:pattern>
	    </xs:appinfo>
	  </xs:annotation>
	</xs:element><!-- end of firstname -->
	<xs:element name="lastName" type="xs:token"/>
	<xs:element name="maidenName" type="xs:token" minOccurs="0"/>
	<!-- gender is restricted to the two enumerated tokens below -->
	<xs:element name="gender">
	  <xs:simpleType>
	    <xs:restriction base="xs:token">
	      <xs:enumeration value="male"/>
	      <xs:enumeration value="female"/>
	    </xs:restriction>
	  </xs:simpleType>
	</xs:element>
      </xs:sequence>
    </xs:complexType>
  </xs:element>
</xs:schema>

This is my ServerResource subclass in a file called org/mackler/resource/InstanceResource.java:

package org.mackler.resource;

imports...

public class InstanceResource extends org.mackler.resource.Base {

     <at> Put("xml")
    public StringRepresentation put(DomRepresentation requestEntity) 
	throws IOException, SAXException, ResourceException, ParserConfigurationException {

	validate(requestEntity, "tron");

	return new StringRepresentation("\nyou validated this: \n"
                                       + requestEntity.getText()+"\n");
    }
}

End of org/mackler/resource/InstanceResource.java:

This is the base class that all my ServerResources that need to validate can
inherit from.  It's in a file called org/mackler/resource/Base.java:

package org.mackler.resource;

imports...

public class Base extends ServerResource {  

    public void validate(DomRepresentation requestEntity, String schemaName)
	throws IOException,
	       ResourceException,
	       SAXException,
	       ParserConfigurationException {

	ClientResource xsdResource
	    = new ClientResource(LocalReference.createClapReference("/schema/" + schemaName +".xsd"));
       	Representation xsd;

	/* Beginning of W3C Schema validation code */
	try {
	    xsd = xsdResource.get();
	} catch (ResourceException e) {
	    	    throw new ResourceException(Status.SERVER_ERROR_INTERNAL,
			"Error getting the W3C XML Schema: "
			+ xsdResource.getReference().toString(), e );
	}

	try {
	    requestEntity.validate(xsd);
	} catch (Exception e) {
	    throw new ResourceException(Status.CLIENT_ERROR_UNPROCESSABLE_ENTITY,
					"Request entity failed validate against W3C Schema", e);
	}
	/* End of W3C Schema validation code */

	/* Beginning of Schematron validation code */

	try {
	    xsd = xsdResource.get();
	} catch (ResourceException e) {
	    throw new ResourceException(Status.SERVER_ERROR_INTERNAL,
                "Error getting the Schematron-annotated XML Schema: "
                 + xsdResource.getReference().toString(), e );
	}

	/* Schematron files are from here:
	 * http://code.google.com/p/schematron/source/browse/trunk/schematron/code/
	 */

	ClientResource extractorResource
	    = new ClientResource(LocalReference.createClapReference("/schema/ExtractSchFromXSD-2.xsl"));
	ClientResource svrlResource
	    = new ClientResource(LocalReference.createClapReference("/schema/iso_svrl_for_xslt2.xsl"));

       	Representation extractor;
       	Representation svrl;

	try {
	    extractor = extractorResource.get();
	} catch (ResourceException e) {
	    throw new ResourceException(Status.SERVER_ERROR_INTERNAL,
                "Error getting the transformation stylesheet needed to extract Schematron rules: "
		+ extractorResource.getReference().toString(), e );
	}

	try {
	    svrl = svrlResource.get();
	} catch (ResourceException e) {
	    throw new ResourceException(Status.SERVER_ERROR_INTERNAL,
                "Error getting the Schematron implementation: "
		+ " " + svrlResource.getReference().toString(), e );
	}

	/* So far we have:
	 *   xsd:       the user's W3C schema (containing the embedded schematron rules)
	 *   extractor: xslt sheet to extracts schematron annotations from the W3C schema;
	 *   svrl:      xslt implementation of the schematron validatior report language;
	 * For details on how these parts work together, see the
	 * section labelled "Under the Hood" on
	 * http://www.schematron.com/
	 */

	// Extract the Schematron schema from the W3C Schema annotations:
	TransformRepresentation schematronRules
	    = new TransformRepresentation(xsd, extractor);

	/* Apply svrl to schematronSchema
	 * The context enables creation of the URI resolver that works
	 * with CLAP, needed for SVRL to find the included the skeleton
	 * Schematron implementation translation sheet in the
	 * classpath */

	TransformRepresentation schematronSchema
	    = new TransformRepresentation(getContext(), schematronRules, svrl);

	/* Validate the HTTP XML request entity */
	TransformRepresentation report =
	    new TransformRepresentation(requestEntity, schematronSchema);

	// Do some DOM manipulation to find any errors in the Schematron report
	DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
	factory.setNamespaceAware(true);

	DocumentBuilder documentBuilder = factory.newDocumentBuilder();
	Document document = documentBuilder.parse(report.getStream());
	NodeList nodeList = document.getElementsByTagNameNS("http://purl.oclc.org/dsdl/svrl", "text");

	if ( nodeList.getLength() > 0 ) {
	    StringBuilder message = new StringBuilder();
	    int i = 0;
	    while (i < nodeList.getLength()) {
		message.append(( i==0 ? "" : "; ")
                             + ((Text)nodeList.item(i++).getFirstChild()).getData());
	    }
	    throw new ResourceException(Status.CLIENT_ERROR_UNPROCESSABLE_ENTITY,
		  "Schematron validation failure: " + message.toString());
	}

	xsd.release();
	extractor.release();
	svrl.release();
	schematronRules.release();
	schematronSchema.release();

	/* End of Schematron validation code */

    }

}

------------------------------------------------------
http://restlet.tigris.org/ds/viewMessage.do?dsForumId=4447&dsMessageId=2973037


Gmane