#! /usr/bin/python
# $Id: simple_appl.py 0.3 1999/01/19 20:42:17 simon Exp simon $

"""This simple application writes a structured view of the contents
of an XML file.

The line number after the opening tag demonstrates the use of locator
methods.  The error and fatalError methods can simply print the
exception because the SAXParseException class has a __str__ method.
"""

from xml.sax import saxexts, saxlib, saxutils
import sys, urllib, string


class SGMLSyntaxError(Exception):
    """Raised by ErrorHandler.fatalError to abort the parse.

    This used to be a string exception.  String exceptions can no longer
    be raised on Python 2.6 and later, so it is now a real exception
    class; ``except SGMLSyntaxError:`` keeps working unchanged.
    """


class DocumentHandler(saxlib.DocumentHandler):
    """Handle general document events.

    This is the main client interface for SAX: it contains callbacks
    for the most important document events, such as the start and end
    of elements.  You need to create an object that implements this
    interface, and then register it with the Parser.  If you do not
    want to implement the entire interface, you can derive a class from
    HandlerBase, which implements the default functionality.  You can
    find the location of any document event using the Locator interface
    supplied by setDocumentLocator().

    Output strategy: a start tag is not printed when it is seen.  It is
    parked in self.start_tag and flushed by output_start_tag() as soon
    as the next event shows whether the element is empty (printed as
    "<name/>") or has content (printed as "<name>").
    """

    def __init__(self):
        # No locator until the parser supplies one; the callbacks below
        # tolerate a missing locator by catching AttributeError.
        self.locator = None
        # -1 means "still below the root element".
        self.level = -1
        self._reset_start_tag()

    def _reset_start_tag(self):
        "Mark that there is no pending (unprinted) start tag."
        self.start_tag = {'name' : [], 'indent': '', 'line' : ''}

    def setDocumentLocator(self, locator):
        "Receive an object for locating the origin of SAX document events."
        self.locator = locator

    def startDocument(self):
        "Handle an event for the beginning of a document."
        self.level = -1  # we are still below the root element
        try:
            print("Document: %s" % (self.locator.getSystemId()))
        except AttributeError:
            # no usable locator: skip the document header line
            pass

    def startElement(self, name, attrs):
        """Handle an event for the beginning of an element.

        Flushes the parent's pending start tag, then parks this
        element's tag (name, attributes, indent, line number) in
        self.start_tag without printing it yet.
        """
        self.output_start_tag('start')  # output start element of parent
        self.level = self.level + 1
        self.start_tag['indent'] = " " * self.level
        self.start_tag['name'] = [name]
        # attrs is an AttributeMap object
        # that implements the AttributeList methods.
        for i in range(attrs.getLength()):
            self.start_tag['name'].append(
                "%s=\"%s\"" % (attrs.getName(i), attrs.getValue(i)))
        try:
            self.start_tag['line'] = self.locator.getLineNumber()
        except AttributeError:
            # no usable locator: the "(line N)" suffix is omitted
            self.start_tag['line'] = None

    def endElement(self, name):
        "Handle an event for the end of an element."
        # output start tag (empty element) or print end tag
        if not self.output_start_tag('end'):
            # The format needs two placeholders (indent and name); with
            # only "%s" this line raised TypeError for every element
            # that had content.
            print("%s</%s>" % (" " * self.level, name))
        self.level = self.level - 1

    def characters(self, all_data, start, length):
        "Handle a character data event."
        # all_data contains the whole file;
        # start:start+length is this part's slice
        data = string.strip(all_data[start:start + length])
        if data:
            self.output_start_tag('data')  # output start element of parent
            print("%s%s" % (" " * (self.level + 1), data))

    def output_start_tag(self, where):
        """Print the pending start tag, if there is one.

        startElement puts its data in self.start_tag; startElement,
        characters, and endElement call output_start_tag.

        where -- 'start' or 'data': the element has content, so the tag
                 is closed with ">"; 'end': the element is empty, so
                 the tag is closed with "/>".

        Returns 1 when a pending tag was printed, None when there was
        nothing to print.  Raises ValueError for any other value of
        where, but only when a tag is actually pending.
        """
        if not self.start_tag['name']:  # nothing left unprinted
            return None
        if where in ['start', 'data']:
            STAGC = ">"
        elif where in ['end']:
            STAGC = "/>"
        else:
            raise ValueError('output_start_tag("start"|"data"|"end")')
        output = "%s<%s%s" % (self.start_tag['indent'],
                              string.join(self.start_tag['name']),
                              STAGC)
        if self.start_tag['line']:
            output = "%s (line %s)" % (output, self.start_tag['line'])
        print(output)
        self._reset_start_tag()
        return 1


class ErrorHandler:
    """Basic interface for SAX error handlers.

    If you create an object that implements this interface, then
    register the object with your Parser, the parser will call the
    methods in your object to report all warnings and errors.  There
    are three levels of errors available: warnings, (possibly)
    recoverable errors, and unrecoverable errors.  All methods take a
    SAXParseException as the only parameter.
    """

    def error(self, exception):
        "Handle a recoverable error."
        sys.stderr.write("Error: %s\n" % exception)

    def fatalError(self, exception):
        "Handle a non-recoverable error: report it, then abort the parse."
        sys.stderr.write("Fatal error: %s\n" % exception)
        raise SGMLSyntaxError("SGML syntax error")

    def warning(self, exception):
        "Handle a warning."
        sys.stderr.write("Warning: %s\n" % exception)


# pick a specific parser
from xml.sax.drivers import drv_xmlproc
SAXparser = drv_xmlproc.SAX_XPParser()
# ask a specific parser from the parser factory
# SAXparser=saxexts.make_parser("xml.sax.drivers.drv_xmlproc")
# in some versions of the saxexts module this is the correct form:
# SAXparser=saxexts.make_parser("xmlproc")
# ask any parser from the parser factory
# SAXparser=saxexts.make_parser()
# ask any validating parser from the XML validating parser factory
# SAXparser=saxexts.XMLValParserFactory.make_parser()

SAXparser.setDocumentHandler(DocumentHandler())

# three options for error handling:
# 1. use our own ErrorHandler
SAXparser.setErrorHandler(ErrorHandler())
# 2. use the ErrorRaiser from saxutils
# SAXparser.setErrorHandler(saxutils.ErrorRaiser())
# 3. use the ErrorPrinter from saxutils
# SAXparser.setErrorHandler(saxutils.ErrorPrinter())

if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s xml-file\n" % sys.argv[0])
        sys.exit(2)
    # sys.exc_info()[1] is used to reach the caught exception instance
    # because it is spelled the same on every Python version.
    try:
        SAXparser.parse(sys.argv[1])
    # catch the 'SGMLSyntaxError's raised by our own ErrorHandler
    except SGMLSyntaxError:
        sys.stderr.write("%s; processing aborted\n" % (sys.exc_info()[1],))
        sys.exit(1)
    # catch the SAXParseException errors raised by the SAX parser
    # and passed on by ErrorRaiser
    except saxlib.SAXParseException:
        # report the exception instance (its __str__ describes the
        # error), not the exception class
        sys.stderr.write("%s; processing aborted\n" % (sys.exc_info()[1],))
        sys.exit(1)