418 lines
13 KiB
Python
418 lines
13 KiB
Python
"""
|
|
SAX driver for the Java SAX parsers. Can only be used in Jython.
|
|
|
|
$Id: drv_javasax.py,v 1.5 2003/01/26 09:08:51 loewis Exp $
|
|
"""
|
|
|
|
# --- Initialization
|
|
|
|
version = "0.10"
|
|
revision = "$Revision: 1.5 $"
|
|
|
|
import string
|
|
from xml.sax import xmlreader, saxutils
|
|
from xml.sax.handler import feature_namespaces, feature_namespace_prefixes
|
|
from xml.sax import _exceptions
|
|
|
|
# This driver can only run under Jython: refuse to load on any platform whose
# sys.platform does not begin with "java".
import sys
if not sys.platform.startswith("java"):
    raise _exceptions.SAXReaderNotAvailable("drv_javasax not available in CPython", None)
# sys was only needed for the check above; keep it out of the module namespace.
del sys
|
|
|
|
# get the necessary Java SAX classes
|
|
try:
|
|
from org.python.core import FilelikeInputStream
|
|
from org.xml.sax.helpers import XMLReaderFactory
|
|
from org.xml import sax as javasax
|
|
from org.xml.sax.ext import LexicalHandler
|
|
except ImportError:
|
|
raise _exceptions.SAXReaderNotAvailable("SAX is not on the classpath", None)
|
|
|
|
# Optional JAXP support: `jaxp` records whether javax.xml.parsers could be
# imported, and `factory` is the shared SAXParserFactory when it could.
try:
    from javax.xml.parsers import SAXParserFactory, ParserConfigurationException
except ImportError:
    jaxp = 0
else:
    factory = SAXParserFactory.newInstance()
    # Turn external DTD loading off.  Otherwise the parser would try to fetch
    # DTDs such as DOCTYPE doc PUBLIC 'http://xml.python.org/public, which are
    # deliberately nonexistent in tests such as test_minidom.
    #
    # NOTE: this factory is nonvalidating by default anyway, as needed for
    # Python usage.
    factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", False)
    jaxp = 1
|
|
|
|
from java.lang import String, Exception as JException
|
|
|
|
class SAXUnicodeDecodeError(UnicodeDecodeError):
    """UnicodeDecodeError variant that carries only a descriptive message.

    The base-class initialiser is intentionally not invoked; the instance
    stores the message it was given and uses it for both repr() and str().
    """

    def __init__(self, message):
        # Keep the text describing what caused the decoding failure.
        self.message = message

    def __repr__(self):
        cause = (self.message,)
        return "SAXUnicodeDecodeError: caused by %s" % cause

    # str() and repr() produce the same text.
    __str__ = __repr__
|
|
|
|
|
|
def _wrap_sax_exception(e):
    """Convert a Java SAX parse exception into its Python counterpart.

    Returns a SAXUnicodeDecodeError when the text of the wrapped Java
    exception mentions "MalformedByteSequenceException"; otherwise returns an
    xml.sax SAXParseException built from the message, the wrapped exception
    and the position information carried by `e`.
    """
    # This works around issues in how exceptions are reported to user code.
    # It relies on an implementation detail, so some assumptions are needed;
    # if the identification below fails, a reasonable exception is still
    # produced.
    #
    # Because of differences in how Oracle packages Xerces, the same check is
    # also made in the parse method itself.
    cause_text = str(e.getException())
    if "MalformedByteSequenceException" in cause_text:
        return SAXUnicodeDecodeError(str(e))

    message = e.message
    wrapped = e.exception
    position = SimpleLocator(e.columnNumber,
                             e.lineNumber,
                             e.publicId,
                             e.systemId)
    return _exceptions.SAXParseException(message, wrapped, position)
|
|
|
|
class JyErrorHandlerWrapper(javasax.ErrorHandler):
    """Java ErrorHandler that forwards to a Python SAX error handler.

    Each incoming Java exception is converted with _wrap_sax_exception()
    before it is passed to the matching method of the wrapped handler.
    """

    def __init__(self, err_handler):
        # The Python-side handler that receives the converted exceptions.
        self._err_handler = err_handler

    def error(self, exc):
        """Forward a recoverable error to the wrapped handler."""
        report = self._err_handler.error
        report(_wrap_sax_exception(exc))

    def fatalError(self, exc):
        """Forward a fatal error to the wrapped handler."""
        report = self._err_handler.fatalError
        report(_wrap_sax_exception(exc))

    def warning(self, exc):
        """Forward a warning to the wrapped handler."""
        report = self._err_handler.warning
        report(_wrap_sax_exception(exc))
|
|
|
|
class JyInputSourceWrapper(javasax.InputSource):
    """Java InputSource built from one of the inputs parse() accepts.

    `source` may be a string, a file-like object (anything with a `read`
    attribute), or an object offering the xml.sax.xmlreader.InputSource
    getters used below.
    """

    def __init__(self, source):
        # Case 1: a string is handed straight to the Java constructor.
        if isinstance(source, basestring):
            javasax.InputSource.__init__(self, source)
            return

        # Case 2: a file-like object is exposed to Java as a byte stream, and
        # its `name`, when present, becomes the system id.
        if hasattr(source, "read"):
            javasax.InputSource.__init__(self, FilelikeInputStream(source))
            if hasattr(source, "name"):
                self.setSystemId(source.name)
            return

        # Case 3: an xml.sax.xmlreader.InputSource object.
        # Use the byte stream constructor if possible so that Xerces won't
        # attempt to open the url at systemId unless it's really there.
        if source.getByteStream():
            javasax.InputSource.__init__(self,
                                         FilelikeInputStream(source.getByteStream()))
        else:
            javasax.InputSource.__init__(self)
        if source.getSystemId():
            self.setSystemId(source.getSystemId())
        self.setPublicId(source.getPublicId())
        self.setEncoding(source.getEncoding())
|
|
|
|
class JyEntityResolverWrapper(javasax.EntityResolver):
    """Java EntityResolver that delegates to a Python entity resolver."""

    def __init__(self, entityResolver):
        # The Python-side resolver consulted for every entity.
        self._resolver = entityResolver

    def resolveEntity(self, pubId, sysId):
        """Ask the Python resolver, then wrap its answer for the Java parser."""
        resolved = self._resolver.resolveEntity(pubId, sysId)
        return JyInputSourceWrapper(resolved)
|
|
|
|
class JyDTDHandlerWrapper(javasax.DTDHandler):
    """Java DTDHandler that passes every event to a Python DTD handler."""

    def __init__(self, dtdHandler):
        # The Python-side handler that receives the DTD events unchanged.
        self._handler = dtdHandler

    def notationDecl(self, name, publicId, systemId):
        """Relay a notation declaration to the wrapped handler."""
        target = self._handler
        target.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, notationName):
        """Relay an unparsed entity declaration to the wrapped handler."""
        target = self._handler
        target.unparsedEntityDecl(name, publicId, systemId, notationName)
|
|
|
|
class SimpleLocator(xmlreader.Locator):
    """Locator holding a fixed position supplied at construction time."""

    def __init__(self, colNum, lineNum, pubId, sysId):
        # Note the argument order: column first, then line.
        self.colNum, self.lineNum = colNum, lineNum
        self.pubId, self.sysId = pubId, sysId

    def getColumnNumber(self):
        """Return the stored column number."""
        return self.colNum

    def getLineNumber(self):
        """Return the stored line number."""
        return self.lineNum

    def getPublicId(self):
        """Return the stored public identifier."""
        return self.pubId

    def getSystemId(self):
        """Return the stored system identifier."""
        return self.sysId
|
|
|
|
# --- JavaSAXParser
|
|
class JavaSAXParser(xmlreader.XMLReader, javasax.ContentHandler, LexicalHandler):
    """SAX driver for the Java SAX parsers.

    The instance installs itself on the underlying Java parser as the content
    handler (and, when the parser accepts it, as the lexical handler) and
    forwards each callback to the Python handlers registered through the
    xmlreader.XMLReader interface.
    """

    def __init__(self, jdriver = None):
        """Create the Java parser and connect this object to it.

        jdriver is handed unchanged to create_java_parser().
        """
        xmlreader.XMLReader.__init__(self)
        # Refreshed from the parser in startDocument(); set here as well so
        # the element callbacks never meet a missing attribute.
        self._namespaces = False
        self._parser = create_java_parser(jdriver)
        # Namespace processing starts switched off; callers can enable it
        # through setFeature().
        self._parser.setFeature(feature_namespaces, 0)
        self._parser.setFeature(feature_namespace_prefixes, 0)
        self._parser.setContentHandler(self)
        # One wrapper of each kind is reused for every startElement event.
        self._nsattrs = AttributesNSImpl()
        self._attrs = AttributesImpl()
        # Re-install the default handlers so that the Java parser receives a
        # wrapper around each of them.
        self.setEntityResolver(self.getEntityResolver())
        self.setErrorHandler(self.getErrorHandler())
        self.setDTDHandler(self.getDTDHandler())
        try:
            self._parser.setProperty("http://xml.org/sax/properties/lexical-handler", self)
        except Exception:
            # Lexical events are optional, so a failure to register is ignored.
            # NOTE(review): confirm that Java exceptions thrown by
            # setProperty are matched by `except Exception` on the targeted
            # Jython version.
            pass

    # XMLReader methods

    def parse(self, source):
        "Parse an XML document from a URL or an InputSource."
        try:
            self._parser.parse(JyInputSourceWrapper(source))
        except JException as e:
            # Handle the difference in how Oracle packages Xerces...
            if "MalformedByteSequenceException" in str(e):
                raise SAXUnicodeDecodeError(str(e))
            else:
                raise

    def getFeature(self, name):
        """Return the state of feature `name` as reported by the Java parser."""
        return self._parser.getFeature(name)

    def setFeature(self, name, state):
        """Set feature `name` on the Java parser."""
        self._parser.setFeature(name, state)

    def getProperty(self, name):
        """Return property `name` as reported by the Java parser."""
        return self._parser.getProperty(name)

    def setProperty(self, name, value):
        """Set property `name` on the Java parser."""
        self._parser.setProperty(name, value)

    def setEntityResolver(self, resolver):
        """Install `resolver`, giving the Java parser a wrapper around it."""
        self._parser.entityResolver = JyEntityResolverWrapper(resolver)
        xmlreader.XMLReader.setEntityResolver(self, resolver)

    def setErrorHandler(self, err_handler):
        """Install `err_handler`, giving the Java parser a wrapper around it."""
        self._parser.errorHandler = JyErrorHandlerWrapper(err_handler)
        xmlreader.XMLReader.setErrorHandler(self, err_handler)

    def setDTDHandler(self, dtd_handler):
        """Install `dtd_handler`, giving the Java parser a wrapper around it."""
        self._parser.setDTDHandler(JyDTDHandlerWrapper(dtd_handler))
        xmlreader.XMLReader.setDTDHandler(self, dtd_handler)

    # ContentHandler methods
    def setDocumentLocator(self, locator):
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        self._cont_handler.startDocument()
        # Cache the namespace mode once per document; startElement() and
        # endElement() choose their dispatch from it.
        self._namespaces = self._parser.getFeature(feature_namespaces)

    def startElement(self, uri, lname, qname, attrs):
        if self._namespaces:
            # Point the reusable wrapper at this event's Java attributes.
            self._nsattrs._attrs = attrs
            self._cont_handler.startElementNS((uri or None, lname), qname,
                                              self._nsattrs)
        else:
            self._attrs._attrs = attrs
            self._cont_handler.startElement(qname, self._attrs)

    def startPrefixMapping(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def characters(self, char, start, len):
        # `char` is a Java char array; only the (start, len) slice is passed on.
        self._cont_handler.characters(unicode(String(char, start, len)))

    def ignorableWhitespace(self, char, start, len):
        self._cont_handler.ignorableWhitespace(unicode(String(char, start,
                                                              len)))

    def endElement(self, uri, lname, qname):
        if self._namespaces:
            self._cont_handler.endElementNS((uri or None, lname), qname)
        else:
            self._cont_handler.endElement(qname)

    def endPrefixMapping(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def endDocument(self):
        self._cont_handler.endDocument()

    def processingInstruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    # Lexical handler methods
    def comment(self, char, start, len):
        """Forward a comment to the content handler if it can accept one.

        A content handler without a `comment` method simply does not receive
        the event.  Errors raised by an existing `comment` method propagate,
        as they do for every other callback of this class.
        """
        deliver = getattr(self._cont_handler, "comment", None)
        if deliver is not None:
            deliver(unicode(String(char, start, len)))

    def startCDATA(self):
        pass # TODO

    def endCDATA(self):
        pass # TODO

    def startDTD(self, name, publicId, systemId):
        pass # TODO

    def endDTD(self):
        pass # TODO

    def startEntity(self, name):
        pass # TODO

    def endEntity(self, name):
        pass # TODO

    def skippedEntity(self, name):
        pass
|
|
|
|
|
|
def _fixTuple(nsTuple, frm, to):
|
|
if isinstance(nsTuple, tuple) and len(nsTuple) == 2:
|
|
nsUri, localName = nsTuple
|
|
if nsUri == frm:
|
|
nsUri = to
|
|
return (nsUri, localName)
|
|
return nsTuple
|
|
|
|
def _makeJavaNsTuple(nsTuple):
    """Return `nsTuple` with a None namespace replaced by the empty string."""
    return _fixTuple(nsTuple, frm=None, to='')
|
|
|
|
def _makePythonNsTuple(nsTuple):
    """Return `nsTuple` with an empty-string namespace replaced by None."""
    return _fixTuple(nsTuple, frm='', to=None)
|
|
|
|
class AttributesImpl:
    """Mapping-style view over a Java SAX attributes object.

    The wrapped object is stored in `_attrs`; JavaSAXParser.startElement()
    rebinds it for every element.  Lookups that find nothing raise KeyError.
    """

    def __init__(self, attrs = None):
        self._attrs = attrs

    def getLength(self):
        """Return the number of attributes."""
        return self._attrs.getLength()

    def getType(self, name):
        """Return the type the Java object reports for attribute `name`."""
        return self._attrs.getType(_makeJavaNsTuple(name))

    def getValue(self, name):
        """Return the value of attribute `name`; raise KeyError if absent."""
        value = self._attrs.getValue(_makeJavaNsTuple(name))
        # The Java object answers None for an attribute it does not have.
        if value is None:
            raise KeyError(name)
        return value

    def getNames(self):
        """Return the attribute names, in index order."""
        return [_makePythonNsTuple(self._attrs.getQName(index)) for index in range(len(self))]

    def getQNames(self):
        """Return the qualified attribute names, in index order."""
        return [self._attrs.getQName(index) for index in range(len(self))]

    def getValueByQName(self, qname):
        """Return the value of the attribute with qualified name `qname`."""
        idx = self._attrs.getIndex(qname)
        if idx == -1:
            raise KeyError(qname)
        return self._attrs.getValue(idx)

    def getNameByQName(self, qname):
        """Return the name for `qname`; here the two are the same string."""
        idx = self._attrs.getIndex(qname)
        if idx == -1:
            raise KeyError(qname)
        return qname

    def getQNameByName(self, name):
        """Return the qualified name for `name`; here the two are the same."""
        idx = self._attrs.getIndex(_makeJavaNsTuple(name))
        if idx == -1:
            raise KeyError(name)
        return name

    def __len__(self):
        return self._attrs.getLength()

    def __getitem__(self, name):
        return self.getValue(name)

    def __contains__(self, name):
        # Without this method `name in attrs` falls back to calling
        # __getitem__ with 0, 1, 2, ... and so never tests attribute names.
        return self.has_key(name)

    def keys(self):
        """Return the attribute names (same as getNames())."""
        return self.getNames()

    def copy(self):
        """Return a new wrapper of the same class around the same `_attrs`.

        NOTE(review): the underlying Java object is shared, not duplicated;
        confirm whether callers need a snapshot that outlives the current
        startElement event.
        """
        return self.__class__(self._attrs)

    def items(self):
        """Return a list of (name, value) pairs."""
        return [(name, self[name]) for name in self.getNames()]

    def values(self):
        """Return a list of the attribute values."""
        return [self.getValue(name) for name in self.getNames()]

    def get(self, name, alt=None):
        """Return the value of `name`, or `alt` when it is absent."""
        try:
            return self.getValue(name)
        except KeyError:
            return alt

    def has_key(self, name):
        """Return True when attribute `name` is present."""
        try:
            self.getValue(name)
            return True
        except KeyError:
            return False
|
|
|
|
# --- AttributesNSImpl
|
|
|
|
class AttributesNSImpl(AttributesImpl):
    """Namespace-aware variant of AttributesImpl.

    Names are (namespace URI, local name) pairs.  Each pair is normalised
    with _makeJavaNsTuple() before the Java object is queried, and pairs read
    back from it are normalised with _makePythonNsTuple().
    """

    def __init__(self, attrs=None):
        AttributesImpl.__init__(self, attrs)

    def getType(self, name):
        """Return the type the Java object reports for attribute `name`."""
        jname = _makeJavaNsTuple(name)
        return self._attrs.getType(jname[0], jname[1])

    def getValue(self, name):
        """Return the value of attribute `name`; raise KeyError if absent."""
        jname = _makeJavaNsTuple(name)
        value = self._attrs.getValue(jname[0], jname[1])
        # The Java object answers None for an attribute it does not have.
        if value is None:
            raise KeyError(name)
        return value

    def getNames(self):
        """Return the (URI, local name) pair of every attribute, in order."""
        names = []
        for idx in range(len(self)):
            names.append(_makePythonNsTuple( (self._attrs.getURI(idx), self._attrs.getLocalName(idx)) ))
        return names

    def getNameByQName(self, qname):
        """Return the (URI, local name) pair for qualified name `qname`."""
        idx = self._attrs.getIndex(qname)
        if idx == -1:
            raise KeyError(qname)
        return _makePythonNsTuple( (self._attrs.getURI(idx), self._attrs.getLocalName(idx)) )

    def getQNameByName(self, name):
        """Return the qualified name for the (URI, local name) pair `name`."""
        jname = _makeJavaNsTuple(name)
        idx = self._attrs.getIndex(jname[0], jname[1])
        if idx == -1:
            # Report the name as the caller gave it, as getValue() does.
            raise KeyError(name)
        return self._attrs.getQName(idx)

    def getQNames(self):
        """Return the qualified attribute names, in index order."""
        return [self._attrs.getQName(idx) for idx in range(len(self))]
|
|
|
|
# ---
|
|
|
|
def create_java_parser(jdriver = None):
    """Return a Java XMLReader.

    When `jdriver` is given it is passed to XMLReaderFactory.createXMLReader().
    Otherwise the module-level JAXP factory is used if JAXP was importable,
    and XMLReaderFactory's default reader if it was not.

    Raises xml.sax SAXReaderNotAvailable, carrying the Java exception's
    message, when the Java side cannot supply a reader.
    """
    try:
        if jdriver:
            return XMLReaderFactory.createXMLReader(jdriver)
        if jaxp:
            # ParserConfigurationException is only bound when the JAXP import
            # succeeded, so it may only be named on this branch.
            try:
                return factory.newSAXParser().getXMLReader()
            except ParserConfigurationException as e:
                raise _exceptions.SAXReaderNotAvailable(e.getMessage())
        return XMLReaderFactory.createXMLReader()
    except javasax.SAXException as e:
        raise _exceptions.SAXReaderNotAvailable(e.getMessage())
|
|
|
|
def create_parser(jdriver = None):
    """Build and return a JavaSAXParser, passing `jdriver` through to it."""
    parser = JavaSAXParser(jdriver)
    return parser
|