Module libxml2 :: Class xmlDoc

Class xmlDoc

xmlCore --+    
          |    
    xmlNode --+
              |
             xmlDoc

Instance Methods

[hide private]

__init__(self, _obj=None)

source code

__repr__(self)

source code

htmlAutoCloseTag(self, name, elem)
The HTML DTD allows a tag to implicitly close other tags.

source code

htmlIsAutoClosed(self, elem)
The HTML DTD allows a tag to implicitly close other tags.

source code

htmlDocContentDumpFormatOutput(self, buf, encoding, format)
Dump an HTML document.

source code

htmlDocContentDumpOutput(self, buf, encoding)
Dump an HTML document.

source code

htmlDocDump(self, f)
Dump an HTML document to an open FILE.

source code

htmlGetMetaEncoding(self)
Encoding definition lookup in the Meta tags

source code

htmlNodeDumpFile(self, out, cur)
Dump an HTML node, recursive behaviour, children are printed too, and formatting returns are added.

source code

htmlNodeDumpFileFormat(self, out, cur, encoding, format)
Dump an HTML node, recursive behaviour, children are printed too.

source code

htmlNodeDumpFormatOutput(self, buf, cur, encoding, format)
Dump an HTML node, recursive behaviour, children are printed too.

source code

htmlNodeDumpOutput(self, buf, cur, encoding)
Dump an HTML node, recursive behaviour, children are printed too, and formatting returns/spaces are added.

source code

htmlSaveFile(self, filename)
Dump an HTML document to a file.

source code

htmlSaveFileEnc(self, filename, encoding)
Dump an HTML document to a file using a given encoding and formatting returns/spaces are added.

source code

htmlSaveFileFormat(self, filename, encoding, format)
Dump an HTML document to a file using a given encoding.

source code

htmlSetMetaEncoding(self, encoding)
Sets the current encoding in the Meta tags. NOTE: this will not change the document content encoding, just the associated META flag.

source code

debugCheckDocument(self, output)
Check the document for potential content problems, and output the errors to @output

source code

debugDumpDocument(self, output)
Dumps debug information for the document, it's recursive

source code

debugDumpDocumentHead(self, output)
Dumps debug information concerning the document, not recursive

source code

debugDumpEntities(self, output)
Dumps debug information for all the entities in use by the document

source code

addDocEntity(self, name, type, ExternalID, SystemID, content)
Register a new entity for this document.

source code

addDtdEntity(self, name, type, ExternalID, SystemID, content)
Register a new entity for this document DTD external subset.

source code

docEntity(self, name)
Do an entity lookup in the document entity hash table and return the corresponding entity; otherwise a lookup is done in the predefined entities too.

source code

dtdEntity(self, name)
Do an entity lookup in the DTD entity hash table and return the corresponding entity.

source code

encodeEntities(self, input)
TODO: remove xmlEncodeEntities, once we are not afraid of breaking binary compatibility People must migrate their code to xmlEncodeEntitiesReentrant ! This routine will issue a warning when encountered.

source code

encodeEntitiesReentrant(self, input)
Do a global encoding of a string, replacing the predefined entities and non ASCII values with their entities and CharRef counterparts.

source code

encodeSpecialChars(self, input)
Do a global encoding of a string, replacing the predefined entities. This routine is reentrant, and the result must be deallocated.

source code

parameterEntity(self, name)
Do an entity lookup in the internal and external subsets and return the corresponding parameter entity, if found.

source code

relaxNGNewDocParserCtxt(self)
Create an XML RelaxNGs parser context for that document.

source code

relaxNGValidateDoc(self, ctxt)
Validate a document tree in memory.

source code

relaxNGValidateFullElement(self, ctxt, elem)
Validate a full subtree when xmlRelaxNGValidatePushElement() returned 0 and the content of the node has been expanded.

source code

relaxNGValidatePopElement(self, ctxt, elem)
Pop the element end from the RelaxNG validation stack.

source code

relaxNGValidatePushElement(self, ctxt, elem)
Push a new element start on the RelaxNG validation stack.

source code

copyDoc(self, recursive)
Do a copy of the document info.

source code

copyNode(self, node, extended)
Do a copy of the node to a given document.

source code

copyNodeList(self, node)
Do a recursive copy of the node list.

source code

createIntSubset(self, name, ExternalID, SystemID)
Create the internal subset of a document

source code

docCompressMode(self)
get the compression ratio for a document, ZLIB based

source code

dump(self, f)
Dump an XML document to an open FILE.

source code

elemDump(self, f, cur)
Dump an XML/HTML node, recursive behaviour, children are printed too.

source code

formatDump(self, f, format)
Dump an XML document to an open FILE.

source code

freeDoc(self)
Free up all the structures used by a document, tree included.

source code

getRootElement(self)
Get the root element of the document (doc->children is a list containing possibly comments, PIs, etc ...).

source code

intSubset(self)
Get the internal subset of a document

source code

newCDataBlock(self, content, len)
Creation of a new node containing a CDATA block.

source code

newCharRef(self, name)
Creation of a new character reference node.

source code

newDocComment(self, content)
Creation of a new node containing a comment within a document.

source code

newDocFragment(self)
Creation of a new Fragment node.

source code

newDocNode(self, ns, name, content)
Creation of a new node element within a document.

source code

newDocNodeEatName(self, ns, name, content)
Creation of a new node element within a document.

source code

newDocPI(self, name, content)
Creation of a processing instruction element.

source code

newDocProp(self, name, value)
Create a new property carried by a document.

source code

newDocRawNode(self, ns, name, content)
Creation of a new node element within a document.

source code

newDocText(self, content)
Creation of a new text node within a document.

source code

newDocTextLen(self, content, len)
Creation of a new text node with an extra content length parameter.

source code

newDtd(self, name, ExternalID, SystemID)
Creation of a new DTD for the external subset.

source code

newGlobalNs(self, href, prefix)
Creation of a Namespace, the old way using PI and without scoping DEPRECATED !!!

source code

newReference(self, name)
Creation of a new reference node.

source code

nodeDumpOutput(self, buf, cur, level, format, encoding)
Dump an XML node, recursive behaviour, children are printed too.

source code

nodeGetBase(self, cur)
Searches for the BASE URL.

source code

nodeListGetRawString(self, list, inLine)
Builds the string equivalent to the text contained in the Node list made of TEXTs and ENTITY_REFs, contrary to xmlNodeListGetString() this function doesn't do any character encoding handling.

source code

nodeListGetString(self, list, inLine)
Build the string equivalent to the text contained in the Node list made of TEXTs and ENTITY_REFs

source code

reconciliateNs(self, tree)
This function checks that all the namespaces declared within the given tree are properly declared.

source code

saveFile(self, filename)
Dump an XML document to a file.

source code

saveFileEnc(self, filename, encoding)
Dump an XML document, converting it to the given encoding

source code

saveFileTo(self, buf, encoding)
Dump an XML document to an I/O buffer.

source code

saveFormatFile(self, filename, format)
Dump an XML document to a file.

source code

saveFormatFileEnc(self, filename, encoding, format)
Dump an XML document to a file or an URL.

source code

saveFormatFileTo(self, buf, encoding, format)
Dump an XML document to an I/O buffer.

source code

searchNs(self, node, nameSpace)
Search a Ns registered under a given name space for a document.

source code

searchNsByHref(self, node, href)
Search a Ns aliasing a given URI.

source code

setDocCompressMode(self, mode)
set the compression ratio for a document, ZLIB based. Correct values: 0 (uncompressed) to 9 (max compression)

source code

setListDoc(self, list)
update all nodes in the list to point to the right document

source code

setRootElement(self, root)
Set the root element of the document (doc->children is a list containing possibly comments, PIs, etc ...).

source code

setTreeDoc(self, tree)
update all nodes under the tree to point to the right document

source code

stringGetNodeList(self, value)
Parse the value string and build the node list associated.

source code

stringLenGetNodeList(self, value, len)
Parse the value string and build the node list associated.

source code

ID(self, ID)
Search the attribute declaring the given ID

source code

isID(self, elem, attr)
Determine whether an attribute is of type ID.

source code

isMixedElement(self, name)
Search in the DtDs whether an element accepts Mixed content (or ANY), basically whether it is supposed to accept text children

source code

isRef(self, elem, attr)
Determine whether an attribute is of type Ref.

source code

removeID(self, attr)
Remove the given attribute from the ID table maintained internally.

source code

removeRef(self, attr)
Remove the given attribute from the Ref table maintained internally.

source code

validCtxtNormalizeAttributeValue(self, ctxt, elem, name, value)
Does the validation related extra step of the normalization of attribute values: If the declared value is not CDATA, then the XML processor must further process the normalized attribute value by discarding any leading and trailing space (#x20) characters, and by replacing sequences of space (#x20) characters by single space (#x20) character.

source code

validNormalizeAttributeValue(self, elem, name, value)
Does the validation related extra step of the normalization of attribute values: If the declared value is not CDATA, then the XML processor must further process the normalized attribute value by discarding any leading and trailing space (#x20) characters, and by replacing sequences of space (#x20) characters by single space (#x20) character.

source code

validateDocument(self, ctxt)
Try to validate the document instance. Basically it does all the checks described by the XML Rec, i.e. validates the internal and external subset (if present) and validates the document tree.

source code

validateDocumentFinal(self, ctxt)
Does the final step for the document validation once all the incremental validation steps have been completed. Basically it does the following checks described by the XML Rec: check all the IDREF/IDREFS attribute definitions for validity

source code

validateDtd(self, ctxt, dtd)
Try to validate the document against the dtd instance. Basically it checks all the definitions in the DtD.

source code

validateDtdFinal(self, ctxt)
Does the final step for the dtds validation once all the subsets have been parsed basically it does the following checks described by the XML Rec - check that ENTITY and ENTITIES type attributes default or possible values matches one of the defined entities.

source code

validateElement(self, ctxt, elem)
Try to validate the subtree under an element

source code

validateNotationUse(self, ctxt, notationName)
Validate that the given name match a notation declaration.

source code

validateOneAttribute(self, ctxt, elem, attr, value)
Try to validate a single attribute for an element basically it does the following checks as described by the XML-1.0 recommendation: - [ VC: Attribute Value Type ] - [ VC: Fixed Attribute Default ] - [ VC: Entity Name ] - [ VC: Name Token ] - [ VC: ID ] - [ VC: IDREF ] - [ VC: Entity Name ] - [ VC: Notation Attributes ] The ID/IDREF uniqueness and matching are done separately

source code

validateOneElement(self, ctxt, elem)
Try to validate a single element and its attributes, basically it does the following checks as described by the XML-1.0 recommendation: - [ VC: Element Valid ] - [ VC: Required Attribute ] Then call xmlValidateOneAttribute() for each attribute present.

source code

validateOneNamespace(self, ctxt, elem, prefix, ns, value)
Try to validate a single namespace declaration for an element basically it does the following checks as described by the XML-1.0 recommendation: - [ VC: Attribute Value Type ] - [ VC: Fixed Attribute Default ] - [ VC: Entity Name ] - [ VC: Name Token ] - [ VC: ID ] - [ VC: IDREF ] - [ VC: Entity Name ] - [ VC: Notation Attributes ] The ID/IDREF uniqueness and matching are done separately

source code

validatePopElement(self, ctxt, elem, qname)
Pop the element end from the validation stack.

source code

validatePushElement(self, ctxt, elem, qname)
Push a new element start on the validation stack.

source code

validateRoot(self, ctxt)
Try to validate the root element. Basically it does the following check as described by the XML-1.0 recommendation: - [ VC: Root Element Type ]. It doesn't try to recurse or apply other checks to the element

source code

xincludeProcess(self)
Implement the XInclude substitution on the XML document @doc

source code

xincludeProcessFlags(self, flags)
Implement the XInclude substitution on the XML document @doc

source code

NewWalker(self, reader)
Set up an xmlTextReader to parse a preparsed XML document.

source code

readerWalker(self)
Create an xmlTextReader for a preparsed document.

source code

schemaNewDocParserCtxt(self)
Create an XML Schemas parse context for that document.

source code

schemaValidateDoc(self, ctxt)

source code

xpathNewContext(self)
Create a new xmlXPathContext

source code

xpathOrderDocElems(self)
Call this routine to speed up XPath computation on static documents.

source code

xpointerNewContext(self, here, origin)
Create a new XPointer context

source code

Inherited from xmlNode: addChild, addChildList, addContent, addContentLen, addNextSibling, addPrevSibling, addSibling, copyProp, copyPropList, debugDumpNode, debugDumpNodeList, debugDumpOneNode, docCopyNode, docCopyNodeList, docSetRootElement, freeNode, freeNodeList, getBase, getContent, getLang, getSpacePreserve, hasNsProp, hasProp, isBlankNode, isText, lastChild, lineNo, listGetRawString, listGetString, lsCountNode, lsOneNode, newChild, newNs, newNsProp, newNsPropEatName, newProp, newTextChild, noNsProp, nodePath, ns, nsDefs, nsProp, prop, replaceNode, schemaValidateOneElement, setBase, setContent, setContentLen, setLang, setName, setNs, setNsProp, setProp, setSpacePreserve, shellPrintNode, textConcat, textMerge, unlinkNode, unsetNsProp, unsetProp, xincludeProcessTree, xincludeProcessTreeFlags, xpathCastNodeToNumber, xpathCastNodeToString, xpathCmpNodes, xpathNewNodeSet, xpathNewValueTree, xpathNextAncestor, xpathNextAncestorOrSelf, xpathNextAttribute, xpathNextChild, xpathNextDescendant, xpathNextDescendantOrSelf, xpathNextFollowing, xpathNextFollowingSibling, xpathNextNamespace, xpathNextParent, xpathNextPreceding, xpathNextPrecedingSibling, xpathNextSelf, xpointerNewCollapsedRange, xpointerNewLocationSetNodes, xpointerNewRange, xpointerNewRangeNodes

Inherited from xmlCore: __eq__, __getattr__, __hash__, __iter__, __ne__, __str__, c14nMemory, c14nSaveTo, free, get_children, get_content, get_doc, get_last, get_name, get_next, get_parent, get_prev, get_properties, get_type, removeNsDef, saveTo, serialize, walk_breadth_first, walk_depth_first, xpathEval, xpathEval2

Properties

[hide private]

Inherited from xmlCore: children, content, doc, last, name, next, parent, prev, properties, type

Method Details

[hide private]

htmlAutoCloseTag(self, name, elem)

source code

The HTML DTD allows a tag to implicitly close other tags. The list is kept in the htmlStartClose array. This function checks if the element or one of its children would autoclose the given tag.

htmlIsAutoClosed(self, elem)

source code

The HTML DTD allows a tag to implicitly close other tags. The list is kept in the htmlStartClose array. This function checks if a tag is autoclosed by one of its children

htmlDocContentDumpOutput(self, buf, encoding)

source code

Dump an HTML document. Formatting returns/spaces are added.

htmlNodeDumpFileFormat(self, out, cur, encoding, format)

source code

Dump an HTML node, recursive behaviour, children are printed too. TODO: if encoding == None try to save in the doc encoding

htmlSaveFile(self, filename)

source code

Dump an HTML document to a file. If @filename is "-" the stdout file is used.

encodeEntitiesReentrant(self, input)

source code

Do a global encoding of a string, replacing the predefined entities and non ASCII values with their entities and CharRef counterparts. Contrary to xmlEncodeEntities, this routine is reentrant, and result must be deallocated.

relaxNGNewDocParserCtxt(self)

source code

Create an XML RelaxNGs parser context for that document. Note: since the process of compiling a RelaxNG schemas modifies the document, the @doc parameter is duplicated internally.

copyDoc(self, recursive)

source code

Do a copy of the document info. If recursive, the content tree will be copied too as well as DTD, namespaces and entities.

copyNode(self, node, extended)

source code

Do a copy of the node to a given document.

Overrides: xmlNode.copyNode

copyNodeList(self, node)

source code

Do a recursive copy of the node list.

Overrides: xmlNode.copyNodeList

newDocNode(self, ns, name, content)

source code

Creation of a new node element within a document. @ns and @content are optional (None).

newDocNodeEatName(self, ns, name, content)

source code

Creation of a new node element within a document. @ns and @content are optional (None).

newDocRawNode(self, ns, name, content)

source code

Creation of a new node element within a document. @ns and @content are optional (None).

newDocTextLen(self, content, len)

source code

Creation of a new text node with an extra content length parameter. The text node pertains to a given document.

newDtd(self, name, ExternalID, SystemID)

source code

Creation of a new DTD for the external subset. To create an internal subset, use xmlCreateIntSubset().

nodeDumpOutput(self, buf, cur, level, format, encoding)

source code

Dump an XML node, recursive behaviour, children are printed too. Note that @format = 1 provides node indenting only if xmlIndentTreeOutput = 1 or xmlKeepBlanksDefault(0) was called

nodeGetBase(self, cur)

source code

Searches for the BASE URL. The code should work on both XML
and HTML documents even if the base mechanisms are completely
different. It returns the base as defined in RFC 2396
sections 5.1.1 (Base URI within Document Content) and
5.1.2 (Base URI from the Encapsulating Entity). However, it
does not return the document base (5.1.3); use
xmlDocumentGetBase() for this.

reconciliateNs(self, tree)

source code

This function checks that all the namespaces declared within the given tree are properly declared. This is needed for example after Copy or Cut and then Paste operations. The subtree may still hold pointers to namespace declarations outside the subtree or invalid/masked. As much as possible the function tries to reuse the existing namespaces found in the new environment. If not possible, the new namespaces are redeclared on @tree at the top of the given subtree.

Overrides: xmlNode.reconciliateNs

saveFile(self, filename)

source code

Dump an XML document to a file. Will use compression if compiled in and enabled. If @filename is "-" the stdout file is used.

saveFileTo(self, buf, encoding)

source code

Dump an XML document to an I/O buffer. Warning! This calls xmlOutputBufferClose() on buf, which is not available after this call.

saveFormatFile(self, filename, format)

source code

Dump an XML document to a file. Will use compression if compiled in and enabled. If @filename is "-" the stdout file is used. If @format is set then the document will be indented on output. Note that @format = 1 provides node indenting only if xmlIndentTreeOutput = 1 or xmlKeepBlanksDefault(0) was called

saveFormatFileTo(self, buf, encoding, format)

source code

Dump an XML document to an I/O buffer. Warning! This calls xmlOutputBufferClose() on buf, which is not available after this call.

searchNs(self, node, nameSpace)

source code

Search a Ns registered under a given name space for a document. Recurse on the parents until it finds the defined namespace, or return None otherwise. @nameSpace can be None; this is a search for the default namespace. We don't allow crossing entity boundaries. If you don't declare the namespace within those, you will be in trouble! A warning is generated to cover this case.

Overrides: xmlNode.searchNs

searchNsByHref(self, node, href)

source code

Search a Ns aliasing a given URI. Recurse on the parents until it finds the defined namespace or return None otherwise.

Overrides: xmlNode.searchNsByHref

setListDoc(self, list)

source code

update all nodes in the list to point to the right document

Overrides: xmlNode.setListDoc

setTreeDoc(self, tree)

source code

update all nodes under the tree to point to the right document

Overrides: xmlNode.setTreeDoc

stringGetNodeList(self, value)

source code

Parse the value string and build the node list associated. Should produce a flat tree with only TEXTs and ENTITY_REFs.

stringLenGetNodeList(self, value, len)

source code

Parse the value string and build the node list associated. Should produce a flat tree with only TEXTs and ENTITY_REFs.

isID(self, elem, attr)

source code

Determine whether an attribute is of type ID. In case we have DTD(s) then this is done if DTD loading has been requested. In the case of HTML documents parsed with the HTML parser, then ID detection is done systematically.

Overrides: xmlNode.isID

isRef(self, elem, attr)

source code

Determine whether an attribute is of type Ref. In case we have DTD(s) then this is simple, otherwise we use a heuristic: name Ref (upper or lowercase).

Overrides: xmlNode.isRef

validCtxtNormalizeAttributeValue(self, ctxt, elem, name, value)

source code

Does the validation related extra step of the normalization of attribute values: If the declared value is not CDATA, then the XML processor must further process the normalized attribute value by discarding any leading and trailing space (#x20) characters, and by replacing sequences of space (#x20) characters by single space (#x20) character. Also check VC: Standalone Document Declaration in P32, and update ctxt->valid accordingly

validNormalizeAttributeValue(self, elem, name, value)

source code

Overrides: xmlNode.validNormalizeAttributeValue

validateDocument(self, ctxt)

source code

Try to validate the document instance. Basically it does all the checks described by the XML Rec, i.e. validates the internal and external subset (if present) and validates the document tree.

validateDtd(self, ctxt, dtd)

source code

Try to validate the document against the dtd instance. Basically it checks all the definitions in the DtD. Note that the internal subset (if present) is de-coupled (i.e. not used), which could give problems if ID or IDREF is present.

validateDtdFinal(self, ctxt)

source code

Does the final step for the dtds validation once all the subsets have been parsed basically it does the following checks described by the XML Rec - check that ENTITY and ENTITIES type attributes default or possible values matches one of the defined entities. - check that NOTATION type attributes default or possible values matches one of the defined notations.

validateNotationUse(self, ctxt, notationName)

source code

Validate that the given name matches a notation declaration.

[ VC: Notation Declared ]

validateOneElement(self, ctxt, elem)

source code

Try to validate a single element and its attributes, basically it does the following checks as described by the XML-1.0 recommendation: - [ VC: Element Valid ] - [ VC: Required Attribute ] Then call xmlValidateOneAttribute() for each attribute present. The ID/IDREF checks are done separately

NewWalker(self, reader)

source code

Set up an xmlTextReader to parse a preparsed XML document. This reuses the existing @reader xmlTextReader.

schemaNewDocParserCtxt(self)

source code

Create an XML Schemas parse context for that document. NB. The document may be modified during the parsing process.

xpathOrderDocElems(self)

source code

Call this routine to speed up XPath computation on static documents. This stamps all the element nodes with the document order. As for line information, the order is kept in the element->content field; the value stored is actually the negative of the node number (starting at -1), to be able to differentiate it from line numbers.

xpointerNewContext(self, here, origin)

source code

Create a new XPointer context

Overrides: xmlNode.xpointerNewContext

Class xmlDoc

__init__(self, _obj=None) (Constructor)

__repr__(self) (Representation operator)

htmlAutoCloseTag(self, name, elem)

htmlIsAutoClosed(self, elem)

htmlDocContentDumpOutput(self, buf, encoding)

htmlNodeDumpFileFormat(self, out, cur, encoding, format)

htmlSaveFile(self, filename)

encodeEntitiesReentrant(self, input)

relaxNGNewDocParserCtxt(self)

copyDoc(self, recursive)

copyNode(self, node, extended)

copyNodeList(self, node)

newDocNode(self, ns, name, content)

newDocNodeEatName(self, ns, name, content)

newDocRawNode(self, ns, name, content)

newDocTextLen(self, content, len)

newDtd(self, name, ExternalID, SystemID)

nodeDumpOutput(self, buf, cur, level, format, encoding)

nodeGetBase(self, cur)

reconciliateNs(self, tree)

saveFile(self, filename)

saveFileTo(self, buf, encoding)

saveFormatFile(self, filename, format)

saveFormatFileTo(self, buf, encoding, format)

searchNs(self, node, nameSpace)

searchNsByHref(self, node, href)

setListDoc(self, list)

setTreeDoc(self, tree)

stringGetNodeList(self, value)

stringLenGetNodeList(self, value, len)

isID(self, elem, attr)

isRef(self, elem, attr)

validCtxtNormalizeAttributeValue(self, ctxt, elem, name, value)

validNormalizeAttributeValue(self, elem, name, value)

validateDocument(self, ctxt)

validateDtd(self, ctxt, dtd)

validateDtdFinal(self, ctxt)

validateNotationUse(self, ctxt, notationName)

validateOneElement(self, ctxt, elem)

NewWalker(self, reader)

schemaNewDocParserCtxt(self)

xpathOrderDocElems(self)

xpointerNewContext(self, here, origin)

__init__(self, _obj=None)
(Constructor)

__repr__(self)
(Representation operator)