Spaces:
Running
Running
/*
                            __  __            _
                         ___\ \/ /_ __   __ _| |_
                        / _ \\  /| '_ \ / _` | __|
                       |  __//  \| |_) | (_| | |_
                        \___/_/\_\ .__/ \__,_|\__|
                                 |_| XML parser

   Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
   Copyright (c) 2000      Clark Cooper <coopercc@users.sourceforge.net>
   Copyright (c) 2000-2005 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
   Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net>
   Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
   Copyright (c) 2016-2022 Sebastian Pipping <sebastian@pipping.org>
   Copyright (c) 2016      Cristian Rodríguez <crrodriguez@opensuse.org>
   Copyright (c) 2016      Thomas Beutlich <tc@tbeu.de>
   Copyright (c) 2017      Rhodri James <rhodri@wildebeest.org.uk>
   Copyright (c) 2022      Thijs Schreijer <thijs@thijsschreijer.nl>
   Licensed under the MIT license:

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   "Software"), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to permit
   persons to whom the Software is furnished to do so, subject to the
   following conditions:

   The above copyright notice and this permission notice shall be included
   in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
   NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
   DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
   USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
extern "C" { | |
/* The parser object is only forward-declared here; client code handles
   it exclusively through the XML_Parser pointer type.
*/
struct XML_ParserStruct;
typedef struct XML_ParserStruct *XML_Parser;

/* Boolean type used by the API, together with its two canonical values
   (referenced e.g. by the XML_UseForeignDTD documentation).
*/
typedef unsigned char XML_Bool;
#define XML_TRUE ((XML_Bool)1)
#define XML_FALSE ((XML_Bool)0)
/* The XML_Status enum gives the possible return values for several
   API functions.  The preprocessor #defines are included so this
   stanza can be added to code that still needs to support older
   versions of Expat 1.95.x:

   #ifndef XML_STATUS_OK
   #define XML_STATUS_OK    1
   #define XML_STATUS_ERROR 0
   #endif

   Otherwise, the #define hackery is quite ugly and would have been
   dropped.
*/
enum XML_Status {
  XML_STATUS_ERROR = 0,
#define XML_STATUS_ERROR XML_STATUS_ERROR
  XML_STATUS_OK = 1,
#define XML_STATUS_OK XML_STATUS_OK
  XML_STATUS_SUSPENDED = 2
#define XML_STATUS_SUSPENDED XML_STATUS_SUSPENDED
};
/* Error codes.  The enumerators carry no explicit values, so they are
   numbered consecutively from XML_ERROR_NONE == 0 in declaration order;
   inserting or reordering entries would change every later value.
*/
enum XML_Error {
  XML_ERROR_NONE,
  XML_ERROR_NO_MEMORY,
  XML_ERROR_SYNTAX,
  XML_ERROR_NO_ELEMENTS,
  XML_ERROR_INVALID_TOKEN,
  XML_ERROR_UNCLOSED_TOKEN,
  XML_ERROR_PARTIAL_CHAR,
  XML_ERROR_TAG_MISMATCH,
  XML_ERROR_DUPLICATE_ATTRIBUTE,
  XML_ERROR_JUNK_AFTER_DOC_ELEMENT,
  XML_ERROR_PARAM_ENTITY_REF,
  XML_ERROR_UNDEFINED_ENTITY,
  XML_ERROR_RECURSIVE_ENTITY_REF,
  XML_ERROR_ASYNC_ENTITY,
  XML_ERROR_BAD_CHAR_REF,
  XML_ERROR_BINARY_ENTITY_REF,
  XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF,
  XML_ERROR_MISPLACED_XML_PI,
  XML_ERROR_UNKNOWN_ENCODING,
  XML_ERROR_INCORRECT_ENCODING,
  XML_ERROR_UNCLOSED_CDATA_SECTION,
  XML_ERROR_EXTERNAL_ENTITY_HANDLING,
  XML_ERROR_NOT_STANDALONE,
  XML_ERROR_UNEXPECTED_STATE,
  XML_ERROR_ENTITY_DECLARED_IN_PE,
  XML_ERROR_FEATURE_REQUIRES_XML_DTD,
  XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING,
  /* Added in 1.95.7. */
  XML_ERROR_UNBOUND_PREFIX,
  /* Added in 1.95.8. */
  XML_ERROR_UNDECLARING_PREFIX,
  XML_ERROR_INCOMPLETE_PE,
  XML_ERROR_XML_DECL,
  XML_ERROR_TEXT_DECL,
  XML_ERROR_PUBLICID,
  XML_ERROR_SUSPENDED,
  XML_ERROR_NOT_SUSPENDED,
  XML_ERROR_ABORTED,
  XML_ERROR_FINISHED,
  XML_ERROR_SUSPEND_PE,
  /* Added in 2.0. */
  XML_ERROR_RESERVED_PREFIX_XML,
  XML_ERROR_RESERVED_PREFIX_XMLNS,
  XML_ERROR_RESERVED_NAMESPACE_URI,
  /* Added in 2.2.1. */
  XML_ERROR_INVALID_ARGUMENT,
  /* Added in 2.3.0. */
  XML_ERROR_NO_BUFFER,
  /* Added in 2.4.0. */
  XML_ERROR_AMPLIFICATION_LIMIT_BREACH
};
/* Kind of a content-model node (the type field of XML_Content).
   Numbering starts at 1, so 0 is not a valid type.
*/
enum XML_Content_Type {
  XML_CTYPE_EMPTY = 1,
  XML_CTYPE_ANY,
  XML_CTYPE_MIXED,
  XML_CTYPE_NAME,
  XML_CTYPE_CHOICE,
  XML_CTYPE_SEQ
};
/* Quantifier attached to a content-model node (the quant field of
   XML_Content).
   NOTE(review): presumably OPT, REP and PLUS correspond to the DTD
   operators '?', '*' and '+' -- confirm.
*/
enum XML_Content_Quant {
  XML_CQUANT_NONE,
  XML_CQUANT_OPT,
  XML_CQUANT_REP,
  XML_CQUANT_PLUS
};
/* If type == XML_CTYPE_EMPTY or XML_CTYPE_ANY, then quant will be | |
XML_CQUANT_NONE, and the other fields will be zero or NULL. | |
If type == XML_CTYPE_MIXED, then quant will be NONE or REP and | |
numchildren will contain number of elements that may be mixed in | |
and children point to an array of XML_Content cells that will be | |
all of XML_CTYPE_NAME type with no quantification. | |
If type == XML_CTYPE_NAME, then the name points to the name, and | |
the numchildren field will be zero and children will be NULL. The | |
quant fields indicates any quantifiers placed on the name. | |
CHOICE and SEQ will have name NULL, the number of children in | |
numchildren and children will point, recursively, to an array | |
of XML_Content cells. | |
The EMPTY, ANY, and MIXED types will only occur at top level. | |
*/ | |
typedef struct XML_cp XML_Content; | |
struct XML_cp { | |
enum XML_Content_Type type; | |
enum XML_Content_Quant quant; | |
XML_Char *name; | |
unsigned int numchildren; | |
XML_Content *children; | |
}; | |
/* This is called for an element declaration. See above for | |
description of the model argument. It's the user code's responsibility | |
to free model when finished with it. See XML_FreeContentModel. | |
There is no need to free the model from the handler, it can be kept | |
around and freed at a later stage. | |
*/ | |
typedef void(XMLCALL *XML_ElementDeclHandler)(void *userData, | |
const XML_Char *name, | |
XML_Content *model); | |
XMLPARSEAPI(void) | |
XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl); | |
/* The Attlist declaration handler is called for *each* attribute. So | |
a single Attlist declaration with multiple attributes declared will | |
generate multiple calls to this handler. The "default" parameter | |
may be NULL in the case of the "#IMPLIED" or "#REQUIRED" | |
keyword. The "isrequired" parameter will be true and the default | |
value will be NULL in the case of "#REQUIRED". If "isrequired" is | |
true and default is non-NULL, then this is a "#FIXED" default. | |
*/ | |
typedef void(XMLCALL *XML_AttlistDeclHandler)( | |
void *userData, const XML_Char *elname, const XML_Char *attname, | |
const XML_Char *att_type, const XML_Char *dflt, int isrequired); | |
XMLPARSEAPI(void) | |
XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl); | |
/* The XML declaration handler is called for *both* XML declarations | |
and text declarations. The way to distinguish is that the version | |
parameter will be NULL for text declarations. The encoding | |
parameter may be NULL for XML declarations. The standalone | |
parameter will be -1, 0, or 1 indicating respectively that there | |
was no standalone parameter in the declaration, that it was given | |
as no, or that it was given as yes. | |
*/ | |
typedef void(XMLCALL *XML_XmlDeclHandler)(void *userData, | |
const XML_Char *version, | |
const XML_Char *encoding, | |
int standalone); | |
XMLPARSEAPI(void) | |
XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler xmldecl); | |
/* Set of allocation callbacks that can be handed to
   XML_ParserCreate_MM.  The three members have the same signatures as
   the standard malloc, realloc and free.
*/
typedef struct {
  void *(*malloc_fcn)(size_t size);
  void *(*realloc_fcn)(void *ptr, size_t size);
  void (*free_fcn)(void *ptr);
} XML_Memory_Handling_Suite;
/* Constructs a new parser; encoding is the encoding specified by the | |
external protocol or NULL if there is none specified. | |
*/ | |
XMLPARSEAPI(XML_Parser) | |
XML_ParserCreate(const XML_Char *encoding); | |
/* Constructs a new parser and namespace processor. Element type | |
names and attribute names that belong to a namespace will be | |
expanded; unprefixed attribute names are never expanded; unprefixed | |
element type names are expanded only if there is a default | |
namespace. The expanded name is the concatenation of the namespace | |
URI, the namespace separator character, and the local part of the | |
name. If the namespace separator is '\0' then the namespace URI | |
and the local part will be concatenated without any separator. | |
It is a programming error to use the separator '\0' with namespace | |
triplets (see XML_SetReturnNSTriplet). | |
If a namespace separator is chosen that can be part of a URI or | |
part of an XML name, splitting an expanded name back into its | |
1, 2 or 3 original parts on application level in the element handler | |
may end up vulnerable, so these are advised against; sane choices for | |
a namespace separator are e.g. '\n' (line feed) and '|' (pipe). | |
Note that Expat does not validate namespace URIs (beyond encoding) | |
against RFC 3986 today (and is not required to do so with regard to | |
the XML 1.0 namespaces specification) but it may start doing that | |
in future releases. Before that, an application using Expat must | |
be ready to receive namespace URIs containing non-URI characters. | |
*/ | |
XMLPARSEAPI(XML_Parser) | |
XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator); | |
/* Constructs a new parser using the memory management suite referred to | |
by memsuite. If memsuite is NULL, then use the standard library memory | |
suite. If namespaceSeparator is non-NULL it creates a parser with | |
namespace processing as described above. The character pointed at | |
will serve as the namespace separator. | |
All further memory operations used for the created parser will come from | |
the given suite. | |
*/ | |
XMLPARSEAPI(XML_Parser) | |
XML_ParserCreate_MM(const XML_Char *encoding, | |
const XML_Memory_Handling_Suite *memsuite, | |
const XML_Char *namespaceSeparator); | |
/* Prepare a parser object to be re-used. This is particularly | |
valuable when memory allocation overhead is disproportionately high, | |
such as when a large number of small documnents need to be parsed. | |
All handlers are cleared from the parser, except for the | |
unknownEncodingHandler. The parser's external state is re-initialized | |
except for the values of ns and ns_triplets. | |
Added in Expat 1.95.3. | |
*/ | |
XMLPARSEAPI(XML_Bool) | |
XML_ParserReset(XML_Parser parser, const XML_Char *encoding); | |
/* atts is array of name/value pairs, terminated by 0; | |
names and values are 0 terminated. | |
*/ | |
typedef void(XMLCALL *XML_StartElementHandler)(void *userData, | |
const XML_Char *name, | |
const XML_Char **atts); | |
typedef void(XMLCALL *XML_EndElementHandler)(void *userData, | |
const XML_Char *name); | |
/* s is not 0 terminated. */ | |
typedef void(XMLCALL *XML_CharacterDataHandler)(void *userData, | |
const XML_Char *s, int len); | |
/* target and data are 0 terminated */ | |
typedef void(XMLCALL *XML_ProcessingInstructionHandler)(void *userData, | |
const XML_Char *target, | |
const XML_Char *data); | |
/* data is 0 terminated */ | |
typedef void(XMLCALL *XML_CommentHandler)(void *userData, const XML_Char *data); | |
/* Callback types for the start and the end of a CDATA section; both
   receive only userData.
*/
typedef void(XMLCALL *XML_StartCdataSectionHandler)(void *userData);
typedef void(XMLCALL *XML_EndCdataSectionHandler)(void *userData);
/* This is called for any characters in the XML document for which | |
there is no applicable handler. This includes both characters that | |
are part of markup which is of a kind that is not reported | |
(comments, markup declarations), or characters that are part of a | |
construct which could be reported but for which no handler has been | |
supplied. The characters are passed exactly as they were in the XML | |
document except that they will be encoded in UTF-8 or UTF-16. | |
Line boundaries are not normalized. Note that a byte order mark | |
character is not passed to the default handler. There are no | |
guarantees about how characters are divided between calls to the | |
default handler: for example, a comment might be split between | |
multiple calls. | |
*/ | |
typedef void(XMLCALL *XML_DefaultHandler)(void *userData, const XML_Char *s, | |
int len); | |
/* This is called for the start of the DOCTYPE declaration, before | |
any DTD or internal subset is parsed. | |
*/ | |
typedef void(XMLCALL *XML_StartDoctypeDeclHandler)(void *userData, | |
const XML_Char *doctypeName, | |
const XML_Char *sysid, | |
const XML_Char *pubid, | |
int has_internal_subset); | |
/* This is called for the end of the DOCTYPE declaration when the | |
closing > is encountered, but after processing any external | |
subset. | |
*/ | |
typedef void(XMLCALL *XML_EndDoctypeDeclHandler)(void *userData); | |
/* This is called for entity declarations. The is_parameter_entity | |
argument will be non-zero if the entity is a parameter entity, zero | |
otherwise. | |
For internal entities (<!ENTITY foo "bar">), value will | |
be non-NULL and systemId, publicID, and notationName will be NULL. | |
The value string is NOT null-terminated; the length is provided in | |
the value_length argument. Since it is legal to have zero-length | |
values, do not use this argument to test for internal entities. | |
For external entities, value will be NULL and systemId will be | |
non-NULL. The publicId argument will be NULL unless a public | |
identifier was provided. The notationName argument will have a | |
non-NULL value only for unparsed entity declarations. | |
Note that is_parameter_entity can't be changed to XML_Bool, since | |
that would break binary compatibility. | |
*/ | |
typedef void(XMLCALL *XML_EntityDeclHandler)( | |
void *userData, const XML_Char *entityName, int is_parameter_entity, | |
const XML_Char *value, int value_length, const XML_Char *base, | |
const XML_Char *systemId, const XML_Char *publicId, | |
const XML_Char *notationName); | |
XMLPARSEAPI(void) | |
XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler); | |
/* OBSOLETE -- OBSOLETE -- OBSOLETE | |
This handler has been superseded by the EntityDeclHandler above. | |
It is provided here for backward compatibility. | |
This is called for a declaration of an unparsed (NDATA) entity. | |
The base argument is whatever was set by XML_SetBase. The | |
entityName, systemId and notationName arguments will never be | |
NULL. The other arguments may be. | |
*/ | |
typedef void(XMLCALL *XML_UnparsedEntityDeclHandler)( | |
void *userData, const XML_Char *entityName, const XML_Char *base, | |
const XML_Char *systemId, const XML_Char *publicId, | |
const XML_Char *notationName); | |
/* This is called for a declaration of notation. The base argument is | |
whatever was set by XML_SetBase. The notationName will never be | |
NULL. The other arguments can be. | |
*/ | |
typedef void(XMLCALL *XML_NotationDeclHandler)(void *userData, | |
const XML_Char *notationName, | |
const XML_Char *base, | |
const XML_Char *systemId, | |
const XML_Char *publicId); | |
/* When namespace processing is enabled, these are called once for | |
each namespace declaration. The call to the start and end element | |
handlers occur between the calls to the start and end namespace | |
declaration handlers. For an xmlns attribute, prefix will be | |
NULL. For an xmlns="" attribute, uri will be NULL. | |
*/ | |
typedef void(XMLCALL *XML_StartNamespaceDeclHandler)(void *userData, | |
const XML_Char *prefix, | |
const XML_Char *uri); | |
typedef void(XMLCALL *XML_EndNamespaceDeclHandler)(void *userData, | |
const XML_Char *prefix); | |
/* This is called if the document is not standalone, that is, it has an | |
external subset or a reference to a parameter entity, but does not | |
have standalone="yes". If this handler returns XML_STATUS_ERROR, | |
then processing will not continue, and the parser will return a | |
XML_ERROR_NOT_STANDALONE error. | |
If parameter entity parsing is enabled, then in addition to the | |
conditions above this handler will only be called if the referenced | |
entity was actually read. | |
*/ | |
typedef int(XMLCALL *XML_NotStandaloneHandler)(void *userData); | |
/* This is called for a reference to an external parsed general | |
entity. The referenced entity is not automatically parsed. The | |
application can parse it immediately or later using | |
XML_ExternalEntityParserCreate. | |
The parser argument is the parser parsing the entity containing the | |
reference; it can be passed as the parser argument to | |
XML_ExternalEntityParserCreate. The systemId argument is the | |
system identifier as specified in the entity declaration; it will | |
not be NULL. | |
The base argument is the system identifier that should be used as | |
the base for resolving systemId if systemId was relative; this is | |
set by XML_SetBase; it may be NULL. | |
The publicId argument is the public identifier as specified in the | |
entity declaration, or NULL if none was specified; the whitespace | |
in the public identifier will have been normalized as required by | |
the XML spec. | |
The context argument specifies the parsing context in the format | |
expected by the context argument to XML_ExternalEntityParserCreate; | |
context is valid only until the handler returns, so if the | |
referenced entity is to be parsed later, it must be copied. | |
context is NULL only when the entity is a parameter entity. | |
The handler should return XML_STATUS_ERROR if processing should not | |
continue because of a fatal error in the handling of the external | |
entity. In this case the calling parser will return an | |
XML_ERROR_EXTERNAL_ENTITY_HANDLING error. | |
Note that unlike other handlers the first argument is the parser, | |
not userData. | |
*/ | |
typedef int(XMLCALL *XML_ExternalEntityRefHandler)(XML_Parser parser, | |
const XML_Char *context, | |
const XML_Char *base, | |
const XML_Char *systemId, | |
const XML_Char *publicId); | |
/* This is called in two situations: | |
1) An entity reference is encountered for which no declaration | |
has been read *and* this is not an error. | |
2) An internal entity reference is read, but not expanded, because | |
XML_SetDefaultHandler has been called. | |
Note: skipped parameter entities in declarations and skipped general | |
entities in attribute values cannot be reported, because | |
the event would be out of sync with the reporting of the | |
declarations or attribute values | |
*/ | |
typedef void(XMLCALL *XML_SkippedEntityHandler)(void *userData, | |
const XML_Char *entityName, | |
int is_parameter_entity); | |
/* This structure is filled in by the XML_UnknownEncodingHandler to | |
provide information to the parser about encodings that are unknown | |
to the parser. | |
The map[b] member gives information about byte sequences whose | |
first byte is b. | |
If map[b] is c where c is >= 0, then b by itself encodes the | |
Unicode scalar value c. | |
If map[b] is -1, then the byte sequence is malformed. | |
If map[b] is -n, where n >= 2, then b is the first byte of an | |
n-byte sequence that encodes a single Unicode scalar value. | |
The data member will be passed as the first argument to the convert | |
function. | |
The convert function is used to convert multibyte sequences; s will | |
point to a n-byte sequence where map[(unsigned char)*s] == -n. The | |
convert function must return the Unicode scalar value represented | |
by this byte sequence or -1 if the byte sequence is malformed. | |
The convert function may be NULL if the encoding is a single-byte | |
encoding, that is if map[b] >= -1 for all bytes b. | |
When the parser is finished with the encoding, then if release is | |
not NULL, it will call release passing it the data member; once | |
release has been called, the convert function will not be called | |
again. | |
Expat places certain restrictions on the encodings that are supported | |
using this mechanism. | |
1. Every ASCII character that can appear in a well-formed XML document, | |
other than the characters | |
$@\^`{}~ | |
must be represented by a single byte, and that byte must be the | |
same byte that represents that character in ASCII. | |
2. No character may require more than 4 bytes to encode. | |
3. All characters encoded must have Unicode scalar values <= | |
0xFFFF, (i.e., characters that would be encoded by surrogates in | |
UTF-16 are not allowed). Note that this restriction doesn't | |
apply to the built-in support for UTF-8 and UTF-16. | |
4. No Unicode character may be encoded by more than one distinct | |
sequence of bytes. | |
*/ | |
typedef struct { | |
int map[256]; | |
void *data; | |
int(XMLCALL *convert)(void *data, const char *s); | |
void(XMLCALL *release)(void *data); | |
} XML_Encoding; | |
/* This is called for an encoding that is unknown to the parser. | |
The encodingHandlerData argument is that which was passed as the | |
second argument to XML_SetUnknownEncodingHandler. | |
The name argument gives the name of the encoding as specified in | |
the encoding declaration. | |
If the callback can provide information about the encoding, it must | |
fill in the XML_Encoding structure, and return XML_STATUS_OK. | |
Otherwise it must return XML_STATUS_ERROR. | |
If info does not describe a suitable encoding, then the parser will | |
return an XML_ERROR_UNKNOWN_ENCODING error. | |
*/ | |
typedef int(XMLCALL *XML_UnknownEncodingHandler)(void *encodingHandlerData, | |
const XML_Char *name, | |
XML_Encoding *info); | |
XMLPARSEAPI(void) | |
XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start, | |
XML_EndElementHandler end); | |
XMLPARSEAPI(void) | |
XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler handler); | |
XMLPARSEAPI(void) | |
XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler handler); | |
XMLPARSEAPI(void) | |
XML_SetCharacterDataHandler(XML_Parser parser, | |
XML_CharacterDataHandler handler); | |
XMLPARSEAPI(void) | |
XML_SetProcessingInstructionHandler(XML_Parser parser, | |
XML_ProcessingInstructionHandler handler); | |
XMLPARSEAPI(void) | |
XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler); | |
XMLPARSEAPI(void) | |
XML_SetCdataSectionHandler(XML_Parser parser, | |
XML_StartCdataSectionHandler start, | |
XML_EndCdataSectionHandler end); | |
XMLPARSEAPI(void) | |
XML_SetStartCdataSectionHandler(XML_Parser parser, | |
XML_StartCdataSectionHandler start); | |
XMLPARSEAPI(void) | |
XML_SetEndCdataSectionHandler(XML_Parser parser, | |
XML_EndCdataSectionHandler end); | |
/* This sets the default handler and also inhibits expansion of | |
internal entities. These entity references will be passed to the | |
default handler, or to the skipped entity handler, if one is set. | |
*/ | |
XMLPARSEAPI(void) | |
XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler); | |
/* This sets the default handler but does not inhibit expansion of | |
internal entities. The entity reference will not be passed to the | |
default handler. | |
*/ | |
XMLPARSEAPI(void) | |
XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler); | |
XMLPARSEAPI(void) | |
XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start, | |
XML_EndDoctypeDeclHandler end); | |
XMLPARSEAPI(void) | |
XML_SetStartDoctypeDeclHandler(XML_Parser parser, | |
XML_StartDoctypeDeclHandler start); | |
XMLPARSEAPI(void) | |
XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end); | |
XMLPARSEAPI(void) | |
XML_SetUnparsedEntityDeclHandler(XML_Parser parser, | |
XML_UnparsedEntityDeclHandler handler); | |
XMLPARSEAPI(void) | |
XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler); | |
XMLPARSEAPI(void) | |
XML_SetNamespaceDeclHandler(XML_Parser parser, | |
XML_StartNamespaceDeclHandler start, | |
XML_EndNamespaceDeclHandler end); | |
XMLPARSEAPI(void) | |
XML_SetStartNamespaceDeclHandler(XML_Parser parser, | |
XML_StartNamespaceDeclHandler start); | |
XMLPARSEAPI(void) | |
XML_SetEndNamespaceDeclHandler(XML_Parser parser, | |
XML_EndNamespaceDeclHandler end); | |
XMLPARSEAPI(void) | |
XML_SetNotStandaloneHandler(XML_Parser parser, | |
XML_NotStandaloneHandler handler); | |
XMLPARSEAPI(void) | |
XML_SetExternalEntityRefHandler(XML_Parser parser, | |
XML_ExternalEntityRefHandler handler); | |
/* If a non-NULL value for arg is specified here, then it will be | |
passed as the first argument to the external entity ref handler | |
instead of the parser object. | |
*/ | |
XMLPARSEAPI(void) | |
XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg); | |
XMLPARSEAPI(void) | |
XML_SetSkippedEntityHandler(XML_Parser parser, | |
XML_SkippedEntityHandler handler); | |
XMLPARSEAPI(void) | |
XML_SetUnknownEncodingHandler(XML_Parser parser, | |
XML_UnknownEncodingHandler handler, | |
void *encodingHandlerData); | |
/* This can be called within a handler for a start element, end | |
element, processing instruction or character data. It causes the | |
corresponding markup to be passed to the default handler. | |
*/ | |
XMLPARSEAPI(void) | |
XML_DefaultCurrent(XML_Parser parser); | |
/* If do_nst is non-zero, and namespace processing is in effect, and | |
a name has a prefix (i.e. an explicit namespace qualifier) then | |
that name is returned as a triplet in a single string separated by | |
the separator character specified when the parser was created: URI | |
+ sep + local_name + sep + prefix. | |
If do_nst is zero, then namespace information is returned in the | |
default manner (URI + sep + local_name) whether or not the name | |
has a prefix. | |
Note: Calling XML_SetReturnNSTriplet after XML_Parse or | |
XML_ParseBuffer has no effect. | |
*/ | |
XMLPARSEAPI(void) | |
XML_SetReturnNSTriplet(XML_Parser parser, int do_nst); | |
/* This value is passed as the userData argument to callbacks. */ | |
XMLPARSEAPI(void) | |
XML_SetUserData(XML_Parser parser, void *userData); | |
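/* Returns the last value set by XML_SetUserData or NULL.
   NOTE(review): the XML_GetUserData declaration that this comment
   documents is missing from this copy of the header; restore it.
*/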
/* This is equivalent to supplying an encoding argument to | |
XML_ParserCreate. On success XML_SetEncoding returns non-zero, | |
zero otherwise. | |
Note: Calling XML_SetEncoding after XML_Parse or XML_ParseBuffer | |
has no effect and returns XML_STATUS_ERROR. | |
*/ | |
XMLPARSEAPI(enum XML_Status) | |
XML_SetEncoding(XML_Parser parser, const XML_Char *encoding); | |
/* If this function is called, then the parser will be passed as the | |
first argument to callbacks instead of userData. The userData will | |
still be accessible using XML_GetUserData. | |
*/ | |
XMLPARSEAPI(void) | |
XML_UseParserAsHandlerArg(XML_Parser parser); | |
/* If useDTD == XML_TRUE is passed to this function, then the parser | |
will assume that there is an external subset, even if none is | |
specified in the document. In such a case the parser will call the | |
externalEntityRefHandler with a value of NULL for the systemId | |
argument (the publicId and context arguments will be NULL as well). | |
Note: For the purpose of checking WFC: Entity Declared, passing | |
useDTD == XML_TRUE will make the parser behave as if the document | |
had a DTD with an external subset. | |
Note: If this function is called, then this must be done before | |
the first call to XML_Parse or XML_ParseBuffer, since it will | |
have no effect after that. Returns | |
XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING. | |
Note: If the document does not have a DOCTYPE declaration at all, | |
then startDoctypeDeclHandler and endDoctypeDeclHandler will not | |
be called, despite an external subset being parsed. | |
Note: If XML_DTD is not defined when Expat is compiled, returns | |
XML_ERROR_FEATURE_REQUIRES_XML_DTD. | |
Note: If parser == NULL, returns XML_ERROR_INVALID_ARGUMENT. | |
*/ | |
XMLPARSEAPI(enum XML_Error) | |
XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD); | |
/* Sets the base to be used for resolving relative URIs in system | |
identifiers in declarations. Resolving relative identifiers is | |
left to the application: this value will be passed through as the | |
base argument to the XML_ExternalEntityRefHandler, | |
XML_NotationDeclHandler and XML_UnparsedEntityDeclHandler. The base | |
argument will be copied. Returns XML_STATUS_ERROR if out of memory, | |
XML_STATUS_OK otherwise. | |
*/ | |
XMLPARSEAPI(enum XML_Status) | |
XML_SetBase(XML_Parser parser, const XML_Char *base); | |
XMLPARSEAPI(const XML_Char *) | |
XML_GetBase(XML_Parser parser); | |
/* Returns the number of the attribute/value pairs passed in last call | |
to the XML_StartElementHandler that were specified in the start-tag | |
rather than defaulted. Each attribute/value pair counts as 2; thus | |
this corresponds to an index into the atts array passed to the | |
XML_StartElementHandler. Returns -1 if parser == NULL. | |
*/ | |
XMLPARSEAPI(int) | |
XML_GetSpecifiedAttributeCount(XML_Parser parser); | |
/* Returns the index of the ID attribute passed in the last call to | |
XML_StartElementHandler, or -1 if there is no ID attribute or | |
parser == NULL. Each attribute/value pair counts as 2; thus this | |
corresponds to an index into the atts array passed to the | |
XML_StartElementHandler. | |
*/ | |
XMLPARSEAPI(int) | |
XML_GetIdAttributeIndex(XML_Parser parser); | |
/* Source file byte offsets for the start and end of attribute names and values. | |
The value indices are exclusive of surrounding quotes; thus in a UTF-8 source | |
file an attribute value of "blah" will yield: | |
info->valueEnd - info->valueStart = 4 bytes. | |
*/ | |
typedef struct { | |
XML_Index nameStart; /* Offset to beginning of the attribute name. */ | |
XML_Index nameEnd; /* Offset after the attribute name's last byte. */ | |
XML_Index valueStart; /* Offset to beginning of the attribute value. */ | |
XML_Index valueEnd; /* Offset after the attribute value's last byte. */ | |
} XML_AttrInfo; | |
/* Returns an array of XML_AttrInfo structures for the attribute/value pairs | |
passed in last call to the XML_StartElementHandler that were specified | |
in the start-tag rather than defaulted. Each attribute/value pair counts | |
as 1; thus the number of entries in the array is | |
XML_GetSpecifiedAttributeCount(parser) / 2. | |
*/ | |
XMLPARSEAPI(const XML_AttrInfo *) | |
XML_GetAttributeInfo(XML_Parser parser); | |
/* Parses some input. Returns XML_STATUS_ERROR if a fatal error is | |
detected. The last call to XML_Parse must have isFinal true; len | |
may be zero for this call (or any other). | |
Though the return values for these functions has always been | |
described as a Boolean value, the implementation, at least for the | |
1.95.x series, has always returned exactly one of the XML_Status | |
values. | |
*/ | |
XMLPARSEAPI(enum XML_Status) | |
XML_Parse(XML_Parser parser, const char *s, int len, int isFinal); | |
XMLPARSEAPI(void *) | |
XML_GetBuffer(XML_Parser parser, int len); | |
XMLPARSEAPI(enum XML_Status) | |
XML_ParseBuffer(XML_Parser parser, int len, int isFinal); | |
/* Stops parsing, causing XML_Parse() or XML_ParseBuffer() to return. | |
Must be called from within a call-back handler, except when aborting | |
(resumable = 0) an already suspended parser. Some call-backs may | |
still follow because they would otherwise get lost. Examples: | |
- endElementHandler() for empty elements when stopped in | |
startElementHandler(), | |
- endNameSpaceDeclHandler() when stopped in endElementHandler(), | |
and possibly others. | |
Can be called from most handlers, including DTD related call-backs, | |
except when parsing an external parameter entity and resumable != 0. | |
Returns XML_STATUS_OK when successful, XML_STATUS_ERROR otherwise. | |
Possible error codes: | |
- XML_ERROR_SUSPENDED: when suspending an already suspended parser. | |
- XML_ERROR_FINISHED: when the parser has already finished. | |
- XML_ERROR_SUSPEND_PE: when suspending while parsing an external PE. | |
When resumable != 0 (true) then parsing is suspended, that is, | |
XML_Parse() and XML_ParseBuffer() return XML_STATUS_SUSPENDED. | |
Otherwise, parsing is aborted, that is, XML_Parse() and XML_ParseBuffer() | |
return XML_STATUS_ERROR with error code XML_ERROR_ABORTED. | |
*Note*: | |
This will be applied to the current parser instance only, that is, if | |
there is a parent parser then it will continue parsing when the | |
externalEntityRefHandler() returns. It is up to the implementation of | |
the externalEntityRefHandler() to call XML_StopParser() on the parent | |
parser (recursively), if one wants to stop parsing altogether. | |
When suspended, parsing can be resumed by calling XML_ResumeParser(). | |
*/ | |
XMLPARSEAPI(enum XML_Status) | |
XML_StopParser(XML_Parser parser, XML_Bool resumable); | |
/* Resumes parsing after it has been suspended with XML_StopParser(). | |
Must not be called from within a handler call-back. Returns same | |
status codes as XML_Parse() or XML_ParseBuffer(). | |
Additional error code XML_ERROR_NOT_SUSPENDED possible. | |
*Note*: | |
This must be called on the most deeply nested child parser instance | |
first, and on its parent parser only after the child parser has finished, | |
to be applied recursively until the document entity's parser is restarted. | |
That is, the parent parser will not resume by itself and it is up to the | |
application to call XML_ResumeParser() on it at the appropriate moment. | |
*/ | |
XMLPARSEAPI(enum XML_Status) | |
XML_ResumeParser(XML_Parser parser); | |
/* Parser states: initialized, parsing, finished, or suspended.
   Enumerator values are implicit (0..3), so the order must not change.
*/
enum XML_Parsing {
  XML_INITIALIZED,
  XML_PARSING,
  XML_FINISHED,
  XML_SUSPENDED
};
typedef struct { | |
enum XML_Parsing parsing; | |
XML_Bool finalBuffer; | |
} XML_ParsingStatus; | |
/* Returns status of parser with respect to being initialized, parsing, | |
finished, or suspended and processing the final buffer. | |
XXX XML_Parse() and XML_ParseBuffer() should return XML_ParsingStatus, | |
XXX with XML_FINISHED_OK or XML_FINISHED_ERROR replacing XML_FINISHED | |
*/ | |
XMLPARSEAPI(void) | |
XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status); | |
/* Creates an XML_Parser object that can parse an external general | |
entity; context is a '\0'-terminated string specifying the parse | |
context; encoding is a '\0'-terminated string giving the name of | |
the externally specified encoding, or NULL if there is no | |
externally specified encoding. The context string consists of a | |
sequence of tokens separated by formfeeds (\f); a token consisting | |
of a name specifies that the general entity of the name is open; a | |
token of the form prefix=uri specifies the namespace for a | |
particular prefix; a token of the form =uri specifies the default | |
namespace. This can be called at any point after the first call to | |
an ExternalEntityRefHandler so longer as the parser has not yet | |
been freed. The new parser is completely independent and may | |
safely be used in a separate thread. The handlers and userData are | |
initialized from the parser argument. Returns NULL if out of memory. | |
Otherwise returns a new XML_Parser object. | |
*/ | |
XMLPARSEAPI(XML_Parser) | |
XML_ExternalEntityParserCreate(XML_Parser parser, const XML_Char *context, | |
const XML_Char *encoding); | |
/* Modes accepted by XML_SetParamEntityParsing().
   Enumerator values are implicit (0..2), so the order must not change.
*/
enum XML_ParamEntityParsing {
  XML_PARAM_ENTITY_PARSING_NEVER,
  XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE,
  XML_PARAM_ENTITY_PARSING_ALWAYS
};
/* Controls parsing of parameter entities (including the external DTD | |
subset). If parsing of parameter entities is enabled, then | |
references to external parameter entities (including the external | |
DTD subset) will be passed to the handler set with | |
XML_SetExternalEntityRefHandler. The context passed will be 0. | |
Unlike external general entities, external parameter entities can | |
only be parsed synchronously. If the external parameter entity is | |
to be parsed, it must be parsed during the call to the external | |
entity ref handler: the complete sequence of | |
XML_ExternalEntityParserCreate, XML_Parse/XML_ParseBuffer and | |
XML_ParserFree calls must be made during this call. After | |
XML_ExternalEntityParserCreate has been called to create the parser | |
for the external parameter entity (context must be 0 for this | |
call), it is illegal to make any calls on the old parser until | |
XML_ParserFree has been called on the newly created parser. | |
If the library has been compiled without support for parameter | |
entity parsing (ie without XML_DTD being defined), then | |
XML_SetParamEntityParsing will return 0 if parsing of parameter | |
entities is requested; otherwise it will return non-zero. | |
Note: If XML_SetParamEntityParsing is called after XML_Parse or | |
XML_ParseBuffer, then it has no effect and will always return 0. | |
Note: If parser == NULL, the function will do nothing and return 0. | |
*/ | |
XMLPARSEAPI(int) | |
XML_SetParamEntityParsing(XML_Parser parser, | |
enum XML_ParamEntityParsing parsing); | |
/* Sets the hash salt to use for internal hash calculations. | |
Helps in preventing DoS attacks based on predicting hash | |
function behavior. This must be called before parsing is started. | |
Returns 1 if successful, 0 when called after parsing has started. | |
Note: If parser == NULL, the function will do nothing and return 0. | |
*/ | |
XMLPARSEAPI(int) | |
XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt); | |
/* If XML_Parse or XML_ParseBuffer have returned XML_STATUS_ERROR, then | |
XML_GetErrorCode returns information about the error. | |
*/ | |
XMLPARSEAPI(enum XML_Error) | |
XML_GetErrorCode(XML_Parser parser); | |
/* These functions return information about the current parse | |
location. They may be called from any callback called to report | |
some parse event; in this case the location is the location of the | |
first of the sequence of characters that generated the event. When | |
called from callbacks generated by declarations in the document | |
prologue, the location identified isn't as neatly defined, but will | |
be within the relevant markup. When called outside of the callback | |
functions, the position indicated will be just past the last parse | |
event (regardless of whether there was an associated callback). | |
They may also be called after returning from a call to XML_Parse | |
or XML_ParseBuffer. If the return value is XML_STATUS_ERROR then | |
the location is the location of the character at which the error | |
was detected; otherwise the location is the location of the last | |
parse event, as described above. | |
Note: XML_GetCurrentLineNumber and XML_GetCurrentColumnNumber | |
return 0 to indicate an error. | |
Note: XML_GetCurrentByteIndex returns -1 to indicate an error. | |
*/ | |
XMLPARSEAPI(XML_Size) XML_GetCurrentLineNumber(XML_Parser parser); | |
XMLPARSEAPI(XML_Size) XML_GetCurrentColumnNumber(XML_Parser parser); | |
XMLPARSEAPI(XML_Index) XML_GetCurrentByteIndex(XML_Parser parser); | |
/* Return the number of bytes in the current event. | |
Returns 0 if the event is in an internal entity. | |
*/ | |
XMLPARSEAPI(int) | |
XML_GetCurrentByteCount(XML_Parser parser); | |
/* If XML_CONTEXT_BYTES is defined, returns the input buffer, sets | |
the integer pointed to by offset to the offset within this buffer | |
of the current parse position, and sets the integer pointed to by size | |
to the size of this buffer (the number of input bytes). Otherwise | |
returns a NULL pointer. Also returns a NULL pointer if a parse isn't | |
active. | |
NOTE: The character pointer returned should not be used outside | |
the handler that makes the call. | |
*/ | |
XMLPARSEAPI(const char *) | |
XML_GetInputContext(XML_Parser parser, int *offset, int *size); | |
/* For backwards compatibility with previous versions. */
/* Frees the content model passed to the element declaration handler */ | |
XMLPARSEAPI(void) | |
XML_FreeContentModel(XML_Parser parser, XML_Content *model); | |
/* Exposing the memory handling functions used in Expat */ | |
XMLPARSEAPI(void *) | |
XML_ATTR_MALLOC | |
XML_ATTR_ALLOC_SIZE(2) | |
XML_MemMalloc(XML_Parser parser, size_t size); | |
XMLPARSEAPI(void *) | |
XML_ATTR_ALLOC_SIZE(3) | |
XML_MemRealloc(XML_Parser parser, void *ptr, size_t size); | |
XMLPARSEAPI(void) | |
XML_MemFree(XML_Parser parser, void *ptr); | |
/* Frees memory used by the parser. */ | |
XMLPARSEAPI(void) | |
XML_ParserFree(XML_Parser parser); | |
/* Returns a string describing the error. */ | |
XMLPARSEAPI(const XML_LChar *) | |
XML_ErrorString(enum XML_Error code); | |
/* Return a string containing the version number of this expat */ | |
XMLPARSEAPI(const XML_LChar *) | |
XML_ExpatVersion(void); | |
/* Numeric version information, as returned by XML_ExpatVersionInfo().
   Field order (major, minor, micro) is part of the public layout.
*/
typedef struct {
  int major;
  int minor;
  int micro;
} XML_Expat_Version;
/* Return an XML_Expat_Version structure containing numeric version | |
number information for this version of expat. | |
*/ | |
XMLPARSEAPI(XML_Expat_Version) | |
XML_ExpatVersionInfo(void); | |
/* Added in Expat 1.95.5. */
/* Feature identifiers carried in the `feature` field of XML_Feature.
   Values are implicit and sequential starting from XML_FEATURE_END = 0,
   so the position of each enumerator determines its value.
*/
enum XML_FeatureEnum {
  XML_FEATURE_END = 0,
  XML_FEATURE_UNICODE,
  XML_FEATURE_UNICODE_WCHAR_T,
  XML_FEATURE_DTD,
  XML_FEATURE_CONTEXT_BYTES,
  XML_FEATURE_MIN_SIZE,
  XML_FEATURE_SIZEOF_XML_CHAR,
  XML_FEATURE_SIZEOF_XML_LCHAR,
  XML_FEATURE_NS,
  XML_FEATURE_LARGE_SIZE,
  XML_FEATURE_ATTR_INFO,
  /* Added in Expat 2.4.0. */
  XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT,
  XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT
  /* Additional features must be added to the end of this enum. */
};
typedef struct { | |
enum XML_FeatureEnum feature; | |
const XML_LChar *name; | |
long int value; | |
} XML_Feature; | |
XMLPARSEAPI(const XML_Feature *) | |
XML_GetFeatureList(void); | |
/* Added in Expat 2.4.0. */ | |
XMLPARSEAPI(XML_Bool) | |
XML_SetBillionLaughsAttackProtectionMaximumAmplification( | |
XML_Parser parser, float maximumAmplificationFactor); | |
/* Added in Expat 2.4.0. */ | |
XMLPARSEAPI(XML_Bool) | |
XML_SetBillionLaughsAttackProtectionActivationThreshold( | |
XML_Parser parser, unsigned long long activationThresholdBytes); | |
/* Expat follows the semantic versioning convention.
   See http://semver.org.
*/
} | |