/*
 * xmlparse.hpp -- abstract C++ interface for an XML parser.
 *
 * Provenance: added as expat/xmlparse/xmlparse.hpp (new file, mode 100755,
 * index 00000000..fd8f315a) by commit
 * e157ebf61808971abdd0af8cef311dc7efc30230, James Clark,
 * Thu, 24 Jun 1999 09:39:06 +0000,
 * "Added some bits of code that may be useful in the future".
 * The same commit also added expat/xmlwf/ct.c (index 00000000..098699d0).
 */
#ifndef XMLPARSE_HPP_INCLUDED
#define XMLPARSE_HPP_INCLUDED

/*
 * Pure-virtual parser interface plus the callback interfaces it accepts.
 *
 * Every non-static member is pure virtual; instances are obtained from the
 * static create() factory, which is only declared here.
 *
 * NOTE(review): none of the interfaces declares a virtual destructor, so
 * objects must not be deleted through these base types. XMLParser and
 * Converter expose release() instead -- presumably the intended way to
 * dispose of them; confirm against the implementation.
 *
 * NOTE(review): the first three set*Handler() members take a pointer while
 * the remaining ones take a reference. The signatures are kept as they are
 * so existing callers stay valid.
 */
class XMLParser {
public:
  /* Character type used for names and data handed to the handlers. */
  typedef char Char;
  /* Character type of the strings returned by errorString(). */
  typedef Char LChar;

  /* Callback interface with one method for an element start and one for an
     element end. */
  class ElementHandler {
  public:
    /* atts: presumably a null-terminated list of attribute name/value
       strings -- TODO confirm against the implementation. */
    virtual void startElement(XMLParser &parser,
                              const Char *name,
                              const Char **atts) = 0;
    virtual void endElement(XMLParser &parser, const Char *name) = 0;
  };

  /* Callback interface for character data; s is passed with an explicit
     length len. */
  class CharacterDataHandler {
  public:
    virtual void characterData(XMLParser &parser, const Char *s, int len) = 0;
  };

  /* Callback interface for processing instructions (target and data). */
  class ProcessingInstructionHandler {
  public:
    virtual void processingInstruction(XMLParser &parser,
                                       const Char *target,
                                       const Char *data) = 0;
  };

  /* Callback interface receiving a string with an explicit length; which
     input is reported this way is not visible in this header. */
  class OtherHandler {
  public:
    virtual void other(XMLParser &parser, const Char *s, int len) = 0;
  };

  /* Callback interface for unparsed-entity and notation declarations. */
  class DeclHandler {
  public:
    virtual void unparsedEntityDecl(XMLParser &parser,
                                    const Char *entityName,
                                    const Char *base,
                                    const Char *systemId,
                                    const Char *publicId,
                                    const Char *notationName) = 0;
    virtual void notationDecl(XMLParser &parser,
                              const Char *notationName,
                              const Char *base,
                              const Char *systemId,
                              const Char *publicId) = 0;
  };

  /* Callback interface for external entity references. The meaning of the
     int result is not defined in this header. */
  class ExternalEntityRefHandler {
  public:
    virtual int externalEntityRef(XMLParser &parser,
                                  const Char *openEntityNames,
                                  const Char *base,
                                  const Char *systemId,
                                  const Char *publicId) = 0;
  };

  /* Converter object handed back by EncodingManager::getEncoding(). */
  class Converter {
  public:
    virtual int convert(const char *) = 0;
    virtual void release() = 0;
  };

  /* Looks up an encoding by name, filling a 256-entry map and optionally
     supplying a Converter through the reference parameter. */
  class EncodingManager {
  public:
    virtual bool getEncoding(const Char *name,
                             int map[256],
                             Converter *&converter) = 0;
  };

  virtual void setElementHandler(ElementHandler *handler) = 0;
  virtual void setCharacterDataHandler(CharacterDataHandler *handler) = 0;
  virtual void setProcessingInstructionHandler(ProcessingInstructionHandler *handler) = 0;
  virtual void setOtherHandler(OtherHandler &handler) = 0;
  virtual void setDeclHandler(DeclHandler &handler) = 0;
  virtual void setExternalEntityRefHandler(ExternalEntityRefHandler &handler) = 0;
  virtual void setEncodingManager(EncodingManager &manager) = 0;
  virtual void setBase(const Char *base) = 0;
  virtual const Char *getBase() = 0;

  /* Parses len bytes starting at s; isFinal flags the last piece of input. */
  virtual int parse(const char *s, int len, bool isFinal) = 0;
  /* getBuffer()/parseBuffer() form the buffer-based counterpart of parse():
     presumably the caller fills the returned buffer and then calls
     parseBuffer() -- TODO confirm. */
  virtual char *getBuffer(int len) = 0;
  virtual int parseBuffer(int len, bool isFinal) = 0;
  virtual XMLParser *externalEntityParserCreate(const Char *openEntityNames,
                                                const Char *encoding) = 0;

  /* Error codes reported by getErrorCode(); ERROR_NONE is 0 and the values
     increase by one in declaration order. */
  enum Error {
    ERROR_NONE,
    ERROR_NO_MEMORY,
    ERROR_SYNTAX,
    ERROR_NO_ELEMENTS,
    ERROR_INVALID_TOKEN,
    ERROR_UNCLOSED_TOKEN,
    ERROR_PARTIAL_CHAR,
    ERROR_TAG_MISMATCH,
    ERROR_DUPLICATE_ATTRIBUTE,
    ERROR_JUNK_AFTER_DOC_ELEMENT,
    ERROR_PARAM_ENTITY_REF,
    ERROR_UNDEFINED_ENTITY,
    ERROR_RECURSIVE_ENTITY_REF,
    ERROR_ASYNC_ENTITY,
    ERROR_BAD_CHAR_REF,
    ERROR_BINARY_ENTITY_REF,
    ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF,
    ERROR_MISPLACED_PI,
    ERROR_UNKNOWN_ENCODING,
    ERROR_INCORRECT_ENCODING,
    ERROR_UNCLOSED_CDATA_SECTION,
    ERROR_EXTERNAL_ENTITY_HANDLING
  };

  virtual Error getErrorCode() = 0;
  virtual int getCurrentLineNumber() = 0;
  virtual int getCurrentColumnNumber() = 0;
  virtual long getCurrentByteIndex() = 0;
  virtual void release() = 0;

  /* Returns the message string for an error code (takes a plain int). */
  static const LChar *errorString(int code);
  /* Factory for parser instances; defined outside this header. */
  static XMLParser *create(const Char *encoding);
};

#endif /* XMLPARSE_HPP_INCLUDED */
#include <stdio.h>
#include <string.h>

/* Size in bytes, including the terminating NUL, of the buffer that
   getXMLCharset() fills. */
#define CHARSET_MAX 41

/* Set by getXMLCharset(): 1 when the media type of the most recent call had
   the subtype "xml", 0 otherwise. */
static int isXml = 0;

/*
 * Returns the next token of a Content-Type style header and advances *pp.
 *
 * Tokens are:
 *   - an atom: a run of characters ended by whitespace, '(', ';', '/', '=',
 *     '"' or the end of the string (*pp is left on the delimiter);
 *   - a quoted string: returned with its opening quote, *pp is left just
 *     past the closing quote;
 *   - one of the single characters ';', '/' and '=' (*pp is left just past it).
 * Parenthesised comments are skipped and may nest; a backslash causes the
 * character after it to be skipped without being interpreted.
 *
 * Returns a pointer to the first character of the token, or 0 when the input
 * is exhausted, a ')' appears outside a comment or string, a backslash is the
 * last character, or a string/comment is left unterminated.
 */
static
const char *getTok(const char **pp)
{
  /* The order matters: every value above init is a comment nesting depth. */
  enum { inAtom, inString, init, inComment };
  int state = init;
  const char *tokStart = 0;
  for (;;) {
    switch (**pp) {
    case '\0':
      /* An atom that runs up to the end of the input is still a token;
         *pp stays on the NUL so the next call reports end of input. */
      return state == inAtom ? tokStart : 0;
    case ' ':
    case '\r':
    case '\t':
    case '\n':
      if (state == inAtom)
        return tokStart;
      break;
    case '(':
      if (state == inAtom)
        return tokStart;
      if (state != inString)
        state++;                /* open a comment or nest one level deeper */
      break;
    case ')':
      if (state > init)
        --state;                /* close one comment level */
      else if (state != inString)
        return 0;               /* unbalanced ')' */
      break;
    case ';':
    case '/':
    case '=':
      if (state == inAtom)
        return tokStart;
      if (state == init)
        return (*pp)++;         /* the separator is a token of its own */
      break;
    case '\\':
      ++*pp;                    /* skip the escaped character below */
      if (**pp == '\0')
        return 0;
      break;
    case '"':
      switch (state) {
      case inString:
        ++*pp;
        return tokStart;
      case inAtom:
        return tokStart;
      case init:
        tokStart = *pp;
        state = inString;
        break;
      }
      break;
    default:
      if (state == init) {
        tokStart = *pp;
        state = inAtom;
      }
      break;
    }
    ++*pp;
  }
  /* not reached */
}

/*
 * Case-insensitive comparison of the token [start, end) with key.
 *
 * key must be lowercase ASCII. Returns 1 when the token has the same length
 * as key and every character equals the key character or, for the letters
 * a-z, its upper-case form; returns 0 otherwise, including when start is null.
 */
static
int matchkey(const char *start, const char *end, const char *key)
{
  if (!start)
    return 0;
  for (; start != end; start++, key++) {
    const char k = *key;
    if (k == '\0')
      return 0;                 /* token is longer than key: never read past the NUL */
    if (*start == k)
      continue;
    if (k >= 'a' && k <= 'z' && *start == 'A' + (k - 'a'))
      continue;
    return 0;
  }
  return *key == '\0';
}

/*
 * Extracts the charset of a Content-Type value.
 *
 * buf     NUL-terminated header value, e.g. "text/xml; charset=utf-8".
 * charset output buffer of at least CHARSET_MAX bytes.
 *
 * charset is left empty unless the major type is "text" or "application".
 * For "text" it is preset to "us-ascii" before the rest of the value is
 * examined. The first charset parameter that carries a value ends the scan:
 * its value replaces the preset, with quotes and backslash escapes removed
 * from a quoted value. A quoted value longer than CHARSET_MAX - 1 characters
 * clears charset; an unquoted value that is too long leaves the preset
 * untouched.
 *
 * Also updates the file-scope flag isXml.
 */
void getXMLCharset(const char *buf, char *charset)
{
  const char *next, *p;

  charset[0] = '\0';
  isXml = 0;
  next = buf;
  p = getTok(&next);
  if (matchkey(p, next, "text"))
    strcpy(charset, "us-ascii");
  else if (!matchkey(p, next, "application"))
    return;
  p = getTok(&next);
  if (!p || *p != '/')
    return;
  p = getTok(&next);
  if (matchkey(p, next, "xml"))
    isXml = 1;
  p = getTok(&next);
  while (p) {
    if (*p == ';') {
      p = getTok(&next);
      if (matchkey(p, next, "charset")) {
        p = getTok(&next);
        if (p && *p == '=') {
          p = getTok(&next);
          if (p) {
            char *s = charset;
            if (*p == '"') {
              /* p is on the opening quote and next - 1 on the closing one. */
              while (++p != next - 1) {
                if (*p == '\\')
                  ++p;
                if (s == charset + CHARSET_MAX - 1) {
                  charset[0] = '\0';
                  break;
                }
                *s++ = *p;
              }
              *s = '\0';
              /* Stop here like the unquoted case does; continuing would
                 rescan from a position inside the string after an overflow. */
              break;
            }
            else {
              if (next - p > CHARSET_MAX - 1)
                break;
              while (p != next)
                *s++ = *p++;
              *s = 0;
              break;
            }
          }
        }
      }
    }
    else
      p = getTok(&next);
  }
}

#ifndef CT_NO_MAIN
/*
 * Command-line driver: prints the charset found in argv[1].
 * Define CT_NO_MAIN to leave it out when the functions above are compiled
 * into a program that has its own main().
 */
int main(int argc, char **argv)
{
  char buf[CHARSET_MAX];
  if (argc < 2) {
    fprintf(stderr, "usage: %s content-type\n",
            (argc > 0 && argv[0]) ? argv[0] : "ct");
    return 1;
  }
  getXMLCharset(argv[1], buf);
  printf("charset = \"%s\"\n", buf);
  return 0;
}
#endif /* CT_NO_MAIN */