diff --git a/HTMLparser.c b/HTMLparser.c
index 4d43b93d..1a4d80d3 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -3448,7 +3448,8 @@ static void
htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) {
const xmlChar *encoding;
- if ((ctxt == NULL) || (attvalue == NULL))
+ if ((ctxt == NULL) || (attvalue == NULL) ||
+ (ctxt->options & HTML_PARSE_IGNORE_ENC))
return;
/* do not change encoding */
@@ -3500,7 +3501,9 @@ htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) {
xmlSwitchToEncoding(ctxt, handler);
ctxt->charset = XML_CHAR_ENCODING_UTF8;
} else {
- ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+ htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
+ "htmlCheckEncoding: unknown encoding %s\n",
+ encoding, NULL);
}
}
@@ -6537,6 +6540,10 @@ htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options)
ctxt->options |= HTML_PARSE_NODEFDTD;
options -= HTML_PARSE_NODEFDTD;
}
+ if (options & HTML_PARSE_IGNORE_ENC) {
+ ctxt->options |= HTML_PARSE_IGNORE_ENC;
+ options -= HTML_PARSE_IGNORE_ENC;
+ }
ctxt->dictNames = 0;
return (options);
}
diff --git a/HTMLtree.c b/HTMLtree.c
index b5085836..f23ae023 100644
--- a/HTMLtree.c
+++ b/HTMLtree.c
@@ -481,7 +481,7 @@ htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
if (enc != XML_CHAR_ENCODING_UTF8) {
handler = xmlFindCharEncodingHandler(encoding);
if (handler == NULL)
- return(-1);
+ htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
}
}
@@ -562,11 +562,9 @@ htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
}
handler = xmlFindCharEncodingHandler(encoding);
- if (handler == NULL) {
- *mem = NULL;
- *size = 0;
- return;
- }
+ if (handler == NULL)
+ htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
+
} else {
handler = xmlFindCharEncodingHandler(encoding);
}
@@ -587,7 +585,7 @@ htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
return;
}
- htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
+ htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
xmlOutputBufferFlush(buf);
if (buf->conv != NULL) {
@@ -1061,7 +1059,7 @@ htmlDocDump(FILE *f, xmlDocPtr cur) {
handler = xmlFindCharEncodingHandler(encoding);
if (handler == NULL)
- return(-1);
+ htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
} else {
handler = xmlFindCharEncodingHandler(encoding);
}
@@ -1120,7 +1118,7 @@ htmlSaveFile(const char *filename, xmlDocPtr cur) {
handler = xmlFindCharEncodingHandler(encoding);
if (handler == NULL)
- return(-1);
+ htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
}
}
@@ -1181,7 +1179,7 @@ htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
handler = xmlFindCharEncodingHandler(encoding);
if (handler == NULL)
- return(-1);
+ htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
}
htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
} else {
diff --git a/include/libxml/HTMLparser.h b/include/libxml/HTMLparser.h
index fbcc811e..10a3d654 100644
--- a/include/libxml/HTMLparser.h
+++ b/include/libxml/HTMLparser.h
@@ -184,7 +184,8 @@ typedef enum {
HTML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */
HTML_PARSE_NONET = 1<<11,/* Forbid network access */
HTML_PARSE_NOIMPLIED= 1<<13,/* Do not add implied html/body... elements */
- HTML_PARSE_COMPACT = 1<<16 /* compact small text nodes */
+ HTML_PARSE_COMPACT = 1<<16,/* compact small text nodes */
+ HTML_PARSE_IGNORE_ENC=1<<21 /* ignore internal document encoding hint */
} htmlParserOption;
XMLPUBFUN void XMLCALL
diff --git a/include/libxml/parser.h b/include/libxml/parser.h
index 47b3df15..aabb96cf 100644
--- a/include/libxml/parser.h
+++ b/include/libxml/parser.h
@@ -1105,8 +1105,9 @@ typedef enum {
crash if you try to modify the tree) */
XML_PARSE_OLD10 = 1<<17,/* parse using XML-1.0 before update 5 */
XML_PARSE_NOBASEFIX = 1<<18,/* do not fixup XINCLUDE xml:base uris */
- XML_PARSE_HUGE = 1<<19, /* relax any hardcoded limit from the parser */
- XML_PARSE_OLDSAX = 1<<20 /* parse using SAX2 interface from before 2.7.0 */
+ XML_PARSE_HUGE = 1<<19,/* relax any hardcoded limit from the parser */
+ XML_PARSE_OLDSAX = 1<<20,/* parse using SAX2 interface before 2.7.0 */
+ XML_PARSE_IGNORE_ENC= 1<<21 /* ignore internal document encoding hint */
} xmlParserOption;
XMLPUBFUN void XMLCALL
diff --git a/parser.c b/parser.c
index 9ab86412..02a18771 100644
--- a/parser.c
+++ b/parser.c
@@ -9922,6 +9922,13 @@ xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
} else {
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
}
+
+ /*
+ * Non standard parsing, allowing the user to ignore encoding
+ */
+ if (ctxt->options & XML_PARSE_IGNORE_ENC)
+ return(encoding);
+
/*
* UTF-16 encoding stwich has already taken place at this stage,
* more over the little-endian/big-endian selection is already done
@@ -14561,6 +14568,10 @@ xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encodi
ctxt->options |= XML_PARSE_OLDSAX;
options -= XML_PARSE_OLDSAX;
}
+ if (options & XML_PARSE_IGNORE_ENC) {
+ ctxt->options |= XML_PARSE_IGNORE_ENC;
+ options -= XML_PARSE_IGNORE_ENC;
+ }
ctxt->linenumbers = 1;
return (options);
}
diff --git a/xmllint.c b/xmllint.c
index 8af56cd4..14c64bec 100644
--- a/xmllint.c
+++ b/xmllint.c
@@ -130,6 +130,7 @@ static int copy = 0;
#endif /* LIBXML_TREE_ENABLED */
static int recovery = 0;
static int noent = 0;
+static int noenc = 0;
static int noblanks = 0;
static int noout = 0;
static int nowrap = 0;
@@ -2983,6 +2984,7 @@ static void usage(const char *name) {
printf("\t--recover : output what was parsable on broken XML documents\n");
printf("\t--huge : remove any internal arbitrary parser limits\n");
printf("\t--noent : substitute entity references by their value\n");
+ printf("\t--noenc : ignore any encoding specified inside the document\n");
printf("\t--noout : don't output the result tree\n");
printf("\t--path 'paths': provide a set of paths for resources\n");
printf("\t--load-trace : print trace of all external entites loaded\n");
@@ -3137,6 +3139,10 @@ main(int argc, char **argv) {
(!strcmp(argv[i], "--noent"))) {
noent++;
options |= XML_PARSE_NOENT;
+ } else if ((!strcmp(argv[i], "-noenc")) ||
+ (!strcmp(argv[i], "--noenc"))) {
+ noenc++;
+ options |= XML_PARSE_IGNORE_ENC;
} else if ((!strcmp(argv[i], "-nsclean")) ||
(!strcmp(argv[i], "--nsclean"))) {
options |= XML_PARSE_NSCLEAN;