summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Hiltjo Posthuma <hiltjo@codemadness.org> 2020-11-01 16:53:40 +0100
committer: Hiltjo Posthuma <hiltjo@codemadness.org> 2020-11-01 17:01:57 +0100
commit: 7664faf66555c5c5b1221322bff7e49722ea36c7 (patch)
tree: 988a1cb9dd332be03c56836c3b167eaba3619301
parent: 134a1ac3372fe1eae6bc5c6acd12666c17e82696 (diff)
sfeed_xmlenc: be more paranoid in printing encoding names
sfeed_xmlenc is used automatically in sfeed_update for detecting the encoding. In particular, do not allow slashes anymore either, for example in "//IGNORE" and "//TRANSLIT", which are normally allowed. Some iconv implementations might allow other funky names or even pathnames too, so disallow those as well. See also the notes about the "frommap" for the "-f" option: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/iconv.html Also includes some minor parsing handling improvements.
-rw-r--r-- sfeed_xmlenc.1 | 5
-rw-r--r-- sfeed_xmlenc.c | 16
2 files changed, 16 insertions, 5 deletions
diff --git a/sfeed_xmlenc.1 b/sfeed_xmlenc.1
index caca7dd..9e46cf7 100644
--- a/sfeed_xmlenc.1
+++ b/sfeed_xmlenc.1
@@ -1,4 +1,4 @@
-.Dd March 15, 2020
+.Dd November 1, 2020
.Dt SFEED_XMLENC 1
.Os
.Sh NAME
@@ -9,7 +9,8 @@
.Sh DESCRIPTION
.Nm
reads XML data from stdin and writes the found text\-encoding to stdout.
-It translates the characters to lowercase and strips control characters.
+It converts characters in the encoding-name to lowercase and strips characters
+which are not allowed in an encoding-name.
.Sh EXIT STATUS
.Ex -std
.Sh EXAMPLES
diff --git a/sfeed_xmlenc.c b/sfeed_xmlenc.c
index 15372b4..a5ad2b6 100644
--- a/sfeed_xmlenc.c
+++ b/sfeed_xmlenc.c
@@ -14,7 +14,7 @@ static void
xmltagstart(XMLParser *p, const char *t, size_t tl)
{
/* optimization: try to find a processing instruction only at the
- start of the data. */
+ start of the data at the first few starting tags. */
if (tags++ > 3)
exit(0);
}
@@ -26,11 +26,19 @@ xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl,
if (strcasecmp(t, "?xml") || strcasecmp(n, "encoding"))
return;
- /* output lowercase, no control characters */
for (; *v; v++) {
- if (!iscntrl((unsigned char)*v))
+ if (isalpha((unsigned char)*v) ||
+ isdigit((unsigned char)*v) ||
+ *v == '.' || *v == ':' || *v == '-' || *v == '_')
putchar(tolower((unsigned char)*v));
}
+}
+
+static void
+xmlattrend(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl)
+{
+ if (strcasecmp(t, "?xml") || strcasecmp(n, "encoding"))
+ return;
putchar('\n');
exit(0);
}
@@ -42,6 +50,8 @@ main(void)
err(1, "pledge");
parser.xmlattr = xmlattr;
+ parser.xmlattrentity = xmlattr; /* no entity conversion */
+ parser.xmlattrend = xmlattrend;
parser.xmltagstart = xmltagstart;
/* NOTE: getnext is defined in xml.h for inline optimization */