diff options
author | Hiltjo Posthuma <hiltjo@codemadness.org> | 2020-11-01 16:53:40 +0100 |
---|---|---|
committer | Hiltjo Posthuma <hiltjo@codemadness.org> | 2020-11-01 17:01:57 +0100 |
commit | 7664faf66555c5c5b1221322bff7e49722ea36c7 (patch) | |
tree | 988a1cb9dd332be03c56836c3b167eaba3619301 | |
parent | 134a1ac3372fe1eae6bc5c6acd12666c17e82696 (diff) |
sfeed_xmlenc: be more paranoid in printing encoding names
sfeed_xmlenc is used automatically in sfeed_update for detecting the encoding.
In particular, do not allow slashes anymore either, as used for example in the
"//IGNORE" and "//TRANSLIT" suffixes, which are normally allowed.
Some iconv implementations might allow other funky names or even pathnames too,
so disallow that.
See also the notes about the "frommap" for the "-f" option.
https://pubs.opengroup.org/onlinepubs/9699919799/utilities/iconv.html
Also includes some minor improvements to the parsing handling.
-rw-r--r-- | sfeed_xmlenc.1 | 5 | ||||
-rw-r--r-- | sfeed_xmlenc.c | 16 |
2 files changed, 16 insertions, 5 deletions
diff --git a/sfeed_xmlenc.1 b/sfeed_xmlenc.1 index caca7dd..9e46cf7 100644 --- a/sfeed_xmlenc.1 +++ b/sfeed_xmlenc.1 @@ -1,4 +1,4 @@ -.Dd March 15, 2020 +.Dd November 1, 2020 .Dt SFEED_XMLENC 1 .Os .Sh NAME @@ -9,7 +9,8 @@ .Sh DESCRIPTION .Nm reads XML data from stdin and writes the found text\-encoding to stdout. -It translates the characters to lowercase and strips control characters. +It converts characters in the encoding-name to lowercase and strips characters +which are not allowed in an encoding-name. .Sh EXIT STATUS .Ex -std .Sh EXAMPLES diff --git a/sfeed_xmlenc.c b/sfeed_xmlenc.c index 15372b4..a5ad2b6 100644 --- a/sfeed_xmlenc.c +++ b/sfeed_xmlenc.c @@ -14,7 +14,7 @@ static void xmltagstart(XMLParser *p, const char *t, size_t tl) { /* optimization: try to find a processing instruction only at the - start of the data. */ + start of the data at the first few starting tags. */ if (tags++ > 3) exit(0); } @@ -26,11 +26,19 @@ xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl, if (strcasecmp(t, "?xml") || strcasecmp(n, "encoding")) return; - /* output lowercase, no control characters */ for (; *v; v++) { - if (!iscntrl((unsigned char)*v)) + if (isalpha((unsigned char)*v) || + isdigit((unsigned char)*v) || + *v == '.' || *v == ':' || *v == '-' || *v == '_') putchar(tolower((unsigned char)*v)); } +} + +static void +xmlattrend(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl) +{ + if (strcasecmp(t, "?xml") || strcasecmp(n, "encoding")) + return; putchar('\n'); exit(0); } @@ -42,6 +50,8 @@ main(void) err(1, "pledge"); parser.xmlattr = xmlattr; + parser.xmlattrentity = xmlattr; /* no entity conversion */ + parser.xmlattrend = xmlattrend; parser.xmltagstart = xmltagstart; /* NOTE: getnext is defined in xml.h for inline optimization */ |