From d5ee385b4b5f19934a00408a2addc70f965ea4a9 Mon Sep 17 00:00:00 2001 From: Hiltjo Posthuma Date: Tue, 29 Mar 2022 11:03:54 +0200 Subject: compatibility: reduce the assumption the builtin libc locale is ASCII-compatible This is not clearly defined by the C99 standard. Define ctype-like macros to force it to be ASCII / UTF-8 (not extended ASCII or something similar, as noticed on OpenBSD 3.8). (In practice modern libc libraries are all ASCII and UTF-8-compatible. Otherwise this would break many programs) --- xml.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'xml.c') diff --git a/xml.c b/xml.c index f16cbbb..3bd9557 100644 --- a/xml.c +++ b/xml.c @@ -1,4 +1,3 @@ -#include <ctype.h> #include <errno.h> #include <stdio.h> #include <stdlib.h> @@ -6,6 +5,9 @@ #include "xml.h" +#define ISALPHA(c) ((((unsigned)c) | 32) - 'a' < 26) +#define ISSPACE(c) ((c) == ' ' || ((((unsigned)c) - '\t') < 5)) + static void xml_parseattrs(XMLParser *x) { @@ -13,7 +15,7 @@ xml_parseattrs(XMLParser *x) int c, endsep, endname = 0, valuestart = 0; while ((c = GETNEXT()) != EOF) { - if (isspace(c)) { + if (ISSPACE(c)) { if (namelen) endname = 1; continue; @@ -23,7 +25,7 @@ xml_parseattrs(XMLParser *x) x->name[namelen] = '\0'; valuestart = 1; endname = 1; - } else if (namelen && ((endname && !valuestart && isalpha(c)) || (c == '>' || c == '/'))) { + } else if (namelen && ((endname && !valuestart && ISALPHA(c)) || (c == '>' || c == '/'))) { /* attribute without value */ x->name[namelen] = '\0'; if (x->xmlattrstart) @@ -44,7 +46,7 @@ xml_parseattrs(XMLParser *x) if (c == '\'' || c == '"') { endsep = c; } else { - endsep = ' '; /* isspace() */ + endsep = ' '; /* ISSPACE() */ goto startvalue; } @@ -58,7 +60,7 @@ startvalue: x->data[0] = c; valuelen = 1; while ((c = GETNEXT()) != EOF) { - if (c == endsep || (endsep == ' ' && (c == '>' || isspace(c)))) + if (c == endsep || (endsep == ' ' && (c == '>' || ISSPACE(c)))) break; if (valuelen < sizeof(x->data) - 1) x->data[valuelen++] = c; @@ -79,7 +81,7 @@ 
startvalue: break; } } - } else if (c != endsep && !(endsep == ' ' && (c == '>' || isspace(c)))) { + } else if (c != endsep && !(endsep == ' ' && (c == '>' || ISSPACE(c)))) { if (valuelen < sizeof(x->data) - 1) { x->data[valuelen++] = c; } else { @@ -90,7 +92,7 @@ startvalue: valuelen = 1; } } - if (c == endsep || (endsep == ' ' && (c == '>' || isspace(c)))) { + if (c == endsep || (endsep == ' ' && (c == '>' || ISSPACE(c)))) { x->data[valuelen] = '\0'; if (x->xmlattr) x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen); @@ -328,7 +330,7 @@ xml_parse(XMLParser *x) while ((c = GETNEXT()) != EOF) { if (c == '/') x->isshorttag = 1; /* short tag */ - else if (c == '>' || isspace(c)) { + else if (c == '>' || ISSPACE(c)) { x->tag[x->taglen] = '\0'; if (isend) { /* end tag, starts with </ */ if (x->xmltagend) @@ -339,7 +341,7 @@ xml_parse(XMLParser *x) /* start tag */ if (x->xmltagstart) x->xmltagstart(x, x->tag, x->taglen); - if (isspace(c)) + if (ISSPACE(c)) xml_parseattrs(x); if (x->xmltagstartparsed) x->xmltagstartparsed(x, x->tag, x->taglen, x->isshorttag); -- cgit v1.2.3 From 60e402d0f2c086dbbbd21436bb1b3aa5ad9b77d6 Mon Sep 17 00:00:00 2001 From: Hiltjo Posthuma Date: Fri, 26 Aug 2022 21:55:35 +0200 Subject: improve comment: uppercase cdata -> CDATA --- xml.c | 2 +- xml.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'xml.c') diff --git a/xml.c b/xml.c index 3bd9557..a82053e 100644 --- a/xml.c +++ b/xml.c @@ -292,7 +292,7 @@ xml_parse(XMLParser *x) if ((c = GETNEXT()) == EOF) return; - if (c == '!') { /* cdata and comments */ + if (c == '!') { /* CDATA and comments */ for (tagdatalen = 0; (c = GETNEXT()) != EOF;) { /* NOTE: sizeof(x->data) must be at least sizeof("[CDATA[") */ if (tagdatalen <= sizeof("[CDATA[") - 1) diff --git a/xml.h b/xml.h index cef4a05..ad7d26b 100644 --- a/xml.h +++ b/xml.h @@ -34,7 +34,7 @@ typedef struct xmlparser { int isshorttag; /* current attribute name */ char name[1024]; - /* data buffer used for tag data, 
cdata and attribute data */ + /* data buffer used for tag data, CDATA and attribute data */ char data[BUFSIZ]; } XMLParser; -- cgit v1.2.3 From 21a263cb27aeaf02b4a0a0319f435fac92f1ea28 Mon Sep 17 00:00:00 2001 From: Hiltjo Posthuma Date: Tue, 15 Aug 2023 19:10:51 +0200 Subject: improve wording and small typos --- sfeed.c | 6 +++--- sfeed_curses.c | 2 +- sfeed_opml_import.c | 2 +- sfeed_web.c | 2 +- sfeed_xmlenc.c | 2 +- xml.c | 4 ++-- xml.h | 2 +- 7 files changed, 10 insertions(+), 10 deletions(-) (limited to 'xml.c') diff --git a/sfeed.c b/sfeed.c index 888a575..cdd528c 100644 --- a/sfeed.c +++ b/sfeed.c @@ -724,8 +724,8 @@ xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl, if (!ctx.tag.id) return; - /* content-type may be: Atom: text, xhtml, html or mime-type. - MRSS (media:description): plain, html. */ + /* content-type may be for Atom: text, xhtml, html or a mime-type. + for MRSS (media:description): plain, html. */ if (ISCONTENTTAG(ctx)) { if (isattr(n, nl, STRP("type"))) string_append(&attrtype, v, vl); @@ -1073,7 +1073,7 @@ main(int argc, char *argv[]) parser.xmltagstart = xmltagstart; parser.xmltagstartparsed = xmltagstartparsed; - /* NOTE: getnext is defined in xml.h for inline optimization */ + /* NOTE: GETNEXT is defined in xml.h for inline optimization */ xml_parse(&parser); checkfileerror(stdin, "<stdin>", 'r'); diff --git a/sfeed_curses.c b/sfeed_curses.c index 95421fd..0434812 100644 --- a/sfeed_curses.c +++ b/sfeed_curses.c @@ -1875,7 +1875,7 @@ markread(struct pane *p, off_t from, off_t to, int isread) _exit(status); default: /* waitpid() and block on process status change, - fail if exit statuscode was unavailable or non-zero */ + fail if the exit status code was unavailable or non-zero */ if (waitpid(pid, &status, 0) <= 0 || status) break; diff --git a/sfeed_opml_import.c b/sfeed_opml_import.c index 9922133..ce33aac 100644 --- a/sfeed_opml_import.c +++ b/sfeed_opml_import.c @@ -96,7 +96,7 @@ main(void) "# list of feeds to 
fetch:\n" "feeds() {\n" " # feed [basesiteurl] [encoding]\n", stdout); - /* NOTE: getnext is defined in xml.h for inline optimization */ + /* NOTE: GETNEXT is defined in xml.h for inline optimization */ xml_parse(&parser); fputs("}\n", stdout); diff --git a/sfeed_web.c b/sfeed_web.c index 0082f2d..e25e91c 100644 --- a/sfeed_web.c +++ b/sfeed_web.c @@ -132,7 +132,7 @@ main(int argc, char *argv[]) parser.xmltagstart = xmltagstart; parser.xmltagstartparsed = xmltagstartparsed; - /* NOTE: getnext is defined in xml.h for inline optimization */ + /* NOTE: GETNEXT is defined in xml.h for inline optimization */ xml_parse(&parser); checkfileerror(stdin, "<stdin>", 'r'); diff --git a/sfeed_xmlenc.c b/sfeed_xmlenc.c index 7fc93ae..461c047 100644 --- a/sfeed_xmlenc.c +++ b/sfeed_xmlenc.c @@ -52,7 +52,7 @@ main(void) parser.xmlattrend = xmlattrend; parser.xmltagstart = xmltagstart; - /* NOTE: getnext is defined in xml.h for inline optimization */ + /* NOTE: GETNEXT is defined in xml.h for inline optimization */ xml_parse(&parser); checkfileerror(stdin, "<stdin>", 'r'); diff --git a/xml.c b/xml.c index a82053e..1524d1f 100644 --- a/xml.c +++ b/xml.c @@ -317,7 +317,7 @@ xml_parse(XMLParser *x) x->taglen = 1; x->isshorttag = isend = 0; - /* treat processing instruction as shorttag, don't strip "?" prefix. */ + /* treat processing instruction as short tag, don't strip "?" prefix. */ if (c == '?') { x->isshorttag = 1; } else if (c == '/') { @@ -346,7 +346,7 @@ xml_parse(XMLParser *x) if (x->xmltagstartparsed) x->xmltagstartparsed(x, x->tag, x->taglen, x->isshorttag); } - /* call tagend for shortform or processing instruction */ + /* call tagend for short tag or processing instruction */ if (x->isshorttag) { if (x->xmltagend) x->xmltagend(x, x->tag, x->taglen, x->isshorttag); diff --git a/xml.h b/xml.h index 122726c..6f3cf71 100644 --- a/xml.h +++ b/xml.h @@ -30,7 +30,7 @@ typedef struct xmlparser { /* current tag */ char tag[1024]; size_t taglen; - /* current tag is in shortform ? 
*/ + /* current tag is a short tag ? */ int isshorttag; /* current attribute name */ char name[1024]; -- cgit v1.2.3 From 1f6dfac83e8325a42dc5faa2e31cc25593acdfc8 Mon Sep 17 00:00:00 2001 From: Hiltjo Posthuma Date: Sun, 30 Jun 2024 09:59:48 +0200 Subject: improve parsing whitespace after end tag names Simplified test-case: https://git.codemadness.org/sfeed_tests/commit/e091160c3125322193bd8f27691c87eaa48cfc93.html --- xml.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'xml.c') diff --git a/xml.c b/xml.c index 1524d1f..35b7961 100644 --- a/xml.c +++ b/xml.c @@ -333,6 +333,8 @@ xml_parse(XMLParser *x) else if (c == '>' || ISSPACE(c)) { x->tag[x->taglen] = '\0'; if (isend) { /* end tag, starts with </ */ + while (c != '>' && c != EOF) /* skip until > */ + c = GETNEXT(); if (x->xmltagend) x->xmltagend(x, x->tag, x->taglen, x->isshorttag); x->tag[0] = '\0'; -- cgit v1.2.3