From fad48ffa27af96ee0d9489ded88f80c1eeb238dc Mon Sep 17 00:00:00 2001 From: Hiltjo Posthuma Date: Mon, 14 Mar 2022 19:22:42 +0100 Subject: stricter error checking in file streams (input, output) This also makes the programs exit with a non-zero status when a read or write error occurs. This makes checking the exit status more reliable in scripts. A simple example to simulate a disk with no space left: curl -s 'https://codemadness.org/atom.xml' | sfeed > f /mnt/test: write failed, file system is full echo $? 0 Which now produces: curl -s 'https://codemadness.org/atom.xml' | sfeed > f /mnt/test: write failed, file system is full write error: echo $? 1 Tested with a small mfs on OpenBSD, fstab entry: swap /mnt/test mfs rw,nodev,nosuid,-s=1M 0 0 --- sfeed_xmlenc.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'sfeed_xmlenc.c') diff --git a/sfeed_xmlenc.c b/sfeed_xmlenc.c index c6a43d4..67d7b0f 100644 --- a/sfeed_xmlenc.c +++ b/sfeed_xmlenc.c @@ -56,5 +56,8 @@ main(void) /* NOTE: getnext is defined in xml.h for inline optimization */ xml_parse(&parser); + checkfileerror(stdin, "", 'r'); + checkfileerror(stdout, "", 'w'); + return 0; } -- cgit v1.2.3 From d5ee385b4b5f19934a00408a2addc70f965ea4a9 Mon Sep 17 00:00:00 2001 From: Hiltjo Posthuma Date: Tue, 29 Mar 2022 11:03:54 +0200 Subject: compatibility: reduce the assumption the builtin libc locale is ASCII-compatible This is not clearly defined by the C99 standard. Define ctype-like macros to force it to be ASCII / UTF-8 (not extended ASCII or something like noticed on OpenBSD 3.8). (In practice modern libc libraries are all ASCII and UTF-8-compatible. 
Otherwise this would break many programs) --- sfeed.c | 50 +++++++++++++++++++++++++------------------------- sfeed_opml_import.c | 1 - sfeed_web.c | 1 - sfeed_xmlenc.c | 7 +++---- util.c | 9 ++++----- util.h | 6 +++++- xml.c | 20 +++++++++++--------- 7 files changed, 48 insertions(+), 46 deletions(-) (limited to 'sfeed_xmlenc.c') diff --git a/sfeed.c b/sfeed.c index fc1249a..b969874 100644 --- a/sfeed.c +++ b/sfeed.c @@ -246,7 +246,7 @@ gettag(enum FeedType feedtype, const char *name, size_t namelen) static char * ltrim(const char *s) { - for (; isspace((unsigned char)*s); s++) + for (; ISSPACE((unsigned char)*s); s++) ; return (char *)s; } @@ -256,7 +256,7 @@ rtrim(const char *s) { const char *e; - for (e = s + strlen(s); e > s && isspace((unsigned char)*(e - 1)); e--) + for (e = s + strlen(s); e > s && ISSPACE((unsigned char)*(e - 1)); e--) ; return (char *)e; } @@ -341,7 +341,7 @@ printtrimmed(const char *s) p = ltrim(s); e = rtrim(p); for (; *p && p != e; p++) { - if (isspace((unsigned char)*p)) + if (ISSPACE((unsigned char)*p)) putchar(' '); /* any whitespace to space */ else if (!ISCNTRL((unsigned char)*p)) /* ignore other control chars */ @@ -514,20 +514,20 @@ gettzoffset(const char *s) long tzhour = 0, tzmin = 0; size_t i; - for (; isspace((unsigned char)*s); s++) + for (; ISSPACE((unsigned char)*s); s++) ; switch (*s) { case '-': /* offset */ case '+': - for (i = 0, p = s + 1; i < 2 && isdigit((unsigned char)*p); i++, p++) + for (i = 0, p = s + 1; i < 2 && ISDIGIT((unsigned char)*p); i++, p++) tzhour = (tzhour * 10) + (*p - '0'); if (*p == ':') p++; - for (i = 0; i < 2 && isdigit((unsigned char)*p); i++, p++) + for (i = 0; i < 2 && ISDIGIT((unsigned char)*p); i++, p++) tzmin = (tzmin * 10) + (*p - '0'); return ((tzhour * 3600) + (tzmin * 60)) * (s[0] == '-' ? 
-1 : 1); default: /* timezone name */ - for (i = 0; isalpha((unsigned char)s[i]); i++) + for (i = 0; ISALPHA((unsigned char)s[i]); i++) ; if (i != 3) return 0; @@ -565,35 +565,35 @@ parsetime(const char *s, long long *tp) int va[6] = { 0 }, i, j, v, vi; size_t m; - for (; isspace((unsigned char)*s); s++) + for (; ISSPACE((unsigned char)*s); s++) ; - if (!isdigit((unsigned char)*s) && !isalpha((unsigned char)*s)) + if (!ISDIGIT((unsigned char)*s) && !ISALPHA((unsigned char)*s)) return -1; - if (isdigit((unsigned char)s[0]) && - isdigit((unsigned char)s[1]) && - isdigit((unsigned char)s[2]) && - isdigit((unsigned char)s[3])) { + if (ISDIGIT((unsigned char)s[0]) && + ISDIGIT((unsigned char)s[1]) && + ISDIGIT((unsigned char)s[2]) && + ISDIGIT((unsigned char)s[3])) { /* formats "%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S" or "%Y%m%d%H%M%S" */ vi = 0; } else { /* format: "[%a, ]%d %b %Y %H:%M:%S" */ /* parse "[%a, ]%d %b %Y " part, then use time parsing as above */ - for (; isalpha((unsigned char)*s); s++) + for (; ISALPHA((unsigned char)*s); s++) ; - for (; isspace((unsigned char)*s); s++) + for (; ISSPACE((unsigned char)*s); s++) ; if (*s == ',') s++; - for (; isspace((unsigned char)*s); s++) + for (; ISSPACE((unsigned char)*s); s++) ; - for (v = 0, i = 0; i < 2 && isdigit((unsigned char)*s); s++, i++) + for (v = 0, i = 0; i < 2 && ISDIGIT((unsigned char)*s); s++, i++) v = (v * 10) + (*s - '0'); va[2] = v; /* day */ - for (; isspace((unsigned char)*s); s++) + for (; ISSPACE((unsigned char)*s); s++) ; /* end of word month */ - for (j = 0; isalpha((unsigned char)s[j]); j++) + for (j = 0; ISALPHA((unsigned char)s[j]); j++) ; /* check month name */ if (j < 3 || j > 9) @@ -609,15 +609,15 @@ parsetime(const char *s, long long *tp) } if (m >= 12) return -1; /* no month found */ - for (; isspace((unsigned char)*s); s++) + for (; ISSPACE((unsigned char)*s); s++) ; - for (v = 0, i = 0; i < 4 && isdigit((unsigned char)*s); s++, i++) + for (v = 0, i = 0; i < 4 && ISDIGIT((unsigned 
char)*s); s++, i++) v = (v * 10) + (*s - '0'); /* obsolete short year: RFC2822 4.3 */ if (i <= 3) v += (v >= 0 && v <= 49) ? 2000 : 1900; va[0] = v; /* year */ - for (; isspace((unsigned char)*s); s++) + for (; ISSPACE((unsigned char)*s); s++) ; /* parse only regular time part, see below */ vi = 3; @@ -626,20 +626,20 @@ parsetime(const char *s, long long *tp) /* parse time parts (and possibly remaining date parts) */ for (; *s && vi < 6; vi++) { for (i = 0, v = 0; i < ((vi == 0) ? 4 : 2) && - isdigit((unsigned char)*s); s++, i++) { + ISDIGIT((unsigned char)*s); s++, i++) { v = (v * 10) + (*s - '0'); } va[vi] = v; if ((vi < 2 && *s == '-') || - (vi == 2 && (*s == 'T' || isspace((unsigned char)*s))) || + (vi == 2 && (*s == 'T' || ISSPACE((unsigned char)*s))) || (vi > 2 && *s == ':')) s++; } /* skip milliseconds in for example: "%Y-%m-%dT%H:%M:%S.000Z" */ if (*s == '.') { - for (s++; isdigit((unsigned char)*s); s++) + for (s++; ISDIGIT((unsigned char)*s); s++) ; } diff --git a/sfeed_opml_import.c b/sfeed_opml_import.c index 9a13e36..14b5444 100644 --- a/sfeed_opml_import.c +++ b/sfeed_opml_import.c @@ -1,4 +1,3 @@ -#include #include #include diff --git a/sfeed_web.c b/sfeed_web.c index dfff0a3..630ab60 100644 --- a/sfeed_web.c +++ b/sfeed_web.c @@ -1,4 +1,3 @@ -#include #include #include diff --git a/sfeed_xmlenc.c b/sfeed_xmlenc.c index 67d7b0f..7fc93ae 100644 --- a/sfeed_xmlenc.c +++ b/sfeed_xmlenc.c @@ -1,4 +1,3 @@ -#include #include #include #include @@ -26,10 +25,10 @@ xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl, return; for (; *v; v++) { - if (isalpha((unsigned char)*v) || - isdigit((unsigned char)*v) || + if (ISALPHA((unsigned char)*v) || + ISDIGIT((unsigned char)*v) || *v == '.' 
|| *v == ':' || *v == '-' || *v == '_') - putchar(tolower((unsigned char)*v)); + putchar(TOLOWER((unsigned char)*v)); } } diff --git a/util.c b/util.c index ed0b5c9..0b7da06 100644 --- a/util.c +++ b/util.c @@ -1,4 +1,3 @@ -#include #include #include #include @@ -66,8 +65,8 @@ strcasestr(const char *h, const char *n) return (char *)h; for (; *h; ++h) { - for (i = 0; n[i] && tolower((unsigned char)n[i]) == - tolower((unsigned char)h[i]); ++i) + for (i = 0; n[i] && TOLOWER((unsigned char)n[i]) == + TOLOWER((unsigned char)h[i]); ++i) ; if (n[i] == '\0') return (char *)h; @@ -82,7 +81,7 @@ uri_hasscheme(const char *s) { const char *p = s; - for (; isalpha((unsigned char)*p) || isdigit((unsigned char)*p) || + for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) || *p == '+' || *p == '-' || *p == '.'; p++) ; /* scheme, except if empty and starts with ":" then it is a path */ @@ -109,7 +108,7 @@ uri_parse(const char *s, struct uri *u) } /* scheme / protocol part */ - for (; isalpha((unsigned char)*p) || isdigit((unsigned char)*p) || + for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) || *p == '+' || *p == '-' || *p == '.'; p++) ; /* scheme, except if empty and starts with ":" then it is a path */ diff --git a/util.h b/util.h index c68cef4..fac6424 100644 --- a/util.h +++ b/util.h @@ -8,8 +8,12 @@ #define unveil(p1,p2) 0 #endif -/* control-character in the ASCII range 0-127: compatible with UTF-8 */ +/* ctype-like macros, but always compatible with ASCII / UTF-8 */ +#define ISALPHA(c) ((((unsigned)c) | 32) - 'a' < 26) #define ISCNTRL(c) ((c) < ' ' || (c) == 0x7f) +#define ISDIGIT(c) (((unsigned)c) - '0' < 10) +#define ISSPACE(c) ((c) == ' ' || ((((unsigned)c) - '\t') < 5)) +#define TOLOWER(c) ((((unsigned)c) - 'A' < 26) ? 
((c) | 32) : (c)) #undef strcasestr char *strcasestr(const char *, const char *); diff --git a/xml.c b/xml.c index f16cbbb..3bd9557 100644 --- a/xml.c +++ b/xml.c @@ -1,4 +1,3 @@ -#include #include #include #include @@ -6,6 +5,9 @@ #include "xml.h" +#define ISALPHA(c) ((((unsigned)c) | 32) - 'a' < 26) +#define ISSPACE(c) ((c) == ' ' || ((((unsigned)c) - '\t') < 5)) + static void xml_parseattrs(XMLParser *x) { @@ -13,7 +15,7 @@ xml_parseattrs(XMLParser *x) int c, endsep, endname = 0, valuestart = 0; while ((c = GETNEXT()) != EOF) { - if (isspace(c)) { + if (ISSPACE(c)) { if (namelen) endname = 1; continue; @@ -23,7 +25,7 @@ xml_parseattrs(XMLParser *x) x->name[namelen] = '\0'; valuestart = 1; endname = 1; - } else if (namelen && ((endname && !valuestart && isalpha(c)) || (c == '>' || c == '/'))) { + } else if (namelen && ((endname && !valuestart && ISALPHA(c)) || (c == '>' || c == '/'))) { /* attribute without value */ x->name[namelen] = '\0'; if (x->xmlattrstart) @@ -44,7 +46,7 @@ xml_parseattrs(XMLParser *x) if (c == '\'' || c == '"') { endsep = c; } else { - endsep = ' '; /* isspace() */ + endsep = ' '; /* ISSPACE() */ goto startvalue; } @@ -58,7 +60,7 @@ startvalue: x->data[0] = c; valuelen = 1; while ((c = GETNEXT()) != EOF) { - if (c == endsep || (endsep == ' ' && (c == '>' || isspace(c)))) + if (c == endsep || (endsep == ' ' && (c == '>' || ISSPACE(c)))) break; if (valuelen < sizeof(x->data) - 1) x->data[valuelen++] = c; @@ -79,7 +81,7 @@ startvalue: break; } } - } else if (c != endsep && !(endsep == ' ' && (c == '>' || isspace(c)))) { + } else if (c != endsep && !(endsep == ' ' && (c == '>' || ISSPACE(c)))) { if (valuelen < sizeof(x->data) - 1) { x->data[valuelen++] = c; } else { @@ -90,7 +92,7 @@ startvalue: valuelen = 1; } } - if (c == endsep || (endsep == ' ' && (c == '>' || isspace(c)))) { + if (c == endsep || (endsep == ' ' && (c == '>' || ISSPACE(c)))) { x->data[valuelen] = '\0'; if (x->xmlattr) x->xmlattr(x, x->tag, x->taglen, x->name, namelen, 
x->data, valuelen); @@ -328,7 +330,7 @@ xml_parse(XMLParser *x) while ((c = GETNEXT()) != EOF) { if (c == '/') x->isshorttag = 1; /* short tag */ - else if (c == '>' || isspace(c)) { + else if (c == '>' || ISSPACE(c)) { x->tag[x->taglen] = '\0'; if (isend) { /* end tag, starts with xmltagend) @@ -339,7 +341,7 @@ xml_parse(XMLParser *x) /* start tag */ if (x->xmltagstart) x->xmltagstart(x, x->tag, x->taglen); - if (isspace(c)) + if (ISSPACE(c)) xml_parseattrs(x); if (x->xmltagstartparsed) x->xmltagstartparsed(x, x->tag, x->taglen, x->isshorttag); -- cgit v1.2.3 From 21a263cb27aeaf02b4a0a0319f435fac92f1ea28 Mon Sep 17 00:00:00 2001 From: Hiltjo Posthuma Date: Tue, 15 Aug 2023 19:10:51 +0200 Subject: improve wording and small typos --- sfeed.c | 6 +++--- sfeed_curses.c | 2 +- sfeed_opml_import.c | 2 +- sfeed_web.c | 2 +- sfeed_xmlenc.c | 2 +- xml.c | 4 ++-- xml.h | 2 +- 7 files changed, 10 insertions(+), 10 deletions(-) (limited to 'sfeed_xmlenc.c') diff --git a/sfeed.c b/sfeed.c index 888a575..cdd528c 100644 --- a/sfeed.c +++ b/sfeed.c @@ -724,8 +724,8 @@ xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl, if (!ctx.tag.id) return; - /* content-type may be: Atom: text, xhtml, html or mime-type. - MRSS (media:description): plain, html. */ + /* content-type may be for Atom: text, xhtml, html or a mime-type. + for MRSS (media:description): plain, html. 
*/ if (ISCONTENTTAG(ctx)) { if (isattr(n, nl, STRP("type"))) string_append(&attrtype, v, vl); @@ -1073,7 +1073,7 @@ main(int argc, char *argv[]) parser.xmltagstart = xmltagstart; parser.xmltagstartparsed = xmltagstartparsed; - /* NOTE: getnext is defined in xml.h for inline optimization */ + /* NOTE: GETNEXT is defined in xml.h for inline optimization */ xml_parse(&parser); checkfileerror(stdin, "", 'r'); diff --git a/sfeed_curses.c b/sfeed_curses.c index 95421fd..0434812 100644 --- a/sfeed_curses.c +++ b/sfeed_curses.c @@ -1875,7 +1875,7 @@ markread(struct pane *p, off_t from, off_t to, int isread) _exit(status); default: /* waitpid() and block on process status change, - fail if exit statuscode was unavailable or non-zero */ + fail if the exit status code was unavailable or non-zero */ if (waitpid(pid, &status, 0) <= 0 || status) break; diff --git a/sfeed_opml_import.c b/sfeed_opml_import.c index 9922133..ce33aac 100644 --- a/sfeed_opml_import.c +++ b/sfeed_opml_import.c @@ -96,7 +96,7 @@ main(void) "# list of feeds to fetch:\n" "feeds() {\n" " # feed [basesiteurl] [encoding]\n", stdout); - /* NOTE: getnext is defined in xml.h for inline optimization */ + /* NOTE: GETNEXT is defined in xml.h for inline optimization */ xml_parse(&parser); fputs("}\n", stdout); diff --git a/sfeed_web.c b/sfeed_web.c index 0082f2d..e25e91c 100644 --- a/sfeed_web.c +++ b/sfeed_web.c @@ -132,7 +132,7 @@ main(int argc, char *argv[]) parser.xmltagstart = xmltagstart; parser.xmltagstartparsed = xmltagstartparsed; - /* NOTE: getnext is defined in xml.h for inline optimization */ + /* NOTE: GETNEXT is defined in xml.h for inline optimization */ xml_parse(&parser); checkfileerror(stdin, "", 'r'); diff --git a/sfeed_xmlenc.c b/sfeed_xmlenc.c index 7fc93ae..461c047 100644 --- a/sfeed_xmlenc.c +++ b/sfeed_xmlenc.c @@ -52,7 +52,7 @@ main(void) parser.xmlattrend = xmlattrend; parser.xmltagstart = xmltagstart; - /* NOTE: getnext is defined in xml.h for inline optimization */ + /* NOTE: GETNEXT is 
defined in xml.h for inline optimization */ xml_parse(&parser); checkfileerror(stdin, "", 'r'); diff --git a/xml.c b/xml.c index a82053e..1524d1f 100644 --- a/xml.c +++ b/xml.c @@ -317,7 +317,7 @@ xml_parse(XMLParser *x) x->taglen = 1; x->isshorttag = isend = 0; - /* treat processing instruction as shorttag, don't strip "?" prefix. */ + /* treat processing instruction as short tag, don't strip "?" prefix. */ if (c == '?') { x->isshorttag = 1; } else if (c == '/') { @@ -346,7 +346,7 @@ xml_parse(XMLParser *x) if (x->xmltagstartparsed) x->xmltagstartparsed(x, x->tag, x->taglen, x->isshorttag); } - /* call tagend for shortform or processing instruction */ + /* call tagend for short tag or processing instruction */ if (x->isshorttag) { if (x->xmltagend) x->xmltagend(x, x->tag, x->taglen, x->isshorttag); diff --git a/xml.h b/xml.h index 122726c..6f3cf71 100644 --- a/xml.h +++ b/xml.h @@ -30,7 +30,7 @@ typedef struct xmlparser { /* current tag */ char tag[1024]; size_t taglen; - /* current tag is in shortform ? */ + /* current tag is a short tag ? */ int isshorttag; /* current attribute name */ char name[1024]; -- cgit v1.2.3