diff options
author | Benjamin Chausse <benjamin@chausse.xyz> | 2024-08-09 14:11:50 -0400 |
---|---|---|
committer | Benjamin Chausse <benjamin@chausse.xyz> | 2024-08-09 14:11:50 -0400 |
commit | 5857d82e8e596d6fda406a0c4d8d68ca7a03c124 (patch) | |
tree | 553916894dee907825360580c5d9a05c82c5af16 /sfeed.c | |
parent | 3574e3cbf9d99546e868aeb995ce2c171cdc36a6 (diff) | |
parent | 19957bc272e745af7b56b79fa648e8b6b77113b1 (diff) |
Diffstat (limited to 'sfeed.c')
-rw-r--r-- | sfeed.c | 160 |
1 files changed, 90 insertions, 70 deletions
@@ -1,4 +1,3 @@ -#include <ctype.h> #include <errno.h> #include <stdint.h> #include <stdio.h> @@ -127,7 +126,7 @@ static void xmltagstartparsed(XMLParser *, const char *, size_t, int); /* map tag name to TagId type */ /* RSS, must be alphabetical order */ -static FeedTag rsstags[] = { +static const FeedTag rsstags[] = { { STRP("author"), RSSTagAuthor }, { STRP("category"), RSSTagCategory }, { STRP("content:encoded"), RSSTagContentEncoded }, @@ -144,7 +143,7 @@ static FeedTag rsstags[] = { }; /* Atom, must be alphabetical order */ -static FeedTag atomtags[] = { +static const FeedTag atomtags[] = { { STRP("author"), AtomTagAuthor }, { STRP("category"), AtomTagCategory }, { STRP("content"), AtomTagContent }, @@ -161,14 +160,14 @@ static FeedTag atomtags[] = { }; /* special case: nested <author><name> */ -static FeedTag atomtagauthor = { STRP("author"), AtomTagAuthor }; -static FeedTag atomtagauthorname = { STRP("name"), AtomTagAuthorName }; +static const FeedTag atomtagauthor = { STRP("author"), AtomTagAuthor }; +static const FeedTag atomtagauthorname = { STRP("name"), AtomTagAuthorName }; /* reference to no / unknown tag */ -static FeedTag notag = { STRP(""), TagUnknown }; +static const FeedTag notag = { STRP(""), TagUnknown }; /* map TagId type to RSS/Atom field, all tags must be defined */ -static int fieldmap[TagLast] = { +static const int fieldmap[TagLast] = { [TagUnknown] = -1, /* RSS */ [RSSTagDcdate] = FeedFieldTime, @@ -205,7 +204,7 @@ static int fieldmap[TagLast] = { static const int FieldSeparator = '\t'; /* separator for multiple values in a field, separator should be 1 byte */ -static const char *FieldMultiSeparator = "|"; +static const char FieldMultiSeparator[] = "|"; static struct uri baseuri; static const char *baseurl; @@ -246,7 +245,7 @@ gettag(enum FeedType feedtype, const char *name, size_t namelen) static char * ltrim(const char *s) { - for (; isspace((unsigned char)*s); s++) + for (; ISSPACE((unsigned char)*s); s++) ; return (char *)s; } @@ -256,7 +255,7 @@ rtrim(const char *s) { const char *e; - for (e = s + strlen(s); e > s && isspace((unsigned char)*(e - 1)); e--) + for (e = s + strlen(s); e > s && ISSPACE((unsigned char)*(e - 1)); e--) ; return (char *)e; } @@ -294,7 +293,7 @@ string_append(String *s, const char *data, size_t len) return; if (s->len >= SIZE_MAX - len) { - errno = EOVERFLOW; + errno = ENOMEM; err(1, "realloc"); } @@ -326,7 +325,7 @@ string_print_encoded(String *s) case '\t': putchar('\\'); putchar('t'); break; default: /* ignore control chars */ - if (!iscntrl((unsigned char)*p)) + if (!ISCNTRL((unsigned char)*p)) putchar(*p); break; } @@ -341,9 +340,9 @@ printtrimmed(const char *s) p = ltrim(s); e = rtrim(p); for (; *p && p != e; p++) { - if (isspace((unsigned char)*p)) + if (ISSPACE((unsigned char)*p)) putchar(' '); /* any whitespace to space */ - else if (!iscntrl((unsigned char)*p)) + else if (!ISCNTRL((unsigned char)*p)) /* ignore other control chars */ putchar(*p); } @@ -384,7 +383,7 @@ string_print_trimmed_multi(String *s) } } -/* Print URL, if it's a relative URL then it uses the global `baseurl`. */ +/* Print URL, if it is a relative URL then it uses the global `baseurl`. */ static void printuri(char *s) { @@ -410,7 +409,7 @@ printuri(char *s) *e = c; /* restore NUL byte to original character */ } -/* Print URL, if it's a relative URL then it uses the global `baseurl`. */ +/* Print URL, if it is a relative URL then it uses the global `baseurl`. */ static void string_print_uri(String *s) { @@ -433,18 +432,23 @@ string_print_timestamp(String *s) printf("%lld", t); } -/* Convert time fields. Returns a UNIX timestamp. */ +/* Convert time fields. Returns a signed (at least) 64-bit UNIX timestamp. + Parameters should be passed as they are in a struct tm: + that is: year = year - 1900, month = month - 1. */ static long long datetounix(long long year, int mon, int day, int hour, int min, int sec) { - static const int secs_through_month[] = { + /* seconds in a month in a regular (non-leap) year */ + static const long secs_through_month[] = { 0, 31 * 86400, 59 * 86400, 90 * 86400, 120 * 86400, 151 * 86400, 181 * 86400, 212 * 86400, 243 * 86400, 273 * 86400, 304 * 86400, 334 * 86400 }; int is_leap = 0, cycles, centuries = 0, leaps = 0, rem; long long t; + /* optimization: handle common range year 1902 up to and including 2038 */ if (year - 2ULL <= 136) { + /* amount of leap days relative to 1970: every 4 years */ leaps = (year - 68) >> 2; if (!((year - 68) & 3)) { leaps--; @@ -452,8 +456,11 @@ datetounix(long long year, int mon, int day, int hour, int min, int sec) } else { is_leap = 0; } - t = 31536000 * (year - 70) + 86400 * leaps; + t = 31536000 * (year - 70) + (86400 * leaps); /* 365 * 86400 = 31536000 */ } else { + /* general leap year calculation: + leap years occur mostly every 4 years but every 100 years + a leap year is skipped unless the year is divisible by 400 */ cycles = (year - 100) / 400; rem = (year - 100) % 400; if (rem < 0) { @@ -463,20 +470,27 @@ datetounix(long long year, int mon, int day, int hour, int min, int sec) if (!rem) { is_leap = 1; } else { - if (rem >= 300) - centuries = 3, rem -= 300; - else if (rem >= 200) - centuries = 2, rem -= 200; - else if (rem >= 100) - centuries = 1, rem -= 100; + if (rem >= 300) { + centuries = 3; + rem -= 300; + } else if (rem >= 200) { + centuries = 2; + rem -= 200; + } else if (rem >= 100) { + centuries = 1; + rem -= 100; + } if (rem) { leaps = rem / 4U; rem %= 4U; is_leap = !rem; } } - leaps += 97 * cycles + 24 * centuries - is_leap; - t = (year - 100) * 31536000LL + leaps * 86400LL + 946684800 + 86400; + leaps += (97 * cycles) + (24 * centuries) - is_leap; + + /* adjust 8 leap days from 1970 up to and including 2000: + ((30 * 365) + 8) * 86400 = 946771200 */ + t = ((year - 100) * 31536000LL) + (leaps * 86400LL) + 946771200LL; } t += secs_through_month[mon]; if (is_leap && mon >= 2) @@ -490,16 +504,16 @@ datetounix(long long year, int mon, int day, int hour, int min, int sec) } /* Get timezone from string, return time offset in seconds from UTC. - * NOTE: only parses timezones in RFC-822, many other timezone names are + * NOTE: only parses timezones in RFC 822, many other timezone names are * ambiguous anyway. - * ANSI and military zones are defined wrong in RFC822 and are unsupported, - * see note on RFC2822 4.3 page 32. */ + * ANSI and military zones are defined wrong in RFC 822 and are unsupported, + * see note on RFC 2822 4.3 page 32. */ static long gettzoffset(const char *s) { - static struct { + static const struct { char *name; - const int offhour; + int offhour; } tzones[] = { { "CDT", -5 * 3600 }, { "CST", -6 * 3600 }, @@ -514,24 +528,24 @@ gettzoffset(const char *s) long tzhour = 0, tzmin = 0; size_t i; - for (; isspace((unsigned char)*s); s++) + for (; ISSPACE((unsigned char)*s); s++) ; switch (*s) { case '-': /* offset */ case '+': - for (i = 0, p = s + 1; i < 2 && isdigit((unsigned char)*p); i++, p++) + for (i = 0, p = s + 1; i < 2 && ISDIGIT((unsigned char)*p); i++, p++) tzhour = (tzhour * 10) + (*p - '0'); if (*p == ':') p++; - for (i = 0; i < 2 && isdigit((unsigned char)*p); i++, p++) + for (i = 0; i < 2 && ISDIGIT((unsigned char)*p); i++, p++) tzmin = (tzmin * 10) + (*p - '0'); return ((tzhour * 3600) + (tzmin * 60)) * (s[0] == '-' ? -1 : 1); default: /* timezone name */ - for (i = 0; isalpha((unsigned char)s[i]); i++) + for (i = 0; ISALPHA((unsigned char)s[i]); i++) ; if (i != 3) return 0; - /* compare tz and adjust offset relative to UTC */ + /* compare timezone and adjust offset relative to UTC */ for (i = 0; i < sizeof(tzones) / sizeof(*tzones); i++) { if (!memcmp(s, tzones[i].name, 3)) return tzones[i].offhour; @@ -545,7 +559,7 @@ gettzoffset(const char *s) static int parsetime(const char *s, long long *tp) { - static struct { + static const struct { char *name; int len; } mons[] = { @@ -565,35 +579,35 @@ parsetime(const char *s, long long *tp) int va[6] = { 0 }, i, j, v, vi; size_t m; - for (; isspace((unsigned char)*s); s++) + for (; ISSPACE((unsigned char)*s); s++) ; - if (!isdigit((unsigned char)*s) && !isalpha((unsigned char)*s)) + if (!ISDIGIT((unsigned char)*s) && !ISALPHA((unsigned char)*s)) return -1; - if (isdigit((unsigned char)s[0]) && - isdigit((unsigned char)s[1]) && - isdigit((unsigned char)s[2]) && - isdigit((unsigned char)s[3])) { + if (ISDIGIT((unsigned char)s[0]) && + ISDIGIT((unsigned char)s[1]) && + ISDIGIT((unsigned char)s[2]) && + ISDIGIT((unsigned char)s[3])) { /* formats "%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S" or "%Y%m%d%H%M%S" */ vi = 0; } else { /* format: "[%a, ]%d %b %Y %H:%M:%S" */ /* parse "[%a, ]%d %b %Y " part, then use time parsing as above */ - for (; isalpha((unsigned char)*s); s++) + for (; ISALPHA((unsigned char)*s); s++) ; - for (; isspace((unsigned char)*s); s++) + for (; ISSPACE((unsigned char)*s); s++) ; if (*s == ',') s++; - for (; isspace((unsigned char)*s); s++) + for (; ISSPACE((unsigned char)*s); s++) ; - for (v = 0, i = 0; i < 2 && isdigit((unsigned char)*s); s++, i++) + for (v = 0, i = 0; i < 2 && ISDIGIT((unsigned char)*s); s++, i++) v = (v * 10) + (*s - '0'); va[2] = v; /* day */ - for (; isspace((unsigned char)*s); s++) + for (; ISSPACE((unsigned char)*s); s++) ; /* end of word month */ - for (j = 0; isalpha((unsigned char)s[j]); j++) + for (j = 0; ISALPHA((unsigned char)s[j]); j++) ; /* check month name */ if (j < 3 || j > 9) @@ -609,15 +623,15 @@ parsetime(const char *s, long long *tp) } if (m >= 12) return -1; /* no month found */ - for (; isspace((unsigned char)*s); s++) + for (; ISSPACE((unsigned char)*s); s++) ; - for (v = 0, i = 0; i < 4 && isdigit((unsigned char)*s); s++, i++) + for (v = 0, i = 0; i < 4 && ISDIGIT((unsigned char)*s); s++, i++) v = (v * 10) + (*s - '0'); - /* obsolete short year: RFC2822 4.3 */ - if (i <= 3) - v += (v >= 0 && v <= 49) ? 2000 : 1900; + /* obsolete short year: RFC 2822 4.3 */ + if (i == 2 || i == 3) + v += (i == 2 && v >= 0 && v <= 49) ? 2000 : 1900; va[0] = v; /* year */ - for (; isspace((unsigned char)*s); s++) + for (; ISSPACE((unsigned char)*s); s++) ; /* parse only regular time part, see below */ vi = 3; @@ -626,20 +640,20 @@ parsetime(const char *s, long long *tp) /* parse time parts (and possibly remaining date parts) */ for (; *s && vi < 6; vi++) { for (i = 0, v = 0; i < ((vi == 0) ? 4 : 2) && - isdigit((unsigned char)*s); s++, i++) { + ISDIGIT((unsigned char)*s); s++, i++) { v = (v * 10) + (*s - '0'); } va[vi] = v; if ((vi < 2 && *s == '-') || - (vi == 2 && (*s == 'T' || isspace((unsigned char)*s))) || + (vi == 2 && (*s == 'T' || *s == 't' || ISSPACE((unsigned char)*s))) || (vi > 2 && *s == ':')) s++; } /* skip milliseconds in for example: "%Y-%m-%dT%H:%M:%S.000Z" */ if (*s == '.') { - for (s++; isdigit((unsigned char)*s); s++) + for (s++; ISDIGIT((unsigned char)*s); s++) ; } @@ -679,6 +693,9 @@ printfields(void) putchar(FieldSeparator); string_print_trimmed_multi(&ctx.fields[FeedFieldCategory].str); putchar('\n'); + + if (ferror(stdout)) /* check for errors but do not flush */ + checkfileerror(stdout, "<stdout>", 'w'); } static int @@ -707,8 +724,8 @@ xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl, if (!ctx.tag.id) return; - /* content-type may be: Atom: text, xhtml, html or mime-type. - MRSS (media:description): plain, html. */ + /* content-type may be for Atom: text, xhtml, html or a mime-type. + for MRSS (media:description): plain, html. */ if (ISCONTENTTAG(ctx)) { if (isattr(n, nl, STRP("type"))) string_append(&attrtype, v, vl); @@ -741,7 +758,7 @@ static void xmlattrentity(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl, const char *data, size_t datalen) { - char buf[16]; + char buf[8]; int len; /* handles transforming inline XML to data */ @@ -755,7 +772,7 @@ xmlattrentity(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl, return; /* try to translate entity, else just pass as data to - * xmldata handler. */ + * xmlattr handler. */ if ((len = xml_entitytostr(data, buf, sizeof(buf))) > 0) xmlattr(p, t, tl, n, nl, buf, (size_t)len); else @@ -818,7 +835,7 @@ xmldata(XMLParser *p, const char *s, size_t len) static void xmldataentity(XMLParser *p, const char *data, size_t datalen) { - char buf[16]; + char buf[8]; int len; if (!ctx.field) @@ -835,7 +852,7 @@ xmldataentity(XMLParser *p, const char *data, size_t datalen) static void xmltagstart(XMLParser *p, const char *t, size_t tl) { - FeedTag *f; + const FeedTag *f; if (ISINCONTENT(ctx)) { if (ctx.contenttype == ContentTypeHTML) { @@ -894,7 +911,7 @@ xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort) return; } - /* set tag type based on it's attribute value */ + /* set tag type based on its attribute value */ if (ctx.tag.id == RSSTagGuid) { /* if empty the default is "true" */ if (!attrispermalink.len || @@ -964,7 +981,7 @@ xmltagend(XMLParser *p, const char *t, size_t tl, int isshort) return; if (ISINCONTENT(ctx)) { - /* not close content field */ + /* not a closed content field */ if (!istag(ctx.tag.name, ctx.tag.len, t, tl)) { if (!isshort && ctx.contenttype == ContentTypeHTML) { xmldata(p, "</", 2); @@ -976,7 +993,7 @@ xmltagend(XMLParser *p, const char *t, size_t tl, int isshort) } else if (ctx.tag.id && istag(ctx.tag.name, ctx.tag.len, t, tl)) { /* matched tag end: close it */ /* copy also to the link field if the attribute isPermaLink="true" - and it is not set by a tag with higher prio. */ + and it is not set by a tag with higher priority. */ if (ctx.tag.id == RSSTagGuidPermalinkTrue && ctx.field && ctx.tag.id > ctx.fields[FeedFieldLink].tagid) { string_clear(&ctx.fields[FeedFieldLink].str); @@ -1005,7 +1022,7 @@ xmltagend(XMLParser *p, const char *t, size_t tl, int isshort) } /* temporary string: for fields that cannot be processed - directly and need more context, for example by it's tag + directly and need more context, for example by its tag attributes, like the Atom link rel="alternate|enclosure". */ if (tmpstr.len && ctx.field) { if (ISFEEDFIELDMULTI(fieldmap[ctx.tag.id])) { @@ -1056,8 +1073,11 @@ main(int argc, char *argv[]) parser.xmltagstart = xmltagstart; parser.xmltagstartparsed = xmltagstartparsed; - /* NOTE: getnext is defined in xml.h for inline optimization */ + /* NOTE: GETNEXT is defined in xml.h for inline optimization */ xml_parse(&parser); + checkfileerror(stdin, "<stdin>", 'r'); + checkfileerror(stdout, "<stdout>", 'w'); + return 0; } |