summary refs log tree commit diff
path: root/sfeed.c
diff options
context:
space:
mode:
author Benjamin Chausse <benjamin@chausse.xyz> 2024-08-09 14:11:50 -0400
committer Benjamin Chausse <benjamin@chausse.xyz> 2024-08-09 14:11:50 -0400
commit 5857d82e8e596d6fda406a0c4d8d68ca7a03c124 (patch)
tree 553916894dee907825360580c5d9a05c82c5af16 /sfeed.c
parent 3574e3cbf9d99546e868aeb995ce2c171cdc36a6 (diff)
parent 19957bc272e745af7b56b79fa648e8b6b77113b1 (diff)
Merge remote-tracking branch 'upstream/master' (HEAD, master)
Diffstat (limited to 'sfeed.c')
-rw-r--r-- sfeed.c 160
1 files changed, 90 insertions, 70 deletions
diff --git a/sfeed.c b/sfeed.c
index 4dd89c1..16141cf 100644
--- a/sfeed.c
+++ b/sfeed.c
@@ -1,4 +1,3 @@
-#include <ctype.h>
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
@@ -127,7 +126,7 @@ static void xmltagstartparsed(XMLParser *, const char *, size_t, int);
/* map tag name to TagId type */
/* RSS, must be alphabetical order */
-static FeedTag rsstags[] = {
+static const FeedTag rsstags[] = {
{ STRP("author"), RSSTagAuthor },
{ STRP("category"), RSSTagCategory },
{ STRP("content:encoded"), RSSTagContentEncoded },
@@ -144,7 +143,7 @@ static FeedTag rsstags[] = {
};
/* Atom, must be alphabetical order */
-static FeedTag atomtags[] = {
+static const FeedTag atomtags[] = {
{ STRP("author"), AtomTagAuthor },
{ STRP("category"), AtomTagCategory },
{ STRP("content"), AtomTagContent },
@@ -161,14 +160,14 @@ static FeedTag atomtags[] = {
};
/* special case: nested <author><name> */
-static FeedTag atomtagauthor = { STRP("author"), AtomTagAuthor };
-static FeedTag atomtagauthorname = { STRP("name"), AtomTagAuthorName };
+static const FeedTag atomtagauthor = { STRP("author"), AtomTagAuthor };
+static const FeedTag atomtagauthorname = { STRP("name"), AtomTagAuthorName };
/* reference to no / unknown tag */
-static FeedTag notag = { STRP(""), TagUnknown };
+static const FeedTag notag = { STRP(""), TagUnknown };
/* map TagId type to RSS/Atom field, all tags must be defined */
-static int fieldmap[TagLast] = {
+static const int fieldmap[TagLast] = {
[TagUnknown] = -1,
/* RSS */
[RSSTagDcdate] = FeedFieldTime,
@@ -205,7 +204,7 @@ static int fieldmap[TagLast] = {
static const int FieldSeparator = '\t';
/* separator for multiple values in a field, separator should be 1 byte */
-static const char *FieldMultiSeparator = "|";
+static const char FieldMultiSeparator[] = "|";
static struct uri baseuri;
static const char *baseurl;
@@ -246,7 +245,7 @@ gettag(enum FeedType feedtype, const char *name, size_t namelen)
static char *
ltrim(const char *s)
{
- for (; isspace((unsigned char)*s); s++)
+ for (; ISSPACE((unsigned char)*s); s++)
;
return (char *)s;
}
@@ -256,7 +255,7 @@ rtrim(const char *s)
{
const char *e;
- for (e = s + strlen(s); e > s && isspace((unsigned char)*(e - 1)); e--)
+ for (e = s + strlen(s); e > s && ISSPACE((unsigned char)*(e - 1)); e--)
;
return (char *)e;
}
@@ -294,7 +293,7 @@ string_append(String *s, const char *data, size_t len)
return;
if (s->len >= SIZE_MAX - len) {
- errno = EOVERFLOW;
+ errno = ENOMEM;
err(1, "realloc");
}
@@ -326,7 +325,7 @@ string_print_encoded(String *s)
case '\t': putchar('\\'); putchar('t'); break;
default:
/* ignore control chars */
- if (!iscntrl((unsigned char)*p))
+ if (!ISCNTRL((unsigned char)*p))
putchar(*p);
break;
}
@@ -341,9 +340,9 @@ printtrimmed(const char *s)
p = ltrim(s);
e = rtrim(p);
for (; *p && p != e; p++) {
- if (isspace((unsigned char)*p))
+ if (ISSPACE((unsigned char)*p))
putchar(' '); /* any whitespace to space */
- else if (!iscntrl((unsigned char)*p))
+ else if (!ISCNTRL((unsigned char)*p))
/* ignore other control chars */
putchar(*p);
}
@@ -384,7 +383,7 @@ string_print_trimmed_multi(String *s)
}
}
-/* Print URL, if it's a relative URL then it uses the global `baseurl`. */
+/* Print URL, if it is a relative URL then it uses the global `baseurl`. */
static void
printuri(char *s)
{
@@ -410,7 +409,7 @@ printuri(char *s)
*e = c; /* restore NUL byte to original character */
}
-/* Print URL, if it's a relative URL then it uses the global `baseurl`. */
+/* Print URL, if it is a relative URL then it uses the global `baseurl`. */
static void
string_print_uri(String *s)
{
@@ -433,18 +432,23 @@ string_print_timestamp(String *s)
printf("%lld", t);
}
-/* Convert time fields. Returns a UNIX timestamp. */
+/* Convert time fields. Returns a signed (at least) 64-bit UNIX timestamp.
+ Parameters should be passed as they are in a struct tm:
+ that is: year = year - 1900, month = month - 1. */
static long long
datetounix(long long year, int mon, int day, int hour, int min, int sec)
{
- static const int secs_through_month[] = {
+ /* seconds in a month in a regular (non-leap) year */
+ static const long secs_through_month[] = {
0, 31 * 86400, 59 * 86400, 90 * 86400,
120 * 86400, 151 * 86400, 181 * 86400, 212 * 86400,
243 * 86400, 273 * 86400, 304 * 86400, 334 * 86400 };
int is_leap = 0, cycles, centuries = 0, leaps = 0, rem;
long long t;
+ /* optimization: handle common range year 1902 up to and including 2038 */
if (year - 2ULL <= 136) {
+ /* amount of leap days relative to 1970: every 4 years */
leaps = (year - 68) >> 2;
if (!((year - 68) & 3)) {
leaps--;
@@ -452,8 +456,11 @@ datetounix(long long year, int mon, int day, int hour, int min, int sec)
} else {
is_leap = 0;
}
- t = 31536000 * (year - 70) + 86400 * leaps;
+ t = 31536000 * (year - 70) + (86400 * leaps); /* 365 * 86400 = 31536000 */
} else {
+ /* general leap year calculation:
+ leap years occur mostly every 4 years but every 100 years
+ a leap year is skipped unless the year is divisible by 400 */
cycles = (year - 100) / 400;
rem = (year - 100) % 400;
if (rem < 0) {
@@ -463,20 +470,27 @@ datetounix(long long year, int mon, int day, int hour, int min, int sec)
if (!rem) {
is_leap = 1;
} else {
- if (rem >= 300)
- centuries = 3, rem -= 300;
- else if (rem >= 200)
- centuries = 2, rem -= 200;
- else if (rem >= 100)
- centuries = 1, rem -= 100;
+ if (rem >= 300) {
+ centuries = 3;
+ rem -= 300;
+ } else if (rem >= 200) {
+ centuries = 2;
+ rem -= 200;
+ } else if (rem >= 100) {
+ centuries = 1;
+ rem -= 100;
+ }
if (rem) {
leaps = rem / 4U;
rem %= 4U;
is_leap = !rem;
}
}
- leaps += 97 * cycles + 24 * centuries - is_leap;
- t = (year - 100) * 31536000LL + leaps * 86400LL + 946684800 + 86400;
+ leaps += (97 * cycles) + (24 * centuries) - is_leap;
+
+ /* adjust 8 leap days from 1970 up to and including 2000:
+ ((30 * 365) + 8) * 86400 = 946771200 */
+ t = ((year - 100) * 31536000LL) + (leaps * 86400LL) + 946771200LL;
}
t += secs_through_month[mon];
if (is_leap && mon >= 2)
@@ -490,16 +504,16 @@ datetounix(long long year, int mon, int day, int hour, int min, int sec)
}
/* Get timezone from string, return time offset in seconds from UTC.
- * NOTE: only parses timezones in RFC-822, many other timezone names are
+ * NOTE: only parses timezones in RFC 822, many other timezone names are
* ambiguous anyway.
- * ANSI and military zones are defined wrong in RFC822 and are unsupported,
- * see note on RFC2822 4.3 page 32. */
+ * ANSI and military zones are defined wrong in RFC 822 and are unsupported,
+ * see note on RFC 2822 4.3 page 32. */
static long
gettzoffset(const char *s)
{
- static struct {
+ static const struct {
char *name;
- const int offhour;
+ int offhour;
} tzones[] = {
{ "CDT", -5 * 3600 },
{ "CST", -6 * 3600 },
@@ -514,24 +528,24 @@ gettzoffset(const char *s)
long tzhour = 0, tzmin = 0;
size_t i;
- for (; isspace((unsigned char)*s); s++)
+ for (; ISSPACE((unsigned char)*s); s++)
;
switch (*s) {
case '-': /* offset */
case '+':
- for (i = 0, p = s + 1; i < 2 && isdigit((unsigned char)*p); i++, p++)
+ for (i = 0, p = s + 1; i < 2 && ISDIGIT((unsigned char)*p); i++, p++)
tzhour = (tzhour * 10) + (*p - '0');
if (*p == ':')
p++;
- for (i = 0; i < 2 && isdigit((unsigned char)*p); i++, p++)
+ for (i = 0; i < 2 && ISDIGIT((unsigned char)*p); i++, p++)
tzmin = (tzmin * 10) + (*p - '0');
return ((tzhour * 3600) + (tzmin * 60)) * (s[0] == '-' ? -1 : 1);
default: /* timezone name */
- for (i = 0; isalpha((unsigned char)s[i]); i++)
+ for (i = 0; ISALPHA((unsigned char)s[i]); i++)
;
if (i != 3)
return 0;
- /* compare tz and adjust offset relative to UTC */
+ /* compare timezone and adjust offset relative to UTC */
for (i = 0; i < sizeof(tzones) / sizeof(*tzones); i++) {
if (!memcmp(s, tzones[i].name, 3))
return tzones[i].offhour;
@@ -545,7 +559,7 @@ gettzoffset(const char *s)
static int
parsetime(const char *s, long long *tp)
{
- static struct {
+ static const struct {
char *name;
int len;
} mons[] = {
@@ -565,35 +579,35 @@ parsetime(const char *s, long long *tp)
int va[6] = { 0 }, i, j, v, vi;
size_t m;
- for (; isspace((unsigned char)*s); s++)
+ for (; ISSPACE((unsigned char)*s); s++)
;
- if (!isdigit((unsigned char)*s) && !isalpha((unsigned char)*s))
+ if (!ISDIGIT((unsigned char)*s) && !ISALPHA((unsigned char)*s))
return -1;
- if (isdigit((unsigned char)s[0]) &&
- isdigit((unsigned char)s[1]) &&
- isdigit((unsigned char)s[2]) &&
- isdigit((unsigned char)s[3])) {
+ if (ISDIGIT((unsigned char)s[0]) &&
+ ISDIGIT((unsigned char)s[1]) &&
+ ISDIGIT((unsigned char)s[2]) &&
+ ISDIGIT((unsigned char)s[3])) {
/* formats "%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S" or "%Y%m%d%H%M%S" */
vi = 0;
} else {
/* format: "[%a, ]%d %b %Y %H:%M:%S" */
/* parse "[%a, ]%d %b %Y " part, then use time parsing as above */
- for (; isalpha((unsigned char)*s); s++)
+ for (; ISALPHA((unsigned char)*s); s++)
;
- for (; isspace((unsigned char)*s); s++)
+ for (; ISSPACE((unsigned char)*s); s++)
;
if (*s == ',')
s++;
- for (; isspace((unsigned char)*s); s++)
+ for (; ISSPACE((unsigned char)*s); s++)
;
- for (v = 0, i = 0; i < 2 && isdigit((unsigned char)*s); s++, i++)
+ for (v = 0, i = 0; i < 2 && ISDIGIT((unsigned char)*s); s++, i++)
v = (v * 10) + (*s - '0');
va[2] = v; /* day */
- for (; isspace((unsigned char)*s); s++)
+ for (; ISSPACE((unsigned char)*s); s++)
;
/* end of word month */
- for (j = 0; isalpha((unsigned char)s[j]); j++)
+ for (j = 0; ISALPHA((unsigned char)s[j]); j++)
;
/* check month name */
if (j < 3 || j > 9)
@@ -609,15 +623,15 @@ parsetime(const char *s, long long *tp)
}
if (m >= 12)
return -1; /* no month found */
- for (; isspace((unsigned char)*s); s++)
+ for (; ISSPACE((unsigned char)*s); s++)
;
- for (v = 0, i = 0; i < 4 && isdigit((unsigned char)*s); s++, i++)
+ for (v = 0, i = 0; i < 4 && ISDIGIT((unsigned char)*s); s++, i++)
v = (v * 10) + (*s - '0');
- /* obsolete short year: RFC2822 4.3 */
- if (i <= 3)
- v += (v >= 0 && v <= 49) ? 2000 : 1900;
+ /* obsolete short year: RFC 2822 4.3 */
+ if (i == 2 || i == 3)
+ v += (i == 2 && v >= 0 && v <= 49) ? 2000 : 1900;
va[0] = v; /* year */
- for (; isspace((unsigned char)*s); s++)
+ for (; ISSPACE((unsigned char)*s); s++)
;
/* parse only regular time part, see below */
vi = 3;
@@ -626,20 +640,20 @@ parsetime(const char *s, long long *tp)
/* parse time parts (and possibly remaining date parts) */
for (; *s && vi < 6; vi++) {
for (i = 0, v = 0; i < ((vi == 0) ? 4 : 2) &&
- isdigit((unsigned char)*s); s++, i++) {
+ ISDIGIT((unsigned char)*s); s++, i++) {
v = (v * 10) + (*s - '0');
}
va[vi] = v;
if ((vi < 2 && *s == '-') ||
- (vi == 2 && (*s == 'T' || isspace((unsigned char)*s))) ||
+ (vi == 2 && (*s == 'T' || *s == 't' || ISSPACE((unsigned char)*s))) ||
(vi > 2 && *s == ':'))
s++;
}
/* skip milliseconds in for example: "%Y-%m-%dT%H:%M:%S.000Z" */
if (*s == '.') {
- for (s++; isdigit((unsigned char)*s); s++)
+ for (s++; ISDIGIT((unsigned char)*s); s++)
;
}
@@ -679,6 +693,9 @@ printfields(void)
putchar(FieldSeparator);
string_print_trimmed_multi(&ctx.fields[FeedFieldCategory].str);
putchar('\n');
+
+ if (ferror(stdout)) /* check for errors but do not flush */
+ checkfileerror(stdout, "<stdout>", 'w');
}
static int
@@ -707,8 +724,8 @@ xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl,
if (!ctx.tag.id)
return;
- /* content-type may be: Atom: text, xhtml, html or mime-type.
- MRSS (media:description): plain, html. */
+ /* content-type may be for Atom: text, xhtml, html or a mime-type.
+ for MRSS (media:description): plain, html. */
if (ISCONTENTTAG(ctx)) {
if (isattr(n, nl, STRP("type")))
string_append(&attrtype, v, vl);
@@ -741,7 +758,7 @@ static void
xmlattrentity(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl,
const char *data, size_t datalen)
{
- char buf[16];
+ char buf[8];
int len;
/* handles transforming inline XML to data */
@@ -755,7 +772,7 @@ xmlattrentity(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl,
return;
/* try to translate entity, else just pass as data to
- * xmldata handler. */
+ * xmlattr handler. */
if ((len = xml_entitytostr(data, buf, sizeof(buf))) > 0)
xmlattr(p, t, tl, n, nl, buf, (size_t)len);
else
@@ -818,7 +835,7 @@ xmldata(XMLParser *p, const char *s, size_t len)
static void
xmldataentity(XMLParser *p, const char *data, size_t datalen)
{
- char buf[16];
+ char buf[8];
int len;
if (!ctx.field)
@@ -835,7 +852,7 @@ xmldataentity(XMLParser *p, const char *data, size_t datalen)
static void
xmltagstart(XMLParser *p, const char *t, size_t tl)
{
- FeedTag *f;
+ const FeedTag *f;
if (ISINCONTENT(ctx)) {
if (ctx.contenttype == ContentTypeHTML) {
@@ -894,7 +911,7 @@ xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort)
return;
}
- /* set tag type based on it's attribute value */
+ /* set tag type based on its attribute value */
if (ctx.tag.id == RSSTagGuid) {
/* if empty the default is "true" */
if (!attrispermalink.len ||
@@ -964,7 +981,7 @@ xmltagend(XMLParser *p, const char *t, size_t tl, int isshort)
return;
if (ISINCONTENT(ctx)) {
- /* not close content field */
+ /* not a closed content field */
if (!istag(ctx.tag.name, ctx.tag.len, t, tl)) {
if (!isshort && ctx.contenttype == ContentTypeHTML) {
xmldata(p, "</", 2);
@@ -976,7 +993,7 @@ xmltagend(XMLParser *p, const char *t, size_t tl, int isshort)
} else if (ctx.tag.id && istag(ctx.tag.name, ctx.tag.len, t, tl)) {
/* matched tag end: close it */
/* copy also to the link field if the attribute isPermaLink="true"
- and it is not set by a tag with higher prio. */
+ and it is not set by a tag with higher priority. */
if (ctx.tag.id == RSSTagGuidPermalinkTrue && ctx.field &&
ctx.tag.id > ctx.fields[FeedFieldLink].tagid) {
string_clear(&ctx.fields[FeedFieldLink].str);
@@ -1005,7 +1022,7 @@ xmltagend(XMLParser *p, const char *t, size_t tl, int isshort)
}
/* temporary string: for fields that cannot be processed
- directly and need more context, for example by it's tag
+ directly and need more context, for example by its tag
attributes, like the Atom link rel="alternate|enclosure". */
if (tmpstr.len && ctx.field) {
if (ISFEEDFIELDMULTI(fieldmap[ctx.tag.id])) {
@@ -1056,8 +1073,11 @@ main(int argc, char *argv[])
parser.xmltagstart = xmltagstart;
parser.xmltagstartparsed = xmltagstartparsed;
- /* NOTE: getnext is defined in xml.h for inline optimization */
+ /* NOTE: GETNEXT is defined in xml.h for inline optimization */
xml_parse(&parser);
+ checkfileerror(stdin, "<stdin>", 'r');
+ checkfileerror(stdout, "<stdout>", 'w');
+
return 0;
}