diff options
author | Hiltjo Posthuma <hiltjo@codemadness.org> | 2020-10-21 22:06:58 +0200 |
---|---|---|
committer | Hiltjo Posthuma <hiltjo@codemadness.org> | 2020-10-22 18:52:46 +0200 |
commit | e771e43d51830ec7d2a19d9d4e67cded83c1b302 (patch) | |
tree | 08ca320c92816b3086b7c6d97e3d4bc8e43b81e9 /sfeed_web.c | |
parent | f054e581dac4921b302e0459a40d1b4f1fbd28ae (diff) |
sfeed_web: attribute parsing improvements, improve man page
Fix attribute parsing; entities in attribute values are now decoded. The
following now works (from helsinkitimes.fi):
<base href="https://www.helsinkitimes.fi/" />
<link href="/?format=feed&type=rss" rel="alternate" type="application/rss+xml" title="RSS 2.0" />
<link href="/?format=feed&type=atom" rel="alternate" type="application/atom+xml" title="Atom 1.0" />
Properly associate attributes with the actual tag. The following now parses
properly (from ascii.jp):
<link rel="apple-touch-icon-precomposed" href="/img/apple-touch-icon.png" />
<link rel="alternate" type="application/rss+xml" />
Diffstat (limited to 'sfeed_web.c')
-rw-r--r-- | sfeed_web.c | 94 |
1 files changed, 66 insertions, 28 deletions
diff --git a/sfeed_web.c b/sfeed_web.c index 308d22c..c9cea05 100644 --- a/sfeed_web.c +++ b/sfeed_web.c @@ -10,65 +10,101 @@ #define STRP(s) s,sizeof(s)-1 static XMLParser parser; -static int isbase, islink, isfeedlink; -static char abslink[4096], feedlink[4096], basehref[4096], feedtype[256]; +static int isbasetag, islinktag, ishrefattr, istypeattr; +static char linkhref[4096], linktype[256], basehref[4096]; +static char abslink[4096]; static void -printfeedtype(const char *s, FILE *fp) +printvalue(const char *s) { for (; *s; s++) - if (!isspace((unsigned char)*s)) - fputc(*s, fp); + if (!iscntrl((unsigned char)*s)) + putchar(*s); } static void xmltagstart(XMLParser *p, const char *t, size_t tl) { - isbase = islink = isfeedlink = 0; - feedlink[0] = '\0'; + isbasetag = islinktag = 0; if (!strcasecmp(t, "base")) - isbase = 1; + isbasetag = 1; else if (!strcasecmp(t, "link")) - islink = 1; + islinktag = 1; } static void xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort) { - if (!isfeedlink) + if (!islinktag) return; - if (absuri(abslink, sizeof(abslink), feedlink, basehref) != -1) - fputs(abslink, stdout); + if (strncasecmp(linktype, STRP("application/atom")) && + strncasecmp(linktype, STRP("application/xml")) && + strncasecmp(linktype, STRP("application/rss"))) + return; + + if (absuri(abslink, sizeof(abslink), linkhref, basehref) != -1) + printvalue(abslink); else - fputs(feedlink, stdout); + printvalue(linkhref); putchar('\t'); - printfeedtype(feedtype, stdout); + printvalue(linktype); putchar('\n'); } static void +xmlattrstart(XMLParser *p, const char *t, size_t tl, const char *a, size_t al) +{ + ishrefattr = istypeattr = 0; + + if (!isbasetag && !islinktag) + return; + + if (!strcasecmp(a, "href")) { + ishrefattr = 1; + if (isbasetag) + basehref[0] = '\0'; + else if (islinktag) + linkhref[0] = '\0'; + } else if (!strcasecmp(a, "type") && islinktag) { + istypeattr = 1; + linktype[0] = '\0'; + } +} + +static void xmlattr(XMLParser *p, const 
char *t, size_t tl, const char *n, size_t nl, const char *v, size_t vl) { - if (isbase) { - if (!strcasecmp(n, "href")) - strlcpy(basehref, v, sizeof(basehref)); - } else if (islink) { - if (!strcasecmp(n, "type")) { - if (!strncasecmp(v, STRP("application/atom")) || - !strncasecmp(v, STRP("application/xml")) || - !strncasecmp(v, STRP("application/rss"))) { - isfeedlink = 1; - strlcpy(feedtype, v, sizeof(feedtype)); - } - } else if (!strcasecmp(n, "href")) { - strlcpy(feedlink, v, sizeof(feedlink)); - } + if (isbasetag && ishrefattr) { + strlcat(basehref, v, sizeof(basehref)); + } else if (islinktag) { + if (ishrefattr) + strlcat(linkhref, v, sizeof(linkhref)); + else if (istypeattr) + strlcat(linktype, v, sizeof(linktype)); } } +static void +xmlattrentity(XMLParser *p, const char *t, size_t tl, const char *a, size_t al, + const char *v, size_t vl) +{ + char buf[16]; + int len; + + if (!ishrefattr && !istypeattr) + return; + + /* try to translate entity, else just pass as data to + * xmlattr handler. */ + if ((len = xml_entitytostr(v, buf, sizeof(buf))) > 0) + xmlattr(p, t, tl, a, al, buf, (size_t)len); + else + xmlattr(p, t, tl, a, al, v, vl); +} + int main(int argc, char *argv[]) { @@ -79,6 +115,8 @@ main(int argc, char *argv[]) strlcpy(basehref, argv[1], sizeof(basehref)); parser.xmlattr = xmlattr; + parser.xmlattrentity = xmlattrentity; + parser.xmlattrstart = xmlattrstart; parser.xmltagstart = xmltagstart; parser.xmltagstartparsed = xmltagstartparsed; |