summaryrefslogtreecommitdiff
path: root/sfeed.c
diff options
context:
space:
mode:
authorHiltjo Posthuma <hiltjo@codemadness.org>2019-02-24 15:25:31 +0100
committerHiltjo Posthuma <hiltjo@codemadness.org>2019-02-24 15:31:26 +0100
commitb3c9ad3cc6a8ad77b9c643aafe3a290b9f67e39d (patch)
tree2fcbeaa836bbb7247033d890a9be1a18ece3aa12 /sfeed.c
parent19cd36545777e20ca03c066d4a29d9c626b86b57 (diff)
stricter Atom link parsing
The Atom link parsing is now stricter and checks the rel attribute. When the rel attribute is empty, the link is handled as a normal link ("alternate"). This ensures that when a link of another type (such as "enclosure", "related", "self" or "via") is specified before a normal link, it is not used in its place. sfeed does not handle enclosures, but the code has been reworked so that it is very simple to add this. Enclosures are often used, for example, to attach an image to a news post or an audio file to a podcast.
Diffstat (limited to 'sfeed.c')
-rw-r--r--sfeed.c43
1 files changed, 35 insertions, 8 deletions
diff --git a/sfeed.c b/sfeed.c
index b308188..560e808 100644
--- a/sfeed.c
+++ b/sfeed.c
@@ -54,6 +54,7 @@ enum TagId {
AtomTagMediaDescription, AtomTagSummary, AtomTagContent,
AtomTagId,
AtomTagLink,
+ AtomTagLinkAlternate,
AtomTagAuthor,
TagLast
};
@@ -141,8 +142,10 @@ static FeedTag atomtags[] = {
{ STRP("updated"), AtomTagUpdated }
};
-/* map tagid type to RSS/Atom field */
+/* map tagid type to RSS/Atom field
+ NOTE: all tags must be defined */
static int fieldmap[TagLast] = {
+ [TagUnknown] = -1,
/* RSS */
[RSSTagDcdate] = FeedFieldTime,
[RSSTagPubdate] = FeedFieldTime,
@@ -162,7 +165,8 @@ static int fieldmap[TagLast] = {
[AtomTagSummary] = FeedFieldContent,
[AtomTagContent] = FeedFieldContent,
[AtomTagId] = FeedFieldId,
- [AtomTagLink] = FeedFieldLink,
+ [AtomTagLink] = -1,
+ [AtomTagLinkAlternate] = FeedFieldLink,
[AtomTagAuthor] = FeedFieldAuthor
};
@@ -172,6 +176,9 @@ static const char *baseurl = "";
static FeedContext ctx;
static XMLParser parser; /* XML parser state */
+static String atomlink;
+static int atomlinktype;
+
/* Unique tagid for parsed tag name. */
static enum TagId
gettag(enum FeedType feedtype, const char *name, size_t namelen)
@@ -619,11 +626,16 @@ xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl,
ctx.contenttype = ContentTypeHTML;
}
} else if (ctx.tagid == AtomTagLink &&
- isattr(n, nl, STRP("href")) &&
- ctx.field)
- {
- /* link href attribute */
- string_append(ctx.field, v, vl);
+ isattr(n, nl, STRP("rel"))) {
+ /* empty or "alternate": other types could be
+ "enclosure", "related", "self" or "via" */
+ if (!vl || isattr(v, vl, STRP("alternate")))
+ atomlinktype = AtomTagLinkAlternate;
+ else
+ atomlinktype = 0;
+ } else if (ctx.tagid == AtomTagLink &&
+ isattr(n, nl, STRP("href"))) {
+ string_append(&atomlink, v, vl);
}
}
}
@@ -731,12 +743,19 @@ xmltagstart(XMLParser *p, const char *t, size_t tl)
tagid = gettag(ctx.feedtype, t, tl);
ctx.tagid = tagid;
+ /* without a rel attribute the default link type is "alternate" */
+ if (tagid == AtomTagLink) {
+ atomlinktype = AtomTagLinkAlternate;
+ string_clear(&atomlink); /* reuse and clear temporary link */
+ }
+
/* map tag type to field: unknown or lesser priority is ignored,
when tags of the same type are repeated only the first is used. */
- if (tagid == TagUnknown || tagid <= ctx.fields[fieldmap[tagid]].tagid) {
+ if (fieldmap[tagid] == -1 || tagid <= ctx.fields[fieldmap[tagid]].tagid) {
ctx.field = NULL;
return;
}
+
ctx.iscontenttag = (fieldmap[ctx.tagid] == FeedFieldContent);
ctx.field = &(ctx.fields[fieldmap[ctx.tagid]].str);
ctx.fields[fieldmap[ctx.tagid]].tagid = tagid;
@@ -784,6 +803,14 @@ xmltagend(XMLParser *p, const char *t, size_t tl, int isshort)
}
return;
}
+ } else if (ctx.tagid == AtomTagLink) {
+ /* map tag type to field: unknown or lesser priority is ignored,
+ when tags of the same type are repeated only the first is used. */
+ if (atomlinktype && atomlinktype > ctx.fields[fieldmap[atomlinktype]].tagid) {
+ string_append(&ctx.fields[fieldmap[atomlinktype]].str,
+ atomlink.data, atomlink.len);
+ ctx.fields[fieldmap[atomlinktype]].tagid = atomlinktype;
+ }
} else if (!ctx.tagid && ((ctx.feedtype == FeedTypeAtom &&
istag(t, tl, STRP("entry"))) || /* Atom */
(ctx.feedtype == FeedTypeRSS &&