From aeb1398411ce245fa7982365640f7852d63b3d52 Mon Sep 17 00:00:00 2001 From: Hiltjo Posthuma Date: Sat, 4 Feb 2023 12:34:56 +0100 Subject: README: describe how to add new parsed tags and fields to sfeed.c --- README | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) (limited to 'README') diff --git a/README b/README index a4c2d25..fba7565 100644 --- a/README +++ b/README @@ -1070,6 +1070,96 @@ file: - - - +sfeed.c: adding new XML tags or sfeed(5) fields to the parser +------------------------------------------------------------- + +sfeed.c contains definitions to parse XML tags and map them to sfeed(5) TSV +fields. Parsed RSS and Atom tag names are first stored as a TagId, which is a +number. This TagId is then mapped to the output field index. + +* Add a new TagId enum for the tag. + +* (optional) Add a new FeedField* enum for the new output field or you can map + it to an existing field. + +* Add the new XML tag name to the array variable of parsed RSS or Atom + tags: rsstags[] or atomtags[]. + + These must be defined in alphabetical order, because a binary search is used + which uses the strcasecmp() function. + +* Add the parsed TagId to the output field in the array variable fieldmap[]. + + When another tag is also mapped to the same output field then the tag with + the highest TagId number value overrides the mapped field: the order is from + least important to most important. + +* If this defined tag is just using the inner data of the XML tag, then this + definition is enough. If it, for example, has to parse a certain attribute, you + have to add a check for the TagId to the xmlattr() callback function. + +* (optional) Print the new field in the printfields() function. 
+ +Below is a patch example to add the MRSS "media:content" field as a new field: + +diff --git a/sfeed.c b/sfeed.c +--- a/sfeed.c ++++ b/sfeed.c +@@ -50,7 +50,7 @@ enum TagId { + RSSTagGuidPermalinkTrue, + /* must be defined after GUID, because it can be a link (isPermaLink) */ + RSSTagLink, +- RSSTagEnclosure, ++ RSSTagMediaContent, RSSTagEnclosure, + RSSTagAuthor, RSSTagDccreator, + RSSTagCategory, + /* Atom */ +@@ -81,7 +81,7 @@ typedef struct field { + enum { + FeedFieldTime = 0, FeedFieldTitle, FeedFieldLink, FeedFieldContent, + FeedFieldId, FeedFieldAuthor, FeedFieldEnclosure, FeedFieldCategory, +- FeedFieldLast ++ FeedFieldMediaContent, FeedFieldLast + }; + + typedef struct feedcontext { +@@ -137,6 +137,7 @@ static const FeedTag rsstags[] = { + { STRP("enclosure"), RSSTagEnclosure }, + { STRP("guid"), RSSTagGuid }, + { STRP("link"), RSSTagLink }, ++ { STRP("media:content"), RSSTagMediaContent }, + { STRP("media:description"), RSSTagMediaDescription }, + { STRP("pubdate"), RSSTagPubdate }, + { STRP("title"), RSSTagTitle } +@@ -180,6 +181,7 @@ static const int fieldmap[TagLast] = { + [RSSTagGuidPermalinkFalse] = FeedFieldId, + [RSSTagGuidPermalinkTrue] = FeedFieldId, /* special-case: both a link and an id */ + [RSSTagLink] = FeedFieldLink, ++ [RSSTagMediaContent] = FeedFieldMediaContent, + [RSSTagEnclosure] = FeedFieldEnclosure, + [RSSTagAuthor] = FeedFieldAuthor, + [RSSTagDccreator] = FeedFieldAuthor, +@@ -677,6 +679,8 @@ printfields(void) + string_print_uri(&ctx.fields[FeedFieldEnclosure].str); + putchar(FieldSeparator); + string_print_trimmed_multi(&ctx.fields[FeedFieldCategory].str); ++ putchar(FieldSeparator); ++ string_print_trimmed(&ctx.fields[FeedFieldMediaContent].str); + putchar('\n'); + + if (ferror(stdout)) /* check for errors but do not flush */ +@@ -718,7 +722,7 @@ xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl, + } + + if (ctx.feedtype == FeedTypeRSS) { +- if (ctx.tag.id == RSSTagEnclosure && ++ if ((ctx.tag.id == 
RSSTagEnclosure || ctx.tag.id == RSSTagMediaContent) && + isattr(n, nl, STRP("url"))) { + string_append(&tmpstr, v, vl); + } else if (ctx.tag.id == RSSTagGuid && + +- - - + Running custom commands inside the sfeed_curses program ------------------------------------------------------- -- cgit v1.2.3