summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore5
-rw-r--r--CREDITS1
-rw-r--r--LICENSE21
-rw-r--r--Makefile95
-rw-r--r--README143
-rw-r--r--TODO4
-rw-r--r--common.c116
-rw-r--r--config.mk28
-rw-r--r--sfeed.144
-rw-r--r--sfeed.c477
-rw-r--r--sfeed_html.114
-rw-r--r--sfeed_html.c230
-rw-r--r--sfeed_opml_config.111
-rw-r--r--sfeed_opml_config.c87
-rw-r--r--sfeed_plain.115
-rw-r--r--sfeed_plain.c42
-rwxr-xr-xsfeed_update116
-rw-r--r--sfeed_update.182
-rw-r--r--sfeedrc.example17
19 files changed, 1548 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..abdbd0f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+*.o
+sfeed
+sfeed_plain
+sfeed_html
+sfeed_opml_config
diff --git a/CREDITS b/CREDITS
new file mode 100644
index 0000000..a383a90
--- /dev/null
+++ b/CREDITS
@@ -0,0 +1 @@
+raph_ael on #suckless for the idea for an opml converter
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..91da394
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT/X Consortium License
+
+© 2011-2012 Hiltjo Posthuma <hiltjo@codemadness.org>
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..329b97b
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,95 @@
+# sfeed - simple RSS and Atom parser (and programs to add reader functionality).
+
+include config.mk
+
+NAME = sfeed
+SRC = sfeed.c sfeed_plain.c sfeed_html.c sfeed_opml_config.c
+OBJ = ${SRC:.c=.o}
+
+all: options sfeed sfeed_plain sfeed_html sfeed_opml_config
+
+options:
+ @echo ${NAME} build options:
+ @echo "CFLAGS = ${CFLAGS}"
+ @echo "LDFLAGS = ${LDFLAGS}"
+ @echo "CC = ${CC}"
+
+.c.o:
+ @echo CC $<
+ @${CC} -c ${CFLAGS} $<
+
+${OBJ}: config.mk
+
+sfeed: ${OBJ}
+ @echo CC -o $@
+ @${CC} -o $@ sfeed.c ${LDFLAGS} ${LIBEXPAT}
+
+sfeed_opml_config: sfeed_opml_config.o
+ @echo CC -o $@
+ @${CC} -o $@ sfeed_opml_config.o ${LDFLAGS} ${LIBEXPAT}
+
+sfeed_plain: sfeed_plain.o
+ @echo CC -o $@
+ @${CC} -o $@ sfeed_plain.o ${LDFLAGS}
+
+sfeed_html: sfeed_html.o
+ @echo CC -o $@
+ @${CC} -o $@ sfeed_html.o ${LDFLAGS}
+
+clean:
+ @echo cleaning
+ @rm -f sfeed sfeed_plain sfeed_html sfeed_opml_config ${OBJ} ${NAME}-${VERSION}.tar.gz
+
+dist: clean
+ @echo creating dist tarball
+ @mkdir -p ${NAME}-${VERSION}
+ @cp -R LICENSE Makefile README config.mk \
+ TODO CREDITS sfeedrc.example ${SRC} common.c sfeed_update \
+ sfeed.1 sfeed_update.1 sfeed_plain.1 sfeed_html.1 sfeed_opml_config.1 \
+ ${NAME}-${VERSION}
+ @tar -cf ${NAME}-${VERSION}.tar ${NAME}-${VERSION}
+ @gzip ${NAME}-${VERSION}.tar
+ @rm -rf ${NAME}-${VERSION}
+
+install: all
+ @echo installing executable file to ${DESTDIR}${PREFIX}/bin
+ @mkdir -p ${DESTDIR}${PREFIX}/bin
+ @cp -f sfeed sfeed_html sfeed_plain sfeed_update sfeed_opml_config \
+ ${DESTDIR}${PREFIX}/bin
+ @chmod 755 ${DESTDIR}${PREFIX}/bin/sfeed \
+ ${DESTDIR}${PREFIX}/bin/sfeed_html \
+ ${DESTDIR}${PREFIX}/bin/sfeed_plain \
+ ${DESTDIR}${PREFIX}/bin/sfeed_update \
+ ${DESTDIR}${PREFIX}/bin/sfeed_opml_config
+ @mkdir -p ${DESTDIR}${PREFIX}/share/sfeed
+ @cp -f sfeedrc.example ${DESTDIR}${PREFIX}/share/${NAME}
+ @echo installing manual pages to ${DESTDIR}${MANPREFIX}/man1
+ @mkdir -p ${DESTDIR}${MANPREFIX}/man1
+ @sed "s/VERSION/${VERSION}/g" < sfeed.1 > ${DESTDIR}${MANPREFIX}/man1/sfeed.1
+ @sed "s/VERSION/${VERSION}/g" < sfeed_update.1 > ${DESTDIR}${MANPREFIX}/man1/sfeed_update.1
+ @sed "s/VERSION/${VERSION}/g" < sfeed_plain.1 > ${DESTDIR}${MANPREFIX}/man1/sfeed_plain.1
+ @sed "s/VERSION/${VERSION}/g" < sfeed_html.1 > ${DESTDIR}${MANPREFIX}/man1/sfeed_html.1
+ @sed "s/VERSION/${VERSION}/g" < sfeed_opml_config.1 > ${DESTDIR}${MANPREFIX}/man1/sfeed_opml_config.1
+ @chmod 644 ${DESTDIR}${MANPREFIX}/man1/sfeed.1 \
+ ${DESTDIR}${MANPREFIX}/man1/sfeed_update.1 \
+ ${DESTDIR}${MANPREFIX}/man1/sfeed_plain.1 \
+ ${DESTDIR}${MANPREFIX}/man1/sfeed_html.1 \
+ ${DESTDIR}${MANPREFIX}/man1/sfeed_opml_config.1
+
+uninstall:
+ @echo removing executable file from ${DESTDIR}${PREFIX}/bin
+ @rm -f ${DESTDIR}${PREFIX}/bin/sfeed \
+ ${DESTDIR}${PREFIX}/bin/sfeed_html \
+ ${DESTDIR}${PREFIX}/bin/sfeed_plain \
+ ${DESTDIR}${PREFIX}/bin/sfeed_update \
+ ${DESTDIR}${PREFIX}/bin/sfeed_opml_config \
+ ${DESTDIR}${PREFIX}/share/${NAME}/sfeedrc.example
+ @-rmdir ${DESTDIR}${PREFIX}/share/${NAME}
+ @echo removing manual pages from ${DESTDIR}${MANPREFIX}/man1
+ @rm -f ${DESTDIR}${MANPREFIX}/man1/sfeed.1 \
+ ${DESTDIR}${MANPREFIX}/man1/sfeed_update.1 \
+ ${DESTDIR}${MANPREFIX}/man1/sfeed_plain.1 \
+ ${DESTDIR}${MANPREFIX}/man1/sfeed_html.1 \
+ ${DESTDIR}${MANPREFIX}/man1/sfeed_opml_config.1
+
+.PHONY: all options clean dist install uninstall
diff --git a/README b/README
new file mode 100644
index 0000000..cbdb343
--- /dev/null
+++ b/README
@@ -0,0 +1,143 @@
+sfeed v0.8
+----------
+
+Simple RSS and Atom parser (and some format programs).
+
+
+Dependencies
+------------
+
+- C compiler.
+- expat library (used by sfeed.c and sfeed_opml_config.c,
+ http://expat.sourceforge.net/).
+
+
+Optional dependencies
+---------------------
+
+- POSIX shell (used by sfeed_update).
+- curl (used by sfeed_update, http://curl.haxx.se/).
+- iconv (used by sfeed_update, http://www.gnu.org/software/libiconv/).
+
+
+Files
+-----
+
+sfeed - Binary (from sfeed.c); read XML RSS or Atom feed data from
+ stdin. Write feed data in tab-separated format to stdout.
+sfeed_update - Shellscript; update feeds and merge with old feeds in the
+ file $HOME/.sfeed/feeds by default.
+sfeed_plain - Format feeds file (TSV) from sfeed_update to plain text.
+sfeed_html        - Format feeds file (TSV) from sfeed_update to HTML.
+sfeed_opml_config - Generate a sfeedrc config file based on an opml file.
+sfeedrc.example - Example config file.
+
+
+Files read at runtime by sfeed_update
+-------------------------------------
+
+sfeedrc - Config file. This file is evaluated as a shellscript in
+ sfeed_update. You can for example override the fetchfeed()
+                  function to use wget, fetch or another download program or
+ you can override the merge() function to change the merge
+ logic. The function feeds() is called to fetch the feeds.
+ The function feed() can safely be executed as a parallel
+ job in your sfeedrc config file to speedup updating.
+
+
+Files written at runtime by sfeed_update
+----------------------------------------
+
+feeds - Tab-separated format containing all feeds.
+ The sfeed_update script merges new items with this file.
+feeds.new - Temporary file used by sfeed_update to merge items.
+
+
+TAB-SEPARATED format
+--------------------
+
+The items are saved in a TSV-like format except newlines, tabs and
+backslash are escaped with \ (\n, \t and \\). Other whitespace except
+spaces is removed.
+
+The timestamp field is converted to a unix timestamp. The timestamp is also
+stored, formatted, as a separate field. The other fields are left untouched
+(including HTML).
+
+The order and format of the fields are:
+
+item unix timestamp - string unix timestamp (GMT+0)
+item formatted timestamp - string timestamp (YYYY-mm-dd HH:MM:SS tz[+-]HHMM)
+item title - string
+item link - string
+item description - string
+item contenttype - string ("html" or "plain")
+item id - string
+item author - string
+feed type - string ("rss" or "atom")
+feed name - string (extra field added by sfeed_update)
+feed url - string (extra field added by sfeed_update)
+
+
+Usage
+-----
+
+To build and install (respects DESTDIR and PREFIX variable):
+
+make install
+
+
+Generate a sfeedrc config file from your exported list of feeds in opml
+format:
+
+sfeed_opml_config < opmlfile.xml > $HOME/.sfeed/sfeedrc
+
+
+To update feeds and format the feeds file (configfile argument is optional):
+
+sfeed_update "configfile"
+sfeed_plain < $HOME/.sfeed/feeds > $HOME/.sfeed/feeds.txt
+sfeed_html < $HOME/.sfeed/feeds > $HOME/.sfeed/feeds.html
+
+
+Example script to view feeds with dmenu, opens selected url in $BROWSER:
+
+url=$(sfeed_plain < "$HOME/.sfeed/feeds" | dmenu -l 35 -i |
+ sed 's@^.* \([a-zA-Z]*://\)\(.*\)$@\1\2@')
+[ ! "$url" = "" ] && $BROWSER "$url"
+
+
+or to view in your browser:
+
+$BROWSER "$HOME/.sfeed/feeds.html"
+
+
+or to view in your editor:
+
+$EDITOR "$HOME/.sfeed/feeds.txt"
+
+
+tip to remove feeds older than a date (change time="YYYY mm dd HH mm ss")
+
+gawk -F '\t' 'BEGIN {
+ time = mktime("2012 01 01 12 34 56");
+}
+{
+ if(int($1) >= int(time)) {
+ print $0;
+ }
+}' < feeds > feeds.clean
+
+mv feeds.clean feeds
+
+
+License
+-------
+
+MIT, see LICENSE file.
+
+
+Author
+------
+
+Hiltjo Posthuma <hiltjo@codemadness.org>
diff --git a/TODO b/TODO
new file mode 100644
index 0000000..a2c081d
--- /dev/null
+++ b/TODO
@@ -0,0 +1,4 @@
+[ ] opml export script (WIP).
+[ ] rename sfeed_opml_config to sfeed_opml_import.
+[ ] sfeed_update / sfeedrc: add base siteurl as parameter for feed function for feeds located at another domain, for example feedburner.com
+[ ] test opml import / export scripts with thunderbird, google reader, snownews and rssowl.
diff --git a/common.c b/common.c
new file mode 100644
index 0000000..91ac9ca
--- /dev/null
+++ b/common.c
@@ -0,0 +1,116 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <time.h>
+#include <ctype.h>
+
+enum { FieldUnixTimestamp = 0, FieldTimeFormatted, FieldTitle, FieldLink,
+ FieldContent, FieldContentType, FieldId, FieldAuthor, FieldFeedType,
+ FieldFeedName, FieldFeedUrl, FieldLast };
+
+const int FieldSeparator = '\t';
+
+char *
+afgets(char **p, size_t *size, FILE *fp) {
+ char buf[BUFSIZ], *alloc = NULL;
+ size_t n, len = 0, allocsiz;
+ int end = 0;
+
+ while(fgets(buf, sizeof(buf), fp)) {
+ n = strlen(buf);
+ if(buf[n - 1] == '\n') { /* dont store newlines. */
+ buf[n - 1] = '\0';
+ n--;
+ end = 1; /* newline found, end */
+ }
+ len += n;
+ allocsiz = len + 1;
+ if(allocsiz > *size) {
+ if((alloc = realloc(*p, allocsiz))) {
+ *p = alloc;
+ *size = allocsiz;
+ } else {
+ free(*p);
+ *p = NULL;
+ fputs("error: could not realloc\n", stderr);
+ exit(EXIT_FAILURE);
+ return NULL;
+ }
+ }
+ strncpy((*p + (len - n)), buf, n);
+ if(end || feof(fp))
+ break;
+ }
+ if(*p && len > 0) {
+ (*p)[len] = '\0';
+ return *p;
+ }
+ return NULL;
+}
+
+void /* print link; if link is relative use baseurl to make it absolute */
+printlink(const char *link, const char *baseurl) {
+ const char *ebaseproto, *ebasedomain, *p;
+ int isrelative;
+
+ /* protocol part */
+ for(p = link; *p && (isalpha(*p) || isdigit(*p) || *p == '+' || *p == '-' || *p == '.'); p++);
+ isrelative = strncmp(p, "://", strlen("://"));
+ if(isrelative) { /* relative link (baseurl is used). */
+ if((ebaseproto = strstr(baseurl, "://"))) {
+ ebaseproto += strlen("://");
+ fwrite(baseurl, 1, ebaseproto - baseurl, stdout);
+ } else {
+ ebaseproto = baseurl;
+ if(*baseurl || (link[0] == '/' && link[1] == '/'))
+ fputs("http://", stdout);
+ }
+ if(link[0] == '/') { /* relative to baseurl domain (not path). */
+ if(link[1] == '/') /* absolute url but with protocol from baseurl. */
+ link += 2;
+ else if((ebasedomain = strchr(ebaseproto, '/'))) /* relative to baseurl and baseurl path. */
+ fwrite(ebaseproto, 1, ebasedomain - ebaseproto, stdout);
+ else
+ fputs(ebaseproto, stdout);
+ } else if((ebasedomain = strrchr(ebaseproto, '/'))) /* relative to baseurl and baseurl path. */
+ fwrite(ebaseproto, 1, ebasedomain - ebaseproto + 1, stdout);
+ else {
+ fputs(ebaseproto, stdout);
+ if(*baseurl && *link)
+ fputc('/', stdout);
+ }
+ }
+ fputs(link, stdout);
+}
+
+unsigned int
+parseline(char **line, size_t *size, char **fields, unsigned int maxfields, FILE *fp, int separator) {
+ unsigned int i = 0;
+ char *prev, *s;
+
+ if(afgets(line, size, fp)) {
+ for(prev = *line; (s = strchr(prev, separator)) && i <= maxfields; i++) {
+ *s = '\0'; /* null terminate string. */
+ fields[i] = prev;
+ prev = s + 1;
+ }
+ fields[i] = prev;
+ for(i++; i < maxfields; i++) /* make non-parsed fields empty. */
+ fields[i] = "";
+ }
+ return i;
+}
+
+void
+printtime(time_t t) {
+ char buf[32];
+ struct tm temp = { 0 }, *mktm;
+
+ if(!(mktm = localtime_r(&t, &temp)))
+ return;
+ mktm->tm_isdst = -1;
+
+ if(!strftime(buf, sizeof(buf) - 1, "%Y-%m-%d %H:%M", mktm))
+ return;
+ fputs(buf, stdout);
+}
diff --git a/config.mk b/config.mk
new file mode 100644
index 0000000..8bddcad
--- /dev/null
+++ b/config.mk
@@ -0,0 +1,28 @@
+# sfeed version
+VERSION = 0.8
+
+# customize below to fit your system
+
+# paths
+PREFIX = /usr/local
+MANPREFIX = ${PREFIX}/share/man
+
+# includes and libs
+INCS =
+LIBEXPAT = -lexpat
+LIBS = -lc
+
+# flags
+#CFLAGS = -Os -D_POSIX_C_SOURCE=200112L -D_XOPEN_SOURCE=500 -DVERSION=\"${VERSION}\"
+#LDFLAGS = -s ${LIBS}
+
+# debug
+CFLAGS = -g -O0 -pedantic -Wall -Wextra -D_POSIX_C_SOURCE=200112L -D_XOPEN_SOURCE=700 -DVERSION=\"${VERSION}\"
+LDFLAGS = ${LIBS}
+
+# Solaris
+#CFLAGS = -fast ${INCS} -DVERSION=\"${VERSION}\"
+#LDFLAGS = ${LIBS}
+
+# compiler and linker
+CC = cc
diff --git a/sfeed.1 b/sfeed.1
new file mode 100644
index 0000000..04227d8
--- /dev/null
+++ b/sfeed.1
@@ -0,0 +1,44 @@
+.TH SFEED 1 sfeed\-VERSION
+.SH NAME
+sfeed \- simple RSS and Atom parser
+.SH SYNOPSIS
+.B sfeed
+.SH DESCRIPTION
+Read XML RSS or Atom feed data from stdin. Write feed data in a
+tab-separated format to stdout.
+.SH TAB-SEPARATED FORMAT
+The items are saved in a TSV-like format except newlines, tabs and
+backslash are escaped with \\ (\\n, \\t and \\\\). Carriage returns (\\r) are
+removed.
+.TP
+.B item timestamp (unix timestamp in GMT+0)
+string
+.TP
+.B item timestamp (formatted)
+string (YYYY-mm-dd HH:MM:SS tzname[+-]HHMM)
+.TP
+.B item title
+string
+.TP
+.B item link
+string
+.TP
+.B item description
+string
+.TP
+.B item contenttype
+string (html or plain)
+.TP
+.B item id
+string
+.TP
+.B item author
+string
+.TP
+.B feed type
+string (rss or atom)
+.SH SEE ALSO
+.BR sh(1)
+.BR sfeed_update(1)
+.SH BUGS
+Please report them!
diff --git a/sfeed.c b/sfeed.c
new file mode 100644
index 0000000..b83351f
--- /dev/null
+++ b/sfeed.c
@@ -0,0 +1,477 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <string.h>
+#include <time.h>
+#include <ctype.h>
+#include <expat.h> /* libexpat */
+
+enum { FeedTypeNone = 0, FeedTypeRSS = 1, FeedTypeAtom = 2, FeedTypeLast = 3 };
+const char *feedtypes[] = { "", "rss", "atom" };
+
+enum { ContentTypeNone = 0, ContentTypePlain = 1, ContentTypeHTML = 2, ContentTypeLast = 3 };
+const char *contenttypes[] = { "", "plain", "html" };
+
+typedef struct string { /* String data / pool */
+ char *data; /* data */
+ size_t len; /* string length */
+ size_t bufsiz; /* allocated size */
+} String;
+
+typedef struct feeditem { /* Feed item */
+ String timestamp;
+ String title;
+ String link;
+ String content;
+ int contenttype; /* ContentTypePlain or ContentTypeHTML */
+ String id;
+ String author;
+ int feedtype; /* FeedTypeRSS or FeedTypeAtom */
+} FeedItem;
+
+void die(const char *s);
+void cleanup(void);
+
+const int FieldSeparator = '\t';
+FeedItem feeditem; /* data for current feed item */
+char tag[1024]; /* current XML tag being parsed. */
+char feeditemtag[1024]; /* current tag _inside_ a feeditem */
+XML_Parser parser; /* expat XML parser state */
+int incdata = 0;
+char *standardtz = NULL; /* TZ variable at start of program */
+
+void
+string_clear(String *s) {
+ if(s->data)
+ s->data[0] = '\0'; /* clear string only; don't free, prevents
+ unnecessary reallocation */
+ s->len = 0;
+}
+
+void
+string_buffer_init(String *s, size_t len) {
+ if(!(s->data = malloc(len)))
+ die("can't allocate enough memory");
+ s->bufsiz = len;
+ string_clear(s);
+}
+
+void
+string_free(String *s) {
+ free(s->data);
+ s->data = NULL;
+ s->bufsiz = 0;
+ s->len = 0;
+}
+
+int
+string_buffer_expand(String *s, size_t newlen) {
+ char *p;
+ size_t alloclen;
+ /* check if allocation is necesary, dont shrink buffer */
+ if(!s->data || (newlen > s->bufsiz)) {
+ /* should be more than bufsiz ofcourse */
+ for(alloclen = 16; alloclen <= newlen; alloclen *= 2);
+ if(!(p = realloc(s->data, alloclen))) {
+ string_free(s); /* free previous allocation */
+ die("can't allocate enough memory");
+ }
+ s->bufsiz = alloclen;
+ s->data = p;
+ }
+ return s->bufsiz;
+}
+
+void
+string_append(String *s, const char *data, size_t len) {
+ string_buffer_expand(s, s->len + len);
+ memcpy(s->data + s->len, data, len);
+ s->len += len;
+ s->data[s->len] = '\0';
+}
+
+void /* cleanup parser, free allocated memory, etc */
+cleanup(void) {
+ XML_ParserFree(parser);
+ string_free(&feeditem.timestamp);
+ string_free(&feeditem.title);
+ string_free(&feeditem.link);
+ string_free(&feeditem.content);
+ string_free(&feeditem.id);
+ string_free(&feeditem.author);
+}
+
+void /* print error message to stderr */
+die(const char *s) {
+ fputs("sfeed: ", stderr);
+ fputs(s, stderr);
+ fputc('\n', stderr);
+ cleanup();
+ exit(EXIT_FAILURE);
+}
+
+void
+gettimetz(const char *s, char *buf, size_t bufsiz) {
+ const char *p = s;
+ int tzhour = 0, tzmin = 0;
+ char tzname[128] = "", *t = NULL;
+ unsigned int i;
+
+ buf[0] = '\0';
+ for(; *p && isspace(*p); p++); /* skip whitespace */
+ /* detect time offset, assume time offset isn't specified in the first 18 characters */
+ for(i = 0; *p && ((*p != '+' && *p != '-') || i <= 18); p++, i++);
+
+ if(isalpha(*p)) {
+ if(*p == 'Z' || *p == 'z') {
+ strncpy(buf, "GMT+00:00", bufsiz);
+ return;
+ } else {
+ for(i = 0, t = &tzname[0]; i < (sizeof(tzname) - 1) && (*p && isalpha(*p)); i++)
+ *(t++) = *(p++);
+ *t = '\0';
+ }
+ } else
+ strncpy(tzname, "GMT", sizeof(tzname) - 1);
+ if(!(*p)) {
+ strncpy(buf, tzname, bufsiz);
+ return;
+ }
+ /* NOTE: reverses time offsets for TZ */
+ if((sscanf(p, "+%02d:%02d", &tzhour, &tzmin)) > 0)
+ snprintf(buf, bufsiz, "%s-%02d:%02d", tzname, tzhour, tzmin);
+ else if((sscanf(p, "-%02d:%02d", &tzhour, &tzmin)) > 0)
+ snprintf(buf, bufsiz, "%s+%02d:%02d", tzname, tzhour, tzmin);
+ else if((sscanf(p, "+%02d%02d", &tzhour, &tzmin)) > 0)
+ snprintf(buf, bufsiz, "%s-%02d:%02d", tzname, tzhour, tzmin);
+ else if((sscanf(p, "-%02d%02d", &tzhour, &tzmin)) > 0)
+ snprintf(buf, bufsiz, "%s+%02d:%02d", tzname, tzhour, tzmin);
+ else if(sscanf(p, "+%d", &tzhour) > 0)
+ snprintf(buf, bufsiz, "%s-%02d:00", tzname, tzhour);
+ else if(sscanf(p, "-%d", &tzhour) > 0)
+ snprintf(buf, bufsiz, "%s+%02d:00", tzname, tzhour);
+}
+
+time_t
+parsetime(const char *s, char *buf, size_t bufsiz) {
+ struct tm tm = { 0 };
+ time_t t = 0;
+ char timebuf[64], tz[256], *p;
+
+ if(buf)
+ buf[0] = '\0';
+ gettimetz(s, tz, sizeof(tz) - 1);
+ if(!standardtz || strcmp(standardtz, tz)) {
+ if(!strcmp(tz, "")) { /* restore TZ */
+ if(standardtz)
+ setenv("TZ", standardtz, 1);
+ else
+ unsetenv("TZ");
+ }
+ else
+ setenv("TZ", tz, 1);
+ tzset();
+ }
+ if((strptime(s, "%Y-%m-%dT%H:%M:%SZ", &tm)) ||
+ (strptime(s, "%Y-%m-%d %H:%M:%S", &tm)) ||
+ (strptime(s, "%a, %d %b %Y %H:%M:%S", &tm)) ||
+ (strptime(s, "%Y-%m-%dT%H:%M:%S", &tm))) {
+ tm.tm_isdst = -1; /* detect Daylight Saving Time */
+ if((t = mktime(&tm)) == -1)
+ t = 0;
+ if(buf && (strftime(timebuf, sizeof(timebuf) - 1,
+ "%Y-%m-%d %H:%M:%S", &tm))) {
+ for(p = tz; *p; p++) /* print time offset reverse */
+ *p = ((*p == '-') ? '+' : (*p == '+' ? '-' : *p));
+ snprintf(buf, bufsiz, "%s %s", timebuf, tz);
+ }
+ }
+ return t;
+}
+
+/* print text, ignore tabs, newline and carriage return etc
+ * print some HTML 2.0 / XML 1.0 as normal text */
+void
+string_print_trimmed(String *s) {
+ const char *entities[] = {
+ "&amp;", "&", "&lt;", "<", "&gt;", ">", "&apos;", "'", "&quot;", "\"",
+ NULL, NULL
+ };
+ const char *p, *n, **e;
+ unsigned int len, found;
+ if(!s->data)
+ return;
+ for(p = s->data; isspace(*p); p++); /* strip leading whitespace */
+ for(; *p; ) { /* ignore tabs, newline and carriage return etc */
+ if(!isspace(*p) || *p == ' ') {
+ if(*p == '<') { /* skip tags */
+ if((n = strchr(p, '>')))
+ p = n;
+ else
+ putchar('<');
+ } else if(*p == '&') {
+ for(e = entities, found = 0; *e; e += 2) {
+ len = strlen(*e);
+ if(!strncmp(*e, p, len)) { /* compare entities and "replace" */
+ fputs(*(e + 1), stdout);
+ p += len;
+ found = 1;
+ break;
+ }
+ }
+ if(found)
+ continue;
+ else
+ putchar('&');
+ } else
+ fputc(*p, stdout);
+ }
+ p++;
+ }
+}
+
+void /* print text, escape tabs, newline and carriage return etc */
+string_print_textblock(String *s) {
+ const char *p;
+ if(!s->data)
+ return;
+ for(p = s->data; *p && isspace(*p); p++); /* strip leading whitespace */
+ for(; *p; p++) {
+ if(*p == '\n') /* escape newline */
+ fputs("\\n", stdout);
+ else if(*p == '\\') /* escape \ */
+ fputs("\\\\", stdout);
+ else if(*p == '\t') /* tab */
+ fputs("\\t", stdout);
+ else if(!isspace(*p) || *p == ' ') /* ignore other whitespace chars */
+ fputc(*p, stdout);
+ }
+}
+
+int
+istag(const char *name, const char *name2) {
+ return (!strcasecmp(name, name2));
+}
+
+int
+isattr(const char *name, const char *name2) {
+ return (!strcasecmp(name, name2));
+}
+
+char * /* search for attr value by attr name in attributes list */
+getattrvalue(const char **atts, const char *name) {
+ const char **attr = NULL, *key, *value;
+ if(!atts || !(*atts))
+ return NULL;
+ for(attr = atts; *attr; ) {
+ key = *(attr++);
+ value = *(attr++);
+ if(key && value && isattr(key, name))
+ return (char *)value;
+ }
+ return NULL;
+}
+
+void XMLCALL
+xml_handler_start_element(void *data, const char *name, const char **atts) {
+ const char *value;
+
+ strncpy(tag, name, sizeof(tag) - 1); /* set tag */
+ if(feeditem.feedtype != FeedTypeNone) { /* in item */
+ if(feeditem.feedtype == FeedTypeAtom) {
+ if(istag(feeditemtag, "content") || istag(feeditemtag, "summary")) {
+ XML_DefaultCurrent(parser); /* pass to default handler to process inline HTML etc */
+ } else if(istag(name, "link")) { /* link href attribute */
+ if((value = getattrvalue(atts, "href")))
+ string_append(&feeditem.link, value, strlen(value));
+ } else if(istag(name, "content") || istag(name, "summary")) {
+ if((value = getattrvalue(atts, "type"))) { /* content type is HTML or plain text */
+ if(!strcasecmp(value, "xhtml") || !strcasecmp(value, "text/xhtml") ||
+ !strcasecmp(value, "html") || !strcasecmp(value, "text/html"))
+ feeditem.contenttype = ContentTypeHTML;
+ }
+ }
+ } else if(feeditem.feedtype == FeedTypeRSS) {
+ if(istag(feeditemtag, "description"))
+ XML_DefaultCurrent(parser); /* pass to default handler to process inline HTML etc */
+ }
+ if(feeditemtag[0] == '\0') /* set tag if not already set. */
+ strncpy(feeditemtag, name, sizeof(feeditemtag) - 1);
+ } else { /* start of RSS or Atom entry / item */
+ if(istag(name, "entry")) { /* Atom */
+ feeditem.feedtype = FeedTypeAtom;
+ feeditem.contenttype = ContentTypePlain; /* Default content type */
+ } else if(istag(name, "item")) { /* RSS */
+ feeditem.feedtype = FeedTypeRSS;
+ feeditem.contenttype = ContentTypeHTML; /* Default content type */
+ }
+ }
+}
+
+void XMLCALL
+xml_handler_end_element(void *data, const char *name) {
+ char timebuf[64];
+
+ if(feeditem.feedtype != FeedTypeNone) {
+ /* end of RSS or Atom entry / item */
+ if((istag(name, "entry") && (feeditem.feedtype == FeedTypeAtom)) || /* Atom */
+ (istag(name, "item") && (feeditem.feedtype == FeedTypeRSS))) { /* RSS */
+ printf("%ld", (long)parsetime((&feeditem.timestamp)->data, timebuf,
+ sizeof(timebuf) - 1));
+ fputc(FieldSeparator, stdout);
+ printf("%s", timebuf);
+ fputc(FieldSeparator, stdout);
+ string_print_trimmed(&feeditem.title);
+ fputc(FieldSeparator, stdout);
+ string_print_trimmed(&feeditem.link);
+ fputc(FieldSeparator, stdout);
+ string_print_textblock(&feeditem.content);
+ fputc(FieldSeparator, stdout);
+ fputs(contenttypes[feeditem.contenttype], stdout);
+ fputc(FieldSeparator, stdout);
+ string_print_trimmed(&feeditem.id);
+ fputc(FieldSeparator, stdout);
+ string_print_trimmed(&feeditem.author);
+ fputc(FieldSeparator, stdout);
+ fputs(feedtypes[feeditem.feedtype], stdout);
+ fputc('\n', stdout);
+
+ /* clear strings */
+ string_clear(&feeditem.timestamp);
+ string_clear(&feeditem.title);
+ string_clear(&feeditem.link);
+ string_clear(&feeditem.content);
+ string_clear(&feeditem.id);
+ string_clear(&feeditem.author);
+ feeditem.feedtype = FeedTypeNone;
+ feeditem.contenttype = ContentTypePlain;
+ incdata = 0;
+ feeditemtag[0] = '\0'; /* unset tag */
+ } else if(!strcmp(feeditemtag, name)) { /* clear */
+ feeditemtag[0] = '\0'; /* unset tag */
+ } else {
+ if(feeditem.feedtype == FeedTypeAtom) {
+ if(istag(feeditemtag, "content") || istag(feeditemtag, "summary")) {
+ /* pass to default handler to process inline HTML etc */
+ XML_DefaultCurrent(parser);
+ return;
+ }
+ }
+ }
+ }
+ tag[0] = '\0'; /* unset tag */
+}
+
+/* NOTE: this handler can be called multiple times if the data in this block
+ * is bigger than the buffer */
+void XMLCALL
+xml_handler_data(void *data, const XML_Char *s, int len) {
+ if(feeditem.feedtype == FeedTypeRSS) {
+ if(istag(feeditemtag, "pubdate") || istag(feeditemtag, "dc:date"))
+ string_append(&feeditem.timestamp, s, len);
+ else if(istag(feeditemtag, "title"))
+ string_append(&feeditem.title, s, len);
+ else if(istag(feeditemtag, "link"))
+ string_append(&feeditem.link, s, len);
+ else if(istag(feeditemtag, "description")) {
+ if(incdata)
+ XML_DefaultCurrent(parser); /* pass to default handler to process inline HTML etc */
+ else
+ string_append(&feeditem.content, s, len);
+ } else if(istag(feeditemtag, "guid"))
+ string_append(&feeditem.id, s, len);
+ else if(istag(feeditemtag, "author") || istag(feeditemtag, "dc:creator"))
+ string_append(&feeditem.author, s, len);
+ } else if(feeditem.feedtype == FeedTypeAtom) {
+ if(istag(feeditemtag, "published") || istag(feeditemtag, "updated"))
+ string_append(&feeditem.timestamp, s, len);
+ else if(istag(feeditemtag, "title")) {
+ string_append(&feeditem.title, s, len);
+ } else if(istag(feeditemtag, "summary") || istag(feeditemtag, "content")) {
+ if(feeditem.contenttype == ContentTypeHTML) {
+ if(incdata)
+ XML_DefaultCurrent(parser); /* pass to default handler to process inline HTML etc */
+ else
+ string_append(&feeditem.content, s, len);
+ } else
+ XML_DefaultCurrent(parser); /* pass to default handler to process inline HTML etc */
+ } else if(istag(feeditemtag, "id"))
+ string_append(&feeditem.id, s, len);
+ else if(istag(feeditemtag, "name")) /* assume this is: <author><name></name></author> */
+ string_append(&feeditem.author, s, len);
+ }
+}
+
+int /* parse XML from stream using setup parser, return 1 on success, 0 on failure. */
+xml_parse_stream(XML_Parser parser, FILE *fp) {
+ char buffer[BUFSIZ];
+ int done = 0, len = 0;
+
+ while(!feof(fp)) {
+ len = fread(buffer, 1, sizeof(buffer), fp);
+ done = (feof(fp) || ferror(fp));
+ if(XML_Parse(parser, buffer, len, done) == XML_STATUS_ERROR && (len > 0)) {
+ if(XML_GetErrorCode(parser) == XML_ERROR_NO_ELEMENTS)
+ return 1; /* Ignore "no elements found" / empty document as an error */
+ fprintf(stderr, "sfeed: error parsing xml %s at line %lu column %lu\n",
+ XML_ErrorString(XML_GetErrorCode(parser)), (unsigned long)XML_GetCurrentLineNumber(parser),
+ (unsigned long)XML_GetCurrentColumnNumber(parser));
+ return 0;
+ }
+ } while(!done);
+ return 1;
+}
+
+void
+xml_handler_default(void *data, const XML_Char *s, int len) {
+ if((feeditem.feedtype == FeedTypeAtom && (istag(feeditemtag, "summary") || istag(feeditemtag, "content"))) ||
+ (feeditem.feedtype == FeedTypeRSS && istag(feeditemtag, "description")))
+ /*if(!istag(tag, "script") && !istag(tag, "style"))*/ /* ignore data in inline script and style */
+ string_append(&feeditem.content, s, len);
+}
+
+void /* NOTE: data is null terminated. */
+xml_handler_comment(void *data, const XML_Char *s) {
+}
+
+void
+xml_cdata_section_handler_start(void *userdata) {
+ incdata = 1;
+}
+
+void
+xml_cdata_section_handler_end(void *userdata) {
+ incdata = 0;
+}
+
+int
+main(void) {
+ int status;
+ standardtz = getenv("TZ");
+
+ /* init strings and initial memory pool size */
+ string_buffer_init(&feeditem.timestamp, 64);
+ string_buffer_init(&feeditem.title, 256);
+ string_buffer_init(&feeditem.link, 1024);
+ string_buffer_init(&feeditem.content, 4096);
+ string_buffer_init(&feeditem.id, 1024);
+ string_buffer_init(&feeditem.author, 256);
+ feeditem.contenttype = ContentTypePlain;
+ feeditem.feedtype = FeedTypeNone;
+ feeditemtag[0] = '\0'; /* unset tag */
+ tag[0] = '\0'; /* unset tag */
+
+ if(!(parser = XML_ParserCreate("UTF-8")))
+ die("can't create parser");
+
+ XML_SetElementHandler(parser, xml_handler_start_element, xml_handler_end_element);
+ XML_SetCharacterDataHandler(parser, xml_handler_data);
+ XML_SetCommentHandler(parser, xml_handler_comment);
+ XML_SetCdataSectionHandler(parser, xml_cdata_section_handler_start, xml_cdata_section_handler_end);
+ XML_SetDefaultHandler(parser, xml_handler_default);
+
+ status = xml_parse_stream(parser, stdin);
+ cleanup();
+
+ return status ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/sfeed_html.1 b/sfeed_html.1
new file mode 100644
index 0000000..e645d4b
--- /dev/null
+++ b/sfeed_html.1
@@ -0,0 +1,14 @@
+.TH SFEED 1 sfeed\-VERSION
+.SH NAME
+sfeed_html \- format feeds file (TSV) from sfeed_update to HTML
+.SH SYNOPSIS
+.B sfeed_html
+.SH DESCRIPTION
+Format feeds file (TSV) from sfeed_update to HTML. Reads TSV data from
+stdin and writes HTML to stdout. For the exact TSV format see sfeed_update(1).
+.SH SEE ALSO
+.BR sfeed_plain(1)
+.BR sfeed_update(1)
+.BR sfeed(1)
+.SH BUGS
+Please report them!
diff --git a/sfeed_html.c b/sfeed_html.c
new file mode 100644
index 0000000..c195c86
--- /dev/null
+++ b/sfeed_html.c
@@ -0,0 +1,230 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <time.h>
+#include <ctype.h>
+#include "common.c"
+
+/* Feed info: per-feed counters for the sidebar; one node per feed section
+ * read from stdin, kept in input order as a singly-linked list. */
+struct feed {
+	char *name; /* feed name (strdup'd copy of the TSV feed-name field) */
+	unsigned long new; /* amount of new items per feed */
+	unsigned long total; /* total items */
+	struct feed *next; /* linked list */
+};
+
+static int showsidebar = 1; /* show sidebar ? */
+
+void /* print error message to stderr */
+die(const char *s) {
+ fputs("sfeed_html: ", stderr);
+ fputs(s, stderr);
+ fputc('\n', stderr);
+ exit(EXIT_FAILURE);
+}
+
+/* allocate a zero-initialized feed node; dies when out of memory. */
+struct feed *
+feednew(void) {
+	struct feed *f = calloc(1, sizeof(struct feed));
+
+	if(f == NULL)
+		die("can't allocate enough memory");
+	return f;
+}
+
+/* release the whole feeds linked-list, including each strdup'd name. */
+void
+feedsfree(struct feed *f) {
+	struct feed *tmp;
+
+	for(; f; f = tmp) {
+		tmp = f->next;
+		free(f->name);
+		free(f);
+	}
+}
+
+/* print feed name for id; spaces and tabs in string as "-" (spaces in anchors
+ * are not valid).  The cast to unsigned char matters: feed names are UTF-8 and
+ * passing a negative char (byte > 0x7f with signed char) to isspace() is
+ * undefined behavior. */
+void
+printfeednameid(const char *s) {
+	for(; *s; s++)
+		putchar(isspace((unsigned char)*s) ? '-' : *s);
+}
+
+/* write s to stdout with the HTML special characters '<', '>' and '&'
+ * replaced by their entities; every other byte passes through unchanged. */
+void
+printhtmlencoded(const char *s) {
+	const char *p;
+
+	for(p = s; *p != '\0'; p++) {
+		if(*p == '&')
+			fputs("&amp;", stdout);
+		else if(*p == '<')
+			fputs("&lt;", stdout);
+		else if(*p == '>')
+			fputs("&gt;", stdout);
+		else
+			putchar(*p);
+	}
+}
+
+int
+main(void) {
+	char *line = NULL, *fields[FieldLast];
+	unsigned long totalfeeds = 0, totalnew = 0;
+	unsigned int islink, isnew;
+	struct feed *feedcurrent = NULL, *feeds = NULL; /* start of feeds linked-list. */
+	time_t parsedtime, comparetime;
+	size_t size = 0;
+
+	tzset();
+	comparetime = time(NULL) - (3600 * 24); /* 1 day is old news */
+	/* static page header with inline CSS for the sidebar / items layout. */
+	fputs(
+		"<!DOCTYPE HTML>\n"
+		"<html dir=\"ltr\" lang=\"en\">\n"
+		"	<head>\n"
+		"		<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n"
+		"		<style type=\"text/css\">\n"
+		"			body {\n"
+		"				font-family: monospace;\n"
+		"				font-size: 9pt;\n"
+		"				color: #333;\n"
+		"				background-color: #fff;\n"
+		"				overflow: hidden;\n"
+		"			}\n"
+		"			#feedcontent td {\n"
+		"				white-space: nowrap;\n"
+		"			}\n"
+		"			#feedcontent h2 {\n"
+		"				font-size: 14pt;\n"
+		"			}\n"
+		"			#feedcontent a {\n"
+		"				display: block;\n"
+		"			}\n"
+		"			#feedcontent ul, #feedcontent li {\n"
+		"				list-style: none;\n"
+		"				padding: 0;\n"
+		"				margin: 0;\n"
+		"			}\n"
+		"			#feedcontent h2 a, #feedcontent ul li a {\n"
+		"				color: inherit;\n"
+		"			}\n"
+		"			#feedcontent ul li a {\n"
+		"				padding: 5px 3px 5px 10px;\n"
+		"			}\n"
+		"			#feedcontent div#sidebar {\n"
+		"				background-color: inherit;\n"
+		"				position: fixed;\n"
+		"				top: 0;\n"
+		"				left: 0;\n"
+		"				width: 175px;\n"
+		"				height: 100%;\n"
+		"				overflow: hidden;\n"
+		"				overflow-y: auto;\n"
+		"				z-index: 999;\n"
+		"			}\n"
+		"			#feedcontent div#items {\n"
+		"				left: 175px;\n"
+		"			}\n"
+		"			#feedcontent div#items-nosidebar {\n"
+		"				left: 0px;\n"
+		"			}\n"
+		"			#feedcontent div#items-nosidebar,\n"
+		"			#feedcontent div#items {\n"
+		"				position: absolute;\n"
+		"				height: 100%;\n"
+		"				top: 0;\n"
+		"				right: 0;\n"
+		"				overflow: auto;\n"
+		"				padding: 0 15px;\n"
+		"			}\n"
+		"		</style>\n"
+		"	</head>\n"
+		"	<body>\n"
+		"		<div id=\"feedcontent\">\n",
+		stdout);
+
+	/* items arrive grouped per feed; a change in the feed-name field starts
+	   a new section (new <table> and a new node in the feeds list). */
+	while(parseline(&line, &size, fields, FieldLast, stdin, FieldSeparator) > 0) {
+		/* first of feed section or new feed section. */
+		if(!totalfeeds || strcmp(feedcurrent->name, fields[FieldFeedName])) {
+			if(totalfeeds) { /* end previous one. */
+				fputs("</table>\n", stdout);
+				feedcurrent->next = feednew();
+				feedcurrent = feedcurrent->next;
+			} else {
+				feedcurrent = feednew();
+				feeds = feedcurrent; /* first item. */
+				fputs("\t\t<div id=\"items", stdout);
+				if(fields[FieldFeedName][0] == '\0') {
+					fputs("-nosidebar", stdout); /* set other id on div if no sidebar for styling */
+					showsidebar = 0;
+				}
+				fputs("\">\n", stdout);
+			}
+			if(!(feedcurrent->name = strdup(fields[FieldFeedName])))
+				die("can't allocate enough memory");
+			/* NOTE(review): the feed name is written unencoded below (cf.
+			   printhtmlencoded used for titles); '&' or '<' in a feed name
+			   would produce broken HTML -- confirm intended. */
+			if(fields[FieldFeedName][0] != '\0') {
+				fputs("<h2 id=\"", stdout);
+				printfeednameid(feedcurrent->name);
+				fputs("\"><a href=\"#", stdout);
+				printfeednameid(feedcurrent->name);
+				fputs("\">", stdout);
+				fputs(feedcurrent->name, stdout);
+				fputs("</a></h2>\n", stdout);
+			}
+			fputs("<table>", stdout);
+			totalfeeds++;
+		}
+		/* per-item flags and counters: an item is "new" when its timestamp
+		   is at most one day old. */
+		parsedtime = (time_t)strtol(fields[FieldUnixTimestamp], NULL, 10);
+		isnew = (parsedtime >= comparetime);
+		islink = (strlen(fields[FieldLink]) > 0);
+		totalnew += isnew;
+		feedcurrent->new += isnew;
+		feedcurrent->total++;
+
+		/* one table row per item; new items are rendered bold + underlined. */
+		fputs("<tr><td>", stdout);
+		printtime(parsedtime);
+		fputs("</td><td>", stdout);
+		if(isnew)
+			fputs("<b><u>", stdout);
+		if(islink) {
+			fputs("<a href=\"", stdout);
+			printlink(fields[FieldLink], fields[FieldFeedUrl]);
+			fputs("\">", stdout);
+		}
+		printhtmlencoded(fields[FieldTitle]);
+		if(islink)
+			fputs("</a>", stdout);
+		if(isnew)
+			fputs("</u></b>", stdout);
+		fputs("</td></tr>\n", stdout);
+	}
+	if(totalfeeds) {
+		fputs("</table>\n", stdout);
+		fputs("\t\t</div>\n", stdout); /* div items */
+	}
+	/* sidebar: one anchor link per named feed with its new-item count. */
+	if(showsidebar) {
+		fputs("\t\t<div id=\"sidebar\">\n\t\t\t<ul>\n", stdout);
+		for(feedcurrent = feeds; feedcurrent; feedcurrent = feedcurrent->next) {
+			if(!feedcurrent->name || feedcurrent->name[0] == '\0')
+				continue;
+			fputs("<li><a href=\"#", stdout);
+			printfeednameid(feedcurrent->name);
+			fputs("\">", stdout);
+			if(feedcurrent->new > 0)
+				fputs("<b><u>", stdout);
+			fputs(feedcurrent->name, stdout);
+			fprintf(stdout, " (%lu)", feedcurrent->new);
+			if(feedcurrent->new > 0)
+				fputs("</u></b>", stdout);
+			fputs("</a></li>\n", stdout);
+		}
+		fputs("\t\t\t</ul>\n\t\t</div>\n", stdout);
+	}
+	/* NOTE(review): <title> is emitted after </body> because the new-item
+	   count is only known here; that placement is invalid HTML -- confirm
+	   browsers tolerate it / consider buffering instead. */
+	fputs(
+		"	</div>\n"
+		"	</body>\n"
+		"	<title>Newsfeeds (",
+		stdout);
+	fprintf(stdout, "%lu", totalnew);
+	fputs(")</title>\n</html>", stdout);
+
+	free(line); /* free line */
+	feedsfree(feeds); /* free feeds linked-list */
+
+	return EXIT_SUCCESS;
+}
diff --git a/sfeed_opml_config.1 b/sfeed_opml_config.1
new file mode 100644
index 0000000..523f99e
--- /dev/null
+++ b/sfeed_opml_config.1
@@ -0,0 +1,11 @@
+.TH SFEED_OPML_CONFIG 1 sfeed\-VERSION
+.SH NAME
+sfeed_opml_config \- generate a sfeedrc config file based on an opml file
+.SH SYNOPSIS
+.B sfeed_opml_config
+.SH DESCRIPTION
+Reads the opml XML data from stdin and writes the config file text to stdout.
+.SH SEE ALSO
+.BR sfeed_update(1)
+.SH BUGS
+Please report them!
diff --git a/sfeed_opml_config.c b/sfeed_opml_config.c
new file mode 100644
index 0000000..0d74820
--- /dev/null
+++ b/sfeed_opml_config.c
@@ -0,0 +1,87 @@
+/* convert an opml file to sfeedrc file */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <expat.h> /* libexpat */
+
+XML_Parser parser; /* expat XML parser state */
+
+char * /* return the value of the (case-insensitive) attribute `name` from an
+          expat attribute list (alternating key/value, NULL-terminated), or
+          NULL when absent. */
+getattrvalue(const char **atts, const char *name) {
+	size_t i;
+
+	if(atts == NULL)
+		return NULL;
+	for(i = 0; atts[i] && atts[i + 1]; i += 2)
+		if(strcasecmp(atts[i], name) == 0)
+			return (char *)atts[i + 1];
+	return NULL;
+}
+
+/* expat start-element callback: every <outline> element becomes one
+ * "feed <name> <url>" line of the generated sfeedrc; the "text" attribute is
+ * preferred for the name, falling back to "title", then "unnamed".
+ * NOTE(review): name/url are not escaped; a '"' inside an attribute would
+ * break the generated shell config -- confirm acceptable. */
+void XMLCALL
+xml_handler_start_element(void *data, const char *name, const char **atts) {
+	char *feedurl = NULL, *feedname = NULL; /* fixed stray ";;" */
+
+	if(!strcasecmp(name, "outline")) {
+		if(!(feedname = getattrvalue(atts, "text")) &&
+		   !(feedname = getattrvalue(atts, "title")))
+			feedname = "unnamed";
+		if(!(feedurl = getattrvalue(atts, "xmlurl")))
+			feedurl = "";
+		printf("\tfeed \"%s\" \"%s\"\n", feedname, feedurl);
+	}
+}
+
+void XMLCALL
+xml_handler_end_element(void *data, const char *name) {
+	/* no-op: only start tags (<outline>) matter for generating the config;
+	   registered alongside the start handler in XML_SetElementHandler. */
+}
+
+int /* parse XML from stream using setup parser, return 1 on success, 0 on failure. */
+xml_parse_stream(XML_Parser parser, FILE *fp) {
+	char buffer[BUFSIZ];
+	int done = 0, len = 0;
+
+	/* feed the stream to expat chunk by chunk; `done` signals the final
+	 * chunk so expat can detect truncated documents.  Looping on `done`
+	 * (instead of feof()) removes the stray "} while(!done);" leftover from
+	 * a do-while rewrite and guarantees termination when a read error sets
+	 * ferror() without feof(). */
+	while(!done) {
+		len = fread(buffer, 1, sizeof(buffer), fp);
+		done = (feof(fp) || ferror(fp));
+		if(XML_Parse(parser, buffer, len, done) == XML_STATUS_ERROR && (len > 0)) {
+			if(XML_GetErrorCode(parser) == XML_ERROR_NO_ELEMENTS)
+				return 1; /* Ignore "no elements found" / empty document as an error */
+			fprintf(stderr, "sfeed_opml_config: error parsing xml %s at line %lu column %lu\n",
+			        XML_ErrorString(XML_GetErrorCode(parser)), (unsigned long)XML_GetCurrentLineNumber(parser),
+			        (unsigned long)XML_GetCurrentColumnNumber(parser));
+			return 0;
+		}
+	}
+	return 1;
+}
+
+/* write the static sfeedrc header to stdout, then one feed line per
+ * <outline> (emitted by the start-element handler while parsing stdin),
+ * then close the feeds() function.  Definition style made consistent with
+ * the other functions in this file; the parser-creation error message now
+ * ends with a newline. */
+int
+main(void) {
+	int status;
+
+	if(!(parser = XML_ParserCreate("UTF-8"))) {
+		fputs("sfeed_opml_config: can't create parser\n", stderr);
+		exit(EXIT_FAILURE);
+	}
+	XML_SetElementHandler(parser, xml_handler_start_element, xml_handler_end_element);
+
+	fputs(
+		"# paths\n"
+		"# NOTE: make sure to uncomment all these if you change it.\n"
+		"#sfeedpath=\"$HOME/.sfeed\"\n"
+		"#sfeedfile=\"$sfeedpath/feeds\"\n"
+		"#sfeedfilenew=\"$sfeedfile.new\"\n"
+		"\n"
+		"# list of feeds to fetch:\n"
+		"feeds() {\n"
+		"	# feed <name> <url> [encoding]\n", stdout);
+	status = xml_parse_stream(parser, stdin);
+	fputs("}\n", stdout);
+
+	XML_ParserFree(parser);
+
+	return status ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/sfeed_plain.1 b/sfeed_plain.1
new file mode 100644
index 0000000..3f396aa
--- /dev/null
+++ b/sfeed_plain.1
@@ -0,0 +1,15 @@
+.TH SFEED_PLAIN 1 sfeed\-VERSION
+.SH NAME
+sfeed_plain \- format feeds file (TSV) from sfeed_update to plain text
+.SH SYNOPSIS
+.B sfeed_plain
+.SH DESCRIPTION
+Format feeds file (TSV) from sfeed_update to plain text. Reads TSV data from
+stdin and writes plain text to stdout. For the exact TSV format see
+sfeed_update(1).
+.SH SEE ALSO
+.BR sfeed_html(1)
+.BR sfeed_update(1)
+.BR sfeed(1)
+.SH BUGS
+Please report them!
diff --git a/sfeed_plain.c b/sfeed_plain.c
new file mode 100644
index 0000000..d4045c9
--- /dev/null
+++ b/sfeed_plain.c
@@ -0,0 +1,42 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <time.h>
+#include "common.c"
+
+/* print `s` truncated to at most `len` UTF-8 characters (not bytes), padded
+ * with spaces up to `len` columns.  The character count advances on start
+ * bytes only; continuation bytes of the last counted character are still
+ * printed (the previous loop condition cut them off, emitting invalid UTF-8
+ * when a multibyte character sat exactly at the width limit). */
+void
+printutf8padded(const char *s, size_t len) {
+	size_t n = 0, i;
+
+	for(i = 0; s[i]; i++) {
+		if((s[i] & 0xc0) != 0x80) { /* start of character */
+			if(n == len)
+				break; /* character limit reached */
+			n++;
+		}
+		putchar(s[i]);
+	}
+	for(; n < len; n++)
+		putchar(' ');
+}
+
+int
+main(void) {
+	char *line = NULL, *fields[FieldLast];
+	time_t parsedtime, comparetime;
+	size_t size = 0;
+
+	tzset();
+	comparetime = time(NULL) - (3600 * 24); /* 1 day is old news */
+	/* one TSV item per input line: flag items newer than one day with 'N',
+	   pad feed name and title to fixed-width columns, then print the link. */
+	while(parseline(&line, &size, fields, FieldLast, stdin, FieldSeparator) > 0) {
+		parsedtime = (time_t)strtol(fields[FieldUnixTimestamp], NULL, 10);
+		printf(" %c ", (parsedtime >= comparetime) ? 'N' : ' ');
+		if(fields[FieldFeedName][0] != '\0')
+			printf("%-15.15s ", fields[FieldFeedName]);
+		printtime(parsedtime);
+		fputs(" ", stdout);
+		printutf8padded(fields[FieldTitle], 70);
+		fputs(" ", stdout);
+		printlink(fields[FieldLink], fields[FieldFeedUrl]);
+		putchar('\n');
+	}
+	free(line);
+	return EXIT_SUCCESS;
+}
diff --git a/sfeed_update b/sfeed_update
new file mode 100755
index 0000000..52b9728
--- /dev/null
+++ b/sfeed_update
@@ -0,0 +1,116 @@
+#!/bin/sh
+# update feeds, merge with old feeds.
+# NOTE: assumes "sfeed_*" files are in $PATH.
+
+# defaults
+sfeedpath="$HOME/.sfeed"
+sfeedfile="$sfeedpath/feeds"
+# temporary file for new feeds (for merging).
+sfeedfilenew="$sfeedfile.new"
+
+# load config (evaluate shellscript).
+# loadconfig(configfile)
+loadconfig() {
+	# pick the config file: argv[1] if given (made absolute), else the
+	# default location.
+	if [ -n "$1" ]; then
+		config=$(readlink -f "$1")
+	else
+		config="$HOME/.sfeed/sfeedrc"
+	fi
+
+	# source the config; it runs at this point so it can override the
+	# defaults set above (sfeedpath, sfeedfile, etc).
+	if [ -r "$config" ]; then
+		. "$config"
+	else
+		echo "Configuration file \"$config\" does not exist or is not readable." >&2
+		echo "See sfeedrc.example for an example." >&2
+		exit 1
+	fi
+}
+
+# merge raw files.
+# merge(oldfile, newfile)
+# NOTE(review): the quoted -t argument must be a literal TAB character (the
+# TSV field separator) -- confirm it survived copy/paste.
+merge() {
+	# unique sort by id, link, title.
+	# order by feedname (asc), feedurl (asc) and timestamp (desc).
+	# 2>/dev/null: the old file may not exist yet on the first run.
+	(cat "$1" "$2" 2> /dev/null) |
+		sort -t '	' -u -k7,7 -k4,4 -k3,3 |
+		sort -t '	' -k10,10 -k11,11 -k1r,1
+}
+
+# fetch a feed via HTTP/HTTPS etc.
+# fetchfeed(url, name)
+# The feed body is written to stdout; a done/fail status line goes to stderr.
+# Only pass -z (If-Modified-Since) when a previous update time is known:
+# curl complains about an illegal date format for an empty -z value.
+fetchfeed() {
+	if [ -n "$lastupdated" ]; then
+		curl -f -s -S -L --max-time 30 -z "$lastupdated" "$1"
+	else
+		curl -f -s -S -L --max-time 30 "$1"
+	fi
+	# $? here is curl's exit status (last command of the if above).
+	if [ $? = 0 ]; then
+		printf "%s\n" "[`date`] Fetching $2 [$1] ... done" >&2
+	else
+		printf "%s\n" "[`date`] Fetching $2 [$1] ... fail" >&2
+	fi
+}
+
+# add field after line, output to stdout.
+# addfield(field)
+# NOTE(review): the separator in the printf format must be a literal TAB
+# (the TSV field separator) -- confirm it survived copy/paste.
+addfield() {
+	# NOTE: IFS is set and restored to prevent stripping whitespace.
+	OLDIFS="$IFS"
+	IFS="
+"
+	while read -r line; do
+		printf "%s	%s\n" "${line}" "$1"
+	done
+	IFS="$OLDIFS"
+}
+
+# fetch and parse feed.
+# feed(name, url, encoding)
+# Writes the parsed items, with feed name and url appended as extra fields,
+# to a fresh tempfile under $TMPDIR (per sfeed_update(1), safe to run as a
+# parallel job from the sfeedrc feeds() function).
+# NOTE(review): tmpfile is not local; concurrent callers each overwrite it,
+# but cleanup removes the whole $TMPDIR so nothing is leaked.  The argument
+# to addfield contains a literal TAB between $1 and $2 -- confirm.
+# NOTE(review): mktemp -p is a GNU extension -- confirm target platforms.
+feed() {
+	tmpfile=$(mktemp -p "$TMPDIR")
+	(if [ "$3" = "" ]; then
+		# don't use iconv if encoding not set in config.
+		fetchfeed "$2" "$1" | sfeed | addfield "$1	$2"
+	else
+		# use iconv to convert encoding to UTF-8.
+		fetchfeed "$2" "$1" | iconv -cs -f "$3" -t "utf-8" | sfeed | addfield "$1	$2"
+	fi) > "$tmpfile"
+}
+
+terminated() {
+	# SIGTERM handler: flag the main script to stop and clean up.
+	isrunning="0"
+}
+
+cleanup() {
+	# remove temporary files
+	# NOTE(review): tmpfile may be unset when terminated early; rm -f on an
+	# empty argument is harmless.
+	rm -rf "$tmpfile" "$TMPDIR"
+}
+
+# load config file.
+loadconfig "$1"
+# fetch feeds and store in temporary file.
+TMPDIR=$(mktemp -d -t "sfeed_XXXXXX")
+# get date of last modified feedfile in format:
+# YYYYmmdd HH:MM:SS [+-][0-9]*
+# NOTE(review): stat -c is GNU coreutils; BSD stat uses -f -- confirm target
+# platforms.
+lastupdated=$(stat -c "%y" "$sfeedfile" 2> /dev/null | cut -c 1-4,6-7,9-10,11-19,30-)
+# Kill whole current process group on ^C.
+isrunning="1"
+trap -- "terminated" "15" # SIGTERM: signal to terminate parent.
+trap -- "kill -TERM -$$" "2" # SIGINT: kill all running childs >:D
+# fetch feeds specified in config file.
+feeds
+# make sure path exists.
+mkdir -p "$sfeedpath"
+# wait till all feeds are fetched (allows running in parallel).
+wait
+[ "$isrunning" = "0" ] && cleanup && exit 1 # if terminated cleanup.
+# concat all individual feed files to a single file.
+# NOTE: mktemp uses $TMPDIR for temporary directory.
+tmpfile=$(mktemp -t "sfeed_XXXXXX")
+find "$TMPDIR" -type f -exec cat {} \; > "$tmpfile"
+# get new data and merge with old.
+merge "$sfeedfile" "$tmpfile" > "$sfeedfilenew"
+# overwrite old file with updated file
+mv "$sfeedfilenew" "$sfeedfile"
+# cleanup temporary files etc.
+cleanup
diff --git a/sfeed_update.1 b/sfeed_update.1
new file mode 100644
index 0000000..e9cffc1
--- /dev/null
+++ b/sfeed_update.1
@@ -0,0 +1,82 @@
+.TH SFEED_UPDATE 1 sfeed\-VERSION
+.SH NAME
+sfeed_update \- update feeds and merge with old feeds
+.SH SYNOPSIS
+.B sfeed_update
+.RB [configfile]
+.SH OPTIONS
+.TP
+.B [configfile]
+config file, if not specified uses the location $HOME/.sfeed/sfeedrc by default (see FILES READ section for more information).
+.SH DESCRIPTION
+.TP
+Update feeds and merge with old feeds in the file $HOME/.sfeed/feeds by default.
+.SH TAB-SEPARATED FORMAT
+The items are saved in a TSV-like format except newlines, tabs and
+backslash are escaped with \\ (\\n, \\t and \\\\). Carriage returns (\\r) are
+removed.
+.TP
+.B item timestamp (unix timestamp in GMT+0)
+string
+.TP
+.B item timestamp (formatted)
+string (YYYY-mm-dd HH:MM:SS tzname[+-]HHMM)
+.TP
+.B item title
+string
+.TP
+.B item link
+string
+.TP
+.B item description
+string
+.TP
+.B item contenttype
+string (html or plain)
+.TP
+.B item id
+string
+.TP
+.B item author
+string
+.TP
+.B feed type
+string (rss or atom)
+.TP
+.B feed name
+string (extra field added by sfeed_update)
+.TP
+.B feed url
+string (extra field added by sfeed_update)
+.SH FILES READ
+.TP
+.B sfeedrc
+Config file, see the sfeedrc.example file for an example.
+This file is evaluated as a shellscript in sfeed_update.
+You can for example override the fetchfeed() function to
+use wget, fetch or an other download program or you can
+override the merge() function to change the merge logic.
+The function feeds() is called to fetch the feeds. The
+function feed() can safely be executed as a parallel job
+in your sfeedrc config file to speedup updating.
+.SH FILES WRITTEN
+.TP
+.B feeds
+Tab-separated format containing all feeds.
+The sfeed_update script merges new items with this file.
+.TP
+.B feeds.new
+Temporary file used by sfeed_update to merge items.
+.SH EXAMPLES
+.TP
+To update feeds and format the feeds file:
+.nf
+sfeed_update "configfile"
+sfeed_plain < $HOME/.sfeed/feeds > $HOME/.sfeed/feeds.txt
+sfeed_html < $HOME/.sfeed/feeds > $HOME/.sfeed/feeds.html
+.fi
+.SH SEE ALSO
+.BR sh(1)
+.BR sfeed_plain(1)
+.BR sfeed_html(1)
+.SH BUGS
+Please report them!
diff --git a/sfeedrc.example b/sfeedrc.example
new file mode 100644
index 0000000..d25777a
--- /dev/null
+++ b/sfeedrc.example
@@ -0,0 +1,17 @@
+# paths
+# NOTE: make sure to uncomment all these if you change it.
+#sfeedpath="$HOME/.sfeed"
+#sfeedfile="$sfeedpath/feeds"
+#sfeedfilenew="$sfeedfile.new"
+
+# list of feeds to fetch:
+feeds() {
+ # feed <name> <url> [encoding]
+ feed "codemadness" "http://www.codemadness.nl/blog/rss.xml"
+ feed "explosm" "http://feeds.feedburner.com/Explosm"
+ feed "linux kernel" "http://kernel.org/kdist/rss.xml" "iso-8859-1"
+ feed "phoronix" "http://feeds.feedburner.com/Phoronix"
+ feed "slashdot" "http://rss.slashdot.org/Slashdot/slashdot"
+ feed "tweakers" "http://feeds.feedburner.com/tweakers/mixed" "iso-8859-1"
+ feed "xkcd" "http://xkcd.com/atom.xml"
+}