author     Hiltjo Posthuma <hiltjo@codemadness.org>  2012-08-03 12:03:17 +0200
committer  Hiltjo Posthuma <hiltjo@codemadness.org>  2012-08-03 12:03:17 +0200
commit     db5ffcaa8c133d249aafa4a64f3d827dd513d995 (patch)
tree       dd3ece08c9f65ebcab6cd7406d87b6b932e19900
New initial repo
Signed-off-by: Hiltjo Posthuma <hiltjo@codemadness.org>
-rw-r--r--  .gitignore             5
-rw-r--r--  CREDITS                1
-rw-r--r--  LICENSE               21
-rw-r--r--  Makefile              95
-rw-r--r--  README               143
-rw-r--r--  TODO                   4
-rw-r--r--  common.c             116
-rw-r--r--  config.mk             28
-rw-r--r--  sfeed.1               44
-rw-r--r--  sfeed.c              477
-rw-r--r--  sfeed_html.1          14
-rw-r--r--  sfeed_html.c         230
-rw-r--r--  sfeed_opml_config.1   11
-rw-r--r--  sfeed_opml_config.c   87
-rw-r--r--  sfeed_plain.1         15
-rw-r--r--  sfeed_plain.c         42
-rwxr-xr-x  sfeed_update         116
-rw-r--r--  sfeed_update.1        82
-rw-r--r--  sfeedrc.example       17
19 files changed, 1548 insertions(+), 0 deletions(-)
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..abdbd0f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+*.o
+sfeed
+sfeed_plain
+sfeed_html
+sfeed_opml_config
diff --git a/CREDITS b/CREDITS
new file mode 100644
index 0000000..a383a90
--- /dev/null
+++ b/CREDITS
@@ -0,0 +1 @@
+raph_ael on #suckless for the idea for an opml converter
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..91da394
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT/X Consortium License
+
+© 2011-2012 Hiltjo Posthuma <hiltjo@codemadness.org>
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..329b97b
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,95 @@
+# sfeed - simple RSS and Atom parser (and programs to add reader functionality).
+
+include config.mk
+
+NAME = sfeed
+SRC = sfeed.c sfeed_plain.c sfeed_html.c sfeed_opml_config.c
+OBJ = ${SRC:.c=.o}
+
+all: options sfeed sfeed_plain sfeed_html sfeed_opml_config
+
+options:
+ @echo ${NAME} build options:
+ @echo "CFLAGS = ${CFLAGS}"
+ @echo "LDFLAGS = ${LDFLAGS}"
+ @echo "CC = ${CC}"
+
+.c.o:
+ @echo CC $<
+ @${CC} -c ${CFLAGS} $<
+
+${OBJ}: config.mk
+
+sfeed: ${OBJ}
+ @echo CC -o $@
+ @${CC} -o $@ sfeed.c ${LDFLAGS} ${LIBEXPAT}
+
+sfeed_opml_config: sfeed_opml_config.o
+ @echo CC -o $@
+ @${CC} -o $@ sfeed_opml_config.o ${LDFLAGS} ${LIBEXPAT}
+
+sfeed_plain: sfeed_plain.o
+ @echo CC -o $@
+ @${CC} -o $@ sfeed_plain.o ${LDFLAGS}
+
+sfeed_html: sfeed_html.o
+ @echo CC -o $@
+ @${CC} -o $@ sfeed_html.o ${LDFLAGS}
+
+clean:
+ @echo cleaning
+ @rm -f sfeed sfeed_plain sfeed_html sfeed_opml_config ${OBJ} ${NAME}-${VERSION}.tar.gz
+
+dist: clean
+ @echo creating dist tarball
+ @mkdir -p ${NAME}-${VERSION}
+ @cp -R LICENSE Makefile README config.mk \
+ TODO CREDITS sfeedrc.example ${SRC} common.c sfeed_update \
+ sfeed.1 sfeed_update.1 sfeed_plain.1 sfeed_html.1 sfeed_opml_config.1 \
+ ${NAME}-${VERSION}
+ @tar -cf ${NAME}-${VERSION}.tar ${NAME}-${VERSION}
+ @gzip ${NAME}-${VERSION}.tar
+ @rm -rf ${NAME}-${VERSION}
+
+install: all
+ @echo installing executable file to ${DESTDIR}${PREFIX}/bin
+ @mkdir -p ${DESTDIR}${PREFIX}/bin
+ @cp -f sfeed sfeed_html sfeed_plain sfeed_update sfeed_opml_config \
+ ${DESTDIR}${PREFIX}/bin
+ @chmod 755 ${DESTDIR}${PREFIX}/bin/sfeed \
+ ${DESTDIR}${PREFIX}/bin/sfeed_html \
+ ${DESTDIR}${PREFIX}/bin/sfeed_plain \
+ ${DESTDIR}${PREFIX}/bin/sfeed_update \
+ ${DESTDIR}${PREFIX}/bin/sfeed_opml_config
+ @mkdir -p ${DESTDIR}${PREFIX}/share/sfeed
+ @cp -f sfeedrc.example ${DESTDIR}${PREFIX}/share/${NAME}
+ @echo installing manual pages to ${DESTDIR}${MANPREFIX}/man1
+ @mkdir -p ${DESTDIR}${MANPREFIX}/man1
+ @sed "s/VERSION/${VERSION}/g" < sfeed.1 > ${DESTDIR}${MANPREFIX}/man1/sfeed.1
+ @sed "s/VERSION/${VERSION}/g" < sfeed_update.1 > ${DESTDIR}${MANPREFIX}/man1/sfeed_update.1
+ @sed "s/VERSION/${VERSION}/g" < sfeed_plain.1 > ${DESTDIR}${MANPREFIX}/man1/sfeed_plain.1
+ @sed "s/VERSION/${VERSION}/g" < sfeed_html.1 > ${DESTDIR}${MANPREFIX}/man1/sfeed_html.1
+ @sed "s/VERSION/${VERSION}/g" < sfeed_opml_config.1 > ${DESTDIR}${MANPREFIX}/man1/sfeed_opml_config.1
+ @chmod 644 ${DESTDIR}${MANPREFIX}/man1/sfeed.1 \
+ ${DESTDIR}${MANPREFIX}/man1/sfeed_update.1 \
+ ${DESTDIR}${MANPREFIX}/man1/sfeed_plain.1 \
+ ${DESTDIR}${MANPREFIX}/man1/sfeed_html.1 \
+ ${DESTDIR}${MANPREFIX}/man1/sfeed_opml_config.1
+
+uninstall:
+ @echo removing executable file from ${DESTDIR}${PREFIX}/bin
+ @rm -f ${DESTDIR}${PREFIX}/bin/sfeed \
+ ${DESTDIR}${PREFIX}/bin/sfeed_html \
+ ${DESTDIR}${PREFIX}/bin/sfeed_plain \
+ ${DESTDIR}${PREFIX}/bin/sfeed_update \
+ ${DESTDIR}${PREFIX}/bin/sfeed_opml_config \
+ ${DESTDIR}${PREFIX}/share/${NAME}/sfeedrc.example
+ @-rmdir ${DESTDIR}${PREFIX}/share/${NAME}
+ @echo removing manual pages from ${DESTDIR}${MANPREFIX}/man1
+ @rm -f ${DESTDIR}${MANPREFIX}/man1/sfeed.1 \
+ ${DESTDIR}${MANPREFIX}/man1/sfeed_update.1 \
+ ${DESTDIR}${MANPREFIX}/man1/sfeed_plain.1 \
+ ${DESTDIR}${MANPREFIX}/man1/sfeed_html.1 \
+ ${DESTDIR}${MANPREFIX}/man1/sfeed_opml_config.1
+
+.PHONY: all options clean dist install uninstall
diff --git a/README b/README
new file mode 100644
index 0000000..cbdb343
--- /dev/null
+++ b/README
@@ -0,0 +1,143 @@
+sfeed v0.8
+----------
+
+Simple RSS and Atom parser (and some format programs).
+
+
+Dependencies
+------------
+
+- C compiler.
+- expat library (used by sfeed.c and sfeed_opml_config.c,
+ http://expat.sourceforge.net/).
+
+
+Optional dependencies
+---------------------
+
+- POSIX shell (used by sfeed_update).
+- curl (used by sfeed_update, http://curl.haxx.se/).
+- iconv (used by sfeed_update, http://www.gnu.org/software/libiconv/).
+
+
+Files
+-----
+
+sfeed - Binary (from sfeed.c); read XML RSS or Atom feed data from
+ stdin. Write feed data in tab-separated format to stdout.
+sfeed_update - Shellscript; update feeds and merge with old feeds in the
+ file $HOME/.sfeed/feeds by default.
+sfeed_plain - Format feeds file (TSV) from sfeed_update to plain text.
+sfeed_html - Format feeds file (TSV) from sfeed_update to HTML.
+sfeed_opml_config - Generate a sfeedrc config file based on an opml file.
+sfeedrc.example - Example config file.
+
+
+Files read at runtime by sfeed_update
+-------------------------------------
+
+sfeedrc - Config file. This file is evaluated as a shellscript in
+ sfeed_update. You can for example override the fetchfeed()
+ function to use wget, fetch or another download program, or
+ you can override the merge() function to change the merge
+ logic. The function feeds() is called to fetch the feeds.
+ The function feed() can safely be executed as a parallel
+ job in your sfeedrc config file to speed up updating.
+
+
+Files written at runtime by sfeed_update
+----------------------------------------
+
+feeds - Tab-separated format containing all feeds.
+ The sfeed_update script merges new items with this file.
+feeds.new - Temporary file used by sfeed_update to merge items.
+
+
+TAB-SEPARATED format
+--------------------
+
+The items are saved in a TSV-like format, except that newlines, tabs and
+backslashes are escaped with \ (\n, \t and \\). Whitespace other than
+spaces is removed.
+
+The timestamp field is converted to a unix timestamp. The formatted
+timestamp is also stored as a separate field. The other fields are left
+untouched (including HTML).
+
+The order and format of the fields are:
+
+item unix timestamp - string unix timestamp (GMT+0)
+item formatted timestamp - string timestamp (YYYY-mm-dd HH:MM:SS tz[+-]HHMM)
+item title - string
+item link - string
+item description - string
+item contenttype - string ("html" or "plain")
+item id - string
+item author - string
+feed type - string ("rss" or "atom")
+feed name - string (extra field added by sfeed_update)
+feed url - string (extra field added by sfeed_update)
+
+
+Usage
+-----
+
+To build and install (respects DESTDIR and PREFIX variables):
+
+make install
+
+
+Generate a sfeedrc config file from your exported list of feeds in opml
+format:
+
+sfeed_opml_config < opmlfile.xml > $HOME/.sfeed/sfeedrc
+
+
+To update feeds and format the feeds file (configfile argument is optional):
+
+sfeed_update "configfile"
+sfeed_plain < $HOME/.sfeed/feeds > $HOME/.sfeed/feeds.txt
+sfeed_html < $HOME/.sfeed/feeds > $HOME/.sfeed/feeds.html
+
+
+Example script to view feeds with dmenu, opens selected url in $BROWSER:
+
+url=$(sfeed_plain < "$HOME/.sfeed/feeds" | dmenu -l 35 -i |
+ sed 's@^.* \([a-zA-Z]*://\)\(.*\)$@\1\2@')
+[ ! "$url" = "" ] && $BROWSER "$url"
+
+
+or to view in your browser:
+
+$BROWSER "$HOME/.sfeed/feeds.html"
+
+
+or to view in your editor:
+
+$EDITOR "$HOME/.sfeed/feeds.txt"
+
+
+Tip to remove items older than a date (change time="YYYY MM DD HH MM SS"):
+
+gawk -F '\t' 'BEGIN {
+ time = mktime("2012 01 01 12 34 56");
+}
+{
+ if(int($1) >= int(time)) {
+ print $0;
+ }
+}' < feeds > feeds.clean
+
+mv feeds.clean feeds
+
+
+License
+-------
+
+MIT, see LICENSE file.
+
+
+Author
+------
+
+Hiltjo Posthuma <hiltjo@codemadness.org>
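
The TAB-SEPARATED format described in the README above is easy to consume
from other programs. Below is a minimal C sketch that reads one feeds line
from stdin, splits it on tabs and unescapes \n, \t and \\; the unescape()
helper, the fixed-size line buffer and the field-name table are illustrative
only and not part of sfeed (sfeed_plain.c and sfeed_html.c below do the
splitting with parseline() from common.c).

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* undo the escaping described in the README: \n, \t and \\ */
static void
unescape(char *s) {
	char *w = s;

	for(; *s; s++) {
		if(*s == '\\' && s[1]) {
			s++;
			*w++ = (*s == 'n') ? '\n' : (*s == 't') ? '\t' : *s;
		} else
			*w++ = *s;
	}
	*w = '\0';
}

int
main(void) {
	/* field order as documented in the README above */
	const char *names[] = { "unix timestamp", "formatted timestamp",
		"title", "link", "description", "contenttype", "id",
		"author", "feed type", "feed name", "feed url" };
	char line[8192], *p, *next;
	size_t i;

	if(!fgets(line, sizeof(line), stdin))
		return EXIT_FAILURE;
	line[strcspn(line, "\n")] = '\0';
	for(p = line, i = 0; p && i < sizeof(names) / sizeof(names[0]); i++, p = next) {
		if((next = strchr(p, '\t')))
			*next++ = '\0'; /* terminate this field, continue at the next */
		unescape(p);
		printf("%-20s: %s\n", names[i], p);
	}
	return EXIT_SUCCESS;
}
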
diff --git a/TODO b/TODO
new file mode 100644
index 0000000..a2c081d
--- /dev/null
+++ b/TODO
@@ -0,0 +1,4 @@
+[ ] opml export script (WIP).
+[ ] rename sfeed_opml_config to sfeed_opml_import.
+[ ] sfeed_update / sfeedrc: add base siteurl as a parameter for the feed function, for feeds located at another domain, for example feedburner.com
+[ ] test opml import / export scripts with thunderbird, google reader, snownews and rssowl.
diff --git a/common.c b/common.c
new file mode 100644
index 0000000..91ac9ca
--- /dev/null
+++ b/common.c
@@ -0,0 +1,116 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <time.h>
+#include <ctype.h>
+
+enum { FieldUnixTimestamp = 0, FieldTimeFormatted, FieldTitle, FieldLink,
+ FieldContent, FieldContentType, FieldId, FieldAuthor, FieldFeedType,
+ FieldFeedName, FieldFeedUrl, FieldLast };
+
+const int FieldSeparator = '\t';
+
+char *
+afgets(char **p, size_t *size, FILE *fp) {
+ char buf[BUFSIZ], *alloc = NULL;
+ size_t n, len = 0, allocsiz;
+ int end = 0;
+
+ while(fgets(buf, sizeof(buf), fp)) {
+ n = strlen(buf);
+ if(buf[n - 1] == '\n') { /* don't store newlines. */
+ buf[n - 1] = '\0';
+ n--;
+ end = 1; /* newline found, end */
+ }
+ len += n;
+ allocsiz = len + 1;
+ if(allocsiz > *size) {
+ if((alloc = realloc(*p, allocsiz))) {
+ *p = alloc;
+ *size = allocsiz;
+ } else {
+ free(*p);
+ *p = NULL;
+ fputs("error: could not realloc\n", stderr);
+ exit(EXIT_FAILURE);
+ return NULL;
+ }
+ }
+ strncpy((*p + (len - n)), buf, n);
+ if(end || feof(fp))
+ break;
+ }
+ if(*p && len > 0) {
+ (*p)[len] = '\0';
+ return *p;
+ }
+ return NULL;
+}
+
+void /* print link; if link is relative use baseurl to make it absolute */
+printlink(const char *link, const char *baseurl) {
+ const char *ebaseproto, *ebasedomain, *p;
+ int isrelative;
+
+ /* protocol part */
+ for(p = link; *p && (isalpha(*p) || isdigit(*p) || *p == '+' || *p == '-' || *p == '.'); p++);
+ isrelative = strncmp(p, "://", strlen("://"));
+ if(isrelative) { /* relative link (baseurl is used). */
+ if((ebaseproto = strstr(baseurl, "://"))) {
+ ebaseproto += strlen("://");
+ fwrite(baseurl, 1, ebaseproto - baseurl, stdout);
+ } else {
+ ebaseproto = baseurl;
+ if(*baseurl || (link[0] == '/' && link[1] == '/'))
+ fputs("http://", stdout);
+ }
+ if(link[0] == '/') { /* relative to baseurl domain (not path). */
+ if(link[1] == '/') /* absolute url but with protocol from baseurl. */
+ link += 2;
+ else if((ebasedomain = strchr(ebaseproto, '/'))) /* relative to baseurl and baseurl path. */
+ fwrite(ebaseproto, 1, ebasedomain - ebaseproto, stdout);
+ else
+ fputs(ebaseproto, stdout);
+ } else if((ebasedomain = strrchr(ebaseproto, '/'))) /* relative to baseurl and baseurl path. */
+ fwrite(ebaseproto, 1, ebasedomain - ebaseproto + 1, stdout);
+ else {
+ fputs(ebaseproto, stdout);
+ if(*baseurl && *link)
+ fputc('/', stdout);
+ }
+ }
+ fputs(link, stdout);
+}
+
+unsigned int
+parseline(char **line, size_t *size, char **fields, unsigned int maxfields, FILE *fp, int separator) {
+ unsigned int i = 0;
+ char *prev, *s;
+
+ if(afgets(line, size, fp)) {
+ for(prev = *line; (s = strchr(prev, separator)) && i <= maxfields; i++) {
+ *s = '\0'; /* null terminate string. */
+ fields[i] = prev;
+ prev = s + 1;
+ }
+ fields[i] = prev;
+ for(i++; i < maxfields; i++) /* make non-parsed fields empty. */
+ fields[i] = "";
+ }
+ return i;
+}
+
+void
+printtime(time_t t) {
+ char buf[32];
+ struct tm temp = { 0 }, *mktm;
+
+ if(!(mktm = localtime_r(&t, &temp)))
+ return;
+ mktm->tm_isdst = -1;
+
+ if(!strftime(buf, sizeof(buf) - 1, "%Y-%m-%d %H:%M", mktm))
+ return;
+ fputs(buf, stdout);
+}
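
Since common.c is #include'd directly by the formatting tools (see
sfeed_plain.c and sfeed_html.c below), its helpers can be exercised from a
small throwaway main. A sketch of how printlink() above resolves links
against a base URL; the file name link_demo.c and the sample URLs are made
up for illustration.

/* link_demo.c - assumes it sits next to common.c in the source tree */
#include "common.c"

int
main(void) {
	/* absolute link: printed as-is */
	printlink("http://example.org/post/1", "http://example.org/");
	putchar('\n');
	/* leading "/": relative to the domain of the base URL */
	printlink("/feed/item.html", "http://example.org/blog/");
	putchar('\n');
	/* no leading "/": relative to the path of the base URL */
	printlink("item.html", "http://example.org/blog/");
	putchar('\n');
	/* "//": protocol-relative, protocol is taken from the base URL */
	printlink("//cdn.example.org/item.html", "http://example.org/");
	putchar('\n');
	return EXIT_SUCCESS;
}

Compiled with the CFLAGS from config.mk, this prints
http://example.org/post/1, http://example.org/feed/item.html,
http://example.org/blog/item.html and http://cdn.example.org/item.html,
one per line.
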
diff --git a/config.mk b/config.mk
new file mode 100644
index 0000000..8bddcad
--- /dev/null
+++ b/config.mk
@@ -0,0 +1,28 @@
+# sfeed version
+VERSION = 0.8
+
+# customize below to fit your system
+
+# paths
+PREFIX = /usr/local
+MANPREFIX = ${PREFIX}/share/man
+
+# includes and libs
+INCS =
+LIBEXPAT = -lexpat
+LIBS = -lc
+
+# flags
+#CFLAGS = -Os -D_POSIX_C_SOURCE=200112L -D_XOPEN_SOURCE=500 -DVERSION=\"${VERSION}\"
+#LDFLAGS = -s ${LIBS}
+
+# debug
+CFLAGS = -g -O0 -pedantic -Wall -Wextra -D_POSIX_C_SOURCE=200112L -D_XOPEN_SOURCE=700 -DVERSION=\"${VERSION}\"
+LDFLAGS = ${LIBS}
+
+# Solaris
+#CFLAGS = -fast ${INCS} -DVERSION=\"${VERSION}\"
+#LDFLAGS = ${LIBS}
+
+# compiler and linker
+CC = cc
diff --git a/sfeed.1 b/sfeed.1
new file mode 100644
index 0000000..04227d8
--- /dev/null
+++ b/sfeed.1
@@ -0,0 +1,44 @@
+.TH SFEED 1 sfeed\-VERSION
+.SH NAME
+sfeed \- simple RSS and Atom parser
+.SH SYNOPSIS
+.B sfeed
+.SH DESCRIPTION
+Read XML RSS or Atom feed data from stdin. Write feed data in a
+tab-separated format to stdout.
+.SH TAB-SEPARATED FORMAT
+The items are saved in a TSV-like format except newlines, tabs and
+backslash are escaped with \\ (\\n, \\t and \\\\). Carriage returns (\\r) are
+removed.
+.TP
+.B item timestamp (unix timestamp in GMT+0)
+string
+.TP
+.B item timestamp (formatted)
+string (YYYY-mm-dd HH:MM:SS tzname[+-]HHMM)
+.TP
+.B item title
+string
+.TP
+.B item link
+string
+.TP
+.B item description
+string
+.TP
+.B item contenttype
+string (html or plain)
+.TP
+.B item id
+string
+.TP
+.B item author
+string
+.TP
+.B feed type
+string (rss or atom)
+.SH SEE ALSO
+.BR sh(1)
+.BR sfeed_update(1)
+.SH BUGS
+Please report them!
diff --git a/sfeed.c b/sfeed.c
new file mode 100644
index 0000000..b83351f
--- /dev/null
+++ b/sfeed.c
@@ -0,0 +1,477 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <string.h>
+#include <time.h>
+#include <ctype.h>
+#include <expat.h> /* libexpat */
+
+enum { FeedTypeNone = 0, FeedTypeRSS = 1, FeedTypeAtom = 2, FeedTypeLast = 3 };
+const char *feedtypes[] = { "", "rss", "atom" };
+
+enum { ContentTypeNone = 0, ContentTypePlain = 1, ContentTypeHTML = 2, ContentTypeLast = 3 };
+const char *contenttypes[] = { "", "plain", "html" };
+
+typedef struct string { /* String data / pool */
+ char *data; /* data */
+ size_t len; /* string length */
+ size_t bufsiz; /* allocated size */
+} String;
+
+typedef struct feeditem { /* Feed item */
+ String timestamp;
+ String title;
+ String link;
+ String content;
+ int contenttype; /* ContentTypePlain or ContentTypeHTML */
+ String id;
+ String author;
+ int feedtype; /* FeedTypeRSS or FeedTypeAtom */
+} FeedItem;
+
+void die(const char *s);
+void cleanup(void);
+
+const int FieldSeparator = '\t';
+FeedItem feeditem; /* data for current feed item */
+char tag[1024]; /* current XML tag being parsed. */
+char feeditemtag[1024]; /* current tag _inside_ a feeditem */
+XML_Parser parser; /* expat XML parser state */
+int incdata = 0;
+char *standardtz = NULL; /* TZ variable at start of program */
+
+void
+string_clear(String *s) {
+ if(s->data)
+ s->data[0] = '\0'; /* clear string only; don't free, prevents
+ unnecessary reallocation */
+ s->len = 0;
+}
+
+void
+string_buffer_init(String *s, size_t len) {
+ if(!(s->data = malloc(len)))
+ die("can't allocate enough memory");
+ s->bufsiz = len;
+ string_clear(s);
+}
+
+void
+string_free(String *s) {
+ free(s->data);
+ s->data = NULL;
+ s->bufsiz = 0;
+ s->len = 0;
+}
+
+int
+string_buffer_expand(String *s, size_t newlen) {
+ char *p;
+ size_t alloclen;
+ /* check if allocation is necessary, don't shrink buffer */
+ if(!s->data || (newlen > s->bufsiz)) {
+ /* should be more than bufsiz of course */
+ for(alloclen = 16; alloclen <= newlen; alloclen *= 2);
+ if(!(p = realloc(s->data, alloclen))) {
+ string_free(s); /* free previous allocation */
+ die("can't allocate enough memory");
+ }
+ s->bufsiz = alloclen;
+ s->data = p;
+ }
+ return s->bufsiz;
+}
+
+void
+string_append(String *s, const char *data, size_t len) {
+ string_buffer_expand(s, s->len + len);
+ memcpy(s->data + s->len, data, len);
+ s->len += len;
+ s->data[s->len] = '\0';
+}
+
+void /* cleanup parser, free allocated memory, etc */
+cleanup(void) {
+ XML_ParserFree(parser);
+ string_free(&feeditem.timestamp);
+ string_free(&feeditem.title);
+ string_free(&feeditem.link);
+ string_free(&feeditem.content);
+ string_free(&feeditem.id);
+ string_free(&feeditem.author);
+}
+
+void /* print error message to stderr */
+die(const char *s) {
+ fputs("sfeed: ", stderr);
+ fputs(s, stderr);
+ fputc('\n', stderr);
+ cleanup();
+ exit(EXIT_FAILURE);
+}
+
+void
+gettimetz(const char *s, char *buf, size_t bufsiz) {
+ const char *p = s;
+ int tzhour = 0, tzmin = 0;
+ char tzname[128] = "", *t = NULL;
+ unsigned int i;
+
+ buf[0] = '\0';
+ for(; *p && isspace(*p); p++); /* skip whitespace */
+ /* detect time offset, assume time offset isn't specified in the first 18 characters */
+ for(i = 0; *p && ((*p != '+' && *p != '-') || i <= 18); p++, i++);
+
+ if(isalpha(*p)) {
+ if(*p == 'Z' || *p == 'z') {
+ strncpy(buf, "GMT+00:00", bufsiz);
+ return;
+ } else {
+ for(i = 0, t = &tzname[0]; i < (sizeof(tzname) - 1) && (*p && isalpha(*p)); i++)
+ *(t++) = *(p++);
+ *t = '\0';
+ }
+ } else
+ strncpy(tzname, "GMT", sizeof(tzname) - 1);
+ if(!(*p)) {
+ strncpy(buf, tzname, bufsiz);
+ return;
+ }
+ /* NOTE: reverses time offsets for TZ */
+ if((sscanf(p, "+%02d:%02d", &tzhour, &tzmin)) > 0)
+ snprintf(buf, bufsiz, "%s-%02d:%02d", tzname, tzhour, tzmin);
+ else if((sscanf(p, "-%02d:%02d", &tzhour, &tzmin)) > 0)
+ snprintf(buf, bufsiz, "%s+%02d:%02d", tzname, tzhour, tzmin);
+ else if((sscanf(p, "+%02d%02d", &tzhour, &tzmin)) > 0)
+ snprintf(buf, bufsiz, "%s-%02d:%02d", tzname, tzhour, tzmin);
+ else if((sscanf(p, "-%02d%02d", &tzhour, &tzmin)) > 0)
+ snprintf(buf, bufsiz, "%s+%02d:%02d", tzname, tzhour, tzmin);
+ else if(sscanf(p, "+%d", &tzhour) > 0)
+ snprintf(buf, bufsiz, "%s-%02d:00", tzname, tzhour);
+ else if(sscanf(p, "-%d", &tzhour) > 0)
+ snprintf(buf, bufsiz, "%s+%02d:00", tzname, tzhour);
+}
+
+time_t
+parsetime(const char *s, char *buf, size_t bufsiz) {
+ struct tm tm = { 0 };
+ time_t t = 0;
+ char timebuf[64], tz[256], *p;
+
+ if(buf)
+ buf[0] = '\0';
+ gettimetz(s, tz, sizeof(tz) - 1);
+ if(!standardtz || strcmp(standardtz, tz)) {
+ if(!strcmp(tz, "")) { /* restore TZ */
+ if(standardtz)
+ setenv("TZ", standardtz, 1);
+ else
+ unsetenv("TZ");
+ }
+ else
+ setenv("TZ", tz, 1);
+ tzset();
+ }
+ if((strptime(s, "%Y-%m-%dT%H:%M:%SZ", &tm)) ||
+ (strptime(s, "%Y-%m-%d %H:%M:%S", &tm)) ||
+ (strptime(s, "%a, %d %b %Y %H:%M:%S", &tm)) ||
+ (strptime(s, "%Y-%m-%dT%H:%M:%S", &tm))) {
+ tm.tm_isdst = -1; /* detect Daylight Saving Time */
+ if((t = mktime(&tm)) == -1)
+ t = 0;
+ if(buf && (strftime(timebuf, sizeof(timebuf) - 1,
+ "%Y-%m-%d %H:%M:%S", &tm))) {
+ for(p = tz; *p; p++) /* print time offset reverse */
+ *p = ((*p == '-') ? '+' : (*p == '+' ? '-' : *p));
+ snprintf(buf, bufsiz, "%s %s", timebuf, tz);
+ }
+ }
+ return t;
+}
+
+/* print text, ignore tabs, newline and carriage return etc
+ * print some HTML 2.0 / XML 1.0 as normal text */
+void
+string_print_trimmed(String *s) {
+ const char *entities[] = {
+ "&amp;", "&", "&lt;", "<", "&gt;", ">", "&apos;", "'", "&quot;", "\"",
+ NULL, NULL
+ };
+ const char *p, *n, **e;
+ unsigned int len, found;
+ if(!s->data)
+ return;
+ for(p = s->data; isspace(*p); p++); /* strip leading whitespace */
+ for(; *p; ) { /* ignore tabs, newline and carriage return etc */
+ if(!isspace(*p) || *p == ' ') {
+ if(*p == '<') { /* skip tags */
+ if((n = strchr(p, '>')))
+ p = n;
+ else
+ putchar('<');
+ } else if(*p == '&') {
+ for(e = entities, found = 0; *e; e += 2) {
+ len = strlen(*e);
+ if(!strncmp(*e, p, len)) { /* compare entities and "replace" */
+ fputs(*(e + 1), stdout);
+ p += len;
+ found = 1;
+ break;
+ }
+ }
+ if(found)
+ continue;
+ else
+ putchar('&');
+ } else
+ fputc(*p, stdout);
+ }
+ p++;
+ }
+}
+
+void /* print text, escape tabs, newline and carriage return etc */
+string_print_textblock(String *s) {
+ const char *p;
+ if(!s->data)
+ return;
+ for(p = s->data; *p && isspace(*p); p++); /* strip leading whitespace */
+ for(; *p; p++) {
+ if(*p == '\n') /* escape newline */
+ fputs("\\n", stdout);
+ else if(*p == '\\') /* escape \ */
+ fputs("\\\\", stdout);
+ else if(*p == '\t') /* tab */
+ fputs("\\t", stdout);
+ else if(!isspace(*p) || *p == ' ') /* ignore other whitespace chars */
+ fputc(*p, stdout);
+ }
+}
+
+int
+istag(const char *name, const char *name2) {
+ return (!strcasecmp(name, name2));
+}
+
+int
+isattr(const char *name, const char *name2) {
+ return (!strcasecmp(name, name2));
+}
+
+char * /* search for attr value by attr name in attributes list */
+getattrvalue(const char **atts, const char *name) {
+ const char **attr = NULL, *key, *value;
+ if(!atts || !(*atts))
+ return NULL;
+ for(attr = atts; *attr; ) {
+ key = *(attr++);
+ value = *(attr++);
+ if(key && value && isattr(key, name))
+ return (char *)value;
+ }
+ return NULL;
+}
+
+void XMLCALL
+xml_handler_start_element(void *data, const char *name, const char **atts) {
+ const char *value;
+
+ strncpy(tag, name, sizeof(tag) - 1); /* set tag */
+ if(feeditem.feedtype != FeedTypeNone) { /* in item */
+ if(feeditem.feedtype == FeedTypeAtom) {
+ if(istag(feeditemtag, "content") || istag(feeditemtag, "summary")) {
+ XML_DefaultCurrent(parser); /* pass to default handler to process inline HTML etc */
+ } else if(istag(name, "link")) { /* link href attribute */
+ if((value = getattrvalue(atts, "href")))
+ string_append(&feeditem.link, value, strlen(value));
+ } else if(istag(name, "content") || istag(name, "summary")) {
+ if((value = getattrvalue(atts, "type"))) { /* content type is HTML or plain text */
+ if(!strcasecmp(value, "xhtml") || !strcasecmp(value, "text/xhtml") ||
+ !strcasecmp(value, "html") || !strcasecmp(value, "text/html"))
+ feeditem.contenttype = ContentTypeHTML;
+ }
+ }
+ } else if(feeditem.feedtype == FeedTypeRSS) {
+ if(istag(feeditemtag, "description"))
+ XML_DefaultCurrent(parser); /* pass to default handler to process inline HTML etc */
+ }
+ if(feeditemtag[0] == '\0') /* set tag if not already set. */
+ strncpy(feeditemtag, name, sizeof(feeditemtag) - 1);
+ } else { /* start of RSS or Atom entry / item */
+ if(istag(name, "entry")) { /* Atom */
+ feeditem.feedtype = FeedTypeAtom;
+ feeditem.contenttype = ContentTypePlain; /* Default content type */
+ } else if(istag(name, "item")) { /* RSS */
+ feeditem.feedtype = FeedTypeRSS;
+ feeditem.contenttype = ContentTypeHTML; /* Default content type */
+ }
+ }
+}
+
+void XMLCALL
+xml_handler_end_element(void *data, const char *name) {
+ char timebuf[64];
+
+ if(feeditem.feedtype != FeedTypeNone) {
+ /* end of RSS or Atom entry / item */
+ if((istag(name, "entry") && (feeditem.feedtype == FeedTypeAtom)) || /* Atom */
+ (istag(name, "item") && (feeditem.feedtype == FeedTypeRSS))) { /* RSS */
+ printf("%ld", (long)parsetime((&feeditem.timestamp)->data, timebuf,
+ sizeof(timebuf) - 1));
+ fputc(FieldSeparator, stdout);
+ printf("%s", timebuf);
+ fputc(FieldSeparator, stdout);
+ string_print_trimmed(&feeditem.title);
+ fputc(FieldSeparator, stdout);
+ string_print_trimmed(&feeditem.link);
+ fputc(FieldSeparator, stdout);
+ string_print_textblock(&feeditem.content);
+ fputc(FieldSeparator, stdout);
+ fputs(contenttypes[feeditem.contenttype], stdout);
+ fputc(FieldSeparator, stdout);
+ string_print_trimmed(&feeditem.id);
+ fputc(FieldSeparator, stdout);
+ string_print_trimmed(&feeditem.author);
+ fputc(FieldSeparator, stdout);
+ fputs(feedtypes[feeditem.feedtype], stdout);
+ fputc('\n', stdout);
+
+ /* clear strings */
+ string_clear(&feeditem.timestamp);
+ string_clear(&feeditem.title);
+ string_clear(&feeditem.link);
+ string_clear(&feeditem.content);
+ string_clear(&feeditem.id);
+ string_clear(&feeditem.author);
+ feeditem.feedtype = FeedTypeNone;
+ feeditem.contenttype = ContentTypePlain;
+ incdata = 0;
+ feeditemtag[0] = '\0'; /* unset tag */
+ } else if(!strcmp(feeditemtag, name)) { /* clear */
+ feeditemtag[0] = '\0'; /* unset tag */
+ } else {
+ if(feeditem.feedtype == FeedTypeAtom) {
+ if(istag(feeditemtag, "content") || istag(feeditemtag, "summary")) {
+ /* pass to default handler to process inline HTML etc */
+ XML_DefaultCurrent(parser);
+ return;
+ }
+ }
+ }
+ }
+ tag[0] = '\0'; /* unset tag */
+}
+
+/* NOTE: this handler can be called multiple times if the data in this block
+ * is bigger than the buffer */
+void XMLCALL
+xml_handler_data(void *data, const XML_Char *s, int len) {
+ if(feeditem.feedtype == FeedTypeRSS) {
+ if(istag(feeditemtag, "pubdate") || istag(feeditemtag, "dc:date"))
+ string_append(&feeditem.timestamp, s, len);
+ else if(istag(feeditemtag, "title"))
+ string_append(&feeditem.title, s, len);
+ else if(istag(feeditemtag, "link"))
+ string_append(&feeditem.link, s, len);
+ else if(istag(feeditemtag, "description")) {
+ if(incdata)
+ XML_DefaultCurrent(parser); /* pass to default handler to process inline HTML etc */
+ else
+ string_append(&feeditem.content, s, len);
+ } else if(istag(feeditemtag, "guid"))
+ string_append(&feeditem.id, s, len);
+ else if(istag(feeditemtag, "author") || istag(feeditemtag, "dc:creator"))
+ string_append(&feeditem.author, s, len);
+ } else if(feeditem.feedtype == FeedTypeAtom) {
+ if(istag(feeditemtag, "published") || istag(feeditemtag, "updated"))
+ string_append(&feeditem.timestamp, s, len);
+ else if(istag(feeditemtag, "title")) {
+ string_append(&feeditem.title, s, len);
+ } else if(istag(feeditemtag, "summary") || istag(feeditemtag, "content")) {
+ if(feeditem.contenttype == ContentTypeHTML) {
+ if(incdata)
+ XML_DefaultCurrent(parser); /* pass to default handler to process inline HTML etc */
+ else
+ string_append(&feeditem.content, s, len);
+ } else
+ XML_DefaultCurrent(parser); /* pass to default handler to process inline HTML etc */
+ } else if(istag(feeditemtag, "id"))
+ string_append(&feeditem.id, s, len);
+ else if(istag(feeditemtag, "name")) /* assume this is: <author><name></name></author> */
+ string_append(&feeditem.author, s, len);
+ }
+}
+
+int /* parse XML from stream using setup parser, return 1 on success, 0 on failure. */
+xml_parse_stream(XML_Parser parser, FILE *fp) {
+ char buffer[BUFSIZ];
+ int done = 0, len = 0;
+
+ while(!feof(fp)) {
+ len = fread(buffer, 1, sizeof(buffer), fp);
+ done = (feof(fp) || ferror(fp));
+ if(XML_Parse(parser, buffer, len, done) == XML_STATUS_ERROR && (len > 0)) {
+ if(XML_GetErrorCode(parser) == XML_ERROR_NO_ELEMENTS)
+ return 1; /* Ignore "no elements found" / empty document as an error */
+ fprintf(stderr, "sfeed: error parsing xml %s at line %lu column %lu\n",
+ XML_ErrorString(XML_GetErrorCode(parser)), (unsigned long)XML_GetCurrentLineNumber(parser),
+ (unsigned long)XML_GetCurrentColumnNumber(parser));
+ return 0;
+ }
+ } while(!done);
+ return 1;
+}
+
+void
+xml_handler_default(void *data, const XML_Char *s, int len) {
+ if((feeditem.feedtype == FeedTypeAtom && (istag(feeditemtag, "summary") || istag(feeditemtag, "content"))) ||
+ (feeditem.feedtype == FeedTypeRSS && istag(feeditemtag, "description")))
+ /*if(!istag(tag, "script") && !istag(tag, "style"))*/ /* ignore data in inline script and style */
+ string_append(&feeditem.content, s, len);
+}
+
+void /* NOTE: data is null terminated. */
+xml_handler_comment(void *data, const XML_Char *s) {
+}
+
+void
+xml_cdata_section_handler_start(void *userdata) {
+ incdata = 1;
+}
+
+void
+xml_cdata_section_handler_end(void *userdata) {
+ incdata = 0;
+}
+
+int
+main(void) {
+ int status;
+ standardtz = getenv("TZ");
+
+ /* init strings and initial memory pool size */
+ string_buffer_init(&feeditem.timestamp, 64);
+ string_buffer_init(&feeditem.title, 256);
+ string_buffer_init(&feeditem.link, 1024);
+ string_buffer_init(&feeditem.content, 4096);
+ string_buffer_init(&feeditem.id, 1024);
+ string_buffer_init(&feeditem.author, 256);
+ feeditem.contenttype = ContentTypePlain;
+ feeditem.feedtype = FeedTypeNone;
+ feeditemtag[0] = '\0'; /* unset tag */
+ tag[0] = '\0'; /* unset tag */
+
+ if(!(parser = XML_ParserCreate("UTF-8")))
+ die("can't create parser");
+
+ XML_SetElementHandler(parser, xml_handler_start_element, xml_handler_end_element);
+ XML_SetCharacterDataHandler(parser, xml_handler_data);
+ XML_SetCommentHandler(parser, xml_handler_comment);
+ XML_SetCdataSectionHandler(parser, xml_cdata_section_handler_start, xml_cdata_section_handler_end);
+ XML_SetDefaultHandler(parser, xml_handler_default);
+
+ status = xml_parse_stream(parser, stdin);
+ cleanup();
+
+ return status ? EXIT_SUCCESS : EXIT_FAILURE;
+}
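
The sign flip in gettimetz() above ("NOTE: reverses time offsets for TZ")
relies on the POSIX TZ convention: the offset in a TZ string is the value
added to local time to reach UTC, i.e. the opposite sign of the offset
written in a feed's date. Below is a standalone sketch of the same trick;
the date and offset are arbitrary examples, and it should be compiled with
the CFLAGS from config.mk so setenv() is declared.

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

int
main(void) {
	struct tm tm = { 0 };
	time_t t;

	/* a feed date of 2012-08-03 12:00:00 +02:00 (two hours ahead of UTC) */
	tm.tm_year = 2012 - 1900;
	tm.tm_mon = 8 - 1;
	tm.tm_mday = 3;
	tm.tm_hour = 12;
	tm.tm_isdst = -1;

	/* POSIX TZ wants the reversed sign: "+02:00" becomes GMT-02:00 */
	setenv("TZ", "GMT-02:00", 1);
	tzset();
	t = mktime(&tm);

	printf("%ld\n", (long)t); /* 1343988000 = 2012-08-03 10:00:00 UTC */
	return EXIT_SUCCESS;
}
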
diff --git a/sfeed_html.1 b/sfeed_html.1
new file mode 100644
index 0000000..e645d4b
--- /dev/null
+++ b/sfeed_html.1
@@ -0,0 +1,14 @@
+.TH SFEED 1 sfeed\-VERSION
+.SH NAME
+sfeed_html \- format feeds file (TSV) from sfeed_update to HTML
+.SH SYNOPSIS
+.B sfeed_html
+.SH DESCRIPTION
+Format feeds file (TSV) from sfeed_update to HTML. Reads TSV data from
+stdin and writes HTML to stdout. For the exact TSV format see sfeed_update(1).
+.SH SEE ALSO
+.BR sfeed_plain(1)
+.BR sfeed_update(1)
+.BR sfeed(1)
+.SH BUGS
+Please report them!
diff --git a/sfeed_html.c b/sfeed_html.c
new file mode 100644
index 0000000..c195c86
--- /dev/null
+++ b/sfeed_html.c
@@ -0,0 +1,230 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <time.h>
+#include <ctype.h>
+#include "common.c"
+
+/* Feed info. */
+struct feed {
+ char *name; /* feed name */
+ unsigned long new; /* amount of new items per feed */
+ unsigned long total; /* total items */
+ struct feed *next; /* linked list */
+};
+
+static int showsidebar = 1; /* show sidebar ? */
+
+void /* print error message to stderr */
+die(const char *s) {
+ fputs("sfeed_html: ", stderr);
+ fputs(s, stderr);
+ fputc('\n', stderr);
+ exit(EXIT_FAILURE);
+}
+
+struct feed *
+feednew(void) {
+ struct feed *f;
+ if(!(f = calloc(1, sizeof(struct feed))))
+ die("can't allocate enough memory");
+ return f;
+}
+
+void
+feedsfree(struct feed *f) {
+ struct feed *next;
+ while(f) {
+ next = f->next;
+ free(f->name);
+ free(f);
+ f = next;
+ }
+}
+
+/* print feed name for id; spaces and tabs in string as "-" (spaces in anchors are not valid). */
+void
+printfeednameid(const char *s) {
+ for(; *s; s++)
+ putchar(isspace(*s) ? '-' : *s);
+}
+
+void
+printhtmlencoded(const char *s) {
+ for(; *s; s++) {
+ switch(*s) {
+ case '<': fputs("&lt;", stdout); break;
+ case '>': fputs("&gt;", stdout); break;
+ case '&': fputs("&amp;", stdout); break;
+ default:
+ putchar(*s);
+ }
+ }
+}
+
+int
+main(void) {
+ char *line = NULL, *fields[FieldLast];
+ unsigned long totalfeeds = 0, totalnew = 0;
+ unsigned int islink, isnew;
+ struct feed *feedcurrent = NULL, *feeds = NULL; /* start of feeds linked-list. */
+ time_t parsedtime, comparetime;
+ size_t size = 0;
+
+ tzset();
+ comparetime = time(NULL) - (3600 * 24); /* 1 day is old news */
+ fputs(
+ "<!DOCTYPE HTML>\n"
+ "<html dir=\"ltr\" lang=\"en\">\n"
+ " <head>\n"
+ " <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n"
+ " <style type=\"text/css\">\n"
+ " body {\n"
+ " font-family: monospace;\n"
+ " font-size: 9pt;\n"
+ " color: #333;\n"
+ " background-color: #fff;\n"
+ " overflow: hidden;\n"
+ " }\n"
+ " #feedcontent td {\n"
+ " white-space: nowrap;\n"
+ " }\n"
+ " #feedcontent h2 {\n"
+ " font-size: 14pt;\n"
+ " }\n"
+ " #feedcontent a {\n"
+ " display: block;\n"
+ " }\n"
+ " #feedcontent ul, #feedcontent li {\n"
+ " list-style: none;\n"
+ " padding: 0;\n"
+ " margin: 0;\n"
+ " }\n"
+ " #feedcontent h2 a, #feedcontent ul li a {\n"
+ " color: inherit;\n"
+ " }\n"
+ " #feedcontent ul li a {\n"
+ " padding: 5px 3px 5px 10px;\n"
+ " }\n"
+ " #feedcontent div#sidebar {\n"
+ " background-color: inherit;\n"
+ " position: fixed;\n"
+ " top: 0;\n"
+ " left: 0;\n"
+ " width: 175px;\n"
+ " height: 100%;\n"
+ " overflow: hidden;\n"
+ " overflow-y: auto;\n"
+ " z-index: 999;\n"
+ " }\n"
+ " #feedcontent div#items {\n"
+ " left: 175px;\n"
+ " }\n"
+ " #feedcontent div#items-nosidebar {\n"
+ " left: 0px;\n"
+ " }\n"
+ " #feedcontent div#items-nosidebar,\n"
+ " #feedcontent div#items {\n"
+ " position: absolute;\n"
+ " height: 100%;\n"
+ " top: 0;\n"
+ " right: 0;\n"
+ " overflow: auto;\n"
+ " padding: 0 15px;\n"
+ " }\n"
+ " </style>\n"
+ " </head>\n"
+ " <body>\n"
+ " <div id=\"feedcontent\">\n",
+ stdout);
+
+ while(parseline(&line, &size, fields, FieldLast, stdin, FieldSeparator) > 0) {
+ /* first of feed section or new feed section. */
+ if(!totalfeeds || strcmp(feedcurrent->name, fields[FieldFeedName])) {
+ if(totalfeeds) { /* end previous one. */
+ fputs("</table>\n", stdout);
+ feedcurrent->next = feednew();
+ feedcurrent = feedcurrent->next;
+ } else {
+ feedcurrent = feednew();
+ feeds = feedcurrent; /* first item. */
+ fputs("\t\t<div id=\"items", stdout);
+ if(fields[FieldFeedName][0] == '\0') {
+ fputs("-nosidebar", stdout); /* set other id on div if no sidebar for styling */
+ showsidebar = 0;
+ }
+ fputs("\">\n", stdout);
+ }
+ if(!(feedcurrent->name = strdup(fields[FieldFeedName])))
+ die("can't allocate enough memory");
+ if(fields[FieldFeedName][0] != '\0') {
+ fputs("<h2 id=\"", stdout);
+ printfeednameid(feedcurrent->name);
+ fputs("\"><a href=\"#", stdout);
+ printfeednameid(feedcurrent->name);
+ fputs("\">", stdout);
+ fputs(feedcurrent->name, stdout);
+ fputs("</a></h2>\n", stdout);
+ }
+ fputs("<table>", stdout);
+ totalfeeds++;
+ }
+ parsedtime = (time_t)strtol(fields[FieldUnixTimestamp], NULL, 10);
+ isnew = (parsedtime >= comparetime);
+ islink = (strlen(fields[FieldLink]) > 0);
+ totalnew += isnew;
+ feedcurrent->new += isnew;
+ feedcurrent->total++;
+
+ fputs("<tr><td>", stdout);
+ printtime(parsedtime);
+ fputs("</td><td>", stdout);
+ if(isnew)
+ fputs("<b><u>", stdout);
+ if(islink) {
+ fputs("<a href=\"", stdout);
+ printlink(fields[FieldLink], fields[FieldFeedUrl]);
+ fputs("\">", stdout);
+ }
+ printhtmlencoded(fields[FieldTitle]);
+ if(islink)
+ fputs("</a>", stdout);
+ if(isnew)
+ fputs("</u></b>", stdout);
+ fputs("</td></tr>\n", stdout);
+ }
+ if(totalfeeds) {
+ fputs("</table>\n", stdout);
+ fputs("\t\t</div>\n", stdout); /* div items */
+ }
+ if(showsidebar) {
+ fputs("\t\t<div id=\"sidebar\">\n\t\t\t<ul>\n", stdout);
+ for(feedcurrent = feeds; feedcurrent; feedcurrent = feedcurrent->next) {
+ if(!feedcurrent->name || feedcurrent->name[0] == '\0')
+ continue;
+ fputs("<li><a href=\"#", stdout);
+ printfeednameid(feedcurrent->name);
+ fputs("\">", stdout);
+ if(feedcurrent->new > 0)
+ fputs("<b><u>", stdout);
+ fputs(feedcurrent->name, stdout);
+ fprintf(stdout, " (%lu)", feedcurrent->new);
+ if(feedcurrent->new > 0)
+ fputs("</u></b>", stdout);
+ fputs("</a></li>\n", stdout);
+ }
+ fputs("\t\t\t</ul>\n\t\t</div>\n", stdout);
+ }
+ fputs(
+ " </div>\n"
+ " </body>\n"
+ " <title>Newsfeeds (",
+ stdout);
+ fprintf(stdout, "%lu", totalnew);
+ fputs(")</title>\n</html>", stdout);
+
+ free(line); /* free line */
+ feedsfree(feeds); /* free feeds linked-list */
+
+ return EXIT_SUCCESS;
+}
diff --git a/sfeed_opml_config.1 b/sfeed_opml_config.1
new file mode 100644
index 0000000..523f99e
--- /dev/null
+++ b/sfeed_opml_config.1
@@ -0,0 +1,11 @@
+.TH SFEED 1 sfeed\-VERSION
+.SH NAME
+sfeed_opml_config \- generate a sfeedrc config file based on an opml file
+.SH SYNOPSIS
+.B sfeed_opml_config
+.SH DESCRIPTION
+Reads the opml XML data from stdin and writes the config file text to stdout.
+.SH SEE ALSO
+.BR sfeed_update(1)
+.SH BUGS
+Please report them!
diff --git a/sfeed_opml_config.c b/sfeed_opml_config.c
new file mode 100644
index 0000000..0d74820
--- /dev/null
+++ b/sfeed_opml_config.c
@@ -0,0 +1,87 @@
+/* convert an opml file to sfeedrc file */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <expat.h> /* libexpat */
+
+XML_Parser parser; /* expat XML parser state */
+
+char * /* search for attr value by attr name in attributes list */
+getattrvalue(const char **atts, const char *name) {
+ const char **attr = NULL, *key, *value;
+ if(!atts || !(*atts))
+ return NULL;
+ for(attr = atts; *attr; ) {
+ key = *(attr++);
+ value = *(attr++);
+ if(key && value && !strcasecmp(key, name))
+ return (char *)value;
+ }
+ return NULL;
+}
+
+void XMLCALL
+xml_handler_start_element(void *data, const char *name, const char **atts) {
+ char *feedurl = NULL, *feedname = NULL;
+
+ if(!strcasecmp(name, "outline")) {
+ if(!(feedname = getattrvalue(atts, "text")) &&
+ !(feedname = getattrvalue(atts, "title")))
+ feedname = "unnamed";
+ if(!(feedurl = getattrvalue(atts, "xmlurl")))
+ feedurl = "";
+ printf("\tfeed \"%s\" \"%s\"\n", feedname, feedurl);
+ }
+}
+
+void XMLCALL
+xml_handler_end_element(void *data, const char *name) {
+}
+
+int /* parse XML from stream using setup parser, return 1 on success, 0 on failure. */
+xml_parse_stream(XML_Parser parser, FILE *fp) {
+ char buffer[BUFSIZ];
+ int done = 0, len = 0;
+
+ while(!feof(fp)) {
+ len = fread(buffer, 1, sizeof(buffer), fp);
+ done = (feof(fp) || ferror(fp));
+ if(XML_Parse(parser, buffer, len, done) == XML_STATUS_ERROR && (len > 0)) {
+ if(XML_GetErrorCode(parser) == XML_ERROR_NO_ELEMENTS)
+ return 1; /* Ignore "no elements found" / empty document as an error */
+ fprintf(stderr, "sfeed_opml_config: error parsing xml %s at line %lu column %lu\n",
+ XML_ErrorString(XML_GetErrorCode(parser)), (unsigned long)XML_GetCurrentLineNumber(parser),
+ (unsigned long)XML_GetCurrentColumnNumber(parser));
+ return 0;
+ }
+ } while(!done);
+ return 1;
+}
+
+int main(void) {
+ int status;
+
+ if(!(parser = XML_ParserCreate("UTF-8"))) {
+ fputs("sfeed_opml_config: can't create parser", stderr);
+ exit(EXIT_FAILURE);
+ }
+ XML_SetElementHandler(parser, xml_handler_start_element, xml_handler_end_element);
+
+ fputs(
+ "# paths\n"
+ "# NOTE: make sure to uncomment all these if you change it.\n"
+ "#sfeedpath=\"$HOME/.sfeed\"\n"
+ "#sfeedfile=\"$sfeedpath/feeds\"\n"
+ "#sfeedfilenew=\"$sfeedfile.new\"\n"
+ "\n"
+ "# list of feeds to fetch:\n"
+ "feeds() {\n"
+ " # feed <name> <url> [encoding]\n", stdout);
+ status = xml_parse_stream(parser, stdin);
+ fputs("}\n", stdout);
+
+ XML_ParserFree(parser);
+
+ return status ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/sfeed_plain.1 b/sfeed_plain.1
new file mode 100644
index 0000000..3f396aa
--- /dev/null
+++ b/sfeed_plain.1
@@ -0,0 +1,15 @@
+.TH SFEED 1 sfeed\-VERSION
+.SH NAME
+sfeed_plain \- format feeds file (TSV) from sfeed_update to plain text
+.SH SYNOPSIS
+.B sfeed_plain
+.SH DESCRIPTION
+Format feeds file (TSV) from sfeed_update to plain text. Reads TSV data from
+stdin and writes plain text to stdout. For the exact TSV format see
+sfeed_update(1).
+.SH SEE ALSO
+.BR sfeed_html(1)
+.BR sfeed_update(1)
+.BR sfeed(1)
+.SH BUGS
+Please report them!
diff --git a/sfeed_plain.c b/sfeed_plain.c
new file mode 100644
index 0000000..d4045c9
--- /dev/null
+++ b/sfeed_plain.c
@@ -0,0 +1,42 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <time.h>
+#include "common.c"
+
+void
+printutf8padded(const char *s, size_t len) {
+ unsigned int n = 0, i = 0;
+
+ for(; s[i] && n < len; i++) {
+ if((s[i] & 0xc0) != 0x80) /* start of character */
+ n++;
+ putchar(s[i]);
+ }
+ for(; n < len; n++)
+ putchar(' ');
+}
+
+int
+main(void) {
+ char *line = NULL, *fields[FieldLast];
+ time_t parsedtime, comparetime;
+ size_t size = 0;
+
+ tzset();
+ comparetime = time(NULL) - (3600 * 24); /* 1 day is old news */
+ while(parseline(&line, &size, fields, FieldLast, stdin, FieldSeparator) > 0) {
+ parsedtime = (time_t)strtol(fields[FieldUnixTimestamp], NULL, 10);
+ printf(" %c ", (parsedtime >= comparetime) ? 'N' : ' ');
+ if(fields[FieldFeedName][0] != '\0')
+ printf("%-15.15s ", fields[FieldFeedName]);
+ printtime(parsedtime);
+ fputs(" ", stdout);
+ printutf8padded(fields[FieldTitle], 70);
+ fputs(" ", stdout);
+ printlink(fields[FieldLink], fields[FieldFeedUrl]);
+ putchar('\n');
+ }
+ free(line);
+ return EXIT_SUCCESS;
+}
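
printutf8padded() above pads to a width in characters rather than bytes:
every byte is counted except UTF-8 continuation bytes, which always have
their top two bits set to 10. A small sketch of that test on its own; the
sample string is arbitrary.

#include <stdio.h>
#include <string.h>

int
main(void) {
	/* "héllo": 6 bytes in UTF-8, 5 characters (0xc3 0xa9 encodes é) */
	const char *s = "h\xc3\xa9llo";
	unsigned int i, chars = 0;

	for(i = 0; s[i]; i++)
		if((s[i] & 0xc0) != 0x80) /* not a continuation byte (10xxxxxx) */
			chars++;
	printf("%u bytes, %u characters\n", (unsigned int)strlen(s), chars);
	return 0;
}
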
diff --git a/sfeed_update b/sfeed_update
new file mode 100755
index 0000000..52b9728
--- /dev/null
+++ b/sfeed_update
@@ -0,0 +1,116 @@
+#!/bin/sh
+# update feeds, merge with old feeds.
+# NOTE: assumes "sfeed_*" files are in $PATH.
+
+# defaults
+sfeedpath="$HOME/.sfeed"
+sfeedfile="$sfeedpath/feeds"
+# temporary file for new feeds (for merging).
+sfeedfilenew="$sfeedfile.new"
+
+# load config (evaluate shellscript).
+# loadconfig(configfile)
+loadconfig() {
+ # allow to specify config via argv[1].
+ if [ ! "$1" = "" ]; then
+ # get absolute path of config file.
+ config=$(readlink -f "$1")
+ else
+ # default config location.
+ config="$HOME/.sfeed/sfeedrc"
+ fi
+
+ # load config: config is loaded here to be able to override above variables
+ # (sfeedpath, sfeedfile, etc).
+ if [ -r "$config" ]; then
+ . "$config"
+ else
+ echo "Configuration file \"$config\" does not exist or is not readable." >&2
+ echo "See sfeedrc.example for an example." >&2
+ exit 1
+ fi
+}
+
+# merge raw files.
+# merge(oldfile, newfile)
+merge() {
+ # unique sort by id, link, title.
+ # order by feedname (asc), feedurl (asc) and timestamp (desc).
+ (cat "$1" "$2" 2> /dev/null) |
+ sort -t ' ' -u -k7,7 -k4,4 -k3,3 |
+ sort -t ' ' -k10,10 -k11,11 -k1r,1
+}
+
+# fetch a feed via HTTP/HTTPS etc.
+# fetchfeed(url, name)
+fetchfeed() {
+ if (curl -f -s -S -L --max-time 30 -z "$lastupdated" "$1"); then
+ printf "%s\n" "[`date`] Fetching $2 [$1] ... done" >&2
+ else
+ printf "%s\n" "[`date`] Fetching $2 [$1] ... fail" >&2
+ fi
+}
+
+# add field after line, output to stdout.
+# addfield(field)
+addfield() {
+ # NOTE: IFS is set and restored to prevent stripping whitespace.
+ OLDIFS="$IFS"
+ IFS="
+"
+ while read -r line; do
+ printf "%s %s\n" "${line}" "$1"
+ done
+ IFS="$OLDIFS"
+}
+
+# fetch and parse feed.
+# feed(name, url, encoding)
+feed() {
+ tmpfile=$(mktemp -p "$TMPDIR")
+ (if [ "$3" = "" ]; then
+ # don't use iconv if encoding not set in config.
+ fetchfeed "$2" "$1" | sfeed | addfield "$1 $2"
+ else
+ # use iconv to convert encoding to UTF-8.
+ fetchfeed "$2" "$1" | iconv -cs -f "$3" -t "utf-8" | sfeed | addfield "$1 $2"
+ fi) > "$tmpfile"
+}
+
+terminated() {
+ isrunning="0"
+}
+
+cleanup() {
+ # remove temporary files
+ rm -rf "$tmpfile" "$TMPDIR"
+}
+
+# load config file.
+loadconfig "$1"
+# fetch feeds and store in temporary file.
+TMPDIR=$(mktemp -d -t "sfeed_XXXXXX")
+# get date of last modified feedfile in format:
+# YYYYmmdd HH:MM:SS [+-][0-9]*
+lastupdated=$(stat -c "%y" "$sfeedfile" 2> /dev/null | cut -c 1-4,6-7,9-10,11-19,30-)
+# Kill whole current process group on ^C.
+isrunning="1"
+trap -- "terminated" "15" # SIGTERM: signal to terminate parent.
+trap -- "kill -TERM -$$" "2" # SIGINT: kill all running childs >:D
+# fetch feeds specified in config file.
+feeds
+# make sure path exists.
+mkdir -p "$sfeedpath"
+# wait till all feeds are fetched (allows running in parallel).
+wait
+[ "$isrunning" = "0" ] && cleanup && exit 1 # if terminated cleanup.
+# concat all individual feed files to a single file.
+# NOTE: mktemp uses $TMPDIR for temporary directory.
+tmpfile=$(mktemp -t "sfeed_XXXXXX")
+find "$TMPDIR" -type f -exec cat {} \; > "$tmpfile"
+# get new data and merge with old.
+merge "$sfeedfile" "$tmpfile" > "$sfeedfilenew"
+# overwrite old file with updated file
+mv "$sfeedfilenew" "$sfeedfile"
+# cleanup temporary files etc.
+cleanup
diff --git a/sfeed_update.1 b/sfeed_update.1
new file mode 100644
index 0000000..e9cffc1
--- /dev/null
+++ b/sfeed_update.1
@@ -0,0 +1,82 @@
+.TH SFEED 1 sfeed\-VERSION
+.SH NAME
+sfeed_update \- update feeds and merge with old feeds
+.SH SYNOPSIS
+.B sfeed_update
+.RB [configfile]
+.SH OPTIONS
+.TP
+.B [configfile]
+config file, if not specified uses the location $HOME/.sfeed/sfeedrc by default (see FILES READ section for more information).
+.SH DESCRIPTION
+.PP
+Update feeds and merge with old feeds in the file $HOME/.sfeed/feeds by default.
+.SH TAB-SEPARATED FORMAT
+The items are saved in a TSV-like format except newlines, tabs and
+backslash are escaped with \\ (\\n, \\t and \\\\). Carriage returns (\\r) are
+removed.
+.TP
+.B item timestamp (unix timestamp in GMT+0)
+string
+.TP
+.B item timestamp (formatted)
+string (YYYY-mm-dd HH:MM:SS tzname[+-]HHMM)
+.TP
+.B item title
+string
+.TP
+.B item link
+string
+.TP
+.B item description
+string
+.TP
+.B item contenttype
+string (html or plain)
+.TP
+.B item id
+string
+.TP
+.B item author
+string
+.TP
+.B feed type
+string (rss or atom)
+.TP
+.B feed name
+string (extra field added by sfeed_update)
+.TP
+.B feed url
+string (extra field added by sfeed_update)
+.SH FILES READ
+.TP
+.B sfeedrc
+Config file, see the sfeedrc.example file for an example.
+This file is evaluated as a shellscript in sfeed_update.
+You can for example override the fetchfeed() function to
+use wget, fetch or an other download program or you can
+override the merge() function to change the merge logic.
+The function feeds() is called to fetch the feeds. The
+function feed() can safely be executed as a parallel job
+in your sfeedrc config file to speedup updating.
+.SH FILES WRITTEN
+.TP
+.B feeds
+Tab-separated format containing all feeds.
+The sfeed_update script merges new items with this file.
+.TP
+.B feeds.new
+Temporary file used by sfeed_update to merge items.
+.SH EXAMPLES
+.TP
+To update feeds and format the feeds file:
+.nf
+sfeed_update "configfile"
+sfeed_plain < $HOME/.sfeed/feeds > $HOME/.sfeed/feeds.txt
+sfeed_html < $HOME/.sfeed/feeds > $HOME/.sfeed/feeds.html
+.SH SEE ALSO
+.BR sh(1)
+.BR sfeed_plain(1)
+.BR sfeed_html(1)
+.SH BUGS
+Please report them!
diff --git a/sfeedrc.example b/sfeedrc.example
new file mode 100644
index 0000000..d25777a
--- /dev/null
+++ b/sfeedrc.example
@@ -0,0 +1,17 @@
+# paths
+# NOTE: make sure to uncomment all these if you change it.
+#sfeedpath="$HOME/.sfeed"
+#sfeedfile="$sfeedpath/feeds"
+#sfeedfilenew="$sfeedfile.new"
+
+# list of feeds to fetch:
+feeds() {
+ # feed <name> <url> [encoding]
+ feed "codemadness" "http://www.codemadness.nl/blog/rss.xml"
+ feed "explosm" "http://feeds.feedburner.com/Explosm"
+ feed "linux kernel" "http://kernel.org/kdist/rss.xml" "iso-8859-1"
+ feed "phoronix" "http://feeds.feedburner.com/Phoronix"
+ feed "slashdot" "http://rss.slashdot.org/Slashdot/slashdot"
+ feed "tweakers" "http://feeds.feedburner.com/tweakers/mixed" "iso-8859-1"
+ feed "xkcd" "http://xkcd.com/atom.xml"
+}