-rw-r--r-- | .gitignore          |   5
-rw-r--r-- | CREDITS             |   1
-rw-r--r-- | LICENSE             |  21
-rw-r--r-- | Makefile            |  95
-rw-r--r-- | README              | 143
-rw-r--r-- | TODO                |   4
-rw-r--r-- | common.c            | 116
-rw-r--r-- | config.mk           |  28
-rw-r--r-- | sfeed.1             |  44
-rw-r--r-- | sfeed.c             | 477
-rw-r--r-- | sfeed_html.1        |  14
-rw-r--r-- | sfeed_html.c        | 230
-rw-r--r-- | sfeed_opml_config.1 |  11
-rw-r--r-- | sfeed_opml_config.c |  87
-rw-r--r-- | sfeed_plain.1       |  15
-rw-r--r-- | sfeed_plain.c       |  42
-rwxr-xr-x | sfeed_update        | 116
-rw-r--r-- | sfeed_update.1      |  82
-rw-r--r-- | sfeedrc.example     |  17
19 files changed, 1548 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..abdbd0f --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +*.o +sfeed +sfeed_plain +sfeed_html +sfeed_opml_config @@ -0,0 +1 @@ +raph_ael on #suckless for the idea for an opml converter @@ -0,0 +1,21 @@ +MIT/X Consortium License + +© 2011-2012 Hiltjo Posthuma <hiltjo@codemadness.org> + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..329b97b --- /dev/null +++ b/Makefile @@ -0,0 +1,95 @@ +# sfeed - simple RSS and Atom parser (and programs to add reader functionality). + +include config.mk + +NAME = sfeed +SRC = sfeed.c sfeed_plain.c sfeed_html.c sfeed_opml_config.c +OBJ = ${SRC:.c=.o} + +all: options sfeed sfeed_plain sfeed_html sfeed_opml_config + +options: + @echo ${NAME} build options: + @echo "CFLAGS = ${CFLAGS}" + @echo "LDFLAGS = ${LDFLAGS}" + @echo "CC = ${CC}" + +.c.o: + @echo CC $< + @${CC} -c ${CFLAGS} $< + +${OBJ}: config.mk + +sfeed: ${OBJ} + @echo CC -o $@ + @${CC} -o $@ sfeed.c ${LDFLAGS} ${LIBEXPAT} + +sfeed_opml_config: sfeed_opml_config.o + @echo CC -o $@ + @${CC} -o $@ sfeed_opml_config.o ${LDFLAGS} ${LIBEXPAT} + +sfeed_plain: sfeed_plain.o + @echo CC -o $@ + @${CC} -o $@ sfeed_plain.o ${LDFLAGS} + +sfeed_html: sfeed_html.o + @echo CC -o $@ + @${CC} -o $@ sfeed_html.o ${LDFLAGS} + +clean: + @echo cleaning + @rm -f sfeed sfeed_plain sfeed_html sfeed_opml_config ${OBJ} ${NAME}-${VERSION}.tar.gz + +dist: clean + @echo creating dist tarball + @mkdir -p ${NAME}-${VERSION} + @cp -R LICENSE Makefile README config.mk \ + TODO CREDITS sfeedrc.example ${SRC} common.c sfeed_update \ + sfeed.1 sfeed_update.1 sfeed_plain.1 sfeed_html.1 sfeed_opml_config.1 \ + ${NAME}-${VERSION} + @tar -cf ${NAME}-${VERSION}.tar ${NAME}-${VERSION} + @gzip ${NAME}-${VERSION}.tar + @rm -rf ${NAME}-${VERSION} + +install: all + @echo installing executable file to ${DESTDIR}${PREFIX}/bin + @mkdir -p ${DESTDIR}${PREFIX}/bin + @cp -f sfeed sfeed_html sfeed_plain sfeed_update sfeed_opml_config \ + ${DESTDIR}${PREFIX}/bin + @chmod 755 ${DESTDIR}${PREFIX}/bin/sfeed \ + ${DESTDIR}${PREFIX}/bin/sfeed_html \ + ${DESTDIR}${PREFIX}/bin/sfeed_plain \ + ${DESTDIR}${PREFIX}/bin/sfeed_update \ + ${DESTDIR}${PREFIX}/bin/sfeed_opml_config + @mkdir -p ${DESTDIR}${PREFIX}/share/sfeed + @cp -f sfeedrc.example ${DESTDIR}${PREFIX}/share/${NAME} + @echo installing manual pages to ${DESTDIR}${MANPREFIX}/man1 + @mkdir -p ${DESTDIR}${MANPREFIX}/man1 + @sed "s/VERSION/${VERSION}/g" < sfeed.1 > 
${DESTDIR}${MANPREFIX}/man1/sfeed.1 + @sed "s/VERSION/${VERSION}/g" < sfeed_update.1 > ${DESTDIR}${MANPREFIX}/man1/sfeed_update.1 + @sed "s/VERSION/${VERSION}/g" < sfeed_plain.1 > ${DESTDIR}${MANPREFIX}/man1/sfeed_plain.1 + @sed "s/VERSION/${VERSION}/g" < sfeed_html.1 > ${DESTDIR}${MANPREFIX}/man1/sfeed_html.1 + @sed "s/VERSION/${VERSION}/g" < sfeed_opml_config.1 > ${DESTDIR}${MANPREFIX}/man1/sfeed_opml_config.1 + @chmod 644 ${DESTDIR}${MANPREFIX}/man1/sfeed.1 \ + ${DESTDIR}${MANPREFIX}/man1/sfeed_update.1 \ + ${DESTDIR}${MANPREFIX}/man1/sfeed_plain.1 \ + ${DESTDIR}${MANPREFIX}/man1/sfeed_html.1 \ + ${DESTDIR}${MANPREFIX}/man1/sfeed_opml_config.1 + +uninstall: + @echo removing executable file from ${DESTDIR}${PREFIX}/bin + @rm -f ${DESTDIR}${PREFIX}/bin/sfeed \ + ${DESTDIR}${PREFIX}/bin/sfeed_html \ + ${DESTDIR}${PREFIX}/bin/sfeed_plain \ + ${DESTDIR}${PREFIX}/bin/sfeed_update \ + ${DESTDIR}${PREFIX}/bin/sfeed_opml_config \ + ${DESTDIR}${PREFIX}/share/${NAME}/sfeedrc.example + @-rmdir ${DESTDIR}${PREFIX}/share/${NAME} + @echo removing manual pages from ${DESTDIR}${MANPREFIX}/man1 + @rm -f ${DESTDIR}${MANPREFIX}/man1/sfeed.1 \ + ${DESTDIR}${MANPREFIX}/man1/sfeed_update.1 \ + ${DESTDIR}${MANPREFIX}/man1/sfeed_plain.1 \ + ${DESTDIR}${MANPREFIX}/man1/sfeed_html.1 \ + ${DESTDIR}${MANPREFIX}/man1/sfeed_opml_config.1 + +.PHONY: all options clean dist install uninstall @@ -0,0 +1,143 @@ +sfeed v0.8 +---------- + +Simple RSS and Atom parser (and some format programs). + + +Dependencies +------------ + +- C compiler. +- expat library (used by sfeed.c and sfeed_opml_config.c, + http://expat.sourceforge.net/). + + +Optional dependencies +--------------------- + +- POSIX shell (used by sfeed_update). +- curl (used by sfeed_update, http://curl.haxx.se/). +- iconv (used by sfeed_update, http://www.gnu.org/software/libiconv/). + + +Files +----- + +sfeed - Binary (from sfeed.c); read XML RSS or Atom feed data from + stdin. Write feed data in tab-separated format to stdout. +sfeed_update - Shellscript; update feeds and merge with old feeds in the + file $HOME/.sfeed/feeds by default. +sfeed_plain - Format feeds file (TSV) from sfeed_update to plain text. +sfeed_html - Format feeds file (TSV) from sfeed_update to HTMLi. +sfeed_opml_config - Generate a sfeedrc config file based on an opml file. +sfeedrc.example - Example config file. + + +Files read at runtime by sfeed_update +------------------------------------- + +sfeedrc - Config file. This file is evaluated as a shellscript in + sfeed_update. You can for example override the fetchfeed() + function to use wget, fetch or an other download program or + you can override the merge() function to change the merge + logic. The function feeds() is called to fetch the feeds. + The function feed() can safely be executed as a parallel + job in your sfeedrc config file to speedup updating. + + +Files written at runtime by sfeed_update +---------------------------------------- + +feeds - Tab-separated format containing all feeds. + The sfeed_update script merges new items with this file. +feeds.new - Temporary file used by sfeed_update to merge items. + + +TAB-SEPARATED format +-------------------- + +The items are saved in a TSV-like format except newlines, tabs and +backslash are escaped with \ (\n, \t and \\). Other whitespace except +spaces are removed. + +The timestamp field is converted to a unix timestamp. The timestamp is also +stored as formatted as a separate field. The other fields are left untouched +(including HTML). 
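
A small sketch of reading this format with awk, assuming the default feeds
file written by sfeed_update and the field order listed below ($2 is the
formatted timestamp, $3 the title, $4 the link):

awk -F '\t' '{ printf("%s  %s  %s\n", $2, $3, $4); }' "$HOME/.sfeed/feeds"
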
+ +The order and format of the fields are: + +item unix timestamp - string unix timestamp (GMT+0) +item formatted timestamp - string timestamp (YYYY-mm-dd HH:MM:SS tz[+-]HHMM) +item title - string +item link - string +item description - string +item contenttype - string ("html" or "plain") +item id - string +item author - string +feed type - string ("rss" or "atom") +feed name - string (extra field added by sfeed_update) +feed url - string (extra field added by sfeed_update) + + +Usage +----- + +To build and install (respects DESTDIR and PREFIX variable): + +make install + + +Generate a sfeedrc config file from your exported list of feeds in opml +format: + +sfeed_opml_config < opmlfile.xml > $HOME/.sfeed/sfeedrc + + +To update feeds and format the feeds file (configfile argument is optional): + +sfeed_update "configfile" +sfeed_plain < $HOME/.sfeed/feeds > $HOME/.sfeed/feeds.txt +sfeed_html < $HOME/.sfeed/feeds > $HOME/.sfeed/feeds.html + + +Example script to view feeds with dmenu, opens selected url in $BROWSER: + +url=$(sfeed_plain < "$HOME/.sfeed/feeds" | dmenu -l 35 -i | + sed 's@^.* \([a-zA-Z]*://\)\(.*\)$@\1\2@') +[ ! "$url" = "" ] && $BROWSER "$url" + + +or to view in your browser: + +$BROWSER "$HOME/.sfeed/feeds.html" + + +or to view in your editor: + +$EDITOR "$HOME/.sfeed/feeds.txt" + + +tip to remove feeds older than a date (change time="YYYY mm dd HH mm ss") + +gawk -F '\t' 'BEGIN { + time = mktime("2012 01 01 12 34 56"); +} +{ + if(int($1) >= int(time)) { + print $0; + } +}' < feeds > feeds.clean + +mv feeds.clean feeds + + +License +------- + +MIT, see LICENSE file. + + +Author +------ + +Hiltjo Posthuma <hiltjo@codemadness.org> @@ -0,0 +1,4 @@ +[ ] opml export script (WIP). +[ ] rename sfeed_opml_config to sfeed_opml_import. +[ ] sfeed_update / sfeedrc: add base siteurl as parameter for feed function for feeds located at an other domain, for example feedburner.com +[ ] test opml import / export scripts with thunderbird, google reader, snownews and rssowl. diff --git a/common.c b/common.c new file mode 100644 index 0000000..91ac9ca --- /dev/null +++ b/common.c @@ -0,0 +1,116 @@ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <time.h> +#include <ctype.h> + +enum { FieldUnixTimestamp = 0, FieldTimeFormatted, FieldTitle, FieldLink, + FieldContent, FieldContentType, FieldId, FieldAuthor, FieldFeedType, + FieldFeedName, FieldFeedUrl, FieldLast }; + +const int FieldSeparator = '\t'; + +char * +afgets(char **p, size_t *size, FILE *fp) { + char buf[BUFSIZ], *alloc = NULL; + size_t n, len = 0, allocsiz; + int end = 0; + + while(fgets(buf, sizeof(buf), fp)) { + n = strlen(buf); + if(buf[n - 1] == '\n') { /* dont store newlines. 
*/ + buf[n - 1] = '\0'; + n--; + end = 1; /* newline found, end */ + } + len += n; + allocsiz = len + 1; + if(allocsiz > *size) { + if((alloc = realloc(*p, allocsiz))) { + *p = alloc; + *size = allocsiz; + } else { + free(*p); + *p = NULL; + fputs("error: could not realloc\n", stderr); + exit(EXIT_FAILURE); + return NULL; + } + } + strncpy((*p + (len - n)), buf, n); + if(end || feof(fp)) + break; + } + if(*p && len > 0) { + (*p)[len] = '\0'; + return *p; + } + return NULL; +} + +void /* print link; if link is relative use baseurl to make it absolute */ +printlink(const char *link, const char *baseurl) { + const char *ebaseproto, *ebasedomain, *p; + int isrelative; + + /* protocol part */ + for(p = link; *p && (isalpha(*p) || isdigit(*p) || *p == '+' || *p == '-' || *p == '.'); p++); + isrelative = strncmp(p, "://", strlen("://")); + if(isrelative) { /* relative link (baseurl is used). */ + if((ebaseproto = strstr(baseurl, "://"))) { + ebaseproto += strlen("://"); + fwrite(baseurl, 1, ebaseproto - baseurl, stdout); + } else { + ebaseproto = baseurl; + if(*baseurl || (link[0] == '/' && link[1] == '/')) + fputs("http://", stdout); + } + if(link[0] == '/') { /* relative to baseurl domain (not path). */ + if(link[1] == '/') /* absolute url but with protocol from baseurl. */ + link += 2; + else if((ebasedomain = strchr(ebaseproto, '/'))) /* relative to baseurl and baseurl path. */ + fwrite(ebaseproto, 1, ebasedomain - ebaseproto, stdout); + else + fputs(ebaseproto, stdout); + } else if((ebasedomain = strrchr(ebaseproto, '/'))) /* relative to baseurl and baseurl path. */ + fwrite(ebaseproto, 1, ebasedomain - ebaseproto + 1, stdout); + else { + fputs(ebaseproto, stdout); + if(*baseurl && *link) + fputc('/', stdout); + } + } + fputs(link, stdout); +} + +unsigned int +parseline(char **line, size_t *size, char **fields, unsigned int maxfields, FILE *fp, int separator) { + unsigned int i = 0; + char *prev, *s; + + if(afgets(line, size, fp)) { + for(prev = *line; (s = strchr(prev, separator)) && i <= maxfields; i++) { + *s = '\0'; /* null terminate string. */ + fields[i] = prev; + prev = s + 1; + } + fields[i] = prev; + for(i++; i < maxfields; i++) /* make non-parsed fields empty. */ + fields[i] = ""; + } + return i; +} + +void +printtime(time_t t) { + char buf[32]; + struct tm temp = { 0 }, *mktm; + + if(!(mktm = localtime_r(&t, &temp))) + return; + mktm->tm_isdst = -1; + + if(!strftime(buf, sizeof(buf) - 1, "%Y-%m-%d %H:%M", mktm)) + return; + fputs(buf, stdout); +} diff --git a/config.mk b/config.mk new file mode 100644 index 0000000..8bddcad --- /dev/null +++ b/config.mk @@ -0,0 +1,28 @@ +# sfeed version +VERSION = 0.8 + +# customize below to fit your system + +# paths +PREFIX = /usr/local +MANPREFIX = ${PREFIX}/share/man + +# includes and libs +INCS = +LIBEXPAT = -lexpat +LIBS = -lc + +# flags +#CFLAGS = -Os -D_POSIX_C_SOURCE=200112L -D_XOPEN_SOURCE=500 -DVERSION=\"${VERSION}\" +#LDFLAGS = -s ${LIBS} + +# debug +CFLAGS = -g -O0 -pedantic -Wall -Wextra -D_POSIX_C_SOURCE=200112L -D_XOPEN_SOURCE=700 -DVERSION=\"${VERSION}\" +LDFLAGS = ${LIBS} + +# Solaris +#CFLAGS = -fast ${INCS} -DVERSION=\"${VERSION}\" +#LDFLAGS = ${LIBS} + +# compiler and linker +CC = cc @@ -0,0 +1,44 @@ +.TH SFEED 1 sfeed\-VERSION +.SH NAME +sfeed \- simple RSS and Atom parser +.SH SYNOPSIS +.B sfeed +.SH DESCRIPTION +Read XML RSS or Atom feed data from stdin. Write feed data in a +tab-separated format to stdout. 
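
A direct-use sketch of the parser itself; the feed url is taken from
sfeedrc.example and curl -s only fetches the document silently:

curl -s http://xkcd.com/atom.xml | sfeed

sfeed_update runs this step for every configured feed and appends the feed
name and feed url fields afterwards.
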
+.SH TAB-SEPARATED FORMAT +The items are saved in a TSV-like format except newlines, tabs and +backslash are escaped with \\ (\\n, \\t and \\\\). Carriage returns (\\r) are +removed. +.TP +.B item timestamp (unix timestamp in GMT+0) +string +.TP +.B item timestamp (formatted) +string (YYYY-mm-dd HH:MM:SS tzname[+-]HHMM) +.TP +.B item title +string +.TP +.B item link +string +.TP +.B item description +string +.TP +.B item contenttype +string (html or plain) +.TP +.B item id +string +.TP +.B item author +string +.TP +.B feed type +string (rss or atom) +.SH SEE ALSO +.BR sh(1) +.BR sfeed_update(1) +.SH BUGS +Please report them! @@ -0,0 +1,477 @@ +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> +#include <string.h> +#include <time.h> +#include <ctype.h> +#include <expat.h> /* libexpat */ + +enum { FeedTypeNone = 0, FeedTypeRSS = 1, FeedTypeAtom = 2, FeedTypeLast = 3 }; +const char *feedtypes[] = { "", "rss", "atom" }; + +enum { ContentTypeNone = 0, ContentTypePlain = 1, ContentTypeHTML = 2, ContentTypeLast = 3 }; +const char *contenttypes[] = { "", "plain", "html" }; + +typedef struct string { /* String data / pool */ + char *data; /* data */ + size_t len; /* string length */ + size_t bufsiz; /* allocated size */ +} String; + +typedef struct feeditem { /* Feed item */ + String timestamp; + String title; + String link; + String content; + int contenttype; /* ContentTypePlain or ContentTypeHTML */ + String id; + String author; + int feedtype; /* FeedTypeRSS or FeedTypeAtom */ +} FeedItem; + +void die(const char *s); +void cleanup(void); + +const int FieldSeparator = '\t'; +FeedItem feeditem; /* data for current feed item */ +char tag[1024]; /* current XML tag being parsed. */ +char feeditemtag[1024]; /* current tag _inside_ a feeditem */ +XML_Parser parser; /* expat XML parser state */ +int incdata = 0; +char *standardtz = NULL; /* TZ variable at start of program */ + +void +string_clear(String *s) { + if(s->data) + s->data[0] = '\0'; /* clear string only; don't free, prevents + unnecessary reallocation */ + s->len = 0; +} + +void +string_buffer_init(String *s, size_t len) { + if(!(s->data = malloc(len))) + die("can't allocate enough memory"); + s->bufsiz = len; + string_clear(s); +} + +void +string_free(String *s) { + free(s->data); + s->data = NULL; + s->bufsiz = 0; + s->len = 0; +} + +int +string_buffer_expand(String *s, size_t newlen) { + char *p; + size_t alloclen; + /* check if allocation is necesary, dont shrink buffer */ + if(!s->data || (newlen > s->bufsiz)) { + /* should be more than bufsiz ofcourse */ + for(alloclen = 16; alloclen <= newlen; alloclen *= 2); + if(!(p = realloc(s->data, alloclen))) { + string_free(s); /* free previous allocation */ + die("can't allocate enough memory"); + } + s->bufsiz = alloclen; + s->data = p; + } + return s->bufsiz; +} + +void +string_append(String *s, const char *data, size_t len) { + string_buffer_expand(s, s->len + len); + memcpy(s->data + s->len, data, len); + s->len += len; + s->data[s->len] = '\0'; +} + +void /* cleanup parser, free allocated memory, etc */ +cleanup(void) { + XML_ParserFree(parser); + string_free(&feeditem.timestamp); + string_free(&feeditem.title); + string_free(&feeditem.link); + string_free(&feeditem.content); + string_free(&feeditem.id); + string_free(&feeditem.author); +} + +void /* print error message to stderr */ +die(const char *s) { + fputs("sfeed: ", stderr); + fputs(s, stderr); + fputc('\n', stderr); + cleanup(); + exit(EXIT_FAILURE); +} + +void +gettimetz(const char *s, char *buf, size_t bufsiz) { + 
const char *p = s; + int tzhour = 0, tzmin = 0; + char tzname[128] = "", *t = NULL; + unsigned int i; + + buf[0] = '\0'; + for(; *p && isspace(*p); p++); /* skip whitespace */ + /* detect time offset, assume time offset isn't specified in the first 18 characters */ + for(i = 0; *p && ((*p != '+' && *p != '-') || i <= 18); p++, i++); + + if(isalpha(*p)) { + if(*p == 'Z' || *p == 'z') { + strncpy(buf, "GMT+00:00", bufsiz); + return; + } else { + for(i = 0, t = &tzname[0]; i < (sizeof(tzname) - 1) && (*p && isalpha(*p)); i++) + *(t++) = *(p++); + *t = '\0'; + } + } else + strncpy(tzname, "GMT", sizeof(tzname) - 1); + if(!(*p)) { + strncpy(buf, tzname, bufsiz); + return; + } + /* NOTE: reverses time offsets for TZ */ + if((sscanf(p, "+%02d:%02d", &tzhour, &tzmin)) > 0) + snprintf(buf, bufsiz, "%s-%02d:%02d", tzname, tzhour, tzmin); + else if((sscanf(p, "-%02d:%02d", &tzhour, &tzmin)) > 0) + snprintf(buf, bufsiz, "%s+%02d:%02d", tzname, tzhour, tzmin); + else if((sscanf(p, "+%02d%02d", &tzhour, &tzmin)) > 0) + snprintf(buf, bufsiz, "%s-%02d:%02d", tzname, tzhour, tzmin); + else if((sscanf(p, "-%02d%02d", &tzhour, &tzmin)) > 0) + snprintf(buf, bufsiz, "%s+%02d:%02d", tzname, tzhour, tzmin); + else if(sscanf(p, "+%d", &tzhour) > 0) + snprintf(buf, bufsiz, "%s-%02d:00", tzname, tzhour); + else if(sscanf(p, "-%d", &tzhour) > 0) + snprintf(buf, bufsiz, "%s+%02d:00", tzname, tzhour); +} + +time_t +parsetime(const char *s, char *buf, size_t bufsiz) { + struct tm tm = { 0 }; + time_t t = 0; + char timebuf[64], tz[256], *p; + + if(buf) + buf[0] = '\0'; + gettimetz(s, tz, sizeof(tz) - 1); + if(!standardtz || strcmp(standardtz, tz)) { + if(!strcmp(tz, "")) { /* restore TZ */ + if(standardtz) + setenv("TZ", standardtz, 1); + else + unsetenv("TZ"); + } + else + setenv("TZ", tz, 1); + tzset(); + } + if((strptime(s, "%Y-%m-%dT%H:%M:%SZ", &tm)) || + (strptime(s, "%Y-%m-%d %H:%M:%S", &tm)) || + (strptime(s, "%a, %d %b %Y %H:%M:%S", &tm)) || + (strptime(s, "%Y-%m-%dT%H:%M:%S", &tm))) { + tm.tm_isdst = -1; /* detect Daylight Saving Time */ + if((t = mktime(&tm)) == -1) + t = 0; + if(buf && (strftime(timebuf, sizeof(timebuf) - 1, + "%Y-%m-%d %H:%M:%S", &tm))) { + for(p = tz; *p; p++) /* print time offset reverse */ + *p = ((*p == '-') ? '+' : (*p == '+' ? 
'-' : *p)); + snprintf(buf, bufsiz, "%s %s", timebuf, tz); + } + } + return t; +} + +/* print text, ignore tabs, newline and carriage return etc +1 * print some HTML 2.0 / XML 1.0 as normal text */ +void +string_print_trimmed(String *s) { + const char *entities[] = { + "&", "&", "<", "<", ">", ">", "'", "'", """, "\"", + NULL, NULL + }; + const char *p, *n, **e; + unsigned int len, found; + if(!s->data) + return; + for(p = s->data; isspace(*p); p++); /* strip leading whitespace */ + for(; *p; ) { /* ignore tabs, newline and carriage return etc */ + if(!isspace(*p) || *p == ' ') { + if(*p == '<') { /* skip tags */ + if((n = strchr(p, '>'))) + p = n; + else + putchar('<'); + } else if(*p == '&') { + for(e = entities, found = 0; *e; e += 2) { + len = strlen(*e); + if(!strncmp(*e, p, len)) { /* compare entities and "replace" */ + fputs(*(e + 1), stdout); + p += len; + found = 1; + break; + } + } + if(found) + continue; + else + putchar('&'); + } else + fputc(*p, stdout); + } + p++; + } +} + +void /* print text, escape tabs, newline and carriage return etc */ +string_print_textblock(String *s) { + const char *p; + if(!s->data) + return; + for(p = s->data; *p && isspace(*p); p++); /* strip leading whitespace */ + for(; *p; p++) { + if(*p == '\n') /* escape newline */ + fputs("\\n", stdout); + else if(*p == '\\') /* escape \ */ + fputs("\\\\", stdout); + else if(*p == '\t') /* tab */ + fputs("\\t", stdout); + else if(!isspace(*p) || *p == ' ') /* ignore other whitespace chars */ + fputc(*p, stdout); + } +} + +int +istag(const char *name, const char *name2) { + return (!strcasecmp(name, name2)); +} + +int +isattr(const char *name, const char *name2) { + return (!strcasecmp(name, name2)); +} + +char * /* search for attr value by attr name in attributes list */ +getattrvalue(const char **atts, const char *name) { + const char **attr = NULL, *key, *value; + if(!atts || !(*atts)) + return NULL; + for(attr = atts; *attr; ) { + key = *(attr++); + value = *(attr++); + if(key && value && isattr(key, name)) + return (char *)value; + } + return NULL; +} + +void XMLCALL +xml_handler_start_element(void *data, const char *name, const char **atts) { + const char *value; + + strncpy(tag, name, sizeof(tag) - 1); /* set tag */ + if(feeditem.feedtype != FeedTypeNone) { /* in item */ + if(feeditem.feedtype == FeedTypeAtom) { + if(istag(feeditemtag, "content") || istag(feeditemtag, "summary")) { + XML_DefaultCurrent(parser); /* pass to default handler to process inline HTML etc */ + } else if(istag(name, "link")) { /* link href attribute */ + if((value = getattrvalue(atts, "href"))) + string_append(&feeditem.link, value, strlen(value)); + } else if(istag(name, "content") || istag(name, "summary")) { + if((value = getattrvalue(atts, "type"))) { /* content type is HTML or plain text */ + if(!strcasecmp(value, "xhtml") || !strcasecmp(value, "text/xhtml") || + !strcasecmp(value, "html") || !strcasecmp(value, "text/html")) + feeditem.contenttype = ContentTypeHTML; + } + } + } else if(feeditem.feedtype == FeedTypeRSS) { + if(istag(feeditemtag, "description")) + XML_DefaultCurrent(parser); /* pass to default handler to process inline HTML etc */ + } + if(feeditemtag[0] == '\0') /* set tag if not already set. 
*/ + strncpy(feeditemtag, name, sizeof(feeditemtag) - 1); + } else { /* start of RSS or Atom entry / item */ + if(istag(name, "entry")) { /* Atom */ + feeditem.feedtype = FeedTypeAtom; + feeditem.contenttype = ContentTypePlain; /* Default content type */ + } else if(istag(name, "item")) { /* RSS */ + feeditem.feedtype = FeedTypeRSS; + feeditem.contenttype = ContentTypeHTML; /* Default content type */ + } + } +} + +void XMLCALL +xml_handler_end_element(void *data, const char *name) { + char timebuf[64]; + + if(feeditem.feedtype != FeedTypeNone) { + /* end of RSS or Atom entry / item */ + if((istag(name, "entry") && (feeditem.feedtype == FeedTypeAtom)) || /* Atom */ + (istag(name, "item") && (feeditem.feedtype == FeedTypeRSS))) { /* RSS */ + printf("%ld", (long)parsetime((&feeditem.timestamp)->data, timebuf, + sizeof(timebuf) - 1)); + fputc(FieldSeparator, stdout); + printf("%s", timebuf); + fputc(FieldSeparator, stdout); + string_print_trimmed(&feeditem.title); + fputc(FieldSeparator, stdout); + string_print_trimmed(&feeditem.link); + fputc(FieldSeparator, stdout); + string_print_textblock(&feeditem.content); + fputc(FieldSeparator, stdout); + fputs(contenttypes[feeditem.contenttype], stdout); + fputc(FieldSeparator, stdout); + string_print_trimmed(&feeditem.id); + fputc(FieldSeparator, stdout); + string_print_trimmed(&feeditem.author); + fputc(FieldSeparator, stdout); + fputs(feedtypes[feeditem.feedtype], stdout); + fputc('\n', stdout); + + /* clear strings */ + string_clear(&feeditem.timestamp); + string_clear(&feeditem.title); + string_clear(&feeditem.link); + string_clear(&feeditem.content); + string_clear(&feeditem.id); + string_clear(&feeditem.author); + feeditem.feedtype = FeedTypeNone; + feeditem.contenttype = ContentTypePlain; + incdata = 0; + feeditemtag[0] = '\0'; /* unset tag */ + } else if(!strcmp(feeditemtag, name)) { /* clear */ + feeditemtag[0] = '\0'; /* unset tag */ + } else { + if(feeditem.feedtype == FeedTypeAtom) { + if(istag(feeditemtag, "content") || istag(feeditemtag, "summary")) { + /* pass to default handler to process inline HTML etc */ + XML_DefaultCurrent(parser); + return; + } + } + } + } + tag[0] = '\0'; /* unset tag */ +} + +/* NOTE: this handler can be called multiple times if the data in this block + * is bigger than the buffer */ +void XMLCALL +xml_handler_data(void *data, const XML_Char *s, int len) { + if(feeditem.feedtype == FeedTypeRSS) { + if(istag(feeditemtag, "pubdate") || istag(feeditemtag, "dc:date")) + string_append(&feeditem.timestamp, s, len); + else if(istag(feeditemtag, "title")) + string_append(&feeditem.title, s, len); + else if(istag(feeditemtag, "link")) + string_append(&feeditem.link, s, len); + else if(istag(feeditemtag, "description")) { + if(incdata) + XML_DefaultCurrent(parser); /* pass to default handler to process inline HTML etc */ + else + string_append(&feeditem.content, s, len); + } else if(istag(feeditemtag, "guid")) + string_append(&feeditem.id, s, len); + else if(istag(feeditemtag, "author") || istag(feeditemtag, "dc:creator")) + string_append(&feeditem.author, s, len); + } else if(feeditem.feedtype == FeedTypeAtom) { + if(istag(feeditemtag, "published") || istag(feeditemtag, "updated")) + string_append(&feeditem.timestamp, s, len); + else if(istag(feeditemtag, "title")) { + string_append(&feeditem.title, s, len); + } else if(istag(feeditemtag, "summary") || istag(feeditemtag, "content")) { + if(feeditem.contenttype == ContentTypeHTML) { + if(incdata) + XML_DefaultCurrent(parser); /* pass to default handler to process inline 
HTML etc */ + else + string_append(&feeditem.content, s, len); + } else + XML_DefaultCurrent(parser); /* pass to default handler to process inline HTML etc */ + } else if(istag(feeditemtag, "id")) + string_append(&feeditem.id, s, len); + else if(istag(feeditemtag, "name")) /* assume this is: <author><name></name></author> */ + string_append(&feeditem.author, s, len); + } +} + +int /* parse XML from stream using setup parser, return 1 on success, 0 on failure. */ +xml_parse_stream(XML_Parser parser, FILE *fp) { + char buffer[BUFSIZ]; + int done = 0, len = 0; + + while(!feof(fp)) { + len = fread(buffer, 1, sizeof(buffer), fp); + done = (feof(fp) || ferror(fp)); + if(XML_Parse(parser, buffer, len, done) == XML_STATUS_ERROR && (len > 0)) { + if(XML_GetErrorCode(parser) == XML_ERROR_NO_ELEMENTS) + return 1; /* Ignore "no elements found" / empty document as an error */ + fprintf(stderr, "sfeed: error parsing xml %s at line %lu column %lu\n", + XML_ErrorString(XML_GetErrorCode(parser)), (unsigned long)XML_GetCurrentLineNumber(parser), + (unsigned long)XML_GetCurrentColumnNumber(parser)); + return 0; + } + } while(!done); + return 1; +} + +void +xml_handler_default(void *data, const XML_Char *s, int len) { + if((feeditem.feedtype == FeedTypeAtom && (istag(feeditemtag, "summary") || istag(feeditemtag, "content"))) || + (feeditem.feedtype == FeedTypeRSS && istag(feeditemtag, "description"))) + /*if(!istag(tag, "script") && !istag(tag, "style"))*/ /* ignore data in inline script and style */ + string_append(&feeditem.content, s, len); +} + +void /* NOTE: data is null terminated. */ +xml_handler_comment(void *data, const XML_Char *s) { +} + +void +xml_cdata_section_handler_start(void *userdata) { + incdata = 1; +} + +void +xml_cdata_section_handler_end(void *userdata) { + incdata = 0; +} + +int +main(void) { + int status; + standardtz = getenv("TZ"); + + /* init strings and initial memory pool size */ + string_buffer_init(&feeditem.timestamp, 64); + string_buffer_init(&feeditem.title, 256); + string_buffer_init(&feeditem.link, 1024); + string_buffer_init(&feeditem.content, 4096); + string_buffer_init(&feeditem.id, 1024); + string_buffer_init(&feeditem.author, 256); + feeditem.contenttype = ContentTypePlain; + feeditem.feedtype = FeedTypeNone; + feeditemtag[0] = '\0'; /* unset tag */ + tag[0] = '\0'; /* unset tag */ + + if(!(parser = XML_ParserCreate("UTF-8"))) + die("can't create parser"); + + XML_SetElementHandler(parser, xml_handler_start_element, xml_handler_end_element); + XML_SetCharacterDataHandler(parser, xml_handler_data); + XML_SetCommentHandler(parser, xml_handler_comment); + XML_SetCdataSectionHandler(parser, xml_cdata_section_handler_start, xml_cdata_section_handler_end); + XML_SetDefaultHandler(parser, xml_handler_default); + + status = xml_parse_stream(parser, stdin); + cleanup(); + + return status ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/sfeed_html.1 b/sfeed_html.1 new file mode 100644 index 0000000..e645d4b --- /dev/null +++ b/sfeed_html.1 @@ -0,0 +1,14 @@ +.TH SFEED 1 sfeed\-VERSION +.SH NAME +sfeed_html \- format feeds file (TSV) from sfeed_update to HTML +.SH SYNOPSIS +.B sfeed_html +.SH DESCRIPTION +Format feeds file (TSV) from sfeed_update to HTML. Reads TSV data from +stdin and writes HTML to stdout. For the exact TSV format see sfeed_update(1). +.SH SEE ALSO +.BR sfeed_plain(1) +.BR sfeed_update(1) +.BR sfeed(1) +.SH BUGS +Please report them! 
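
A minimal end-to-end sketch; the paths are the sfeed_update defaults and
$BROWSER is assumed to be set:

sfeed_update
sfeed_html < "$HOME/.sfeed/feeds" > "$HOME/.sfeed/feeds.html"
$BROWSER "$HOME/.sfeed/feeds.html"

Items newer than one day are marked bold/underlined and counted per feed in
the sidebar.
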
diff --git a/sfeed_html.c b/sfeed_html.c new file mode 100644 index 0000000..c195c86 --- /dev/null +++ b/sfeed_html.c @@ -0,0 +1,230 @@ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <time.h> +#include <ctype.h> +#include "common.c" + +/* Feed info. */ +struct feed { + char *name; /* feed name */ + unsigned long new; /* amount of new items per feed */ + unsigned long total; /* total items */ + struct feed *next; /* linked list */ +}; + +static int showsidebar = 1; /* show sidebar ? */ + +void /* print error message to stderr */ +die(const char *s) { + fputs("sfeed_html: ", stderr); + fputs(s, stderr); + fputc('\n', stderr); + exit(EXIT_FAILURE); +} + +struct feed * +feednew(void) { + struct feed *f; + if(!(f = calloc(1, sizeof(struct feed)))) + die("can't allocate enough memory"); + return f; +} + +void +feedsfree(struct feed *f) { + struct feed *next; + while(f) { + next = f->next; + free(f->name); + free(f); + f = next; + } +} + +/* print feed name for id; spaces and tabs in string as "-" (spaces in anchors are not valid). */ +void +printfeednameid(const char *s) { + for(; *s; s++) + putchar(isspace(*s) ? '-' : *s); +} + +void +printhtmlencoded(const char *s) { + for(; *s; s++) { + switch(*s) { + case '<': fputs("<", stdout); break; + case '>': fputs(">", stdout); break; + case '&': fputs("&", stdout); break; + default: + putchar(*s); + } + } +} + +int +main(void) { + char *line = NULL, *fields[FieldLast]; + unsigned long totalfeeds = 0, totalnew = 0; + unsigned int islink, isnew; + struct feed *feedcurrent = NULL, *feeds = NULL; /* start of feeds linked-list. */ + time_t parsedtime, comparetime; + size_t size = 0; + + tzset(); + comparetime = time(NULL) - (3600 * 24); /* 1 day is old news */ + fputs( + "<!DOCTYPE HTML>\n" + "<html dir=\"ltr\" lang=\"en\">\n" + " <head>\n" + " <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n" + " <style type=\"text/css\">\n" + " body {\n" + " font-family: monospace;\n" + " font-size: 9pt;\n" + " color: #333;\n" + " background-color: #fff;\n" + " overflow: hidden;\n" + " }\n" + " #feedcontent td {\n" + " white-space: nowrap;\n" + " }\n" + " #feedcontent h2 {\n" + " font-size: 14pt;\n" + " }\n" + " #feedcontent a {\n" + " display: block;\n" + " }\n" + " #feedcontent ul, #feedcontent li {\n" + " list-style: none;\n" + " padding: 0;\n" + " margin: 0;\n" + " }\n" + " #feedcontent h2 a, #feedcontent ul li a {\n" + " color: inherit;\n" + " }\n" + " #feedcontent ul li a {\n" + " padding: 5px 3px 5px 10px;\n" + " }\n" + " #feedcontent div#sidebar {\n" + " background-color: inherit;\n" + " position: fixed;\n" + " top: 0;\n" + " left: 0;\n" + " width: 175px;\n" + " height: 100%;\n" + " overflow: hidden;\n" + " overflow-y: auto;\n" + " z-index: 999;\n" + " }\n" + " #feedcontent div#items {\n" + " left: 175px;\n" + " }\n" + " #feedcontent div#items-nosidebar {\n" + " left: 0px;\n" + " }\n" + " #feedcontent div#items-nosidebar,\n" + " #feedcontent div#items {\n" + " position: absolute;\n" + " height: 100%;\n" + " top: 0;\n" + " right: 0;\n" + " overflow: auto;\n" + " padding: 0 15px;\n" + " }\n" + " </style>\n" + " </head>\n" + " <body>\n" + " <div id=\"feedcontent\">\n", + stdout); + + while(parseline(&line, &size, fields, FieldLast, stdin, FieldSeparator) > 0) { + /* first of feed section or new feed section. */ + if(!totalfeeds || strcmp(feedcurrent->name, fields[FieldFeedName])) { + if(totalfeeds) { /* end previous one. 
*/ + fputs("</table>\n", stdout); + feedcurrent->next = feednew(); + feedcurrent = feedcurrent->next; + } else { + feedcurrent = feednew(); + feeds = feedcurrent; /* first item. */ + fputs("\t\t<div id=\"items", stdout); + if(fields[FieldFeedName][0] == '\0') { + fputs("-nosidebar", stdout); /* set other id on div if no sidebar for styling */ + showsidebar = 0; + } + fputs("\">\n", stdout); + } + if(!(feedcurrent->name = strdup(fields[FieldFeedName]))) + die("can't allocate enough memory"); + if(fields[FieldFeedName][0] != '\0') { + fputs("<h2 id=\"", stdout); + printfeednameid(feedcurrent->name); + fputs("\"><a href=\"#", stdout); + printfeednameid(feedcurrent->name); + fputs("\">", stdout); + fputs(feedcurrent->name, stdout); + fputs("</a></h2>\n", stdout); + } + fputs("<table>", stdout); + totalfeeds++; + } + parsedtime = (time_t)strtol(fields[FieldUnixTimestamp], NULL, 10); + isnew = (parsedtime >= comparetime); + islink = (strlen(fields[FieldLink]) > 0); + totalnew += isnew; + feedcurrent->new += isnew; + feedcurrent->total++; + + fputs("<tr><td>", stdout); + printtime(parsedtime); + fputs("</td><td>", stdout); + if(isnew) + fputs("<b><u>", stdout); + if(islink) { + fputs("<a href=\"", stdout); + printlink(fields[FieldLink], fields[FieldFeedUrl]); + fputs("\">", stdout); + } + printhtmlencoded(fields[FieldTitle]); + if(islink) + fputs("</a>", stdout); + if(isnew) + fputs("</u></b>", stdout); + fputs("</td></tr>\n", stdout); + } + if(totalfeeds) { + fputs("</table>\n", stdout); + fputs("\t\t</div>\n", stdout); /* div items */ + } + if(showsidebar) { + fputs("\t\t<div id=\"sidebar\">\n\t\t\t<ul>\n", stdout); + for(feedcurrent = feeds; feedcurrent; feedcurrent = feedcurrent->next) { + if(!feedcurrent->name || feedcurrent->name[0] == '\0') + continue; + fputs("<li><a href=\"#", stdout); + printfeednameid(feedcurrent->name); + fputs("\">", stdout); + if(feedcurrent->new > 0) + fputs("<b><u>", stdout); + fputs(feedcurrent->name, stdout); + fprintf(stdout, " (%lu)", feedcurrent->new); + if(feedcurrent->new > 0) + fputs("</u></b>", stdout); + fputs("</a></li>\n", stdout); + } + fputs("\t\t\t</ul>\n\t\t</div>\n", stdout); + } + fputs( + " </div>\n" + " </body>\n" + " <title>Newsfeeds (", + stdout); + fprintf(stdout, "%lu", totalnew); + fputs(")</title>\n</html>", stdout); + + free(line); /* free line */ + feedsfree(feeds); /* free feeds linked-list */ + + return EXIT_SUCCESS; +} diff --git a/sfeed_opml_config.1 b/sfeed_opml_config.1 new file mode 100644 index 0000000..523f99e --- /dev/null +++ b/sfeed_opml_config.1 @@ -0,0 +1,11 @@ +.TH SFEED 1 sfeed\-VERSION +.SH NAME +sfeed_opml_config \- generate a sfeedrc config file based on an opml file +.SH SYNOPSIS +.B sfeed_opml_config +.SH DESCRIPTION +Reads the opml XML data from stdin and writes the config file text to stdout. +.SH SEE ALSO +.BR sfeed_update(1) +.SH BUGS +Please report them! 
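
A usage sketch (the opml filename is only an example):

sfeed_opml_config < subscriptions.opml > "$HOME/.sfeed/sfeedrc"

Each outline element's text (or title) attribute becomes the feed name and
its xmlUrl attribute the feed url, so a subscription ends up inside the
generated feeds() function as a line like:

	feed "codemadness" "http://www.codemadness.nl/blog/rss.xml"
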
diff --git a/sfeed_opml_config.c b/sfeed_opml_config.c new file mode 100644 index 0000000..0d74820 --- /dev/null +++ b/sfeed_opml_config.c @@ -0,0 +1,87 @@ +/* convert an opml file to sfeedrc file */ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <strings.h> +#include <expat.h> /* libexpat */ + +XML_Parser parser; /* expat XML parser state */ + +char * /* search for attr value by attr name in attributes list */ +getattrvalue(const char **atts, const char *name) { + const char **attr = NULL, *key, *value; + if(!atts || !(*atts)) + return NULL; + for(attr = atts; *attr; ) { + key = *(attr++); + value = *(attr++); + if(key && value && !strcasecmp(key, name)) + return (char *)value; + } + return NULL; +} + +void XMLCALL +xml_handler_start_element(void *data, const char *name, const char **atts) { + char *feedurl = NULL, *feedname = NULL;; + + if(!strcasecmp(name, "outline")) { + if(!(feedname = getattrvalue(atts, "text")) && + !(feedname = getattrvalue(atts, "title"))) + feedname = "unnamed"; + if(!(feedurl = getattrvalue(atts, "xmlurl"))) + feedurl = ""; + printf("\tfeed \"%s\" \"%s\"\n", feedname, feedurl); + } +} + +void XMLCALL +xml_handler_end_element(void *data, const char *name) { +} + +int /* parse XML from stream using setup parser, return 1 on success, 0 on failure. */ +xml_parse_stream(XML_Parser parser, FILE *fp) { + char buffer[BUFSIZ]; + int done = 0, len = 0; + + while(!feof(fp)) { + len = fread(buffer, 1, sizeof(buffer), fp); + done = (feof(fp) || ferror(fp)); + if(XML_Parse(parser, buffer, len, done) == XML_STATUS_ERROR && (len > 0)) { + if(XML_GetErrorCode(parser) == XML_ERROR_NO_ELEMENTS) + return 1; /* Ignore "no elements found" / empty document as an error */ + fprintf(stderr, "sfeed_opml_config: error parsing xml %s at line %lu column %lu\n", + XML_ErrorString(XML_GetErrorCode(parser)), (unsigned long)XML_GetCurrentLineNumber(parser), + (unsigned long)XML_GetCurrentColumnNumber(parser)); + return 0; + } + } while(!done); + return 1; +} + +int main(void) { + int status; + + if(!(parser = XML_ParserCreate("UTF-8"))) { + fputs("sfeed_opml_config: can't create parser", stderr); + exit(EXIT_FAILURE); + } + XML_SetElementHandler(parser, xml_handler_start_element, xml_handler_end_element); + + fputs( + "# paths\n" + "# NOTE: make sure to uncomment all these if you change it.\n" + "#sfeedpath=\"$HOME/.sfeed\"\n" + "#sfeedfile=\"$sfeedpath/feeds\"\n" + "#sfeedfilenew=\"$sfeedfile.new\"\n" + "\n" + "# list of feeds to fetch:\n" + "feeds() {\n" + " # feed <name> <url> [encoding]\n", stdout); + status = xml_parse_stream(parser, stdin); + fputs("}\n", stdout); + + XML_ParserFree(parser); + + return status ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/sfeed_plain.1 b/sfeed_plain.1 new file mode 100644 index 0000000..3f396aa --- /dev/null +++ b/sfeed_plain.1 @@ -0,0 +1,15 @@ +.TH SFEED 1 sfeed\-VERSION +.SH NAME +sfeed_plain \- format feeds file (TSV) from sfeed_update to plain text +.SH SYNOPSIS +.B sfeed +.SH DESCRIPTION +Format feeds file (TSV) from sfeed_update to plain text. Reads TSV data from +stdin and writes plain text to stdout. For the exact TSV format see +sfeed_update(1). +.SH SEE ALSO +.BR sfeed_html(1) +.BR sfeed_update(1) +.BR sfeed(1) +.SH BUGS +Please report them! 
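
A small filtering sketch, relying on the " N " marker sfeed_plain prints at
the start of the line for items newer than one day (the path is the
sfeed_update default):

sfeed_plain < "$HOME/.sfeed/feeds" | grep '^ N '
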
diff --git a/sfeed_plain.c b/sfeed_plain.c new file mode 100644 index 0000000..d4045c9 --- /dev/null +++ b/sfeed_plain.c @@ -0,0 +1,42 @@ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <time.h> +#include "common.c" + +void +printutf8padded(const char *s, size_t len) { + unsigned int n = 0, i = 0; + + for(; s[i] && n < len; i++) { + if((s[i] & 0xc0) != 0x80) /* start of character */ + n++; + putchar(s[i]); + } + for(; n < len; n++) + putchar(' '); +} + +int +main(void) { + char *line = NULL, *fields[FieldLast]; + time_t parsedtime, comparetime; + size_t size = 0; + + tzset(); + comparetime = time(NULL) - (3600 * 24); /* 1 day is old news */ + while(parseline(&line, &size, fields, FieldLast, stdin, FieldSeparator) > 0) { + parsedtime = (time_t)strtol(fields[FieldUnixTimestamp], NULL, 10); + printf(" %c ", (parsedtime >= comparetime) ? 'N' : ' '); + if(fields[FieldFeedName][0] != '\0') + printf("%-15.15s ", fields[FieldFeedName]); + printtime(parsedtime); + fputs(" ", stdout); + printutf8padded(fields[FieldTitle], 70); + fputs(" ", stdout); + printlink(fields[FieldLink], fields[FieldFeedUrl]); + putchar('\n'); + } + free(line); + return EXIT_SUCCESS; +} diff --git a/sfeed_update b/sfeed_update new file mode 100755 index 0000000..52b9728 --- /dev/null +++ b/sfeed_update @@ -0,0 +1,116 @@ +#!/bin/sh +# update feeds, merge with old feeds. +# NOTE: assumes "sfeed_*" files are in $PATH. + +# defaults +sfeedpath="$HOME/.sfeed" +sfeedfile="$sfeedpath/feeds" +# temporary file for new feeds (for merging). +sfeedfilenew="$sfeedfile.new" + +# load config (evaluate shellscript). +# loadconfig(configfile) +loadconfig() { + # allow to specify config via argv[1]. + if [ ! "$1" = "" ]; then + # get absolute path of config file. + config=$(readlink -f "$1") + else + # default config location. + config="$HOME/.sfeed/sfeedrc" + fi + + # load config: config is loaded here to be able to override above variables + # (sfeedpath, sfeedfile, etc). + if [ -r "$config" ]; then + . "$config" + else + echo "Configuration file \"$config\" does not exist or is not readable." >&2 + echo "See sfeedrc.example for an example." >&2 + exit 1 + fi +} + +# merge raw files. +# merge(oldfile, newfile) +merge() { + # unique sort by id, link, title. + # order by feedname (asc), feedurl (asc) and timestamp (desc). + (cat "$1" "$2" 2> /dev/null) | + sort -t ' ' -u -k7,7 -k4,4 -k3,3 | + sort -t ' ' -k10,10 -k11,11 -k1r,1 +} + +# fetch a feed via HTTP/HTTPS etc. +# fetchfeed(url, name) +fetchfeed() { + if (curl -f -s -S -L --max-time 30 -z "$lastupdated" "$1"); then + printf "%s\n" "[`date`] Fetching $2 [$1] ... done" >&2 + else + printf "%s\n" "[`date`] Fetching $2 [$1] ... fail" >&2 + fi +} + +# add field after line, output to stdout. +# addfield(field) +addfield() { + # NOTE: IFS is set and restored to prevent stripping whitespace. + OLDIFS="$IFS" + IFS=" +" + while read -r line; do + printf "%s %s\n" "${line}" "$1" + done + IFS="$OLDIFS" +} + +# fetch and parse feed. +# feed(name, url, encoding) +feed() { + tmpfile=$(mktemp -p "$TMPDIR") + (if [ "$3" = "" ]; then + # don't use iconv if encoding not set in config. + fetchfeed "$2" "$1" | sfeed | addfield "$1 $2" + else + # use iconv to convert encoding to UTF-8. + fetchfeed "$2" "$1" | iconv -cs -f "$3" -t "utf-8" | sfeed | addfield "$1 $2" + fi) > "$tmpfile" +} + +terminated() { + isrunning="0" +} + +cleanup() { + # remove temporary files + rm -rf "$tmpfile" "$TMPDIR" +} + +# load config file. +loadconfig "$1" +# fetch feeds and store in temporary file. 
+TMPDIR=$(mktemp -d -t "sfeed_XXXXXX") +# get date of last modified feedfile in format: +# YYYYmmdd HH:MM:SS [+-][0-9]* +lastupdated=$(stat -c "%y" "$sfeedfile" 2> /dev/null | cut -c 1-4,6-7,9-10,11-19,30-) +# Kill whole current process group on ^C. +isrunning="1" +trap -- "terminated" "15" # SIGTERM: signal to terminate parent. +trap -- "kill -TERM -$$" "2" # SIGINT: kill all running childs >:D +# fetch feeds specified in config file. +feeds +# make sure path exists. +mkdir -p "$sfeedpath" +# wait till all feeds are fetched (allows running in parallel). +wait +[ "$isrunning" = "0" ] && cleanup && exit 1 # if terminated cleanup. +# concat all individual feed files to a single file. +# NOTE: mktemp uses $TMPDIR for temporary directory. +tmpfile=$(mktemp -t "sfeed_XXXXXX") +find "$TMPDIR" -type f -exec cat {} \; > "$tmpfile" +# get new data and merge with old. +merge "$sfeedfile" "$tmpfile" > "$sfeedfilenew" +# overwrite old file with updated file +mv "$sfeedfilenew" "$sfeedfile" +# cleanup temporary files etc. +cleanup diff --git a/sfeed_update.1 b/sfeed_update.1 new file mode 100644 index 0000000..e9cffc1 --- /dev/null +++ b/sfeed_update.1 @@ -0,0 +1,82 @@ +.TH SFEED 1 sfeed\-VERSION +.SH NAME +sfeed_update \- update feeds and merge with old feeds +.SH SYNOPSIS +.B sfeed_update +.RB [configfile] +.SH OPTIONS +.TP +.B [configfile] +config file, if not specified uses the location $HOME/.sfeed/sfeedrc by default (see FILES READ section for more information). +.SH DESCRIPTION +.TP +Update feeds and merge with old feeds in the file $HOME/.sfeed/feeds by default. +.SH TAB-SEPARATED FORMAT +The items are saved in a TSV-like format except newlines, tabs and +backslash are escaped with \\ (\\n, \\t and \\\\). Carriage returns (\\r) are +removed. +.TP +.B item timestamp (unix timestamp in GMT+0) +string +.TP +.B item timestamp (formatted) +string (YYYY-mm-dd HH:MM:SS tzname[+-]HHMM +.TP +.B item title +string +.TP +.B item link +string +.TP +.B item description +string +.TP +.B item contenttype +string (html or plain) +.TP +.B item id +string +.TP +.B item author +string +.TP +.B feed type +string (rss or atom) +.TP +.B feed name +string (extra field added by sfeed_update) +.TP +.B feed url +string (extra field added by sfeed_update) +.SH FILES READ +.TP +.B sfeedrc +Config file, see the sfeedrc.example file for an example. +This file is evaluated as a shellscript in sfeed_update. +You can for example override the fetchfeed() function to +use wget, fetch or an other download program or you can +override the merge() function to change the merge logic. +The function feeds() is called to fetch the feeds. The +function feed() can safely be executed as a parallel job +in your sfeedrc config file to speedup updating. +.SH FILES WRITTEN +.TP +.B feeds +Tab-separated format containing all feeds. +The sfeed_update script merges new items with this file. +.TP +.B feeds.new +Temporary file used by sfeed_update to merge items. +.SH EXAMPLES +.TP +To update feeds and format the feeds file: +.nf +sfeed_update "configfile" +sfeed_plain < $HOME/.sfeed/feeds > $HOME/.sfeed/feeds.txt +sfeed_html < $HOME/.sfeed/feeds > $HOME/.sfeed/feeds.html +.SH SEE ALSO +.BR sh(1) +.BR sfeed_plain(1) +.BR sfeed_html(1) +.SH BUGS +Please report them! diff --git a/sfeedrc.example b/sfeedrc.example new file mode 100644 index 0000000..d25777a --- /dev/null +++ b/sfeedrc.example @@ -0,0 +1,17 @@ +# paths +# NOTE: make sure to uncomment all these if you change it. 
+#sfeedpath="$HOME/.sfeed" +#sfeedfile="$sfeedpath/feeds" +#sfeedfilenew="$sfeedfile.new" + +# list of feeds to fetch: +feeds() { + # feed <name> <url> [encoding] + feed "codemadness" "http://www.codemadness.nl/blog/rss.xml" + feed "explosm" "http://feeds.feedburner.com/Explosm" + feed "linux kernel" "http://kernel.org/kdist/rss.xml" "iso-8859-1" + feed "phoronix" "http://feeds.feedburner.com/Phoronix" + feed "slashdot" "http://rss.slashdot.org/Slashdot/slashdot" + feed "tweakers" "http://feeds.feedburner.com/tweakers/mixed" "iso-8859-1" + feed "xkcd" "http://xkcd.com/atom.xml" +} |
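
As the README and sfeed_update(1) note, fetchfeed() can be overridden in
sfeedrc to use another download program. A sketch using wget instead of
curl, keeping the same fetchfeed(url, name) interface; note it drops the
if-modified-since behaviour that the curl -z option provides:

# fetch a feed via HTTP/HTTPS etc. using wget
# fetchfeed(url, name)
fetchfeed() {
	if wget -q -T 30 -O - "$1"; then
		printf "%s\n" "[`date`] Fetching $2 [$1] ... done" >&2
	else
		printf "%s\n" "[`date`] Fetching $2 [$1] ... fail" >&2
	fi
}

Because sfeed_update calls wait after feeds(), the feed lines inside feeds()
may also be put in the background with a trailing & to fetch in parallel.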