From db5ffcaa8c133d249aafa4a64f3d827dd513d995 Mon Sep 17 00:00:00 2001 From: Hiltjo Posthuma Date: Fri, 3 Aug 2012 12:03:17 +0200 Subject: New initial repo Signed-off-by: Hiltjo Posthuma --- .gitignore | 5 + CREDITS | 1 + LICENSE | 21 +++ Makefile | 95 +++++++++++ README | 143 ++++++++++++++++ TODO | 4 + common.c | 116 +++++++++++++ config.mk | 28 +++ sfeed.1 | 44 +++++ sfeed.c | 477 ++++++++++++++++++++++++++++++++++++++++++++++++++++ sfeed_html.1 | 14 ++ sfeed_html.c | 230 +++++++++++++++++++++++++ sfeed_opml_config.1 | 11 ++ sfeed_opml_config.c | 87 ++++++++++ sfeed_plain.1 | 15 ++ sfeed_plain.c | 42 +++++ sfeed_update | 116 +++++++++++++ sfeed_update.1 | 82 +++++++++ sfeedrc.example | 17 ++ 19 files changed, 1548 insertions(+) create mode 100644 .gitignore create mode 100644 CREDITS create mode 100644 LICENSE create mode 100644 Makefile create mode 100644 README create mode 100644 TODO create mode 100644 common.c create mode 100644 config.mk create mode 100644 sfeed.1 create mode 100644 sfeed.c create mode 100644 sfeed_html.1 create mode 100644 sfeed_html.c create mode 100644 sfeed_opml_config.1 create mode 100644 sfeed_opml_config.c create mode 100644 sfeed_plain.1 create mode 100644 sfeed_plain.c create mode 100755 sfeed_update create mode 100644 sfeed_update.1 create mode 100644 sfeedrc.example diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..abdbd0f --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +*.o +sfeed +sfeed_plain +sfeed_html +sfeed_opml_config diff --git a/CREDITS b/CREDITS new file mode 100644 index 0000000..a383a90 --- /dev/null +++ b/CREDITS @@ -0,0 +1 @@ +raph_ael on #suckless for the idea for an opml converter diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..91da394 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT/X Consortium License + +© 2011-2012 Hiltjo Posthuma + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the 
"Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..329b97b --- /dev/null +++ b/Makefile @@ -0,0 +1,95 @@ +# sfeed - simple RSS and Atom parser (and programs to add reader functionality). 
+
+include config.mk
+
+NAME = sfeed
+SRC = sfeed.c sfeed_plain.c sfeed_html.c sfeed_opml_config.c
+OBJ = ${SRC:.c=.o}
+
+# default target: print the build options, then build every program.
+all: options sfeed sfeed_plain sfeed_html sfeed_opml_config
+
+options:
+	@echo ${NAME} build options:
+	@echo "CFLAGS = ${CFLAGS}"
+	@echo "LDFLAGS = ${LDFLAGS}"
+	@echo "CC = ${CC}"
+
+# compile each source file to an object using the flags from config.mk.
+.c.o:
+	@echo CC $<
+	@${CC} -c ${CFLAGS} $<
+
+# rebuild every object when the build configuration changes.
+${OBJ}: config.mk
+
+# sfeed_html.c pulls in common.c with #include "common.c", so its object
+# must be rebuilt when common.c changes.
+# NOTE(review): sfeed_plain.c is not visible here; list sfeed_plain.o as
+# well if it includes common.c too.
+sfeed_html.o: common.c
+
+# link from sfeed.o like the other programs, so that sfeed is compiled with
+# ${CFLAGS} (feature-test macros, -DVERSION) by the .c.o rule instead of
+# being recompiled from sfeed.c without them.
+sfeed: sfeed.o
+	@echo CC -o $@
+	@${CC} -o $@ sfeed.o ${LDFLAGS} ${LIBEXPAT}
+
+sfeed_opml_config: sfeed_opml_config.o
+	@echo CC -o $@
+	@${CC} -o $@ sfeed_opml_config.o ${LDFLAGS} ${LIBEXPAT}
+
+sfeed_plain: sfeed_plain.o
+	@echo CC -o $@
+	@${CC} -o $@ sfeed_plain.o ${LDFLAGS}
+
+sfeed_html: sfeed_html.o
+	@echo CC -o $@
+	@${CC} -o $@ sfeed_html.o ${LDFLAGS}
+
+# remove the programs, the objects and the dist tarball.
+clean:
+	@echo cleaning
+	@rm -f sfeed sfeed_plain sfeed_html sfeed_opml_config ${OBJ} ${NAME}-${VERSION}.tar.gz
+
+# create ${NAME}-${VERSION}.tar.gz from a temporary staging directory.
+dist: clean
+	@echo creating dist tarball
+	@mkdir -p ${NAME}-${VERSION}
+	@cp -R LICENSE Makefile README config.mk \
+		TODO CREDITS sfeedrc.example ${SRC} common.c sfeed_update \
+		sfeed.1 sfeed_update.1 sfeed_plain.1 sfeed_html.1 sfeed_opml_config.1 \
+		${NAME}-${VERSION}
+	@tar -cf ${NAME}-${VERSION}.tar ${NAME}-${VERSION}
+	@gzip ${NAME}-${VERSION}.tar
+	@rm -rf ${NAME}-${VERSION}
+
+# install programs, the example config and the manual pages below
+# ${DESTDIR}${PREFIX} and ${DESTDIR}${MANPREFIX}.
+install: all
+	@echo installing executable file to ${DESTDIR}${PREFIX}/bin
+	@mkdir -p ${DESTDIR}${PREFIX}/bin
+	@cp -f sfeed sfeed_html sfeed_plain sfeed_update sfeed_opml_config \
+		${DESTDIR}${PREFIX}/bin
+	@chmod 755 ${DESTDIR}${PREFIX}/bin/sfeed \
+		${DESTDIR}${PREFIX}/bin/sfeed_html \
+		${DESTDIR}${PREFIX}/bin/sfeed_plain \
+		${DESTDIR}${PREFIX}/bin/sfeed_update \
+		${DESTDIR}${PREFIX}/bin/sfeed_opml_config
+	@mkdir -p ${DESTDIR}${PREFIX}/share/${NAME}
+	@cp -f sfeedrc.example ${DESTDIR}${PREFIX}/share/${NAME}
+	@echo installing manual pages to ${DESTDIR}${MANPREFIX}/man1
+	@mkdir -p ${DESTDIR}${MANPREFIX}/man1
+	@sed "s/VERSION/${VERSION}/g" < sfeed.1 > ${DESTDIR}${MANPREFIX}/man1/sfeed.1
+	@sed "s/VERSION/${VERSION}/g" < 
sfeed_update.1 > ${DESTDIR}${MANPREFIX}/man1/sfeed_update.1 + @sed "s/VERSION/${VERSION}/g" < sfeed_plain.1 > ${DESTDIR}${MANPREFIX}/man1/sfeed_plain.1 + @sed "s/VERSION/${VERSION}/g" < sfeed_html.1 > ${DESTDIR}${MANPREFIX}/man1/sfeed_html.1 + @sed "s/VERSION/${VERSION}/g" < sfeed_opml_config.1 > ${DESTDIR}${MANPREFIX}/man1/sfeed_opml_config.1 + @chmod 644 ${DESTDIR}${MANPREFIX}/man1/sfeed.1 \ + ${DESTDIR}${MANPREFIX}/man1/sfeed_update.1 \ + ${DESTDIR}${MANPREFIX}/man1/sfeed_plain.1 \ + ${DESTDIR}${MANPREFIX}/man1/sfeed_html.1 \ + ${DESTDIR}${MANPREFIX}/man1/sfeed_opml_config.1 + +uninstall: + @echo removing executable file from ${DESTDIR}${PREFIX}/bin + @rm -f ${DESTDIR}${PREFIX}/bin/sfeed \ + ${DESTDIR}${PREFIX}/bin/sfeed_html \ + ${DESTDIR}${PREFIX}/bin/sfeed_plain \ + ${DESTDIR}${PREFIX}/bin/sfeed_update \ + ${DESTDIR}${PREFIX}/bin/sfeed_opml_config \ + ${DESTDIR}${PREFIX}/share/${NAME}/sfeedrc.example + @-rmdir ${DESTDIR}${PREFIX}/share/${NAME} + @echo removing manual pages from ${DESTDIR}${MANPREFIX}/man1 + @rm -f ${DESTDIR}${MANPREFIX}/man1/sfeed.1 \ + ${DESTDIR}${MANPREFIX}/man1/sfeed_update.1 \ + ${DESTDIR}${MANPREFIX}/man1/sfeed_plain.1 \ + ${DESTDIR}${MANPREFIX}/man1/sfeed_html.1 \ + ${DESTDIR}${MANPREFIX}/man1/sfeed_opml_config.1 + +.PHONY: all options clean dist install uninstall diff --git a/README b/README new file mode 100644 index 0000000..cbdb343 --- /dev/null +++ b/README @@ -0,0 +1,143 @@ +sfeed v0.8 +---------- + +Simple RSS and Atom parser (and some format programs). + + +Dependencies +------------ + +- C compiler. +- expat library (used by sfeed.c and sfeed_opml_config.c, + http://expat.sourceforge.net/). + + +Optional dependencies +--------------------- + +- POSIX shell (used by sfeed_update). +- curl (used by sfeed_update, http://curl.haxx.se/). +- iconv (used by sfeed_update, http://www.gnu.org/software/libiconv/). + + +Files +----- + +sfeed - Binary (from sfeed.c); read XML RSS or Atom feed data from + stdin. 
Write feed data in tab-separated format to stdout. +sfeed_update - Shellscript; update feeds and merge with old feeds in the + file $HOME/.sfeed/feeds by default. +sfeed_plain - Format feeds file (TSV) from sfeed_update to plain text. +sfeed_html - Format feeds file (TSV) from sfeed_update to HTML. +sfeed_opml_config - Generate a sfeedrc config file based on an opml file. +sfeedrc.example - Example config file. + + +Files read at runtime by sfeed_update +------------------------------------- + +sfeedrc - Config file. This file is evaluated as a shellscript in + sfeed_update. You can for example override the fetchfeed() + function to use wget, fetch or another download program or + you can override the merge() function to change the merge + logic. The function feeds() is called to fetch the feeds. + The function feed() can safely be executed as a parallel + job in your sfeedrc config file to speed up updating. + + +Files written at runtime by sfeed_update +---------------------------------------- + +feeds - Tab-separated format containing all feeds. + The sfeed_update script merges new items with this file. +feeds.new - Temporary file used by sfeed_update to merge items. + + +TAB-SEPARATED format +-------------------- + +The items are saved in a TSV-like format except newlines, tabs and +backslash are escaped with \ (\n, \t and \\). Other whitespace characters +except spaces are removed. + +The timestamp field is converted to a unix timestamp. The timestamp is also +stored in formatted form as a separate field. The other fields are left +untouched (including HTML). 
+ +The order and format of the fields are: + +item unix timestamp - string unix timestamp (GMT+0) +item formatted timestamp - string timestamp (YYYY-mm-dd HH:MM:SS tz[+-]HHMM) +item title - string +item link - string +item description - string +item contenttype - string ("html" or "plain") +item id - string +item author - string +feed type - string ("rss" or "atom") +feed name - string (extra field added by sfeed_update) +feed url - string (extra field added by sfeed_update) + + +Usage +----- + +To build and install (respects DESTDIR and PREFIX variable): + +make install + + +Generate a sfeedrc config file from your exported list of feeds in opml +format: + +sfeed_opml_config < opmlfile.xml > $HOME/.sfeed/sfeedrc + + +To update feeds and format the feeds file (configfile argument is optional): + +sfeed_update "configfile" +sfeed_plain < $HOME/.sfeed/feeds > $HOME/.sfeed/feeds.txt +sfeed_html < $HOME/.sfeed/feeds > $HOME/.sfeed/feeds.html + + +Example script to view feeds with dmenu, opens selected url in $BROWSER: + +url=$(sfeed_plain < "$HOME/.sfeed/feeds" | dmenu -l 35 -i | + sed 's@^.* \([a-zA-Z]*://\)\(.*\)$@\1\2@') +[ ! "$url" = "" ] && $BROWSER "$url" + + +or to view in your browser: + +$BROWSER "$HOME/.sfeed/feeds.html" + + +or to view in your editor: + +$EDITOR "$HOME/.sfeed/feeds.txt" + + +tip to remove feeds older than a date (change time="YYYY mm dd HH mm ss") + +gawk -F '\t' 'BEGIN { + time = mktime("2012 01 01 12 34 56"); +} +{ + if(int($1) >= int(time)) { + print $0; + } +}' < feeds > feeds.clean + +mv feeds.clean feeds + + +License +------- + +MIT, see LICENSE file. + + +Author +------ + +Hiltjo Posthuma diff --git a/TODO b/TODO new file mode 100644 index 0000000..a2c081d --- /dev/null +++ b/TODO @@ -0,0 +1,4 @@ +[ ] opml export script (WIP). +[ ] rename sfeed_opml_config to sfeed_opml_import. 
+[ ] sfeed_update / sfeedrc: add base siteurl as parameter for feed function for feeds located at an other domain, for example feedburner.com +[ ] test opml import / export scripts with thunderbird, google reader, snownews and rssowl. diff --git a/common.c b/common.c new file mode 100644 index 0000000..91ac9ca --- /dev/null +++ b/common.c @@ -0,0 +1,116 @@ +#include +#include +#include +#include +#include + +enum { FieldUnixTimestamp = 0, FieldTimeFormatted, FieldTitle, FieldLink, + FieldContent, FieldContentType, FieldId, FieldAuthor, FieldFeedType, + FieldFeedName, FieldFeedUrl, FieldLast }; + +const int FieldSeparator = '\t'; + +char * +afgets(char **p, size_t *size, FILE *fp) { + char buf[BUFSIZ], *alloc = NULL; + size_t n, len = 0, allocsiz; + int end = 0; + + while(fgets(buf, sizeof(buf), fp)) { + n = strlen(buf); + if(buf[n - 1] == '\n') { /* dont store newlines. */ + buf[n - 1] = '\0'; + n--; + end = 1; /* newline found, end */ + } + len += n; + allocsiz = len + 1; + if(allocsiz > *size) { + if((alloc = realloc(*p, allocsiz))) { + *p = alloc; + *size = allocsiz; + } else { + free(*p); + *p = NULL; + fputs("error: could not realloc\n", stderr); + exit(EXIT_FAILURE); + return NULL; + } + } + strncpy((*p + (len - n)), buf, n); + if(end || feof(fp)) + break; + } + if(*p && len > 0) { + (*p)[len] = '\0'; + return *p; + } + return NULL; +} + +void /* print link; if link is relative use baseurl to make it absolute */ +printlink(const char *link, const char *baseurl) { + const char *ebaseproto, *ebasedomain, *p; + int isrelative; + + /* protocol part */ + for(p = link; *p && (isalpha(*p) || isdigit(*p) || *p == '+' || *p == '-' || *p == '.'); p++); + isrelative = strncmp(p, "://", strlen("://")); + if(isrelative) { /* relative link (baseurl is used). 
*/ + if((ebaseproto = strstr(baseurl, "://"))) { + ebaseproto += strlen("://"); + fwrite(baseurl, 1, ebaseproto - baseurl, stdout); + } else { + ebaseproto = baseurl; + if(*baseurl || (link[0] == '/' && link[1] == '/')) + fputs("http://", stdout); + } + if(link[0] == '/') { /* relative to baseurl domain (not path). */ + if(link[1] == '/') /* absolute url but with protocol from baseurl. */ + link += 2; + else if((ebasedomain = strchr(ebaseproto, '/'))) /* relative to baseurl and baseurl path. */ + fwrite(ebaseproto, 1, ebasedomain - ebaseproto, stdout); + else + fputs(ebaseproto, stdout); + } else if((ebasedomain = strrchr(ebaseproto, '/'))) /* relative to baseurl and baseurl path. */ + fwrite(ebaseproto, 1, ebasedomain - ebaseproto + 1, stdout); + else { + fputs(ebaseproto, stdout); + if(*baseurl && *link) + fputc('/', stdout); + } + } + fputs(link, stdout); +} + +unsigned int +parseline(char **line, size_t *size, char **fields, unsigned int maxfields, FILE *fp, int separator) { + unsigned int i = 0; + char *prev, *s; + + if(afgets(line, size, fp)) { + for(prev = *line; (s = strchr(prev, separator)) && i <= maxfields; i++) { + *s = '\0'; /* null terminate string. */ + fields[i] = prev; + prev = s + 1; + } + fields[i] = prev; + for(i++; i < maxfields; i++) /* make non-parsed fields empty. 
*/ + fields[i] = ""; + } + return i; +} + +void +printtime(time_t t) { + char buf[32]; + struct tm temp = { 0 }, *mktm; + + if(!(mktm = localtime_r(&t, &temp))) + return; + mktm->tm_isdst = -1; + + if(!strftime(buf, sizeof(buf) - 1, "%Y-%m-%d %H:%M", mktm)) + return; + fputs(buf, stdout); +} diff --git a/config.mk b/config.mk new file mode 100644 index 0000000..8bddcad --- /dev/null +++ b/config.mk @@ -0,0 +1,28 @@ +# sfeed version +VERSION = 0.8 + +# customize below to fit your system + +# paths +PREFIX = /usr/local +MANPREFIX = ${PREFIX}/share/man + +# includes and libs +INCS = +LIBEXPAT = -lexpat +LIBS = -lc + +# flags +#CFLAGS = -Os -D_POSIX_C_SOURCE=200112L -D_XOPEN_SOURCE=500 -DVERSION=\"${VERSION}\" +#LDFLAGS = -s ${LIBS} + +# debug +CFLAGS = -g -O0 -pedantic -Wall -Wextra -D_POSIX_C_SOURCE=200112L -D_XOPEN_SOURCE=700 -DVERSION=\"${VERSION}\" +LDFLAGS = ${LIBS} + +# Solaris +#CFLAGS = -fast ${INCS} -DVERSION=\"${VERSION}\" +#LDFLAGS = ${LIBS} + +# compiler and linker +CC = cc diff --git a/sfeed.1 b/sfeed.1 new file mode 100644 index 0000000..04227d8 --- /dev/null +++ b/sfeed.1 @@ -0,0 +1,44 @@ +.TH SFEED 1 sfeed\-VERSION +.SH NAME +sfeed \- simple RSS and Atom parser +.SH SYNOPSIS +.B sfeed +.SH DESCRIPTION +Read XML RSS or Atom feed data from stdin. Write feed data in a +tab-separated format to stdout. +.SH TAB-SEPARATED FORMAT +The items are saved in a TSV-like format except newlines, tabs and +backslash are escaped with \\ (\\n, \\t and \\\\). Carriage returns (\\r) are +removed. +.TP +.B item timestamp (unix timestamp in GMT+0) +string +.TP +.B item timestamp (formatted) +string (YYYY-mm-dd HH:MM:SS tzname[+-]HHMM) +.TP +.B item title +string +.TP +.B item link +string +.TP +.B item description +string +.TP +.B item contenttype +string (html or plain) +.TP +.B item id +string +.TP +.B item author +string +.TP +.B feed type +string (rss or atom) +.SH SEE ALSO +.BR sh(1) +.BR sfeed_update(1) +.SH BUGS +Please report them! 
diff --git a/sfeed.c b/sfeed.c new file mode 100644 index 0000000..b83351f --- /dev/null +++ b/sfeed.c @@ -0,0 +1,477 @@ +#include +#include +#include +#include +#include +#include +#include /* libexpat */ + +enum { FeedTypeNone = 0, FeedTypeRSS = 1, FeedTypeAtom = 2, FeedTypeLast = 3 }; +const char *feedtypes[] = { "", "rss", "atom" }; + +enum { ContentTypeNone = 0, ContentTypePlain = 1, ContentTypeHTML = 2, ContentTypeLast = 3 }; +const char *contenttypes[] = { "", "plain", "html" }; + +typedef struct string { /* String data / pool */ + char *data; /* data */ + size_t len; /* string length */ + size_t bufsiz; /* allocated size */ +} String; + +typedef struct feeditem { /* Feed item */ + String timestamp; + String title; + String link; + String content; + int contenttype; /* ContentTypePlain or ContentTypeHTML */ + String id; + String author; + int feedtype; /* FeedTypeRSS or FeedTypeAtom */ +} FeedItem; + +void die(const char *s); +void cleanup(void); + +const int FieldSeparator = '\t'; +FeedItem feeditem; /* data for current feed item */ +char tag[1024]; /* current XML tag being parsed. 
*/ +char feeditemtag[1024]; /* current tag _inside_ a feeditem */ +XML_Parser parser; /* expat XML parser state */ +int incdata = 0; +char *standardtz = NULL; /* TZ variable at start of program */ + +void +string_clear(String *s) { + if(s->data) + s->data[0] = '\0'; /* clear string only; don't free, prevents + unnecessary reallocation */ + s->len = 0; +} + +void +string_buffer_init(String *s, size_t len) { + if(!(s->data = malloc(len))) + die("can't allocate enough memory"); + s->bufsiz = len; + string_clear(s); +} + +void +string_free(String *s) { + free(s->data); + s->data = NULL; + s->bufsiz = 0; + s->len = 0; +} + +int +string_buffer_expand(String *s, size_t newlen) { + char *p; + size_t alloclen; + /* check if allocation is necesary, dont shrink buffer */ + if(!s->data || (newlen > s->bufsiz)) { + /* should be more than bufsiz ofcourse */ + for(alloclen = 16; alloclen <= newlen; alloclen *= 2); + if(!(p = realloc(s->data, alloclen))) { + string_free(s); /* free previous allocation */ + die("can't allocate enough memory"); + } + s->bufsiz = alloclen; + s->data = p; + } + return s->bufsiz; +} + +void +string_append(String *s, const char *data, size_t len) { + string_buffer_expand(s, s->len + len); + memcpy(s->data + s->len, data, len); + s->len += len; + s->data[s->len] = '\0'; +} + +void /* cleanup parser, free allocated memory, etc */ +cleanup(void) { + XML_ParserFree(parser); + string_free(&feeditem.timestamp); + string_free(&feeditem.title); + string_free(&feeditem.link); + string_free(&feeditem.content); + string_free(&feeditem.id); + string_free(&feeditem.author); +} + +void /* print error message to stderr */ +die(const char *s) { + fputs("sfeed: ", stderr); + fputs(s, stderr); + fputc('\n', stderr); + cleanup(); + exit(EXIT_FAILURE); +} + +void +gettimetz(const char *s, char *buf, size_t bufsiz) { + const char *p = s; + int tzhour = 0, tzmin = 0; + char tzname[128] = "", *t = NULL; + unsigned int i; + + buf[0] = '\0'; + for(; *p && isspace(*p); p++); /* 
skip whitespace */ + /* detect time offset, assume time offset isn't specified in the first 18 characters */ + for(i = 0; *p && ((*p != '+' && *p != '-') || i <= 18); p++, i++); + + if(isalpha(*p)) { + if(*p == 'Z' || *p == 'z') { + strncpy(buf, "GMT+00:00", bufsiz); + return; + } else { + for(i = 0, t = &tzname[0]; i < (sizeof(tzname) - 1) && (*p && isalpha(*p)); i++) + *(t++) = *(p++); + *t = '\0'; + } + } else + strncpy(tzname, "GMT", sizeof(tzname) - 1); + if(!(*p)) { + strncpy(buf, tzname, bufsiz); + return; + } + /* NOTE: reverses time offsets for TZ */ + if((sscanf(p, "+%02d:%02d", &tzhour, &tzmin)) > 0) + snprintf(buf, bufsiz, "%s-%02d:%02d", tzname, tzhour, tzmin); + else if((sscanf(p, "-%02d:%02d", &tzhour, &tzmin)) > 0) + snprintf(buf, bufsiz, "%s+%02d:%02d", tzname, tzhour, tzmin); + else if((sscanf(p, "+%02d%02d", &tzhour, &tzmin)) > 0) + snprintf(buf, bufsiz, "%s-%02d:%02d", tzname, tzhour, tzmin); + else if((sscanf(p, "-%02d%02d", &tzhour, &tzmin)) > 0) + snprintf(buf, bufsiz, "%s+%02d:%02d", tzname, tzhour, tzmin); + else if(sscanf(p, "+%d", &tzhour) > 0) + snprintf(buf, bufsiz, "%s-%02d:00", tzname, tzhour); + else if(sscanf(p, "-%d", &tzhour) > 0) + snprintf(buf, bufsiz, "%s+%02d:00", tzname, tzhour); +} + +time_t +parsetime(const char *s, char *buf, size_t bufsiz) { + struct tm tm = { 0 }; + time_t t = 0; + char timebuf[64], tz[256], *p; + + if(buf) + buf[0] = '\0'; + gettimetz(s, tz, sizeof(tz) - 1); + if(!standardtz || strcmp(standardtz, tz)) { + if(!strcmp(tz, "")) { /* restore TZ */ + if(standardtz) + setenv("TZ", standardtz, 1); + else + unsetenv("TZ"); + } + else + setenv("TZ", tz, 1); + tzset(); + } + if((strptime(s, "%Y-%m-%dT%H:%M:%SZ", &tm)) || + (strptime(s, "%Y-%m-%d %H:%M:%S", &tm)) || + (strptime(s, "%a, %d %b %Y %H:%M:%S", &tm)) || + (strptime(s, "%Y-%m-%dT%H:%M:%S", &tm))) { + tm.tm_isdst = -1; /* detect Daylight Saving Time */ + if((t = mktime(&tm)) == -1) + t = 0; + if(buf && (strftime(timebuf, sizeof(timebuf) - 1, + "%Y-%m-%d 
%H:%M:%S", &tm))) { + for(p = tz; *p; p++) /* print time offset reverse */ + *p = ((*p == '-') ? '+' : (*p == '+' ? '-' : *p)); + snprintf(buf, bufsiz, "%s %s", timebuf, tz); + } + } + return t; +} + +/* print text, ignore tabs, newline and carriage return etc +1 * print some HTML 2.0 / XML 1.0 as normal text */ +void +string_print_trimmed(String *s) { + const char *entities[] = { + "&", "&", "<", "<", ">", ">", "'", "'", """, "\"", + NULL, NULL + }; + const char *p, *n, **e; + unsigned int len, found; + if(!s->data) + return; + for(p = s->data; isspace(*p); p++); /* strip leading whitespace */ + for(; *p; ) { /* ignore tabs, newline and carriage return etc */ + if(!isspace(*p) || *p == ' ') { + if(*p == '<') { /* skip tags */ + if((n = strchr(p, '>'))) + p = n; + else + putchar('<'); + } else if(*p == '&') { + for(e = entities, found = 0; *e; e += 2) { + len = strlen(*e); + if(!strncmp(*e, p, len)) { /* compare entities and "replace" */ + fputs(*(e + 1), stdout); + p += len; + found = 1; + break; + } + } + if(found) + continue; + else + putchar('&'); + } else + fputc(*p, stdout); + } + p++; + } +} + +void /* print text, escape tabs, newline and carriage return etc */ +string_print_textblock(String *s) { + const char *p; + if(!s->data) + return; + for(p = s->data; *p && isspace(*p); p++); /* strip leading whitespace */ + for(; *p; p++) { + if(*p == '\n') /* escape newline */ + fputs("\\n", stdout); + else if(*p == '\\') /* escape \ */ + fputs("\\\\", stdout); + else if(*p == '\t') /* tab */ + fputs("\\t", stdout); + else if(!isspace(*p) || *p == ' ') /* ignore other whitespace chars */ + fputc(*p, stdout); + } +} + +int +istag(const char *name, const char *name2) { + return (!strcasecmp(name, name2)); +} + +int +isattr(const char *name, const char *name2) { + return (!strcasecmp(name, name2)); +} + +char * /* search for attr value by attr name in attributes list */ +getattrvalue(const char **atts, const char *name) { + const char **attr = NULL, *key, *value; + 
if(!atts || !(*atts)) + return NULL; + for(attr = atts; *attr; ) { + key = *(attr++); + value = *(attr++); + if(key && value && isattr(key, name)) + return (char *)value; + } + return NULL; +} + +void XMLCALL +xml_handler_start_element(void *data, const char *name, const char **atts) { + const char *value; + + strncpy(tag, name, sizeof(tag) - 1); /* set tag */ + if(feeditem.feedtype != FeedTypeNone) { /* in item */ + if(feeditem.feedtype == FeedTypeAtom) { + if(istag(feeditemtag, "content") || istag(feeditemtag, "summary")) { + XML_DefaultCurrent(parser); /* pass to default handler to process inline HTML etc */ + } else if(istag(name, "link")) { /* link href attribute */ + if((value = getattrvalue(atts, "href"))) + string_append(&feeditem.link, value, strlen(value)); + } else if(istag(name, "content") || istag(name, "summary")) { + if((value = getattrvalue(atts, "type"))) { /* content type is HTML or plain text */ + if(!strcasecmp(value, "xhtml") || !strcasecmp(value, "text/xhtml") || + !strcasecmp(value, "html") || !strcasecmp(value, "text/html")) + feeditem.contenttype = ContentTypeHTML; + } + } + } else if(feeditem.feedtype == FeedTypeRSS) { + if(istag(feeditemtag, "description")) + XML_DefaultCurrent(parser); /* pass to default handler to process inline HTML etc */ + } + if(feeditemtag[0] == '\0') /* set tag if not already set. 
*/ + strncpy(feeditemtag, name, sizeof(feeditemtag) - 1); + } else { /* start of RSS or Atom entry / item */ + if(istag(name, "entry")) { /* Atom */ + feeditem.feedtype = FeedTypeAtom; + feeditem.contenttype = ContentTypePlain; /* Default content type */ + } else if(istag(name, "item")) { /* RSS */ + feeditem.feedtype = FeedTypeRSS; + feeditem.contenttype = ContentTypeHTML; /* Default content type */ + } + } +} + +void XMLCALL +xml_handler_end_element(void *data, const char *name) { + char timebuf[64]; + + if(feeditem.feedtype != FeedTypeNone) { + /* end of RSS or Atom entry / item */ + if((istag(name, "entry") && (feeditem.feedtype == FeedTypeAtom)) || /* Atom */ + (istag(name, "item") && (feeditem.feedtype == FeedTypeRSS))) { /* RSS */ + printf("%ld", (long)parsetime((&feeditem.timestamp)->data, timebuf, + sizeof(timebuf) - 1)); + fputc(FieldSeparator, stdout); + printf("%s", timebuf); + fputc(FieldSeparator, stdout); + string_print_trimmed(&feeditem.title); + fputc(FieldSeparator, stdout); + string_print_trimmed(&feeditem.link); + fputc(FieldSeparator, stdout); + string_print_textblock(&feeditem.content); + fputc(FieldSeparator, stdout); + fputs(contenttypes[feeditem.contenttype], stdout); + fputc(FieldSeparator, stdout); + string_print_trimmed(&feeditem.id); + fputc(FieldSeparator, stdout); + string_print_trimmed(&feeditem.author); + fputc(FieldSeparator, stdout); + fputs(feedtypes[feeditem.feedtype], stdout); + fputc('\n', stdout); + + /* clear strings */ + string_clear(&feeditem.timestamp); + string_clear(&feeditem.title); + string_clear(&feeditem.link); + string_clear(&feeditem.content); + string_clear(&feeditem.id); + string_clear(&feeditem.author); + feeditem.feedtype = FeedTypeNone; + feeditem.contenttype = ContentTypePlain; + incdata = 0; + feeditemtag[0] = '\0'; /* unset tag */ + } else if(!strcmp(feeditemtag, name)) { /* clear */ + feeditemtag[0] = '\0'; /* unset tag */ + } else { + if(feeditem.feedtype == FeedTypeAtom) { + if(istag(feeditemtag, 
"content") || istag(feeditemtag, "summary")) { + /* pass to default handler to process inline HTML etc */ + XML_DefaultCurrent(parser); + return; + } + } + } + } + tag[0] = '\0'; /* unset tag */ +} + +/* NOTE: this handler can be called multiple times if the data in this block + * is bigger than the buffer */ +void XMLCALL +xml_handler_data(void *data, const XML_Char *s, int len) { + if(feeditem.feedtype == FeedTypeRSS) { + if(istag(feeditemtag, "pubdate") || istag(feeditemtag, "dc:date")) + string_append(&feeditem.timestamp, s, len); + else if(istag(feeditemtag, "title")) + string_append(&feeditem.title, s, len); + else if(istag(feeditemtag, "link")) + string_append(&feeditem.link, s, len); + else if(istag(feeditemtag, "description")) { + if(incdata) + XML_DefaultCurrent(parser); /* pass to default handler to process inline HTML etc */ + else + string_append(&feeditem.content, s, len); + } else if(istag(feeditemtag, "guid")) + string_append(&feeditem.id, s, len); + else if(istag(feeditemtag, "author") || istag(feeditemtag, "dc:creator")) + string_append(&feeditem.author, s, len); + } else if(feeditem.feedtype == FeedTypeAtom) { + if(istag(feeditemtag, "published") || istag(feeditemtag, "updated")) + string_append(&feeditem.timestamp, s, len); + else if(istag(feeditemtag, "title")) { + string_append(&feeditem.title, s, len); + } else if(istag(feeditemtag, "summary") || istag(feeditemtag, "content")) { + if(feeditem.contenttype == ContentTypeHTML) { + if(incdata) + XML_DefaultCurrent(parser); /* pass to default handler to process inline HTML etc */ + else + string_append(&feeditem.content, s, len); + } else + XML_DefaultCurrent(parser); /* pass to default handler to process inline HTML etc */ + } else if(istag(feeditemtag, "id")) + string_append(&feeditem.id, s, len); + else if(istag(feeditemtag, "name")) /* assume this is: */ + string_append(&feeditem.author, s, len); + } +} + +int /* parse XML from stream using setup parser, return 1 on success, 0 on failure. 
*/ +xml_parse_stream(XML_Parser parser, FILE *fp) { + char buffer[BUFSIZ]; + int done = 0, len = 0; + + while(!feof(fp)) { + len = fread(buffer, 1, sizeof(buffer), fp); + done = (feof(fp) || ferror(fp)); + if(XML_Parse(parser, buffer, len, done) == XML_STATUS_ERROR && (len > 0)) { + if(XML_GetErrorCode(parser) == XML_ERROR_NO_ELEMENTS) + return 1; /* Ignore "no elements found" / empty document as an error */ + fprintf(stderr, "sfeed: error parsing xml %s at line %lu column %lu\n", + XML_ErrorString(XML_GetErrorCode(parser)), (unsigned long)XML_GetCurrentLineNumber(parser), + (unsigned long)XML_GetCurrentColumnNumber(parser)); + return 0; + } + } while(!done); + return 1; +} + +void +xml_handler_default(void *data, const XML_Char *s, int len) { + if((feeditem.feedtype == FeedTypeAtom && (istag(feeditemtag, "summary") || istag(feeditemtag, "content"))) || + (feeditem.feedtype == FeedTypeRSS && istag(feeditemtag, "description"))) + /*if(!istag(tag, "script") && !istag(tag, "style"))*/ /* ignore data in inline script and style */ + string_append(&feeditem.content, s, len); +} + +void /* NOTE: data is null terminated. 
*/ +xml_handler_comment(void *data, const XML_Char *s) { +} + +void +xml_cdata_section_handler_start(void *userdata) { + incdata = 1; +} + +void +xml_cdata_section_handler_end(void *userdata) { + incdata = 0; +} + +int +main(void) { + int status; + standardtz = getenv("TZ"); + + /* init strings and initial memory pool size */ + string_buffer_init(&feeditem.timestamp, 64); + string_buffer_init(&feeditem.title, 256); + string_buffer_init(&feeditem.link, 1024); + string_buffer_init(&feeditem.content, 4096); + string_buffer_init(&feeditem.id, 1024); + string_buffer_init(&feeditem.author, 256); + feeditem.contenttype = ContentTypePlain; + feeditem.feedtype = FeedTypeNone; + feeditemtag[0] = '\0'; /* unset tag */ + tag[0] = '\0'; /* unset tag */ + + if(!(parser = XML_ParserCreate("UTF-8"))) + die("can't create parser"); + + XML_SetElementHandler(parser, xml_handler_start_element, xml_handler_end_element); + XML_SetCharacterDataHandler(parser, xml_handler_data); + XML_SetCommentHandler(parser, xml_handler_comment); + XML_SetCdataSectionHandler(parser, xml_cdata_section_handler_start, xml_cdata_section_handler_end); + XML_SetDefaultHandler(parser, xml_handler_default); + + status = xml_parse_stream(parser, stdin); + cleanup(); + + return status ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/sfeed_html.1 b/sfeed_html.1 new file mode 100644 index 0000000..e645d4b --- /dev/null +++ b/sfeed_html.1 @@ -0,0 +1,14 @@ +.TH SFEED 1 sfeed\-VERSION +.SH NAME +sfeed_html \- format feeds file (TSV) from sfeed_update to HTML +.SH SYNOPSIS +.B sfeed_html +.SH DESCRIPTION +Format feeds file (TSV) from sfeed_update to HTML. Reads TSV data from +stdin and writes HTML to stdout. For the exact TSV format see sfeed_update(1). +.SH SEE ALSO +.BR sfeed_plain(1) +.BR sfeed_update(1) +.BR sfeed(1) +.SH BUGS +Please report them! 
diff --git a/sfeed_html.c b/sfeed_html.c new file mode 100644 index 0000000..c195c86 --- /dev/null +++ b/sfeed_html.c @@ -0,0 +1,230 @@ +#include +#include +#include +#include +#include +#include "common.c" + +/* Feed info. */ +struct feed { + char *name; /* feed name */ + unsigned long new; /* amount of new items per feed */ + unsigned long total; /* total items */ + struct feed *next; /* linked list */ +}; + +static int showsidebar = 1; /* show sidebar ? */ + +void /* print error message to stderr */ +die(const char *s) { + fputs("sfeed_html: ", stderr); + fputs(s, stderr); + fputc('\n', stderr); + exit(EXIT_FAILURE); +} + +struct feed * +feednew(void) { + struct feed *f; + if(!(f = calloc(1, sizeof(struct feed)))) + die("can't allocate enough memory"); + return f; +} + +void +feedsfree(struct feed *f) { + struct feed *next; + while(f) { + next = f->next; + free(f->name); + free(f); + f = next; + } +} + +/* print feed name for id; spaces and tabs in string as "-" (spaces in anchors are not valid). */ +void +printfeednameid(const char *s) { + for(; *s; s++) + putchar(isspace(*s) ? '-' : *s); +} + +void +printhtmlencoded(const char *s) { + for(; *s; s++) { + switch(*s) { + case '<': fputs("<", stdout); break; + case '>': fputs(">", stdout); break; + case '&': fputs("&", stdout); break; + default: + putchar(*s); + } + } +} + +int +main(void) { + char *line = NULL, *fields[FieldLast]; + unsigned long totalfeeds = 0, totalnew = 0; + unsigned int islink, isnew; + struct feed *feedcurrent = NULL, *feeds = NULL; /* start of feeds linked-list. */ + time_t parsedtime, comparetime; + size_t size = 0; + + tzset(); + comparetime = time(NULL) - (3600 * 24); /* 1 day is old news */ + fputs( + "\n" + "\n" + " \n" + " \n" + " \n" + " \n" + " \n" + "
\n", + stdout); + + while(parseline(&line, &size, fields, FieldLast, stdin, FieldSeparator) > 0) { + /* first of feed section or new feed section. */ + if(!totalfeeds || strcmp(feedcurrent->name, fields[FieldFeedName])) { + if(totalfeeds) { /* end previous one. */ + fputs("\n", stdout); + feedcurrent->next = feednew(); + feedcurrent = feedcurrent->next; + } else { + feedcurrent = feednew(); + feeds = feedcurrent; /* first item. */ + fputs("\t\t
\n", stdout); + } + if(!(feedcurrent->name = strdup(fields[FieldFeedName]))) + die("can't allocate enough memory"); + if(fields[FieldFeedName][0] != '\0') { + fputs("

name); + fputs("\">name); + fputs("\">", stdout); + fputs(feedcurrent->name, stdout); + fputs("

\n", stdout); + } + fputs("", stdout); + totalfeeds++; + } + parsedtime = (time_t)strtol(fields[FieldUnixTimestamp], NULL, 10); + isnew = (parsedtime >= comparetime); + islink = (strlen(fields[FieldLink]) > 0); + totalnew += isnew; + feedcurrent->new += isnew; + feedcurrent->total++; + + fputs("\n", stdout); + } + if(totalfeeds) { + fputs("
", stdout); + printtime(parsedtime); + fputs("", stdout); + if(isnew) + fputs("", stdout); + if(islink) { + fputs("", stdout); + } + printhtmlencoded(fields[FieldTitle]); + if(islink) + fputs("", stdout); + if(isnew) + fputs("", stdout); + fputs("
\n", stdout); + fputs("\t\t
\n", stdout); /* div items */ + } + if(showsidebar) { + fputs("\t\t
\n\t\t\t\n\t\t
\n", stdout); + } + fputs( + "
\n" + " \n" + " Newsfeeds (", + stdout); + fprintf(stdout, "%lu", totalnew); + fputs(")\n", stdout); + + free(line); /* free line */ + feedsfree(feeds); /* free feeds linked-list */ + + return EXIT_SUCCESS; +} diff --git a/sfeed_opml_config.1 b/sfeed_opml_config.1 new file mode 100644 index 0000000..523f99e --- /dev/null +++ b/sfeed_opml_config.1 @@ -0,0 +1,11 @@ +.TH SFEED 1 sfeed\-VERSION +.SH NAME +sfeed_opml_config \- generate a sfeedrc config file based on an opml file +.SH SYNOPSIS +.B sfeed_opml_config +.SH DESCRIPTION +Reads the opml XML data from stdin and writes the config file text to stdout. +.SH SEE ALSO +.BR sfeed_update(1) +.SH BUGS +Please report them! diff --git a/sfeed_opml_config.c b/sfeed_opml_config.c new file mode 100644 index 0000000..0d74820 --- /dev/null +++ b/sfeed_opml_config.c @@ -0,0 +1,87 @@ +/* convert an opml file to sfeedrc file */ +#include +#include +#include +#include +#include /* libexpat */ + +XML_Parser parser; /* expat XML parser state */ + +char * /* search for attr value by attr name in attributes list */ +getattrvalue(const char **atts, const char *name) { + const char **attr = NULL, *key, *value; + if(!atts || !(*atts)) + return NULL; + for(attr = atts; *attr; ) { + key = *(attr++); + value = *(attr++); + if(key && value && !strcasecmp(key, name)) + return (char *)value; + } + return NULL; +} + +void XMLCALL +xml_handler_start_element(void *data, const char *name, const char **atts) { + char *feedurl = NULL, *feedname = NULL;; + + if(!strcasecmp(name, "outline")) { + if(!(feedname = getattrvalue(atts, "text")) && + !(feedname = getattrvalue(atts, "title"))) + feedname = "unnamed"; + if(!(feedurl = getattrvalue(atts, "xmlurl"))) + feedurl = ""; + printf("\tfeed \"%s\" \"%s\"\n", feedname, feedurl); + } +} + +void XMLCALL +xml_handler_end_element(void *data, const char *name) { +} + +int /* parse XML from stream using setup parser, return 1 on success, 0 on failure. 
*/ +xml_parse_stream(XML_Parser parser, FILE *fp) { + char buffer[BUFSIZ]; + int done = 0, len = 0; + + while(!feof(fp)) { + len = fread(buffer, 1, sizeof(buffer), fp); + done = (feof(fp) || ferror(fp)); + if(XML_Parse(parser, buffer, len, done) == XML_STATUS_ERROR && (len > 0)) { + if(XML_GetErrorCode(parser) == XML_ERROR_NO_ELEMENTS) + return 1; /* Ignore "no elements found" / empty document as an error */ + fprintf(stderr, "sfeed_opml_config: error parsing xml %s at line %lu column %lu\n", + XML_ErrorString(XML_GetErrorCode(parser)), (unsigned long)XML_GetCurrentLineNumber(parser), + (unsigned long)XML_GetCurrentColumnNumber(parser)); + return 0; + } + } while(!done); + return 1; +} + +int main(void) { + int status; + + if(!(parser = XML_ParserCreate("UTF-8"))) { + fputs("sfeed_opml_config: can't create parser", stderr); + exit(EXIT_FAILURE); + } + XML_SetElementHandler(parser, xml_handler_start_element, xml_handler_end_element); + + fputs( + "# paths\n" + "# NOTE: make sure to uncomment all these if you change it.\n" + "#sfeedpath=\"$HOME/.sfeed\"\n" + "#sfeedfile=\"$sfeedpath/feeds\"\n" + "#sfeedfilenew=\"$sfeedfile.new\"\n" + "\n" + "# list of feeds to fetch:\n" + "feeds() {\n" + " # feed [encoding]\n", stdout); + status = xml_parse_stream(parser, stdin); + fputs("}\n", stdout); + + XML_ParserFree(parser); + + return status ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/sfeed_plain.1 b/sfeed_plain.1 new file mode 100644 index 0000000..3f396aa --- /dev/null +++ b/sfeed_plain.1 @@ -0,0 +1,15 @@ +.TH SFEED 1 sfeed\-VERSION +.SH NAME +sfeed_plain \- format feeds file (TSV) from sfeed_update to plain text +.SH SYNOPSIS +.B sfeed +.SH DESCRIPTION +Format feeds file (TSV) from sfeed_update to plain text. Reads TSV data from +stdin and writes plain text to stdout. For the exact TSV format see +sfeed_update(1). +.SH SEE ALSO +.BR sfeed_html(1) +.BR sfeed_update(1) +.BR sfeed(1) +.SH BUGS +Please report them! 
diff --git a/sfeed_plain.c b/sfeed_plain.c new file mode 100644 index 0000000..d4045c9 --- /dev/null +++ b/sfeed_plain.c @@ -0,0 +1,42 @@ +#include +#include +#include +#include +#include "common.c" + +void +printutf8padded(const char *s, size_t len) { + unsigned int n = 0, i = 0; + + for(; s[i] && n < len; i++) { + if((s[i] & 0xc0) != 0x80) /* start of character */ + n++; + putchar(s[i]); + } + for(; n < len; n++) + putchar(' '); +} + +int +main(void) { + char *line = NULL, *fields[FieldLast]; + time_t parsedtime, comparetime; + size_t size = 0; + + tzset(); + comparetime = time(NULL) - (3600 * 24); /* 1 day is old news */ + while(parseline(&line, &size, fields, FieldLast, stdin, FieldSeparator) > 0) { + parsedtime = (time_t)strtol(fields[FieldUnixTimestamp], NULL, 10); + printf(" %c ", (parsedtime >= comparetime) ? 'N' : ' '); + if(fields[FieldFeedName][0] != '\0') + printf("%-15.15s ", fields[FieldFeedName]); + printtime(parsedtime); + fputs(" ", stdout); + printutf8padded(fields[FieldTitle], 70); + fputs(" ", stdout); + printlink(fields[FieldLink], fields[FieldFeedUrl]); + putchar('\n'); + } + free(line); + return EXIT_SUCCESS; +} diff --git a/sfeed_update b/sfeed_update new file mode 100755 index 0000000..52b9728 --- /dev/null +++ b/sfeed_update @@ -0,0 +1,116 @@ +#!/bin/sh +# update feeds, merge with old feeds. +# NOTE: assumes "sfeed_*" files are in $PATH. + +# defaults +sfeedpath="$HOME/.sfeed" +sfeedfile="$sfeedpath/feeds" +# temporary file for new feeds (for merging). +sfeedfilenew="$sfeedfile.new" + +# load config (evaluate shellscript). +# loadconfig(configfile) +loadconfig() { + # allow to specify config via argv[1]. + if [ ! "$1" = "" ]; then + # get absolute path of config file. + config=$(readlink -f "$1") + else + # default config location. + config="$HOME/.sfeed/sfeedrc" + fi + + # load config: config is loaded here to be able to override above variables + # (sfeedpath, sfeedfile, etc). + if [ -r "$config" ]; then + . 
"$config" + else + echo "Configuration file \"$config\" does not exist or is not readable." >&2 + echo "See sfeedrc.example for an example." >&2 + exit 1 + fi +} + +# merge raw files. +# merge(oldfile, newfile) +merge() { + # unique sort by id, link, title. + # order by feedname (asc), feedurl (asc) and timestamp (desc). + (cat "$1" "$2" 2> /dev/null) | + sort -t ' ' -u -k7,7 -k4,4 -k3,3 | + sort -t ' ' -k10,10 -k11,11 -k1r,1 +} + +# fetch a feed via HTTP/HTTPS etc. +# fetchfeed(url, name) +fetchfeed() { + if (curl -f -s -S -L --max-time 30 -z "$lastupdated" "$1"); then + printf "%s\n" "[`date`] Fetching $2 [$1] ... done" >&2 + else + printf "%s\n" "[`date`] Fetching $2 [$1] ... fail" >&2 + fi +} + +# add field after line, output to stdout. +# addfield(field) +addfield() { + # NOTE: IFS is set and restored to prevent stripping whitespace. + OLDIFS="$IFS" + IFS=" +" + while read -r line; do + printf "%s %s\n" "${line}" "$1" + done + IFS="$OLDIFS" +} + +# fetch and parse feed. +# feed(name, url, encoding) +feed() { + tmpfile=$(mktemp -p "$TMPDIR") + (if [ "$3" = "" ]; then + # don't use iconv if encoding not set in config. + fetchfeed "$2" "$1" | sfeed | addfield "$1 $2" + else + # use iconv to convert encoding to UTF-8. + fetchfeed "$2" "$1" | iconv -cs -f "$3" -t "utf-8" | sfeed | addfield "$1 $2" + fi) > "$tmpfile" +} + +terminated() { + isrunning="0" +} + +cleanup() { + # remove temporary files + rm -rf "$tmpfile" "$TMPDIR" +} + +# load config file. +loadconfig "$1" +# fetch feeds and store in temporary file. +TMPDIR=$(mktemp -d -t "sfeed_XXXXXX") +# get date of last modified feedfile in format: +# YYYYmmdd HH:MM:SS [+-][0-9]* +lastupdated=$(stat -c "%y" "$sfeedfile" 2> /dev/null | cut -c 1-4,6-7,9-10,11-19,30-) +# Kill whole current process group on ^C. +isrunning="1" +trap -- "terminated" "15" # SIGTERM: signal to terminate parent. +trap -- "kill -TERM -$$" "2" # SIGINT: kill all running childs >:D +# fetch feeds specified in config file. 
+feeds +# make sure path exists. +mkdir -p "$sfeedpath" +# wait till all feeds are fetched (allows running in parallel). +wait +[ "$isrunning" = "0" ] && cleanup && exit 1 # if terminated cleanup. +# concat all individual feed files to a single file. +# NOTE: mktemp uses $TMPDIR for temporary directory. +tmpfile=$(mktemp -t "sfeed_XXXXXX") +find "$TMPDIR" -type f -exec cat {} \; > "$tmpfile" +# get new data and merge with old. +merge "$sfeedfile" "$tmpfile" > "$sfeedfilenew" +# overwrite old file with updated file +mv "$sfeedfilenew" "$sfeedfile" +# cleanup temporary files etc. +cleanup diff --git a/sfeed_update.1 b/sfeed_update.1 new file mode 100644 index 0000000..e9cffc1 --- /dev/null +++ b/sfeed_update.1 @@ -0,0 +1,82 @@ +.TH SFEED 1 sfeed\-VERSION +.SH NAME +sfeed_update \- update feeds and merge with old feeds +.SH SYNOPSIS +.B sfeed_update +.RB [configfile] +.SH OPTIONS +.TP +.B [configfile] +config file, if not specified uses the location $HOME/.sfeed/sfeedrc by default (see FILES READ section for more information). +.SH DESCRIPTION +.TP +Update feeds and merge with old feeds in the file $HOME/.sfeed/feeds by default. +.SH TAB-SEPARATED FORMAT +The items are saved in a TSV-like format except newlines, tabs and +backslash are escaped with \\ (\\n, \\t and \\\\). Carriage returns (\\r) are +removed. +.TP +.B item timestamp (unix timestamp in GMT+0) +string +.TP +.B item timestamp (formatted) +string (YYYY-mm-dd HH:MM:SS tzname[+-]HHMM +.TP +.B item title +string +.TP +.B item link +string +.TP +.B item description +string +.TP +.B item contenttype +string (html or plain) +.TP +.B item id +string +.TP +.B item author +string +.TP +.B feed type +string (rss or atom) +.TP +.B feed name +string (extra field added by sfeed_update) +.TP +.B feed url +string (extra field added by sfeed_update) +.SH FILES READ +.TP +.B sfeedrc +Config file, see the sfeedrc.example file for an example. +This file is evaluated as a shellscript in sfeed_update. 
+You can for example override the fetchfeed() function to +use wget, fetch or another download program or you can +override the merge() function to change the merge logic. +The function feeds() is called to fetch the feeds. The +function feed() can safely be executed as a parallel job +in your sfeedrc config file to speed up updating. +.SH FILES WRITTEN +.TP +.B feeds +Tab-separated format containing all feeds. +The sfeed_update script merges new items with this file. +.TP +.B feeds.new +Temporary file used by sfeed_update to merge items. +.SH EXAMPLES +.TP +To update feeds and format the feeds file: +.nf +sfeed_update "configfile" +sfeed_plain < $HOME/.sfeed/feeds > $HOME/.sfeed/feeds.txt +sfeed_html < $HOME/.sfeed/feeds > $HOME/.sfeed/feeds.html +.SH SEE ALSO +.BR sh(1) +.BR sfeed_plain(1) +.BR sfeed_html(1) +.SH BUGS +Please report them! diff --git a/sfeedrc.example b/sfeedrc.example new file mode 100644 index 0000000..d25777a --- /dev/null +++ b/sfeedrc.example @@ -0,0 +1,17 @@ +# paths +# NOTE: make sure to uncomment all these if you change it. +#sfeedpath="$HOME/.sfeed" +#sfeedfile="$sfeedpath/feeds" +#sfeedfilenew="$sfeedfile.new" + +# list of feeds to fetch: +feeds() { + # feed <name> <url> [encoding] + feed "codemadness" "http://www.codemadness.nl/blog/rss.xml" + feed "explosm" "http://feeds.feedburner.com/Explosm" + feed "linux kernel" "http://kernel.org/kdist/rss.xml" "iso-8859-1" + feed "phoronix" "http://feeds.feedburner.com/Phoronix" + feed "slashdot" "http://rss.slashdot.org/Slashdot/slashdot" + feed "tweakers" "http://feeds.feedburner.com/tweakers/mixed" "iso-8859-1" + feed "xkcd" "http://xkcd.com/atom.xml" +} -- cgit v1.2.3