summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHiltjo Posthuma <hiltjo@codemadness.org>2016-01-31 15:38:19 +0100
committerHiltjo Posthuma <hiltjo@codemadness.org>2016-01-31 15:38:19 +0100
commitc75f540ac59c5d6e3676878170c42d35b11d0c34 (patch)
treed9ecba8c2adfea08958b80508f02c17a64c51689
parent77a603a904087dd9fd3350da029f279f076e4f4b (diff)
add sfeed_tail (test), might be removed again later
fix Makefile (compat)
-rw-r--r--Makefile34
-rw-r--r--sfeed_tail.141
-rw-r--r--sfeed_tail.c154
3 files changed, 215 insertions, 14 deletions
diff --git a/Makefile b/Makefile
index 2c8f6e2..c5af23c 100644
--- a/Makefile
+++ b/Makefile
@@ -9,6 +9,7 @@ SRC = \
sfeed_mbox.c\
sfeed_opml_import.c\
sfeed_plain.c\
+ sfeed_tail.c\
sfeed_web.c\
sfeed_xmlenc.c\
util.c\
@@ -22,6 +23,7 @@ BIN = \
sfeed_mbox\
sfeed_opml_import\
sfeed_plain\
+ sfeed_tail\
sfeed_web\
sfeed_xmlenc
SCRIPTS = \
@@ -35,6 +37,7 @@ MAN1 = \
sfeed_opml_export.1\
sfeed_opml_import.1\
sfeed_plain.1\
+ sfeed_tail.1\
sfeed_update.1\
sfeed_web.1\
sfeed_xmlenc.1
@@ -71,26 +74,29 @@ dist: $(BIN)
${OBJ}: config.mk ${HDR}
-sfeed: sfeed.o xml.o util.o ${EXTRAOBJ}
- ${CC} -o $@ sfeed.o xml.o util.o ${EXTRAOBJ} ${LDFLAGS}
+sfeed: sfeed.o xml.o util.o
+ ${CC} -o $@ sfeed.o xml.o util.o ${LDFLAGS}
-sfeed_frames: sfeed_frames.o util.o ${EXTRAOBJ}
- ${CC} -o $@ sfeed_frames.o util.o ${EXTRAOBJ} ${LDFLAGS}
+sfeed_frames: sfeed_frames.o util.o
+ ${CC} -o $@ sfeed_frames.o util.o ${LDFLAGS}
-sfeed_html: sfeed_html.o util.o ${EXTRAOBJ}
- ${CC} -o $@ sfeed_html.o util.o ${EXTRAOBJ} ${LDFLAGS}
+sfeed_html: sfeed_html.o util.o
+ ${CC} -o $@ sfeed_html.o util.o ${LDFLAGS}
-sfeed_mbox: sfeed_mbox.o util.o ${EXTRAOBJ}
- ${CC} -o $@ sfeed_mbox.o util.o ${EXTRAOBJ} ${LDFLAGS}
+sfeed_mbox: sfeed_mbox.o util.o
+ ${CC} -o $@ sfeed_mbox.o util.o ${LDFLAGS}
-sfeed_opml_import: sfeed_opml_import.o xml.o ${EXTRAOBJ}
- ${CC} -o $@ sfeed_opml_import.o xml.o ${EXTRAOBJ} ${LDFLAGS}
+sfeed_opml_import: sfeed_opml_import.o xml.o
+ ${CC} -o $@ sfeed_opml_import.o xml.o ${LDFLAGS}
-sfeed_plain: sfeed_plain.o util.o ${EXTRAOBJ}
- ${CC} -o $@ sfeed_plain.o util.o ${EXTRAOBJ} ${LDFLAGS}
+sfeed_plain: sfeed_plain.o util.o
+ ${CC} -o $@ sfeed_plain.o util.o ${LDFLAGS}
-sfeed_web: sfeed_web.o xml.o util.o ${EXTRAOBJ}
- ${CC} -o $@ sfeed_web.o xml.o util.o ${EXTRAOBJ} ${LDFLAGS}
+sfeed_tail: sfeed_tail.o util.o
+ ${CC} -o $@ sfeed_tail.o util.o ${LDFLAGS}
+
+sfeed_web: sfeed_web.o xml.o util.o
+ ${CC} -o $@ sfeed_web.o xml.o util.o ${LDFLAGS}
sfeed_xmlenc: sfeed_xmlenc.o xml.o
${CC} -o $@ sfeed_xmlenc.o xml.o ${LDFLAGS}
diff --git a/sfeed_tail.1 b/sfeed_tail.1
new file mode 100644
index 0000000..3259dbc
--- /dev/null
+++ b/sfeed_tail.1
@@ -0,0 +1,41 @@
+.Dd January 29, 2016
+.Dt SFEED_TAIL 1
+.Os
+.Sh NAME
+.Nm sfeed_tail
+.Nd format unseen feed data to a plain-text list
+.Sh SYNOPSIS
+.Nm
+.Op Ar file...
+.Sh DESCRIPTION
+.Nm
+formats unseen feed data (TSV) from
+.Xr sfeed 1
+from stdin or
+.Ar file
+to stdout as a plain-text list. If one or more
+.Ar file
+are specified, the basename of the
+.Ar file
+is used as the feed name in the output. If no
+.Ar file
+parameters are specified, the data is read from stdin and the feed name
+is empty.
+.Pp
+.Nm
+marks the initial items as seen on the first run. If the data is read from
+stdin, the initial items are printed; if the data is read by specifying one
+or more
+.Ar file
+arguments, the initial items are not shown on the first run.
+.Pp
+Unseen items are printed one per line in a format similar to
+.Xr sfeed_plain 1 ;
+duplicate items are ignored. Items are considered unique by the combination
+of the fields: feedname, item id and UNIX timestamp of the item date.
+.Sh SEE ALSO
+.Xr sfeed 1 ,
+.Xr sfeed_plain 1 ,
+.Xr tail 1
+.Sh AUTHORS
+.An Hiltjo Posthuma Aq Mt hiltjo@codemadness.org
diff --git a/sfeed_tail.c b/sfeed_tail.c
new file mode 100644
index 0000000..a57455b
--- /dev/null
+++ b/sfeed_tail.c
@@ -0,0 +1,154 @@
+#include <ctype.h>
+#include <err.h>
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <wchar.h>
+
+#include "util.h"
+
+/* non-zero during the first pass when files are given as arguments: items
+ * are then recorded as seen by printfeed() but not printed. */
+static int firsttime;
+/* line buffer and its size, handed by address to parseline() for every read;
+ * presumably (re)allocated there -- confirm against util.c. */
+static char *line;
+static size_t linesize;
+
+/* one remembered item: an entry in the singly-linked chain of a table slot. */
+struct line {
+ char *timestamp; /* copy of the item's UNIX timestamp field */
+ char *id; /* copy of the item's id field */
+ struct line *next; /* next entry in the same slot, NULL at the end */
+};
+
+/* of course: a bigger bucket size uses more memory, but has fewer collisions.
+ * BUCKET_SIZE is the number of slots in one table; the item hash is reduced
+ * modulo this value in printfeed(). */
+#define BUCKET_SIZE 65535
+/* one table of seen items; cols[] holds the head entry of every chain. */
+struct bucket {
+ struct line cols[BUCKET_SIZE];
+};
+/* array of tables allocated in main(); one table is selected per input file. */
+static struct bucket *buckets;
+/* the table printfeed() currently looks items up in. */
+static struct bucket *bucket;
+
+/* duplicate the string `s'. If strdup() fails an error is reported with
+ * err() and the process exits with status 1, so NULL is never returned. */
+static char *
+estrdup(const char *s)
+{
+	char *copy = strdup(s);
+
+	if (copy == NULL)
+		err(1, "strdup");
+	return copy;
+}
+
+/* allocate zero-initialized memory for `nmemb' objects of `size' bytes each.
+ * If calloc() fails an error is reported with err() and the process exits
+ * with status 1. */
+static void *
+ecalloc(size_t nmemb, size_t size)
+{
+	void *mem = calloc(nmemb, size);
+
+	if (mem == NULL)
+		err(1, "calloc");
+	return mem;
+}
+
+/* jenkins one-at-a-time hash of the NUL-terminated string `s'.
+ * Each byte is added as an unsigned value, so the result does not depend on
+ * whether plain char is signed; this matters for bytes >= 0x80 such as UTF-8
+ * data. */
+static uint32_t
+jenkins1(const char *s)
+{
+	uint32_t hash = 0;
+
+	for (; *s; s++) {
+		hash += (unsigned char)*s;
+		hash += (hash << 10);
+		hash ^= (hash >> 6);
+	}
+	hash += (hash << 3);
+	hash ^= (hash >> 11);
+
+	return hash + (hash << 15);
+}
+
+/* print `len' columns of characters from `s' to `fp'. If the string is
+ * shorter the rest is padded with the character `pad'.
+ *
+ * A multibyte character is decoded first and only written, as a whole, when
+ * its width still fits: the output never ends in the middle of a byte
+ * sequence and never exceeds `len' columns. Decoding uses mbtowc() and
+ * therefore depends on the LC_CTYPE locale; output stops at the first
+ * sequence that cannot be decoded and the remainder is padded. */
+static void
+printutf8pad(FILE *fp, const char *s, size_t len, int pad)
+{
+	wchar_t wc;
+	size_t col = 0;
+	int nbytes, width;
+
+	while (*s && col < len) {
+		if (!ISUTF8(*s)) {
+			/* stray continuation byte: pass it through, it is not
+			 * counted as a column. */
+			putc(*s++, fp);
+			continue;
+		}
+		if ((nbytes = mbtowc(&wc, s, 4)) == -1)
+			break;
+		/* non-printable: count it as one column. */
+		if ((width = wcwidth(wc)) == -1)
+			width = 1;
+		/* e.g. a double-width character with one column left. */
+		if (col + (size_t)width > len)
+			break;
+		fwrite(s, 1, (size_t)nbytes, fp);
+		s += nbytes;
+		col += (size_t)width;
+	}
+	for (; col < len; col++)
+		putc(pad, fp);
+}
+
+/* Read all items from `fp' and remember each (id, UNIX timestamp) pair in
+ * the current table. Items that were not seen before are written to stdout,
+ * prefixed with `feedname' and the formatted time when `feedname' is not
+ * empty. Nothing is printed while `firsttime' is set. */
+static void
+printfeed(FILE *fp, const char *feedname)
+{
+	char *fields[FieldLast];
+	const char *id, *ts;
+	struct line *node;
+	uint32_t slot;
+	int seen;
+
+	while (parseline(&line, &linesize, fields, fp) > 0) {
+		id = fields[FieldId];
+		ts = fields[FieldUnixTimestamp];
+		slot = (jenkins1(ts) + jenkins1(id)) % BUCKET_SIZE;
+
+		/* walk the chain of this slot. The head entry is part of the
+		 * table itself and never stores an item. */
+		seen = 0;
+		node = &(bucket->cols[slot]);
+		for (;;) {
+			if (node->id && node->timestamp &&
+			    !strcmp(node->id, id) &&
+			    !strcmp(node->timestamp, ts)) {
+				seen = 1;
+				break;
+			}
+			if (!node->next) {
+				/* end of chain: record the item as seen. */
+				node->next = ecalloc(1, sizeof(struct line));
+				node->next->id = estrdup(id);
+				node->next->timestamp = estrdup(ts);
+				break;
+			}
+			node = node->next;
+		}
+		if (seen || firsttime)
+			continue;
+
+		if (feedname[0])
+			printf("%-15.15s %-30.30s",
+			       feedname, fields[FieldTimeFormatted]);
+		printutf8pad(stdout, fields[FieldTitle], 70, ' ');
+		printf(" %s\n", fields[FieldLink]);
+	}
+}
+
+/* Watch the feed files given as arguments, or stdin when there are none, and
+ * print the items that were not seen before. The input is read again every
+ * 60 seconds; the loop only ends when err() terminates the process. */
+int
+main(int argc, char *argv[])
+{
+	char *name;
+	FILE *fp;
+	int i;
+
+	/* mbtowc() and wcwidth() in printutf8pad() follow LC_CTYPE: without
+	 * this call the "C" locale is in effect and UTF-8 titles cannot be
+	 * decoded. */
+	setlocale(LC_CTYPE, "");
+
+	bucket = buckets = ecalloc(argc, sizeof(struct bucket));
+	/* with file arguments the first pass only records what exists. */
+	for (firsttime = (argc > 1); ; firsttime = 0) {
+		if (argc == 1) {
+			printfeed(stdin, "");
+			if (ferror(stdin))
+				err(1, "ferror: <stdin>");
+			/* reset the end-of-file indicator, otherwise the next
+			 * pass cannot read data that arrived meanwhile. */
+			clearerr(stdin);
+		} else {
+			for (i = 1; i < argc; i++) {
+				/* every file has its own table of seen items. */
+				bucket = &buckets[i - 1];
+				if (!(fp = fopen(argv[i], "r")))
+					err(1, "fopen: %s", argv[i]);
+				name = xbasename(argv[i]);
+				printfeed(fp, name);
+				free(name);
+				if (ferror(fp))
+					err(1, "ferror: %s", argv[i]);
+				fclose(fp);
+			}
+		}
+		/* stdout is fully buffered when it is not a terminal: flush
+		 * so new items show up before sleeping. */
+		fflush(stdout);
+		sleep(60);
+	}
+	/* not reached */
+	return 0;
+}