From c75f540ac59c5d6e3676878170c42d35b11d0c34 Mon Sep 17 00:00:00 2001
From: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Sun, 31 Jan 2016 15:38:19 +0100
Subject: add sfeed_tail (test), might be removed again later

fix Makefile (compat)
---
 Makefile     |  34 +++++++------
 sfeed_tail.1 |  41 ++++++++++++++++
 sfeed_tail.c | 154 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 215 insertions(+), 14 deletions(-)
 create mode 100644 sfeed_tail.1
 create mode 100644 sfeed_tail.c

diff --git a/Makefile b/Makefile
index 2c8f6e2..c5af23c 100644
--- a/Makefile
+++ b/Makefile
@@ -9,6 +9,7 @@ SRC = \
 	sfeed_mbox.c\
 	sfeed_opml_import.c\
 	sfeed_plain.c\
+	sfeed_tail.c\
 	sfeed_web.c\
 	sfeed_xmlenc.c\
 	util.c\
@@ -22,6 +23,7 @@ BIN = \
 	sfeed_mbox\
 	sfeed_opml_import\
 	sfeed_plain\
+	sfeed_tail\
 	sfeed_web\
 	sfeed_xmlenc
 SCRIPTS = \
@@ -35,6 +37,7 @@ MAN1 = \
 	sfeed_opml_export.1\
 	sfeed_opml_import.1\
 	sfeed_plain.1\
+	sfeed_tail.1\
 	sfeed_update.1\
 	sfeed_web.1\
 	sfeed_xmlenc.1
@@ -71,26 +74,29 @@ dist: $(BIN)
 
 ${OBJ}: config.mk ${HDR}
 
-sfeed: sfeed.o xml.o util.o ${EXTRAOBJ}
-	${CC} -o $@ sfeed.o xml.o util.o ${EXTRAOBJ} ${LDFLAGS}
+sfeed: sfeed.o xml.o util.o
+	${CC} -o $@ sfeed.o xml.o util.o ${LDFLAGS}
 
-sfeed_frames: sfeed_frames.o util.o ${EXTRAOBJ}
-	${CC} -o $@ sfeed_frames.o util.o ${EXTRAOBJ} ${LDFLAGS}
+sfeed_frames: sfeed_frames.o util.o
+	${CC} -o $@ sfeed_frames.o util.o ${LDFLAGS}
 
-sfeed_html: sfeed_html.o util.o ${EXTRAOBJ}
-	${CC} -o $@ sfeed_html.o util.o ${EXTRAOBJ} ${LDFLAGS}
+sfeed_html: sfeed_html.o util.o
+	${CC} -o $@ sfeed_html.o util.o ${LDFLAGS}
 
-sfeed_mbox: sfeed_mbox.o util.o ${EXTRAOBJ}
-	${CC} -o $@ sfeed_mbox.o util.o ${EXTRAOBJ} ${LDFLAGS}
+sfeed_mbox: sfeed_mbox.o util.o
+	${CC} -o $@ sfeed_mbox.o util.o ${LDFLAGS}
 
-sfeed_opml_import: sfeed_opml_import.o xml.o ${EXTRAOBJ}
-	${CC} -o $@ sfeed_opml_import.o xml.o ${EXTRAOBJ} ${LDFLAGS}
+sfeed_opml_import: sfeed_opml_import.o xml.o
+	${CC} -o $@ sfeed_opml_import.o xml.o ${LDFLAGS}
 
-sfeed_plain: sfeed_plain.o util.o ${EXTRAOBJ}
-	${CC} -o $@ sfeed_plain.o util.o ${EXTRAOBJ} ${LDFLAGS}
+sfeed_plain: sfeed_plain.o util.o
+	${CC} -o $@ sfeed_plain.o util.o ${LDFLAGS}
 
-sfeed_web: sfeed_web.o xml.o util.o ${EXTRAOBJ}
-	${CC} -o $@ sfeed_web.o xml.o util.o ${EXTRAOBJ} ${LDFLAGS}
+sfeed_tail: sfeed_tail.o util.o
+	${CC} -o $@ sfeed_tail.o util.o ${LDFLAGS}
+
+sfeed_web: sfeed_web.o xml.o util.o
+	${CC} -o $@ sfeed_web.o xml.o util.o ${LDFLAGS}
 
 sfeed_xmlenc: sfeed_xmlenc.o xml.o
 	${CC} -o $@ sfeed_xmlenc.o xml.o ${LDFLAGS}
diff --git a/sfeed_tail.1 b/sfeed_tail.1
new file mode 100644
index 0000000..3259dbc
--- /dev/null
+++ b/sfeed_tail.1
@@ -0,0 +1,41 @@
+.Dd January 29, 2016
+.Dt SFEED_TAIL 1
+.Os
+.Sh NAME
+.Nm sfeed_tail
+.Nd format unseen feed data to a plain-text list
+.Sh SYNOPSIS
+.Nm
+.Op Ar file...
+.Sh DESCRIPTION
+.Nm
+formats unseen feed data (TSV) from
+.Xr sfeed 1
+from stdin or
+.Ar file
+to stdout as a plain-text list. If one or more
+.Ar file
+are specified, the basename of the
+.Ar file
+is used as the feed name in the output. If no
+.Ar file
+parameters are specified, and the data is therefore read from stdin, the feed
+name is empty.
+.Pp
+.Nm
+marks the items read on the first run as seen.
+If the data is read from stdin these initial items are printed.
+If the data is read by specifying the argument(s)
+.Ar file
+the initial items are not printed, only items that appear on later runs.
+.Pp
+Unseen items are printed per line in a similar format to
+.Xr sfeed_plain 1 .
+Duplicate items are ignored. An item is identified by the
+fields: feedname, item id and UNIX timestamp of the item date.
+.Sh SEE ALSO
+.Xr sfeed 1 ,
+.Xr sfeed_plain 1 ,
+.Xr tail 1
+.Sh AUTHORS
+.An Hiltjo Posthuma Aq Mt hiltjo@codemadness.org
diff --git a/sfeed_tail.c b/sfeed_tail.c
new file mode 100644
index 0000000..a57455b
--- /dev/null
+++ b/sfeed_tail.c
@@ -0,0 +1,154 @@
+#include <ctype.h>
+#include <err.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <wchar.h>
+
+#include "util.h"
+
+static int firsttime; /* set by main() for the first pass over file arguments: printfeed() records items but prints nothing */
+static char *line; /* line buffer passed to parseline() */
+static size_t linesize; /* size value passed to parseline() together with `line' */
+
+struct line {
+	char *timestamp; /* copy of the item's FieldUnixTimestamp field, NULL in an unused head slot */
+	char *id; /* copy of the item's FieldId field, NULL in an unused head slot */
+	struct line *next; /* next entry chained in the same slot, or NULL */
+};
+
+/* of course: a bigger bucket size uses more memory, but has fewer collisions. */
+#define BUCKET_SIZE 65535
+struct bucket {
+	struct line cols[BUCKET_SIZE]; /* chain heads, indexed by hash % BUCKET_SIZE */
+};
+static struct bucket *buckets; /* array of tables allocated in main() */
+static struct bucket *bucket; /* table printfeed() uses for the input being read */
+
+/* strdup(3) wrapper: return a copy of `s', exit with status 1 on failure. */
+static char *
+estrdup(const char *s)
+{
+	char *dup = strdup(s);
+	if (dup == NULL)
+		err(1, "strdup");
+	return dup;
+}
+
+/* calloc(3) wrapper: return zeroed memory, exit with status 1 on failure. */
+static void *
+ecalloc(size_t nmemb, size_t size)
+{
+	void *mem = calloc(nmemb, size);
+	if (mem == NULL)
+		err(1, "calloc");
+	return mem;
+}
+
+/* Jenkins one-at-a-time hash of the NUL-terminated string `s'. */
+static uint32_t
+jenkins1(const char *s)
+{
+	uint32_t h = 0;
+
+	while (*s) {
+		h += (int)*s++;
+		h += h << 10;
+		h ^= h >> 6;
+	}
+	h += h << 3;
+	h ^= h >> 11;
+	h += h << 15;
+	return h;
+}
+
+/* print `len' columns of characters. If string is shorter pad the rest with
+ * characters `pad`. A character that was started is always written in full. */
+static void
+printutf8pad(FILE *fp, const char *s, size_t len, int pad)
+{
+	wchar_t w;
+	size_t n = 0;
+	int r;
+	/* !ISUTF8(): presumably a continuation byte; it adds no column, so keep it */
+	for (; *s && (n < len || !ISUTF8(*s)); s++) {
+		if (ISUTF8(*s)) {
+			if ((r = mbtowc(&w, s, 4)) == -1)
+				break;
+			if ((r = wcwidth(w)) == -1)
+				r = 1;
+			n += (size_t)r;
+		}
+		putc(*s, fp);
+	}
+	for (; n < len; n++)
+		putc(pad, fp);
+}
+
+/* Read items from `fp' with parseline(). Every new (id, timestamp) pair is
+ * recorded in the current table; items not seen before are printed to
+ * stdout, prefixed with `feedname' and the formatted time if it is set. */
+static void
+printfeed(FILE *fp, const char *feedname)
+{
+	char *fields[FieldLast];
+	struct line *cur;
+	const char *id, *ts;
+	int seen;
+
+	while (parseline(&line, &linesize, fields, fp) > 0) {
+		id = fields[FieldId];
+		ts = fields[FieldUnixTimestamp];
+		cur = &bucket->cols[(jenkins1(ts) + jenkins1(id)) % BUCKET_SIZE];
+		for (seen = 0; !seen; cur = cur->next) {
+			/* same id and timestamp: already known */
+			seen = cur->id && cur->timestamp &&
+			       !strcmp(cur->id, id) && !strcmp(cur->timestamp, ts);
+			if (seen || cur->next)
+				continue;
+			/* end of chain: remember this item */
+			cur->next = ecalloc(1, sizeof(*cur));
+			cur->next->id = estrdup(id);
+			cur->next->timestamp = estrdup(ts);
+			break;
+		}
+		/* known items, and all items while `firsttime' is set, are skipped */
+		if (seen || firsttime)
+			continue;
+		if (*feedname)
+			printf("%-15.15s %-30.30s",
+			       feedname, fields[FieldTimeFormatted]);
+		printutf8pad(stdout, fields[FieldTitle], 70, ' ');
+		printf(" %s\n", fields[FieldLink]);
+	}
+}
+
+/* Every 60 seconds read stdin (no arguments) or each file argument again. */
+int
+main(int argc, char *argv[])
+{
+	FILE *fp;
+	char *name;
+	int i;
+
+	bucket = buckets = ecalloc(argc, sizeof(*buckets));
+	for (firsttime = (argc > 1); ; firsttime = 0) {
+		if (argc == 1)
+			printfeed(stdin, "");
+		for (i = 1; i < argc; i++) {
+			bucket = buckets + (i - 1); /* one table per file argument */
+			fp = fopen(argv[i], "r");
+			if (fp == NULL)
+				err(1, "fopen: %s", argv[i]);
+			name = xbasename(argv[i]);
+			printfeed(fp, name);
+			free(name);
+			if (ferror(fp))
+				err(1, "ferror: %s", argv[i]);
+			fclose(fp);
+		}
+		sleep(60);
+	}
+	return 0;
+}
-- 
cgit v1.2.3