summary | refs | log | tree | commit | diff
path: root/sfeed_tail.c
diff options
context:
space:
mode:
Diffstat (limited to 'sfeed_tail.c')
-rw-r--r-- sfeed_tail.c 131
1 files changed, 87 insertions, 44 deletions
diff --git a/sfeed_tail.c b/sfeed_tail.c
index fbbcba0..3549a7c 100644
--- a/sfeed_tail.c
+++ b/sfeed_tail.c
@@ -1,39 +1,67 @@
#include <ctype.h>
#include <err.h>
+#include <locale.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#include "tree.h"
#include "util.h"
static int firsttime;
+static int runs;
static char *line;
static size_t linesize;
+time_t comparetime;
struct line {
- char *s;
- size_t len;
- struct line *next;
+ char *id; /* strdup()ed copy of fields[FieldId]; first comparison key in linecmp() */
+ char *link; /* strdup()ed copy of fields[FieldLink]; second comparison key */
+ char *title; /* strdup()ed copy of fields[FieldTitle]; third comparison key */
+ time_t timestamp; /* parsed item time; gc() drops nodes older than comparetime */
+ RB_ENTRY(line) entry; /* tree linkage named in RB_GENERATE_STATIC(linetree, ...) */
};
-/* ofcourse: bigger bucket size uses more memory, but has less collisions. */
-#define BUCKET_SIZE 16384
-struct bucket {
- struct line cols[BUCKET_SIZE];
-};
-static struct bucket *buckets;
-static struct bucket *bucket;
-static const uint32_t seed = 1167266473;
+int
+linecmp(struct line *e1, struct line *e2) /* tree comparator: orders items by id, then link, then title */
+{
+ int r; /* result of the most recent strcmp(); non-zero decides the order */
+
+ if ((r = strcmp(e1->id, e2->id))) /* ids differ: done */
+ return r;
+ if ((r = strcmp(e1->link, e2->link))) /* same id, links differ: done */
+ return r;
+ return strcmp(e1->title, e2->title); /* 0 only if id, link and title all match; timestamp is not compared */
+}
+RB_HEAD(linetree, line) head = RB_INITIALIZER(&head); /* global tree of already-seen items, initially empty */
+RB_GENERATE_STATIC(linetree, line, entry, linecmp) /* presumably emits the static tree functions (see tree.h), keyed by linecmp() */
+
+/* Remove and free every tree entry whose timestamp is older than comparetime; printfeed() skips such items anyway. */
+static void
+gc(void)
+{
+ struct line *line, *tmp; /* NOTE(review): local "line" shadows the file-scope "static char *line" buffer */
+
+ RB_FOREACH_SAFE(line, linetree, &head, tmp) { /* _SAFE variant with tmp: the current node is removed and freed inside the loop */
+ if (line->timestamp < comparetime) {
+/* printf("DEBUG: gc: removing: %s %s\n",
 line->id, line->title);*/
+ free(line->id); /* the three strings were strdup()ed when the node was inserted */
+ free(line->link);
+ free(line->title);
+ RB_REMOVE(linetree, &head, line); /* unlink before freeing the node itself */
+ free(line);
+ }
+ }
+}
static void
printfeed(FILE *fp, const char *feedname)
{
- struct line *match;
+ struct line *add, search;
char *fields[FieldLast];
- uint32_t hash;
- int uniq;
ssize_t linelen;
time_t parsedtime;
struct tm *tm;
@@ -41,41 +69,44 @@ printfeed(FILE *fp, const char *feedname)
while ((linelen = getline(&line, &linesize, fp)) > 0) {
if (line[linelen - 1] == '\n')
line[--linelen] = '\0';
- hash = murmur3_32(line, (size_t)linelen, seed) % BUCKET_SIZE;
-
- for (uniq = 1, match = &(bucket->cols[hash]);
- match;
- match = match->next) {
- /* check for collision, can still be unique. */
- if (match->s && match->len == (size_t)linelen &&
- !strcmp(line, match->s)) {
- uniq = 0;
- break;
- }
- /* nonexistent or no collision */
- if (!match->next) {
- if (!(match = match->next = calloc(1, sizeof(struct line))))
- err(1, "calloc");
- if (!(match->s = strdup(line)))
- err(1, "strdup");
- match->len = (size_t)linelen;
- break;
- }
- }
- if (!uniq || firsttime)
- continue;
if (!parseline(line, fields))
break;
-
parsedtime = 0;
strtotime(fields[FieldUnixTimestamp], &parsedtime);
if (!(tm = localtime(&parsedtime)))
err(1, "localtime");
+ /* old news: skip */
+ if (parsedtime < comparetime)
+ continue;
+
+ search.id = fields[FieldId];
+ search.link = fields[FieldLink];
+ search.title = fields[FieldTitle];
+ search.timestamp = parsedtime;
+ if (RB_FIND(linetree, &head, &search))
+ continue;
+
+/* printf("DEBUG: new: id: %s, link: %s, title: %s\n",
+ fields[FieldId], fields[FieldLink], fields[FieldTitle]);*/
+
+ if (!(add = calloc(1, sizeof(*add))))
+ err(1, "calloc");
+ if (!(add->id = strdup(fields[FieldId])))
+ err(1, "strdup");
+ if (!(add->link = strdup(fields[FieldLink])))
+ err(1, "strdup");
+ if (!(add->title = strdup(fields[FieldTitle])))
+ err(1, "strdup");
+ add->timestamp = parsedtime;
+ RB_INSERT(linetree, &head, add);
+
+ if (firsttime)
+ continue;
+
if (feedname[0])
printf("%-15.15s ", feedname);
-
printf("%04d-%02d-%02d %02d:%02d ",
tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
tm->tm_hour, tm->tm_min);
@@ -91,17 +122,23 @@ main(int argc, char *argv[])
FILE *fp;
int i;
+ if (pledge("stdio rpath", NULL) == -1)
+ err(1, "pledge");
+
+ setlocale(LC_CTYPE, "");
+
if (pledge(argc == 1 ? "stdio" : "stdio rpath", NULL) == -1)
err(1, "pledge");
- if (!(bucket = buckets = calloc(argc, sizeof(struct bucket))))
- err(1, "calloc");
- for (firsttime = (argc > 1); ; firsttime = 0) {
+ for (runs = 0, firsttime = (argc > 1); ; ++runs, firsttime = 0) {
+ if ((comparetime = time(NULL)) == -1)
+ err(1, "time");
+ /* 1 day is old news */
+ comparetime -= 86400;
if (argc == 1) {
printfeed(stdin, "");
} else {
for (i = 1; i < argc; i++) {
- bucket = &buckets[i - 1];
if (!(fp = fopen(argv[i], "r")))
err(1, "fopen: %s", argv[i]);
name = ((name = strrchr(argv[i], '/'))) ? name + 1 : argv[i];
@@ -111,7 +148,13 @@ main(int argc, char *argv[])
fclose(fp);
}
}
- sleep(60);
+ sleep(300);
+ /* gc once every 12 runs, each run takes some CPU time and
+ a 5 minute sleep */
+ if (runs && (runs % 12) == 0) {
+ gc();
+ runs = 0;
+ }
}
return 0;
}