diff options
Diffstat (limited to 'sfeed_tail.c')
-rw-r--r-- | sfeed_tail.c | 131 |
1 files changed, 87 insertions, 44 deletions
diff --git a/sfeed_tail.c b/sfeed_tail.c
index fbbcba0..3549a7c 100644
--- a/sfeed_tail.c
+++ b/sfeed_tail.c
@@ -1,39 +1,67 @@
 #include <ctype.h>
 #include <err.h>
+#include <locale.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
 
+#include "tree.h"
 #include "util.h"
 
 static int firsttime;
+static int runs;
 static char *line;
 static size_t linesize;
+time_t comparetime;
 
 struct line {
-	char *s;
-	size_t len;
-	struct line *next;
+	char *id;
+	char *link;
+	char *title;
+	time_t timestamp;
+	RB_ENTRY(line) entry;
 };
 
-/* ofcourse: bigger bucket size uses more memory, but has less collisions. */
-#define BUCKET_SIZE 16384
-struct bucket {
-	struct line cols[BUCKET_SIZE];
-};
-static struct bucket *buckets;
-static struct bucket *bucket;
-static const uint32_t seed = 1167266473;
+int
+linecmp(struct line *e1, struct line *e2)
+{
+	int r;
+
+	if ((r = strcmp(e1->id, e2->id)))
+		return r;
+	if ((r = strcmp(e1->link, e2->link)))
+		return r;
+	return strcmp(e1->title, e2->title);
+}
+RB_HEAD(linetree, line) head = RB_INITIALIZER(&head);
+RB_GENERATE_STATIC(linetree, line, entry, linecmp)
+
+/* remove old entries from the tree that won't be shown anyway. */
+static void
+gc(void)
+{
+	struct line *line, *tmp;
+
+	RB_FOREACH_SAFE(line, linetree, &head, tmp) {
+		if (line->timestamp < comparetime) {
+/*			printf("DEBUG: gc: removing: %s %s\n",
+				line->id, line->title);*/
+			free(line->id);
+			free(line->link);
+			free(line->title);
+			RB_REMOVE(linetree, &head, line);
+			free(line);
+		}
+	}
+}
 
 static void
 printfeed(FILE *fp, const char *feedname)
 {
-	struct line *match;
+	struct line *add, search;
 	char *fields[FieldLast];
-	uint32_t hash;
-	int uniq;
 	ssize_t linelen;
 	time_t parsedtime;
 	struct tm *tm;
@@ -41,41 +69,44 @@ printfeed(FILE *fp, const char *feedname)
 	while ((linelen = getline(&line, &linesize, fp)) > 0) {
 		if (line[linelen - 1] == '\n')
 			line[--linelen] = '\0';
-		hash = murmur3_32(line, (size_t)linelen, seed) % BUCKET_SIZE;
-
-		for (uniq = 1, match = &(bucket->cols[hash]);
-		     match;
-		     match = match->next) {
-			/* check for collision, can still be unique. */
-			if (match->s && match->len == (size_t)linelen &&
-			    !strcmp(line, match->s)) {
-				uniq = 0;
-				break;
-			}
-			/* nonexistent or no collision */
-			if (!match->next) {
-				if (!(match = match->next = calloc(1, sizeof(struct line))))
-					err(1, "calloc");
-				if (!(match->s = strdup(line)))
-					err(1, "strdup");
-				match->len = (size_t)linelen;
-				break;
-			}
-		}
-		if (!uniq || firsttime)
-			continue;
 
 		if (!parseline(line, fields))
 			break;
-
 		parsedtime = 0;
 		strtotime(fields[FieldUnixTimestamp], &parsedtime);
 		if (!(tm = localtime(&parsedtime)))
 			err(1, "localtime");
 
+		/* old news: skip */
+		if (parsedtime < comparetime)
+			continue;
+
+		search.id = fields[FieldId];
+		search.link = fields[FieldLink];
+		search.title = fields[FieldTitle];
+		search.timestamp = parsedtime;
+		if (RB_FIND(linetree, &head, &search))
+			continue;
+
+/*		printf("DEBUG: new: id: %s, link: %s, title: %s\n",
+			fields[FieldId], fields[FieldLink], fields[FieldTitle]);*/
+
+		if (!(add = calloc(1, sizeof(*add))))
+			err(1, "calloc");
+		if (!(add->id = strdup(fields[FieldId])))
+			err(1, "strdup");
+		if (!(add->link = strdup(fields[FieldLink])))
+			err(1, "strdup");
+		if (!(add->title = strdup(fields[FieldTitle])))
+			err(1, "strdup");
+		add->timestamp = parsedtime;
+		RB_INSERT(linetree, &head, add);
+
+		if (firsttime)
+			continue;
+
 		if (feedname[0])
 			printf("%-15.15s ", feedname);
-
 		printf("%04d-%02d-%02d %02d:%02d ",
 		       tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
 		       tm->tm_hour, tm->tm_min);
@@ -91,17 +122,23 @@ main(int argc, char *argv[])
 	FILE *fp;
 	int i;
 
+	if (pledge("stdio rpath", NULL) == -1)
+		err(1, "pledge");
+
+	setlocale(LC_CTYPE, "");
+
 	if (pledge(argc == 1 ? "stdio" : "stdio rpath", NULL) == -1)
 		err(1, "pledge");
 
-	if (!(bucket = buckets = calloc(argc, sizeof(struct bucket))))
-		err(1, "calloc");
-	for (firsttime = (argc > 1); ; firsttime = 0) {
+	for (runs = 0, firsttime = (argc > 1); ; ++runs, firsttime = 0) {
+		if ((comparetime = time(NULL)) == -1)
+			err(1, "time");
+		/* 1 day is old news */
+		comparetime -= 86400;
 		if (argc == 1) {
 			printfeed(stdin, "");
 		} else {
 			for (i = 1; i < argc; i++) {
-				bucket = &buckets[i - 1];
 				if (!(fp = fopen(argv[i], "r")))
 					err(1, "fopen: %s", argv[i]);
 				name = ((name = strrchr(argv[i], '/'))) ? name + 1 : argv[i];
@@ -111,7 +148,13 @@ main(int argc, char *argv[])
 				fclose(fp);
 			}
 		}
-		sleep(60);
+		sleep(300);
+		/* gc once every 12 runs, each run takes some CPU time and
+		   a 5 minute sleep */
+		if (runs && (runs % 12) == 0) {
+			gc();
+			runs = 0;
+		}
 	}
 	return 0;
 }