From b7e288a96418e1ea5e7904ab2896edb3f4615a10 Mon Sep 17 00:00:00 2001 From: Hiltjo Posthuma Date: Thu, 16 Aug 2018 14:16:58 +0200 Subject: sfeed_frames: overhaul sfeed_frames used to write HTML pages for each entry for each feed. This can be useful but had security issues, because the context of the content changes. sfeed_frames is now a HTML version which works better with browsers that don't support CSS or tables well like w3m and lynx. It is now an alternative for sfeed_html. - Don't reference and embed HTML content for security reasons. This was documented under "SECURITY CONSIDERATIONS" in the man page. - Tighten pledge(2). - Simplify --- sfeed_frames.1 | 31 +++------ sfeed_frames.c | 208 ++++++++------------------------------------------------- 2 files changed, 36 insertions(+), 203 deletions(-) diff --git a/sfeed_frames.1 b/sfeed_frames.1 index 4dc1515..0bf6f04 100644 --- a/sfeed_frames.1 +++ b/sfeed_frames.1 @@ -1,4 +1,4 @@ -.Dd August 5, 2015 +.Dd August 16, 2018 .Dt SFEED_FRAMES 1 .Os .Sh NAME @@ -14,39 +14,24 @@ formats feed data (TSV) from to HTML. It reads TSV data from stdin or .Ar file -and writes HTML files to the current directory. +and writes HTML files for the frameset to the current directory. If no .Ar file parameters are specified and therefore the data is read from stdin then the -feed name is named "unnamed". +menu.html file is not written. .Sh FILES WRITTEN .Bl -tag -width 13n .It index.html -The main HTML file referencing to the frames items.html and -menu.html. +The main HTML file referencing to the frames items.html and menu.html. .It items.html -Contains all the items as HTML links to the local content. +The items frame contains all the item HTML links to the remote content. .It menu.html -Menu frame which contains navigation "anchor" links to the feed names -in items.html. +The menu frame which contains navigation "anchor" links to the feed names in +items.html. 
.El -.Sh FILE STRUCTURE -Items for each feed category are in the format: feedname/itemname.html. -The feedname and item names are normalized, whitespace characters are replaced -with a - character, multiple whitespaces are replaced by a single - character -and trailing whitespace will be removed. -The itemname is based on the title of the items. -The feedname and title is truncated to a maximum of 128 bytes. -The maximum length of the path is PATH_MAX or filesystem-specific (truncated). .Sh SEE ALSO .Xr sfeed 1 , +.Xr sfeed_html 1 , .Xr sfeed_plain 1 .Sh AUTHORS .An Hiltjo Posthuma Aq Mt hiltjo@codemadness.org -.Sh SECURITY CONSIDERATIONS -Each item content file contains the content formatted as HTML, if the feed data -contains HTML like Javascripts, tracking cookies, custom styles and such -these will also be displayed. -Due to the crazy nature of "the web" these things are complex to filter. -Some security and privacy can be gained by using an adblocker, script blocker -and to set your browser settings more strictly. diff --git a/sfeed_frames.c b/sfeed_frames.c index 1d5020c..5d27195 100644 --- a/sfeed_frames.c +++ b/sfeed_frames.c @@ -12,120 +12,23 @@ #include #include #include -#include #include "util.h" static struct feed **feeds; static char *line; static size_t linesize; -static struct timespec times[2]; static time_t comparetime; static unsigned long totalnew; -/* Unescape / decode fields printed by string_print_encoded() - * "\\" to "\", "\t", to TAB, "\n" to newline. Unrecognised escape sequences - * are ignored: "\z" etc. */ -static void -printcontent(const char *s, FILE *fp) -{ - for (; *s; s++) { - switch (*s) { - case '\\': - switch (*(++s)) { - case '\0': return; /* ignore */ - case '\\': fputc('\\', fp); break; - case 't': fputc('\t', fp); break; - case 'n': fputc('\n', fp); break; - } - break; - default: - fputc((int)*s, fp); - } - } -} - -/* Unescape / decode fields printed by string_print_encoded() - * "\\" to "\", "\t", to TAB, "\n" to newline. 
Unrecognised escape sequences - * are ignored: "\z" etc. Encode HTML 2.0 / XML 1.0 entities. */ -static void -printcontentxml(const char *s, FILE *fp) -{ - for (; *s; s++) { - switch (*s) { - case '\\': - switch (*(++s)) { - case '\0': return; /* ignore */ - case '\\': fputc('\\', fp); break; - case 't': fputc('\t', fp); break; - case 'n': fputc('\n', fp); break; - } - break; - /* XML entities */ - case '<': fputs("&lt;", fp); break; - case '>': fputs("&gt;", fp); break; - case '\'': fputs("&#39;", fp); break; - case '&': fputs("&amp;", fp); break; - case '"': fputs("&quot;", fp); break; - default: fputc((int)*s, fp); - } - } -} - -/* normalize path names, transform to lower-case and replace non-alpha and - * non-digit with '-' */ -static size_t -normalizepath(const char *path, char *buf, size_t bufsiz) -{ - size_t i, r = 0; - - for (i = 0; *path && i < bufsiz; path++) { - if (isalpha((int)*path) || isdigit((int)*path)) { - buf[i++] = tolower((int)*path); - r = 0; - } else { - /* don't repeat '-', don't start with '-' */ - if (!r && i) - buf[i++] = '-'; - r++; - } - } - /* remove trailing '-' */ - for (; i > 0 && (buf[i - 1] == '-'); i--) - ; - - if (bufsiz > 0) - buf[i] = '\0'; - - return i; -} - static void printfeed(FILE *fpitems, FILE *fpin, struct feed *f) { - char dirpath[PATH_MAX], filepath[PATH_MAX]; - char *fields[FieldLast], *feedname, name[128]; + char *fields[FieldLast]; ssize_t linelen; - FILE *fpcontent = NULL; unsigned int isnew; struct tm *tm; time_t parsedtime; - int fd, r; - - if (f->name[0]) - feedname = f->name; - else - feedname = "unnamed"; - - /* make directory for feedname */ - if (!normalizepath(feedname, name, sizeof(name))) - return; - - strlcpy(dirpath, name, sizeof(dirpath)); - - /* error creating directory and it doesn't exist. 
*/ - if (mkdir(dirpath, S_IRWXU | S_IRWXG | S_IRWXO) == -1 && errno != EEXIST) - err(1, "mkdir: %s", dirpath); /* menu if not unnamed */ if (f->name[0]) { @@ -150,68 +53,6 @@ printfeed(FILE *fpitems, FILE *fpin, struct feed *f) if (!(tm = localtime(&parsedtime))) err(1, "localtime"); - if (!normalizepath(fields[FieldTitle], name, sizeof(name))) - continue; - - r = snprintf(filepath, sizeof(filepath), "%s/%s-%lld.html", - dirpath, name, (long long)parsedtime); - if (r == -1 || (size_t)r >= sizeof(filepath)) - errx(1, "snprintf: path truncation: '%s/%s-%lld.html'", - dirpath, name, (long long)parsedtime); - - /* content file doesn't exist yet and has error? */ - if ((fd = open(filepath, O_CREAT | O_EXCL | O_WRONLY, - S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH)) == -1) { - if (errno != EEXIST) - err(1, "open: %s", filepath); - } else { - if (!(fpcontent = fdopen(fd, "wb"))) - err(1, "fdopen: %s", filepath); - fputs("" - "" - "" - "\n" - "

", fpcontent); - - if (fields[FieldLink][0]) { - fputs("", fpcontent); - } - xmlencode(fields[FieldTitle], fpcontent); - if (fields[FieldLink][0]) - fputs("", fpcontent); - fputs("

", fpcontent); - - /* NOTE: this prints the raw HTML of the feed, this is - * potentially dangerous, it is left up to the - * user / browser to trust a feed it's HTML content. */ - if (!strcmp(fields[FieldContentType], "html")) { - printcontent(fields[FieldContent], fpcontent); - } else { - /* plain-text, wrap with
<pre> */
-				fputs("<pre>", fpcontent);
-				printcontentxml(fields[FieldContent], fpcontent);
-				fputs("</pre>", fpcontent); - } - fputs("
\n", fpcontent); - - /* set modified and access time of file to time of item. */ - if (parsedtime) { - /* flush writes before setting atime and mtime - else the remaining (buffered) write can occur at - fclose() and overwrite our time again. */ - fflush(fpcontent); - - times[0].tv_sec = parsedtime; - times[1].tv_sec = parsedtime; - - if (futimens(fd, times) == -1) - err(1, "futimens"); - } - fclose(fpcontent); - } - isnew = (parsedtime >= comparetime) ? 1 : 0; totalnew += isnew; f->totalnew += isnew; @@ -223,11 +64,15 @@ printfeed(FILE *fpitems, FILE *fpin, struct feed *f) if (isnew) fputs("", fpitems); - fputs("", fpitems); - xmlencode(fields[FieldTitle], fpitems); - fputs("", fpitems); + if (fields[FieldLink][0]) { + fputs("", fpitems); + xmlencode(fields[FieldTitle], fpitems); + fputs("", fpitems); + } else { + xmlencode(fields[FieldTitle], fpitems); + } if (isnew) fputs("", fpitems); fputs("\n", fpitems); @@ -237,12 +82,12 @@ printfeed(FILE *fpitems, FILE *fpin, struct feed *f) int main(int argc, char *argv[]) { - FILE *fpindex, *fpitems, *fpmenu, *fp; + FILE *fpindex, *fpitems, *fpmenu = NULL, *fp; char *name; int i, showsidebar = (argc > 1); struct feed *f; - if (pledge("stdio rpath wpath cpath fattr", NULL) == -1) + if (pledge("stdio rpath wpath cpath", NULL) == -1) err(1, "pledge"); if (!(feeds = calloc(argc, sizeof(struct feed *)))) @@ -256,11 +101,15 @@ main(int argc, char *argv[]) /* write main index page */ if (!(fpindex = fopen("index.html", "wb"))) err(1, "fopen: index.html"); - if (!(fpmenu = fopen("menu.html", "wb"))) - err(1, "fopen: menu.html"); if (!(fpitems = fopen("items.html", "wb"))) err(1, "fopen: items.html"); - fputs("" + if (showsidebar && !(fpmenu = fopen("menu.html", "wb"))) + err(1, "fopen: menu.html"); + + if (pledge("stdio rpath", NULL) == -1) + err(1, "pledge"); + + fputs("" "" "
", fpitems);
 
@@ -288,7 +137,7 @@ main(int argc, char *argv[])
 
 	if (showsidebar) {
 		fputs(""
-		      "\n"
+		      "\n"
 		      "\n"
 		      "
", fpmenu); @@ -312,25 +161,24 @@ main(int argc, char *argv[]) } fputs("\n\tNewsfeed (", fpindex); fprintf(fpindex, "%lu", totalnew); - fputs(")\n\t\n" + fputs(")\n\t\n" "\n" "\n", fpindex); if (showsidebar) { - fputs("\n" - " \n", fpindex); + fputs("\n" + "\t\n", fpindex); } else { fputs("\n", fpindex); } - fputs("\t\n" - "\t\t\n" - "\t\t\n" - "\t\n" + fputs( + "\t\n" "\n" "\n", fpindex); - fclose(fpitems); - fclose(fpmenu); fclose(fpindex); + fclose(fpitems); + if (fpmenu) + fclose(fpmenu); return 0; } -- cgit v1.2.3