diff options
author | Hiltjo Posthuma <hiltjo@codemadness.org> | 2021-01-15 18:31:52 +0100 |
---|---|---|
committer | Hiltjo Posthuma <hiltjo@codemadness.org> | 2021-01-16 12:26:16 +0100 |
commit | 7feab0fd885c1c93344fd427904eae91943493b7 (patch) | |
tree | e183706253651adc9dabf05a7d715392a906e2ac /README | |
parent | f18f4818ed2c992aa9b7b91c74bb9ce7cc1bc745 (diff) |
README: newsboat sqlite3 export script: improvements
- Export read/unread state to a separate plain-text "urls" file, line by line.
- Handle white-space control-chars better.
From the sfeed(1) man page:
" The fields: title, id, author are not allowed to have newlines and TABs,
all whitespace characters are replaced by a single space character.
Control characters are removed."
So do the reverse for newsboat as well: change white-space characters which are
also control-characters (such as TABs and newlines) to a single space
character.
Diffstat (limited to 'README')
-rw-r--r-- | README | 31 |
1 files changed, 22 insertions, 9 deletions
@@ -628,10 +628,12 @@ sfeedrc file and change the curl options "-L --max-redirs 0". Shellscript to export existing newsboat cached items from sqlite3 to the sfeed TSV format. - + #!/bin/sh # Export newsbeuter/newsboat cached items from sqlite3 to the sfeed TSV format. # The data is split per file per feed with the name of the newsboat title/url. + # It writes the urls of the read items line by line to a "urls" file. + # # Dependencies: sqlite3, awk. # # Usage: create some directory to store the feeds, run this script. @@ -653,8 +655,8 @@ TSV format. SELECT i.pubDate, i.title, i.url, i.content, i.guid, i.author, i.enclosure_url, - f.rssurl AS rssurl, f.title AS feedtitle --, - -- i.id, i.unread, i.enclosure_type, i.enqueued, i.flags, i.deleted, + f.rssurl AS rssurl, f.title AS feedtitle, i.unread --, + -- i.id, i.enclosure_type, i.enqueued, i.flags, i.deleted, -- i.base FROM rss_feed f INNER JOIN rss_item i ON i.feedurl = f.rssurl @@ -668,13 +670,19 @@ TSV format. FS = "\x1f"; RS = "\x1e"; } - # strip all control-chars for normal fields. - function strip(s) { + # normal non-content fields. + function field(s) { + gsub("^[[:space:]]*", "", s); + gsub("[[:space:]]*$", "", s); + gsub("[[:space:]]", " ", s); gsub("[[:cntrl:]]", "", s); return s; } - # escape chars in content field. - function escape(s) { + # content field. + function content(s) { + gsub("^[[:space:]]*", "", s); + gsub("[[:space:]]*$", "", s); + # escape chars in content field. gsub("\\\\", "\\\\", s); gsub("\n", "\\n", s); gsub("\t", "\\t", s); @@ -690,9 +698,14 @@ TSV format. print "Writing file: \"" fname "\" (title: " $9 ", url: " $8 ")" > "/dev/stderr"; } - print $1 "\t" strip($2) "\t" strip($3) "\t" escape($4) "\t" \ - "html" "\t" strip($5) "\t" strip($6) "\t" strip($7) \ + print $1 "\t" field($2) "\t" field($3) "\t" content($4) "\t" \ + "html" "\t" field($5) "\t" field($6) "\t" field($7) \ > fname; + + # write urls of the read items to a file line by line. 
+ if ($10 == "0") { + print $3 > "urls"; + } }' |