path: root/sfeed_update
author    Hiltjo Posthuma <hiltjo@codemadness.org>  2019-04-14 15:00:19 +0200
committer Hiltjo Posthuma <hiltjo@codemadness.org>  2019-04-14 15:26:47 +0200
commit    22af8f5c3a9f79f28cf2c56d9b244804a70ddcc7 (patch)
tree      89750858eec51a5e0108298e0a916670970c1ca0 /sfeed_update
parent    57a90ba638f38fa589119dd6dc6a23482c58bfda (diff)
sfeed_update improvements
- Better checking and verbose logging (on failure) of each stage:
  fetchfeed, filter, merge, order, convertencoding. This makes sure the
  output is not corrupted on out-of-memory, disk-space or other
  resource limits.
- This also has the added advantage that fewer processes run (piped) at
  the same time.
- Clear the previous unneeded file to preserve space in /tmp (/tmp is
  often mounted as mfs/tmpfs).
- Add a logging function (able to override); use a more logical logging
  format (pun intended).
- Code-style: order overridable functions in execution order.
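Every stage in the rewritten feed() below follows the same check-and-cleanup pattern. A minimal sketch of that pattern, where examplestage, the EXAMPLE label and the .in/.out suffixes are placeholders for the real stages and temporary files shown in the diff:

	# run the stage with an explicit input and output file, log and
	# bail out on failure, then remove the now-unneeded input file
	# of the previous stage to preserve space in /tmp.
	if ! examplestage "${name}" < "${tmpfeedfile}.in" > "${tmpfeedfile}.out"; then
		log "${name}" "FAIL (EXAMPLE)"
		return
	fi
	rm -f "${tmpfeedfile}.in"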
Diffstat (limited to 'sfeed_update')
-rwxr-xr-x  sfeed_update  123
1 file changed, 80 insertions, 43 deletions
diff --git a/sfeed_update b/sfeed_update
index 868d758..a4cc4c8 100755
--- a/sfeed_update
+++ b/sfeed_update
@@ -31,6 +31,20 @@ loadconfig() {
fi
}
+# log(name, s)
+log() {
+ printf '[%s] %-50.50s %s\n' "$(date +'%H:%M:%S')" "$1" "$2" >&2
+}
+
+# fetch a feed via HTTP/HTTPS etc.
+# fetchfeed(name, url, feedfile)
+fetchfeed() {
+ # fail on redirects, hide User-Agent, timeout is 15 seconds,
+ # -z for If-Modified-Since.
+ curl -L --max-redirs 0 -H "User-Agent:" -f -s -m 15 \
+ -z "$3" "$2" 2>/dev/null
+}
+
# convert encoding from one encoding to another.
# convertencoding(from, to)
convertencoding() {
@@ -42,12 +56,6 @@ convertencoding() {
fi
}
-# merge raw files: unique sort by id, title, link.
-# merge(name, oldfile, newfile)
-merge() {
- sort -t ' ' -u -k6,6 -k2,2 -k3,3 "$2" "$3" 2>/dev/null
-}
-
# filter fields.
# filter(name)
filter() {
@@ -60,15 +68,10 @@ order() {
sort -t ' ' -k1rn,1
}
-# fetch a feed via HTTP/HTTPS etc.
-# fetchfeed(name, url, feedfile)
-fetchfeed() {
- if curl -L --max-redirs 0 -H "User-Agent:" -f -s -m 15 \
- -z "$3" "$2" 2>/dev/null; then
- printf " OK %s %s\n" "$(date +'%H:%M:%S')" "$1" >&2
- else
- printf "FAIL %s %s\n" "$(date +'%H:%M:%S')" "$1" >&2
- fi
+# merge raw files: unique sort by id, title, link.
+# merge(name, oldfile, newfile)
+merge() {
+ sort -t ' ' -u -k6,6 -k2,2 -k3,3 "$2" "$3" 2>/dev/null
}
# fetch and parse feed.
@@ -85,37 +88,71 @@ feed() {
filename="$(printf '%s' "$1" | tr '/' '_')"
feedurl="$2"
basesiteurl="$3"
- tmpfeedfile="${sfeedtmpdir}/${filename}"
- tmpencfile=""
encoding="$4"
+
sfeedfile="${sfeedpath}/${filename}"
+ tmpfeedfile="${sfeedtmpdir}/${filename}"
+
+ if ! fetchfeed "${name}" "${feedurl}" "${sfeedfile}" > "${tmpfeedfile}.fetch"; then
+ log "${name}" "FAIL (FETCH)"
+ return
+ fi
+
+ # try to detect encoding (if not specified). if detecting the encoding fails, assume utf-8.
+ [ "${encoding}" = "" ] && encoding=$(sfeed_xmlenc < "${tmpfeedfile}.fetch")
+
+ if ! convertencoding "${encoding}" "utf-8" < "${tmpfeedfile}.fetch" > "${tmpfeedfile}.utf8"; then
+ log "${name}" "FAIL (ENCODING)"
+ return
+ fi
+ rm -f "${tmpfeedfile}.fetch"
+
+ if ! sfeed "${basesiteurl}" < "${tmpfeedfile}.utf8" > "${tmpfeedfile}.tsv"; then
+ log "${name}" "FAIL (CONVERT)"
+ return
+ fi
+ rm -f "${tmpfeedfile}.enc"
+
+ if ! filter "${name}" < "${tmpfeedfile}.tsv" > "${tmpfeedfile}.filter"; then
+ log "${name}" "FAIL (FILTER)"
+ return
+ fi
+ rm -f "${tmpfeedfile}.tsv"
+
+ # new feed data is empty: no need to run the remaining stages.
+ if [ ! -s "${tmpfeedfile}.filter" ]; then
+ log "${name}" "OK"
+ return
+ fi
+
+ # if file does not exist yet "merge" with /dev/null.
+ if [ -e "${sfeedfile}" ]; then
+ oldfile="${sfeedfile}"
+ else
+ oldfile="/dev/null"
+ fi
+
+ if ! merge "${name}" "${oldfile}" "${tmpfeedfile}.filter" > "${tmpfeedfile}.merge"; then
+ log "${name}" "FAIL (MERGE)"
+ return
+ fi
+ rm -f "${tmpfeedfile}.filter"
+
+ if ! order "${name}" < "${tmpfeedfile}.merge" > "${tmpfeedfile}.order"; then
+ log "${name}" "FAIL (ORDER)"
+ return
+ fi
+ rm -f "${tmpfeedfile}.merge"
+
+ # atomic move.
+ if ! mv "${tmpfeedfile}.order" "${sfeedfile}"; then
+ log "${name}" "FAIL (MOVE)"
+ return
+ fi
- if [ "${encoding}" != "" ]; then
- fetchfeed "${name}" "${feedurl}" "${sfeedfile}" | \
- convertencoding "${encoding}" "utf-8"
- else # detect encoding.
- tmpencfile="${tmpfeedfile}.enc"
- fetchfeed "${name}" "${feedurl}" "${sfeedfile}" > "${tmpencfile}"
- detectenc=$(sfeed_xmlenc < "${tmpencfile}")
- convertencoding "${detectenc}" "utf-8" < "${tmpencfile}"
- fi | sfeed "${basesiteurl}" | filter "${name}" > "${tmpfeedfile}"
-
- # get new data and merge with old.
- sfeedfilenew="${sfeedpath}/${filename}.new"
- # new feed data is non-empty.
- if [ -s "${tmpfeedfile}" ]; then
- # if file exists, merge
- if [ -e "${sfeedfile}" ]; then
- merge "${name}" "${sfeedfile}" "${tmpfeedfile}" | \
- order "${name}" > "${sfeedfilenew}"
-
- # overwrite old file with updated file
- mv "${sfeedfilenew}" "${sfeedfile}"
- else
- merge "${name}" "/dev/null" "${tmpfeedfile}" | \
- order "${name}" > "${sfeedfile}"
- fi
- fi) &
+ # OK
+ log "${name}" "OK"
+ ) &
}
cleanup() {
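For reference, the new log() prints the time, the feed name left-justified and truncated to 50 characters, and a status string, so the output of a run looks roughly like this (feed names made up for illustration):

	[15:00:19] codemadness                                        OK
	[15:00:21] unreachable-example                                FAIL (FETCH)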