From 587ce6fb85995f22ba6b0b9e3b944cb2896bc7cc Mon Sep 17 00:00:00 2001 From: Tommy Nguyen Date: Mon, 21 Mar 2022 10:37:01 +0100 Subject: sfeed_update: log FAILs to stderr --- sfeed_update | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'sfeed_update') diff --git a/sfeed_update b/sfeed_update index ba9e242..b5f38bc 100755 --- a/sfeed_update +++ b/sfeed_update @@ -35,6 +35,11 @@ loadconfig() { # log(name, s) log() { + printf '[%s] %-50.50s %s\n' "$(date +'%H:%M:%S')" "$1" "$2" +} + +# log_error(name, s) +log_error() { printf '[%s] %-50.50s %s\n' "$(date +'%H:%M:%S')" "$1" "$2" >&2 } @@ -97,7 +102,7 @@ _feed() { [ -e "${sfeedfile}" ] || touch "${sfeedfile}" 2>/dev/null if ! fetch "${name}" "${feedurl}" "${sfeedfile}" > "${tmpfeedfile}.fetch"; then - log "${name}" "FAIL (FETCH)" + log_error "${name}" "FAIL (FETCH)" return fi @@ -105,20 +110,20 @@ _feed() { [ "${encoding}" = "" ] && encoding=$(sfeed_xmlenc < "${tmpfeedfile}.fetch") if ! convertencoding "${name}" "${encoding}" "utf-8" < "${tmpfeedfile}.fetch" > "${tmpfeedfile}.utf8"; then - log "${name}" "FAIL (ENCODING)" + log_error "${name}" "FAIL (ENCODING)" return fi rm -f "${tmpfeedfile}.fetch" # if baseurl is empty then use feedurl. if ! parse "${name}" "${feedurl}" "${basesiteurl:-${feedurl}}" < "${tmpfeedfile}.utf8" > "${tmpfeedfile}.tsv"; then - log "${name}" "FAIL (PARSE)" + log_error "${name}" "FAIL (PARSE)" return fi rm -f "${tmpfeedfile}.utf8" if ! filter "${name}" < "${tmpfeedfile}.tsv" > "${tmpfeedfile}.filter"; then - log "${name}" "FAIL (FILTER)" + log_error "${name}" "FAIL (FILTER)" return fi rm -f "${tmpfeedfile}.tsv" @@ -130,20 +135,20 @@ _feed() { fi if ! merge "${name}" "${sfeedfile}" "${tmpfeedfile}.filter" > "${tmpfeedfile}.merge"; then - log "${name}" "FAIL (MERGE)" + log_error "${name}" "FAIL (MERGE)" return fi rm -f "${tmpfeedfile}.filter" if ! 
order "${name}" < "${tmpfeedfile}.merge" > "${tmpfeedfile}.order"; then - log "${name}" "FAIL (ORDER)" + log_error "${name}" "FAIL (ORDER)" return fi rm -f "${tmpfeedfile}.merge" # copy if ! cp "${tmpfeedfile}.order" "${sfeedfile}"; then - log "${name}" "FAIL (COPY)" + log_error "${name}" "FAIL (COPY)" return fi rm -f "${tmpfeedfile}.order" -- cgit v1.2.3 From 125164fc972b3149aa65db4a839224dbf24ef8aa Mon Sep 17 00:00:00 2001 From: Hiltjo Posthuma Date: Mon, 21 Mar 2022 11:47:07 +0100 Subject: sfeed_update: set exit status non-zero if any of the feeds failed In practise this may change the meaning of the examples: sfeed_update && pkill -SIGHUP sfeed_curses An alternative: sfeed_update; pkill -SIGHUP sfeed_curses --- sfeed_update | 11 +++++++++-- sfeed_update.1 | 1 - 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'sfeed_update') diff --git a/sfeed_update b/sfeed_update index b5f38bc..f2c97ae 100755 --- a/sfeed_update +++ b/sfeed_update @@ -41,6 +41,8 @@ log() { # log_error(name, s) log_error() { printf '[%s] %-50.50s %s\n' "$(date +'%H:%M:%S')" "$1" "$2" >&2 + # set error exit status indicator for parallel jobs. + rm -f "${sfeedtmpdir}/ok" } # fetch a feed via HTTP/HTTPS etc. @@ -96,7 +98,7 @@ _feed() { filename="$(printf '%s' "${name}" | tr '/' '_')" sfeedfile="${sfeedpath}/${filename}" - tmpfeedfile="${sfeedtmpdir}/${filename}" + tmpfeedfile="${sfeedtmpdir}/feeds/${filename}" # if file does not exist yet create it. [ -e "${sfeedfile}" ] || touch "${sfeedfile}" 2>/dev/null @@ -201,17 +203,22 @@ main() { loadconfig "$1" # fetch feeds and store in temporary directory. sfeedtmpdir="$(mktemp -d '/tmp/sfeed_XXXXXX')" + mkdir -p "${sfeedtmpdir}/feeds" + touch "${sfeedtmpdir}/ok" # make sure path exists. mkdir -p "${sfeedpath}" # fetch feeds specified in config file. feeds # wait till all feeds are fetched (concurrently). [ ${signo} -eq 0 ] && wait + # check error exit status indicator for parallel jobs. + test -f "${sfeedtmpdir}/ok" + status=$? 
# cleanup temporary files etc. cleanup # on signal SIGINT and SIGTERM exit with signal number + 128. [ ${signo} -ne 0 ] && exit $((signo+128)) - return 0 + return ${status} } [ "${SFEED_UPDATE_INCLUDE}" = "1" ] || main "$@" diff --git a/sfeed_update.1 b/sfeed_update.1 index aca52de..3edf551 100644 --- a/sfeed_update.1 +++ b/sfeed_update.1 @@ -62,7 +62,6 @@ stdout in the format: .Ed .Sh EXIT STATUS .Ex -std -A (temporary) failure with processing a feed is not considered an error here. .Sh EXAMPLES To update your feeds and format them in various formats: .Bd -literal -- cgit v1.2.3 From 5934cb409782091b6d4481ceccf77e82832167ec Mon Sep 17 00:00:00 2001 From: Hiltjo Posthuma Date: Tue, 22 Mar 2022 09:45:34 +0100 Subject: sfeed_update: return status in _feed() function This can be useful for scripts, for example the sfeed_update_xargs example script in the README. This way the process can signal an error and xargs will exit with the code 123: "One or more invocations of utility returned a nonzero exit status." --- sfeed_update | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'sfeed_update') diff --git a/sfeed_update b/sfeed_update index f2c97ae..b9880c3 100755 --- a/sfeed_update +++ b/sfeed_update @@ -105,7 +105,7 @@ _feed() { if ! fetch "${name}" "${feedurl}" "${sfeedfile}" > "${tmpfeedfile}.fetch"; then log_error "${name}" "FAIL (FETCH)" - return + return 1 fi # try to detect encoding (if not specified). if detecting the encoding fails assume utf-8. @@ -113,50 +113,51 @@ _feed() { if ! convertencoding "${name}" "${encoding}" "utf-8" < "${tmpfeedfile}.fetch" > "${tmpfeedfile}.utf8"; then log_error "${name}" "FAIL (ENCODING)" - return + return 1 fi rm -f "${tmpfeedfile}.fetch" # if baseurl is empty then use feedurl. if ! parse "${name}" "${feedurl}" "${basesiteurl:-${feedurl}}" < "${tmpfeedfile}.utf8" > "${tmpfeedfile}.tsv"; then log_error "${name}" "FAIL (PARSE)" - return + return 1 fi rm -f "${tmpfeedfile}.utf8" if ! 
filter "${name}" < "${tmpfeedfile}.tsv" > "${tmpfeedfile}.filter"; then log_error "${name}" "FAIL (FILTER)" - return + return 1 fi rm -f "${tmpfeedfile}.tsv" # new feed data is empty: no need for below stages. if [ ! -s "${tmpfeedfile}.filter" ]; then log "${name}" "OK" - return + return 0 fi if ! merge "${name}" "${sfeedfile}" "${tmpfeedfile}.filter" > "${tmpfeedfile}.merge"; then log_error "${name}" "FAIL (MERGE)" - return + return 1 fi rm -f "${tmpfeedfile}.filter" if ! order "${name}" < "${tmpfeedfile}.merge" > "${tmpfeedfile}.order"; then log_error "${name}" "FAIL (ORDER)" - return + return 1 fi rm -f "${tmpfeedfile}.merge" # copy if ! cp "${tmpfeedfile}.order" "${sfeedfile}"; then log_error "${name}" "FAIL (COPY)" - return + return 1 fi rm -f "${tmpfeedfile}.order" # OK log "${name}" "OK" + return 0 } # fetch and process a feed in parallel. -- cgit v1.2.3 From 74cf6a026e13a6e275d37bc17014908a76b41042 Mon Sep 17 00:00:00 2001 From: Hiltjo Posthuma Date: Wed, 23 Mar 2022 19:38:30 +0100 Subject: shellscripts: use [ for test consistently --- sfeed_markread | 4 ++-- sfeed_update | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'sfeed_update') diff --git a/sfeed_markread b/sfeed_markread index a40e572..4c509f3 100755 --- a/sfeed_markread +++ b/sfeed_markread @@ -9,7 +9,7 @@ usage() { } urlfile="${2:-${SFEED_URL_FILE}}" -if test -z "${urlfile}"; then +if [ -z "${urlfile}" ]; then usage fi @@ -20,7 +20,7 @@ read) unread) tmp=$(mktemp) trap "rm -f ${tmp}" EXIT - test -f "${urlfile}" || touch "${urlfile}" 2>/dev/null + [ -f "${urlfile}" ] || touch "${urlfile}" 2>/dev/null LC_ALL=C awk -F '\t' ' { FILENR += (FNR == 1) } FILENR == 1 { urls[$0] = 1 } diff --git a/sfeed_update b/sfeed_update index b9880c3..2e54a59 100755 --- a/sfeed_update +++ b/sfeed_update @@ -213,7 +213,7 @@ main() { # wait till all feeds are fetched (concurrently). [ ${signo} -eq 0 ] && wait # check error exit status indicator for parallel jobs. 
- test -f "${sfeedtmpdir}/ok" + [ -f "${sfeedtmpdir}/ok" ] status=$? # cleanup temporary files etc. cleanup -- cgit v1.2.3 From ca3f3fe68ae72fec6f607278bf88d30ab1497627 Mon Sep 17 00:00:00 2001 From: Hiltjo Posthuma Date: Fri, 25 Mar 2022 15:43:47 +0100 Subject: change echo to printf and for sfeed_opml_export use a control-character separator echo is unportable in this way and names containing characters like an option (-n) or backslash or escape codes (\e, \n, etc) could be messy. For awk set LC_ALL=C for simple collation. This makes sfeed_opml_export slower in some shells that don't have printf builtin though. For example with about 150 feeds in a config file it is a bit slower on OpenBSD ksh. time ./sfeed_opml_export | wc -l 152 0m00.29s real 0m00.05s user 0m00.20s system time sfeed_opml_export | wc -l 152 0m00.02s real 0m00.00s user 0m00.03s system --- README | 2 +- sfeed_markread | 2 +- sfeed_opml_export | 12 ++++++++---- sfeed_update | 4 ++-- 4 files changed, 12 insertions(+), 8 deletions(-) (limited to 'sfeed_update') diff --git a/README b/README index 6ce4231..9658ab6 100644 --- a/README +++ b/README @@ -578,7 +578,7 @@ procmail_maildirs.sh file: mkdir -p "${maildir}/.cache" if ! test -r "${procmailconfig}"; then - echo "Procmail configuration file \"${procmailconfig}\" does not exist or is not readable." >&2 + printf "Procmail configuration file \"%s\" does not exist or is not readable.\n" "${procmailconfig}" >&2 echo "See procmailrc.example for an example." >&2 exit 1 fi diff --git a/sfeed_markread b/sfeed_markread index 4c509f3..b262bdd 100755 --- a/sfeed_markread +++ b/sfeed_markread @@ -2,7 +2,7 @@ # Mark items as read/unread: the input is the read / unread URL per line. 
usage() { - echo "usage: $0 [urlfile]" >&2 + printf "usage: %s [urlfile]\n" "$0" >&2 echo "" >&2 echo "An urlfile must be specified as an argument or with the environment variable \$SFEED_URL_FILE" >&2 exit 1 diff --git a/sfeed_opml_export b/sfeed_opml_export index eb22520..2a9396a 100755 --- a/sfeed_opml_export +++ b/sfeed_opml_export @@ -18,7 +18,7 @@ loadconfig() { if [ -r "${path}" ]; then . "${path}" else - echo "Configuration file \"${config}\" cannot be read." >&2 + printf "Configuration file \"%s\" cannot be read.\n" "${config}" >&2 echo "See sfeedrc.example for an example." >&2 exit 1 fi @@ -27,8 +27,8 @@ loadconfig() { # override feed function to output OPML XML. # feed(name, feedurl, [basesiteurl], [encoding]) feed() { - # TABs, newlines and echo options in field values are not checked. - echo "$1 $2" + # uses the characters 0x1f and 0x1e as a separator. + printf '%s\037%s\036' "$1" "$2" } # load config file. @@ -43,7 +43,11 @@ cat < ! -feeds | awk -F '\t' '{ +feeds | LC_ALL=C awk ' +BEGIN { + FS = "\x1f"; RS = "\x1e"; +} +{ gsub("&", "\\&"); gsub("\"", "\\""); gsub("'"'"'", "\\'"); diff --git a/sfeed_update b/sfeed_update index 2e54a59..857f537 100755 --- a/sfeed_update +++ b/sfeed_update @@ -27,7 +27,7 @@ loadconfig() { if [ -r "${path}" ]; then . "${path}" else - echo "Configuration file \"${config}\" cannot be read." >&2 + printf "Configuration file \"%s\" cannot be read.\n" "${config}" >&2 echo "See sfeedrc.example for an example." >&2 exit 1 fi @@ -187,7 +187,7 @@ sighandler() { } feeds() { - echo "Configuration file \"${config}\" is invalid or does not contain a \"feeds\" function." >&2 + printf "Configuration file \"%s\" is invalid or does not contain a \"feeds\" function.\n" "${config}" >&2 echo "See sfeedrc.example for an example." 
>&2 } -- cgit v1.2.3 From df2250aa196b674c0783d3ba1862b1cfb5df5719 Mon Sep 17 00:00:00 2001 From: Hiltjo Posthuma Date: Mon, 28 Mar 2022 13:07:58 +0200 Subject: sfeed_update: change return to exit in main Pedantic change: Make main more consistent since other functions in it exit too and main is not supposed to return or used like that. --- sfeed_update | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'sfeed_update') diff --git a/sfeed_update b/sfeed_update index 857f537..fc7447f 100755 --- a/sfeed_update +++ b/sfeed_update @@ -219,7 +219,7 @@ main() { cleanup # on signal SIGINT and SIGTERM exit with signal number + 128. [ ${signo} -ne 0 ] && exit $((signo+128)) - return ${status} + exit ${status} } [ "${SFEED_UPDATE_INCLUDE}" = "1" ] || main "$@" -- cgit v1.2.3 From 70007b61aaa485c7084badc5d8336ee480b2d138 Mon Sep 17 00:00:00 2001 From: NRK Date: Tue, 21 Jun 2022 05:09:14 +0600 Subject: fix some typis found via codespell $ codespell --ignore-regex Nd --- README | 2 +- README.xml | 4 ++-- sfeed_update | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'sfeed_update') diff --git a/README b/README index 06f2c02..b994c55 100644 --- a/README +++ b/README @@ -834,7 +834,7 @@ arguments are specified then the data is read from stdin. if [ "${SFEED_DOWNLOAD_CHILD}" = "1" ]; then # Downloader helper for parallel downloading. # Receives arguments: $1 = URL, $2 = title, $3 = feed filename or "-". - # It should write the URI to the cachefile if it is succesful. + # It should write the URI to the cachefile if it is successful. downloader "$1" "$2" "$3" exit $? fi diff --git a/README.xml b/README.xml index edbac28..ee205c0 100644 --- a/README.xml +++ b/README.xml @@ -28,7 +28,7 @@ Supports - Tags in short-form (). - Tag attributes. -- Short attributes without an explicity set value (). +- Short attributes without an explicitly set value (). - Comments - CDATA sections. 
- Helper function (xml_entitytostr) to convert XML 1.0 / HTML 2.0 named entities @@ -55,7 +55,7 @@ Caveats - The XML specification has no limits on tag and attribute names. For simplicity/sanity sake this XML parser takes some liberties. Tag and attribute names are truncated if they are excessively long. -- Entity expansions are not parsed aswell as DOCTYPE, ATTLIST etc. +- Entity expansions are not parsed as well as DOCTYPE, ATTLIST etc. Files used diff --git a/sfeed_update b/sfeed_update index fc7447f..44ce23c 100755 --- a/sfeed_update +++ b/sfeed_update @@ -182,7 +182,7 @@ sighandler() { signo="$1" # ignore TERM signal for myself. trap -- "" TERM - # kill all running childs >:D + # kill all running children >:D kill -TERM -$$ } -- cgit v1.2.3 From 9d6dd2a5072c0702cb56cced861ca4539e98c553 Mon Sep 17 00:00:00 2001 From: Hiltjo Posthuma Date: Fri, 17 Feb 2023 15:39:29 +0100 Subject: sfeed_update, sfeed_opml_export, README: reference the example sfeedrc man page ... and some small rewording. --- README | 4 ++-- sfeed_opml_export | 2 +- sfeed_update | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'sfeed_update') diff --git a/README b/README index 26de06c..dcff323 100644 --- a/README +++ b/README @@ -251,8 +251,8 @@ output example: - - - -Make sure your sfeedrc config file exists, see sfeedrc.example. To update your -feeds (configfile argument is optional): +Make sure your sfeedrc config file exists, see the sfeedrc.example file. To +update your feeds (configfile argument is optional): sfeed_update "configfile" diff --git a/sfeed_opml_export b/sfeed_opml_export index 2a9396a..3835934 100755 --- a/sfeed_opml_export +++ b/sfeed_opml_export @@ -19,7 +19,7 @@ loadconfig() { . "${path}" else printf "Configuration file \"%s\" cannot be read.\n" "${config}" >&2 - echo "See sfeedrc.example for an example." >&2 + echo "See the sfeedrc.example file or the sfeedrc(5) man page for an example." 
>&2 exit 1 fi } diff --git a/sfeed_update b/sfeed_update index 44ce23c..f989be6 100755 --- a/sfeed_update +++ b/sfeed_update @@ -28,7 +28,7 @@ loadconfig() { . "${path}" else printf "Configuration file \"%s\" cannot be read.\n" "${config}" >&2 - echo "See sfeedrc.example for an example." >&2 + echo "See the sfeedrc.example file or the sfeedrc(5) man page for an example." >&2 exit 1 fi } -- cgit v1.2.3 From 5a27c58675ddf4113d64a84f715cb3fecb681a6d Mon Sep 17 00:00:00 2001 From: Hiltjo Posthuma Date: Wed, 5 Apr 2023 21:41:19 +0200 Subject: sfeed_update: fail early if creating a temporary directory or status file fails If creating a temporary directory for the feed files failed then $sfeedtmpdir would be empty and it would try to: mkdir -p "/feeds" touch "/ok" After failing it would also still try to process all the feeds. Now just fail early. mktemp or touch themselves will print the actual error to stderr. --- sfeed_update | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'sfeed_update') diff --git a/sfeed_update b/sfeed_update index f989be6..3da45d3 100755 --- a/sfeed_update +++ b/sfeed_update @@ -203,9 +203,9 @@ main() { # load config file. loadconfig "$1" # fetch feeds and store in temporary directory. - sfeedtmpdir="$(mktemp -d '/tmp/sfeed_XXXXXX')" + sfeedtmpdir="$(mktemp -d '/tmp/sfeed_XXXXXX')" || exit 1 mkdir -p "${sfeedtmpdir}/feeds" - touch "${sfeedtmpdir}/ok" + touch "${sfeedtmpdir}/ok" || exit 1 # make sure path exists. mkdir -p "${sfeedpath}" # fetch feeds specified in config file. -- cgit v1.2.3 From 96d7afc7d7511f05ba07c5acbb5bbfb2847bc126 Mon Sep 17 00:00:00 2001 From: Hiltjo Posthuma Date: Fri, 9 Jun 2023 14:40:40 +0200 Subject: sfeed_update/sfeedrc: add url as parameter to the filter() and order() functions This might make it easier to set filters or ordering by pattern matching on a group of feeds by the feed URL. For example youtube or reddit feeds.
Another way which was already possible is prefixing names with for example: "reddit somename" or "yt somename". --- README | 2 +- sfeed_update | 8 ++++---- sfeedrc.5 | 10 +++++++--- 3 files changed, 12 insertions(+), 8 deletions(-) (limited to 'sfeed_update') diff --git a/README b/README index fbe40b6..d128360 100644 --- a/README +++ b/README @@ -336,7 +336,7 @@ filtering items per feed. It can be used to shorten URLs, filter away advertisements, strip tracking parameters and more. # filter fields. - # filter(name) + # filter(name, url) filter() { case "$1" in "tweakers") diff --git a/sfeed_update b/sfeed_update index 3da45d3..dadea9b 100755 --- a/sfeed_update +++ b/sfeed_update @@ -71,7 +71,7 @@ parse() { } # filter fields. -# filter(name) +# filter(name, url) filter() { cat } @@ -83,7 +83,7 @@ merge() { } # order by timestamp (descending). -# order(name) +# order(name, url) order() { sort -t ' ' -k1rn,1 } @@ -124,7 +124,7 @@ _feed() { fi rm -f "${tmpfeedfile}.utf8" - if ! filter "${name}" < "${tmpfeedfile}.tsv" > "${tmpfeedfile}.filter"; then + if ! filter "${name}" "${feedurl}" < "${tmpfeedfile}.tsv" > "${tmpfeedfile}.filter"; then log_error "${name}" "FAIL (FILTER)" return 1 fi @@ -142,7 +142,7 @@ _feed() { fi rm -f "${tmpfeedfile}.filter" - if ! order "${name}" < "${tmpfeedfile}.merge" > "${tmpfeedfile}.order"; then + if ! order "${name}" "${feedurl}" < "${tmpfeedfile}.merge" > "${tmpfeedfile}.order"; then log_error "${name}" "FAIL (ORDER)" return 1 fi diff --git a/sfeedrc.5 b/sfeedrc.5 index 91f82ce..16e0641 100644 --- a/sfeedrc.5 +++ b/sfeedrc.5 @@ -1,4 +1,4 @@ -.Dd January 18, 2023 +.Dd June 9, 2023 .Dt SFEEDRC 5 .Os .Sh NAME @@ -101,13 +101,15 @@ URL of the feed. Base URL of the feed links. This argument allows to fix relative item links. .El -.It Fn filter "name" +.It Fn filter "name" "url" Filter .Xr sfeed 5 data from stdin and write it to stdout, its arguments are: .Bl -tag -width Ds .It Fa name Feed name. +.It Fa url +URL of the feed. 
.El .It Fn merge "name" "oldfile" "newfile" Merge @@ -121,13 +123,15 @@ Old file. .It Fa newfile New file. .El -.It Fn order "name" +.It Fn order "name" "url" Sort .Xr sfeed 5 data from stdin and write it to stdout, its arguments are: .Bl -tag -width Ds .It Fa name Feed name. +.It Fa url +URL of the feed. .El .El .Sh EXAMPLES -- cgit v1.2.3 From 63308527f5197ddbcad6b06c5c1bbaf12f997e57 Mon Sep 17 00:00:00 2001 From: Hiltjo Posthuma Date: Sat, 9 Dec 2023 21:51:45 +0100 Subject: improve compatibility with zsh as a non-interactive shell In zsh the variables $path and $status are special. https://zsh.sourceforge.io/Doc/Release/Parameters.html#index-path https://zsh.sourceforge.io/Doc/Release/Parameters.html#index-status (No promises I will keep up with this insanity in the future though) --- README | 6 +++--- sfeed_opml_export | 8 ++++---- sfeed_update | 12 ++++++------ 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'sfeed_update') diff --git a/README b/README index 8098a76..85a3fb9 100644 --- a/README +++ b/README @@ -752,12 +752,12 @@ sfeed_update_xargs shellscript: mkdir -p "${sfeedpath}" # print feeds for parallel processing with xargs. feeds | SFEED_UPDATE_CHILD="1" xargs -r -0 -P "${maxjobs}" -L 6 "$(readlink -f "$0")" - status=$? + statuscode=$? # check error exit status indicator for parallel jobs. - test -f "${sfeedtmpdir}/ok" || status=1 + test -f "${sfeedtmpdir}/ok" || statuscode=1 # cleanup temporary files etc. cleanup - exit ${status} + exit ${statuscode} - - - diff --git a/sfeed_opml_export b/sfeed_opml_export index 7c96d5d..6420d5e 100755 --- a/sfeed_opml_export +++ b/sfeed_opml_export @@ -7,16 +7,16 @@ loadconfig() { if [ "$1" != "" ]; then # get absolute path of config file required for including. config="$1" - path=$(readlink -f "${config}" 2>/dev/null) + configpath=$(readlink -f "${config}" 2>/dev/null) else # default config location. 
config="$HOME/.sfeed/sfeedrc" - path="${config}" + configpath="${config}" fi # config is loaded here to be able to override $sfeedpath or functions. - if [ -r "${path}" ]; then - . "${path}" + if [ -r "${configpath}" ]; then + . "${configpath}" else printf "Configuration file \"%s\" cannot be read.\n" "${config}" >&2 echo "See the sfeedrc.example file or the sfeedrc(5) man page for an example." >&2 diff --git a/sfeed_update b/sfeed_update index dadea9b..2b7d89b 100755 --- a/sfeed_update +++ b/sfeed_update @@ -16,16 +16,16 @@ loadconfig() { if [ "$1" != "" ]; then # get absolute path of config file required for including. config="$1" - path=$(readlink -f "${config}" 2>/dev/null) + configpath=$(readlink -f "${config}" 2>/dev/null) else # default config location. config="$HOME/.sfeed/sfeedrc" - path="${config}" + configpath="${config}" fi # config is loaded here to be able to override $sfeedpath or functions. - if [ -r "${path}" ]; then - . "${path}" + if [ -r "${configpath}" ]; then + . "${configpath}" else printf "Configuration file \"%s\" cannot be read.\n" "${config}" >&2 echo "See the sfeedrc.example file or the sfeedrc(5) man page for an example." >&2 @@ -214,12 +214,12 @@ main() { [ ${signo} -eq 0 ] && wait # check error exit status indicator for parallel jobs. [ -f "${sfeedtmpdir}/ok" ] - status=$? + statuscode=$? # cleanup temporary files etc. cleanup # on signal SIGINT and SIGTERM exit with signal number + 128. [ ${signo} -ne 0 ] && exit $((signo+128)) - exit ${status} + exit ${statuscode} } [ "${SFEED_UPDATE_INCLUDE}" = "1" ] || main "$@" -- cgit v1.2.3 From a2aa09baf8a1f4a98313f8691d999eaff8b4ceea Mon Sep 17 00:00:00 2001 From: Hiltjo Posthuma Date: Fri, 15 Dec 2023 13:46:21 +0100 Subject: sfeed_update: add die() function for exit and cleanup, respect $TMPDIR - Add a die() helper function to cleanup and exit. - NOTE that with an empty sfeedtmpdir the case rm -rf "" is fine. - Respect $TMPDIR for creating temporary files like many UNIX tools do. 
- Fix: when creating "${sfeedtmpdir}/ok" fails for some reason cleanup the whole temporary directory as well. - Fix: when the feeds() function is not defined exit with status code 1 (this was incorrectly status code 0). Reproduce: sfeed_update /dev/null; echo $? --- sfeed_update | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'sfeed_update') diff --git a/sfeed_update b/sfeed_update index 2b7d89b..014985c 100755 --- a/sfeed_update +++ b/sfeed_update @@ -29,7 +29,7 @@ loadconfig() { else printf "Configuration file \"%s\" cannot be read.\n" "${config}" >&2 echo "See the sfeedrc.example file or the sfeedrc(5) man page for an example." >&2 - exit 1 + die fi } @@ -178,6 +178,14 @@ cleanup() { rm -rf "${sfeedtmpdir}" } +# die(statuscode) +die() { + statuscode="${1:-1}" # default: exit 1 + # cleanup temporary files etc. + cleanup + exit "${statuscode}" +} + sighandler() { signo="$1" # ignore TERM signal for myself. @@ -189,6 +197,7 @@ sighandler() { feeds() { printf "Configuration file \"%s\" is invalid or does not contain a \"feeds\" function.\n" "${config}" >&2 echo "See sfeedrc.example for an example." >&2 + die } main() { @@ -203,9 +212,9 @@ main() { # load config file. loadconfig "$1" # fetch feeds and store in temporary directory. - sfeedtmpdir="$(mktemp -d '/tmp/sfeed_XXXXXX')" || exit 1 + sfeedtmpdir="$(mktemp -p "${TMPDIR:-/tmp}" -d 'sfeed_XXXXXX')" || die mkdir -p "${sfeedtmpdir}/feeds" - touch "${sfeedtmpdir}/ok" || exit 1 + touch "${sfeedtmpdir}/ok" || die # make sure path exists. mkdir -p "${sfeedpath}" # fetch feeds specified in config file. @@ -215,11 +224,9 @@ main() { # check error exit status indicator for parallel jobs. [ -f "${sfeedtmpdir}/ok" ] statuscode=$? - # cleanup temporary files etc. - cleanup # on signal SIGINT and SIGTERM exit with signal number + 128. 
- [ ${signo} -ne 0 ] && exit $((signo+128)) - exit ${statuscode} + [ ${signo} -ne 0 ] && die $((signo+128)) + die ${statuscode} } [ "${SFEED_UPDATE_INCLUDE}" = "1" ] || main "$@" -- cgit v1.2.3 From 9754fe74f7b5c0600cc41eef8c6f5c8305a74a18 Mon Sep 17 00:00:00 2001 From: Hiltjo Posthuma Date: Tue, 26 Dec 2023 15:17:17 +0100 Subject: sfeed_update: disallow using a directory as a config file Tested on NetBSD 5.1: evaluating directories as config files could allow garbage, so disallow it. Devices / fifo, etc are still allowed. --- sfeed_update | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'sfeed_update') diff --git a/sfeed_update b/sfeed_update index 014985c..79f23ee 100755 --- a/sfeed_update +++ b/sfeed_update @@ -24,7 +24,7 @@ loadconfig() { fi # config is loaded here to be able to override $sfeedpath or functions. - if [ -r "${configpath}" ]; then + if [ -r "${configpath}" ] && [ ! -d "${configpath}" ]; then . "${configpath}" else printf "Configuration file \"%s\" cannot be read.\n" "${config}" >&2 -- cgit v1.2.3 From 62bfed65ca91c34ea24b81b191c23d4542a7075b Mon Sep 17 00:00:00 2001 From: Hiltjo Posthuma Date: Tue, 26 Dec 2023 15:23:15 +0100 Subject: sfeed_update: mktemp: improve compatibility with older systems Tested on NetBSD 5.1: - mktemp -p doesn't exist there yet. - mktemp without any arguments/template doesn't work - mktemp -d without any arguments/template doesn't work --- sfeed_update | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'sfeed_update') diff --git a/sfeed_update b/sfeed_update index 79f23ee..0628e2e 100755 --- a/sfeed_update +++ b/sfeed_update @@ -212,7 +212,7 @@ main() { # load config file. loadconfig "$1" # fetch feeds and store in temporary directory. - sfeedtmpdir="$(mktemp -p "${TMPDIR:-/tmp}" -d 'sfeed_XXXXXX')" || die + sfeedtmpdir="$(mktemp -d "${TMPDIR:-/tmp}/sfeed_XXXXXX")" || die mkdir -p "${sfeedtmpdir}/feeds" touch "${sfeedtmpdir}/ok" || die # make sure path exists.
-- cgit v1.2.3 From cdb8f7feb135adf6f18e389b4bbf47886089474a Mon Sep 17 00:00:00 2001 From: Hiltjo Posthuma Date: Tue, 26 Dec 2023 15:59:39 +0100 Subject: sfeed_update: use xargs -P -0 Some of the options, like -P are as of writing (2023) non-POSIX: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/xargs.html. However many systems support this useful extension for many years now. Some historic context: The xargs -0 option was added on 1996-06-11, about a year after the NetBSD import (over 27 years ago at the time of writing): http://cvsweb.openbsd.org/cgi-bin/cvsweb/src/usr.bin/xargs/xargs.c?rev=1.2&content-type=text/x-cvsweb-markup On OpenBSD the xargs -P option was added on 2003-12-06 by syncing the FreeBSD code: http://cvsweb.openbsd.org/cgi-bin/cvsweb/src/usr.bin/xargs/xargs.c?rev=1.14&content-type=text/x-cvsweb-markup Looking at the imported git history log of GNU findutils (which has xargs), the very first commit already had the -0 and -P option on Sun Feb 4 20:35:16 1996 +0000. Tested on many systems, old and new, some notable: - OpenBSD 7.4 - Void Linux - FreeBSD 12 - NetBSD 9.3 - HaikuOS (uses GNU tools). - Slackware 11 - OpenBSD 3.8 - NetBSD 5.1 Some shells: - oksh - bash - dash - zsh During testing there are some incompatibilities found in parsing the fields so the arguments are passed as one argument which is split later on by the child program. --- sfeed_update | 48 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 16 deletions(-) (limited to 'sfeed_update') diff --git a/sfeed_update b/sfeed_update index 0628e2e..309b348 100755 --- a/sfeed_update +++ b/sfeed_update @@ -163,14 +163,12 @@ _feed() { # fetch and process a feed in parallel. # feed(name, feedurl, [basesiteurl], [encoding]) feed() { - # wait until ${maxjobs} are finished: will stall the queue if an item - # is slow, but it is portable. 
- [ ${signo} -ne 0 ] && return - [ $((curjobs % maxjobs)) -eq 0 ] && wait - [ ${signo} -ne 0 ] && return - curjobs=$((curjobs + 1)) - - _feed "$@" & + # Job parameters for xargs. + # Specify fields as a single parameter separated by the NUL separator. + # These fields are split later by the child process, this allows xargs + # with empty fields across many implementations. + printf '%s\037%s\037%s\037%s\037%s\037%s\0' \ + "${config}" "${sfeedtmpdir}" "$1" "$2" "$3" "$4" } cleanup() { @@ -201,8 +199,6 @@ feeds() { } main() { - # job counter. - curjobs=0 # signal number received for parent. signo=0 # SIGINT: signal to interrupt parent. @@ -217,16 +213,36 @@ main() { touch "${sfeedtmpdir}/ok" || die # make sure path exists. mkdir -p "${sfeedpath}" - # fetch feeds specified in config file. - feeds - # wait till all feeds are fetched (concurrently). - [ ${signo} -eq 0 ] && wait - # check error exit status indicator for parallel jobs. - [ -f "${sfeedtmpdir}/ok" ] + + # print feeds for parallel processing with xargs. + feeds > "${sfeedtmpdir}/jobs" || die + SFEED_UPDATE_CHILD="1" xargs -s 65535 -x -0 -P "${maxjobs}" -n 1 \ + "$(readlink -f "${argv0}")" < "${sfeedtmpdir}/jobs" statuscode=$? + + # check error exit status indicator for parallel jobs. + [ -f "${sfeedtmpdir}/ok" ] || statuscode=1 # on signal SIGINT and SIGTERM exit with signal number + 128. [ ${signo} -ne 0 ] && die $((signo+128)) die ${statuscode} } +# process a single feed. +# parameters are: config, tmpdir, name, feedurl, basesiteurl, encoding +if [ "${SFEED_UPDATE_CHILD}" = "1" ]; then + IFS="" # "\037" + [ "$1" = "" ] && exit 0 # must have an argument set + printf '%s\n' "$1" | \ + while read -r config tmpdir name feedurl basesiteurl encoding; do + # load config file, sets $config. + loadconfig "${config}" + sfeedtmpdir="${tmpdir}" + _feed "${name}" "${feedurl}" "${basesiteurl}" "${encoding}" + exit "$?" 
+ done + exit 0 +fi + +# ...else parent mode: +argv0="$0" # remember $0, in shells like zsh $0 is the function name. [ "${SFEED_UPDATE_INCLUDE}" = "1" ] || main "$@" -- cgit v1.2.3 From 87d5c99ebfee6e2255bd057e0eb45f1631b8b987 Mon Sep 17 00:00:00 2001 From: Hiltjo Posthuma Date: Tue, 26 Dec 2023 16:34:58 +0100 Subject: sfeedrc: bump default maxjobs from 8 to 16 --- sfeed_update | 2 +- sfeedrc.5 | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'sfeed_update') diff --git a/sfeed_update b/sfeed_update index 309b348..94b64df 100755 --- a/sfeed_update +++ b/sfeed_update @@ -7,7 +7,7 @@ sfeedpath="$HOME/.sfeed/feeds" # used for processing feeds concurrently: wait until ${maxjobs} amount of # feeds are finished at a time. -maxjobs=8 +maxjobs=16 # load config (evaluate shellscript). # loadconfig(configfile) diff --git a/sfeedrc.5 b/sfeedrc.5 index 7aabe75..7640a28 100644 --- a/sfeedrc.5 +++ b/sfeedrc.5 @@ -1,4 +1,4 @@ -.Dd July 7, 2023 +.Dd December 26, 2023 .Dt SFEEDRC 5 .Os .Sh NAME @@ -18,7 +18,7 @@ The default is can be used to change the amount of concurrent .Fn feed jobs. -The default is 8. +The default is 16. .El .Sh FUNCTIONS .Bl -tag -width Ds -- cgit v1.2.3 From 8bdb849e449c5236a1ef7e8b4b91186790f7fb29 Mon Sep 17 00:00:00 2001 From: Hiltjo Posthuma Date: Wed, 27 Dec 2023 13:16:40 +0100 Subject: sfeed_update: suppress output to stderr, like merge() already does Noticed while testing TMPDIR=/noaccess sort on Illumos/OpenIndiana, which gives a warning to stderr. For sort temporary directories might be used for large output. --- sfeed_update | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'sfeed_update') diff --git a/sfeed_update b/sfeed_update index 94b64df..85cd759 100755 --- a/sfeed_update +++ b/sfeed_update @@ -85,7 +85,7 @@ merge() { # order by timestamp (descending). # order(name, url) order() { - sort -t ' ' -k1rn,1 + sort -t ' ' -k1rn,1 2>/dev/null } # internal handler to fetch and process a feed. 
-- cgit v1.2.3 From 1a5fa7454c92c5497c23e179e599224f0d96a920 Mon Sep 17 00:00:00 2001 From: Hiltjo Posthuma Date: Wed, 27 Dec 2023 13:18:03 +0100 Subject: sfeed_update: remove xargs -s There's no need to specify it. POSIX defines it should support at least LINE_MAX (2048 typically). OpenIndiana xargs doesn't conform to POSIX. It doesn't use the largest constraint but errors out. From POSIX: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/xargs.html "Values of size up to at least {LINE_MAX} bytes shall be supported, provided that the constraints specified in the DESCRIPTION are met. It shall not be considered an error if a value larger than that supported by the implementation or exceeding the constraints specified in the DESCRIPTION is given; xargs shall use the largest value it supports within the constraints." --- sfeed_update | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'sfeed_update') diff --git a/sfeed_update b/sfeed_update index 85cd759..1bdafed 100755 --- a/sfeed_update +++ b/sfeed_update @@ -216,7 +216,7 @@ main() { # print feeds for parallel processing with xargs. feeds > "${sfeedtmpdir}/jobs" || die - SFEED_UPDATE_CHILD="1" xargs -s 65535 -x -0 -P "${maxjobs}" -n 1 \ + SFEED_UPDATE_CHILD="1" xargs -x -0 -P "${maxjobs}" -n 1 \ "$(readlink -f "${argv0}")" < "${sfeedtmpdir}/jobs" statuscode=$? -- cgit v1.2.3 From 0a5e36032373b34558e62f309be0b0ef7e925459 Mon Sep 17 00:00:00 2001 From: Hiltjo Posthuma Date: Wed, 27 Dec 2023 13:20:07 +0100 Subject: sfeed_update: rename local variables just in case The config is loaded for each child program. These could override these variables if the user specifies the same name.
--- sfeed_update | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'sfeed_update') diff --git a/sfeed_update b/sfeed_update index 1bdafed..607c048 100755 --- a/sfeed_update +++ b/sfeed_update @@ -233,11 +233,11 @@ if [ "${SFEED_UPDATE_CHILD}" = "1" ]; then IFS="" # "\037" [ "$1" = "" ] && exit 0 # must have an argument set printf '%s\n' "$1" | \ - while read -r config tmpdir name feedurl basesiteurl encoding; do + while read -r _config _tmpdir _name _feedurl _basesiteurl _encoding; do # load config file, sets $config. - loadconfig "${config}" - sfeedtmpdir="${tmpdir}" - _feed "${name}" "${feedurl}" "${basesiteurl}" "${encoding}" + loadconfig "${_config}" + sfeedtmpdir="${_tmpdir}" + _feed "${_name}" "${_feedurl}" "${_basesiteurl}" "${_encoding}" exit "$?" done exit 0 -- cgit v1.2.3 From cbf92f526528fc995e309d2f13b7dcebfd1e5c75 Mon Sep 17 00:00:00 2001 From: Hiltjo Posthuma Date: Fri, 29 Dec 2023 13:50:55 +0100 Subject: sfeed_update/sfeed_opml_export: only allow regular files Be more strict and only allow regular files. It makes no sense to use device files or fifos with sfeed_update and it can cause issues, because sfeed_update expects to read the config file for each (child) invocation also. --- sfeed_opml_export | 2 +- sfeed_update | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'sfeed_update') diff --git a/sfeed_opml_export b/sfeed_opml_export index 9e769f7..f949488 100755 --- a/sfeed_opml_export +++ b/sfeed_opml_export @@ -15,7 +15,7 @@ loadconfig() { fi # config is loaded here to be able to override $sfeedpath or functions. - if [ -r "${configpath}" ] && [ ! -d "${configpath}" ]; then + if [ -r "${configpath}" ] && [ -f "${configpath}" ]; then . 
"${configpath}" else printf "Configuration file \"%s\" cannot be read.\n" "${config}" >&2 diff --git a/sfeed_update b/sfeed_update index 607c048..1f797f8 100755 --- a/sfeed_update +++ b/sfeed_update @@ -24,7 +24,7 @@ loadconfig() { fi # config is loaded here to be able to override $sfeedpath or functions. - if [ -r "${configpath}" ] && [ ! -d "${configpath}" ]; then + if [ -r "${configpath}" ] && [ -f "${configpath}" ]; then . "${configpath}" else printf "Configuration file \"%s\" cannot be read.\n" "${config}" >&2 -- cgit v1.2.3 From 04c76f86b66d2c30cadc6fef4df98cb3077ebcd9 Mon Sep 17 00:00:00 2001 From: Hiltjo Posthuma Date: Fri, 29 Dec 2023 13:56:42 +0100 Subject: sfeed_update: code-style and consistency: add some comments for functions --- sfeed_update | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'sfeed_update') diff --git a/sfeed_update b/sfeed_update index 1f797f8..18096ea 100755 --- a/sfeed_update +++ b/sfeed_update @@ -171,6 +171,7 @@ feed() { "${config}" "${sfeedtmpdir}" "$1" "$2" "$3" "$4" } +# cleanup() cleanup() { # remove temporary directory with feed files. rm -rf "${sfeedtmpdir}" @@ -184,6 +185,7 @@ die() { exit "${statuscode}" } +# sighandler(signo) sighandler() { signo="$1" # ignore TERM signal for myself. @@ -192,12 +194,14 @@ sighandler() { kill -TERM -$$ } +# feeds() feeds() { printf "Configuration file \"%s\" is invalid or does not contain a \"feeds\" function.\n" "${config}" >&2 echo "See sfeedrc.example for an example." >&2 die } +# main(args...) main() { # signal number received for parent. 
signo=0 -- cgit v1.2.3 From 391a556d308fe19b22614498d8bdefab0c3016be Mon Sep 17 00:00:00 2001 From: Hiltjo Posthuma Date: Fri, 29 Dec 2023 14:21:30 +0100 Subject: sfeed_update: reword some comments --- sfeed_update | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'sfeed_update') diff --git a/sfeed_update b/sfeed_update index 18096ea..fd468a5 100755 --- a/sfeed_update +++ b/sfeed_update @@ -163,10 +163,10 @@ _feed() { # fetch and process a feed in parallel. # feed(name, feedurl, [basesiteurl], [encoding]) feed() { - # Job parameters for xargs. - # Specify fields as a single parameter separated by the NUL separator. - # These fields are split later by the child process, this allows xargs - # with empty fields across many implementations. + # Output job parameters for xargs. + # Specify fields as a single parameter separated by a NUL byte. + # The parameter is split into fields later by the child process, this + # allows using xargs with empty fields across many implementations. printf '%s\037%s\037%s\037%s\037%s\037%s\0' \ "${config}" "${sfeedtmpdir}" "$1" "$2" "$3" "$4" } @@ -238,7 +238,6 @@ if [ "${SFEED_UPDATE_CHILD}" = "1" ]; then [ "$1" = "" ] && exit 0 # must have an argument set printf '%s\n' "$1" | \ while read -r _config _tmpdir _name _feedurl _basesiteurl _encoding; do - # load config file, sets $config. loadconfig "${_config}" sfeedtmpdir="${_tmpdir}" _feed "${_name}" "${_feedurl}" "${_basesiteurl}" "${_encoding}" @@ -248,5 +247,5 @@ if [ "${SFEED_UPDATE_CHILD}" = "1" ]; then fi # ...else parent mode: -argv0="$0" # remember $0, in shells like zsh $0 is the function name. +argv0="$0" # store $0, in the zsh shell $0 is the name of the function. [ "${SFEED_UPDATE_INCLUDE}" = "1" ] || main "$@" -- cgit v1.2.3