summaryrefslogtreecommitdiff
path: root/sfeed_update
diff options
context:
space:
mode:
authorHiltjo Posthuma <hiltjo@codemadness.org>2023-12-26 15:59:39 +0100
committerHiltjo Posthuma <hiltjo@codemadness.org>2023-12-26 19:06:47 +0100
commitcdb8f7feb135adf6f18e389b4bbf47886089474a (patch)
tree3b7c2e23c42ce90460cd4472c5d35191eccf42e9 /sfeed_update
parent62bfed65ca91c34ea24b81b191c23d4542a7075b (diff)
sfeed_update: use xargs -P -0
Some of the options, like -P, are non-POSIX as of writing (2023):
https://pubs.opengroup.org/onlinepubs/9699919799/utilities/xargs.html
However, many systems have supported this useful extension for many years now.

Some historic context:

The xargs -0 option was added on 1996-06-11, about a year after the NetBSD import (over 27 years ago at the time of writing):
http://cvsweb.openbsd.org/cgi-bin/cvsweb/src/usr.bin/xargs/xargs.c?rev=1.2&content-type=text/x-cvsweb-markup

On OpenBSD the xargs -P option was added on 2003-12-06 by syncing the FreeBSD code:
http://cvsweb.openbsd.org/cgi-bin/cvsweb/src/usr.bin/xargs/xargs.c?rev=1.14&content-type=text/x-cvsweb-markup

Looking at the imported git history log of GNU findutils (which has xargs), the very first commit already had the -0 and -P options on Sun Feb 4 20:35:16 1996 +0000.

Tested on many systems, old and new, some notable:

- OpenBSD 7.4
- Void Linux
- FreeBSD 12
- NetBSD 9.3
- HaikuOS (uses GNU tools).
- Slackware 11
- OpenBSD 3.8
- NetBSD 5.1

Some shells:

- oksh
- bash
- dash
- zsh

During testing some incompatibilities were found in parsing the fields, so the arguments are passed as one argument, which is split later on by the child program.
Diffstat (limited to 'sfeed_update')
-rwxr-xr-xsfeed_update48
1 files changed, 32 insertions, 16 deletions
diff --git a/sfeed_update b/sfeed_update
index 0628e2e..309b348 100755
--- a/sfeed_update
+++ b/sfeed_update
@@ -163,14 +163,12 @@ _feed() {
# fetch and process a feed in parallel.
# feed(name, feedurl, [basesiteurl], [encoding])
feed() {
- # wait until ${maxjobs} are finished: will stall the queue if an item
- # is slow, but it is portable.
- [ ${signo} -ne 0 ] && return
- [ $((curjobs % maxjobs)) -eq 0 ] && wait
- [ ${signo} -ne 0 ] && return
- curjobs=$((curjobs + 1))
-
- _feed "$@" &
+ # Job parameters for xargs.
+ # Each job is one NUL-terminated parameter; its fields are separated by
+ # \037 (unit separator). The child process splits the fields later, which
+ # allows xargs with empty fields across many implementations.
+ printf '%s\037%s\037%s\037%s\037%s\037%s\0' \
+ "${config}" "${sfeedtmpdir}" "$1" "$2" "$3" "$4"
}
cleanup() {
@@ -201,8 +199,6 @@ feeds() {
}
main() {
- # job counter.
- curjobs=0
# signal number received for parent.
signo=0
# SIGINT: signal to interrupt parent.
@@ -217,16 +213,36 @@ main() {
touch "${sfeedtmpdir}/ok" || die
# make sure path exists.
mkdir -p "${sfeedpath}"
- # fetch feeds specified in config file.
- feeds
- # wait till all feeds are fetched (concurrently).
- [ ${signo} -eq 0 ] && wait
- # check error exit status indicator for parallel jobs.
- [ -f "${sfeedtmpdir}/ok" ]
+
+ # print feeds for parallel processing with xargs.
+ feeds > "${sfeedtmpdir}/jobs" || die
+ SFEED_UPDATE_CHILD="1" xargs -s 65535 -x -0 -P "${maxjobs}" -n 1 \
+ "$(readlink -f "${argv0}")" < "${sfeedtmpdir}/jobs"
statuscode=$?
+
+ # check error exit status indicator for parallel jobs.
+ [ -f "${sfeedtmpdir}/ok" ] || statuscode=1
# on signal SIGINT and SIGTERM exit with signal number + 128.
[ ${signo} -ne 0 ] && die $((signo+128))
die ${statuscode}
}
+# process a single feed.
+# parameters are: config, tmpdir, name, feedurl, basesiteurl, encoding
+if [ "${SFEED_UPDATE_CHILD}" = "1" ]; then
+ IFS="" # "\037"
+ [ "$1" = "" ] && exit 0 # must have an argument set
+ printf '%s\n' "$1" | \
+ while read -r config tmpdir name feedurl basesiteurl encoding; do
+ # load config file, sets $config.
+ loadconfig "${config}"
+ sfeedtmpdir="${tmpdir}"
+ _feed "${name}" "${feedurl}" "${basesiteurl}" "${encoding}"
+ exit "$?"
+ done
+ exit 0
+fi
+
+# ...else parent mode:
+argv0="$0" # remember $0, in shells like zsh $0 is the function name.
[ "${SFEED_UPDATE_INCLUDE}" = "1" ] || main "$@"