summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHiltjo Posthuma <hiltjo@codemadness.org>2023-12-26 15:59:39 +0100
committerHiltjo Posthuma <hiltjo@codemadness.org>2023-12-26 19:06:47 +0100
commitcdb8f7feb135adf6f18e389b4bbf47886089474a (patch)
tree3b7c2e23c42ce90460cd4472c5d35191eccf42e9
parent62bfed65ca91c34ea24b81b191c23d4542a7075b (diff)
sfeed_update: use xargs -P -0
Some of the options, like -P, are as of writing (2023) non-POSIX: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/xargs.html. However, many systems have supported this useful extension for many years now. Some historic context: The xargs -0 option was added on 1996-06-11, about a year after the NetBSD import (over 27 years ago at the time of writing): http://cvsweb.openbsd.org/cgi-bin/cvsweb/src/usr.bin/xargs/xargs.c?rev=1.2&content-type=text/x-cvsweb-markup On OpenBSD the xargs -P option was added on 2003-12-06 by syncing the FreeBSD code: http://cvsweb.openbsd.org/cgi-bin/cvsweb/src/usr.bin/xargs/xargs.c?rev=1.14&content-type=text/x-cvsweb-markup Looking at the imported git history log of GNU findutils (which has xargs), the very first commit already had the -0 and -P options on Sun Feb 4 20:35:16 1996 +0000. Tested on many systems, old and new, some notable: - OpenBSD 7.4 - Void Linux - FreeBSD 12 - NetBSD 9.3 - HaikuOS (uses GNU tools). - Slackware 11 - OpenBSD 3.8 - NetBSD 5.1 Some shells: - oksh - bash - dash - zsh During testing some incompatibilities were found in parsing the fields, so the arguments are passed as one argument which is split later on by the child program.
-rwxr-xr-xsfeed_update48
1 files changed, 32 insertions, 16 deletions
diff --git a/sfeed_update b/sfeed_update
index 0628e2e..309b348 100755
--- a/sfeed_update
+++ b/sfeed_update
@@ -163,14 +163,12 @@ _feed() {
# fetch and process a feed in parallel.
# feed(name, feedurl, [basesiteurl], [encoding])
feed() {
- # wait until ${maxjobs} are finished: will stall the queue if an item
- # is slow, but it is portable.
- [ ${signo} -ne 0 ] && return
- [ $((curjobs % maxjobs)) -eq 0 ] && wait
- [ ${signo} -ne 0 ] && return
- curjobs=$((curjobs + 1))
-
- _feed "$@" &
+ # Job parameters for xargs.
+ # Specify fields as a single parameter separated by the NUL separator.
+ # These fields are split later by the child process, this allows xargs
+ # with empty fields across many implementations.
+ printf '%s\037%s\037%s\037%s\037%s\037%s\0' \
+ "${config}" "${sfeedtmpdir}" "$1" "$2" "$3" "$4"
}
cleanup() {
@@ -201,8 +199,6 @@ feeds() {
}
main() {
- # job counter.
- curjobs=0
# signal number received for parent.
signo=0
# SIGINT: signal to interrupt parent.
@@ -217,16 +213,36 @@ main() {
touch "${sfeedtmpdir}/ok" || die
# make sure path exists.
mkdir -p "${sfeedpath}"
- # fetch feeds specified in config file.
- feeds
- # wait till all feeds are fetched (concurrently).
- [ ${signo} -eq 0 ] && wait
- # check error exit status indicator for parallel jobs.
- [ -f "${sfeedtmpdir}/ok" ]
+
+ # print feeds for parallel processing with xargs.
+ feeds > "${sfeedtmpdir}/jobs" || die
+ SFEED_UPDATE_CHILD="1" xargs -s 65535 -x -0 -P "${maxjobs}" -n 1 \
+ "$(readlink -f "${argv0}")" < "${sfeedtmpdir}/jobs"
statuscode=$?
+
+ # check error exit status indicator for parallel jobs.
+ [ -f "${sfeedtmpdir}/ok" ] || statuscode=1
# on signal SIGINT and SIGTERM exit with signal number + 128.
[ ${signo} -ne 0 ] && die $((signo+128))
die ${statuscode}
}
+# process a single feed.
+# parameters are: config, tmpdir, name, feedurl, basesiteurl, encoding
+if [ "${SFEED_UPDATE_CHILD}" = "1" ]; then
+ IFS="" # "\037"
+ [ "$1" = "" ] && exit 0 # must have an argument set
+ printf '%s\n' "$1" | \
+ while read -r config tmpdir name feedurl basesiteurl encoding; do
+ # load config file, sets $config.
+ loadconfig "${config}"
+ sfeedtmpdir="${tmpdir}"
+ _feed "${name}" "${feedurl}" "${basesiteurl}" "${encoding}"
+ exit "$?"
+ done
+ exit 0
+fi
+
+# ...else parent mode:
+argv0="$0" # remember $0, in shells like zsh $0 is the function name.
[ "${SFEED_UPDATE_INCLUDE}" = "1" ] || main "$@"