#!/bin/sh
# update feeds, merge with old feeds.
# NOTE: assumes "sfeed_*" executables are in $PATH.

# defaults: set before the config file is sourced, so the config may override
# them.
# directory in which the merged feed files are stored.
sfeedpath="${HOME}/.sfeed/feeds"

# concurrency limit: feeds are started in batches of ${maxjobs}; the next
# batch only starts after all running jobs have finished.
maxjobs=8

# load config (evaluate shellscript).
# loadconfig(configfile)
#   configfile: path of the config file; when empty "$HOME/.sfeed/sfeedrc"
#               is used.
# sets the global ${config} to the path that was (or was attempted to be)
# loaded. exits the script with status 1 when the file is not readable.
loadconfig() {
	# allow to specify config via argv[1].
	if [ "$1" != "" ]; then
		# get absolute path of config file.
		config=$(readlink -f "$1" 2>/dev/null)
		# readlink prints nothing when the path cannot be resolved (or
		# when -f is unsupported): fall back to the path as given, so
		# the error message below names the file the user asked for.
		if [ "${config}" = "" ]; then
			config="$1"
		fi
	else
		# default config location.
		config="$HOME/.sfeed/sfeedrc"
	fi

	# config is loaded here to be able to override $sfeedpath or functions.
	if [ -r "${config}" ]; then
		. "${config}"
	else
		echo "Configuration file \"${config}\" does not exist or is not readable." >&2
		echo "See sfeedrc.example for an example." >&2
		exit 1
	fi
}

# re-encode the data on stdin and write the result to stdout.
# convertencoding(from, to)
#   from: name of the source encoding.
#   to:   name of the target encoding.
convertencoding() {
	# nothing to convert when an encoding is missing or both are the
	# same: pass the data through untouched.
	if [ -z "$1" ] || [ -z "$2" ] || [ "$1" = "$2" ]; then
		cat
		return
	fi

	# errors printed by iconv are discarded.
	iconv -cs -f "$1" -t "$2" 2> /dev/null
}

# merge raw files: unique sort by id, title, link.
# merge(name, oldfile, newfile)
#   name:    feed name (not used by this default implementation).
#   oldfile: file with the previously stored items.
#   newfile: file with the freshly fetched items.
# the merged lines are written to stdout, errors from sort are discarded.
merge() {
	# TAB-separated fields: key on field 6, then field 2, then field 3;
	# lines that compare equal on all three keys are emitted once.
	sort -u -t "$(printf '\t')" \
		-k 6,6 -k 2,2 -k 3,3 \
		"$2" "$3" 2>/dev/null
}

# filter fields: hook applied to the parsed feed data before it is merged.
# filter(name)
#   name: feed name (not used by this default implementation).
# the default passes stdin through to stdout unchanged.
filter() {
	cat -
}

# order by timestamp (descending).
# order(name)
#   name: feed name (not used by this default implementation).
# reads TAB-separated lines from stdin and writes them sorted to stdout.
order() {
	# field 1 compared numerically, highest value first.
	sort -t "$(printf '\t')" -k 1rn,1
}

# fetch a feed via HTTP/HTTPS etc.
# fetchfeed(name, url, feedfile)
#   name:     feed name, only used in the status line.
#   url:      location to download.
#   feedfile: passed to curl -z as the time condition for the request.
# the downloaded data goes to stdout; one "OK"/"FAIL" status line with the
# current time and the feed name goes to stderr.
fetchfeed() {
	# curl: 15 second limit (-m), quiet (-s), HTTP errors are failures (-f),
	# empty User-Agent header; diagnostics of curl itself are discarded.
	curl -L --max-redirs 0 -H "User-Agent:" -f -s -m 15 \
		-z "$3" "$2" 2>/dev/null && {
		printf "  OK %s %s\n" "$(date +'%H:%M:%S')" "$1" >&2
		return
	}

	printf "FAIL %s %s\n" "$(date +'%H:%M:%S')" "$1" >&2
}

# fetch and parse feed.
# feed(name, feedurl, [basesiteurl], [encoding])
#   name:        feed name; also the file name of the stored feed ('/' is
#                replaced by '_').
#   feedurl:     URL handed to fetchfeed().
#   basesiteurl: optional, handed to sfeed as its argument.
#   encoding:    optional source encoding; when empty it is detected with
#                sfeed_xmlenc.
# the work itself runs in a background subshell; at most ${maxjobs} feeds are
# started before waiting for the running ones. does nothing once a signal
# was received (${signo} != 0).
feed() {
	# wait until ${maxjobs} are finished: throughput using this logic is
	# non-optimal, but it is simple and portable.
	[ ${signo} -ne 0 ] && return
	[ $((curjobs % maxjobs)) -eq 0 ] && wait
	# a signal may have arrived while waiting.
	[ ${signo} -ne 0 ] && return
	curjobs=$((curjobs + 1))

	(name="$1"
	filename="$(printf '%s' "$1" | tr '/' '_')"
	feedurl="$2"
	basesiteurl="$3"
	tmpfeedfile="${sfeedtmpdir}/${filename}"
	tmpencfile=""
	encoding="$4"
	sfeedfile="${sfeedpath}/${filename}"

	if [ "${encoding}" != "" ]; then
		fetchfeed "${name}" "${feedurl}" "${sfeedfile}" | \
			convertencoding "${encoding}" "utf-8"
	else # detect encoding.
		# the raw download is stored first: it is read twice, once to
		# detect the encoding and once to convert it.
		tmpencfile="${tmpfeedfile}.enc"
		fetchfeed "${name}" "${feedurl}" "${sfeedfile}" > "${tmpencfile}"
		detectenc=$(sfeed_xmlenc < "${tmpencfile}")
		convertencoding "${detectenc}" "utf-8" < "${tmpencfile}"
	fi | sfeed "${basesiteurl}" | filter "${name}" > "${tmpfeedfile}"

	# get new data and merge with old.
	# new feed data is non-empty.
	if [ -s "${tmpfeedfile}" ]; then
		# a feed that is stored for the first time is merged with
		# /dev/null, so both cases share the same code path.
		if [ -e "${sfeedfile}" ]; then
			oldfile="${sfeedfile}"
		else
			oldfile="/dev/null"
		fi
		tmpmergefile="${tmpfeedfile}.merge"
		sfeedfilenew="${sfeedpath}/${filename}.new"

		# merge and order are run as separate steps (not as a pipeline)
		# so a failure of either one is noticed: the stored feed is
		# only replaced when both succeeded, otherwise a failed step
		# would overwrite it with empty or truncated data.
		if merge "${name}" "${oldfile}" "${tmpfeedfile}" > "${tmpmergefile}" && \
			order "${name}" < "${tmpmergefile}" > "${sfeedfilenew}"; then
			# overwrite old file with updated file
			mv "${sfeedfilenew}" "${sfeedfile}"
		else
			rm -f "${sfeedfilenew}"
			printf "FAIL %s %s (merge/order failed, stored feed left untouched)\n" \
				"$(date +'%H:%M:%S')" "${name}" >&2
		fi
	fi) &
}

# cleanup()
# delete the temporary directory ${sfeedtmpdir} including the per-feed files
# stored in it.
cleanup() {
	rm -r -f "${sfeedtmpdir}"
}

# sighandler(signo)
#   signo: number of the signal that was received.
# records the signal in the global ${signo} and sends TERM to the process
# group with the id of this shell.
sighandler() {
	# remember the signal: feed() and the main code check this value.
	signo=$1
	# the TERM sent below is also delivered to this shell: ignore it here.
	trap '' TERM
	# terminate all running child processes.
	kill -TERM "-$$"
}

# feeds()
# fallback that is only reached when the loaded config file did not define
# its own feeds() function: report this on stderr.
feeds() {
	{
		echo "Configuration file \"${config}\" is invalid or does not contain a \"feeds\" function."
		echo "See sfeedrc.example for an example."
	} >&2
}

# main: load the config, fetch all feeds concurrently, clean up.
# exit status: 0 normally, 1 when a required directory cannot be created,
# signal number + 128 when interrupted by SIGINT or SIGTERM.

# job counter.
curjobs=0
# signal number received for parent.
signo=0
# SIGINT: signal to interrupt parent.
trap -- "sighandler 2" "INT"
# SIGTERM: signal to terminate parent.
trap -- "sighandler 15" "TERM"
# load config file.
loadconfig "$1"
# make sure path exists. this is done before the temporary directory is
# created, so bailing out here leaves nothing behind.
if ! mkdir -p "${sfeedpath}"; then
	echo "Could not create feeds directory \"${sfeedpath}\"." >&2
	exit 1
fi
# fetch feeds and store in temporary directory.
# without this check a failed mktemp would leave ${sfeedtmpdir} empty and
# the per-feed temporary files would be written below "/".
if ! sfeedtmpdir="$(mktemp -d "${TMPDIR:-/tmp}/sfeed_XXXXXX")"; then
	echo "Could not create temporary directory." >&2
	exit 1
fi
# fetch feeds specified in config file.
feeds
# wait till all feeds are fetched (concurrently).
[ ${signo} -eq 0 ] && wait
# cleanup temporary files etc.
cleanup
# on signal SIGINT and SIGTERM exit with signal number + 128.
[ ${signo} -ne 0 ] && exit $((signo+128))
exit 0