#!/bin/sh
# update feeds, merge with old feeds.
# NOTE: assumes "sfeed_*" files are in $PATH.
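# usage: sfeed_update [configfile]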

# defaults
sfeedpath="$HOME/.sfeed"
sfeedfile="$sfeedpath/feeds"
# temporary file for new feeds (for merging).
sfeedfilenew="$sfeedfile.new"

# load config (evaluate shellscript).
# loadconfig(configfile)
loadconfig() {
	# allow specifying the config via argv[1].
	if [ "$1" != "" ]; then
		# get absolute path of config file.
		config=$(readlink -f "$1")
	else
		# default config location.
		config="$HOME/.sfeed/sfeedrc"
	fi

	# load config: the config is loaded here so it can override the
	# variables above (sfeedpath, sfeedfile, etc).
	if [ -r "$config" ]; then
		. "$config"
	else
		echo "Configuration file \"$config\" does not exist or is not readable." >&2
		echo "See sfeedrc.example for an example." >&2
		exit 1
	fi
}

# merge raw files.
# merge(oldfile, newfile)
merge() {
	# unique sort by id, link, title.
	# order by feedname (asc), feedurl (asc) and timestamp (desc).
	(cat "$1" "$2" 2> /dev/null) |
		sort -t '	' -u -k7,7 -k4,4 -k3,3 |
		sort -t '	' -k10,10 -k11,11 -k1r,1
}
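
# for reference, the sort keys above assume this tab-separated layout:
# fields 3 (title), 4 (link) and 7 (id) are produced by sfeed(1), field 1 is
# the timestamp, and fields 10 (feedname) and 11 (feedurl) are appended by
# addfield below.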

# fetch a feed via HTTP/HTTPS etc.
# fetchfeed(url, name)
fetchfeed() {
	if curl -f -s -S -L --max-time 30 -z "$lastupdated" "$1"; then
		printf '[%s] Fetching %s [%s] ... done\n' "$(date)" "$2" "$1" >&2
	else
		printf '[%s] Fetching %s [%s] ... fail\n' "$(date)" "$2" "$1" >&2
	fi
}
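
# NOTE: -z makes curl send an If-Modified-Since header; on a 304 Not Modified
# reply the body is empty, sfeed then outputs nothing and the merge below
# simply keeps the old entries.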

# append field(s) to each line, output to stdout.
# addfield(field)
addfield() {
	# NOTE: IFS is cleared for read only, so whitespace is not stripped
	# and there is no global IFS to save and restore.
	while IFS= read -r line; do
		printf "%s	%s\n" "${line}" "$1"
	done
}
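
# example (hypothetical input): a line "a	b" run through
# addfield "name	url	siteurl" comes out as "a	b	name	url	siteurl",
# i.e. three extra tab-separated fields.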

# fetch and parse feed.
# feed(name, feedurl, basesiteurl, [encoding])
feed() {
	# run in the background so feeds are fetched in parallel (see the
	# wait below); keep tmpfile local to the subshell.
	(tmpfile=$(mktemp -p "$TMPDIR")
	if [ "$4" = "" ]; then
		# don't use iconv if encoding is not set in the config.
		fetchfeed "$2" "$1" | sfeed | addfield "$1	$2	$3"
	else
		# use iconv to convert the encoding to UTF-8.
		fetchfeed "$2" "$1" | iconv -cs -f "$4" -t "utf-8" | sfeed | addfield "$1	$2	$3"
	fi > "$tmpfile") &
}
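
# a feeds() function in the sfeedrc config might look like this (the name and
# URLs are examples, see sfeedrc.example):
# feeds() {
#	feed "mysite" "https://example.org/feed.xml" "https://example.org"
# }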

terminated() {
	isrunning="0"
}

cleanup() {
	# remove temporary files
	rm -rf "$tmpfile" "$TMPDIR"
}

# load config file.
loadconfig "$1"
# fetch feeds and store in temporary file.
TMPDIR=$(mktemp -d -t "sfeed_XXXXXX")
# get date of last modified feedfile in format:
# YYYYmmdd HH:MM:SS [+-][0-9]*
lastupdated=$(stat -c "%y" "$sfeedfile" 2> /dev/null | cut -c 1-4,6-7,9-10,11-19,30-)
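# e.g. "2013-05-12 10:30:00.000000000 +0200" becomes "20130512 10:30:00 +0200".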
# Kill whole current process group on ^C.
isrunning="1"
trap -- "terminated" "15" # SIGTERM: signal to terminate parent.
trap -- "kill -TERM -$$" "2" # SIGINT: kill all running childs >:D
# fetch feeds specified in config file.
feeds
# make sure path exists.
mkdir -p "$sfeedpath"
# wait till all feeds are fetched (allows running in parallel).
wait
[ "$isrunning" = "0" ] && cleanup && exit 1 # if terminated cleanup.
# concat all individual feed files to a single file.
# NOTE: mktemp uses $TMPDIR for the temporary directory, so $tmpfile ends up
# inside $TMPDIR; exclude it from the find below so it is not catted into
# itself.
tmpfile=$(mktemp -t "sfeed_XXXXXX")
find "$TMPDIR" -type f ! -name "$(basename "$tmpfile")" -exec cat {} \; > "$tmpfile"
# get new data and merge with old.
merge "$sfeedfile" "$tmpfile" > "$sfeedfilenew"
# overwrite the old file with the updated file.
mv "$sfeedfilenew" "$sfeedfile"
# cleanup temporary files etc.
cleanup