summaryrefslogtreecommitdiff
path: root/sfeed_update
blob: 5bc6388a23e52794f0fb2a1c1a94a5968ab54309 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#!/bin/sh
# update feeds, merge with old feeds.
# NOTE: assumes "sfeed_*" executables are in $PATH.

# default directory of the stored feed files; the config file that is loaded
# later may override it.
sfeedpath="${HOME}/.sfeed/feeds"

# load config (evaluate shellscript).
# loadconfig(configfile)
#   configfile - optional path of the config file; when empty
#                "$HOME/.sfeed/sfeedrc" is used.
# Sets the global ${config} to the file that is loaded. The file is sourced
# into the current shell, so it can override ${sfeedpath} or functions.
# Exits with status 1 when the file does not exist or is not readable.
loadconfig() {
	# allow to specify config via argv[1].
	if [ -n "$1" ]; then
		# get absolute path of config file: "." searches $PATH for names
		# without a slash, which is not wanted here.
		config=$(readlink -f "$1" 2>/dev/null)
		if [ -z "${config}" ]; then
			# the path could not be resolved (e.g. a missing directory or
			# a readlink without -f): fall back to the name as given, so
			# the error message below still names the file.
			case "$1" in
			*/*) config="$1" ;;
			*) config="./$1" ;;
			esac
		fi
	else
		# default config location.
		config="$HOME/.sfeed/sfeedrc"
	fi

	# load config: config is loaded here to be able to override $sfeedpath
	# or functions.
	if [ -r "${config}" ]; then
		. "${config}"
	else
		echo "Configuration file \"${config}\" does not exist or is not readable." >&2
		echo "See sfeedrc.example for an example." >&2
		exit 1
	fi
}

# merge two raw feed files and write the result to stdout.
# merge(oldfile, newfile)
merge() {
	# first sort: combine both files and keep a single line per unique
	# combination of the TAB-separated fields 6, 3 and 2 (id, link, title);
	# its error output is discarded.
	# second sort: order by field 1 (timestamp), numerically, descending.
	sort -t "$(printf '\t')" -u -k6,6 -k3,3 -k2,2 "$1" "$2" 2>/dev/null |
		sort -t "$(printf '\t')" -k1rn,1
}

# download a feed via HTTP/HTTPS etc. with curl and write it to stdout.
# fetchfeed(url, name, feedfile)
#   url      - location to fetch.
#   name     - feed name, only used in the status line.
#   feedfile - given to curl's -z (time condition) option.
# A status line ("  OK" or "FAIL", the current time and the name) is written
# to stderr.
fetchfeed() {
	# -f: fail on HTTP errors, -s -S: quiet but still show errors,
	# an empty "User-Agent:" header removes that header from the request.
	curl -H 'User-Agent:' -f -s -S --max-time 15 -z "$3" "$1" || {
		printf "FAIL %s %s\n" "$(date +'%H:%M:%S')" "$2" >&2
		return
	}
	printf "  OK %s %s\n" "$(date +'%H:%M:%S')" "$2" >&2
}

# filter: convert stdin from one encoding to another and write it to stdout.
# convertencoding(from, to)
convertencoding() {
	# nothing to convert when an encoding is missing or both are the same:
	# pass the data through unchanged.
	if [ -z "$1" ] || [ -z "$2" ] || [ "$1" = "$2" ]; then
		cat
		return
	fi
	# from != to: let iconv do the conversion, its error output is discarded.
	iconv -cs -f "$1" -t "$2" 2> /dev/null
}

# fetch and parse feed, then merge the result into the stored feed file.
# feed(name, feedurl, [basesiteurl], [encoding])
#   name        - feed name, also the file name used below ${sfeedpath} and
#                 ${sfeedtmpdir}.
#   feedurl     - URL given to fetchfeed().
#   basesiteurl - optional, given to sfeed as its argument.
#   encoding    - optional encoding of the feed; when empty the output of
#                 sfeed_xmlenc is used instead.
# The work is done in a background subshell, the function returns
# immediately; use "wait" to wait for it to finish.
feed() {
	(name="$1"
	encoding="$4"
	sfeedfile="${sfeedpath}/${name}"
	tmpfeedfile="${sfeedtmpdir}/${name}"
	tmpencfile=""
	if [ ! "${encoding}" = "" ]; then
		fetchfeed "$2" "$1" "${sfeedfile}" | convertencoding "${encoding}" "utf-8"
	else # detect encoding.
		# the raw data is read twice (detection and conversion), so it is
		# stored in a file first.
		tmpencfile="${tmpfeedfile}.enc"
		fetchfeed "$2" "$1" "${sfeedfile}" > "${tmpencfile}"
		detectenc=$(sfeed_xmlenc < "${tmpencfile}")
		convertencoding "${detectenc}" "utf-8" < "${tmpencfile}"
	fi | sfeed "$3" > "${tmpfeedfile}"

	# get new data and merge with old.
	# new feed data is non-empty.
	if [ -s "${tmpfeedfile}" ]; then
		# if file exists, merge
		if [ -e "${sfeedfile}" ]; then
			sfeedfilenew="${sfeedpath}/${name}.new"
			# only overwrite the old file when the merge succeeded and
			# produced data: the new data is non-empty, so an empty
			# result means the merge went wrong and moving it in place
			# would throw away the stored items.
			if merge "${sfeedfile}" "${tmpfeedfile}" > "${sfeedfilenew}" &&
			   [ -s "${sfeedfilenew}" ]; then
				mv "${sfeedfilenew}" "${sfeedfile}"
			else
				rm -f "${sfeedfilenew}"
				printf "FAIL %s %s (merge)\n" "$(date +'%H:%M:%S')" "${name}" >&2
			fi
		else
			# else just copy
			mv "${tmpfeedfile}" "${sfeedfile}"
		fi
	fi) &
}

# signal handler: record that the run was terminated by clearing the
# global ${isrunning} flag.
terminated() {
	isrunning=0
}

# delete the temporary directory ${sfeedtmpdir} together with all files in it.
cleanup() {
	rm -r -f "${sfeedtmpdir}"
}

# placeholder that is meant to be replaced by the feeds() function of the
# config file; when it is still in place it only reports the problem on
# stderr.
feeds() {
	{
		echo "Configuration file \"${config}\" is invalid or does not contain a \"feeds\" function."
		echo "See sfeedrc.example for an example."
	} >&2
}

# load config file; the script exits here when it cannot be read.
loadconfig "$1"
# create a private directory for the temporary feed files. Stop when that
# fails: with an empty ${sfeedtmpdir} the temporary files of feed() would
# end up in "/".
sfeedtmpdir="$(mktemp -d '/tmp/sfeed_XXXXXX')" || exit 1
# set to "0" by terminated() when SIGTERM is received.
isrunning="1"
# SIGTERM: signal to terminate parent.
trap -- "terminated" "15"
# SIGINT (^C): send SIGTERM to the process group with id $$.
trap -- "kill -TERM -$$" "2"
# make sure path exists.
mkdir -p "${sfeedpath}"
# fetch feeds specified in config file.
feeds
# wait till all feeds are fetched (concurrently).
wait
# cleanup temporary files etc.
cleanup
# if terminated, report failure. The explicit "exit 0" is needed: a bare
# `[ ... ] && exit 1` as last command would make the failed test the exit
# status of a normal run.
if [ "${isrunning}" = "0" ]; then
	exit 1
fi
exit 0