From 774dc3ed45bc2a1efcddeea2eb885e140949f9eb Mon Sep 17 00:00:00 2001
From: Hiltjo Posthuma
Date: Fri, 5 Oct 2018 20:22:58 +0200
Subject: README: improve filter example, compile flags order

---
 README | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/README b/README
index 5c7dab1..cb6d9bc 100644
--- a/README
+++ b/README
@@ -223,16 +223,14 @@ argument is optional):
 filter() {
 	case "$1" in
 	"tweakers")
-		LC_LOCALE=C awk -F '	' 'BEGIN {
-			OFS = "	";
-		}
+		LC_LOCALE=C awk -F '	' 'BEGIN { OFS = "	"; }
 		# skip ads.
 		$2 ~ /^ADV:/ {
 			next;
 		}
 		# shorten link.
 		{
-			if (match($3, /^https:\/\/tweakers\.net\/(nieuws|downloads|reviews|geek)\/[0-9]+\//)) {
+			if (match($3, /^https:\/\/tweakers\.net\/[a-z]+\/[0-9]+\//)) {
 				$3 = substr($3, RSTART, RLENGTH);
 			}
 			print $0;
@@ -245,15 +243,23 @@ filter() {
 	esac | \
 		# replace youtube links with embed links.
 		sed 's@www.youtube.com/watch?v=@www.youtube.com/embed/@g' | \
-		# try to strip utm_ tracking parameters.
-		LC_LOCALE=C awk -F '	' 'BEGIN {
-			OFS = "	";
-		}
+
+		LC_LOCALE=C awk -F '	' 'BEGIN { OFS = "	"; }
 		{
-			gsub(/\?utm_([^&]+)/, "?", $3);
-			gsub(/&utm_([^&]+)/, "", $3);
+			# shorten feedburner links.
+			if (match($3, /^(http|https):\/\/[^/]+\/~r\/.*\/~3\/[^\/]+\//)) {
+				$3 = substr($3, RSTART, RLENGTH);
+			}
+
+			# strip tracking parameters
+
+			# urchin, facebook, piwik, webtrekk and generic.
+			gsub(/\?(ad|campaign|pk|tm|wt)_([^&]+)/, "?", $3);
+			gsub(/&(ad|campaign|pk|tm|wt)_([^&]+)/, "", $3);
+
 			gsub(/\?&/, "?", $3);
 			gsub(/[\?&]+$/, "", $3);
+
 			print $0;
 		}'
 }
@@ -314,7 +320,7 @@ File sfeed_archive.c:
 
 Now compile and run:
 
-	$ cc util.c sfeed_archive.c -o sfeed_archive -std=c99
+	$ cc -std=c99 -o sfeed_archive util.c sfeed_archive.c
 	$ ./sfeed_archive 20150101 < feeds > feeds.new
 	$ mv feeds feeds.bak
 	$ mv feeds.new feeds
-- 
cgit v1.2.3