summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHiltjo Posthuma <hiltjo@codemadness.org>2019-04-20 16:01:02 +0200
committerHiltjo Posthuma <hiltjo@codemadness.org>2019-04-20 16:01:02 +0200
commiteb9e9a957a75069edecbfcc1872930d07e146a1a (patch)
tree8b49afe7815db0ff5bde9ded948e5d5cec5fe4cb
parent4aeb397ef388962380cb5ce5b3de48bd22dbfb40 (diff)
README: update filter example
- Show how to filter protocol schemes more strictly. For example to allow only http://, https:// and gopher:// (not file://, javascript:, etc). - Filter links and now also enclosures.
-rw-r--r--README26
1 files changed, 18 insertions, 8 deletions
diff --git a/README b/README
index 2e02296..d5d0679 100644
--- a/README
+++ b/README
@@ -260,20 +260,30 @@ filter() {
sed 's@www.youtube.com/watch?v=@www.youtube.com/embed/@g' | \
LC_LOCALE=C awk -F ' ' 'BEGIN { OFS = " "; }
- {
+ function filterlink(s) { # returns the cleaned-up URL s, or "" when its scheme is not allowed
+ # protocol must start with http, https or gopher.
+ if (match(s, /^(http|https|gopher):\/\//) == 0) {
+ return "";
+ }
+
# shorten feedburner links.
- if (match($3, /^(http|https):\/\/[^/]+\/~r\/.*\/~3\/[^\/]+\//)) {
- $3 = substr($3, RSTART, RLENGTH);
+ if (match(s, /^(http|https):\/\/[^/]+\/~r\/.*\/~3\/[^\/]+\//)) {
+ s = substr(s, RSTART, RLENGTH); # cut from s itself: RSTART/RLENGTH describe s, which is not always field 3
}
# strip tracking parameters
-
# urchin, facebook, piwik, webtrekk and generic.
- gsub(/\?(ad|campaign|pk|tm|wt)_([^&]+)/, "?", $3);
- gsub(/&(ad|campaign|pk|tm|wt)_([^&]+)/, "", $3);
+ gsub(/\?(ad|campaign|pk|tm|wt)_([^&]+)/, "?", s);
+ gsub(/&(ad|campaign|pk|tm|wt)_([^&]+)/, "", s);
+
+ gsub(/\?&/, "?", s); # a removed first parameter leaves "?&" behind
+ gsub(/[\?&]+$/, "", s); # drop separators left dangling at the end
- gsub(/\?&/, "?", $3);
- gsub(/[\?&]+$/, "", $3);
+ return s;
+ }
+ { # main rule: no pattern, so it runs for every input record
+ $3 = filterlink($3); # link
+ $8 = filterlink($8); # enclosure
print $0; # the field assignments above rebuilt $0, so fields are joined with OFS
}'