summaryrefslogtreecommitdiff
path: root/sfeed_web.c
blob: e25e91c9589dd6a7eb9895ea22c2e654afc09741 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#include <stdio.h>
#include <strings.h>

#include "util.h"
#include "xml.h"

/* string and size: expands to two arguments, the string s followed by its
 * length without the terminating NUL byte. The length is computed with
 * sizeof, so s must be a string literal (or char array), not a pointer. */
#define STRP(s) s,sizeof(s)-1

/* parser context, its callbacks are assigned in main() */
static XMLParser parser;
/* state of the tag and attribute currently being processed, updated by
 * xmltagstart() and xmlattrstart() */
static int isbasetag, islinktag, ishrefattr, istypeattr;
/* attribute values collected by xmlattr(): href and type of the current
 * link tag and the base href. basehref may also be preset from argv[1] in
 * main(). */
static char linkhref[4096], linktype[256], basehref[4096];

/* Write the string s to stdout, leaving out every byte for which ISCNTRL()
 * is true. */
static void
printvalue(const char *s)
{
	const char *cur = s;

	while (*cur != '\0') {
		if (!ISCNTRL((unsigned char)*cur))
			putchar(*cur);
		cur++;
	}
}

/* Tag start callback: record whether the tag name t is "base" or "link"
 * (compared case-insensitively). A new link tag starts with an empty href
 * and type. The parameters p and tl are unused. */
static void
xmltagstart(XMLParser *p, const char *t, size_t tl)
{
	isbasetag = !strcasecmp(t, "base");
	/* the name cannot match both, so only test "link" if it is not "base" */
	islinktag = !isbasetag && !strcasecmp(t, "link");

	if (islinktag) {
		/* forget the values collected for a previous link tag */
		linkhref[0] = '\0';
		linktype[0] = '\0';
	}
}

/* Callback that emits the collected link: for a link tag whose type starts
 * (case-insensitively) with "application/atom", "application/xml" or
 * "application/rss" print one line to stdout in the form:
 *
 *	href TAB type NEWLINE
 *
 * When a base href is set and the link href parses without a protocol, the
 * href is made absolute against the base. If that is not possible the href
 * is printed as it was collected. The parameters p, t, tl and isshort are
 * unused. */
static void
xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort)
{
	struct uri base, href, resolved;
	char absurl[4096];
	const char *out = linkhref; /* fallback: href as collected */
	int isfeed, n;

	if (!islinktag)
		return;

	isfeed = !strncasecmp(linktype, STRP("application/atom")) ||
	         !strncasecmp(linktype, STRP("application/xml")) ||
	         !strncasecmp(linktype, STRP("application/rss"));
	if (!isfeed)
		return;

	/* parse base URI each time: it can change. */
	if (basehref[0] != '\0' &&
	    uri_parse(linkhref, &href) != -1 && href.proto[0] == '\0' &&
	    uri_parse(basehref, &base) != -1 &&
	    uri_makeabs(&resolved, &href, &base) != -1 &&
	    resolved.proto[0] != '\0') {
		n = uri_format(absurl, sizeof(absurl), &resolved);
		/* only use the formatted URI when it fit in the buffer */
		if (n >= 0 && (size_t)n < sizeof(absurl))
			out = absurl;
	}

	printvalue(out);
	putchar('\t');
	printvalue(linktype);
	putchar('\n');
}

/* Attribute start callback: note whether the attribute name a is "href" (on
 * a base or link tag) or "type" (on a link tag only) and empty the buffer
 * that will receive its value. Names are compared case-insensitively. The
 * parameters p, t, tl and al are unused. */
static void
xmlattrstart(XMLParser *p, const char *t, size_t tl, const char *a, size_t al)
{
	char *value = NULL; /* buffer to reset, if any */

	ishrefattr = 0;
	istypeattr = 0;

	if (!(isbasetag || islinktag))
		return;

	if (!strcasecmp(a, "href")) {
		ishrefattr = 1;
		/* one of the two tag flags is set at this point */
		value = isbasetag ? basehref : linkhref;
	} else if (islinktag && !strcasecmp(a, "type")) {
		istypeattr = 1;
		value = linktype;
	}

	if (value != NULL)
		value[0] = '\0';
}

/* Attribute value callback: append the value v to the buffer that belongs
 * to the current tag and attribute (base href, link href or link type).
 * strlcat() bounds the write to the buffer size. Data for any other
 * attribute is ignored. Only v is used from the parameters. */
static void
xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl,
	const char *v, size_t vl)
{
	char *dst;
	size_t dstsiz;

	if (isbasetag && ishrefattr) {
		dst = basehref;
		dstsiz = sizeof(basehref);
	} else if (islinktag && ishrefattr) {
		dst = linkhref;
		dstsiz = sizeof(linkhref);
	} else if (islinktag && istypeattr) {
		dst = linktype;
		dstsiz = sizeof(linktype);
	} else {
		return;
	}

	strlcat(dst, v, dstsiz);
}

/* Attribute entity callback: when a href or type attribute is being read,
 * try to translate the entity v with xml_entitytostr() and hand the result
 * to xmlattr(). If the translation yields nothing the entity text itself is
 * passed on unchanged. */
static void
xmlattrentity(XMLParser *p, const char *t, size_t tl, const char *a, size_t al,
              const char *v, size_t vl)
{
	char decoded[8];
	int n;

	if (!(ishrefattr || istypeattr))
		return;

	n = xml_entitytostr(v, decoded, sizeof(decoded));
	if (n > 0) {
		/* entity was translated: pass the decoded bytes instead */
		v = decoded;
		vl = (size_t)n;
	}

	xmlattr(p, t, tl, a, al, v, vl);
}

/* Parse the data on stdin with the XML parser and let the callbacks print
 * the links that were found to stdout. An optional first argument sets the
 * initial base href. Read errors on stdin and write errors on stdout are
 * checked with checkfileerror() before returning. */
int
main(int argc, char *argv[])
{
	if (pledge("stdio", NULL) == -1)
		err(1, "pledge");

	/* optional base href given on the command line */
	if (argc >= 2)
		strlcpy(basehref, argv[1], sizeof(basehref));

	/* tag callbacks */
	parser.xmltagstart = xmltagstart;
	parser.xmltagstartparsed = xmltagstartparsed;
	/* attribute callbacks */
	parser.xmlattrstart = xmlattrstart;
	parser.xmlattr = xmlattr;
	parser.xmlattrentity = xmlattrentity;

	/* NOTE: GETNEXT is defined in xml.h for inline optimization */
	xml_parse(&parser);

	checkfileerror(stdin, "<stdin>", 'r');
	checkfileerror(stdout, "<stdout>", 'w');

	return 0;
}