summaryrefslogtreecommitdiff
path: root/README.xml
blob: 2589f07a883ce2663d43d2880da90ce14b4c7e1b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
XML parser
==========


Dependencies
------------

- C compiler (C99)


Features
--------

- Relatively small parser suitable for embedded systems.
- Pretty simple API comparable to libexpat.


Supports
--------

- Tags in short-form (<img src="lolcat.jpg" title="Meow" />).
- Attributes
- Short attributes without an explicitly set value (<input type="checkbox" checked />).
  - Attribute entities.
- Comments
- CDATA sections.


Caveats
-------

- Static buffers are used internally, so callbacks like XMLParser.xmldata can be
  called multiple times for the same tag if the data size is bigger than the
  internal buffer size (sizeof(XMLParser.data)). To tell where such a series of
  calls begins and ends you can use the xml*start and xml*end callbacks.
- There is no table of (HTML / XML) named entities; you should handle these
  yourself with the XMLParser.xmldataentity callback.
- The XML is not checked for errors, so the parser will happily continue
  parsing invalid XML data. This is by design.


Interface / API
---------------

Should be straightforward; see xml.h.


Files
-----
xml.c, xml.h


Example (get RSS/Atom links from a webpage)
-------------------------------------------

#include <stdio.h>
#include <string.h>
#include <strings.h>
#include <stdlib.h>
#include <ctype.h>

#include "util.h"
#include "xml.h"

/* Per-tag flags: cleared in xmltagstart(), then set while the tag is parsed. */
static unsigned int isbase;     /* current tag is <base> */
static unsigned int islink;     /* current tag is <link> */
static unsigned int isfeedlink; /* current <link> has an Atom/RSS "type" value */
/* Number of feed links printed so far; decides the exit status in main(). */
static unsigned int found;

/* Static storage is zero-initialized, so all three strings start out empty. */
static char feedlink[4096]; /* "href" value of a <link> tag */
static char basehref[4096]; /* base URL: argv[1] or the "href" of a <base> tag */
static char feedtype[256];  /* "type" value of a feed <link> */

/*
 * Tag-start callback: clears the per-tag flags and then records whether the
 * tag is named "base" or "link" (compared case-insensitively).
 * The parser handle p is not used.
 */
static void
xmltagstart(XMLParser *p, const char *tag, size_t taglen) {
	/* every tag starts with a clean slate */
	isfeedlink = 0;
	islink = 0;
	isbase = 0;

	/* both names of interest are 4 bytes long; skip comparing anything else */
	if(taglen != 4)
		return;

	isbase = !strncasecmp(tag, "base", 4);
	islink = !isbase && !strncasecmp(tag, "link", 4);
}

/*
 * Callback for a fully parsed start tag (presumably invoked by the parser
 * after the tag's attributes were passed to xmlattr() - see xml.h).
 *
 * If xmlattr() flagged the tag as a feed link, one line is written to stdout:
 * the feed type followed by a space (only when a type is stored), then
 * whatever printlink() emits for feedlink and basehref.
 * Every printed link increments found.
 *
 * The parameters p, tag, taglen and isshort are not used.
 */
static void
xmltagstartparsed(XMLParser *p, const char *tag, size_t taglen, int isshort) {
	if(isfeedlink) {
		if(*feedtype) {
			fputs(feedtype, stdout);
			putchar(' ');
		}
		printlink(feedlink, basehref, stdout); /* this is in util.h (program-specific) */
		putchar('\n');
		found++;
	}

	/* Forget this tag's href and type. Without this, a later feed <link>
	 * that has no href attribute would be printed with the href left over
	 * from an earlier <link> tag. */
	feedlink[0] = '\0';
	feedtype[0] = '\0';
}

/*
 * Attribute callback: stores the attribute values needed to report feeds.
 * Which tag is being parsed is taken from the isbase / islink flags.
 *
 *   <base href=...>  value is copied to basehref
 *   <link type=...>  if the value starts with "application/atom" or
 *                    "application/rss" (case-insensitive), the tag is
 *                    flagged as a feed link and the value is copied to
 *                    feedtype
 *   <link href=...>  value is copied to feedlink
 *
 * Copies are bounded by the destination size (strlcpy).
 * The parameters p, tag, taglen and valuelen are not used.
 */
static void
xmlattr(XMLParser *p, const char *tag, size_t taglen, const char *name,
        size_t namelen, const char *value, size_t valuelen) {
	static const char atom[] = "application/atom";
	static const char rss[] = "application/rss";
	int ishref;

	/* only 4-byte names ("href", "type") on <base> or <link> matter */
	if(namelen != 4 || (!isbase && !islink))
		return;

	ishref = !strncasecmp(name, "href", 4);

	if(isbase) {
		if(ishref)
			strlcpy(basehref, value, sizeof(basehref));
		return;
	}

	/* from here on the current tag is <link> */
	if(!strncasecmp(name, "type", 4)) {
		/* prefix match, so e.g. a "+xml" suffix is accepted as well */
		if(strncasecmp(value, atom, sizeof(atom) - 1) &&
		   strncasecmp(value, rss, sizeof(rss) - 1))
			return;
		isfeedlink = 1;
		strlcpy(feedtype, value, sizeof(feedtype));
	} else if(ishref) {
		strlcpy(feedlink, value, sizeof(feedlink));
	}
}

/*
 * Runs the XML parser on stdin with the callbacks of this example installed.
 * argv[1], when given, is used as the initial base href.
 * Returns EXIT_SUCCESS if at least one feed link was printed, EXIT_FAILURE
 * otherwise.
 */
int
main(int argc, char **argv) {
	XMLParser parser;

	/* optional first argument: base href */
	if(argc >= 2)
		strlcpy(basehref, argv[1], sizeof(basehref));

	xmlparser_init(&parser, stdin);
	/* install the callbacks after initializing the parser */
	parser.xmltagstart = xmltagstart;
	parser.xmlattr = xmlattr;
	parser.xmltagstartparsed = xmltagstartparsed;

	xmlparser_parse(&parser);

	if(found == 0)
		return EXIT_FAILURE;
	return EXIT_SUCCESS;
}