summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHiltjo Posthuma <hiltjo@codemadness.org>2014-06-27 15:40:21 +0200
committerHiltjo Posthuma <hiltjo@codemadness.org>2014-06-27 15:40:21 +0200
commitd0cb258e59ae59ddae3fb2a9de2420de9491849e (patch)
tree41aa4b52ef5624a88591c157d164a1b48be552a9
parente0da2d775ac34a06949c554c61d015d076890be6 (diff)
add initial README for xml parser
Signed-off-by: Hiltjo Posthuma <hiltjo@codemadness.org>
-rw-r--r--README.xml130
1 files changed, 130 insertions, 0 deletions
diff --git a/README.xml b/README.xml
new file mode 100644
index 0000000..2589f07
--- /dev/null
+++ b/README.xml
@@ -0,0 +1,130 @@
+XML parser
+==========
+
+
+Dependencies
+------------
+
+- C compiler (C99)
+
+
+Features
+--------
+
+- Relatively small parser suitable for embedded systems.
+- Pretty simple API comparable to libexpat.
+
+
+Supports
+--------
+
+- Tags in short-form (<img src="lolcat.jpg" title="Meow" />).
+- Attributes
+- Short attributes without an explicitly set value (<input type="checkbox" checked />).
+ - Attribute entities.
+- Comments
+- CDATA sections.
+
+
+Caveats
+-------
+
+- Static buffers are used internally, so callbacks like XMLParser.xmldata can be
+ called multiple times for the same tag if the data size is bigger than the
+ internal buffer size (sizeof(XMLParser.data)). To tell where such a run of
+ calls begins and ends you can use the xml*start and xml*end callbacks.
+- There is no table of (HTML / XML) named entities; you should handle these
+ yourself with the XMLParser.xmldataentity callback.
+- The XML is not checked for errors, so the parser will happily continue
+ parsing invalid XML data; this is by design.
+
+
+Interface / API
+---------------
+
+Should be straightforward, see xml.h
+
+
+Files
+-----
+xml.c, xml.h
+
+
+Example (get RSS/Atom links from a webpage)
+-------------------------------------------
+
+#include <stdio.h>
+#include <string.h>
+#include <strings.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#include "util.h"
+#include "xml.h"
+
+static unsigned int isbase = 0, islink = 0, isfeedlink = 0, found = 0; /* per-tag flags set by the callbacks; found counts the links printed */
+static char feedlink[4096] = "", basehref[4096] = "", feedtype[256] = ""; /* href and type of the current <link>; base href from argv[1] or <base> */
+
+/*
+ * Installed as XMLParser.xmltagstart (presumably invoked once a tag name has
+ * been read). Clears the per-tag flags and records whether the new tag is
+ * <base> or <link>; tag names are compared case-insensitively.
+ */
+static void
+xmltagstart(XMLParser *p, const char *tag, size_t taglen) {
+	isbase = islink = isfeedlink = 0;
+	if(taglen != 4) /* optimization: both tag names are 4 bytes long */
+		return;
+	if(!strncasecmp(tag, "base", taglen)) {
+		isbase = 1;
+	} else if(!strncasecmp(tag, "link", taglen)) {
+		islink = 1;
+		/* forget the previous <link>: otherwise a feed link without
+		 * an href would be printed with a stale URL */
+		feedlink[0] = '\0';
+		feedtype[0] = '\0';
+	}
+}
+
+/*
+ * Installed as XMLParser.xmltagstartparsed. When the attribute callback has
+ * flagged the current tag as a feed link, writes one line to stdout: the
+ * feed type followed by a space (only if the type is non-empty), then the
+ * link as printed by printlink(). Each printed link is counted in found.
+ */
+static void
+xmltagstartparsed(XMLParser *p, const char *tag, size_t taglen, int isshort) {
+	if(!isfeedlink)
+		return;
+
+	if(feedtype[0] != '\0') {
+		fputs(feedtype, stdout);
+		putchar(' ');
+	}
+	/* printlink() comes from util.h (program-specific) */
+	printlink(feedlink, basehref, stdout);
+	putchar('\n');
+	found++;
+}
+
+/*
+ * Installed as XMLParser.xmlattr. For a <base> tag it stores the href in
+ * basehref. For a <link> tag it stores the href in feedlink and, when the
+ * type value starts with "application/atom" or "application/rss", stores the
+ * type in feedtype and flags the tag as a feed link. Names and values are
+ * compared case-insensitively.
+ */
+static void
+xmlattr(XMLParser *p, const char *tag, size_t taglen, const char *name,
+        size_t namelen, const char *value, size_t valuelen) {
+	int ishref;
+
+	/* optimization: "href" and "type" are both 4 bytes long */
+	if(namelen != 4)
+		return;
+	if(!isbase && !islink)
+		return;
+
+	ishref = !strncasecmp(name, "href", namelen);
+
+	if(isbase) {
+		if(ishref)
+			strlcpy(basehref, value, sizeof(basehref));
+		return;
+	}
+
+	/* from here on the current tag is <link> */
+	if(ishref) {
+		strlcpy(feedlink, value, sizeof(feedlink));
+		return;
+	}
+	if(strncasecmp(name, "type", namelen))
+		return;
+
+	/* prefix match, so e.g. "application/rss+xml" is accepted too */
+	if(!strncasecmp(value, "application/atom", sizeof("application/atom") - 1) ||
+	   !strncasecmp(value, "application/rss", sizeof("application/rss") - 1)) {
+		isfeedlink = 1;
+		strlcpy(feedtype, value, sizeof(feedtype));
+	}
+}
+
+/*
+ * Sets up an XMLParser on stdin with the callbacks above and runs it.
+ * An optional first argument supplies the initial base href.
+ * Returns EXIT_SUCCESS only if at least one feed link was printed.
+ */
+int
+main(int argc, char **argv) {
+	XMLParser parser;
+
+	/* base href */
+	if(argc >= 2)
+		strlcpy(basehref, argv[1], sizeof(basehref));
+
+	xmlparser_init(&parser, stdin);
+	parser.xmlattr = xmlattr;
+	parser.xmltagstart = xmltagstart;
+	parser.xmltagstartparsed = xmltagstartparsed;
+
+	xmlparser_parse(&parser);
+
+	if(found == 0)
+		return EXIT_FAILURE;
+	return EXIT_SUCCESS;
+}
+