From d0cb258e59ae59ddae3fb2a9de2420de9491849e Mon Sep 17 00:00:00 2001 From: Hiltjo Posthuma Date: Fri, 27 Jun 2014 15:40:21 +0200 Subject: add initial README for xml parser Signed-off-by: Hiltjo Posthuma --- README.xml | 130 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 README.xml diff --git a/README.xml b/README.xml new file mode 100644 index 0000000..2589f07 --- /dev/null +++ b/README.xml @@ -0,0 +1,130 @@ +XML parser +========== + + +Dependencies +------------ + +- C compiler (C99) + + +Features +-------- + +- Relatively small parser suitable for embedded systems. +- Pretty simple API comparable to libexpat. + + +Supports +-------- + +- Tags in short-form (). +- Attributes +- Short attributes without an explicity set value (). + - Attribute entities. +- Comments +- CDATA sections. + + +Caveats +------- + +- Internally static buffers are used, callbacks like XMLParser.xmldata can be + called multiple times for the same tag if the data size is bigger than the + internal buffer size (sizeof(XMLParser.data)). To differentiate between this + you can use xml*start and xml*end. +- No table of (HTML / XML) named entities you should handle this with the + XMLParser.xmldataentity callback yourself. +- The XML is not checked for errors so it will happily continue parsing invalid + XML data, this is by design. + + +Interface / API +--------------- + +Should be straightforward, see xml.h + + +Files +----- +xml.c, xml.h + + +Example (get RSS/Atom links from a webpage) +------------------------------------------- + +#include +#include +#include +#include +#include + +#include "util.h" +#include "xml.h" + +static unsigned int isbase = 0, islink = 0, isfeedlink = 0, found = 0; +static char feedlink[4096] = "", basehref[4096] = "", feedtype[256] = ""; + +static void +xmltagstart(XMLParser *p, const char *tag, size_t taglen) { + isbase = islink = isfeedlink = 0; + if(taglen == 4) { /* optimization */ + if(!strncasecmp(tag, "base", taglen)) + isbase = 1; + else if(!strncasecmp(tag, "link", taglen)) + islink = 1; + } +} + +static void +xmltagstartparsed(XMLParser *p, const char *tag, size_t taglen, int isshort) { + if(isfeedlink) { + if(*feedtype) { + fputs(feedtype, stdout); + putchar(' '); + } + printlink(feedlink, basehref, stdout); /* this is in util.h (program-specific) */ + putchar('\n'); + found++; + } +} + +static void +xmlattr(XMLParser *p, const char *tag, size_t taglen, const char *name, + size_t namelen, const char *value, size_t valuelen) { + + if(namelen != 4) /* optimization */ + return; + if(isbase) { + if(!strncasecmp(name, "href", namelen)) + strlcpy(basehref, value, sizeof(basehref)); + } else if(islink) { + if(!strncasecmp(name, "type", namelen)) { + if(!strncasecmp(value, "application/atom", strlen("application/atom")) || + !strncasecmp(value, "application/rss", strlen("application/rss"))) { + isfeedlink = 1; + strlcpy(feedtype, value, sizeof(feedtype)); + } + } else if(!strncasecmp(name, "href", namelen)) + strlcpy(feedlink, value, sizeof(feedlink)); + } +} + +int +main(int argc, char **argv) { + XMLParser x; + + /* base href */ + if(argc > 1) + strlcpy(basehref, argv[1], sizeof(basehref)); + + xmlparser_init(&x, stdin); + x.xmltagstart = xmltagstart; + x.xmlattr = xmlattr; + x.xmltagstartparsed = xmltagstartparsed; + + xmlparser_parse(&x); + + return found > 0 ? EXIT_SUCCESS : EXIT_FAILURE; +} + -- cgit v1.2.3