From 82d390808d7121df472d9692071a49236e3efd2f Mon Sep 17 00:00:00 2001 From: Hiltjo Posthuma Date: Mon, 5 Jan 2015 20:35:04 +0100 Subject: README.xml: reword some things, reference files (to always have working examples) --- README.xml | 123 +++++++++++++------------------------------------------------ 1 file changed, 26 insertions(+), 97 deletions(-) diff --git a/README.xml b/README.xml index d8eeec4..7f2d191 100644 --- a/README.xml +++ b/README.xml @@ -13,15 +13,16 @@ Features - Relatively small parser. - Pretty simple API comparable with libexpat. +- Pretty fast. +- Portable. Supports -------- - Tags in short-form (). -- Attributes +- Tag attributes. - Short attributes without an explicitly set value (). - - Attribute entities. - Comments - CDATA sections. @@ -31,106 +32,34 @@ Caveats - Internally static buffers are used, callbacks like XMLParser.xmldata are called multiple times for the same tag if the data size is bigger than the - internal buffer size (sizeof(XMLParser.data)). To differentiate between this - you can use xml*start and xml*end. -- If xmlattrentity or xmldataentity is NULL it will pass the data to xmlattr - and xmldata. -- No table of (HTML / XML) named entities you should handle this with the - XMLParser.xmldataentity callback yourself. + internal buffer size (sizeof(XMLParser.data)). To differentiate between new + calls for data you can use the xml*start and xml*end handlers. +- If the xmlattrentity or xmldataentity handler is NULL it will pass the data + to the xmlattr and xmldata handler. +- There is no table of (HTML / XML) named entities; you should handle this with + the XMLParser.xmldataentity handler yourself. - The XML is not checked for errors so it will happily continue parsing invalid XML data; this is by design. +Files used +---------- +xml.c and xml.h + + Interface / API --------------- -Should be straightforward, see xml.{c,h} and the example below. 
- - -Files ------ -xml.c, xml.h - - -Example (get RSS/Atom links from a webpage) -------------------------------------------- - -#include -#include -#include -#include -#include - -#include "util.h" -#include "xml.h" - -static unsigned int isbase = 0, islink = 0, isfeedlink = 0, found = 0; -static char feedlink[4096] = "", basehref[4096] = "", feedtype[256] = ""; - -static void -xmltagstart(XMLParser *p, const char *tag, size_t taglen) -{ - isbase = islink = isfeedlink = 0; - if(taglen == 4) { /* optimization */ - if(!strncasecmp(tag, "base", taglen)) - isbase = 1; - else if(!strncasecmp(tag, "link", taglen)) - islink = 1; - } -} - -static void -xmltagstartparsed(XMLParser *p, const char *tag, size_t taglen, int isshort) -{ - if(isfeedlink) { - if(*feedtype) { - fputs(feedtype, stdout); - putchar(' '); - } - printlink(feedlink, basehref, stdout); /* this is in util.h (program-specific) */ - putchar('\n'); - found++; - } -} - -static void -xmlattr(XMLParser *p, const char *tag, size_t taglen, const char *name, - size_t namelen, const char *value, size_t valuelen) -{ - - if(namelen != 4) /* optimization */ - return; - if(isbase) { - if(!strncasecmp(name, "href", namelen)) - strlcpy(basehref, value, sizeof(basehref)); - } else if(islink) { - if(!strncasecmp(name, "type", namelen)) { - if(!strncasecmp(value, "application/atom", strlen("application/atom")) || - !strncasecmp(value, "application/rss", strlen("application/rss"))) { - isfeedlink = 1; - strlcpy(feedtype, value, sizeof(feedtype)); - } - } else if(!strncasecmp(name, "href", namelen)) - strlcpy(feedlink, value, sizeof(feedlink)); - } -} - -int -main(int argc, char **argv) -{ - XMLParser x; - - /* base href */ - if(argc > 1) - strlcpy(basehref, argv[1], sizeof(basehref)); - - xmlparser_init(&x, stdin); - x.xmltagstart = xmltagstart; - x.xmlattr = xmlattr; - x.xmltagstartparsed = xmltagstartparsed; - - xmlparser_parse(&x); - - return found > 0 ? 
0 : 1; -} +Should be straightforward, see xml.{c,h} and the examples below. + + +Examples +-------- + +sfeed_opml_import.c or sfeed_web.c or sfeed_xmlenc.c + + +License +------- +Same as sfeed, see LICENSE file. -- cgit v1.2.3