summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Hiltjo Posthuma <hiltjo@codemadness.org> 2015-08-22 20:11:05 +0200
committer: Hiltjo Posthuma <hiltjo@codemadness.org> 2015-08-22 20:11:05 +0200
commit: 2d5b42efae35300a9eb7cc3a834dc8c522bf5a71 (patch)
tree: ee85145ae4e426bf2ea2e01934d43545d5bc39a2
parent: 1ad44956f0f7b044440f04ee72a7affef48b804c (diff)
xml: simplify XML reader
-rw-r--r-- README.xml | 5
-rw-r--r-- sfeed.c | 3
-rw-r--r-- sfeed_opml_import.c | 3
-rw-r--r-- sfeed_web.c | 3
-rw-r--r-- sfeed_xmlenc.c | 3
-rw-r--r-- util.h | 2
-rw-r--r-- xml.c | 107
-rw-r--r-- xml.h | 5
8 files changed, 27 insertions, 104 deletions
diff --git a/README.xml b/README.xml
index 5385ecf..c949da5 100644
--- a/README.xml
+++ b/README.xml
@@ -55,6 +55,8 @@ Should be trivial, see xml.c and xml.h and the examples below.
The most minimal implementation to read and parse from fd 0 (stdin) is:
+ #include <stdio.h>
+
#include "xml.h"
static XMLParser x;
@@ -62,7 +64,8 @@ The most minimal implementation to read and parse from fd 0 (stdin) is:
int
main(void)
{
- xml_parse_fd(&x, 0); /* xml_parse_buf(&x, "<sup />", 7); */
+ x.getnext = getchar;
+ xml_parse(&x);
return 0;
}
diff --git a/sfeed.c b/sfeed.c
index 84308a9..47876de 100644
--- a/sfeed.c
+++ b/sfeed.c
@@ -743,7 +743,8 @@ main(int argc, char *argv[])
parser.xmltagstart = xml_handler_start_el;
parser.xmltagstartparsed = xml_handler_start_el_parsed;
- xml_parse_fd(&parser, 0);
+ parser.getnext = getchar;
+ xml_parse(&parser);
return 0;
}
diff --git a/sfeed_opml_import.c b/sfeed_opml_import.c
index 248d4c2..c8dc60b 100644
--- a/sfeed_opml_import.c
+++ b/sfeed_opml_import.c
@@ -95,7 +95,8 @@ main(void)
"# list of feeds to fetch:\n"
"feeds() {\n"
" # feed <name> <feedurl> [basesiteurl] [encoding]\n", stdout);
- xml_parse_fd(&parser, 0);
+ parser.getnext = getchar;
+ xml_parse(&parser);
fputs("}\n", stdout);
return 0;
diff --git a/sfeed_web.c b/sfeed_web.c
index 29cecfc..95d60f0 100644
--- a/sfeed_web.c
+++ b/sfeed_web.c
@@ -98,7 +98,8 @@ main(int argc, char *argv[])
parser.xmltagstart = xmltagstart;
parser.xmltagstartparsed = xmltagstartparsed;
- xml_parse_fd(&parser, 0);
+ parser.getnext = getchar;
+ xml_parse(&parser);
return found > 0 ? 0: 1;
}
diff --git a/sfeed_xmlenc.c b/sfeed_xmlenc.c
index 180fa68..1075e65 100644
--- a/sfeed_xmlenc.c
+++ b/sfeed_xmlenc.c
@@ -61,7 +61,8 @@ main(void)
parser.xmltagend = xmltagend;
parser.xmltagstart = xmltagstart;
- xml_parse_fd(&parser, 0);
+ parser.getnext = getchar;
+ xml_parse(&parser);
return 1;
}
diff --git a/util.h b/util.h
index efcfc21..b329bb5 100644
--- a/util.h
+++ b/util.h
@@ -31,5 +31,3 @@ int parseuri(const char *, struct uri *, int);
int strtotime(const char *, time_t *);
char * xbasename(const char *);
void xmlencode(const char *, FILE *);
-
-
diff --git a/xml.c b/xml.c
index f7691cb..32b1be0 100644
--- a/xml.c
+++ b/xml.c
@@ -8,71 +8,13 @@
#include "xml.h"
-struct xml_context_fd {
- char buf[BUFSIZ];
- int readerrno;
- int fd;
- size_t nread;
- size_t offset;
-};
-
-struct xml_context_buf {
- const char *buf;
- size_t len;
- size_t offset;
-};
-
-static int
-xml_getnext_buf(XMLParser *x)
-{
- struct xml_context_buf *d = (struct xml_context_buf *)x->getnext_data;
-
- if (d->offset >= d->len)
- return EOF;
- return (int)d->buf[d->offset++];
-}
-
-static int /* read from fd with some buffering */
-xml_getnext_fd(XMLParser *x)
-{
- struct xml_context_fd *d = (struct xml_context_fd *)x->getnext_data;
- ssize_t r;
-
- /* previous read error was set */
- if (d->readerrno)
- return EOF;
-
- if (d->offset >= d->nread) {
- d->offset = 0;
-again:
- r = read(d->fd, d->buf, sizeof(d->buf));
- if (r == -1) {
- if (errno == EINTR)
- goto again;
- d->readerrno = errno;
- d->nread = 0;
- return EOF;
- } else if (!r) {
- return EOF;
- }
- d->nread = r;
- }
- return (int)d->buf[d->offset++];
-}
-
-static int
-xml_getnext(XMLParser *x)
-{
- return x->getnext(x);
-}
-
static void
xml_parseattrs(XMLParser *x)
{
size_t namelen = 0, valuelen;
int c, endsep, endname = 0;
- while ((c = xml_getnext(x)) != EOF) {
+ while ((c = x->getnext()) != EOF) {
if (isspace(c)) { /* TODO: simplify endname ? */
if (namelen)
endname = 1;
@@ -99,7 +41,7 @@ xml_parseattrs(XMLParser *x)
endsep = c; /* c is end separator */
if (x->xmlattrstart)
x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen);
- for (valuelen = 0; (c = xml_getnext(x)) != EOF;) {
+ for (valuelen = 0; (c = x->getnext()) != EOF;) {
if (c == '&') { /* entities */
x->data[valuelen] = '\0';
/* call data function with data before entity if there is data */
@@ -107,7 +49,7 @@ xml_parseattrs(XMLParser *x)
x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
x->data[0] = c;
valuelen = 1;
- while ((c = xml_getnext(x)) != EOF) {
+ while ((c = x->getnext()) != EOF) {
if (c == endsep)
break;
if (valuelen < sizeof(x->data) - 1)
@@ -173,7 +115,7 @@ xml_parsecomment(XMLParser *x)
if (x->xmlcommentstart)
x->xmlcommentstart(x);
- while ((c = xml_getnext(x)) != EOF) {
+ while ((c = x->getnext()) != EOF) {
if (c == end[i]) {
if (end[++i] == '\0') { /* end */
x->data[datalen] = '\0';
@@ -217,7 +159,7 @@ xml_parsecdata(XMLParser *x)
if (x->xmlcdatastart)
x->xmlcdatastart(x);
- while ((c = xml_getnext(x)) != EOF) {
+ while ((c = x->getnext()) != EOF) {
if (c == end[i]) {
if (end[++i] == '\0') { /* end */
x->data[datalen] = '\0';
@@ -374,17 +316,19 @@ xml_parse(XMLParser *x)
int c, ispi;
size_t datalen, tagdatalen, taglen;
- while ((c = xml_getnext(x)) != EOF && c != '<')
+ if (!x->getnext)
+ return;
+ while ((c = x->getnext()) != EOF && c != '<')
; /* skip until < */
while (c != EOF) {
if (c == '<') { /* parse tag */
- if ((c = xml_getnext(x)) == EOF)
+ if ((c = x->getnext()) == EOF)
return;
x->tag[0] = '\0';
x->taglen = 0;
if (c == '!') { /* cdata and comments */
- for (tagdatalen = 0; (c = xml_getnext(x)) != EOF;) {
+ for (tagdatalen = 0; (c = x->getnext()) != EOF;) {
if (tagdatalen <= sizeof("[CDATA[") - 1) /* if (d < sizeof(x->data)) */
x->data[tagdatalen++] = c; /* TODO: prevent overflow */
if (c == '>')
@@ -404,7 +348,7 @@ xml_parse(XMLParser *x)
} else {
/* normal tag (open, short open, close), processing instruction. */
if (isspace(c))
- while ((c = xml_getnext(x)) != EOF && isspace(c))
+ while ((c = x->getnext()) != EOF && isspace(c))
;
if (c == EOF)
return;
@@ -412,7 +356,7 @@ xml_parse(XMLParser *x)
ispi = (c == '?') ? 1 : 0;
x->isshorttag = ispi;
taglen = 1;
- while ((c = xml_getnext(x)) != EOF) {
+ while ((c = x->getnext()) != EOF) {
if (c == '/') /* TODO: simplify short tag? */
x->isshorttag = 1; /* short tag */
else if (c == '>' || isspace(c)) {
@@ -444,7 +388,7 @@ xml_parse(XMLParser *x)
datalen = 0;
if (x->xmldatastart)
x->xmldatastart(x);
- while ((c = xml_getnext(x)) != EOF) {
+ while ((c = x->getnext()) != EOF) {
if (c == '&') {
if (datalen) {
x->data[datalen] = '\0';
@@ -453,7 +397,7 @@ xml_parse(XMLParser *x)
}
x->data[0] = c;
datalen = 1;
- while ((c = xml_getnext(x)) != EOF) {
+ while ((c = x->getnext()) != EOF) {
if (c == '<')
break;
if (datalen < sizeof(x->data) - 1)
@@ -491,26 +435,3 @@ xml_parse(XMLParser *x)
}
}
}
-
-void
-xml_parse_buf(XMLParser *x, const char *buf, size_t len)
-{
- struct xml_context_buf ctx = { .buf = buf, .len = len };
-
- x->getnext = xml_getnext_buf;
- x->getnext_data = (void *)&ctx;
- xml_parse(x);
-}
-
-void
-xml_parse_fd(XMLParser *x, int fd)
-{
- struct xml_context_fd ctx;
-
- memset(&ctx, 0, sizeof(ctx));
- ctx.fd = fd;
-
- x->getnext = xml_getnext_fd;
- x->getnext_data = (void *)&ctx;
- xml_parse(x);
-}
diff --git a/xml.h b/xml.h
index aa9c59d..6315874 100644
--- a/xml.h
+++ b/xml.h
@@ -23,8 +23,7 @@ typedef struct xmlparser {
void (*xmltagstartparsed)(struct xmlparser *, const char *,
size_t, int);
- int (*getnext)(struct xmlparser *);
- void *getnext_data; /* custom data for getnext */
+ int (*getnext)(void);
/* current tag */
char tag[1024];
@@ -43,5 +42,3 @@ ssize_t xml_namedentitytostr(const char *, char *, size_t);
ssize_t xml_numericetitytostr(const char *, char *, size_t);
void xml_parse(XMLParser *);
-void xml_parse_buf(XMLParser *, const char *, size_t);
-void xml_parse_fd(XMLParser *, int);