summary | refs | log | tree | commit | diff
path: root/xml.c
diff options
context:
space:
mode:
author: Hiltjo Posthuma <hiltjo@codemadness.org> 2018-12-02 12:33:20 +0100
committer: Hiltjo Posthuma <hiltjo@codemadness.org> 2018-12-02 12:33:20 +0100
commit: b4a6220906d4ceb047e73e35554b5ab6898a8db3 (patch)
tree: a6af37809838d56bb136129a289b2dc3a534b972 /xml.c
parent: af021fb1161c2b0f669991bf64a7cbb696830156 (diff)
XML tag parse improvements for PI and end tags
- Stricter parsing of tags, no whitespace stripping after <.
- For end tags the "internal" context x->tag would be "/sometag". Make sure this matches exactly with the parameter tag.
- Reset tagname after parsing an end tag.
- Make end tag handling more consistent.
- Remove temporary variable taglen.
Diffstat (limited to 'xml.c')
-rw-r--r-- xml.c 52
1 files changed, 29 insertions, 23 deletions
diff --git a/xml.c b/xml.c
index 89a7fd8..6586a8c 100644
--- a/xml.c
+++ b/xml.c
@@ -334,8 +334,8 @@ xml_entitytostr(const char *e, char *buf, size_t bufsiz)
void
xml_parse(XMLParser *x)
{
- int c, ispi;
- size_t datalen, tagdatalen, taglen;
+ size_t datalen, tagdatalen;
+ int c, isend;
if (!x->getnext)
return;
@@ -367,30 +367,32 @@ xml_parse(XMLParser *x)
}
}
} else {
- x->tag[0] = '\0';
- x->taglen = 0;
-
/* normal tag (open, short open, close), processing instruction. */
- if (isspace(c))
- while ((c = x->getnext()) != EOF && isspace(c))
- ;
- if (c == EOF)
- return;
x->tag[0] = c;
- ispi = (c == '?') ? 1 : 0;
- x->isshorttag = ispi;
- taglen = 1;
+ x->taglen = 1;
+ x->isshorttag = isend = 0;
+
+ /* treat processing instruction as shorttag, don't strip "?" prefix. */
+ if (c == '?') {
+ x->isshorttag = 1;
+ } else if (c == '/') {
+ if ((c = x->getnext()) == EOF)
+ return;
+ x->tag[0] = c;
+ isend = 1;
+ }
+
while ((c = x->getnext()) != EOF) {
if (c == '/')
x->isshorttag = 1; /* short tag */
else if (c == '>' || isspace(c)) {
- x->tag[taglen] = '\0';
- if (x->tag[0] == '/') { /* end tag, starts with </ */
- x->taglen = --taglen; /* len -1 because of / */
- if (taglen && x->xmltagend)
- x->xmltagend(x, &(x->tag)[1], x->taglen, 0);
+ x->tag[x->taglen] = '\0';
+ if (isend) { /* end tag, starts with </ */
+ if (x->xmltagend)
+ x->xmltagend(x, x->tag, x->taglen, x->isshorttag);
+ x->tag[0] = '\0';
+ x->taglen = 0;
} else {
- x->taglen = taglen;
/* start tag */
if (x->xmltagstart)
x->xmltagstart(x, x->tag, x->taglen);
@@ -400,11 +402,15 @@ xml_parse(XMLParser *x)
x->xmltagstartparsed(x, x->tag, x->taglen, x->isshorttag);
}
/* call tagend for shortform or processing instruction */
- if ((x->isshorttag || ispi) && x->xmltagend)
- x->xmltagend(x, x->tag, x->taglen, 1);
+ if (x->isshorttag) {
+ if (x->xmltagend)
+ x->xmltagend(x, x->tag, x->taglen, x->isshorttag);
+ x->tag[0] = '\0';
+ x->taglen = 0;
+ }
break;
- } else if (taglen < sizeof(x->tag) - 1)
- x->tag[taglen++] = c; /* NOTE: tag name truncation */
+ } else if (x->taglen < sizeof(x->tag) - 1)
+ x->tag[x->taglen++] = c; /* NOTE: tag name truncation */
}
}
} else {