From d5ee385b4b5f19934a00408a2addc70f965ea4a9 Mon Sep 17 00:00:00 2001
From: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Tue, 29 Mar 2022 11:03:54 +0200
Subject: compatibility: reduce the assumption the builtin libc locale is
 ASCII-compatible

This is not clearly defined by the C99 standard.
Define ctype-like macros so that character classification is always ASCII /
UTF-8-compatible (and not extended ASCII or similar, as was noticed on OpenBSD 3.8).

(In practice, modern libc implementations are all ASCII- and UTF-8-compatible;
otherwise many programs would break.)
---
 xml.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

(limited to 'xml.c')

diff --git a/xml.c b/xml.c
index f16cbbb..3bd9557 100644
--- a/xml.c
+++ b/xml.c
@@ -1,4 +1,3 @@
-#include <ctype.h>
 #include <errno.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -6,6 +5,9 @@
 
 #include "xml.h"
 
+#define ISALPHA(c) (((((unsigned)(c)) | 32) - 'a') < 26) /* ASCII A-Z/a-z: "| 32" folds case, unsigned wrap rejects the rest */
+#define ISSPACE(c) ((c) == ' ' || ((((unsigned)(c)) - '\t') < 5)) /* ' ' or the 5 codes from '\t' ('\t' '\n' '\v' '\f' '\r' in ASCII); c is evaluated twice */
+
 static void
 xml_parseattrs(XMLParser *x)
 {
@@ -13,7 +15,7 @@ xml_parseattrs(XMLParser *x)
 	int c, endsep, endname = 0, valuestart = 0;
 
 	while ((c = GETNEXT()) != EOF) {
-		if (isspace(c)) {
+		if (ISSPACE(c)) {
 			if (namelen)
 				endname = 1;
 			continue;
@@ -23,7 +25,7 @@ xml_parseattrs(XMLParser *x)
 			x->name[namelen] = '\0';
 			valuestart = 1;
 			endname = 1;
-		} else if (namelen && ((endname && !valuestart && isalpha(c)) || (c == '>' || c == '/'))) {
+		} else if (namelen && ((endname && !valuestart && ISALPHA(c)) || (c == '>' || c == '/'))) {
 			/* attribute without value */
 			x->name[namelen] = '\0';
 			if (x->xmlattrstart)
@@ -44,7 +46,7 @@ xml_parseattrs(XMLParser *x)
 			if (c == '\'' || c == '"') {
 				endsep = c;
 			} else {
-				endsep = ' '; /* isspace() */
+				endsep = ' '; /* ISSPACE() */
 				goto startvalue;
 			}
 
@@ -58,7 +60,7 @@ startvalue:
 					x->data[0] = c;
 					valuelen = 1;
 					while ((c = GETNEXT()) != EOF) {
-						if (c == endsep || (endsep == ' ' && (c == '>' || isspace(c))))
+						if (c == endsep || (endsep == ' ' && (c == '>' || ISSPACE(c))))
 							break;
 						if (valuelen < sizeof(x->data) - 1)
 							x->data[valuelen++] = c;
@@ -79,7 +81,7 @@ startvalue:
 							break;
 						}
 					}
-				} else if (c != endsep && !(endsep == ' ' && (c == '>' || isspace(c)))) {
+				} else if (c != endsep && !(endsep == ' ' && (c == '>' || ISSPACE(c)))) {
 					if (valuelen < sizeof(x->data) - 1) {
 						x->data[valuelen++] = c;
 					} else {
@@ -90,7 +92,7 @@ startvalue:
 						valuelen = 1;
 					}
 				}
-				if (c == endsep || (endsep == ' ' && (c == '>' || isspace(c)))) {
+				if (c == endsep || (endsep == ' ' && (c == '>' || ISSPACE(c)))) {
 					x->data[valuelen] = '\0';
 					if (x->xmlattr)
 						x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
@@ -328,7 +330,7 @@ xml_parse(XMLParser *x)
 				while ((c = GETNEXT()) != EOF) {
 					if (c == '/')
 						x->isshorttag = 1; /* short tag */
-					else if (c == '>' || isspace(c)) {
+					else if (c == '>' || ISSPACE(c)) {
 						x->tag[x->taglen] = '\0';
 						if (isend) { /* end tag, starts with </ */
 							if (x->xmltagend)
@@ -339,7 +341,7 @@ xml_parse(XMLParser *x)
 							/* start tag */
 							if (x->xmltagstart)
 								x->xmltagstart(x, x->tag, x->taglen);
-							if (isspace(c))
+							if (ISSPACE(c))
 								xml_parseattrs(x);
 							if (x->xmltagstartparsed)
 								x->xmltagstartparsed(x, x->tag, x->taglen, x->isshorttag);
-- 
cgit v1.2.3


From 60e402d0f2c086dbbbd21436bb1b3aa5ad9b77d6 Mon Sep 17 00:00:00 2001
From: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Fri, 26 Aug 2022 21:55:35 +0200
Subject: improve comment: uppercase cdata -> CDATA

---
 xml.c | 2 +-
 xml.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'xml.c')

diff --git a/xml.c b/xml.c
index 3bd9557..a82053e 100644
--- a/xml.c
+++ b/xml.c
@@ -292,7 +292,7 @@ xml_parse(XMLParser *x)
 			if ((c = GETNEXT()) == EOF)
 				return;
 
-			if (c == '!') { /* cdata and comments */
+			if (c == '!') { /* CDATA and comments */
 				for (tagdatalen = 0; (c = GETNEXT()) != EOF;) {
 					/* NOTE: sizeof(x->data) must be at least sizeof("[CDATA[") */
 					if (tagdatalen <= sizeof("[CDATA[") - 1)
diff --git a/xml.h b/xml.h
index cef4a05..ad7d26b 100644
--- a/xml.h
+++ b/xml.h
@@ -34,7 +34,7 @@ typedef struct xmlparser {
 	int isshorttag;
 	/* current attribute name */
 	char name[1024];
-	/* data buffer used for tag data, cdata and attribute data */
+	/* data buffer used for tag data, CDATA and attribute data */
 	char data[BUFSIZ];
 } XMLParser;
 
-- 
cgit v1.2.3


From 21a263cb27aeaf02b4a0a0319f435fac92f1ea28 Mon Sep 17 00:00:00 2001
From: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Tue, 15 Aug 2023 19:10:51 +0200
Subject: improve wording and small typos

---
 sfeed.c             | 6 +++---
 sfeed_curses.c      | 2 +-
 sfeed_opml_import.c | 2 +-
 sfeed_web.c         | 2 +-
 sfeed_xmlenc.c      | 2 +-
 xml.c               | 4 ++--
 xml.h               | 2 +-
 7 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'xml.c')

diff --git a/sfeed.c b/sfeed.c
index 888a575..cdd528c 100644
--- a/sfeed.c
+++ b/sfeed.c
@@ -724,8 +724,8 @@ xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl,
 	if (!ctx.tag.id)
 		return;
 
-	/* content-type may be: Atom: text, xhtml, html or mime-type.
-	   MRSS (media:description): plain, html. */
+	/* content-type may be for Atom: text, xhtml, html or a mime-type.
+	   for MRSS (media:description): plain, html. */
 	if (ISCONTENTTAG(ctx)) {
 		if (isattr(n, nl, STRP("type")))
 			string_append(&attrtype, v, vl);
@@ -1073,7 +1073,7 @@ main(int argc, char *argv[])
 	parser.xmltagstart = xmltagstart;
 	parser.xmltagstartparsed = xmltagstartparsed;
 
-	/* NOTE: getnext is defined in xml.h for inline optimization */
+	/* NOTE: GETNEXT is defined in xml.h for inline optimization */
 	xml_parse(&parser);
 
 	checkfileerror(stdin, "<stdin>", 'r');
diff --git a/sfeed_curses.c b/sfeed_curses.c
index 95421fd..0434812 100644
--- a/sfeed_curses.c
+++ b/sfeed_curses.c
@@ -1875,7 +1875,7 @@ markread(struct pane *p, off_t from, off_t to, int isread)
 		_exit(status);
 	default:
 		/* waitpid() and block on process status change,
-		   fail if exit statuscode was unavailable or non-zero */
+		   fail if the exit status code was unavailable or non-zero */
 		if (waitpid(pid, &status, 0) <= 0 || status)
 			break;
 
diff --git a/sfeed_opml_import.c b/sfeed_opml_import.c
index 9922133..ce33aac 100644
--- a/sfeed_opml_import.c
+++ b/sfeed_opml_import.c
@@ -96,7 +96,7 @@ main(void)
 	    "# list of feeds to fetch:\n"
 	    "feeds() {\n"
 	    "	# feed <name> <feedurl> [basesiteurl] [encoding]\n", stdout);
-	/* NOTE: getnext is defined in xml.h for inline optimization */
+	/* NOTE: GETNEXT is defined in xml.h for inline optimization */
 	xml_parse(&parser);
 	fputs("}\n", stdout);
 
diff --git a/sfeed_web.c b/sfeed_web.c
index 0082f2d..e25e91c 100644
--- a/sfeed_web.c
+++ b/sfeed_web.c
@@ -132,7 +132,7 @@ main(int argc, char *argv[])
 	parser.xmltagstart = xmltagstart;
 	parser.xmltagstartparsed = xmltagstartparsed;
 
-	/* NOTE: getnext is defined in xml.h for inline optimization */
+	/* NOTE: GETNEXT is defined in xml.h for inline optimization */
 	xml_parse(&parser);
 
 	checkfileerror(stdin, "<stdin>", 'r');
diff --git a/sfeed_xmlenc.c b/sfeed_xmlenc.c
index 7fc93ae..461c047 100644
--- a/sfeed_xmlenc.c
+++ b/sfeed_xmlenc.c
@@ -52,7 +52,7 @@ main(void)
 	parser.xmlattrend = xmlattrend;
 	parser.xmltagstart = xmltagstart;
 
-	/* NOTE: getnext is defined in xml.h for inline optimization */
+	/* NOTE: GETNEXT is defined in xml.h for inline optimization */
 	xml_parse(&parser);
 
 	checkfileerror(stdin, "<stdin>", 'r');
diff --git a/xml.c b/xml.c
index a82053e..1524d1f 100644
--- a/xml.c
+++ b/xml.c
@@ -317,7 +317,7 @@ xml_parse(XMLParser *x)
 				x->taglen = 1;
 				x->isshorttag = isend = 0;
 
-				/* treat processing instruction as shorttag, don't strip "?" prefix. */
+				/* treat processing instruction as short tag, don't strip "?" prefix. */
 				if (c == '?') {
 					x->isshorttag = 1;
 				} else if (c == '/') {
@@ -346,7 +346,7 @@ xml_parse(XMLParser *x)
 							if (x->xmltagstartparsed)
 								x->xmltagstartparsed(x, x->tag, x->taglen, x->isshorttag);
 						}
-						/* call tagend for shortform or processing instruction */
+						/* call tagend for short tag or processing instruction */
 						if (x->isshorttag) {
 							if (x->xmltagend)
 								x->xmltagend(x, x->tag, x->taglen, x->isshorttag);
diff --git a/xml.h b/xml.h
index 122726c..6f3cf71 100644
--- a/xml.h
+++ b/xml.h
@@ -30,7 +30,7 @@ typedef struct xmlparser {
 	/* current tag */
 	char tag[1024];
 	size_t taglen;
-	/* current tag is in shortform ? <tag /> */
+	/* current tag is a short tag ? <tag /> */
 	int isshorttag;
 	/* current attribute name */
 	char name[1024];
-- 
cgit v1.2.3


From 1f6dfac83e8325a42dc5faa2e31cc25593acdfc8 Mon Sep 17 00:00:00 2001
From: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Sun, 30 Jun 2024 09:59:48 +0200
Subject: improve parsing whitespace after end tag names

Simplified test-case:

https://git.codemadness.org/sfeed_tests/commit/e091160c3125322193bd8f27691c87eaa48cfc93.html
---
 xml.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'xml.c')

diff --git a/xml.c b/xml.c
index 1524d1f..35b7961 100644
--- a/xml.c
+++ b/xml.c
@@ -333,6 +333,8 @@ xml_parse(XMLParser *x)
 					else if (c == '>' || ISSPACE(c)) {
 						x->tag[x->taglen] = '\0';
 						if (isend) { /* end tag, starts with </ */
+							while (c != '>' && c != EOF) /* skip until > */
+								c = GETNEXT();
 							if (x->xmltagend)
 								x->xmltagend(x, x->tag, x->taglen, x->isshorttag);
 							x->tag[0] = '\0';
-- 
cgit v1.2.3