summary | refs | log | tree | commit | diff
path: root/xml.c
diff options
context:
space:
mode:
author Hiltjo Posthuma <hiltjo@codemadness.org> 2018-08-21 20:15:50 +0200
committer Hiltjo Posthuma <hiltjo@codemadness.org> 2018-08-21 21:21:03 +0200
commit a40fe1970021b85d2f12b218027ee6bfd23de1e9 (patch)
tree fb6d5b90273e012217e3fe4cff2444a813e1e282 /xml.c
parent c58600a2a7ee2081c644268fc40269fd2beee77c (diff)
xml: rewrite codepointtoutf8 function
No more converting to a uint32_t type; just convert to a byte buffer. Tested on little- and big-endian machines. Hopefully the code is clearer too.
Diffstat (limited to 'xml.c')
-rw-r--r--xml.c45
1 files changed, 25 insertions, 20 deletions
diff --git a/xml.c b/xml.c
index 83a54d9..6b58bd4 100644
--- a/xml.c
+++ b/xml.c
@@ -207,26 +207,31 @@ xml_parsecdata(XMLParser *x)
static int
codepointtoutf8(const uint32_t r, uint8_t *s)
{
- if (cp >= 0x10000) {
- /* 4 bytes */
- *utf = 0xf0808080 | ((cp & 0xfc0000) << 6) |
- ((cp & 0x3f000) << 4) | ((cp & 0xfc0) << 2) |
- (cp & 0x3f);
- return 4;
- } else if (cp >= 0x00800) {
- /* 3 bytes */
- *utf = 0xe08080 |
- ((cp & 0x3f000) << 4) | ((cp & 0xfc0) << 2) |
- (cp & 0x3f);
- return 3;
- } else if (cp >= 0x80) {
- /* 2 bytes */
- *utf = 0xc080 |
- ((cp & 0xfc0) << 2) | (cp & 0x3f);
+ if (r == 0) {
+ return 0; /* NUL byte */
+ } else if (r <= 0x7F) {
+ /* 1 byte: 0aaaaaaa */
+ s[0] = r;
+ return 1;
+ } else if (r <= 0x07FF) {
+ /* 2 bytes: 00000aaa aabbbbbb */
+ s[0] = 0xC0 | ((r & 0x0007C0) >> 6); /* 110aaaaa */
+ s[1] = 0x80 | (r & 0x00003F); /* 10bbbbbb */
return 2;
+ } else if (r <= 0xFFFF) {
+ /* 3 bytes: aaaabbbb bbcccccc */
+ s[0] = 0xE0 | ((r & 0x00F000) >> 12); /* 1110aaaa */
+ s[1] = 0x80 | ((r & 0x000FC0) >> 6); /* 10bbbbbb */
+ s[2] = 0x80 | (r & 0x00003F); /* 10cccccc */
+ return 3;
+ } else {
+ /* 4 bytes: 000aaabb bbbbcccc ccdddddd */
+ s[0] = 0xF0 | ((r & 0x1C0000) >> 18); /* 11110aaa */
+ s[1] = 0x80 | ((r & 0x03F000) >> 12); /* 10bbbbbb */
+ s[2] = 0x80 | ((r & 0x000FC0) >> 6); /* 10cccccc */
+ s[3] = 0x80 | (r & 0x00003F); /* 10dddddd */
+ return 4;
}
- *utf = cp & 0xff;
- return *utf ? 1 : 0; /* 1 byte */
}
static int
@@ -270,8 +275,8 @@ namedentitytostr(const char *e, char *buf, size_t bufsiz)
static int
numericentitytostr(const char *e, char *buf, size_t bufsiz)
{
- uint32_t l = 0, cp = 0;
- size_t b, len;
+ uint32_t l;
+ int len;
char *end;
/* buffer is too small */