diff options
Diffstat (limited to 'util.c')
-rw-r--r-- | util.c | 284 |
1 files changed, 160 insertions, 124 deletions
@@ -7,167 +7,203 @@ #include "util.h" +/* check if string has a non-empty scheme / protocol part */ int -parseuri(const char *s, struct uri *u, int rel) +uri_hasscheme(const char *s) { - const char *p = s, *b; - char *endptr = NULL; + const char *p = s; + + for (; isalpha((unsigned char)*p) || isdigit((unsigned char)*p) || + *p == '+' || *p == '-' || *p == '.'; p++) + ; + /* scheme, except if empty and starts with ":" then it is a path */ + return (*p == ':' && p != s); +} + +int +uri_parse(const char *s, struct uri *u) +{ + const char *p = s; + char *endptr; size_t i; - unsigned long l; + long l; - u->proto[0] = u->host[0] = u->path[0] = u->port[0] = '\0'; - if (!*s) - return 0; + u->proto[0] = u->userinfo[0] = u->host[0] = u->port[0] = '\0'; + u->path[0] = u->query[0] = u->fragment[0] = '\0'; - /* prefix is "//", don't read protocol, skip to domain parsing */ - if (!strncmp(p, "//", 2)) { + /* protocol-relative */ + if (*p == '/' && *(p + 1) == '/') { p += 2; /* skip "//" */ - } else { - /* protocol part */ - for (p = s; isalpha((unsigned char)*p) || isdigit((unsigned char)*p) || - *p == '+' || *p == '-' || *p == '.'; p++) - ; - if (!strncmp(p, "://", 3)) { - if ((size_t)(p - s) >= sizeof(u->proto)) - return -1; /* protocol too long */ - memcpy(u->proto, s, p - s); - u->proto[p - s] = '\0'; + goto parseauth; + } + + /* scheme / protocol part */ + for (; isalpha((unsigned char)*p) || isdigit((unsigned char)*p) || + *p == '+' || *p == '-' || *p == '.'; p++) + ; + /* scheme, except if empty and starts with ":" then it is a path */ + if (*p == ':' && p != s) { + if (*(p + 1) == '/' && *(p + 2) == '/') p += 3; /* skip "://" */ - } else { - p = s; /* no protocol format, set to start */ - /* relative url: read rest as path, else as domain */ - if (rel) - goto readpath; - } + else + p++; /* skip ":" */ + + if ((size_t)(p - s) >= sizeof(u->proto)) + return -1; /* protocol too long */ + memcpy(u->proto, s, p - s); + u->proto[p - s] = '\0'; + + if (*(p - 1) != '/') + goto parsepath; + } else { + p = s; /* no scheme format, reset to start */ + goto parsepath; + } + +parseauth: + /* userinfo (username:password) */ + i = strcspn(p, "@/?#"); + if (p[i] == '@') { + if (i >= sizeof(u->userinfo)) + return -1; /* userinfo too long */ + memcpy(u->userinfo, p, i); + u->userinfo[i] = '\0'; + p += i + 1; } + /* IPv6 address */ if (*p == '[') { - /* bracket not found or host too long */ - if (!(b = strchr(p, ']')) || (size_t)(b - p) < 3 || - (size_t)(b - p) >= sizeof(u->host)) + /* bracket not found, host too short or too long */ + i = strcspn(p, "]"); + if (p[i] != ']' || i < 3) return -1; - memcpy(u->host, p, b - p + 1); - u->host[b - p + 1] = '\0'; - p = b + 1; + i++; /* including "]" */ } else { /* domain / host part, skip until port, path or end. */ - if ((i = strcspn(p, ":/")) >= sizeof(u->host)) - return -1; /* host too long */ - memcpy(u->host, p, i); - u->host[i] = '\0'; - p = &p[i]; + i = strcspn(p, ":/?#"); } + if (i >= sizeof(u->host)) + return -1; /* host too long */ + memcpy(u->host, p, i); + u->host[i] = '\0'; + p += i; + /* port */ if (*p == ':') { - if ((i = strcspn(++p, "/")) >= sizeof(u->port)) + p++; + if ((i = strcspn(p, "/?#")) >= sizeof(u->port)) return -1; /* port too long */ memcpy(u->port, p, i); u->port[i] = '\0'; - /* check for valid port: range 1 - 65535 */ + /* check for valid port: range 1 - 65535, may be empty */ errno = 0; - l = strtoul(u->port, &endptr, 10); - if (errno || u->port[0] == '\0' || *endptr || - !l || l > 65535) + l = strtol(u->port, &endptr, 10); + if (i && (errno || *endptr || l <= 0 || l > 65535)) return -1; - p = &p[i]; + p += i; } -readpath: - if (u->host[0]) { - p = &p[strspn(p, "/")]; - strlcpy(u->path, "/", sizeof(u->path)); - } else { - /* absolute uri must have a host specified */ - if (!rel) - return -1; - } - /* treat truncation as an error */ - if (strlcat(u->path, p, sizeof(u->path)) >= sizeof(u->path)) - return -1; - return 0; -} -static int -encodeuri(char *buf, size_t bufsiz, const char *s) -{ - static const char *table = "0123456789ABCDEF"; - size_t i, b; +parsepath: + /* path */ + if ((i = strcspn(p, "?#")) >= sizeof(u->path)) + return -1; /* path too long */ + memcpy(u->path, p, i); + u->path[i] = '\0'; + p += i; - for (i = 0, b = 0; s[i]; i++) { - if ((unsigned char)s[i] <= ' ' || - (unsigned char)s[i] >= 127) { - if (b + 3 >= bufsiz) - return -1; - buf[b++] = '%'; - buf[b++] = table[((unsigned char)s[i] >> 4) & 15]; - buf[b++] = table[(unsigned char)s[i] & 15]; - } else if (b < bufsiz) { - buf[b++] = s[i]; - } else { - return -1; - } + /* query */ + if (*p == '?') { + p++; + if ((i = strcspn(p, "#")) >= sizeof(u->query)) + return -1; /* query too long */ + memcpy(u->query, p, i); + u->query[i] = '\0'; + p += i; + } + + /* fragment */ + if (*p == '#') { + p++; + if ((i = strlen(p)) >= sizeof(u->fragment)) + return -1; /* fragment too long */ + memcpy(u->fragment, p, i); + u->fragment[i] = '\0'; } - if (b >= bufsiz) - return -1; - buf[b] = '\0'; return 0; } -/* Get absolute uri; if `link` is relative use `base` to make it absolute. - * the returned string in `buf` is uri encoded, see: encodeuri(). */ +/* Transform and try to make the URI `u` absolute using base URI `b` into `a`. + Follows some of the logic from "RFC 3986 - 5.2.2. Transform References". + Returns 0 on success, -1 on error or truncation. */ int -absuri(char *buf, size_t bufsiz, const char *link, const char *base) +uri_makeabs(struct uri *a, struct uri *u, struct uri *b) { - struct uri ulink, ubase; - char tmp[4096], *host, *p, *port; - int c, r; - size_t i; + char *p; + int c; - buf[0] = '\0'; - if (parseuri(base, &ubase, 0) == -1 || - parseuri(link, &ulink, 1) == -1 || - (!ulink.host[0] && !ubase.host[0])) - return -1; + strlcpy(a->fragment, u->fragment, sizeof(a->fragment)); - if (!strncmp(link, "//", 2)) { - host = ulink.host; - port = ulink.port; - } else { - host = ulink.host[0] ? ulink.host : ubase.host; - port = ulink.port[0] ? ulink.port : ubase.port; + if (u->proto[0] || u->host[0]) { + strlcpy(a->proto, u->proto[0] ? u->proto : b->proto, sizeof(a->proto)); + strlcpy(a->host, u->host, sizeof(a->host)); + strlcpy(a->userinfo, u->userinfo, sizeof(a->userinfo)); + strlcpy(a->host, u->host, sizeof(a->host)); + strlcpy(a->port, u->port, sizeof(a->port)); + strlcpy(a->path, u->path, sizeof(a->path)); + strlcpy(a->query, u->query, sizeof(a->query)); + return 0; } - r = snprintf(tmp, sizeof(tmp), "%s://%s%s%s", - ulink.proto[0] ? - ulink.proto : - (ubase.proto[0] ? ubase.proto : "http"), - host, - port[0] ? ":" : "", - port); - if (r < 0 || (size_t)r >= sizeof(tmp)) - return -1; /* error or truncation */ - - /* relative to root */ - if (!ulink.host[0] && ulink.path[0] != '/') { - /* relative to base url path */ - if (ulink.path[0]) { - if ((p = strrchr(ubase.path, '/'))) { - /* temporary null-terminate */ - c = *(++p); - *p = '\0'; - i = strlcat(tmp, ubase.path, sizeof(tmp)); - *p = c; /* restore */ - if (i >= sizeof(tmp)) - return -1; - } - } else if (strlcat(tmp, ubase.path, sizeof(tmp)) >= - sizeof(tmp)) { - return -1; + + strlcpy(a->proto, b->proto, sizeof(a->proto)); + strlcpy(a->host, b->host, sizeof(a->host)); + strlcpy(a->userinfo, b->userinfo, sizeof(a->userinfo)); + strlcpy(a->host, b->host, sizeof(a->host)); + strlcpy(a->port, b->port, sizeof(a->port)); + + if (!u->path[0]) { + strlcpy(a->path, b->path, sizeof(a->path)); + } else if (u->path[0] == '/') { + strlcpy(a->path, u->path, sizeof(a->path)); + } else { + a->path[0] = (a->host[0] && b->path[0] != '/') ? '/' : '\0'; + a->path[1] = '\0'; + + if ((p = strrchr(b->path, '/'))) { + c = *(++p); + *p = '\0'; /* temporary NUL-terminate */ + if (strlcat(a->path, b->path, sizeof(a->path)) >= sizeof(a->path)) + return -1; + *p = c; /* restore */ } + if (strlcat(a->path, u->path, sizeof(a->path)) >= sizeof(a->path)) + return -1; } - if (strlcat(tmp, ulink.path, sizeof(tmp)) >= sizeof(tmp)) - return -1; - return encodeuri(buf, bufsiz, tmp); + if (u->path[0] || u->query[0]) + strlcpy(a->query, u->query, sizeof(a->query)); + else + strlcpy(a->query, b->query, sizeof(a->query)); + + return 0; +} + +int +uri_format(char *buf, size_t bufsiz, struct uri *u) +{ + return snprintf(buf, bufsiz, "%s%s%s%s%s%s%s%s%s%s%s%s", + u->proto, + u->userinfo[0] ? u->userinfo : "", + u->userinfo[0] ? "@" : "", + u->host, + u->port[0] ? ":" : "", + u->port, + u->host[0] && u->path[0] && u->path[0] != '/' ? "/" : "", + u->path, + u->query[0] ? "?" : "", + u->query, + u->fragment[0] ? "#" : "", + u->fragment); } /* Splits fields in the line buffer by replacing TAB separators with NUL ('\0') |