util.c (8769B) - raw


      1 #include <ctype.h>
      2 #include <errno.h>
      3 #include <stdarg.h>
      4 #include <stdio.h>
      5 #include <stdlib.h>
      6 #include <string.h>
      7 #include <wchar.h>
      8 
      9 #include "util.h"
     10 
     11 /* print to stderr, print error message of errno and exit().
     12    Unlike BSD err() it does not prefix __progname */
     13 __dead void
     14 err(int exitstatus, const char *fmt, ...)
     15 {
     16 	va_list ap;
     17 	int saved_errno;
     18 
     19 	saved_errno = errno;
     20 
     21 	if (fmt) {
     22 		va_start(ap, fmt);
     23 		vfprintf(stderr, fmt, ap);
     24 		va_end(ap);
     25 		fputs(": ", stderr);
     26 	}
     27 	fprintf(stderr, "%s\n", strerror(saved_errno));
     28 
     29 	exit(exitstatus);
     30 }
     31 
     32 /* print to stderr and exit().
     33    Unlike BSD errx() it does not prefix __progname */
     34 __dead void
     35 errx(int exitstatus, const char *fmt, ...)
     36 {
     37 	va_list ap;
     38 
     39 	if (fmt) {
     40 		va_start(ap, fmt);
     41 		vfprintf(stderr, fmt, ap);
     42 		va_end(ap);
     43 	}
     44 	fputs("\n", stderr);
     45 
     46 	exit(exitstatus);
     47 }
     48 
     49 /* strcasestr() included for portability */
     50 char *
     51 strcasestr(const char *h, const char *n)
     52 {
     53 	size_t i;
     54 
     55 	if (!n[0])
     56 		return (char *)h;
     57 
     58 	for (; *h; ++h) {
     59 		for (i = 0; n[i] && tolower((unsigned char)n[i]) ==
     60 		            tolower((unsigned char)h[i]); ++i)
     61 			;
     62 		if (n[i] == '\0')
     63 			return (char *)h;
     64 	}
     65 
     66 	return NULL;
     67 }
     68 
     69 /* Check if string has a non-empty scheme / protocol part. */
     70 int
     71 uri_hasscheme(const char *s)
     72 {
     73 	const char *p = s;
     74 
     75 	for (; isalpha((unsigned char)*p) || isdigit((unsigned char)*p) ||
     76 		       *p == '+' || *p == '-' || *p == '.'; p++)
     77 		;
     78 	/* scheme, except if empty and starts with ":" then it is a path */
     79 	return (*p == ':' && p != s);
     80 }
     81 
     82 /* Parse URI string `s` into an uri structure `u`.
     83    Returns 0 on success or -1 on failure */
     84 int
     85 uri_parse(const char *s, struct uri *u)
     86 {
     87 	const char *p = s;
     88 	char *endptr;
     89 	size_t i;
     90 	long l;
     91 
     92 	u->proto[0] = u->userinfo[0] = u->host[0] = u->port[0] = '\0';
     93 	u->path[0] = u->query[0] = u->fragment[0] = '\0';
     94 
     95 	/* protocol-relative */
     96 	if (*p == '/' && *(p + 1) == '/') {
     97 		p += 2; /* skip "//" */
     98 		goto parseauth;
     99 	}
    100 
    101 	/* scheme / protocol part */
    102 	for (; isalpha((unsigned char)*p) || isdigit((unsigned char)*p) ||
    103 		       *p == '+' || *p == '-' || *p == '.'; p++)
    104 		;
    105 	/* scheme, except if empty and starts with ":" then it is a path */
    106 	if (*p == ':' && p != s) {
    107 		if (*(p + 1) == '/' && *(p + 2) == '/')
    108 			p += 3; /* skip "://" */
    109 		else
    110 			p++; /* skip ":" */
    111 
    112 		if ((size_t)(p - s) >= sizeof(u->proto))
    113 			return -1; /* protocol too long */
    114 		memcpy(u->proto, s, p - s);
    115 		u->proto[p - s] = '\0';
    116 
    117 		if (*(p - 1) != '/')
    118 			goto parsepath;
    119 	} else {
    120 		p = s; /* no scheme format, reset to start */
    121 		goto parsepath;
    122 	}
    123 
    124 parseauth:
    125 	/* userinfo (username:password) */
    126 	i = strcspn(p, "@/?#");
    127 	if (p[i] == '@') {
    128 		if (i >= sizeof(u->userinfo))
    129 			return -1; /* userinfo too long */
    130 		memcpy(u->userinfo, p, i);
    131 		u->userinfo[i] = '\0';
    132 		p += i + 1;
    133 	}
    134 
    135 	/* IPv6 address */
    136 	if (*p == '[') {
    137 		/* bracket not found, host too short or too long */
    138 		i = strcspn(p, "]");
    139 		if (p[i] != ']' || i < 3)
    140 			return -1;
    141 		i++; /* including "]" */
    142 	} else {
    143 		/* domain / host part, skip until port, path or end. */
    144 		i = strcspn(p, ":/?#");
    145 	}
    146 	if (i >= sizeof(u->host))
    147 		return -1; /* host too long */
    148 	memcpy(u->host, p, i);
    149 	u->host[i] = '\0';
    150 	p += i;
    151 
    152 	/* port */
    153 	if (*p == ':') {
    154 		p++;
    155 		if ((i = strcspn(p, "/?#")) >= sizeof(u->port))
    156 			return -1; /* port too long */
    157 		memcpy(u->port, p, i);
    158 		u->port[i] = '\0';
    159 		/* check for valid port: range 1 - 65535, may be empty */
    160 		errno = 0;
    161 		l = strtol(u->port, &endptr, 10);
    162 		if (i && (errno || *endptr || l <= 0 || l > 65535))
    163 			return -1;
    164 		p += i;
    165 	}
    166 
    167 parsepath:
    168 	/* path */
    169 	if ((i = strcspn(p, "?#")) >= sizeof(u->path))
    170 		return -1; /* path too long */
    171 	memcpy(u->path, p, i);
    172 	u->path[i] = '\0';
    173 	p += i;
    174 
    175 	/* query */
    176 	if (*p == '?') {
    177 		p++;
    178 		if ((i = strcspn(p, "#")) >= sizeof(u->query))
    179 			return -1; /* query too long */
    180 		memcpy(u->query, p, i);
    181 		u->query[i] = '\0';
    182 		p += i;
    183 	}
    184 
    185 	/* fragment */
    186 	if (*p == '#') {
    187 		p++;
    188 		if ((i = strlen(p)) >= sizeof(u->fragment))
    189 			return -1; /* fragment too long */
    190 		memcpy(u->fragment, p, i);
    191 		u->fragment[i] = '\0';
    192 	}
    193 
    194 	return 0;
    195 }
    196 
    197 /* Transform and try to make the URI `u` absolute using base URI `b` into `a`.
    198    Follows some of the logic from "RFC 3986 - 5.2.2. Transform References".
    199    Returns 0 on success, -1 on error or truncation. */
    200 int
    201 uri_makeabs(struct uri *a, struct uri *u, struct uri *b)
    202 {
    203 	char *p;
    204 	int c;
    205 
    206 	strlcpy(a->fragment, u->fragment, sizeof(a->fragment));
    207 
    208 	if (u->proto[0] || u->host[0]) {
    209 		strlcpy(a->proto, u->proto[0] ? u->proto : b->proto, sizeof(a->proto));
    210 		strlcpy(a->host, u->host, sizeof(a->host));
    211 		strlcpy(a->userinfo, u->userinfo, sizeof(a->userinfo));
    212 		strlcpy(a->host, u->host, sizeof(a->host));
    213 		strlcpy(a->port, u->port, sizeof(a->port));
    214 		strlcpy(a->path, u->path, sizeof(a->path));
    215 		strlcpy(a->query, u->query, sizeof(a->query));
    216 		return 0;
    217 	}
    218 
    219 	strlcpy(a->proto, b->proto, sizeof(a->proto));
    220 	strlcpy(a->host, b->host, sizeof(a->host));
    221 	strlcpy(a->userinfo, b->userinfo, sizeof(a->userinfo));
    222 	strlcpy(a->host, b->host, sizeof(a->host));
    223 	strlcpy(a->port, b->port, sizeof(a->port));
    224 
    225 	if (!u->path[0]) {
    226 		strlcpy(a->path, b->path, sizeof(a->path));
    227 	} else if (u->path[0] == '/') {
    228 		strlcpy(a->path, u->path, sizeof(a->path));
    229 	} else {
    230 		a->path[0] = (b->host[0] && b->path[0] != '/') ? '/' : '\0';
    231 		a->path[1] = '\0';
    232 
    233 		if ((p = strrchr(b->path, '/'))) {
    234 			c = *(++p);
    235 			*p = '\0'; /* temporary NUL-terminate */
    236 			if (strlcat(a->path, b->path, sizeof(a->path)) >= sizeof(a->path))
    237 				return -1;
    238 			*p = c; /* restore */
    239 		}
    240 		if (strlcat(a->path, u->path, sizeof(a->path)) >= sizeof(a->path))
    241 			return -1;
    242 	}
    243 
    244 	if (u->path[0] || u->query[0])
    245 		strlcpy(a->query, u->query, sizeof(a->query));
    246 	else
    247 		strlcpy(a->query, b->query, sizeof(a->query));
    248 
    249 	return 0;
    250 }
    251 
    252 int
    253 uri_format(char *buf, size_t bufsiz, struct uri *u)
    254 {
    255 	return snprintf(buf, bufsiz, "%s%s%s%s%s%s%s%s%s%s%s%s",
    256 		u->proto,
    257 		u->userinfo[0] ? u->userinfo : "",
    258 		u->userinfo[0] ? "@" : "",
    259 		u->host,
    260 		u->port[0] ? ":" : "",
    261 		u->port,
    262 		u->host[0] && u->path[0] && u->path[0] != '/' ? "/" : "",
    263 		u->path,
    264 		u->query[0] ? "?" : "",
    265 		u->query,
    266 		u->fragment[0] ? "#" : "",
    267 		u->fragment);
    268 }
    269 
    270 /* Splits fields in the line buffer by replacing TAB separators with NUL ('\0')
    271  * terminators and assign these fields as pointers. If there are less fields
    272  * than expected then the field is an empty string constant. */
    273 void
    274 parseline(char *line, char *fields[FieldLast])
    275 {
    276 	char *prev, *s;
    277 	size_t i;
    278 
    279 	for (prev = line, i = 0;
    280 	    (s = strchr(prev, '\t')) && i < FieldLast - 1;
    281 	    i++) {
    282 		*s = '\0';
    283 		fields[i] = prev;
    284 		prev = s + 1;
    285 	}
    286 	fields[i++] = prev;
    287 	/* make non-parsed fields empty. */
    288 	for (; i < FieldLast; i++)
    289 		fields[i] = "";
    290 }
    291 
    292 /* Parse time to time_t, assumes time_t is signed, ignores fractions. */
    293 int
    294 strtotime(const char *s, time_t *t)
    295 {
    296 	long long l;
    297 	char *e;
    298 
    299 	errno = 0;
    300 	l = strtoll(s, &e, 10);
    301 	if (errno || *s == '\0' || *e)
    302 		return -1;
    303 
    304 	/* NOTE: the type long long supports the 64-bit range. If time_t is
    305 	   64-bit it is "2038-ready", otherwise it is truncated/wrapped. */
    306 	if (t)
    307 		*t = (time_t)l;
    308 
    309 	return 0;
    310 }
    311 
    312 /* Escape characters below as HTML 2.0 / XML 1.0. */
    313 void
    314 xmlencode(const char *s, FILE *fp)
    315 {
    316 	for (; *s; ++s) {
    317 		switch (*s) {
    318 		case '<':  fputs("&lt;",   fp); break;
    319 		case '>':  fputs("&gt;",   fp); break;
    320 		case '\'': fputs("&#39;",  fp); break;
    321 		case '&':  fputs("&amp;",  fp); break;
    322 		case '"':  fputs("&quot;", fp); break;
    323 		default:   putc(*s, fp);
    324 		}
    325 	}
    326 }
    327 
    328 /* print `len` columns of characters. If string is shorter pad the rest with
    329  * characters `pad`. */
    330 void
    331 printutf8pad(FILE *fp, const char *s, size_t len, int pad)
    332 {
    333 	wchar_t wc;
    334 	size_t col = 0, i, slen;
    335 	int inc, rl, w;
    336 
    337 	if (!len)
    338 		return;
    339 
    340 	slen = strlen(s);
    341 	for (i = 0; i < slen; i += inc) {
    342 		inc = 1; /* next byte */
    343 		if ((unsigned char)s[i] < 32) {
    344 			continue; /* skip control characters */
    345 		} else if ((unsigned char)s[i] >= 127) {
    346 			rl = mbtowc(&wc, s + i, slen - i < 4 ? slen - i : 4);
    347 			inc = rl;
    348 			if (rl < 0) {
    349 				mbtowc(NULL, NULL, 0); /* reset state */
    350 				inc = 1; /* invalid, seek next byte */
    351 				w = 1; /* replacement char is one width */
    352 			} else if ((w = wcwidth(wc)) == -1) {
    353 				continue;
    354 			}
    355 
    356 			if (col + w > len || (col + w == len && s[i + inc])) {
    357 				fputs(PAD_TRUNCATE_SYMBOL, fp); /* ellipsis */
    358 				col++;
    359 				break;
    360 			} else if (rl < 0) {
    361 				fputs(UTF_INVALID_SYMBOL, fp); /* replacement */
    362 				col++;
    363 				continue;
    364 			}
    365 			fwrite(&s[i], 1, rl, fp);
    366 			col += w;
    367 		} else {
    368 			/* optimization: simple ASCII character */
    369 			if (col + 1 > len || (col + 1 == len && s[i + 1])) {
    370 				fputs(PAD_TRUNCATE_SYMBOL, fp); /* ellipsis */
    371 				col++;
    372 				break;
    373 			}
    374 			putc(s[i], fp);
    375 			col++;
    376 		}
    377 
    378 	}
    379 	for (; col < len; ++col)
    380 		putc(pad, fp);
    381 }