/* util.c */
1 #include <ctype.h> 2 #include <errno.h> 3 #include <stdarg.h> 4 #include <stdio.h> 5 #include <stdlib.h> 6 #include <string.h> 7 #include <wchar.h> 8 9 #include "util.h" 10 11 /* print to stderr, print error message of errno and exit(). 12 Unlike BSD err() it does not prefix __progname */ 13 __dead void 14 err(int exitstatus, const char *fmt, ...) 15 { 16 va_list ap; 17 int saved_errno; 18 19 saved_errno = errno; 20 21 if (fmt) { 22 va_start(ap, fmt); 23 vfprintf(stderr, fmt, ap); 24 va_end(ap); 25 fputs(": ", stderr); 26 } 27 fprintf(stderr, "%s\n", strerror(saved_errno)); 28 29 exit(exitstatus); 30 } 31 32 /* print to stderr and exit(). 33 Unlike BSD errx() it does not prefix __progname */ 34 __dead void 35 errx(int exitstatus, const char *fmt, ...) 36 { 37 va_list ap; 38 39 if (fmt) { 40 va_start(ap, fmt); 41 vfprintf(stderr, fmt, ap); 42 va_end(ap); 43 } 44 fputs("\n", stderr); 45 46 exit(exitstatus); 47 } 48 49 /* strcasestr() included for portability */ 50 char * 51 strcasestr(const char *h, const char *n) 52 { 53 size_t i; 54 55 if (!n[0]) 56 return (char *)h; 57 58 for (; *h; ++h) { 59 for (i = 0; n[i] && tolower((unsigned char)n[i]) == 60 tolower((unsigned char)h[i]); ++i) 61 ; 62 if (n[i] == '\0') 63 return (char *)h; 64 } 65 66 return NULL; 67 } 68 69 /* Check if string has a non-empty scheme / protocol part. */ 70 int 71 uri_hasscheme(const char *s) 72 { 73 const char *p = s; 74 75 for (; isalpha((unsigned char)*p) || isdigit((unsigned char)*p) || 76 *p == '+' || *p == '-' || *p == '.'; p++) 77 ; 78 /* scheme, except if empty and starts with ":" then it is a path */ 79 return (*p == ':' && p != s); 80 } 81 82 /* Parse URI string `s` into an uri structure `u`. 
83 Returns 0 on success or -1 on failure */ 84 int 85 uri_parse(const char *s, struct uri *u) 86 { 87 const char *p = s; 88 char *endptr; 89 size_t i; 90 long l; 91 92 u->proto[0] = u->userinfo[0] = u->host[0] = u->port[0] = '\0'; 93 u->path[0] = u->query[0] = u->fragment[0] = '\0'; 94 95 /* protocol-relative */ 96 if (*p == '/' && *(p + 1) == '/') { 97 p += 2; /* skip "//" */ 98 goto parseauth; 99 } 100 101 /* scheme / protocol part */ 102 for (; isalpha((unsigned char)*p) || isdigit((unsigned char)*p) || 103 *p == '+' || *p == '-' || *p == '.'; p++) 104 ; 105 /* scheme, except if empty and starts with ":" then it is a path */ 106 if (*p == ':' && p != s) { 107 if (*(p + 1) == '/' && *(p + 2) == '/') 108 p += 3; /* skip "://" */ 109 else 110 p++; /* skip ":" */ 111 112 if ((size_t)(p - s) >= sizeof(u->proto)) 113 return -1; /* protocol too long */ 114 memcpy(u->proto, s, p - s); 115 u->proto[p - s] = '\0'; 116 117 if (*(p - 1) != '/') 118 goto parsepath; 119 } else { 120 p = s; /* no scheme format, reset to start */ 121 goto parsepath; 122 } 123 124 parseauth: 125 /* userinfo (username:password) */ 126 i = strcspn(p, "@/?#"); 127 if (p[i] == '@') { 128 if (i >= sizeof(u->userinfo)) 129 return -1; /* userinfo too long */ 130 memcpy(u->userinfo, p, i); 131 u->userinfo[i] = '\0'; 132 p += i + 1; 133 } 134 135 /* IPv6 address */ 136 if (*p == '[') { 137 /* bracket not found, host too short or too long */ 138 i = strcspn(p, "]"); 139 if (p[i] != ']' || i < 3) 140 return -1; 141 i++; /* including "]" */ 142 } else { 143 /* domain / host part, skip until port, path or end. 
*/ 144 i = strcspn(p, ":/?#"); 145 } 146 if (i >= sizeof(u->host)) 147 return -1; /* host too long */ 148 memcpy(u->host, p, i); 149 u->host[i] = '\0'; 150 p += i; 151 152 /* port */ 153 if (*p == ':') { 154 p++; 155 if ((i = strcspn(p, "/?#")) >= sizeof(u->port)) 156 return -1; /* port too long */ 157 memcpy(u->port, p, i); 158 u->port[i] = '\0'; 159 /* check for valid port: range 1 - 65535, may be empty */ 160 errno = 0; 161 l = strtol(u->port, &endptr, 10); 162 if (i && (errno || *endptr || l <= 0 || l > 65535)) 163 return -1; 164 p += i; 165 } 166 167 parsepath: 168 /* path */ 169 if ((i = strcspn(p, "?#")) >= sizeof(u->path)) 170 return -1; /* path too long */ 171 memcpy(u->path, p, i); 172 u->path[i] = '\0'; 173 p += i; 174 175 /* query */ 176 if (*p == '?') { 177 p++; 178 if ((i = strcspn(p, "#")) >= sizeof(u->query)) 179 return -1; /* query too long */ 180 memcpy(u->query, p, i); 181 u->query[i] = '\0'; 182 p += i; 183 } 184 185 /* fragment */ 186 if (*p == '#') { 187 p++; 188 if ((i = strlen(p)) >= sizeof(u->fragment)) 189 return -1; /* fragment too long */ 190 memcpy(u->fragment, p, i); 191 u->fragment[i] = '\0'; 192 } 193 194 return 0; 195 } 196 197 /* Transform and try to make the URI `u` absolute using base URI `b` into `a`. 198 Follows some of the logic from "RFC 3986 - 5.2.2. Transform References". 199 Returns 0 on success, -1 on error or truncation. */ 200 int 201 uri_makeabs(struct uri *a, struct uri *u, struct uri *b) 202 { 203 char *p; 204 int c; 205 206 strlcpy(a->fragment, u->fragment, sizeof(a->fragment)); 207 208 if (u->proto[0] || u->host[0]) { 209 strlcpy(a->proto, u->proto[0] ? 
u->proto : b->proto, sizeof(a->proto)); 210 strlcpy(a->host, u->host, sizeof(a->host)); 211 strlcpy(a->userinfo, u->userinfo, sizeof(a->userinfo)); 212 strlcpy(a->host, u->host, sizeof(a->host)); 213 strlcpy(a->port, u->port, sizeof(a->port)); 214 strlcpy(a->path, u->path, sizeof(a->path)); 215 strlcpy(a->query, u->query, sizeof(a->query)); 216 return 0; 217 } 218 219 strlcpy(a->proto, b->proto, sizeof(a->proto)); 220 strlcpy(a->host, b->host, sizeof(a->host)); 221 strlcpy(a->userinfo, b->userinfo, sizeof(a->userinfo)); 222 strlcpy(a->host, b->host, sizeof(a->host)); 223 strlcpy(a->port, b->port, sizeof(a->port)); 224 225 if (!u->path[0]) { 226 strlcpy(a->path, b->path, sizeof(a->path)); 227 } else if (u->path[0] == '/') { 228 strlcpy(a->path, u->path, sizeof(a->path)); 229 } else { 230 a->path[0] = (b->host[0] && b->path[0] != '/') ? '/' : '\0'; 231 a->path[1] = '\0'; 232 233 if ((p = strrchr(b->path, '/'))) { 234 c = *(++p); 235 *p = '\0'; /* temporary NUL-terminate */ 236 if (strlcat(a->path, b->path, sizeof(a->path)) >= sizeof(a->path)) 237 return -1; 238 *p = c; /* restore */ 239 } 240 if (strlcat(a->path, u->path, sizeof(a->path)) >= sizeof(a->path)) 241 return -1; 242 } 243 244 if (u->path[0] || u->query[0]) 245 strlcpy(a->query, u->query, sizeof(a->query)); 246 else 247 strlcpy(a->query, b->query, sizeof(a->query)); 248 249 return 0; 250 } 251 252 int 253 uri_format(char *buf, size_t bufsiz, struct uri *u) 254 { 255 return snprintf(buf, bufsiz, "%s%s%s%s%s%s%s%s%s%s%s%s", 256 u->proto, 257 u->userinfo[0] ? u->userinfo : "", 258 u->userinfo[0] ? "@" : "", 259 u->host, 260 u->port[0] ? ":" : "", 261 u->port, 262 u->host[0] && u->path[0] && u->path[0] != '/' ? "/" : "", 263 u->path, 264 u->query[0] ? "?" : "", 265 u->query, 266 u->fragment[0] ? "#" : "", 267 u->fragment); 268 } 269 270 /* Splits fields in the line buffer by replacing TAB separators with NUL ('\0') 271 * terminators and assign these fields as pointers. 
If there are less fields 272 * than expected then the field is an empty string constant. */ 273 void 274 parseline(char *line, char *fields[FieldLast]) 275 { 276 char *prev, *s; 277 size_t i; 278 279 for (prev = line, i = 0; 280 (s = strchr(prev, '\t')) && i < FieldLast - 1; 281 i++) { 282 *s = '\0'; 283 fields[i] = prev; 284 prev = s + 1; 285 } 286 fields[i++] = prev; 287 /* make non-parsed fields empty. */ 288 for (; i < FieldLast; i++) 289 fields[i] = ""; 290 } 291 292 /* Parse time to time_t, assumes time_t is signed, ignores fractions. */ 293 int 294 strtotime(const char *s, time_t *t) 295 { 296 long long l; 297 char *e; 298 299 errno = 0; 300 l = strtoll(s, &e, 10); 301 if (errno || *s == '\0' || *e) 302 return -1; 303 304 /* NOTE: the type long long supports the 64-bit range. If time_t is 305 64-bit it is "2038-ready", otherwise it is truncated/wrapped. */ 306 if (t) 307 *t = (time_t)l; 308 309 return 0; 310 } 311 312 /* Escape characters below as HTML 2.0 / XML 1.0. */ 313 void 314 xmlencode(const char *s, FILE *fp) 315 { 316 for (; *s; ++s) { 317 switch (*s) { 318 case '<': fputs("<", fp); break; 319 case '>': fputs(">", fp); break; 320 case '\'': fputs("'", fp); break; 321 case '&': fputs("&", fp); break; 322 case '"': fputs(""", fp); break; 323 default: putc(*s, fp); 324 } 325 } 326 } 327 328 /* print `len` columns of characters. If string is shorter pad the rest with 329 * characters `pad`. */ 330 void 331 printutf8pad(FILE *fp, const char *s, size_t len, int pad) 332 { 333 wchar_t wc; 334 size_t col = 0, i, slen; 335 int inc, rl, w; 336 337 if (!len) 338 return; 339 340 slen = strlen(s); 341 for (i = 0; i < slen; i += inc) { 342 inc = 1; /* next byte */ 343 if ((unsigned char)s[i] < 32) { 344 continue; /* skip control characters */ 345 } else if ((unsigned char)s[i] >= 127) { 346 rl = mbtowc(&wc, s + i, slen - i < 4 ? 
slen - i : 4); 347 inc = rl; 348 if (rl < 0) { 349 mbtowc(NULL, NULL, 0); /* reset state */ 350 inc = 1; /* invalid, seek next byte */ 351 w = 1; /* replacement char is one width */ 352 } else if ((w = wcwidth(wc)) == -1) { 353 continue; 354 } 355 356 if (col + w > len || (col + w == len && s[i + inc])) { 357 fputs(PAD_TRUNCATE_SYMBOL, fp); /* ellipsis */ 358 col++; 359 break; 360 } else if (rl < 0) { 361 fputs(UTF_INVALID_SYMBOL, fp); /* replacement */ 362 col++; 363 continue; 364 } 365 fwrite(&s[i], 1, rl, fp); 366 col += w; 367 } else { 368 /* optimization: simple ASCII character */ 369 if (col + 1 > len || (col + 1 == len && s[i + 1])) { 370 fputs(PAD_TRUNCATE_SYMBOL, fp); /* ellipsis */ 371 col++; 372 break; 373 } 374 putc(s[i], fp); 375 col++; 376 } 377 378 } 379 for (; col < len; ++col) 380 putc(pad, fp); 381 }