summaryrefslogtreecommitdiff
path: root/util.h
diff options
context:
space:
mode:
authorHiltjo Posthuma <hiltjo@codemadness.org>2021-02-16 18:38:56 +0100
committerHiltjo Posthuma <hiltjo@codemadness.org>2021-03-01 18:41:27 +0100
commitf305b032bc19b4e81c0dd6c0398370028ea910ca (patch)
treeab89d4a7fc24bb2ee8c2a3b5409734925d37500a /util.h
parent30476d22307aaa38170da5241a5d5e9864c4e76d (diff)
util: improve/refactor URI parsing and formatting
Removed/rewrote the functions: absuri, parseuri, and encodeuri() for percent-encoding. The functions are now split up, each with the following purpose: - uri_format: format a struct uri into a string. - uri_hasscheme: quick check if a string is absolute or not. - uri_makeabs: make a URI absolute using a base URI and the original URI. - uri_parse: parse a string into a struct uri. The following URLs are better parsed: - URLs with extra "/"s prepended in the path are kept as is; no "/" is added for empty paths either. - URLs like "http://codemadness.org" are not changed to "http://codemadness.org/" anymore (paths are kept as is, unless they are non-empty and do not start with "/"). - Paths are not percent-encoded anymore. - URLs with a userinfo field (username, password) are parsed, like: ftp://user:password@[2001:db8::7]:2121/rfc/rfc1808.txt - Non-authoritative URLs like mailto:some@email.org, magnet URIs, ISBN URIs/urn, like: urn:isbn:0-395-36341-1 are allowed and parsed correctly. - Both local (file:///) and non-local (file://) are supported. - Specifying a base URL with a port will now only use it when the relative URL has no host and port set and follows RFC3986 5.2.2 more closely. - Parsing numeric port: parse as signed long and check <= 0, empty port is allowed. - Parsing URIs containing query, fragment, but no path separator (/) will now parse the components properly. For sfeed: - Parse the baseURI only once (no need to do it every time for making absolute URIs). - If a link/enclosure is absolute already or if there is no base URL specified then just print the link directly. There have also been other small performance improvements related to handling URIs. References: - https://tools.ietf.org/html/rfc3986 - Section "5.2.2. Transform References" has also been helpful.
Diffstat (limited to 'util.h')
-rw-r--r--util.h18
1 files changed, 12 insertions, 6 deletions
diff --git a/util.h b/util.h
index bc6e52f..15d6702 100644
--- a/util.h
+++ b/util.h
@@ -21,12 +21,15 @@ struct feed {
unsigned long total; /* total items */
};
-/* uri */
+/* URI */
struct uri {
- char proto[48];
+ char proto[48]; /* scheme including ":" or "://" */
+ char userinfo[256]; /* username [:password] */
char host[256];
- char path[2048];
- char port[6]; /* numeric port */
+ char port[6]; /* numeric port */
+ char path[1024];
+ char query[1024];
+ char fragment[1024];
};
enum {
@@ -35,9 +38,12 @@ enum {
FieldLast
};
-int absuri(char *, size_t, const char *, const char *);
+int uri_format(char *, size_t, struct uri *);
+int uri_hasscheme(const char *);
+int uri_makeabs(struct uri *, struct uri *, struct uri *);
+int uri_parse(const char *, struct uri *);
+
void parseline(char *, char *[FieldLast]);
-int parseuri(const char *, struct uri *, int);
void printutf8pad(FILE *, const char *, size_t, int);
int strtotime(const char *, time_t *);
void xmlencode(const char *, FILE *);