Elijah Newren | fc7bd51 | 2023-02-24 00:09:34 +0000 | [diff] [blame] | 1 | #include "git-compat-util.h" |
| 2 | #include "gettext.h" |
Calvin Wan | d88e810 | 2023-09-29 14:20:48 -0700 | [diff] [blame] | 3 | #include "hex-ll.h" |
Elijah Newren | fc7bd51 | 2023-02-24 00:09:34 +0000 | [diff] [blame] | 4 | #include "strbuf.h" |
Kyle J. McKay | 3402a8d | 2013-07-31 13:52:00 -0700 | [diff] [blame] | 5 | #include "urlmatch.h" |
| 6 | |
/*
 * Character sets used while parsing and normalizing URLs.  Each macro is a
 * string literal so it can be handed directly to strspn()/strchr().
 */
#define URL_ALPHA "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
#define URL_DIGIT "0123456789"
#define URL_ALPHADIGIT URL_ALPHA URL_DIGIT
/* characters accepted in the scheme name */
#define URL_SCHEME_CHARS URL_ALPHADIGIT "+.-"
#define URL_HOST_CHARS URL_ALPHADIGIT ".-_[:]" /* IPv6 literals need [:] */
#define URL_UNSAFE_CHARS " <>\"%{}|\\^`" /* plus 0x00-0x1F,0x7F-0xFF */
#define URL_GEN_RESERVED ":/?#[]@"
#define URL_SUB_RESERVED "!$&'()*+,;="
#define URL_RESERVED URL_GEN_RESERVED URL_SUB_RESERVED /* only allowed delims */
| 16 | |
| 17 | static int append_normalized_escapes(struct strbuf *buf, |
| 18 | const char *from, |
| 19 | size_t from_len, |
| 20 | const char *esc_extra, |
| 21 | const char *esc_ok) |
| 22 | { |
| 23 | /* |
| 24 | * Append to strbuf 'buf' characters from string 'from' with length |
| 25 | * 'from_len' while unescaping characters that do not need to be escaped |
| 26 | * and escaping characters that do. The set of characters to escape |
| 27 | * (the complement of which is unescaped) starts out as the RFC 3986 |
| 28 | * unsafe characters (0x00-0x1F,0x7F-0xFF," <>\"#%{}|\\^`"). If |
| 29 | * 'esc_extra' is not NULL, those additional characters will also always |
| 30 | * be escaped. If 'esc_ok' is not NULL, those characters will be left |
| 31 | * escaped if found that way, but will not be unescaped otherwise (used |
| 32 | * for delimiters). If a %-escape sequence is encountered that is not |
| 33 | * followed by 2 hexadecimal digits, the sequence is invalid and |
| 34 | * false (0) will be returned. Otherwise true (1) will be returned for |
| 35 | * success. |
| 36 | * |
| 37 | * Note that all %-escape sequences will be normalized to UPPERCASE |
| 38 | * as indicated in RFC 3986. Unless included in esc_extra or esc_ok |
| 39 | * alphanumerics and "-._~" will always be unescaped as per RFC 3986. |
| 40 | */ |
| 41 | |
| 42 | while (from_len) { |
| 43 | int ch = *from++; |
| 44 | int was_esc = 0; |
| 45 | |
| 46 | from_len--; |
| 47 | if (ch == '%') { |
René Scharfe | 5053313 | 2017-07-08 10:59:19 +0200 | [diff] [blame] | 48 | if (from_len < 2) |
Kyle J. McKay | 3402a8d | 2013-07-31 13:52:00 -0700 | [diff] [blame] | 49 | return 0; |
René Scharfe | 5053313 | 2017-07-08 10:59:19 +0200 | [diff] [blame] | 50 | ch = hex2chr(from); |
| 51 | if (ch < 0) |
| 52 | return 0; |
| 53 | from += 2; |
Kyle J. McKay | 3402a8d | 2013-07-31 13:52:00 -0700 | [diff] [blame] | 54 | from_len -= 2; |
| 55 | was_esc = 1; |
| 56 | } |
| 57 | if ((unsigned char)ch <= 0x1F || (unsigned char)ch >= 0x7F || |
| 58 | strchr(URL_UNSAFE_CHARS, ch) || |
| 59 | (esc_extra && strchr(esc_extra, ch)) || |
| 60 | (was_esc && strchr(esc_ok, ch))) |
| 61 | strbuf_addf(buf, "%%%02X", (unsigned char)ch); |
| 62 | else |
| 63 | strbuf_addch(buf, ch); |
| 64 | } |
| 65 | |
| 66 | return 1; |
| 67 | } |
| 68 | |
/*
 * Locate the end of the token that starts at 's': the first occurrence of
 * byte 'c' within the first 'n' bytes, or 's + n' (one past the scanned
 * range) when 'c' does not occur there.
 */
static const char *end_of_token(const char *s, int c, size_t n)
{
	const char *hit = memchr(s, c, n);

	return hit ? hit : s + n;
}
| 76 | |
| 77 | static int match_host(const struct url_info *url_info, |
| 78 | const struct url_info *pattern_info) |
| 79 | { |
| 80 | const char *url = url_info->url + url_info->host_off; |
| 81 | const char *pat = pattern_info->url + pattern_info->host_off; |
| 82 | int url_len = url_info->host_len; |
| 83 | int pat_len = pattern_info->host_len; |
| 84 | |
| 85 | while (url_len && pat_len) { |
| 86 | const char *url_next = end_of_token(url, '.', url_len); |
| 87 | const char *pat_next = end_of_token(pat, '.', pat_len); |
| 88 | |
| 89 | if (pat_next == pat + 1 && pat[0] == '*') |
| 90 | /* wildcard matches anything */ |
| 91 | ; |
| 92 | else if ((pat_next - pat) == (url_next - url) && |
| 93 | !memcmp(url, pat, url_next - url)) |
| 94 | /* the components are the same */ |
| 95 | ; |
| 96 | else |
| 97 | return 0; /* found an unmatch */ |
| 98 | |
| 99 | if (url_next < url + url_len) |
| 100 | url_next++; |
| 101 | url_len -= url_next - url; |
| 102 | url = url_next; |
| 103 | if (pat_next < pat + pat_len) |
| 104 | pat_next++; |
| 105 | pat_len -= pat_next - pat; |
| 106 | pat = pat_next; |
| 107 | } |
| 108 | |
| 109 | return (!url_len && !pat_len); |
| 110 | } |
| 111 | |
Patrick Steinhardt | 3e6a0e6 | 2017-01-31 10:01:44 +0100 | [diff] [blame] | 112 | static char *url_normalize_1(const char *url, struct url_info *out_info, char allow_globs) |
Kyle J. McKay | 3402a8d | 2013-07-31 13:52:00 -0700 | [diff] [blame] | 113 | { |
| 114 | /* |
| 115 | * Normalize NUL-terminated url using the following rules: |
| 116 | * |
| 117 | * 1. Case-insensitive parts of url will be converted to lower case |
| 118 | * 2. %-encoded characters that do not need to be will be unencoded |
| 119 | * 3. Characters that are not %-encoded and must be will be encoded |
| 120 | * 4. All %-encodings will be converted to upper case hexadecimal |
| 121 | * 5. Leading 0s are removed from port numbers |
| 122 | * 6. If the default port for the scheme is given it will be removed |
| 123 | * 7. A path part (including empty) not starting with '/' has one added |
| 124 | * 8. Any dot segments (. or ..) in the path are resolved and removed |
| 125 | * 9. IPv6 host literals are allowed (but not normalized or validated) |
| 126 | * |
| 127 | * The rules are based on information in RFC 3986. |
| 128 | * |
| 129 | * Please note this function requires a full URL including a scheme |
| 130 | * and host part (except for file: URLs which may have an empty host). |
| 131 | * |
| 132 | * The return value is a newly allocated string that must be freed |
| 133 | * or NULL if the url is not valid. |
| 134 | * |
| 135 | * If out_info is non-NULL, the url and err fields therein will always |
| 136 | * be set. If a non-NULL value is returned, it will be stored in |
| 137 | * out_info->url as well, out_info->err will be set to NULL and the |
| 138 | * other fields of *out_info will also be filled in. If a NULL value |
| 139 | * is returned, NULL will be stored in out_info->url and out_info->err |
| 140 | * will be set to a brief, translated, error message, but no other |
| 141 | * fields will be filled in. |
| 142 | * |
| 143 | * This is NOT a URL validation function. Full URL validation is NOT |
| 144 | * performed. Some invalid host names are passed through this function |
| 145 | * undetected. However, most all other problems that make a URL invalid |
| 146 | * will be detected (including a missing host for non file: URLs). |
| 147 | */ |
| 148 | |
| 149 | size_t url_len = strlen(url); |
| 150 | struct strbuf norm; |
| 151 | size_t spanned; |
| 152 | size_t scheme_len, user_off=0, user_len=0, passwd_off=0, passwd_len=0; |
Patrick Steinhardt | 3ec6e6e | 2017-01-31 10:01:45 +0100 | [diff] [blame] | 153 | size_t host_off=0, host_len=0, port_off=0, port_len=0, path_off, path_len, result_len; |
Kyle J. McKay | 3402a8d | 2013-07-31 13:52:00 -0700 | [diff] [blame] | 154 | const char *slash_ptr, *at_ptr, *colon_ptr, *path_start; |
| 155 | char *result; |
| 156 | |
| 157 | /* |
| 158 | * Copy lowercased scheme and :// suffix, %-escapes are not allowed |
| 159 | * First character of scheme must be URL_ALPHA |
| 160 | */ |
| 161 | spanned = strspn(url, URL_SCHEME_CHARS); |
| 162 | if (!spanned || !isalpha(url[0]) || spanned + 3 > url_len || |
| 163 | url[spanned] != ':' || url[spanned+1] != '/' || url[spanned+2] != '/') { |
| 164 | if (out_info) { |
| 165 | out_info->url = NULL; |
| 166 | out_info->err = _("invalid URL scheme name or missing '://' suffix"); |
| 167 | } |
| 168 | return NULL; /* Bad scheme and/or missing "://" part */ |
| 169 | } |
| 170 | strbuf_init(&norm, url_len); |
| 171 | scheme_len = spanned; |
| 172 | spanned += 3; |
| 173 | url_len -= spanned; |
| 174 | while (spanned--) |
| 175 | strbuf_addch(&norm, tolower(*url++)); |
| 176 | |
| 177 | |
| 178 | /* |
| 179 | * Copy any username:password if present normalizing %-escapes |
| 180 | */ |
| 181 | at_ptr = strchr(url, '@'); |
| 182 | slash_ptr = url + strcspn(url, "/?#"); |
| 183 | if (at_ptr && at_ptr < slash_ptr) { |
| 184 | user_off = norm.len; |
| 185 | if (at_ptr > url) { |
| 186 | if (!append_normalized_escapes(&norm, url, at_ptr - url, |
| 187 | "", URL_RESERVED)) { |
| 188 | if (out_info) { |
| 189 | out_info->url = NULL; |
| 190 | out_info->err = _("invalid %XX escape sequence"); |
| 191 | } |
| 192 | strbuf_release(&norm); |
| 193 | return NULL; |
| 194 | } |
| 195 | colon_ptr = strchr(norm.buf + scheme_len + 3, ':'); |
| 196 | if (colon_ptr) { |
| 197 | passwd_off = (colon_ptr + 1) - norm.buf; |
| 198 | passwd_len = norm.len - passwd_off; |
| 199 | user_len = (passwd_off - 1) - (scheme_len + 3); |
| 200 | } else { |
| 201 | user_len = norm.len - (scheme_len + 3); |
| 202 | } |
| 203 | } |
| 204 | strbuf_addch(&norm, '@'); |
| 205 | url_len -= (++at_ptr - url); |
| 206 | url = at_ptr; |
| 207 | } |
| 208 | |
| 209 | |
| 210 | /* |
| 211 | * Copy the host part excluding any port part, no %-escapes allowed |
| 212 | */ |
| 213 | if (!url_len || strchr(":/?#", *url)) { |
| 214 | /* Missing host invalid for all URL schemes except file */ |
Jeff King | 20869d1 | 2023-01-07 08:26:18 -0500 | [diff] [blame] | 215 | if (!starts_with(norm.buf, "file:")) { |
Kyle J. McKay | 3402a8d | 2013-07-31 13:52:00 -0700 | [diff] [blame] | 216 | if (out_info) { |
| 217 | out_info->url = NULL; |
| 218 | out_info->err = _("missing host and scheme is not 'file:'"); |
| 219 | } |
| 220 | strbuf_release(&norm); |
| 221 | return NULL; |
| 222 | } |
| 223 | } else { |
| 224 | host_off = norm.len; |
| 225 | } |
| 226 | colon_ptr = slash_ptr - 1; |
| 227 | while (colon_ptr > url && *colon_ptr != ':' && *colon_ptr != ']') |
| 228 | colon_ptr--; |
| 229 | if (*colon_ptr != ':') { |
| 230 | colon_ptr = slash_ptr; |
| 231 | } else if (!host_off && colon_ptr < slash_ptr && colon_ptr + 1 != slash_ptr) { |
| 232 | /* file: URLs may not have a port number */ |
| 233 | if (out_info) { |
| 234 | out_info->url = NULL; |
| 235 | out_info->err = _("a 'file:' URL may not have a port number"); |
| 236 | } |
| 237 | strbuf_release(&norm); |
| 238 | return NULL; |
| 239 | } |
Patrick Steinhardt | 3e6a0e6 | 2017-01-31 10:01:44 +0100 | [diff] [blame] | 240 | |
| 241 | if (allow_globs) |
| 242 | spanned = strspn(url, URL_HOST_CHARS "*"); |
| 243 | else |
| 244 | spanned = strspn(url, URL_HOST_CHARS); |
| 245 | |
Kyle J. McKay | 3402a8d | 2013-07-31 13:52:00 -0700 | [diff] [blame] | 246 | if (spanned < colon_ptr - url) { |
| 247 | /* Host name has invalid characters */ |
| 248 | if (out_info) { |
| 249 | out_info->url = NULL; |
| 250 | out_info->err = _("invalid characters in host name"); |
| 251 | } |
| 252 | strbuf_release(&norm); |
| 253 | return NULL; |
| 254 | } |
| 255 | while (url < colon_ptr) { |
| 256 | strbuf_addch(&norm, tolower(*url++)); |
| 257 | url_len--; |
| 258 | } |
| 259 | |
| 260 | |
| 261 | /* |
| 262 | * Check the port part and copy if not the default (after removing any |
| 263 | * leading 0s); no %-escapes allowed |
| 264 | */ |
| 265 | if (colon_ptr < slash_ptr) { |
| 266 | /* skip the ':' and leading 0s but not the last one if all 0s */ |
| 267 | url++; |
| 268 | url += strspn(url, "0"); |
| 269 | if (url == slash_ptr && url[-1] == '0') |
| 270 | url--; |
| 271 | if (url == slash_ptr) { |
| 272 | /* Skip ":" port with no number, it's same as default */ |
| 273 | } else if (slash_ptr - url == 2 && |
Jeff King | 20869d1 | 2023-01-07 08:26:18 -0500 | [diff] [blame] | 274 | starts_with(norm.buf, "http:") && |
Kyle J. McKay | 3402a8d | 2013-07-31 13:52:00 -0700 | [diff] [blame] | 275 | !strncmp(url, "80", 2)) { |
| 276 | /* Skip http :80 as it's the default */ |
| 277 | } else if (slash_ptr - url == 3 && |
Jeff King | 20869d1 | 2023-01-07 08:26:18 -0500 | [diff] [blame] | 278 | starts_with(norm.buf, "https:") && |
Kyle J. McKay | 3402a8d | 2013-07-31 13:52:00 -0700 | [diff] [blame] | 279 | !strncmp(url, "443", 3)) { |
| 280 | /* Skip https :443 as it's the default */ |
| 281 | } else { |
| 282 | /* |
| 283 | * Port number must be all digits with leading 0s removed |
| 284 | * and since all the protocols we deal with have a 16-bit |
| 285 | * port number it must also be in the range 1..65535 |
| 286 | * 0 is not allowed because that means "next available" |
| 287 | * on just about every system and therefore cannot be used |
| 288 | */ |
| 289 | unsigned long pnum = 0; |
| 290 | spanned = strspn(url, URL_DIGIT); |
| 291 | if (spanned < slash_ptr - url) { |
| 292 | /* port number has invalid characters */ |
| 293 | if (out_info) { |
| 294 | out_info->url = NULL; |
| 295 | out_info->err = _("invalid port number"); |
| 296 | } |
| 297 | strbuf_release(&norm); |
| 298 | return NULL; |
| 299 | } |
| 300 | if (slash_ptr - url <= 5) |
| 301 | pnum = strtoul(url, NULL, 10); |
| 302 | if (pnum == 0 || pnum > 65535) { |
| 303 | /* port number not in range 1..65535 */ |
| 304 | if (out_info) { |
| 305 | out_info->url = NULL; |
| 306 | out_info->err = _("invalid port number"); |
| 307 | } |
| 308 | strbuf_release(&norm); |
| 309 | return NULL; |
| 310 | } |
| 311 | strbuf_addch(&norm, ':'); |
Patrick Steinhardt | 3ec6e6e | 2017-01-31 10:01:45 +0100 | [diff] [blame] | 312 | port_off = norm.len; |
Kyle J. McKay | 3402a8d | 2013-07-31 13:52:00 -0700 | [diff] [blame] | 313 | strbuf_add(&norm, url, slash_ptr - url); |
| 314 | port_len = slash_ptr - url; |
| 315 | } |
| 316 | url_len -= slash_ptr - colon_ptr; |
| 317 | url = slash_ptr; |
| 318 | } |
| 319 | if (host_off) |
Patrick Steinhardt | 3ec6e6e | 2017-01-31 10:01:45 +0100 | [diff] [blame] | 320 | host_len = norm.len - host_off - (port_len ? port_len + 1 : 0); |
Kyle J. McKay | 3402a8d | 2013-07-31 13:52:00 -0700 | [diff] [blame] | 321 | |
| 322 | |
| 323 | /* |
| 324 | * Now copy the path resolving any . and .. segments being careful not |
| 325 | * to corrupt the URL by unescaping any delimiters, but do add an |
| 326 | * initial '/' if it's missing and do normalize any %-escape sequences. |
| 327 | */ |
| 328 | path_off = norm.len; |
| 329 | path_start = norm.buf + path_off; |
| 330 | strbuf_addch(&norm, '/'); |
| 331 | if (*url == '/') { |
| 332 | url++; |
| 333 | url_len--; |
| 334 | } |
| 335 | for (;;) { |
Thomas Rast | a7f0a0e | 2013-09-12 07:15:40 -0700 | [diff] [blame] | 336 | const char *seg_start; |
| 337 | size_t seg_start_off = norm.len; |
Kyle J. McKay | 3402a8d | 2013-07-31 13:52:00 -0700 | [diff] [blame] | 338 | const char *next_slash = url + strcspn(url, "/?#"); |
| 339 | int skip_add_slash = 0; |
Thomas Rast | a7f0a0e | 2013-09-12 07:15:40 -0700 | [diff] [blame] | 340 | |
Kyle J. McKay | 3402a8d | 2013-07-31 13:52:00 -0700 | [diff] [blame] | 341 | /* |
| 342 | * RFC 3689 indicates that any . or .. segments should be |
| 343 | * unescaped before being checked for. |
| 344 | */ |
| 345 | if (!append_normalized_escapes(&norm, url, next_slash - url, "", |
| 346 | URL_RESERVED)) { |
| 347 | if (out_info) { |
| 348 | out_info->url = NULL; |
| 349 | out_info->err = _("invalid %XX escape sequence"); |
| 350 | } |
| 351 | strbuf_release(&norm); |
| 352 | return NULL; |
| 353 | } |
Thomas Rast | a7f0a0e | 2013-09-12 07:15:40 -0700 | [diff] [blame] | 354 | |
| 355 | seg_start = norm.buf + seg_start_off; |
Kyle J. McKay | 3402a8d | 2013-07-31 13:52:00 -0700 | [diff] [blame] | 356 | if (!strcmp(seg_start, ".")) { |
| 357 | /* ignore a . segment; be careful not to remove initial '/' */ |
| 358 | if (seg_start == path_start + 1) { |
| 359 | strbuf_setlen(&norm, norm.len - 1); |
| 360 | skip_add_slash = 1; |
| 361 | } else { |
| 362 | strbuf_setlen(&norm, norm.len - 2); |
| 363 | } |
| 364 | } else if (!strcmp(seg_start, "..")) { |
| 365 | /* |
| 366 | * ignore a .. segment and remove the previous segment; |
| 367 | * be careful not to remove initial '/' from path |
| 368 | */ |
| 369 | const char *prev_slash = norm.buf + norm.len - 3; |
| 370 | if (prev_slash == path_start) { |
| 371 | /* invalid .. because no previous segment to remove */ |
| 372 | if (out_info) { |
| 373 | out_info->url = NULL; |
| 374 | out_info->err = _("invalid '..' path segment"); |
| 375 | } |
| 376 | strbuf_release(&norm); |
| 377 | return NULL; |
| 378 | } |
| 379 | while (*--prev_slash != '/') {} |
| 380 | if (prev_slash == path_start) { |
| 381 | strbuf_setlen(&norm, prev_slash - norm.buf + 1); |
| 382 | skip_add_slash = 1; |
| 383 | } else { |
| 384 | strbuf_setlen(&norm, prev_slash - norm.buf); |
| 385 | } |
| 386 | } |
| 387 | url_len -= next_slash - url; |
| 388 | url = next_slash; |
| 389 | /* if the next char is not '/' done with the path */ |
| 390 | if (*url != '/') |
| 391 | break; |
| 392 | url++; |
| 393 | url_len--; |
| 394 | if (!skip_add_slash) |
| 395 | strbuf_addch(&norm, '/'); |
| 396 | } |
| 397 | path_len = norm.len - path_off; |
| 398 | |
| 399 | |
| 400 | /* |
| 401 | * Now simply copy the rest, if any, only normalizing %-escapes and |
| 402 | * being careful not to corrupt the URL by unescaping any delimiters. |
| 403 | */ |
| 404 | if (*url) { |
| 405 | if (!append_normalized_escapes(&norm, url, url_len, "", URL_RESERVED)) { |
| 406 | if (out_info) { |
| 407 | out_info->url = NULL; |
| 408 | out_info->err = _("invalid %XX escape sequence"); |
| 409 | } |
| 410 | strbuf_release(&norm); |
| 411 | return NULL; |
| 412 | } |
| 413 | } |
| 414 | |
| 415 | |
| 416 | result = strbuf_detach(&norm, &result_len); |
| 417 | if (out_info) { |
| 418 | out_info->url = result; |
| 419 | out_info->err = NULL; |
| 420 | out_info->url_len = result_len; |
| 421 | out_info->scheme_len = scheme_len; |
| 422 | out_info->user_off = user_off; |
| 423 | out_info->user_len = user_len; |
| 424 | out_info->passwd_off = passwd_off; |
| 425 | out_info->passwd_len = passwd_len; |
| 426 | out_info->host_off = host_off; |
| 427 | out_info->host_len = host_len; |
Patrick Steinhardt | 3ec6e6e | 2017-01-31 10:01:45 +0100 | [diff] [blame] | 428 | out_info->port_off = port_off; |
Kyle J. McKay | 3402a8d | 2013-07-31 13:52:00 -0700 | [diff] [blame] | 429 | out_info->port_len = port_len; |
| 430 | out_info->path_off = path_off; |
| 431 | out_info->path_len = path_len; |
| 432 | } |
| 433 | return result; |
| 434 | } |
| 435 | |
/*
 * Normalize 'url' with glob characters in the host part disallowed.
 * Simply forwards to url_normalize_1() with allow_globs set to 0; the
 * result and the contents of 'out_info' are whatever that call produces.
 */
char *url_normalize(const char *url, struct url_info *out_info)
{
	const char no_globs = 0;

	return url_normalize_1(url, out_info, no_globs);
}
| 440 | |
/*
 * Decide whether url_prefix (url_prefix_len bytes, not necessarily NUL
 * terminated) matches the NUL-terminated url.  It matches when it is equal
 * to url, or when it is a leading part of url that ends on a '/' path
 * component boundary.  For this purpose both strings are treated as if
 * they ended in '/' even when they do not.
 *
 * Returns the number of characters matched, counting the final '/' even
 * when it is only implicit, or 0 when there is no match.  A NULL url or
 * url_prefix yields 0.
 */
static size_t url_match_prefix(const char *url,
			       const char *url_prefix,
			       size_t url_prefix_len)
{
	if (!url || !url_prefix)
		return 0;

	/* an empty prefix or a lone "/" can only match at the path root */
	if (url_prefix_len == 0 ||
	    (url_prefix_len == 1 && url_prefix[0] == '/')) {
		if (url[0] == '\0' || url[0] == '/')
			return 1;
		return 0;
	}

	/* one trailing '/' on the prefix is implied anyway; do not compare it */
	if (url_prefix[url_prefix_len - 1] == '/')
		url_prefix_len--;

	if (strncmp(url, url_prefix, url_prefix_len) != 0)
		return 0;

	/* the match must end at the end of url or at a '/' boundary */
	if (strlen(url) != url_prefix_len && url[url_prefix_len] != '/')
		return 0;

	return url_prefix_len + 1;
}
| 472 | |
Junio C Hamano | 667f7eb | 2015-01-14 14:57:08 -0800 | [diff] [blame] | 473 | static int match_urls(const struct url_info *url, |
| 474 | const struct url_info *url_prefix, |
Patrick Steinhardt | af99049 | 2017-01-31 10:01:46 +0100 | [diff] [blame] | 475 | struct urlmatch_item *match) |
Kyle J. McKay | 3402a8d | 2013-07-31 13:52:00 -0700 | [diff] [blame] | 476 | { |
| 477 | /* |
| 478 | * url_prefix matches url if the scheme, host and port of url_prefix |
| 479 | * are the same as those of url and the path portion of url_prefix |
| 480 | * is the same as the path portion of url or it is a prefix that |
| 481 | * matches at a '/' boundary. If url_prefix contains a user name, |
| 482 | * that must also exactly match the user name in url. |
| 483 | * |
| 484 | * If the user, host, port and path match in this fashion, the returned |
| 485 | * value is the length of the path match including any implicit |
| 486 | * final '/'. For example, "http://me@example.com/path" is matched by |
| 487 | * "http://example.com" with a path length of 1. |
| 488 | * |
| 489 | * If there is a match and exactusermatch is not NULL, then |
| 490 | * *exactusermatch will be set to true if both url and url_prefix |
| 491 | * contained a user name or false if url_prefix did not have a |
| 492 | * user name. If there is no match *exactusermatch is left untouched. |
| 493 | */ |
Patrick Steinhardt | af99049 | 2017-01-31 10:01:46 +0100 | [diff] [blame] | 494 | char usermatched = 0; |
| 495 | size_t pathmatchlen; |
Kyle J. McKay | 3402a8d | 2013-07-31 13:52:00 -0700 | [diff] [blame] | 496 | |
| 497 | if (!url || !url_prefix || !url->url || !url_prefix->url) |
| 498 | return 0; |
| 499 | |
| 500 | /* check the scheme */ |
| 501 | if (url_prefix->scheme_len != url->scheme_len || |
| 502 | strncmp(url->url, url_prefix->url, url->scheme_len)) |
| 503 | return 0; /* schemes do not match */ |
| 504 | |
| 505 | /* check the user name if url_prefix has one */ |
| 506 | if (url_prefix->user_off) { |
| 507 | if (!url->user_off || url->user_len != url_prefix->user_len || |
| 508 | strncmp(url->url + url->user_off, |
| 509 | url_prefix->url + url_prefix->user_off, |
| 510 | url->user_len)) |
| 511 | return 0; /* url_prefix has a user but it's not a match */ |
| 512 | usermatched = 1; |
| 513 | } |
| 514 | |
Patrick Steinhardt | 3ec6e6e | 2017-01-31 10:01:45 +0100 | [diff] [blame] | 515 | /* check the host */ |
Patrick Steinhardt | a272b9e | 2017-01-31 10:01:47 +0100 | [diff] [blame] | 516 | if (!match_host(url, url_prefix)) |
Patrick Steinhardt | 3ec6e6e | 2017-01-31 10:01:45 +0100 | [diff] [blame] | 517 | return 0; /* host names do not match */ |
| 518 | |
| 519 | /* check the port */ |
| 520 | if (url_prefix->port_len != url->port_len || |
| 521 | strncmp(url->url + url->port_off, |
| 522 | url_prefix->url + url_prefix->port_off, url->port_len)) |
| 523 | return 0; /* ports do not match */ |
Kyle J. McKay | 3402a8d | 2013-07-31 13:52:00 -0700 | [diff] [blame] | 524 | |
| 525 | /* check the path */ |
| 526 | pathmatchlen = url_match_prefix( |
| 527 | url->url + url->path_off, |
| 528 | url_prefix->url + url_prefix->path_off, |
| 529 | url_prefix->url_len - url_prefix->path_off); |
Patrick Steinhardt | af99049 | 2017-01-31 10:01:46 +0100 | [diff] [blame] | 530 | if (!pathmatchlen) |
| 531 | return 0; /* paths do not match */ |
Kyle J. McKay | 3402a8d | 2013-07-31 13:52:00 -0700 | [diff] [blame] | 532 | |
Patrick Steinhardt | af99049 | 2017-01-31 10:01:46 +0100 | [diff] [blame] | 533 | if (match) { |
| 534 | match->hostmatch_len = url_prefix->host_len; |
| 535 | match->pathmatch_len = pathmatchlen; |
| 536 | match->user_matched = usermatched; |
| 537 | } |
| 538 | |
| 539 | return 1; |
| 540 | } |
| 541 | |
| 542 | static int cmp_matches(const struct urlmatch_item *a, |
| 543 | const struct urlmatch_item *b) |
| 544 | { |
| 545 | if (a->hostmatch_len != b->hostmatch_len) |
| 546 | return a->hostmatch_len < b->hostmatch_len ? -1 : 1; |
| 547 | if (a->pathmatch_len != b->pathmatch_len) |
| 548 | return a->pathmatch_len < b->pathmatch_len ? -1 : 1; |
| 549 | if (a->user_matched != b->user_matched) |
| 550 | return b->user_matched ? -1 : 1; |
| 551 | return 0; |
Kyle J. McKay | 3402a8d | 2013-07-31 13:52:00 -0700 | [diff] [blame] | 552 | } |
Junio C Hamano | 836b6fb | 2013-07-31 10:42:01 -0700 | [diff] [blame] | 553 | |
Glen Choo | a4e7e31 | 2023-06-28 19:26:22 +0000 | [diff] [blame] | 554 | int urlmatch_config_entry(const char *var, const char *value, |
| 555 | const struct config_context *ctx, void *cb) |
Junio C Hamano | 836b6fb | 2013-07-31 10:42:01 -0700 | [diff] [blame] | 556 | { |
| 557 | struct string_list_item *item; |
| 558 | struct urlmatch_config *collect = cb; |
Patrick Steinhardt | af99049 | 2017-01-31 10:01:46 +0100 | [diff] [blame] | 559 | struct urlmatch_item matched = {0}; |
Junio C Hamano | 836b6fb | 2013-07-31 10:42:01 -0700 | [diff] [blame] | 560 | struct url_info *url = &collect->url; |
| 561 | const char *key, *dot; |
| 562 | struct strbuf synthkey = STRBUF_INIT; |
Junio C Hamano | 836b6fb | 2013-07-31 10:42:01 -0700 | [diff] [blame] | 563 | int retval; |
brian m. carlson | 46fd7b3 | 2020-02-20 02:24:13 +0000 | [diff] [blame] | 564 | int (*select_fn)(const struct urlmatch_item *a, const struct urlmatch_item *b) = |
| 565 | collect->select_fn ? collect->select_fn : cmp_matches; |
Junio C Hamano | 836b6fb | 2013-07-31 10:42:01 -0700 | [diff] [blame] | 566 | |
Jeff King | cf4fff5 | 2014-06-18 15:44:19 -0400 | [diff] [blame] | 567 | if (!skip_prefix(var, collect->section, &key) || *(key++) != '.') { |
Junio C Hamano | 836b6fb | 2013-07-31 10:42:01 -0700 | [diff] [blame] | 568 | if (collect->cascade_fn) |
Glen Choo | a4e7e31 | 2023-06-28 19:26:22 +0000 | [diff] [blame] | 569 | return collect->cascade_fn(var, value, ctx, cb); |
Junio C Hamano | 836b6fb | 2013-07-31 10:42:01 -0700 | [diff] [blame] | 570 | return 0; /* not interested */ |
| 571 | } |
| 572 | dot = strrchr(key, '.'); |
| 573 | if (dot) { |
| 574 | char *config_url, *norm_url; |
| 575 | struct url_info norm_info; |
| 576 | |
| 577 | config_url = xmemdupz(key, dot - key); |
Patrick Steinhardt | a272b9e | 2017-01-31 10:01:47 +0100 | [diff] [blame] | 578 | norm_url = url_normalize_1(config_url, &norm_info, 1); |
Johannes Schindelin | 1229499 | 2020-04-24 22:35:49 +0000 | [diff] [blame] | 579 | if (norm_url) |
| 580 | retval = match_urls(url, &norm_info, &matched); |
| 581 | else if (collect->fallback_match_fn) |
| 582 | retval = collect->fallback_match_fn(config_url, |
| 583 | collect->cb); |
| 584 | else |
| 585 | retval = 0; |
Junio C Hamano | 836b6fb | 2013-07-31 10:42:01 -0700 | [diff] [blame] | 586 | free(config_url); |
Junio C Hamano | 836b6fb | 2013-07-31 10:42:01 -0700 | [diff] [blame] | 587 | free(norm_url); |
Patrick Steinhardt | af99049 | 2017-01-31 10:01:46 +0100 | [diff] [blame] | 588 | if (!retval) |
Junio C Hamano | 836b6fb | 2013-07-31 10:42:01 -0700 | [diff] [blame] | 589 | return 0; |
| 590 | key = dot + 1; |
| 591 | } |
| 592 | |
| 593 | if (collect->key && strcmp(key, collect->key)) |
| 594 | return 0; |
| 595 | |
| 596 | item = string_list_insert(&collect->vars, key); |
| 597 | if (!item->util) { |
Patrick Steinhardt | af99049 | 2017-01-31 10:01:46 +0100 | [diff] [blame] | 598 | item->util = xcalloc(1, sizeof(matched)); |
Junio C Hamano | 836b6fb | 2013-07-31 10:42:01 -0700 | [diff] [blame] | 599 | } else { |
brian m. carlson | 46fd7b3 | 2020-02-20 02:24:13 +0000 | [diff] [blame] | 600 | if (select_fn(&matched, item->util) < 0) |
Patrick Steinhardt | af99049 | 2017-01-31 10:01:46 +0100 | [diff] [blame] | 601 | /* |
| 602 | * Our match is worse than the old one, |
| 603 | * we cannot use it. |
| 604 | */ |
Junio C Hamano | 836b6fb | 2013-07-31 10:42:01 -0700 | [diff] [blame] | 605 | return 0; |
| 606 | /* Otherwise, replace it with this one. */ |
| 607 | } |
| 608 | |
Patrick Steinhardt | af99049 | 2017-01-31 10:01:46 +0100 | [diff] [blame] | 609 | memcpy(item->util, &matched, sizeof(matched)); |
Junio C Hamano | 836b6fb | 2013-07-31 10:42:01 -0700 | [diff] [blame] | 610 | strbuf_addstr(&synthkey, collect->section); |
| 611 | strbuf_addch(&synthkey, '.'); |
| 612 | strbuf_addstr(&synthkey, key); |
Glen Choo | a4e7e31 | 2023-06-28 19:26:22 +0000 | [diff] [blame] | 613 | retval = collect->collect_fn(synthkey.buf, value, ctx, collect->cb); |
Junio C Hamano | 836b6fb | 2013-07-31 10:42:01 -0700 | [diff] [blame] | 614 | |
| 615 | strbuf_release(&synthkey); |
| 616 | return retval; |
| 617 | } |
Ævar Arnfjörð Bjarmason | a41e8e7 | 2022-03-04 19:32:07 +0100 | [diff] [blame] | 618 | |
| 619 | void urlmatch_config_release(struct urlmatch_config *config) |
| 620 | { |
| 621 | string_list_clear(&config->vars, 1); |
| 622 | } |