fetch.c
1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1998-2004 Dag-Erling Smørgrav 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer 12 * in this position and unchanged. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. The name of the author may not be used to endorse or promote products 17 * derived from this software without specific prior written permission 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #include "bsd_compat.h" 32 33 #include <sys/param.h> 34 35 #include <netinet/in.h> 36 37 #include <errno.h> 38 #include <ctype.h> 39 #include <stdio.h> 40 #include <stdlib.h> 41 #include <string.h> 42 43 #include "fetch.h" 44 #include "common.h" 45 46 auth_t fetchAuthMethod; 47 int fetchLastErrCode; 48 char fetchLastErrString[MAXERRSTRING]; 49 int fetchTimeout; 50 int fetchRestartCalls = 1; 51 int fetchDebug; 52 const char *fetchCustomHTTPHeaders; 53 int fetchSpeedLimit; 54 int fetchSpeedTime; 55 56 57 /*** Local data **************************************************************/ 58 59 /* 60 * Error messages for parser errors 61 */ 62 #define URL_MALFORMED 1 63 #define URL_BAD_SCHEME 2 64 #define URL_BAD_PORT 3 65 static struct fetcherr url_errlist[] = { 66 { URL_MALFORMED, FETCH_URL, "Malformed URL" }, 67 { URL_BAD_SCHEME, FETCH_URL, "Invalid URL scheme" }, 68 { URL_BAD_PORT, FETCH_URL, "Invalid server port" }, 69 { -1, FETCH_UNKNOWN, "Unknown parser error" } 70 }; 71 72 73 /*** Public API **************************************************************/ 74 75 /* 76 * Select the appropriate protocol for the URL scheme, and return a 77 * read-only stream connected to the document referenced by the URL. 78 * Also fill out the struct url_stat. 79 */ 80 FILE * 81 fetchXGet(struct url *URL, struct url_stat *us, const char *flags) 82 { 83 84 if (us != NULL) { 85 us->size = -1; 86 us->atime = us->mtime = 0; 87 } 88 if (strcmp(URL->scheme, SCHEME_HTTP) == 0) 89 return (fetchXGetHTTP(URL, us, flags)); 90 else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0) 91 return (fetchXGetHTTP(URL, us, flags)); 92 url_seterr(URL_BAD_SCHEME); 93 return (NULL); 94 } 95 96 /* 97 * Select the appropriate protocol for the URL scheme, and return a 98 * read-only stream connected to the document referenced by the URL. 99 */ 100 FILE * 101 fetchGet(struct url *URL, const char *flags) 102 { 103 return (fetchXGet(URL, NULL, flags)); 104 } 105 106 /* 107 * Select the appropriate protocol for the URL scheme, and return a 108 * write-only stream connected to the document referenced by the URL. 109 */ 110 FILE * 111 fetchPut(struct url *URL, const char *flags) 112 { 113 114 if (strcmp(URL->scheme, SCHEME_HTTP) == 0) 115 return (fetchPutHTTP(URL, flags)); 116 else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0) 117 return (fetchPutHTTP(URL, flags)); 118 url_seterr(URL_BAD_SCHEME); 119 return (NULL); 120 } 121 122 /* 123 * Select the appropriate protocol for the URL scheme, and return the 124 * size of the document referenced by the URL if it exists. 125 */ 126 int 127 fetchStat(struct url *URL, struct url_stat *us, const char *flags) 128 { 129 130 if (us != NULL) { 131 us->size = -1; 132 us->atime = us->mtime = 0; 133 } 134 if (strcmp(URL->scheme, SCHEME_HTTP) == 0) 135 return (fetchStatHTTP(URL, us, flags)); 136 else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0) 137 return (fetchStatHTTP(URL, us, flags)); 138 url_seterr(URL_BAD_SCHEME); 139 return (-1); 140 } 141 142 /* 143 * Select the appropriate protocol for the URL scheme, and return a 144 * list of files in the directory pointed to by the URL. 145 */ 146 struct url_ent * 147 fetchList(struct url *URL, const char *flags) 148 { 149 150 if (strcmp(URL->scheme, SCHEME_HTTP) == 0) 151 return (fetchListHTTP(URL, flags)); 152 else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0) 153 return (fetchListHTTP(URL, flags)); 154 url_seterr(URL_BAD_SCHEME); 155 return (NULL); 156 } 157 158 /* 159 * Attempt to parse the given URL; if successful, call fetchXGet(). 160 */ 161 FILE * 162 fetchXGetURL(const char *URL, struct url_stat *us, const char *flags) 163 { 164 struct url *u; 165 FILE *f; 166 167 if ((u = fetchParseURL(URL)) == NULL) 168 return (NULL); 169 170 f = fetchXGet(u, us, flags); 171 172 fetchFreeURL(u); 173 return (f); 174 } 175 176 /* 177 * Attempt to parse the given URL; if successful, call fetchGet(). 178 */ 179 FILE * 180 fetchGetURL(const char *URL, const char *flags) 181 { 182 return (fetchXGetURL(URL, NULL, flags)); 183 } 184 185 /* 186 * Attempt to parse the given URL; if successful, call fetchPut(). 187 */ 188 FILE * 189 fetchPutURL(const char *URL, const char *flags) 190 { 191 struct url *u; 192 FILE *f; 193 194 if ((u = fetchParseURL(URL)) == NULL) 195 return (NULL); 196 197 f = fetchPut(u, flags); 198 199 fetchFreeURL(u); 200 return (f); 201 } 202 203 /* 204 * Attempt to parse the given URL; if successful, call fetchStat(). 205 */ 206 int 207 fetchStatURL(const char *URL, struct url_stat *us, const char *flags) 208 { 209 struct url *u; 210 int s; 211 212 if ((u = fetchParseURL(URL)) == NULL) 213 return (-1); 214 215 s = fetchStat(u, us, flags); 216 217 fetchFreeURL(u); 218 return (s); 219 } 220 221 /* 222 * Attempt to parse the given URL; if successful, call fetchList(). 223 */ 224 struct url_ent * 225 fetchListURL(const char *URL, const char *flags) 226 { 227 struct url *u; 228 struct url_ent *ue; 229 230 if ((u = fetchParseURL(URL)) == NULL) 231 return (NULL); 232 233 ue = fetchList(u, flags); 234 235 fetchFreeURL(u); 236 return (ue); 237 } 238 239 /* 240 * Make a URL 241 */ 242 struct url * 243 fetchMakeURL(const char *scheme, const char *host, int port, const char *doc, 244 const char *user, const char *pwd) 245 { 246 struct url *u; 247 248 if (!scheme || (!host && !doc)) { 249 url_seterr(URL_MALFORMED); 250 return (NULL); 251 } 252 253 if (port < 0 || port > 65535) { 254 url_seterr(URL_BAD_PORT); 255 return (NULL); 256 } 257 258 /* allocate struct url */ 259 if ((u = calloc(1, sizeof(*u))) == NULL) { 260 fetch_syserr(); 261 return (NULL); 262 } 263 u->netrcfd = -1; 264 265 if ((u->doc = strdup(doc ? doc : "/")) == NULL) { 266 fetch_syserr(); 267 free(u); 268 return (NULL); 269 } 270 271 #define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x) 272 seturl(scheme); 273 seturl(host); 274 seturl(user); 275 seturl(pwd); 276 #undef seturl 277 u->port = port; 278 279 return (u); 280 } 281 282 /* 283 * Return value of the given hex digit. 284 */ 285 static int 286 fetch_hexval(char ch) 287 { 288 289 if (ch >= '0' && ch <= '9') 290 return (ch - '0'); 291 else if (ch >= 'a' && ch <= 'f') 292 return (ch - 'a' + 10); 293 else if (ch >= 'A' && ch <= 'F') 294 return (ch - 'A' + 10); 295 return (-1); 296 } 297 298 /* 299 * Decode percent-encoded URL component from src into dst, stopping at end 300 * of string, or at @ or : separators. Returns a pointer to the unhandled 301 * part of the input string (null terminator, @, or :). No terminator is 302 * written to dst (it is the caller's responsibility). 303 */ 304 static const char * 305 fetch_pctdecode(char *dst, const char *src, size_t dlen) 306 { 307 int d1, d2; 308 char c; 309 const char *s; 310 311 for (s = src; *s != '\0' && *s != '@' && *s != ':'; s++) { 312 if (s[0] == '%' && (d1 = fetch_hexval(s[1])) >= 0 && 313 (d2 = fetch_hexval(s[2])) >= 0 && (d1 > 0 || d2 > 0)) { 314 c = d1 << 4 | d2; 315 s += 2; 316 } else if (s[0] == '%') { 317 /* Invalid escape sequence. */ 318 return (NULL); 319 } else { 320 c = *s; 321 } 322 if (dlen-- > 0) 323 *dst++ = c; 324 else 325 return (NULL); 326 } 327 return (s); 328 } 329 330 /* 331 * Split an URL into components. URL syntax is: 332 * [method:/][/[user[:pwd]@]host[:port]/][document] 333 * This almost, but not quite, RFC1738 URL syntax. 334 */ 335 struct url * 336 fetchParseURL(const char *URL) 337 { 338 char *doc; 339 const char *p, *q; 340 struct url *u; 341 int i, n; 342 343 /* allocate struct url */ 344 if ((u = calloc(1, sizeof(*u))) == NULL) { 345 fetch_syserr(); 346 return (NULL); 347 } 348 u->netrcfd = -1; 349 350 /* scheme name */ 351 if ((p = strstr(URL, ":/"))) { 352 if (p - URL > URL_SCHEMELEN) 353 goto ouch; 354 for (i = 0; URL + i < p; i++) 355 u->scheme[i] = tolower((unsigned char)URL[i]); 356 URL = ++p; 357 /* 358 * Only one slash: no host, leave slash as part of document 359 * Two slashes: host follows, strip slashes 360 */ 361 if (URL[1] == '/') 362 URL = (p += 2); 363 } else { 364 p = URL; 365 } 366 if (!*URL || *URL == '/' || *URL == '.' || 367 (u->scheme[0] == '\0' && 368 strchr(URL, '/') == NULL && strchr(URL, ':') == NULL)) 369 goto nohost; 370 371 p = strpbrk(URL, "/@"); 372 if (p && *p == '@') { 373 /* username */ 374 q = fetch_pctdecode(u->user, URL, URL_USERLEN); 375 if (q == NULL) 376 goto ouch; 377 378 /* password */ 379 if (*q == ':') { 380 q = fetch_pctdecode(u->pwd, q + 1, URL_PWDLEN); 381 if (q == NULL) 382 goto ouch; 383 } 384 p++; 385 } else { 386 p = URL; 387 } 388 389 /* hostname */ 390 if (*p == '[') { 391 q = p + 1 + strspn(p + 1, ":0123456789ABCDEFabcdef."); 392 if (*q++ != ']') 393 goto ouch; 394 } else { 395 /* valid characters in a DNS name */ 396 q = p + strspn(p, "-." "0123456789" 397 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "_" 398 "abcdefghijklmnopqrstuvwxyz"); 399 } 400 if ((*q != '\0' && *q != '/' && *q != ':') || q - p > MAXHOSTNAMELEN) 401 goto ouch; 402 for (i = 0; p + i < q; i++) 403 u->host[i] = tolower((unsigned char)p[i]); 404 u->host[i] = '\0'; 405 p = q; 406 407 /* port */ 408 if (*p == ':') { 409 for (n = 0, q = ++p; *q && (*q != '/'); q++) { 410 if (*q >= '0' && *q <= '9' && n < INT_MAX / 10) { 411 n = n * 10 + (*q - '0'); 412 } else { 413 /* invalid port */ 414 url_seterr(URL_BAD_PORT); 415 goto ouch; 416 } 417 } 418 #ifndef IPPORT_MAX 419 #define IPPORT_MAX 65535 420 #endif 421 if (p != q && (n < 1 || n > IPPORT_MAX)) 422 goto ouch; 423 u->port = n; 424 p = q; 425 } 426 427 nohost: 428 /* document */ 429 if (!*p) 430 p = "/"; 431 432 if (strcmp(u->scheme, SCHEME_HTTP) == 0 || 433 strcmp(u->scheme, SCHEME_HTTPS) == 0) { 434 const char hexnums[] = "0123456789abcdef"; 435 436 /* percent-escape whitespace. */ 437 if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) { 438 fetch_syserr(); 439 goto ouch; 440 } 441 u->doc = doc; 442 /* fragments are reserved for client-side processing, see 443 * https://www.rfc-editor.org/rfc/rfc9110.html#section-7.1 444 */ 445 while (*p != '\0' && *p != '#') { 446 if (!isspace((unsigned char)*p)) { 447 *doc++ = *p++; 448 } else { 449 *doc++ = '%'; 450 *doc++ = hexnums[((unsigned int)*p) >> 4]; 451 *doc++ = hexnums[((unsigned int)*p) & 0xf]; 452 p++; 453 } 454 } 455 *doc = '\0'; 456 } else if ((u->doc = strdup(p)) == NULL) { 457 fetch_syserr(); 458 goto ouch; 459 } 460 461 DEBUGF("scheme: \"%s\"\n" 462 "user: \"%s\"\n" 463 "password: \"%s\"\n" 464 "host: \"%s\"\n" 465 "port: \"%d\"\n" 466 "document: \"%s\"\n", 467 u->scheme, u->user, u->pwd, 468 u->host, u->port, u->doc); 469 470 return (u); 471 472 ouch: 473 free(u); 474 return (NULL); 475 } 476 477 /* 478 * Free a URL 479 */ 480 void 481 fetchFreeURL(struct url *u) 482 { 483 free(u->doc); 484 free(u); 485 }