/ external / libfetch / fetch.c
fetch.c
  1  /*-
  2   * SPDX-License-Identifier: BSD-3-Clause
  3   *
  4   * Copyright (c) 1998-2004 Dag-Erling Smørgrav
  5   * All rights reserved.
  6   *
  7   * Redistribution and use in source and binary forms, with or without
  8   * modification, are permitted provided that the following conditions
  9   * are met:
 10   * 1. Redistributions of source code must retain the above copyright
 11   *    notice, this list of conditions and the following disclaimer
 12   *    in this position and unchanged.
 13   * 2. Redistributions in binary form must reproduce the above copyright
 14   *    notice, this list of conditions and the following disclaimer in the
 15   *    documentation and/or other materials provided with the distribution.
 16   * 3. The name of the author may not be used to endorse or promote products
 17   *    derived from this software without specific prior written permission
 18   *
 19   * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 20   * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 21   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 22   * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 23   * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 24   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 25   * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 26   * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 27   * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 28   * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 29   */
 30  
 31  #include "bsd_compat.h"
 32  
 33  #include <sys/param.h>
 34  
 35  #include <netinet/in.h>
 36  
 37  #include <errno.h>
 38  #include <ctype.h>
 39  #include <stdio.h>
 40  #include <stdlib.h>
 41  #include <string.h>
 42  
 43  #include "fetch.h"
 44  #include "common.h"
 45  
/*
 * Library-wide variables with external linkage.  Having static storage
 * duration, all of them start out zeroed except fetchRestartCalls, which
 * is explicitly initialized to 1.
 *
 * NOTE(review): the meaning of each knob is not visible in this file;
 * the names suggest an authentication callback, last-error state, a
 * timeout, extra HTTP headers and a transfer speed limit -- confirm
 * against fetch.h and the protocol back-ends before relying on that.
 */
auth_t	 fetchAuthMethod;
int	 fetchLastErrCode;
char	 fetchLastErrString[MAXERRSTRING];	/* capacity is MAXERRSTRING bytes */
int	 fetchTimeout;
int	 fetchRestartCalls = 1;			/* only variable with a non-zero default */
int	 fetchDebug;
const char	*fetchCustomHTTPHeaders;
int	 fetchSpeedLimit;
int	 fetchSpeedTime;
 55  
 56  
 57  /*** Local data **************************************************************/
 58  
/*
 * Error messages for parser errors.
 *
 * The URL_* values are the codes handed to url_seterr() by the functions
 * in this file.  Each table entry pairs one of those codes with a FETCH_*
 * constant and a human-readable message.  The last entry uses code -1 and
 * the "Unknown parser error" text; presumably it is the fallback used when
 * a code is not found in the table (confirm against the lookup code).
 */
#define URL_MALFORMED		1
#define URL_BAD_SCHEME		2
#define URL_BAD_PORT		3
static struct fetcherr url_errlist[] = {
	{ URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
	{ URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
	{ URL_BAD_PORT,		FETCH_URL,	"Invalid server port" },
	{ -1,			FETCH_UNKNOWN,	"Unknown parser error" }
};
 71  
 72  
 73  /*** Public API **************************************************************/
 74  
 75  /*
 76   * Select the appropriate protocol for the URL scheme, and return a
 77   * read-only stream connected to the document referenced by the URL.
 78   * Also fill out the struct url_stat.
 79   */
 80  FILE *
 81  fetchXGet(struct url *URL, struct url_stat *us, const char *flags)
 82  {
 83  
 84  	if (us != NULL) {
 85  		us->size = -1;
 86  		us->atime = us->mtime = 0;
 87  	}
 88  	if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
 89  		return (fetchXGetHTTP(URL, us, flags));
 90  	else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
 91  		return (fetchXGetHTTP(URL, us, flags));
 92  	url_seterr(URL_BAD_SCHEME);
 93  	return (NULL);
 94  }
 95  
 96  /*
 97   * Select the appropriate protocol for the URL scheme, and return a
 98   * read-only stream connected to the document referenced by the URL.
 99   */
100  FILE *
101  fetchGet(struct url *URL, const char *flags)
102  {
103  	return (fetchXGet(URL, NULL, flags));
104  }
105  
106  /*
107   * Select the appropriate protocol for the URL scheme, and return a
108   * write-only stream connected to the document referenced by the URL.
109   */
110  FILE *
111  fetchPut(struct url *URL, const char *flags)
112  {
113  
114  	if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
115  		return (fetchPutHTTP(URL, flags));
116  	else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
117  		return (fetchPutHTTP(URL, flags));
118  	url_seterr(URL_BAD_SCHEME);
119  	return (NULL);
120  }
121  
122  /*
123   * Select the appropriate protocol for the URL scheme, and return the
124   * size of the document referenced by the URL if it exists.
125   */
126  int
127  fetchStat(struct url *URL, struct url_stat *us, const char *flags)
128  {
129  
130  	if (us != NULL) {
131  		us->size = -1;
132  		us->atime = us->mtime = 0;
133  	}
134  	if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
135  		return (fetchStatHTTP(URL, us, flags));
136  	else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
137  		return (fetchStatHTTP(URL, us, flags));
138  	url_seterr(URL_BAD_SCHEME);
139  	return (-1);
140  }
141  
142  /*
143   * Select the appropriate protocol for the URL scheme, and return a
144   * list of files in the directory pointed to by the URL.
145   */
146  struct url_ent *
147  fetchList(struct url *URL, const char *flags)
148  {
149  
150  	if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
151  		return (fetchListHTTP(URL, flags));
152  	else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
153  		return (fetchListHTTP(URL, flags));
154  	url_seterr(URL_BAD_SCHEME);
155  	return (NULL);
156  }
157  
/*
 * String front-end to fetchXGet(): parse URL with fetchParseURL(), run
 * fetchXGet() on the result and release the parsed URL again.  Returns
 * the stream from fetchXGet(), or NULL if the URL could not be parsed.
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	FILE *stream = NULL;

	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		stream = fetchXGet(parsed, us, flags);
		fetchFreeURL(parsed);
	}
	return (stream);
}
175  
/*
 * Convenience form of fetchXGetURL() for callers that do not want the
 * struct url_stat filled in: forwards URL and flags with a NULL stat
 * pointer and returns the resulting stream (or NULL).
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
	struct url_stat *no_stat = NULL;

	return (fetchXGetURL(URL, no_stat, flags));
}
184  
/*
 * String front-end to fetchPut(): parse URL with fetchParseURL(), run
 * fetchPut() on the result and release the parsed URL again.  Returns
 * the stream from fetchPut(), or NULL if the URL could not be parsed.
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
	struct url *parsed;
	FILE *stream = NULL;

	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		stream = fetchPut(parsed, flags);
		fetchFreeURL(parsed);
	}
	return (stream);
}
202  
/*
 * String front-end to fetchStat(): parse URL with fetchParseURL(), run
 * fetchStat() on the result and release the parsed URL again.  Returns
 * the status from fetchStat(), or -1 if the URL could not be parsed.
 */
int
fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	int status = -1;

	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		status = fetchStat(parsed, us, flags);
		fetchFreeURL(parsed);
	}
	return (status);
}
220  
/*
 * String front-end to fetchList(): parse URL with fetchParseURL(), run
 * fetchList() on the result and release the parsed URL again.  Returns
 * the list from fetchList(), or NULL if the URL could not be parsed.
 */
struct url_ent *
fetchListURL(const char *URL, const char *flags)
{
	struct url *parsed;
	struct url_ent *entries = NULL;

	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		entries = fetchList(parsed, flags);
		fetchFreeURL(parsed);
	}
	return (entries);
}
238  
239  /*
240   * Make a URL
241   */
242  struct url *
243  fetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
244      const char *user, const char *pwd)
245  {
246  	struct url *u;
247  
248  	if (!scheme || (!host && !doc)) {
249  		url_seterr(URL_MALFORMED);
250  		return (NULL);
251  	}
252  
253  	if (port < 0 || port > 65535) {
254  		url_seterr(URL_BAD_PORT);
255  		return (NULL);
256  	}
257  
258  	/* allocate struct url */
259  	if ((u = calloc(1, sizeof(*u))) == NULL) {
260  		fetch_syserr();
261  		return (NULL);
262  	}
263  	u->netrcfd = -1;
264  
265  	if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
266  		fetch_syserr();
267  		free(u);
268  		return (NULL);
269  	}
270  
271  #define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x)
272  	seturl(scheme);
273  	seturl(host);
274  	seturl(user);
275  	seturl(pwd);
276  #undef seturl
277  	u->port = port;
278  
279  	return (u);
280  }
281  
/*
 * Map a hexadecimal digit character ('0'-'9', 'a'-'f' or 'A'-'F') to its
 * numeric value 0..15.  Any other character yields -1.
 */
static int
fetch_hexval(char ch)
{
	int value = -1;

	if ('0' <= ch && ch <= '9')
		value = ch - '0';
	else if ('a' <= ch && ch <= 'f')
		value = 10 + (ch - 'a');
	else if ('A' <= ch && ch <= 'F')
		value = 10 + (ch - 'A');

	return (value);
}
297  
/*
 * Percent-decode one URL component.
 *
 * Bytes are read from src until the end of the string or an '@' or ':'
 * separator and written to dst, which has room for dlen bytes.  A "%XX"
 * escape is replaced by the byte it encodes.  No terminator is stored in
 * dst; that is left to the caller.
 *
 * Returns a pointer to the first unconsumed input character (the null
 * terminator, '@' or ':'), or NULL if an escape is invalid (not two hex
 * digits, or "%00") or if the decoded output does not fit in dlen bytes.
 */
static const char *
fetch_pctdecode(char *dst, const char *src, size_t dlen)
{
	const char *in = src;

	while (*in != '\0' && *in != '@' && *in != ':') {
		char out;

		if (*in != '%') {
			out = *in;
		} else {
			int hi, lo;

			/* Only look at the second digit if the first is valid. */
			hi = fetch_hexval(in[1]);
			lo = (hi < 0) ? -1 : fetch_hexval(in[2]);
			if (hi < 0 || lo < 0 || (hi == 0 && lo == 0)) {
				/* Invalid escape sequence. */
				return (NULL);
			}
			out = hi << 4 | lo;
			in += 2;
		}

		/* Refuse to write past the end of the destination. */
		if (dlen == 0)
			return (NULL);
		dlen--;
		*dst++ = out;
		in++;
	}
	return (in);
}
329  
330  /*
331   * Split an URL into components. URL syntax is:
332   * [method:/][/[user[:pwd]@]host[:port]/][document]
333   * This almost, but not quite, RFC1738 URL syntax.
334   */
335  struct url *
336  fetchParseURL(const char *URL)
337  {
338  	char *doc;
339  	const char *p, *q;
340  	struct url *u;
341  	int i, n;
342  
343  	/* allocate struct url */
344  	if ((u = calloc(1, sizeof(*u))) == NULL) {
345  		fetch_syserr();
346  		return (NULL);
347  	}
348  	u->netrcfd = -1;
349  
350  	/* scheme name */
351  	if ((p = strstr(URL, ":/"))) {
352                  if (p - URL > URL_SCHEMELEN)
353                          goto ouch;
354                  for (i = 0; URL + i < p; i++)
355                          u->scheme[i] = tolower((unsigned char)URL[i]);
356  		URL = ++p;
357  		/*
358  		 * Only one slash: no host, leave slash as part of document
359  		 * Two slashes: host follows, strip slashes
360  		 */
361  		if (URL[1] == '/')
362  			URL = (p += 2);
363  	} else {
364  		p = URL;
365  	}
366  	if (!*URL || *URL == '/' || *URL == '.' ||
367  	    (u->scheme[0] == '\0' &&
368  		strchr(URL, '/') == NULL && strchr(URL, ':') == NULL))
369  		goto nohost;
370  
371  	p = strpbrk(URL, "/@");
372  	if (p && *p == '@') {
373  		/* username */
374  		q = fetch_pctdecode(u->user, URL, URL_USERLEN);
375  		if (q == NULL)
376  			goto ouch;
377  
378  		/* password */
379  		if (*q == ':') {
380  			q = fetch_pctdecode(u->pwd, q + 1, URL_PWDLEN);
381  			if (q == NULL)
382  				goto ouch;
383  		}
384  		p++;
385  	} else {
386  		p = URL;
387  	}
388  
389  	/* hostname */
390  	if (*p == '[') {
391  		q = p + 1 + strspn(p + 1, ":0123456789ABCDEFabcdef.");
392  		if (*q++ != ']')
393  			goto ouch;
394  	} else {
395  		/* valid characters in a DNS name */
396  		q = p + strspn(p, "-." "0123456789"
397  		    "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "_"
398  		    "abcdefghijklmnopqrstuvwxyz");
399  	}
400  	if ((*q != '\0' && *q != '/' && *q != ':') || q - p > MAXHOSTNAMELEN)
401  		goto ouch;
402  	for (i = 0; p + i < q; i++)
403  		u->host[i] = tolower((unsigned char)p[i]);
404  	u->host[i] = '\0';
405  	p = q;
406  
407  	/* port */
408  	if (*p == ':') {
409  		for (n = 0, q = ++p; *q && (*q != '/'); q++) {
410  			if (*q >= '0' && *q <= '9' && n < INT_MAX / 10) {
411  				n = n * 10 + (*q - '0');
412  			} else {
413  				/* invalid port */
414  				url_seterr(URL_BAD_PORT);
415  				goto ouch;
416  			}
417  		}
418  #ifndef IPPORT_MAX
419  #define IPPORT_MAX 65535
420  #endif
421  		if (p != q && (n < 1 || n > IPPORT_MAX))
422  			goto ouch;
423  		u->port = n;
424  		p = q;
425  	}
426  
427  nohost:
428  	/* document */
429  	if (!*p)
430  		p = "/";
431  
432  	if (strcmp(u->scheme, SCHEME_HTTP) == 0 ||
433  	    strcmp(u->scheme, SCHEME_HTTPS) == 0) {
434  		const char hexnums[] = "0123456789abcdef";
435  
436  		/* percent-escape whitespace. */
437  		if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
438  			fetch_syserr();
439  			goto ouch;
440  		}
441  		u->doc = doc;
442  		/* fragments are reserved for client-side processing, see
443  		 * https://www.rfc-editor.org/rfc/rfc9110.html#section-7.1
444  		 */
445  		while (*p != '\0' && *p != '#') {
446  			if (!isspace((unsigned char)*p)) {
447  				*doc++ = *p++;
448  			} else {
449  				*doc++ = '%';
450  				*doc++ = hexnums[((unsigned int)*p) >> 4];
451  				*doc++ = hexnums[((unsigned int)*p) & 0xf];
452  				p++;
453  			}
454  		}
455  		*doc = '\0';
456  	} else if ((u->doc = strdup(p)) == NULL) {
457  		fetch_syserr();
458  		goto ouch;
459  	}
460  
461  	DEBUGF("scheme:   \"%s\"\n"
462  	    "user:     \"%s\"\n"
463  	    "password: \"%s\"\n"
464  	    "host:     \"%s\"\n"
465  	    "port:     \"%d\"\n"
466  	    "document: \"%s\"\n",
467  	    u->scheme, u->user, u->pwd,
468  	    u->host, u->port, u->doc);
469  
470  	return (u);
471  
472  ouch:
473  	free(u);
474  	return (NULL);
475  }
476  
477  /*
478   * Free a URL
479   */
480  void
481  fetchFreeURL(struct url *u)
482  {
483  	free(u->doc);
484  	free(u);
485  }