/ libi2pd / HTTP.cpp
HTTP.cpp
  1  /*
  2  * Copyright (c) 2013-2026, The PurpleI2P Project
  3  *
  4  * This file is part of Purple i2pd project and licensed under BSD3
  5  *
  6  * See full license text in LICENSE file at top of project tree
  7  */
  8  
  9  #include <algorithm>
 10  #include <utility>
 11  #include <stdio.h>
 12  #include <ctime>
 13  #include <charconv>
 14  #include "util.h"
 15  #include "Base.h"
 16  #include "HTTP.h"
 17  
 18  namespace i2p
 19  {
 20  namespace http
 21  {
	// Request methods accepted by is_http_method()
	static constexpr std::array<std::string_view, 16> HTTP_METHODS =
	{
		// HTTP basic methods
		"GET", "HEAD", "POST", "PUT",
		"PATCH", "DELETE", "OPTIONS", "CONNECT",
		// WebDAV methods, for SEARCH see rfc5323
		"COPY", "LOCK", "MKCOL", "MOVE",
		"PROPFIND", "PROPPATCH", "UNLOCK", "SEARCH"
	};
 28  
	// Protocol versions accepted by is_http_version()
	static constexpr std::array<std::string_view, 2> HTTP_VERSIONS =
	{
		"HTTP/1.0",
		"HTTP/1.1"
	};
 34  
	// Abbreviated day names, indexed by std::tm::tm_wday (see gen_rfc7231_date)
	static constexpr std::array<const char *, 7> weekdays =
	{
		"Sun",
		"Mon", "Tue", "Wed", "Thu", "Fri",
		"Sat"
	};
 39  
	// Abbreviated month names, indexed by std::tm::tm_mon (see gen_rfc7231_date)
	static constexpr std::array<const char *, 12> months =
	{
		"Jan", "Feb", "Mar", "Apr",
		"May", "Jun", "Jul", "Aug",
		"Sep", "Oct", "Nov", "Dec"
	};
 45  
 46  	static inline bool is_http_version(std::string_view str)
 47  	{
 48  		return std::find(HTTP_VERSIONS.begin(), HTTP_VERSIONS.end(), str) != std::end(HTTP_VERSIONS);
 49  	}
 50  
 51  	static inline bool is_http_method(std::string_view str)
 52  	{
 53  		return std::find(HTTP_METHODS.begin(), HTTP_METHODS.end(), str) != std::end(HTTP_METHODS);
 54  	}
 55  
	// Splits line on delim and appends the pieces (views into line) to tokens.
	// Empty pieces between two delimiters are kept, an empty remainder at the
	// end is dropped. When limit > 0, the delimiter is switched to '\n' once
	// limit pieces are accounted for, so the remainder is only split further
	// on line feeds.
	static void strsplit(std::string_view line, std::vector<std::string_view> &tokens, char delim, std::size_t limit = 0)
	{
		std::size_t produced = 1; // the piece being cut counts together with the pending remainder
		for (;;)
		{
			const std::size_t cut = line.find (delim);
			if (cut == std::string_view::npos)
				break;
			++produced;
			if (limit != 0 && produced >= limit)
				delim = '\n'; // reset delimiter for the following searches
			tokens.emplace_back (line.data (), cut);
			line.remove_prefix (cut + 1);
		}
		if (!line.empty ())
			tokens.push_back (line);
	}
 68  
	// Splits a single "Name: value" header line (without the trailing CRLF).
	// Returns {name, value}; the whitespace following ':' is not part of value.
	// Returns a pair of empty strings when the line has no ':' or when ':' is
	// directly followed by a non-space character. "Name:" alone is accepted
	// and yields an empty value.
	static std::pair<std::string, std::string> parse_header_line(std::string_view line)
	{
		const std::size_t colon = line.find(':');
		if (colon == std::string_view::npos)
			return {}; // no ':' found

		const std::size_t max = line.length();
		std::size_t value_at = colon + 1;
		if (value_at < max) // ':' at the end of header is valid
		{
			// isspace() is only defined for values representable as unsigned char,
			// so header bytes >= 0x80 must not be passed as a (possibly negative) char
			while (value_at < max && isspace(static_cast<unsigned char>(line[value_at])))
				++value_at;
			if (value_at == colon + 1)
				return {}; // no following space, but something else
		}
		return {std::string (line.substr(0, colon)), std::string (line.substr(value_at))};
	}
 85  
 86  	void gen_rfc7231_date(std::string & out) {
 87  		std::time_t now = std::time(nullptr);
 88  		char buf[128];
 89  		std::tm *tm = std::gmtime(&now);
 90  		snprintf(buf, sizeof(buf), "%s, %02d %s %d %02d:%02d:%02d GMT",
 91  			weekdays[tm->tm_wday], tm->tm_mday, months[tm->tm_mon],
 92  			tm->tm_year + 1900, tm->tm_hour, tm->tm_min, tm->tm_sec
 93  		);
 94  		out = buf;
 95  	}
 96  
 97  	bool URL::parse(const char *str, std::size_t len)
 98  	{
 99  		return parse({str, len ? len : strlen(str)});
100  	}
101  
102  	bool URL::parse(std::string_view url)
103  	{
104  		if (url.empty ()) return false;
105  		std::size_t pos_p = 0; /* < current parse position */
106  		std::size_t pos_c = 0; /* < work position */
107  		if(url.at(0) != '/' || pos_p > 0)
108  		{
109  			std::size_t pos_s = 0;
110  
111  			/* schema */
112  			pos_c = url.find("://");
113  			if (pos_c != std::string::npos) {
114  				schema = url.substr(0, pos_c);
115  				pos_p = pos_c + 3;
116  			}
117  
118  			/* user[:pass] */
119  			pos_s = url.find('/', pos_p); /* find first slash */
120  			pos_c = url.find('@', pos_p); /* find end of 'user' or 'user:pass' part */
121  
122  			if (pos_c != std::string::npos && (pos_s == std::string::npos || pos_s > pos_c)) {
123  				std::size_t delim = url.find(':', pos_p);
124  				if (delim && delim != std::string::npos && delim < pos_c) {
125  					user = url.substr(pos_p, delim - pos_p);
126  					delim += 1;
127  					pass = url.substr(delim, pos_c - delim);
128  				} else if(delim) {
129  					user = url.substr(pos_p, pos_c - pos_p);
130  				}
131  				pos_p = pos_c + 1;
132  			}
133  
134  			/* hostname[:port][/path] */
135  			if (url.at(pos_p) == '[') // ipv6
136  			{
137  				auto pos_b = url.find(']', pos_p);
138  				if (pos_b == std::string::npos) return false;
139  				ipv6 = true;
140  				pos_c = url.find_first_of(":/", pos_b);
141  			}
142  			else
143  				pos_c = url.find_first_of(":/", pos_p);
144  
145  			if (pos_c == std::string::npos) {
146  				/* only hostname, without post and path */
147  				host = ipv6 ?
148  					url.substr(pos_p + 1, url.length() - 1) :
149  					url.substr(pos_p, std::string::npos);
150  				return true;
151  			} else if (url.at(pos_c) == ':') {
152  				host = ipv6 ?
153  					url.substr(pos_p + 1, pos_c - pos_p - 2) :
154  					url.substr(pos_p, pos_c - pos_p);
155  				/* port[/path] */
156  				pos_p = pos_c + 1;
157  				pos_c = url.find('/', pos_p);
158  				std::string_view port_str = (pos_c == std::string::npos)
159  					? url.substr(pos_p, std::string::npos)
160  					: url.substr(pos_p, pos_c - pos_p);
161  				/* stoi throws exception on failure, we don't need it */
162  				port = 0;
163  				for (char c : port_str) {
164  					if (c < '0' || c > '9')
165  						return false;
166  					port *= 10;
167  					port += c - '0';
168  				}
169  				if (pos_c == std::string::npos)
170  					return true; /* no path part */
171  				pos_p = pos_c;
172  			} else {
173  				/* start of path part found */
174  				host = ipv6 ?
175  					url.substr(pos_p + 1, pos_c - pos_p - 2) :
176  					url.substr(pos_p, pos_c - pos_p);
177  				pos_p = pos_c;
178  			}
179  		}
180  
181  		/* pos_p now at start of path part */
182  		pos_c = url.find_first_of("?#", pos_p);
183  		if (pos_c == std::string::npos) {
184  			/* only path, without fragment and query */
185  			path = url.substr(pos_p, std::string::npos);
186  			return true;
187  		} else if (url.at(pos_c) == '?') {
188  			/* found query part */
189  			hasquery = true;
190  			path = url.substr(pos_p, pos_c - pos_p);
191  			pos_p = pos_c + 1;
192  			pos_c = url.find('#', pos_p);
193  			if (pos_c == std::string::npos) {
194  				/* no fragment */
195  				query = url.substr(pos_p, std::string::npos);
196  				return true;
197  			} else {
198  				query = url.substr(pos_p, pos_c - pos_p);
199  				pos_p = pos_c + 1;
200  			}
201  		} else {
202  			/* found fragment part */
203  			path = url.substr(pos_p, pos_c - pos_p);
204  			pos_p = pos_c + 1;
205  		}
206  
207  		/* pos_p now at start of fragment part */
208  		frag = url.substr(pos_p, std::string::npos);
209  		return true;
210  	}
211  
212  	bool URL::parse_query(std::map<std::string, std::string> & params)
213  	{
214  		std::vector<std::string_view> tokens;
215  		strsplit(query, tokens, '&');
216  
217  		params.clear();
218  		for (const auto& it : tokens) {
219  			if (!it.length()) // empty
220  				continue;
221  			std::size_t eq = it.find ('=');
222  			if (eq != std::string::npos) {
223  				auto e = std::pair<std::string, std::string>(it.substr(0, eq), it.substr(eq + 1));
224  				params.insert(e);
225  			} else {
226  				auto e = std::pair<std::string, std::string>(it, "");
227  				params.insert(e);
228  			}
229  		}
230  		return true;
231  	}
232  
233  	std::string URL::to_string() {
234  		std::string out = "";
235  		if (schema != "") {
236  			out = schema + "://";
237  			if (user != "" && pass != "") {
238  				out += user + ":" + pass + "@";
239  			} else if (user != "") {
240  				out += user + "@";
241  			}
242  			if (ipv6) {
243  				if (port) {
244  					out += "[" + host + "]:" + std::to_string(port);
245  				} else {
246  					out += "[" + host + "]";
247  				}
248  			} else {
249  				if (port) {
250  					out += host + ":" + std::to_string(port);
251  				} else {
252  					out += host;
253  				}
254  			}
255  		}
256  		out += path;
257  		if (hasquery) // add query even if it was empty
258  			out += "?";
259  		if (query != "")
260  			out += query;
261  		if (frag != "")
262  			out += "#" + frag;
263  		return out;
264  	}
265  
266  	bool URL::is_i2p() const
267  	{
268  		return host.rfind(".i2p") == ( host.size() - 4 );
269  	}
270  
271  	void HTTPMsg::add_header(const char *name, const std::string & value, bool replace) {
272  		add_header(name, value.c_str(), replace);
273  	}
274  
275  	void HTTPMsg::add_header(const char *name, const char *value, bool replace) {
276  		std::size_t count = headers.count(name);
277  		if (count && !replace)
278  			return;
279  		if (count) {
280  			headers[name] = value;
281  			return;
282  		}
283  		headers.insert(std::pair<std::string, std::string>(name, value));
284  	}
285  
286  	void HTTPMsg::del_header(const char *name) {
287  		headers.erase(name);
288  	}
289  
290  	std::string HTTPMsg::get_header(const std::string& name) const
291  	{
292  		auto it = headers.find(name);
293  		if (it == headers.end())
294  			return "";
295  		else
296  			return it->second;
297  	}
298  
299  	int HTTPReq::parse(const char *buf, size_t len)
300  	{
301  		return parse({buf, len});
302  	}
303  
304  	int HTTPReq::parse(std::string_view str)
305  	{
306  		enum { REQ_LINE, HEADER_LINE } expect = REQ_LINE;
307  		std::size_t eoh = str.find(HTTP_EOH); /* request head size */
308  		std::size_t eol = 0, pos = 0;
309  		URL url;
310  
311  		if (eoh == std::string::npos)
312  			return 0; /* str not contains complete request */
313  
314  		while ((eol = str.find(CRLF, pos)) != std::string::npos)
315  		{
316  			if (expect == REQ_LINE)
317  			{
318  				std::string_view line = str.substr(pos, eol - pos);
319  				std::vector<std::string_view> tokens;
320  				strsplit(line, tokens, ' ');
321  
322  				if (tokens.size() != 3)
323  					return -1;
324  				if (!is_http_method(tokens[0]))
325  					return -1;
326  				if (!is_http_version(tokens[2]))
327  					return -1;
328  				if (!url.parse(tokens[1]))
329  					return -1;
330  				/* all ok */
331  				method  = tokens[0];
332  				uri     = tokens[1];
333  				version = tokens[2];
334  				expect  = HEADER_LINE;
335  			}
336  			else
337  			{
338  				std::string_view line = str.substr(pos, eol - pos);
339  				auto p = parse_header_line(line);
340  				if (p.first.length () > 0)
341  					headers.push_back (p);
342  				else
343  					return -1;
344  			}
345  			pos = eol + CRLF.length();
346  			if (pos >= eoh)
347  				break;
348  		}
349  		return eoh + HTTP_EOH.length();
350  	}
351  
352  	void HTTPReq::write(std::ostream & o)
353  	{
354  		o << method << " " << uri << " " << version << CRLF;
355  		for (auto & h : headers)
356  			o << h.first << ": " << h.second << CRLF;
357  		o << CRLF;
358  	}
359  
360  	std::string HTTPReq::to_string()
361  	{
362  		std::stringstream ss;
363  		write(ss);
364  		return ss.str();
365  	}
366  
367  	void HTTPReq::AddHeader (const std::string& name, const std::string& value)
368  	{
369  		headers.push_back (std::make_pair(name, value));
370  	}
371  
372  	void HTTPReq::UpdateHeader (const std::string& name, const std::string& value)
373  	{
374  		for (auto& it : headers)
375  			if (it.first == name)
376  			{
377  				it.second = value;
378  				break;
379  			}
380  	}
381  
382  	void HTTPReq::RemoveHeader (const std::string& name, const std::string& exempt)
383  	{
384  		for (auto it = headers.begin (); it != headers.end ();)
385  		{
386  			if (!it->first.compare(0, name.length (), name) && it->first != exempt)
387  				it = headers.erase (it);
388  			else
389  				it++;
390  		}
391  	}
392  
393  	std::string HTTPReq::GetHeader (std::string_view name) const
394  	{
395  		for (auto& it : headers)
396  			if (it.first == name)
397  				return it.second;
398  		return "";
399  	}
400  
401  	size_t HTTPReq::GetNumHeaders (std::string_view name) const
402  	{
403  		size_t num = 0;
404  		for (auto& it : headers)
405  			if (it.first == name) num++;
406  		return num;
407  	}
408  
409  	bool HTTPRes::is_chunked() const
410  	{
411  		auto it = headers.find("Transfer-Encoding");
412  		if (it == headers.end())
413  			return false;
414  		if (it->second.find("chunked") != std::string::npos)
415  			return true;
416  		return false;
417  	}
418  
419  	bool HTTPRes::is_gzipped(bool includingI2PGzip) const
420  	{
421  		auto it = headers.find("Content-Encoding");
422  		if (it == headers.end())
423  			return false; /* no header */
424  		if (it->second.find("gzip") != std::string::npos)
425  			return true; /* gotcha! */
426  		if (includingI2PGzip && it->second.find("x-i2p-gzip") != std::string::npos)
427  			return true;
428  		return false;
429  	}
430  
431  	long int HTTPMsg::content_length() const
432  	{
433  		unsigned long int length = 0;
434  		auto it = headers.find("Content-Length");
435  		if (it == headers.end())
436  			return -1;
437  		errno = 0;
438  		length = std::strtoul(it->second.c_str(), (char **) NULL, 10);
439  		if (errno != 0)
440  			return -1;
441  		return length;
442  	}
443  
444  	int HTTPRes::parse(const char *buf, size_t len)
445  	{
446  		return parse({buf,len});
447  	}
448  
449  	int HTTPRes::parse(std::string_view str)
450  	{
451  		enum { RES_LINE, HEADER_LINE } expect = RES_LINE;
452  		std::size_t eoh = str.find(HTTP_EOH); /* request head size */
453  		std::size_t eol = 0, pos = 0;
454  
455  		if (eoh == std::string::npos)
456  			return 0; /* str not contains complete request */
457  
458  		while ((eol = str.find(CRLF, pos)) != std::string::npos)
459  		{
460  			if (expect == RES_LINE)
461  			{
462  				std::string_view line = str.substr(pos, eol - pos);
463  				std::vector<std::string_view> tokens;
464  				strsplit(line, tokens, ' ', 3);
465  				if (tokens.size() != 3)
466  					return -1;
467  				if (!is_http_version(tokens[0]))
468  					return -1;
469  				auto res = std::from_chars(tokens[1].data (), tokens[1].data() + tokens[1].size(), code);
470  				if (res.ec != std::errc())
471  					return -1;
472  				if (code < 100 || code >= 600)
473  					return -1;
474  				/* all ok */
475  				version = tokens[0];
476  				status  = tokens[2];
477  				expect  = HEADER_LINE;
478  			}
479  			else
480  			{
481  				std::string_view line = str.substr(pos, eol - pos);
482  				auto p = parse_header_line(line);
483  				if (p.first.length () > 0)
484  					headers.insert (p);
485  				else
486  					return -1;
487  			}
488  			pos = eol + CRLF.length();
489  			if (pos >= eoh)
490  				break;
491  		}
492  		return eoh + HTTP_EOH.length();
493  	}
494  
495  	std::string HTTPRes::to_string() {
496  		if (version == "HTTP/1.1" && headers.count("Date") == 0) {
497  			std::string date;
498  			gen_rfc7231_date(date);
499  			add_header("Date", date.c_str());
500  		}
501  		if (status == "OK" && code != 200)
502  			status = HTTPCodeToStatus(code); // update
503  		if (body.length() > 0 && headers.count("Content-Length") == 0)
504  			add_header("Content-Length", std::to_string(body.length()).c_str());
505  		/* build response */
506  		std::stringstream ss;
507  		ss << version << " " << code << " " << status << CRLF;
508  		for (auto & h : headers) {
509  			ss << h.first << ": " << h.second << CRLF;
510  		}
511  		ss << CRLF;
512  		if (body.length() > 0)
513  			ss << body;
514  		return ss.str();
515  	}
516  
517  	std::string_view HTTPCodeToStatus(int code)
518  	{
519  		std::string_view ptr;
520  		switch (code)
521  		{
522  			case 105: ptr = "Name Not Resolved"; break;
523  			/* success */
524  			case 200: ptr = "OK"; break;
525  			case 206: ptr = "Partial Content"; break;
526  			/* redirect */
527  			case 301: ptr = "Moved Permanently"; break;
528  			case 302: ptr = "Found"; break;
529  			case 304: ptr = "Not Modified"; break;
530  			case 307: ptr = "Temporary Redirect"; break;
531  			/* client error */
532  			case 400: ptr = "Bad Request"; break;
533  			case 401: ptr = "Unauthorized"; break;
534  			case 403: ptr = "Forbidden"; break;
535  			case 404: ptr = "Not Found"; break;
536  			case 407: ptr = "Proxy Authentication Required"; break;
537  			case 408: ptr = "Request Timeout"; break;
538  			/* server error */
539  			case 500: ptr = "Internal Server Error"; break;
540  			case 502: ptr = "Bad Gateway"; break;
541  			case 503: ptr = "Not Implemented"; break;
542  			case 504: ptr = "Gateway Timeout"; break;
543  			default:  ptr = "Unknown Status"; break;
544  		}
545  		return ptr;
546  	}
547  
548  	std::string UrlDecode(std::string_view url, bool allow_null)
549  	{
550  		std::string decoded;
551  		decoded.reserve (url.length ());
552  		size_t start = 0;
553  		for (size_t i = 0; i < url.length (); i++)
554  		{
555  			auto c = url[i];
556  			if (c == '%')
557  			{
558  				decoded.append (url, start, i - start);
559  				if (i + 2 <= url.length ())
560  				{
561  					unsigned char ch;
562  					auto res = std::from_chars(url.data() + i + 1, url.data() + i + 3, ch, 16);
563  					if (res.ec == std::errc() && (ch || allow_null))
564  						decoded += ch;
565  					else
566  						decoded.append (url, i, 3);
567  					i += 2;
568  					start = i + 1;
569  				}
570  				else
571  					break;
572  			}
573  		}
574  		if (start < url.length ())
575  			decoded.append (url, start);
576  		return decoded;
577  	}
578  
579  	bool MergeChunkedResponse (std::istream& in, std::ostream& out)
580  	{
581  		std::string hexLen;
582  		while (!in.eof ())
583  		{
584  			std::getline (in, hexLen);
585  			errno = 0;
586  			long int len = strtoul(hexLen.c_str(), (char **) NULL, 16);
587  			if (errno != 0)
588  				return false; /* conversion error */
589  			if (len == 0)
590  				return true; /* end of stream */
591  			if (len < 0 || len > 10 * 1024 * 1024) /* < 10Mb */
592  				return false; /* too large chunk */
593  			char * buf = new char[len];
594  			in.read (buf, len);
595  			out.write (buf, len);
596  			delete[] buf;
597  			std::getline (in, hexLen); // read \r\n after chunk
598  		}
599  		return true;
600  	}
601  
602  	std::string CreateBasicAuthorizationString (const std::string& user, const std::string& pass)
603  	{
604  		if (user.empty () && pass.empty ()) return "";
605  		return "Basic " + i2p::data::ToBase64Standard (user + ":" + pass);
606  	}
607  
608  } // http
609  } // i2p