// HTTP.cpp
1 /* 2 * Copyright (c) 2013-2026, The PurpleI2P Project 3 * 4 * This file is part of Purple i2pd project and licensed under BSD3 5 * 6 * See full license text in LICENSE file at top of project tree 7 */ 8 9 #include <algorithm> 10 #include <utility> 11 #include <stdio.h> 12 #include <ctime> 13 #include <charconv> 14 #include "util.h" 15 #include "Base.h" 16 #include "HTTP.h" 17 18 namespace i2p 19 { 20 namespace http 21 { 22 // list of valid HTTP methods 23 static constexpr std::array<std::string_view, 16> HTTP_METHODS = 24 { 25 "GET", "HEAD", "POST", "PUT", "PATCH", "DELETE", "OPTIONS", "CONNECT", // HTTP basic methods 26 "COPY", "LOCK", "MKCOL", "MOVE", "PROPFIND", "PROPPATCH", "UNLOCK", "SEARCH" // WebDAV methods, for SEARCH see rfc5323 27 }; 28 29 // list of valid HTTP versions 30 static constexpr std::array<std::string_view, 2> HTTP_VERSIONS = 31 { 32 "HTTP/1.0", "HTTP/1.1" 33 }; 34 35 static constexpr std::array<const char *, 7> weekdays = 36 { 37 "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" 38 }; 39 40 static constexpr std::array<const char *, 12> months = 41 { 42 "Jan", "Feb", "Mar", "Apr", "May", "Jun", 43 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" 44 }; 45 46 static inline bool is_http_version(std::string_view str) 47 { 48 return std::find(HTTP_VERSIONS.begin(), HTTP_VERSIONS.end(), str) != std::end(HTTP_VERSIONS); 49 } 50 51 static inline bool is_http_method(std::string_view str) 52 { 53 return std::find(HTTP_METHODS.begin(), HTTP_METHODS.end(), str) != std::end(HTTP_METHODS); 54 } 55 56 static void strsplit(std::string_view line, std::vector<std::string_view> &tokens, char delim, std::size_t limit = 0) 57 { 58 size_t count = 1, pos; 59 while ((pos = line.find (delim)) != line.npos) 60 { 61 count++; 62 if (limit > 0 && count >= limit) delim = '\n'; // reset delimiter 63 tokens.push_back (line.substr (0, pos)); 64 line = line.substr (pos + 1); 65 } 66 if (!line.empty ()) tokens.push_back (line); 67 } 68 69 static std::pair<std::string, std::string> 
parse_header_line(std::string_view line) 70 { 71 std::size_t pos = 0; 72 std::size_t len = 1; /*: */ 73 std::size_t max = line.length(); 74 if ((pos = line.find(':', pos)) == std::string::npos) 75 return std::pair{"", ""}; // no ':' found 76 if (pos + 1 < max) // ':' at the end of header is valid 77 { 78 while ((pos + len) < max && isspace(line.at(pos + len))) 79 len++; 80 if (len == 1) 81 return std::pair{"", ""}; // no following space, but something else 82 } 83 return std::pair{std::string (line.substr(0, pos)), std::string (line.substr(pos + len))}; 84 } 85 86 void gen_rfc7231_date(std::string & out) { 87 std::time_t now = std::time(nullptr); 88 char buf[128]; 89 std::tm *tm = std::gmtime(&now); 90 snprintf(buf, sizeof(buf), "%s, %02d %s %d %02d:%02d:%02d GMT", 91 weekdays[tm->tm_wday], tm->tm_mday, months[tm->tm_mon], 92 tm->tm_year + 1900, tm->tm_hour, tm->tm_min, tm->tm_sec 93 ); 94 out = buf; 95 } 96 97 bool URL::parse(const char *str, std::size_t len) 98 { 99 return parse({str, len ? 
len : strlen(str)}); 100 } 101 102 bool URL::parse(std::string_view url) 103 { 104 if (url.empty ()) return false; 105 std::size_t pos_p = 0; /* < current parse position */ 106 std::size_t pos_c = 0; /* < work position */ 107 if(url.at(0) != '/' || pos_p > 0) 108 { 109 std::size_t pos_s = 0; 110 111 /* schema */ 112 pos_c = url.find("://"); 113 if (pos_c != std::string::npos) { 114 schema = url.substr(0, pos_c); 115 pos_p = pos_c + 3; 116 } 117 118 /* user[:pass] */ 119 pos_s = url.find('/', pos_p); /* find first slash */ 120 pos_c = url.find('@', pos_p); /* find end of 'user' or 'user:pass' part */ 121 122 if (pos_c != std::string::npos && (pos_s == std::string::npos || pos_s > pos_c)) { 123 std::size_t delim = url.find(':', pos_p); 124 if (delim && delim != std::string::npos && delim < pos_c) { 125 user = url.substr(pos_p, delim - pos_p); 126 delim += 1; 127 pass = url.substr(delim, pos_c - delim); 128 } else if(delim) { 129 user = url.substr(pos_p, pos_c - pos_p); 130 } 131 pos_p = pos_c + 1; 132 } 133 134 /* hostname[:port][/path] */ 135 if (url.at(pos_p) == '[') // ipv6 136 { 137 auto pos_b = url.find(']', pos_p); 138 if (pos_b == std::string::npos) return false; 139 ipv6 = true; 140 pos_c = url.find_first_of(":/", pos_b); 141 } 142 else 143 pos_c = url.find_first_of(":/", pos_p); 144 145 if (pos_c == std::string::npos) { 146 /* only hostname, without post and path */ 147 host = ipv6 ? 148 url.substr(pos_p + 1, url.length() - 1) : 149 url.substr(pos_p, std::string::npos); 150 return true; 151 } else if (url.at(pos_c) == ':') { 152 host = ipv6 ? 153 url.substr(pos_p + 1, pos_c - pos_p - 2) : 154 url.substr(pos_p, pos_c - pos_p); 155 /* port[/path] */ 156 pos_p = pos_c + 1; 157 pos_c = url.find('/', pos_p); 158 std::string_view port_str = (pos_c == std::string::npos) 159 ? 
url.substr(pos_p, std::string::npos) 160 : url.substr(pos_p, pos_c - pos_p); 161 /* stoi throws exception on failure, we don't need it */ 162 port = 0; 163 for (char c : port_str) { 164 if (c < '0' || c > '9') 165 return false; 166 port *= 10; 167 port += c - '0'; 168 } 169 if (pos_c == std::string::npos) 170 return true; /* no path part */ 171 pos_p = pos_c; 172 } else { 173 /* start of path part found */ 174 host = ipv6 ? 175 url.substr(pos_p + 1, pos_c - pos_p - 2) : 176 url.substr(pos_p, pos_c - pos_p); 177 pos_p = pos_c; 178 } 179 } 180 181 /* pos_p now at start of path part */ 182 pos_c = url.find_first_of("?#", pos_p); 183 if (pos_c == std::string::npos) { 184 /* only path, without fragment and query */ 185 path = url.substr(pos_p, std::string::npos); 186 return true; 187 } else if (url.at(pos_c) == '?') { 188 /* found query part */ 189 hasquery = true; 190 path = url.substr(pos_p, pos_c - pos_p); 191 pos_p = pos_c + 1; 192 pos_c = url.find('#', pos_p); 193 if (pos_c == std::string::npos) { 194 /* no fragment */ 195 query = url.substr(pos_p, std::string::npos); 196 return true; 197 } else { 198 query = url.substr(pos_p, pos_c - pos_p); 199 pos_p = pos_c + 1; 200 } 201 } else { 202 /* found fragment part */ 203 path = url.substr(pos_p, pos_c - pos_p); 204 pos_p = pos_c + 1; 205 } 206 207 /* pos_p now at start of fragment part */ 208 frag = url.substr(pos_p, std::string::npos); 209 return true; 210 } 211 212 bool URL::parse_query(std::map<std::string, std::string> & params) 213 { 214 std::vector<std::string_view> tokens; 215 strsplit(query, tokens, '&'); 216 217 params.clear(); 218 for (const auto& it : tokens) { 219 if (!it.length()) // empty 220 continue; 221 std::size_t eq = it.find ('='); 222 if (eq != std::string::npos) { 223 auto e = std::pair<std::string, std::string>(it.substr(0, eq), it.substr(eq + 1)); 224 params.insert(e); 225 } else { 226 auto e = std::pair<std::string, std::string>(it, ""); 227 params.insert(e); 228 } 229 } 230 return true; 231 } 
232 233 std::string URL::to_string() { 234 std::string out = ""; 235 if (schema != "") { 236 out = schema + "://"; 237 if (user != "" && pass != "") { 238 out += user + ":" + pass + "@"; 239 } else if (user != "") { 240 out += user + "@"; 241 } 242 if (ipv6) { 243 if (port) { 244 out += "[" + host + "]:" + std::to_string(port); 245 } else { 246 out += "[" + host + "]"; 247 } 248 } else { 249 if (port) { 250 out += host + ":" + std::to_string(port); 251 } else { 252 out += host; 253 } 254 } 255 } 256 out += path; 257 if (hasquery) // add query even if it was empty 258 out += "?"; 259 if (query != "") 260 out += query; 261 if (frag != "") 262 out += "#" + frag; 263 return out; 264 } 265 266 bool URL::is_i2p() const 267 { 268 return host.rfind(".i2p") == ( host.size() - 4 ); 269 } 270 271 void HTTPMsg::add_header(const char *name, const std::string & value, bool replace) { 272 add_header(name, value.c_str(), replace); 273 } 274 275 void HTTPMsg::add_header(const char *name, const char *value, bool replace) { 276 std::size_t count = headers.count(name); 277 if (count && !replace) 278 return; 279 if (count) { 280 headers[name] = value; 281 return; 282 } 283 headers.insert(std::pair<std::string, std::string>(name, value)); 284 } 285 286 void HTTPMsg::del_header(const char *name) { 287 headers.erase(name); 288 } 289 290 std::string HTTPMsg::get_header(const std::string& name) const 291 { 292 auto it = headers.find(name); 293 if (it == headers.end()) 294 return ""; 295 else 296 return it->second; 297 } 298 299 int HTTPReq::parse(const char *buf, size_t len) 300 { 301 return parse({buf, len}); 302 } 303 304 int HTTPReq::parse(std::string_view str) 305 { 306 enum { REQ_LINE, HEADER_LINE } expect = REQ_LINE; 307 std::size_t eoh = str.find(HTTP_EOH); /* request head size */ 308 std::size_t eol = 0, pos = 0; 309 URL url; 310 311 if (eoh == std::string::npos) 312 return 0; /* str not contains complete request */ 313 314 while ((eol = str.find(CRLF, pos)) != std::string::npos) 
315 { 316 if (expect == REQ_LINE) 317 { 318 std::string_view line = str.substr(pos, eol - pos); 319 std::vector<std::string_view> tokens; 320 strsplit(line, tokens, ' '); 321 322 if (tokens.size() != 3) 323 return -1; 324 if (!is_http_method(tokens[0])) 325 return -1; 326 if (!is_http_version(tokens[2])) 327 return -1; 328 if (!url.parse(tokens[1])) 329 return -1; 330 /* all ok */ 331 method = tokens[0]; 332 uri = tokens[1]; 333 version = tokens[2]; 334 expect = HEADER_LINE; 335 } 336 else 337 { 338 std::string_view line = str.substr(pos, eol - pos); 339 auto p = parse_header_line(line); 340 if (p.first.length () > 0) 341 headers.push_back (p); 342 else 343 return -1; 344 } 345 pos = eol + CRLF.length(); 346 if (pos >= eoh) 347 break; 348 } 349 return eoh + HTTP_EOH.length(); 350 } 351 352 void HTTPReq::write(std::ostream & o) 353 { 354 o << method << " " << uri << " " << version << CRLF; 355 for (auto & h : headers) 356 o << h.first << ": " << h.second << CRLF; 357 o << CRLF; 358 } 359 360 std::string HTTPReq::to_string() 361 { 362 std::stringstream ss; 363 write(ss); 364 return ss.str(); 365 } 366 367 void HTTPReq::AddHeader (const std::string& name, const std::string& value) 368 { 369 headers.push_back (std::make_pair(name, value)); 370 } 371 372 void HTTPReq::UpdateHeader (const std::string& name, const std::string& value) 373 { 374 for (auto& it : headers) 375 if (it.first == name) 376 { 377 it.second = value; 378 break; 379 } 380 } 381 382 void HTTPReq::RemoveHeader (const std::string& name, const std::string& exempt) 383 { 384 for (auto it = headers.begin (); it != headers.end ();) 385 { 386 if (!it->first.compare(0, name.length (), name) && it->first != exempt) 387 it = headers.erase (it); 388 else 389 it++; 390 } 391 } 392 393 std::string HTTPReq::GetHeader (std::string_view name) const 394 { 395 for (auto& it : headers) 396 if (it.first == name) 397 return it.second; 398 return ""; 399 } 400 401 size_t HTTPReq::GetNumHeaders (std::string_view name) const 
402 { 403 size_t num = 0; 404 for (auto& it : headers) 405 if (it.first == name) num++; 406 return num; 407 } 408 409 bool HTTPRes::is_chunked() const 410 { 411 auto it = headers.find("Transfer-Encoding"); 412 if (it == headers.end()) 413 return false; 414 if (it->second.find("chunked") != std::string::npos) 415 return true; 416 return false; 417 } 418 419 bool HTTPRes::is_gzipped(bool includingI2PGzip) const 420 { 421 auto it = headers.find("Content-Encoding"); 422 if (it == headers.end()) 423 return false; /* no header */ 424 if (it->second.find("gzip") != std::string::npos) 425 return true; /* gotcha! */ 426 if (includingI2PGzip && it->second.find("x-i2p-gzip") != std::string::npos) 427 return true; 428 return false; 429 } 430 431 long int HTTPMsg::content_length() const 432 { 433 unsigned long int length = 0; 434 auto it = headers.find("Content-Length"); 435 if (it == headers.end()) 436 return -1; 437 errno = 0; 438 length = std::strtoul(it->second.c_str(), (char **) NULL, 10); 439 if (errno != 0) 440 return -1; 441 return length; 442 } 443 444 int HTTPRes::parse(const char *buf, size_t len) 445 { 446 return parse({buf,len}); 447 } 448 449 int HTTPRes::parse(std::string_view str) 450 { 451 enum { RES_LINE, HEADER_LINE } expect = RES_LINE; 452 std::size_t eoh = str.find(HTTP_EOH); /* request head size */ 453 std::size_t eol = 0, pos = 0; 454 455 if (eoh == std::string::npos) 456 return 0; /* str not contains complete request */ 457 458 while ((eol = str.find(CRLF, pos)) != std::string::npos) 459 { 460 if (expect == RES_LINE) 461 { 462 std::string_view line = str.substr(pos, eol - pos); 463 std::vector<std::string_view> tokens; 464 strsplit(line, tokens, ' ', 3); 465 if (tokens.size() != 3) 466 return -1; 467 if (!is_http_version(tokens[0])) 468 return -1; 469 auto res = std::from_chars(tokens[1].data (), tokens[1].data() + tokens[1].size(), code); 470 if (res.ec != std::errc()) 471 return -1; 472 if (code < 100 || code >= 600) 473 return -1; 474 /* all ok */ 475 
version = tokens[0]; 476 status = tokens[2]; 477 expect = HEADER_LINE; 478 } 479 else 480 { 481 std::string_view line = str.substr(pos, eol - pos); 482 auto p = parse_header_line(line); 483 if (p.first.length () > 0) 484 headers.insert (p); 485 else 486 return -1; 487 } 488 pos = eol + CRLF.length(); 489 if (pos >= eoh) 490 break; 491 } 492 return eoh + HTTP_EOH.length(); 493 } 494 495 std::string HTTPRes::to_string() { 496 if (version == "HTTP/1.1" && headers.count("Date") == 0) { 497 std::string date; 498 gen_rfc7231_date(date); 499 add_header("Date", date.c_str()); 500 } 501 if (status == "OK" && code != 200) 502 status = HTTPCodeToStatus(code); // update 503 if (body.length() > 0 && headers.count("Content-Length") == 0) 504 add_header("Content-Length", std::to_string(body.length()).c_str()); 505 /* build response */ 506 std::stringstream ss; 507 ss << version << " " << code << " " << status << CRLF; 508 for (auto & h : headers) { 509 ss << h.first << ": " << h.second << CRLF; 510 } 511 ss << CRLF; 512 if (body.length() > 0) 513 ss << body; 514 return ss.str(); 515 } 516 517 std::string_view HTTPCodeToStatus(int code) 518 { 519 std::string_view ptr; 520 switch (code) 521 { 522 case 105: ptr = "Name Not Resolved"; break; 523 /* success */ 524 case 200: ptr = "OK"; break; 525 case 206: ptr = "Partial Content"; break; 526 /* redirect */ 527 case 301: ptr = "Moved Permanently"; break; 528 case 302: ptr = "Found"; break; 529 case 304: ptr = "Not Modified"; break; 530 case 307: ptr = "Temporary Redirect"; break; 531 /* client error */ 532 case 400: ptr = "Bad Request"; break; 533 case 401: ptr = "Unauthorized"; break; 534 case 403: ptr = "Forbidden"; break; 535 case 404: ptr = "Not Found"; break; 536 case 407: ptr = "Proxy Authentication Required"; break; 537 case 408: ptr = "Request Timeout"; break; 538 /* server error */ 539 case 500: ptr = "Internal Server Error"; break; 540 case 502: ptr = "Bad Gateway"; break; 541 case 503: ptr = "Not Implemented"; break; 542 
case 504: ptr = "Gateway Timeout"; break; 543 default: ptr = "Unknown Status"; break; 544 } 545 return ptr; 546 } 547 548 std::string UrlDecode(std::string_view url, bool allow_null) 549 { 550 std::string decoded; 551 decoded.reserve (url.length ()); 552 size_t start = 0; 553 for (size_t i = 0; i < url.length (); i++) 554 { 555 auto c = url[i]; 556 if (c == '%') 557 { 558 decoded.append (url, start, i - start); 559 if (i + 2 <= url.length ()) 560 { 561 unsigned char ch; 562 auto res = std::from_chars(url.data() + i + 1, url.data() + i + 3, ch, 16); 563 if (res.ec == std::errc() && (ch || allow_null)) 564 decoded += ch; 565 else 566 decoded.append (url, i, 3); 567 i += 2; 568 start = i + 1; 569 } 570 else 571 break; 572 } 573 } 574 if (start < url.length ()) 575 decoded.append (url, start); 576 return decoded; 577 } 578 579 bool MergeChunkedResponse (std::istream& in, std::ostream& out) 580 { 581 std::string hexLen; 582 while (!in.eof ()) 583 { 584 std::getline (in, hexLen); 585 errno = 0; 586 long int len = strtoul(hexLen.c_str(), (char **) NULL, 16); 587 if (errno != 0) 588 return false; /* conversion error */ 589 if (len == 0) 590 return true; /* end of stream */ 591 if (len < 0 || len > 10 * 1024 * 1024) /* < 10Mb */ 592 return false; /* too large chunk */ 593 char * buf = new char[len]; 594 in.read (buf, len); 595 out.write (buf, len); 596 delete[] buf; 597 std::getline (in, hexLen); // read \r\n after chunk 598 } 599 return true; 600 } 601 602 std::string CreateBasicAuthorizationString (const std::string& user, const std::string& pass) 603 { 604 if (user.empty () && pass.empty ()) return ""; 605 return "Basic " + i2p::data::ToBase64Standard (user + ":" + pass); 606 } 607 608 } // http 609 } // i2p