1 /* 2 * Copyright 2022 Haiku Inc. All rights reserved. 3 * Distributed under the terms of the MIT License. 4 * 5 * Authors: 6 * Niels Sascha Reedijk, niels.reedijk@gmail.com 7 */ 8 9 #include "HttpParser.h" 10 11 #include <stdexcept> 12 #include <string> 13 14 #include <HttpFields.h> 15 #include <NetServicesDefs.h> 16 #include <ZlibCompressionAlgorithm.h> 17 18 using namespace std::literals; 19 using namespace BPrivate::Network; 20 21 22 // #pragma mark -- HttpParser 23 24 25 /*! 26 \brief Explicitly mark the response as having no content. 27 28 This is done in cases where the request was a HEAD request. Setting it to no content, will 29 instruct the parser to move to completion after all the header fields have been parsed. 30 */ 31 void 32 HttpParser::SetNoContent() noexcept 33 { 34 if (fStreamState > HttpInputStreamState::Fields) 35 debugger("Cannot set the parser to no content after parsing of the body has started"); 36 fBodyType = HttpBodyType::NoContent; 37 }; 38 39 40 /*! 41 \brief Parse the status from the \a buffer and store it in \a status. 42 43 \retval true The status was succesfully parsed 44 \retval false There is not enough data in the buffer for a full status. 45 46 \exception BNetworkRequestException The status does not conform to the HTTP spec. 47 */ 48 bool 49 HttpParser::ParseStatus(HttpBuffer& buffer, BHttpStatus& status) 50 { 51 if (fStreamState != HttpInputStreamState::StatusLine) 52 debugger("The Status line has already been parsed"); 53 54 auto statusLine = buffer.GetNextLine(); 55 if (!statusLine) 56 return false; 57 58 auto codeStart = statusLine->FindFirst(' ') + 1; 59 if (codeStart < 0) 60 throw BNetworkRequestError(__PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError); 61 62 auto codeEnd = statusLine->FindFirst(' ', codeStart); 63 64 if (codeEnd < 0 || (codeEnd - codeStart) != 3) 65 throw BNetworkRequestError(__PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError); 66 67 std::string statusCodeString(statusLine->String() + codeStart, 3); 68 69 // build the output 70 try { 71 status.code = std::stol(statusCodeString); 72 } catch (...) { 73 throw BNetworkRequestError(__PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError); 74 } 75 76 status.text = std::move(statusLine.value()); 77 fStatus.code = status.code; // cache the status code 78 fStreamState = HttpInputStreamState::Fields; 79 return true; 80 } 81 82 83 /*! 84 \brief Parse the fields from the \a buffer and store it in \a fields. 85 86 The fields are parsed incrementally, meaning that even if the full header is not yet in the 87 \a buffer, it will still parse all complete fields and store them in the \a fields. 88 89 After all fields have been parsed, it will determine the properties of the request body. 90 This means it will determine whether there is any content compression, if there is a body, 91 and if so if it has a fixed size or not. 92 93 \retval true All fields were succesfully parsed 94 \retval false There is not enough data in the buffer to complete parsing of fields. 95 96 \exception BNetworkRequestException The fields not conform to the HTTP spec. 97 */ 98 bool 99 HttpParser::ParseFields(HttpBuffer& buffer, BHttpFields& fields) 100 { 101 if (fStreamState != HttpInputStreamState::Fields) 102 debugger("The parser is not expecting header fields at this point"); 103 104 auto fieldLine = buffer.GetNextLine(); 105 106 while (fieldLine && !fieldLine.value().IsEmpty()) { 107 // Parse next header line 108 fields.AddField(fieldLine.value()); 109 fieldLine = buffer.GetNextLine(); 110 } 111 112 if (!fieldLine || (fieldLine && !fieldLine.value().IsEmpty())) { 113 // there is more to parse 114 return false; 115 } 116 117 // Determine the properties for the body 118 // RFC 7230 section 3.3.3 has a prioritized list of 7 rules around determining the body: 119 std::optional<off_t> bodyBytesTotal = std::nullopt; 120 if (fBodyType == HttpBodyType::NoContent || fStatus.StatusCode() == BHttpStatusCode::NoContent 121 || fStatus.StatusCode() == BHttpStatusCode::NotModified) { 122 // [1] In case of HEAD (set previously), status codes 1xx (TODO!), status code 204 or 304, 123 // no content [2] NOT SUPPORTED: when doing a CONNECT request, no content 124 fBodyType = HttpBodyType::NoContent; 125 fStreamState = HttpInputStreamState::Done; 126 } else if (auto header = fields.FindField("Transfer-Encoding"sv); 127 header != fields.end() && header->Value() == "chunked"sv) { 128 // [3] If there is a Transfer-Encoding heading set to 'chunked' 129 // TODO: support the more advanced rules in the RFC around the meaning of this field 130 fBodyType = HttpBodyType::Chunked; 131 fStreamState = HttpInputStreamState::Body; 132 } else if (fields.CountFields("Content-Length"sv) > 0) { 133 // [4] When there is no Transfer-Encoding, then look for Content-Encoding: 134 // - If there are more than one, the values must match 135 // - The value must be a valid number 136 // [5] If there is a valid value, then that is the expected size of the body 137 try { 138 auto contentLength = std::string(); 139 for (const auto& field: fields) { 140 if (field.Name() == "Content-Length"sv) { 141 if (contentLength.size() == 0) 142 contentLength = field.Value(); 143 else if (contentLength != field.Value()) { 144 throw BNetworkRequestError(__PRETTY_FUNCTION__, 145 BNetworkRequestError::ProtocolError, 146 "Multiple Content-Length fields with differing values"); 147 } 148 } 149 } 150 bodyBytesTotal = std::stol(contentLength); 151 if (*bodyBytesTotal == 0) { 152 fBodyType = HttpBodyType::NoContent; 153 fStreamState = HttpInputStreamState::Done; 154 } else { 155 fBodyType = HttpBodyType::FixedSize; 156 fStreamState = HttpInputStreamState::Body; 157 } 158 } catch (const std::logic_error& e) { 159 throw BNetworkRequestError(__PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError, 160 "Cannot parse Content-Length field value (logic_error)"); 161 } 162 } else { 163 // [6] Applies to request messages only (this is a response) 164 // [7] If nothing else then the received message is all data until connection close 165 // (this is the default) 166 fStreamState = HttpInputStreamState::Body; 167 } 168 169 // Set up the body parser based on the logic above. 170 switch (fBodyType) { 171 case HttpBodyType::VariableSize: 172 fBodyParser = std::make_unique<HttpRawBodyParser>(); 173 break; 174 case HttpBodyType::FixedSize: 175 fBodyParser = std::make_unique<HttpRawBodyParser>(*bodyBytesTotal); 176 break; 177 case HttpBodyType::Chunked: 178 fBodyParser = std::make_unique<HttpChunkedBodyParser>(); 179 break; 180 case HttpBodyType::NoContent: 181 default: 182 return true; 183 } 184 185 // Check Content-Encoding for compression 186 auto header = fields.FindField("Content-Encoding"sv); 187 if (header != fields.end() && (header->Value() == "gzip" || header->Value() == "deflate")) { 188 fBodyParser = std::make_unique<HttpBodyDecompression>(std::move(fBodyParser)); 189 } 190 191 return true; 192 } 193 194 195 /*! 196 \brief Parse the body from the \a buffer and use \a writeToBody function to save. 197 198 The \a readEnd parameter indicates to the parser that the buffer currently contains all the 199 expected data for this request. 200 */ 201 size_t 202 HttpParser::ParseBody(HttpBuffer& buffer, HttpTransferFunction writeToBody, bool readEnd) 203 { 204 if (fStreamState < HttpInputStreamState::Body || fStreamState == HttpInputStreamState::Done) 205 debugger("The parser is not in the correct state to parse a body"); 206 207 auto parseResult = fBodyParser->ParseBody(buffer, writeToBody, readEnd); 208 209 if (parseResult.complete) 210 fStreamState = HttpInputStreamState::Done; 211 212 return parseResult.bytesParsed; 213 } 214 215 216 /*! 217 \brief Return if the body is currently expecting to having content. 218 219 This may change if the header fields have not yet been parsed, as these may contain 220 instructions about the body having no content. 221 */ 222 bool 223 HttpParser::HasContent() const noexcept 224 { 225 return fBodyType != HttpBodyType::NoContent; 226 } 227 228 229 /*! 230 \brief Return the total size of the body, if known. 231 */ 232 std::optional<off_t> 233 HttpParser::BodyBytesTotal() const noexcept 234 { 235 if (fBodyParser) 236 return fBodyParser->TotalBodySize(); 237 return std::nullopt; 238 } 239 240 241 /*! 242 \brief Return the number of body bytes transferred from the response. 243 */ 244 off_t 245 HttpParser::BodyBytesTransferred() const noexcept 246 { 247 if (fBodyParser) 248 return fBodyParser->TransferredBodySize(); 249 return 0; 250 } 251 252 253 /*! 254 \brief Check if the body is fully parsed. 255 */ 256 bool 257 HttpParser::Complete() const noexcept 258 { 259 return fStreamState == HttpInputStreamState::Done; 260 } 261 262 263 // #pragma mark -- HttpBodyParser 264 265 266 /*! 267 \brief Default implementation to return std::nullopt. 268 */ 269 std::optional<off_t> 270 HttpBodyParser::TotalBodySize() const noexcept 271 { 272 return std::nullopt; 273 } 274 275 276 /*! 277 \brief Return the number of body bytes read from the stream so far. 278 279 For chunked transfers, this excludes the chunk headers and other metadata. 280 */ 281 off_t 282 HttpBodyParser::TransferredBodySize() const noexcept 283 { 284 return fTransferredBodySize; 285 } 286 287 288 // #pragma mark -- HttpRawBodyParser 289 /*! 290 \brief Construct a HttpRawBodyParser with an unknown content size. 291 */ 292 HttpRawBodyParser::HttpRawBodyParser() 293 { 294 } 295 296 297 /*! 298 \brief Construct a HttpRawBodyParser with expected \a bodyBytesTotal size. 299 */ 300 HttpRawBodyParser::HttpRawBodyParser(off_t bodyBytesTotal) 301 : 302 fBodyBytesTotal(bodyBytesTotal) 303 { 304 } 305 306 307 /*! 308 \brief Parse a regular (non-chunked) body from a buffer. 309 310 The buffer is parsed into a target using the \a writeToBody function. 311 312 The \a readEnd argument indicates whether the current \a buffer contains all the expected data. 313 In case the total body size is known, and the remaining bytes in the buffer are smaller than 314 the expected remainder, a ProtocolError will be raised. The data in the buffer will *not* be 315 copied to the target. 316 317 Also, if the body size is known, and the data in the \a buffer is larger than the expected 318 expected length, then it will only read the bytes needed and leave the remainder in the buffer. 319 320 It is required that the \a writeToBody function writes all the bytes it is asked to; this 321 method does not support partial writes and throws an exception when it fails. 322 323 \exception BNetworkRequestError In case the buffer contains too little or invalid data. 324 325 \returns The number of bytes parsed from the \a buffer. 326 */ 327 BodyParseResult 328 HttpRawBodyParser::ParseBody(HttpBuffer& buffer, HttpTransferFunction writeToBody, bool readEnd) 329 { 330 auto bytesToRead = buffer.RemainingBytes(); 331 if (fBodyBytesTotal) { 332 auto expectedRemainingBytes = *fBodyBytesTotal - fTransferredBodySize; 333 if (expectedRemainingBytes < static_cast<off_t>(buffer.RemainingBytes())) 334 bytesToRead = expectedRemainingBytes; 335 else if (readEnd && expectedRemainingBytes > static_cast<off_t>(buffer.RemainingBytes())) { 336 throw BNetworkRequestError(__PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError, 337 "Message body is incomplete; less data received than expected"); 338 } 339 } 340 341 // Copy the data 342 auto bytesRead = buffer.WriteTo(writeToBody, bytesToRead); 343 fTransferredBodySize += bytesRead; 344 345 if (bytesRead != bytesToRead) { 346 // Fail if not all expected bytes are written. 347 throw BNetworkRequestError(__PRETTY_FUNCTION__, BNetworkRequestError::SystemError, 348 "Could not write all available body bytes to the target."); 349 } 350 351 if (fBodyBytesTotal) { 352 if (*fBodyBytesTotal == fTransferredBodySize) 353 return {bytesRead, bytesRead, true}; 354 else 355 return {bytesRead, bytesRead, false}; 356 } else 357 return {bytesRead, bytesRead, readEnd}; 358 } 359 360 361 /*! 362 \brief Override default implementation and return known body size (or std::nullopt) 363 */ 364 std::optional<off_t> 365 HttpRawBodyParser::TotalBodySize() const noexcept 366 { 367 return fBodyBytesTotal; 368 } 369 370 371 // #pragma mark -- HttpChunkedBodyParser 372 /*! 373 \brief Parse a chunked body from a buffer. 374 375 The contents of the cunks are copied into a target using the \a writeToBody function. 376 377 The \a readEnd argument indicates whether the current \a buffer contains all the expected data. 378 In case the chunk argument indicates that more data was to come, an exception is thrown. 379 380 It is required that the \a writeToBody function writes all the bytes it is asked to; this 381 method does not support partial writes and throws an exception when it fails. 382 383 \exception BNetworkRequestError In case there is an error parsing the buffer, or there is too 384 little data. 385 386 \returns The number of bytes parsed from the \a buffer. 387 */ 388 BodyParseResult 389 HttpChunkedBodyParser::ParseBody(HttpBuffer& buffer, HttpTransferFunction writeToBody, bool readEnd) 390 { 391 size_t totalBytesRead = 0; 392 while (buffer.RemainingBytes() > 0) { 393 switch (fChunkParserState) { 394 case ChunkSize: 395 { 396 // Read the next chunk size from the buffer; if unsuccesful wait for more data 397 auto chunkSizeString = buffer.GetNextLine(); 398 if (!chunkSizeString) 399 return {totalBytesRead, totalBytesRead, false}; 400 auto chunkSizeStr = std::string(chunkSizeString.value().String()); 401 try { 402 size_t pos = 0; 403 fRemainingChunkSize = std::stoll(chunkSizeStr, &pos, 16); 404 if (pos < chunkSizeStr.size() && chunkSizeStr[pos] != ';') { 405 throw BNetworkRequestError( 406 __PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError); 407 } 408 } catch (const std::invalid_argument&) { 409 throw BNetworkRequestError( 410 __PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError); 411 } catch (const std::out_of_range&) { 412 throw BNetworkRequestError( 413 __PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError); 414 } 415 416 if (fRemainingChunkSize > 0) 417 fChunkParserState = Chunk; 418 else 419 fChunkParserState = Trailers; 420 break; 421 } 422 423 case Chunk: 424 { 425 size_t bytesToRead; 426 if (fRemainingChunkSize > static_cast<off_t>(buffer.RemainingBytes())) 427 bytesToRead = buffer.RemainingBytes(); 428 else 429 bytesToRead = fRemainingChunkSize; 430 431 auto bytesRead = buffer.WriteTo(writeToBody, bytesToRead); 432 if (bytesRead != bytesToRead) { 433 // Fail if not all expected bytes are written. 434 throw BNetworkRequestError(__PRETTY_FUNCTION__, 435 BNetworkRequestError::SystemError, 436 "Could not write all available body bytes to the target."); 437 } 438 439 fTransferredBodySize += bytesRead; 440 totalBytesRead += bytesRead; 441 fRemainingChunkSize -= bytesRead; 442 if (fRemainingChunkSize == 0) 443 fChunkParserState = ChunkEnd; 444 break; 445 } 446 447 case ChunkEnd: 448 { 449 if (buffer.RemainingBytes() < 2) { 450 // not enough data in the buffer to finish the chunk 451 return {totalBytesRead, totalBytesRead, false}; 452 } 453 auto chunkEndString = buffer.GetNextLine(); 454 if (!chunkEndString || chunkEndString.value().Length() != 0) { 455 // There should have been an empty chunk 456 throw BNetworkRequestError( 457 __PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError); 458 } 459 460 fChunkParserState = ChunkSize; 461 break; 462 } 463 464 case Trailers: 465 { 466 auto trailerString = buffer.GetNextLine(); 467 if (!trailerString) { 468 // More data to come 469 return {totalBytesRead, totalBytesRead, false}; 470 } 471 472 if (trailerString.value().Length() > 0) { 473 // Ignore empty trailers for now 474 // TODO: review if the API should support trailing headers 475 } else { 476 fChunkParserState = Complete; 477 return {totalBytesRead, totalBytesRead, true}; 478 } 479 break; 480 } 481 482 case Complete: 483 return {totalBytesRead, totalBytesRead, true}; 484 } 485 } 486 return {totalBytesRead, totalBytesRead, false}; 487 } 488 489 490 // #pragma mark -- HttpBodyDecompression 491 /*! 492 \brief Set up a decompression stream that decompresses the data read by \a bodyParser. 493 */ 494 HttpBodyDecompression::HttpBodyDecompression(std::unique_ptr<HttpBodyParser> bodyParser) 495 { 496 fDecompressorStorage = std::make_unique<BMallocIO>(); 497 498 BDataIO* stream = nullptr; 499 auto result = BZlibCompressionAlgorithm().CreateDecompressingOutputStream( 500 fDecompressorStorage.get(), nullptr, stream); 501 502 if (result != B_OK) { 503 throw BNetworkRequestError("BZlibCompressionAlgorithm().CreateCompressingOutputStream", 504 BNetworkRequestError::SystemError, result); 505 } 506 507 fDecompressingStream = std::unique_ptr<BDataIO>(stream); 508 fBodyParser = std::move(bodyParser); 509 } 510 511 512 /*! 513 \brief Read a compressed body into a target.. 514 515 The stream captures chunked or raw data, and decompresses it. The decompressed data is then 516 copied into a target using the \a writeToBody function. 517 518 The \a readEnd argument indicates whether the current \a buffer contains all the expected data. 519 It is up for the underlying parser to determine if more data was expected, and therefore, if 520 there is an error. 521 522 It is required that the \a writeToBody function writes all the bytes it is asked to; this 523 method does not support partial writes and throws an exception when it fails. 524 525 \exception BNetworkRequestError In case there is an error parsing the buffer, or there is too 526 little data. 527 528 \returns The number of bytes parsed from the \a buffer. 529 */ 530 BodyParseResult 531 HttpBodyDecompression::ParseBody(HttpBuffer& buffer, HttpTransferFunction writeToBody, bool readEnd) 532 { 533 // Get the underlying raw or chunked parser to write data to our decompressionstream 534 auto parseResults = fBodyParser->ParseBody( 535 buffer, 536 [this](const std::byte* buffer, size_t bufferSize) { 537 auto status = fDecompressingStream->WriteExactly(buffer, bufferSize); 538 if (status != B_OK) { 539 throw BNetworkRequestError( 540 "BDataIO::WriteExactly()", BNetworkRequestError::SystemError, status); 541 } 542 return bufferSize; 543 }, 544 readEnd); 545 fTransferredBodySize += parseResults.bytesParsed; 546 547 if (readEnd || parseResults.complete) { 548 // No more bytes expected so flush out the final bytes 549 if (auto status = fDecompressingStream->Flush(); status != B_OK) { 550 throw BNetworkRequestError( 551 "BZlibDecompressionStream::Flush()", BNetworkRequestError::SystemError, status); 552 } 553 } 554 555 size_t bytesWritten = 0; 556 if (auto bodySize = fDecompressorStorage->Position(); bodySize > 0) { 557 bytesWritten 558 = writeToBody(static_cast<const std::byte*>(fDecompressorStorage->Buffer()), bodySize); 559 if (static_cast<off_t>(bytesWritten) != bodySize) { 560 throw BNetworkRequestError( 561 __PRETTY_FUNCTION__, BNetworkRequestError::SystemError, B_PARTIAL_WRITE); 562 } 563 fDecompressorStorage->Seek(0, SEEK_SET); 564 } 565 return {parseResults.bytesParsed, bytesWritten, parseResults.complete}; 566 } 567 568 569 /*! 570 \brief Return the TotalBodySize() from the underlying chunked or raw parser. 571 */ 572 std::optional<off_t> 573 HttpBodyDecompression::TotalBodySize() const noexcept 574 { 575 return fBodyParser->TotalBodySize(); 576 } 577