/* * Copyright 2022 Haiku Inc. All rights reserved. * Distributed under the terms of the MIT License. * * Authors: * Niels Sascha Reedijk, niels.reedijk@gmail.com */ #include "HttpParser.h" #include #include #include #include #include using namespace std::literals; using namespace BPrivate::Network; // #pragma mark -- HttpParser /*! \brief Explicitly mark the response as having no content. This is done in cases where the request was a HEAD request. Setting it to no content, will instruct the parser to move to completion after all the header fields have been parsed. */ void HttpParser::SetNoContent() noexcept { if (fStreamState > HttpInputStreamState::Fields) debugger("Cannot set the parser to no content after parsing of the body has started"); fBodyType = HttpBodyType::NoContent; }; /*! \brief Parse the status from the \a buffer and store it in \a status. \retval true The status was succesfully parsed \retval false There is not enough data in the buffer for a full status. \exception BNetworkRequestException The status does not conform to the HTTP spec. */ bool HttpParser::ParseStatus(HttpBuffer& buffer, BHttpStatus& status) { if (fStreamState != HttpInputStreamState::StatusLine) debugger("The Status line has already been parsed"); auto statusLine = buffer.GetNextLine(); if (!statusLine) return false; auto codeStart = statusLine->FindFirst(' ') + 1; if (codeStart < 0) throw BNetworkRequestError(__PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError); auto codeEnd = statusLine->FindFirst(' ', codeStart); if (codeEnd < 0 || (codeEnd - codeStart) != 3) throw BNetworkRequestError(__PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError); std::string statusCodeString(statusLine->String() + codeStart, 3); // build the output try { status.code = std::stol(statusCodeString); } catch (...) { throw BNetworkRequestError(__PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError); } status.text = std::move(statusLine.value()); fStatus.code = status.code; // cache the status code fStreamState = HttpInputStreamState::Fields; return true; } /*! \brief Parse the fields from the \a buffer and store it in \a fields. The fields are parsed incrementally, meaning that even if the full header is not yet in the \a buffer, it will still parse all complete fields and store them in the \a fields. After all fields have been parsed, it will determine the properties of the request body. This means it will determine whether there is any content compression, if there is a body, and if so if it has a fixed size or not. \retval true All fields were succesfully parsed \retval false There is not enough data in the buffer to complete parsing of fields. \exception BNetworkRequestException The fields not conform to the HTTP spec. */ bool HttpParser::ParseFields(HttpBuffer& buffer, BHttpFields& fields) { if (fStreamState != HttpInputStreamState::Fields) debugger("The parser is not expecting header fields at this point"); auto fieldLine = buffer.GetNextLine(); while (fieldLine && !fieldLine.value().IsEmpty()) { // Parse next header line fields.AddField(fieldLine.value()); fieldLine = buffer.GetNextLine(); } if (!fieldLine || (fieldLine && !fieldLine.value().IsEmpty())) { // there is more to parse return false; } // Determine the properties for the body // RFC 7230 section 3.3.3 has a prioritized list of 7 rules around determining the body: std::optional bodyBytesTotal = std::nullopt; if (fBodyType == HttpBodyType::NoContent || fStatus.StatusCode() == BHttpStatusCode::NoContent || fStatus.StatusCode() == BHttpStatusCode::NotModified) { // [1] In case of HEAD (set previously), status codes 1xx (TODO!), status code 204 or 304, // no content [2] NOT SUPPORTED: when doing a CONNECT request, no content fBodyType = HttpBodyType::NoContent; fStreamState = HttpInputStreamState::Done; } else if (auto header = fields.FindField("Transfer-Encoding"sv); header != fields.end() && header->Value() == "chunked"sv) { // [3] If there is a Transfer-Encoding heading set to 'chunked' // TODO: support the more advanced rules in the RFC around the meaning of this field fBodyType = HttpBodyType::Chunked; fStreamState = HttpInputStreamState::Body; } else if (fields.CountFields("Content-Length"sv) > 0) { // [4] When there is no Transfer-Encoding, then look for Content-Encoding: // - If there are more than one, the values must match // - The value must be a valid number // [5] If there is a valid value, then that is the expected size of the body try { auto contentLength = std::string(); for (const auto& field: fields) { if (field.Name() == "Content-Length"sv) { if (contentLength.size() == 0) contentLength = field.Value(); else if (contentLength != field.Value()) { throw BNetworkRequestError(__PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError, "Multiple Content-Length fields with differing values"); } } } bodyBytesTotal = std::stol(contentLength); if (*bodyBytesTotal == 0) { fBodyType = HttpBodyType::NoContent; fStreamState = HttpInputStreamState::Done; } else { fBodyType = HttpBodyType::FixedSize; fStreamState = HttpInputStreamState::Body; } } catch (const std::logic_error& e) { throw BNetworkRequestError(__PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError, "Cannot parse Content-Length field value (logic_error)"); } } else { // [6] Applies to request messages only (this is a response) // [7] If nothing else then the received message is all data until connection close // (this is the default) fStreamState = HttpInputStreamState::Body; } // Set up the body parser based on the logic above. switch (fBodyType) { case HttpBodyType::VariableSize: fBodyParser = std::make_unique(); break; case HttpBodyType::FixedSize: fBodyParser = std::make_unique(*bodyBytesTotal); break; case HttpBodyType::Chunked: fBodyParser = std::make_unique(); break; case HttpBodyType::NoContent: default: return true; } // Check Content-Encoding for compression auto header = fields.FindField("Content-Encoding"sv); if (header != fields.end() && (header->Value() == "gzip" || header->Value() == "deflate")) { fBodyParser = std::make_unique(std::move(fBodyParser)); } return true; } /*! \brief Parse the body from the \a buffer and use \a writeToBody function to save. The \a readEnd parameter indicates to the parser that the buffer currently contains all the expected data for this request. */ size_t HttpParser::ParseBody(HttpBuffer& buffer, HttpTransferFunction writeToBody, bool readEnd) { if (fStreamState < HttpInputStreamState::Body || fStreamState == HttpInputStreamState::Done) debugger("The parser is not in the correct state to parse a body"); auto parseResult = fBodyParser->ParseBody(buffer, writeToBody, readEnd); if (parseResult.complete) fStreamState = HttpInputStreamState::Done; return parseResult.bytesParsed; } /*! \brief Return if the body is currently expecting to having content. This may change if the header fields have not yet been parsed, as these may contain instructions about the body having no content. */ bool HttpParser::HasContent() const noexcept { return fBodyType != HttpBodyType::NoContent; } /*! \brief Return the total size of the body, if known. */ std::optional HttpParser::BodyBytesTotal() const noexcept { if (fBodyParser) return fBodyParser->TotalBodySize(); return std::nullopt; } /*! \brief Return the number of body bytes transferred from the response. */ off_t HttpParser::BodyBytesTransferred() const noexcept { if (fBodyParser) return fBodyParser->TransferredBodySize(); return 0; } /*! \brief Check if the body is fully parsed. */ bool HttpParser::Complete() const noexcept { return fStreamState == HttpInputStreamState::Done; } // #pragma mark -- HttpBodyParser /*! \brief Default implementation to return std::nullopt. */ std::optional HttpBodyParser::TotalBodySize() const noexcept { return std::nullopt; } /*! \brief Return the number of body bytes read from the stream so far. For chunked transfers, this excludes the chunk headers and other metadata. */ off_t HttpBodyParser::TransferredBodySize() const noexcept { return fTransferredBodySize; } // #pragma mark -- HttpRawBodyParser /*! \brief Construct a HttpRawBodyParser with an unknown content size. */ HttpRawBodyParser::HttpRawBodyParser() { } /*! \brief Construct a HttpRawBodyParser with expected \a bodyBytesTotal size. */ HttpRawBodyParser::HttpRawBodyParser(off_t bodyBytesTotal) : fBodyBytesTotal(bodyBytesTotal) { } /*! \brief Parse a regular (non-chunked) body from a buffer. The buffer is parsed into a target using the \a writeToBody function. The \a readEnd argument indicates whether the current \a buffer contains all the expected data. In case the total body size is known, and the remaining bytes in the buffer are smaller than the expected remainder, a ProtocolError will be raised. The data in the buffer will *not* be copied to the target. Also, if the body size is known, and the data in the \a buffer is larger than the expected expected length, then it will only read the bytes needed and leave the remainder in the buffer. It is required that the \a writeToBody function writes all the bytes it is asked to; this method does not support partial writes and throws an exception when it fails. \exception BNetworkRequestError In case the buffer contains too little or invalid data. \returns The number of bytes parsed from the \a buffer. */ BodyParseResult HttpRawBodyParser::ParseBody(HttpBuffer& buffer, HttpTransferFunction writeToBody, bool readEnd) { auto bytesToRead = buffer.RemainingBytes(); if (fBodyBytesTotal) { auto expectedRemainingBytes = *fBodyBytesTotal - fTransferredBodySize; if (expectedRemainingBytes < static_cast(buffer.RemainingBytes())) bytesToRead = expectedRemainingBytes; else if (readEnd && expectedRemainingBytes > static_cast(buffer.RemainingBytes())) { throw BNetworkRequestError(__PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError, "Message body is incomplete; less data received than expected"); } } // Copy the data auto bytesRead = buffer.WriteTo(writeToBody, bytesToRead); fTransferredBodySize += bytesRead; if (bytesRead != bytesToRead) { // Fail if not all expected bytes are written. throw BNetworkRequestError(__PRETTY_FUNCTION__, BNetworkRequestError::SystemError, "Could not write all available body bytes to the target."); } if (fBodyBytesTotal) { if (*fBodyBytesTotal == fTransferredBodySize) return {bytesRead, bytesRead, true}; else return {bytesRead, bytesRead, false}; } else return {bytesRead, bytesRead, readEnd}; } /*! \brief Override default implementation and return known body size (or std::nullopt) */ std::optional HttpRawBodyParser::TotalBodySize() const noexcept { return fBodyBytesTotal; } // #pragma mark -- HttpChunkedBodyParser /*! \brief Parse a chunked body from a buffer. The contents of the cunks are copied into a target using the \a writeToBody function. The \a readEnd argument indicates whether the current \a buffer contains all the expected data. In case the chunk argument indicates that more data was to come, an exception is thrown. It is required that the \a writeToBody function writes all the bytes it is asked to; this method does not support partial writes and throws an exception when it fails. \exception BNetworkRequestError In case there is an error parsing the buffer, or there is too little data. \returns The number of bytes parsed from the \a buffer. */ BodyParseResult HttpChunkedBodyParser::ParseBody(HttpBuffer& buffer, HttpTransferFunction writeToBody, bool readEnd) { size_t totalBytesRead = 0; while (buffer.RemainingBytes() > 0) { switch (fChunkParserState) { case ChunkSize: { // Read the next chunk size from the buffer; if unsuccesful wait for more data auto chunkSizeString = buffer.GetNextLine(); if (!chunkSizeString) return {totalBytesRead, totalBytesRead, false}; auto chunkSizeStr = std::string(chunkSizeString.value().String()); try { size_t pos = 0; fRemainingChunkSize = std::stoll(chunkSizeStr, &pos, 16); if (pos < chunkSizeStr.size() && chunkSizeStr[pos] != ';') { throw BNetworkRequestError( __PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError); } } catch (const std::invalid_argument&) { throw BNetworkRequestError( __PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError); } catch (const std::out_of_range&) { throw BNetworkRequestError( __PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError); } if (fRemainingChunkSize > 0) fChunkParserState = Chunk; else fChunkParserState = Trailers; break; } case Chunk: { size_t bytesToRead; if (fRemainingChunkSize > static_cast(buffer.RemainingBytes())) bytesToRead = buffer.RemainingBytes(); else bytesToRead = fRemainingChunkSize; auto bytesRead = buffer.WriteTo(writeToBody, bytesToRead); if (bytesRead != bytesToRead) { // Fail if not all expected bytes are written. throw BNetworkRequestError(__PRETTY_FUNCTION__, BNetworkRequestError::SystemError, "Could not write all available body bytes to the target."); } fTransferredBodySize += bytesRead; totalBytesRead += bytesRead; fRemainingChunkSize -= bytesRead; if (fRemainingChunkSize == 0) fChunkParserState = ChunkEnd; break; } case ChunkEnd: { if (buffer.RemainingBytes() < 2) { // not enough data in the buffer to finish the chunk return {totalBytesRead, totalBytesRead, false}; } auto chunkEndString = buffer.GetNextLine(); if (!chunkEndString || chunkEndString.value().Length() != 0) { // There should have been an empty chunk throw BNetworkRequestError( __PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError); } fChunkParserState = ChunkSize; break; } case Trailers: { auto trailerString = buffer.GetNextLine(); if (!trailerString) { // More data to come return {totalBytesRead, totalBytesRead, false}; } if (trailerString.value().Length() > 0) { // Ignore empty trailers for now // TODO: review if the API should support trailing headers } else { fChunkParserState = Complete; return {totalBytesRead, totalBytesRead, true}; } break; } case Complete: return {totalBytesRead, totalBytesRead, true}; } } return {totalBytesRead, totalBytesRead, false}; } // #pragma mark -- HttpBodyDecompression /*! \brief Set up a decompression stream that decompresses the data read by \a bodyParser. */ HttpBodyDecompression::HttpBodyDecompression(std::unique_ptr bodyParser) { fDecompressorStorage = std::make_unique(); BDataIO* stream = nullptr; auto result = BZlibCompressionAlgorithm().CreateDecompressingOutputStream( fDecompressorStorage.get(), nullptr, stream); if (result != B_OK) { throw BNetworkRequestError("BZlibCompressionAlgorithm().CreateCompressingOutputStream", BNetworkRequestError::SystemError, result); } fDecompressingStream = std::unique_ptr(stream); fBodyParser = std::move(bodyParser); } /*! \brief Read a compressed body into a target.. The stream captures chunked or raw data, and decompresses it. The decompressed data is then copied into a target using the \a writeToBody function. The \a readEnd argument indicates whether the current \a buffer contains all the expected data. It is up for the underlying parser to determine if more data was expected, and therefore, if there is an error. It is required that the \a writeToBody function writes all the bytes it is asked to; this method does not support partial writes and throws an exception when it fails. \exception BNetworkRequestError In case there is an error parsing the buffer, or there is too little data. \returns The number of bytes parsed from the \a buffer. */ BodyParseResult HttpBodyDecompression::ParseBody(HttpBuffer& buffer, HttpTransferFunction writeToBody, bool readEnd) { // Get the underlying raw or chunked parser to write data to our decompressionstream auto parseResults = fBodyParser->ParseBody( buffer, [this](const std::byte* buffer, size_t bufferSize) { auto status = fDecompressingStream->WriteExactly(buffer, bufferSize); if (status != B_OK) { throw BNetworkRequestError( "BDataIO::WriteExactly()", BNetworkRequestError::SystemError, status); } return bufferSize; }, readEnd); fTransferredBodySize += parseResults.bytesParsed; if (readEnd || parseResults.complete) { // No more bytes expected so flush out the final bytes if (auto status = fDecompressingStream->Flush(); status != B_OK) { throw BNetworkRequestError( "BZlibDecompressionStream::Flush()", BNetworkRequestError::SystemError, status); } } size_t bytesWritten = 0; if (auto bodySize = fDecompressorStorage->Position(); bodySize > 0) { bytesWritten = writeToBody(static_cast(fDecompressorStorage->Buffer()), bodySize); if (static_cast(bytesWritten) != bodySize) { throw BNetworkRequestError( __PRETTY_FUNCTION__, BNetworkRequestError::SystemError, B_PARTIAL_WRITE); } fDecompressorStorage->Seek(0, SEEK_SET); } return {parseResults.bytesParsed, bytesWritten, parseResults.complete}; } /*! \brief Return the TotalBodySize() from the underlying chunked or raw parser. */ std::optional HttpBodyDecompression::TotalBodySize() const noexcept { return fBodyParser->TotalBodySize(); }