1 /*
2 * Copyright 2022 Haiku Inc. All rights reserved.
3 * Distributed under the terms of the MIT License.
4 *
5 * Authors:
6 * Niels Sascha Reedijk, niels.reedijk@gmail.com
7 */
8
9 #include "HttpParser.h"
10
11 #include <stdexcept>
12 #include <string>
13
14 #include <HttpFields.h>
15 #include <NetServicesDefs.h>
16 #include <ZlibCompressionAlgorithm.h>
17
18 using namespace std::literals;
19 using namespace BPrivate::Network;
20
21
22 // #pragma mark -- HttpParser
23
24
25 /*!
26 \brief Explicitly mark the response as having no content.
27
28 This is done in cases where the request was a HEAD request. Setting it to no content, will
29 instruct the parser to move to completion after all the header fields have been parsed.
30 */
31 void
SetNoContent()32 HttpParser::SetNoContent() noexcept
33 {
34 if (fStreamState > HttpInputStreamState::Fields)
35 debugger("Cannot set the parser to no content after parsing of the body has started");
36 fBodyType = HttpBodyType::NoContent;
37 };
38
39
40 /*!
41 \brief Parse the status from the \a buffer and store it in \a status.
42
43 \retval true The status was succesfully parsed
44 \retval false There is not enough data in the buffer for a full status.
45
46 \exception BNetworkRequestException The status does not conform to the HTTP spec.
47 */
48 bool
ParseStatus(HttpBuffer & buffer,BHttpStatus & status)49 HttpParser::ParseStatus(HttpBuffer& buffer, BHttpStatus& status)
50 {
51 if (fStreamState != HttpInputStreamState::StatusLine)
52 debugger("The Status line has already been parsed");
53
54 auto statusLine = buffer.GetNextLine();
55 if (!statusLine)
56 return false;
57
58 auto codeStart = statusLine->FindFirst(' ') + 1;
59 if (codeStart < 0)
60 throw BNetworkRequestError(__PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError);
61
62 auto codeEnd = statusLine->FindFirst(' ', codeStart);
63
64 if (codeEnd < 0 || (codeEnd - codeStart) != 3)
65 throw BNetworkRequestError(__PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError);
66
67 std::string statusCodeString(statusLine->String() + codeStart, 3);
68
69 // build the output
70 try {
71 status.code = std::stol(statusCodeString);
72 } catch (...) {
73 throw BNetworkRequestError(__PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError);
74 }
75
76 status.text = std::move(statusLine.value());
77 fStatus.code = status.code; // cache the status code
78 fStreamState = HttpInputStreamState::Fields;
79 return true;
80 }
81
82
83 /*!
84 \brief Parse the fields from the \a buffer and store it in \a fields.
85
86 The fields are parsed incrementally, meaning that even if the full header is not yet in the
87 \a buffer, it will still parse all complete fields and store them in the \a fields.
88
89 After all fields have been parsed, it will determine the properties of the request body.
90 This means it will determine whether there is any content compression, if there is a body,
91 and if so if it has a fixed size or not.
92
93 \retval true All fields were succesfully parsed
94 \retval false There is not enough data in the buffer to complete parsing of fields.
95
96 \exception BNetworkRequestException The fields not conform to the HTTP spec.
97 */
98 bool
ParseFields(HttpBuffer & buffer,BHttpFields & fields)99 HttpParser::ParseFields(HttpBuffer& buffer, BHttpFields& fields)
100 {
101 if (fStreamState != HttpInputStreamState::Fields)
102 debugger("The parser is not expecting header fields at this point");
103
104 auto fieldLine = buffer.GetNextLine();
105
106 while (fieldLine && !fieldLine.value().IsEmpty()) {
107 // Parse next header line
108 fields.AddField(fieldLine.value());
109 fieldLine = buffer.GetNextLine();
110 }
111
112 if (!fieldLine || (fieldLine && !fieldLine.value().IsEmpty())) {
113 // there is more to parse
114 return false;
115 }
116
117 // Determine the properties for the body
118 // RFC 7230 section 3.3.3 has a prioritized list of 7 rules around determining the body:
119 std::optional<off_t> bodyBytesTotal = std::nullopt;
120 if (fBodyType == HttpBodyType::NoContent || fStatus.StatusCode() == BHttpStatusCode::NoContent
121 || fStatus.StatusCode() == BHttpStatusCode::NotModified) {
122 // [1] In case of HEAD (set previously), status codes 1xx (TODO!), status code 204 or 304,
123 // no content [2] NOT SUPPORTED: when doing a CONNECT request, no content
124 fBodyType = HttpBodyType::NoContent;
125 fStreamState = HttpInputStreamState::Done;
126 } else if (auto header = fields.FindField("Transfer-Encoding"sv);
127 header != fields.end() && header->Value() == "chunked"sv) {
128 // [3] If there is a Transfer-Encoding heading set to 'chunked'
129 // TODO: support the more advanced rules in the RFC around the meaning of this field
130 fBodyType = HttpBodyType::Chunked;
131 fStreamState = HttpInputStreamState::Body;
132 } else if (fields.CountFields("Content-Length"sv) > 0) {
133 // [4] When there is no Transfer-Encoding, then look for Content-Encoding:
134 // - If there are more than one, the values must match
135 // - The value must be a valid number
136 // [5] If there is a valid value, then that is the expected size of the body
137 try {
138 auto contentLength = std::string();
139 for (const auto& field: fields) {
140 if (field.Name() == "Content-Length"sv) {
141 if (contentLength.size() == 0)
142 contentLength = field.Value();
143 else if (contentLength != field.Value()) {
144 throw BNetworkRequestError(__PRETTY_FUNCTION__,
145 BNetworkRequestError::ProtocolError,
146 "Multiple Content-Length fields with differing values");
147 }
148 }
149 }
150 bodyBytesTotal = std::stol(contentLength);
151 if (*bodyBytesTotal == 0) {
152 fBodyType = HttpBodyType::NoContent;
153 fStreamState = HttpInputStreamState::Done;
154 } else {
155 fBodyType = HttpBodyType::FixedSize;
156 fStreamState = HttpInputStreamState::Body;
157 }
158 } catch (const std::logic_error& e) {
159 throw BNetworkRequestError(__PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError,
160 "Cannot parse Content-Length field value (logic_error)");
161 }
162 } else {
163 // [6] Applies to request messages only (this is a response)
164 // [7] If nothing else then the received message is all data until connection close
165 // (this is the default)
166 fStreamState = HttpInputStreamState::Body;
167 }
168
169 // Set up the body parser based on the logic above.
170 switch (fBodyType) {
171 case HttpBodyType::VariableSize:
172 fBodyParser = std::make_unique<HttpRawBodyParser>();
173 break;
174 case HttpBodyType::FixedSize:
175 fBodyParser = std::make_unique<HttpRawBodyParser>(*bodyBytesTotal);
176 break;
177 case HttpBodyType::Chunked:
178 fBodyParser = std::make_unique<HttpChunkedBodyParser>();
179 break;
180 case HttpBodyType::NoContent:
181 default:
182 return true;
183 }
184
185 // Check Content-Encoding for compression
186 auto header = fields.FindField("Content-Encoding"sv);
187 if (header != fields.end() && (header->Value() == "gzip" || header->Value() == "deflate")) {
188 fBodyParser = std::make_unique<HttpBodyDecompression>(std::move(fBodyParser));
189 }
190
191 return true;
192 }
193
194
195 /*!
196 \brief Parse the body from the \a buffer and use \a writeToBody function to save.
197
198 The \a readEnd parameter indicates to the parser that the buffer currently contains all the
199 expected data for this request.
200 */
201 size_t
ParseBody(HttpBuffer & buffer,HttpTransferFunction writeToBody,bool readEnd)202 HttpParser::ParseBody(HttpBuffer& buffer, HttpTransferFunction writeToBody, bool readEnd)
203 {
204 if (fStreamState < HttpInputStreamState::Body || fStreamState == HttpInputStreamState::Done)
205 debugger("The parser is not in the correct state to parse a body");
206
207 auto parseResult = fBodyParser->ParseBody(buffer, writeToBody, readEnd);
208
209 if (parseResult.complete)
210 fStreamState = HttpInputStreamState::Done;
211
212 return parseResult.bytesParsed;
213 }
214
215
216 /*!
217 \brief Return if the body is currently expecting to having content.
218
219 This may change if the header fields have not yet been parsed, as these may contain
220 instructions about the body having no content.
221 */
222 bool
HasContent() const223 HttpParser::HasContent() const noexcept
224 {
225 return fBodyType != HttpBodyType::NoContent;
226 }
227
228
229 /*!
230 \brief Return the total size of the body, if known.
231 */
232 std::optional<off_t>
BodyBytesTotal() const233 HttpParser::BodyBytesTotal() const noexcept
234 {
235 if (fBodyParser)
236 return fBodyParser->TotalBodySize();
237 return std::nullopt;
238 }
239
240
241 /*!
242 \brief Return the number of body bytes transferred from the response.
243 */
244 off_t
BodyBytesTransferred() const245 HttpParser::BodyBytesTransferred() const noexcept
246 {
247 if (fBodyParser)
248 return fBodyParser->TransferredBodySize();
249 return 0;
250 }
251
252
253 /*!
254 \brief Check if the body is fully parsed.
255 */
256 bool
Complete() const257 HttpParser::Complete() const noexcept
258 {
259 return fStreamState == HttpInputStreamState::Done;
260 }
261
262
263 // #pragma mark -- HttpBodyParser
264
265
266 /*!
267 \brief Default implementation to return std::nullopt.
268 */
269 std::optional<off_t>
TotalBodySize() const270 HttpBodyParser::TotalBodySize() const noexcept
271 {
272 return std::nullopt;
273 }
274
275
276 /*!
277 \brief Return the number of body bytes read from the stream so far.
278
279 For chunked transfers, this excludes the chunk headers and other metadata.
280 */
281 off_t
TransferredBodySize() const282 HttpBodyParser::TransferredBodySize() const noexcept
283 {
284 return fTransferredBodySize;
285 }
286
287
288 // #pragma mark -- HttpRawBodyParser
289 /*!
290 \brief Construct a HttpRawBodyParser with an unknown content size.
291 */
HttpRawBodyParser()292 HttpRawBodyParser::HttpRawBodyParser()
293 {
294 }
295
296
297 /*!
298 \brief Construct a HttpRawBodyParser with expected \a bodyBytesTotal size.
299 */
HttpRawBodyParser(off_t bodyBytesTotal)300 HttpRawBodyParser::HttpRawBodyParser(off_t bodyBytesTotal)
301 :
302 fBodyBytesTotal(bodyBytesTotal)
303 {
304 }
305
306
307 /*!
308 \brief Parse a regular (non-chunked) body from a buffer.
309
310 The buffer is parsed into a target using the \a writeToBody function.
311
312 The \a readEnd argument indicates whether the current \a buffer contains all the expected data.
313 In case the total body size is known, and the remaining bytes in the buffer are smaller than
314 the expected remainder, a ProtocolError will be raised. The data in the buffer will *not* be
315 copied to the target.
316
317 Also, if the body size is known, and the data in the \a buffer is larger than the expected
318 expected length, then it will only read the bytes needed and leave the remainder in the buffer.
319
320 It is required that the \a writeToBody function writes all the bytes it is asked to; this
321 method does not support partial writes and throws an exception when it fails.
322
323 \exception BNetworkRequestError In case the buffer contains too little or invalid data.
324
325 \returns The number of bytes parsed from the \a buffer.
326 */
327 BodyParseResult
ParseBody(HttpBuffer & buffer,HttpTransferFunction writeToBody,bool readEnd)328 HttpRawBodyParser::ParseBody(HttpBuffer& buffer, HttpTransferFunction writeToBody, bool readEnd)
329 {
330 auto bytesToRead = buffer.RemainingBytes();
331 if (fBodyBytesTotal) {
332 auto expectedRemainingBytes = *fBodyBytesTotal - fTransferredBodySize;
333 if (expectedRemainingBytes < static_cast<off_t>(buffer.RemainingBytes()))
334 bytesToRead = expectedRemainingBytes;
335 else if (readEnd && expectedRemainingBytes > static_cast<off_t>(buffer.RemainingBytes())) {
336 throw BNetworkRequestError(__PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError,
337 "Message body is incomplete; less data received than expected");
338 }
339 }
340
341 // Copy the data
342 auto bytesRead = buffer.WriteTo(writeToBody, bytesToRead);
343 fTransferredBodySize += bytesRead;
344
345 if (bytesRead != bytesToRead) {
346 // Fail if not all expected bytes are written.
347 throw BNetworkRequestError(__PRETTY_FUNCTION__, BNetworkRequestError::SystemError,
348 "Could not write all available body bytes to the target.");
349 }
350
351 if (fBodyBytesTotal) {
352 if (*fBodyBytesTotal == fTransferredBodySize)
353 return {bytesRead, bytesRead, true};
354 else
355 return {bytesRead, bytesRead, false};
356 } else
357 return {bytesRead, bytesRead, readEnd};
358 }
359
360
361 /*!
362 \brief Override default implementation and return known body size (or std::nullopt)
363 */
364 std::optional<off_t>
TotalBodySize() const365 HttpRawBodyParser::TotalBodySize() const noexcept
366 {
367 return fBodyBytesTotal;
368 }
369
370
371 // #pragma mark -- HttpChunkedBodyParser
372 /*!
373 \brief Parse a chunked body from a buffer.
374
375 The contents of the cunks are copied into a target using the \a writeToBody function.
376
377 The \a readEnd argument indicates whether the current \a buffer contains all the expected data.
378 In case the chunk argument indicates that more data was to come, an exception is thrown.
379
380 It is required that the \a writeToBody function writes all the bytes it is asked to; this
381 method does not support partial writes and throws an exception when it fails.
382
383 \exception BNetworkRequestError In case there is an error parsing the buffer, or there is too
384 little data.
385
386 \returns The number of bytes parsed from the \a buffer.
387 */
388 BodyParseResult
ParseBody(HttpBuffer & buffer,HttpTransferFunction writeToBody,bool readEnd)389 HttpChunkedBodyParser::ParseBody(HttpBuffer& buffer, HttpTransferFunction writeToBody, bool readEnd)
390 {
391 size_t totalBytesRead = 0;
392 while (buffer.RemainingBytes() > 0) {
393 switch (fChunkParserState) {
394 case ChunkSize:
395 {
396 // Read the next chunk size from the buffer; if unsuccesful wait for more data
397 auto chunkSizeString = buffer.GetNextLine();
398 if (!chunkSizeString)
399 return {totalBytesRead, totalBytesRead, false};
400 auto chunkSizeStr = std::string(chunkSizeString.value().String());
401 try {
402 size_t pos = 0;
403 fRemainingChunkSize = std::stoll(chunkSizeStr, &pos, 16);
404 if (pos < chunkSizeStr.size() && chunkSizeStr[pos] != ';') {
405 throw BNetworkRequestError(
406 __PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError);
407 }
408 } catch (const std::invalid_argument&) {
409 throw BNetworkRequestError(
410 __PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError);
411 } catch (const std::out_of_range&) {
412 throw BNetworkRequestError(
413 __PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError);
414 }
415
416 if (fRemainingChunkSize > 0)
417 fChunkParserState = Chunk;
418 else
419 fChunkParserState = Trailers;
420 break;
421 }
422
423 case Chunk:
424 {
425 size_t bytesToRead;
426 if (fRemainingChunkSize > static_cast<off_t>(buffer.RemainingBytes()))
427 bytesToRead = buffer.RemainingBytes();
428 else
429 bytesToRead = fRemainingChunkSize;
430
431 auto bytesRead = buffer.WriteTo(writeToBody, bytesToRead);
432 if (bytesRead != bytesToRead) {
433 // Fail if not all expected bytes are written.
434 throw BNetworkRequestError(__PRETTY_FUNCTION__,
435 BNetworkRequestError::SystemError,
436 "Could not write all available body bytes to the target.");
437 }
438
439 fTransferredBodySize += bytesRead;
440 totalBytesRead += bytesRead;
441 fRemainingChunkSize -= bytesRead;
442 if (fRemainingChunkSize == 0)
443 fChunkParserState = ChunkEnd;
444 break;
445 }
446
447 case ChunkEnd:
448 {
449 if (buffer.RemainingBytes() < 2) {
450 // not enough data in the buffer to finish the chunk
451 return {totalBytesRead, totalBytesRead, false};
452 }
453 auto chunkEndString = buffer.GetNextLine();
454 if (!chunkEndString || chunkEndString.value().Length() != 0) {
455 // There should have been an empty chunk
456 throw BNetworkRequestError(
457 __PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError);
458 }
459
460 fChunkParserState = ChunkSize;
461 break;
462 }
463
464 case Trailers:
465 {
466 auto trailerString = buffer.GetNextLine();
467 if (!trailerString) {
468 // More data to come
469 return {totalBytesRead, totalBytesRead, false};
470 }
471
472 if (trailerString.value().Length() > 0) {
473 // Ignore empty trailers for now
474 // TODO: review if the API should support trailing headers
475 } else {
476 fChunkParserState = Complete;
477 return {totalBytesRead, totalBytesRead, true};
478 }
479 break;
480 }
481
482 case Complete:
483 return {totalBytesRead, totalBytesRead, true};
484 }
485 }
486 return {totalBytesRead, totalBytesRead, false};
487 }
488
489
490 // #pragma mark -- HttpBodyDecompression
491 /*!
492 \brief Set up a decompression stream that decompresses the data read by \a bodyParser.
493 */
HttpBodyDecompression(std::unique_ptr<HttpBodyParser> bodyParser)494 HttpBodyDecompression::HttpBodyDecompression(std::unique_ptr<HttpBodyParser> bodyParser)
495 {
496 fDecompressorStorage = std::make_unique<BMallocIO>();
497
498 BDataIO* stream = nullptr;
499 auto result = BZlibCompressionAlgorithm().CreateDecompressingOutputStream(
500 fDecompressorStorage.get(), nullptr, stream);
501
502 if (result != B_OK) {
503 throw BNetworkRequestError("BZlibCompressionAlgorithm().CreateCompressingOutputStream",
504 BNetworkRequestError::SystemError, result);
505 }
506
507 fDecompressingStream = std::unique_ptr<BDataIO>(stream);
508 fBodyParser = std::move(bodyParser);
509 }
510
511
512 /*!
513 \brief Read a compressed body into a target..
514
515 The stream captures chunked or raw data, and decompresses it. The decompressed data is then
516 copied into a target using the \a writeToBody function.
517
518 The \a readEnd argument indicates whether the current \a buffer contains all the expected data.
519 It is up for the underlying parser to determine if more data was expected, and therefore, if
520 there is an error.
521
522 It is required that the \a writeToBody function writes all the bytes it is asked to; this
523 method does not support partial writes and throws an exception when it fails.
524
525 \exception BNetworkRequestError In case there is an error parsing the buffer, or there is too
526 little data.
527
528 \returns The number of bytes parsed from the \a buffer.
529 */
530 BodyParseResult
ParseBody(HttpBuffer & buffer,HttpTransferFunction writeToBody,bool readEnd)531 HttpBodyDecompression::ParseBody(HttpBuffer& buffer, HttpTransferFunction writeToBody, bool readEnd)
532 {
533 // Get the underlying raw or chunked parser to write data to our decompressionstream
534 auto parseResults = fBodyParser->ParseBody(
535 buffer,
536 [this](const std::byte* buffer, size_t bufferSize) {
537 auto status = fDecompressingStream->WriteExactly(buffer, bufferSize);
538 if (status != B_OK) {
539 throw BNetworkRequestError(
540 "BDataIO::WriteExactly()", BNetworkRequestError::SystemError, status);
541 }
542 return bufferSize;
543 },
544 readEnd);
545 fTransferredBodySize += parseResults.bytesParsed;
546
547 if (readEnd || parseResults.complete) {
548 // No more bytes expected so flush out the final bytes
549 if (auto status = fDecompressingStream->Flush(); status != B_OK) {
550 throw BNetworkRequestError(
551 "BZlibDecompressionStream::Flush()", BNetworkRequestError::SystemError, status);
552 }
553 }
554
555 size_t bytesWritten = 0;
556 if (auto bodySize = fDecompressorStorage->Position(); bodySize > 0) {
557 bytesWritten
558 = writeToBody(static_cast<const std::byte*>(fDecompressorStorage->Buffer()), bodySize);
559 if (static_cast<off_t>(bytesWritten) != bodySize) {
560 throw BNetworkRequestError(
561 __PRETTY_FUNCTION__, BNetworkRequestError::SystemError, B_PARTIAL_WRITE);
562 }
563 fDecompressorStorage->Seek(0, SEEK_SET);
564 }
565 return {parseResults.bytesParsed, bytesWritten, parseResults.complete};
566 }
567
568
569 /*!
570 \brief Return the TotalBodySize() from the underlying chunked or raw parser.
571 */
572 std::optional<off_t>
TotalBodySize() const573 HttpBodyDecompression::TotalBodySize() const noexcept
574 {
575 return fBodyParser->TotalBodySize();
576 }
577