xref: /haiku/src/kits/network/libnetservices2/HttpParser.cpp (revision 52c4471a3024d2eb81fe88e2c3982b9f8daa5e56)
1 /*
2  * Copyright 2022 Haiku Inc. All rights reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Niels Sascha Reedijk, niels.reedijk@gmail.com
7  */
8 
9 #include "HttpParser.h"
10 
11 #include <stdexcept>
12 #include <string>
13 
14 #include <HttpFields.h>
15 #include <NetServicesDefs.h>
16 #include <ZlibCompressionAlgorithm.h>
17 
18 using namespace std::literals;
19 using namespace BPrivate::Network;
20 
21 
22 // #pragma mark -- HttpParser
23 
24 
25 /*!
26 	\brief Explicitly mark the response as having no content.
27 
28 	This is done in cases where the request was a HEAD request. Setting it to no content, will
29 	instruct the parser to move to completion after all the header fields have been parsed.
30 */
31 void
32 HttpParser::SetNoContent() noexcept
33 {
34 	if (fStreamState > HttpInputStreamState::Fields)
35 		debugger("Cannot set the parser to no content after parsing of the body has started");
36 	fBodyType = HttpBodyType::NoContent;
37 };
38 
39 
40 /*!
41 	\brief Parse the status from the \a buffer and store it in \a status.
42 
43 	\retval true The status was succesfully parsed
44 	\retval false There is not enough data in the buffer for a full status.
45 
46 	\exception BNetworkRequestException The status does not conform to the HTTP spec.
47 */
48 bool
49 HttpParser::ParseStatus(HttpBuffer& buffer, BHttpStatus& status)
50 {
51 	if (fStreamState != HttpInputStreamState::StatusLine)
52 		debugger("The Status line has already been parsed");
53 
54 	auto statusLine = buffer.GetNextLine();
55 	if (!statusLine)
56 		return false;
57 
58 	auto codeStart = statusLine->FindFirst(' ') + 1;
59 	if (codeStart < 0)
60 		throw BNetworkRequestError(__PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError);
61 
62 	auto codeEnd = statusLine->FindFirst(' ', codeStart);
63 
64 	if (codeEnd < 0 || (codeEnd - codeStart) != 3)
65 		throw BNetworkRequestError(__PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError);
66 
67 	std::string statusCodeString(statusLine->String() + codeStart, 3);
68 
69 	// build the output
70 	try {
71 		status.code = std::stol(statusCodeString);
72 	} catch (...) {
73 		throw BNetworkRequestError(__PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError);
74 	}
75 
76 	status.text = std::move(statusLine.value());
77 	fStatus.code = status.code; // cache the status code
78 	fStreamState = HttpInputStreamState::Fields;
79 	return true;
80 }
81 
82 
83 /*!
84 	\brief Parse the fields from the \a buffer and store it in \a fields.
85 
86 	The fields are parsed incrementally, meaning that even if the full header is not yet in the
87 	\a buffer, it will still parse all complete fields and store them in the \a fields.
88 
89 	After all fields have been parsed, it will determine the properties of the request body.
90 	This means it will determine whether there is any content compression, if there is a body,
91 	and if so if it has a fixed size or not.
92 
93 	\retval true All fields were succesfully parsed
94 	\retval false There is not enough data in the buffer to complete parsing of fields.
95 
96 	\exception BNetworkRequestException The fields not conform to the HTTP spec.
97 */
98 bool
99 HttpParser::ParseFields(HttpBuffer& buffer, BHttpFields& fields)
100 {
101 	if (fStreamState != HttpInputStreamState::Fields)
102 		debugger("The parser is not expecting header fields at this point");
103 
104 	auto fieldLine = buffer.GetNextLine();
105 
106 	while (fieldLine && !fieldLine.value().IsEmpty()) {
107 		// Parse next header line
108 		fields.AddField(fieldLine.value());
109 		fieldLine = buffer.GetNextLine();
110 	}
111 
112 	if (!fieldLine || (fieldLine && !fieldLine.value().IsEmpty())) {
113 		// there is more to parse
114 		return false;
115 	}
116 
117 	// Determine the properties for the body
118 	// RFC 7230 section 3.3.3 has a prioritized list of 7 rules around determining the body:
119 	std::optional<off_t> bodyBytesTotal = std::nullopt;
120 	if (fBodyType == HttpBodyType::NoContent || fStatus.StatusCode() == BHttpStatusCode::NoContent
121 		|| fStatus.StatusCode() == BHttpStatusCode::NotModified) {
122 		// [1] In case of HEAD (set previously), status codes 1xx (TODO!), status code 204 or 304,
123 		// no content [2] NOT SUPPORTED: when doing a CONNECT request, no content
124 		fBodyType = HttpBodyType::NoContent;
125 		fStreamState = HttpInputStreamState::Done;
126 	} else if (auto header = fields.FindField("Transfer-Encoding"sv);
127 			   header != fields.end() && header->Value() == "chunked"sv) {
128 		// [3] If there is a Transfer-Encoding heading set to 'chunked'
129 		// TODO: support the more advanced rules in the RFC around the meaning of this field
130 		fBodyType = HttpBodyType::Chunked;
131 		fStreamState = HttpInputStreamState::Body;
132 	} else if (fields.CountFields("Content-Length"sv) > 0) {
133 		// [4] When there is no Transfer-Encoding, then look for Content-Encoding:
134 		//	- If there are more than one, the values must match
135 		//	- The value must be a valid number
136 		// [5] If there is a valid value, then that is the expected size of the body
137 		try {
138 			auto contentLength = std::string();
139 			for (const auto& field: fields) {
140 				if (field.Name() == "Content-Length"sv) {
141 					if (contentLength.size() == 0)
142 						contentLength = field.Value();
143 					else if (contentLength != field.Value()) {
144 						throw BNetworkRequestError(__PRETTY_FUNCTION__,
145 							BNetworkRequestError::ProtocolError,
146 							"Multiple Content-Length fields with differing values");
147 					}
148 				}
149 			}
150 			bodyBytesTotal = std::stol(contentLength);
151 			if (*bodyBytesTotal == 0) {
152 				fBodyType = HttpBodyType::NoContent;
153 				fStreamState = HttpInputStreamState::Done;
154 			} else {
155 				fBodyType = HttpBodyType::FixedSize;
156 				fStreamState = HttpInputStreamState::Body;
157 			}
158 		} catch (const std::logic_error& e) {
159 			throw BNetworkRequestError(__PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError,
160 				"Cannot parse Content-Length field value (logic_error)");
161 		}
162 	} else {
163 		// [6] Applies to request messages only (this is a response)
164 		// [7] If nothing else then the received message is all data until connection close
165 		// (this is the default)
166 		fStreamState = HttpInputStreamState::Body;
167 	}
168 
169 	// Set up the body parser based on the logic above.
170 	switch (fBodyType) {
171 		case HttpBodyType::VariableSize:
172 			fBodyParser = std::make_unique<HttpRawBodyParser>();
173 			break;
174 		case HttpBodyType::FixedSize:
175 			fBodyParser = std::make_unique<HttpRawBodyParser>(*bodyBytesTotal);
176 			break;
177 		case HttpBodyType::Chunked:
178 			fBodyParser = std::make_unique<HttpChunkedBodyParser>();
179 			break;
180 		case HttpBodyType::NoContent:
181 		default:
182 			return true;
183 	}
184 
185 	// Check Content-Encoding for compression
186 	auto header = fields.FindField("Content-Encoding"sv);
187 	if (header != fields.end() && (header->Value() == "gzip" || header->Value() == "deflate")) {
188 		fBodyParser = std::make_unique<HttpBodyDecompression>(std::move(fBodyParser));
189 	}
190 
191 	return true;
192 }
193 
194 
195 /*!
196 	\brief Parse the body from the \a buffer and use \a writeToBody function to save.
197 
198 	The \a readEnd parameter indicates to the parser that the buffer currently contains all the
199 	expected data for this request.
200 */
201 size_t
202 HttpParser::ParseBody(HttpBuffer& buffer, HttpTransferFunction writeToBody, bool readEnd)
203 {
204 	if (fStreamState < HttpInputStreamState::Body || fStreamState == HttpInputStreamState::Done)
205 		debugger("The parser is not in the correct state to parse a body");
206 
207 	auto parseResult = fBodyParser->ParseBody(buffer, writeToBody, readEnd);
208 
209 	if (parseResult.complete)
210 		fStreamState = HttpInputStreamState::Done;
211 
212 	return parseResult.bytesParsed;
213 }
214 
215 
216 /*!
217 	\brief Return if the body is currently expecting to having content.
218 
219 	This may change if the header fields have not yet been parsed, as these may contain
220 	instructions about the body having no content.
221 */
222 bool
223 HttpParser::HasContent() const noexcept
224 {
225 	return fBodyType != HttpBodyType::NoContent;
226 }
227 
228 
229 /*!
230 	\brief Return the total size of the body, if known.
231 */
232 std::optional<off_t>
233 HttpParser::BodyBytesTotal() const noexcept
234 {
235 	if (fBodyParser)
236 		return fBodyParser->TotalBodySize();
237 	return std::nullopt;
238 }
239 
240 
241 /*!
242 	\brief Return the number of body bytes transferred from the response.
243 */
244 off_t
245 HttpParser::BodyBytesTransferred() const noexcept
246 {
247 	if (fBodyParser)
248 		return fBodyParser->TransferredBodySize();
249 	return 0;
250 }
251 
252 
253 /*!
254 	\brief Check if the body is fully parsed.
255 */
256 bool
257 HttpParser::Complete() const noexcept
258 {
259 	return fStreamState == HttpInputStreamState::Done;
260 }
261 
262 
263 // #pragma mark -- HttpBodyParser
264 
265 
266 /*!
267 	\brief Default implementation to return std::nullopt.
268 */
269 std::optional<off_t>
270 HttpBodyParser::TotalBodySize() const noexcept
271 {
272 	return std::nullopt;
273 }
274 
275 
276 /*!
277 	\brief Return the number of body bytes read from the stream so far.
278 
279 	For chunked transfers, this excludes the chunk headers and other metadata.
280 */
281 off_t
282 HttpBodyParser::TransferredBodySize() const noexcept
283 {
284 	return fTransferredBodySize;
285 }
286 
287 
288 // #pragma mark -- HttpRawBodyParser
289 /*!
290 	\brief Construct a HttpRawBodyParser with an unknown content size.
291 */
292 HttpRawBodyParser::HttpRawBodyParser()
293 {
294 }
295 
296 
297 /*!
298 	\brief Construct a HttpRawBodyParser with expected \a bodyBytesTotal size.
299 */
300 HttpRawBodyParser::HttpRawBodyParser(off_t bodyBytesTotal)
301 	:
302 	fBodyBytesTotal(bodyBytesTotal)
303 {
304 }
305 
306 
307 /*!
308 	\brief Parse a regular (non-chunked) body from a buffer.
309 
310 	The buffer is parsed into a target using the \a writeToBody function.
311 
312 	The \a readEnd argument indicates whether the current \a buffer contains all the expected data.
313 	In case the total body size is known, and the remaining bytes in the buffer are smaller than
314 	the expected remainder, a ProtocolError will be raised. The data in the buffer will *not* be
315 	copied to the target.
316 
317 	Also, if the body size is known, and the data in the \a buffer is larger than the expected
318 	expected length, then it will only read the bytes needed and leave the remainder in the buffer.
319 
320 	It is required that the \a writeToBody function writes all the bytes it is asked to; this
321 	method does not support partial writes and throws an exception when it fails.
322 
323 	\exception BNetworkRequestError In case the buffer contains too little or invalid data.
324 
325 	\returns The number of bytes parsed from the \a buffer.
326 */
327 BodyParseResult
328 HttpRawBodyParser::ParseBody(HttpBuffer& buffer, HttpTransferFunction writeToBody, bool readEnd)
329 {
330 	auto bytesToRead = buffer.RemainingBytes();
331 	if (fBodyBytesTotal) {
332 		auto expectedRemainingBytes = *fBodyBytesTotal - fTransferredBodySize;
333 		if (expectedRemainingBytes < static_cast<off_t>(buffer.RemainingBytes()))
334 			bytesToRead = expectedRemainingBytes;
335 		else if (readEnd && expectedRemainingBytes > static_cast<off_t>(buffer.RemainingBytes())) {
336 			throw BNetworkRequestError(__PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError,
337 				"Message body is incomplete; less data received than expected");
338 		}
339 	}
340 
341 	// Copy the data
342 	auto bytesRead = buffer.WriteTo(writeToBody, bytesToRead);
343 	fTransferredBodySize += bytesRead;
344 
345 	if (bytesRead != bytesToRead) {
346 		// Fail if not all expected bytes are written.
347 		throw BNetworkRequestError(__PRETTY_FUNCTION__, BNetworkRequestError::SystemError,
348 			"Could not write all available body bytes to the target.");
349 	}
350 
351 	if (fBodyBytesTotal) {
352 		if (*fBodyBytesTotal == fTransferredBodySize)
353 			return {bytesRead, bytesRead, true};
354 		else
355 			return {bytesRead, bytesRead, false};
356 	} else
357 		return {bytesRead, bytesRead, readEnd};
358 }
359 
360 
361 /*!
362 	\brief Override default implementation and return known body size (or std::nullopt)
363 */
364 std::optional<off_t>
365 HttpRawBodyParser::TotalBodySize() const noexcept
366 {
367 	return fBodyBytesTotal;
368 }
369 
370 
371 // #pragma mark -- HttpChunkedBodyParser
372 /*!
373 	\brief Parse a chunked body from a buffer.
374 
375 	The contents of the cunks are copied into a target using the \a writeToBody function.
376 
377 	The \a readEnd argument indicates whether the current \a buffer contains all the expected data.
378 	In case the chunk argument indicates that more data was to come, an exception is thrown.
379 
380 	It is required that the \a writeToBody function writes all the bytes it is asked to; this
381 	method does not support partial writes and throws an exception when it fails.
382 
383 	\exception BNetworkRequestError In case there is an error parsing the buffer, or there is too
384 		little data.
385 
386 	\returns The number of bytes parsed from the \a buffer.
387 */
388 BodyParseResult
389 HttpChunkedBodyParser::ParseBody(HttpBuffer& buffer, HttpTransferFunction writeToBody, bool readEnd)
390 {
391 	size_t totalBytesRead = 0;
392 	while (buffer.RemainingBytes() > 0) {
393 		switch (fChunkParserState) {
394 			case ChunkSize:
395 			{
396 				// Read the next chunk size from the buffer; if unsuccesful wait for more data
397 				auto chunkSizeString = buffer.GetNextLine();
398 				if (!chunkSizeString)
399 					return {totalBytesRead, totalBytesRead, false};
400 				auto chunkSizeStr = std::string(chunkSizeString.value().String());
401 				try {
402 					size_t pos = 0;
403 					fRemainingChunkSize = std::stoll(chunkSizeStr, &pos, 16);
404 					if (pos < chunkSizeStr.size() && chunkSizeStr[pos] != ';') {
405 						throw BNetworkRequestError(
406 							__PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError);
407 					}
408 				} catch (const std::invalid_argument&) {
409 					throw BNetworkRequestError(
410 						__PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError);
411 				} catch (const std::out_of_range&) {
412 					throw BNetworkRequestError(
413 						__PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError);
414 				}
415 
416 				if (fRemainingChunkSize > 0)
417 					fChunkParserState = Chunk;
418 				else
419 					fChunkParserState = Trailers;
420 				break;
421 			}
422 
423 			case Chunk:
424 			{
425 				size_t bytesToRead;
426 				if (fRemainingChunkSize > static_cast<off_t>(buffer.RemainingBytes()))
427 					bytesToRead = buffer.RemainingBytes();
428 				else
429 					bytesToRead = fRemainingChunkSize;
430 
431 				auto bytesRead = buffer.WriteTo(writeToBody, bytesToRead);
432 				if (bytesRead != bytesToRead) {
433 					// Fail if not all expected bytes are written.
434 					throw BNetworkRequestError(__PRETTY_FUNCTION__,
435 						BNetworkRequestError::SystemError,
436 						"Could not write all available body bytes to the target.");
437 				}
438 
439 				fTransferredBodySize += bytesRead;
440 				totalBytesRead += bytesRead;
441 				fRemainingChunkSize -= bytesRead;
442 				if (fRemainingChunkSize == 0)
443 					fChunkParserState = ChunkEnd;
444 				break;
445 			}
446 
447 			case ChunkEnd:
448 			{
449 				if (buffer.RemainingBytes() < 2) {
450 					// not enough data in the buffer to finish the chunk
451 					return {totalBytesRead, totalBytesRead, false};
452 				}
453 				auto chunkEndString = buffer.GetNextLine();
454 				if (!chunkEndString || chunkEndString.value().Length() != 0) {
455 					// There should have been an empty chunk
456 					throw BNetworkRequestError(
457 						__PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError);
458 				}
459 
460 				fChunkParserState = ChunkSize;
461 				break;
462 			}
463 
464 			case Trailers:
465 			{
466 				auto trailerString = buffer.GetNextLine();
467 				if (!trailerString) {
468 					// More data to come
469 					return {totalBytesRead, totalBytesRead, false};
470 				}
471 
472 				if (trailerString.value().Length() > 0) {
473 					// Ignore empty trailers for now
474 					// TODO: review if the API should support trailing headers
475 				} else {
476 					fChunkParserState = Complete;
477 					return {totalBytesRead, totalBytesRead, true};
478 				}
479 				break;
480 			}
481 
482 			case Complete:
483 				return {totalBytesRead, totalBytesRead, true};
484 		}
485 	}
486 	return {totalBytesRead, totalBytesRead, false};
487 }
488 
489 
490 // #pragma mark -- HttpBodyDecompression
491 /*!
492 	\brief Set up a decompression stream that decompresses the data read by \a bodyParser.
493 */
494 HttpBodyDecompression::HttpBodyDecompression(std::unique_ptr<HttpBodyParser> bodyParser)
495 {
496 	fDecompressorStorage = std::make_unique<BMallocIO>();
497 
498 	BDataIO* stream = nullptr;
499 	auto result = BZlibCompressionAlgorithm().CreateDecompressingOutputStream(
500 		fDecompressorStorage.get(), nullptr, stream);
501 
502 	if (result != B_OK) {
503 		throw BNetworkRequestError("BZlibCompressionAlgorithm().CreateCompressingOutputStream",
504 			BNetworkRequestError::SystemError, result);
505 	}
506 
507 	fDecompressingStream = std::unique_ptr<BDataIO>(stream);
508 	fBodyParser = std::move(bodyParser);
509 }
510 
511 
512 /*!
513 	\brief Read a compressed body into a target..
514 
515 	The stream captures chunked or raw data, and decompresses it. The decompressed data is then
516 	copied into a target using the \a writeToBody function.
517 
518 	The \a readEnd argument indicates whether the current \a buffer contains all the expected data.
519 	It is up for the underlying parser to determine if more data was expected, and therefore, if
520 	there is an error.
521 
522 	It is required that the \a writeToBody function writes all the bytes it is asked to; this
523 	method does not support partial writes and throws an exception when it fails.
524 
525 	\exception BNetworkRequestError In case there is an error parsing the buffer, or there is too
526 		little data.
527 
528 	\returns The number of bytes parsed from the \a buffer.
529 */
530 BodyParseResult
531 HttpBodyDecompression::ParseBody(HttpBuffer& buffer, HttpTransferFunction writeToBody, bool readEnd)
532 {
533 	// Get the underlying raw or chunked parser to write data to our decompressionstream
534 	auto parseResults = fBodyParser->ParseBody(
535 		buffer,
536 		[this](const std::byte* buffer, size_t bufferSize) {
537 			auto status = fDecompressingStream->WriteExactly(buffer, bufferSize);
538 			if (status != B_OK) {
539 				throw BNetworkRequestError(
540 					"BDataIO::WriteExactly()", BNetworkRequestError::SystemError, status);
541 			}
542 			return bufferSize;
543 		},
544 		readEnd);
545 	fTransferredBodySize += parseResults.bytesParsed;
546 
547 	if (readEnd || parseResults.complete) {
548 		// No more bytes expected so flush out the final bytes
549 		if (auto status = fDecompressingStream->Flush(); status != B_OK) {
550 			throw BNetworkRequestError(
551 				"BZlibDecompressionStream::Flush()", BNetworkRequestError::SystemError, status);
552 		}
553 	}
554 
555 	size_t bytesWritten = 0;
556 	if (auto bodySize = fDecompressorStorage->Position(); bodySize > 0) {
557 		bytesWritten
558 			= writeToBody(static_cast<const std::byte*>(fDecompressorStorage->Buffer()), bodySize);
559 		if (static_cast<off_t>(bytesWritten) != bodySize) {
560 			throw BNetworkRequestError(
561 				__PRETTY_FUNCTION__, BNetworkRequestError::SystemError, B_PARTIAL_WRITE);
562 		}
563 		fDecompressorStorage->Seek(0, SEEK_SET);
564 	}
565 	return {parseResults.bytesParsed, bytesWritten, parseResults.complete};
566 }
567 
568 
569 /*!
570 	\brief Return the TotalBodySize() from the underlying chunked or raw parser.
571 */
572 std::optional<off_t>
573 HttpBodyDecompression::TotalBodySize() const noexcept
574 {
575 	return fBodyParser->TotalBodySize();
576 }
577