xref: /haiku/src/tests/system/kernel/file_corruption/fs/File.cpp (revision 539ec8ff1e62c970b91a4260cad0067234f04848)
1 /*
2  * Copyright 2010, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 
7 #include "File.h"
8 
9 #include <errno.h>
10 #include <string.h>
11 #include <unistd.h>
12 
13 #include <algorithm>
14 #include <new>
15 
16 #include <fs_cache.h>
17 
18 #include <AutoDeleter.h>
19 
20 #include "Block.h"
21 #include "BlockAllocator.h"
22 #include "DebugSupport.h"
23 #include "Transaction.h"
24 #include "Volume.h"
25 
26 
// Geometry of the per-file block tree.
// The root block starts with the checksumfs_node header; the block indices
// (8 bytes each) are stored in the space following it.
static const size_t kFileRootBlockOffset	= sizeof(checksumfs_node);
static const size_t kFileRootBlockSize		= B_PAGE_SIZE
												- kFileRootBlockOffset;
// number of 8 byte block indices fitting into the root block's index area
static const uint32 kFileRootBlockMaxCount	= kFileRootBlockSize / 8;
// number of 8 byte block indices fitting into a complete (non-root) block
static const uint32 kFileBlockMaxCount		= B_PAGE_SIZE / 8;
// log2 of kFileBlockMaxCount (statically asserted in File's constructor)
static const uint32 kFileBlockShift			= 9;
// size of the LevelInfo arrays allocated by _GetLevelInfos(): enough
// kFileBlockShift-bit levels to cover a 64 bit block number, plus one
static const uint32 kFileMaxTreeDepth		= (64 + kFileBlockShift - 1)
												/ kFileBlockShift + 1;
35 
36 
// Rounds \a value up to the next multiple of B_PAGE_SIZE. Note that the
// addition wraps around for values within B_PAGE_SIZE - 1 of the type's
// maximum.
#define BLOCK_ROUND_UP(value)	(((value) + B_PAGE_SIZE - 1) / B_PAGE_SIZE \
									* B_PAGE_SIZE)
39 
40 
namespace {
	// Scratch data used by File::_WriteData(). It is allocated on the heap
	// there rather than being placed on the stack.
	struct WriteTempData {
		SHA256							sha256;
			// computes the per-block check sums
		checksum_device_ioctl_check_sum	indexAndCheckSum;
			// argument for CHECKSUM_DEVICE_IOCTL_SET_CHECK_SUM
		file_io_vec						fileVecs[16];
			// receives the results of File::GetFileVecs()
		uint8							blockData[B_PAGE_SIZE];
			// the contents of the block the check sum is computed for
	};
}
49 
50 
// Per tree level state used while walking or modifying a file's block tree.
// Index 0 of a LevelInfo array is the root level.
struct File::LevelInfo {
	uint64	addressableShift;	// 1 << addressableShift is the number of
								// descendent data blocks a child block (and its
								// descendents) can address
	uint32	childCount;			// number of child blocks of the last block of
								// this level
	Block	block;				// the block of this level currently worked on
	uint64*	blockData;			// the block index array within \c block (for
								// the root level it starts at
								// kFileRootBlockOffset)
	int32	index;				// index of the current entry in \c blockData
								// (used by GetFileVecs() only)
};
61 
62 
File(Volume * volume,uint64 blockIndex,const checksumfs_node & nodeData)63 File::File(Volume* volume, uint64 blockIndex, const checksumfs_node& nodeData)
64 	:
65 	Node(volume, blockIndex, nodeData),
66 	fFileCache(NULL)
67 {
68 	STATIC_ASSERT(kFileBlockMaxCount == (uint32)1 << kFileBlockShift);
69 }
70 
71 
File(Volume * volume,mode_t mode)72 File::File(Volume* volume, mode_t mode)
73 	:
74 	Node(volume, mode),
75 	fFileCache(NULL),
76 	fFileMap(NULL)
77 {
78 }
79 
80 
~File()81 File::~File()
82 {
83 	if (fFileCache != NULL)
84 		file_cache_delete(fFileCache);
85 	if (fFileMap != NULL)
86 		file_map_delete(fFileMap);
87 }
88 
89 
90 status_t
InitForVFS()91 File::InitForVFS()
92 {
93 	// create the file map
94 	fFileMap = file_map_create(GetVolume()->ID(), BlockIndex(), Size());
95 	if (fFileMap == NULL)
96 		RETURN_ERROR(B_NO_MEMORY);
97 
98 	// create the file cache
99 	fFileCache = file_cache_create(GetVolume()->ID(), BlockIndex(), Size());
100 	if (fFileCache == NULL)
101 		RETURN_ERROR(B_NO_MEMORY);
102 
103 	return B_OK;
104 }
105 
106 
107 void
DeletingNode()108 File::DeletingNode()
109 {
110 	Node::DeletingNode();
111 
112 	// start a transaction
113 	Transaction transaction(GetVolume());
114 	status_t error = transaction.Start();
115 	if (error != B_OK) {
116 		ERROR("Failed to start transaction for deleting contents of file at %"
117 			B_PRIu64 "\n", BlockIndex());
118 		return;
119 	}
120 
121 	error = Resize(0, false, transaction);
122 	if (error != B_OK) {
123 		ERROR("Failed to delete contents of file at %" B_PRIu64 "\n",
124 			BlockIndex());
125 		return;
126 	}
127 
128 	error = transaction.Commit();
129 	if (error != B_OK) {
130 		ERROR("Failed to commit transaction for deleting contents of file at %"
131 			B_PRIu64 "\n", BlockIndex());
132 	}
133 }
134 
135 
136 status_t
Resize(uint64 newSize,bool fillWithZeroes,Transaction & transaction)137 File::Resize(uint64 newSize, bool fillWithZeroes, Transaction& transaction)
138 {
139 	uint64 size = Size();
140 	if (newSize == size)
141 		return B_OK;
142 
143 	FUNCTION("%" B_PRIu64 " -> %" B_PRIu64 "\n", size, newSize);
144 
145 	uint64 blockCount = BLOCK_ROUND_UP(size) / B_PAGE_SIZE;
146 	uint64 newBlockCount = BLOCK_ROUND_UP(newSize) / B_PAGE_SIZE;
147 
148 	if (newBlockCount != blockCount) {
149 		status_t error;
150 		if (newBlockCount < blockCount)
151 			error = _ShrinkTree(blockCount, newBlockCount, transaction);
152 		else
153 			error = _GrowTree(blockCount, newBlockCount, transaction);
154 
155 		if (error != B_OK)
156 			RETURN_ERROR(error);
157 	}
158 
159 	SetSize(newSize);
160 
161 	file_cache_set_size(fFileCache, newSize);
162 	file_map_set_size(fFileMap, newSize);
163 
164 	if (newSize > size && fillWithZeroes) {
165 		status_t error = _WriteZeroes(size, newSize - size);
166 		if (error != B_OK) {
167 			file_cache_set_size(fFileCache, size);
168 			file_map_set_size(fFileMap, size);
169 			RETURN_ERROR(error);
170 		}
171 	}
172 
173 	return B_OK;
174 }
175 
176 
177 status_t
Read(off_t pos,void * buffer,size_t size,size_t & _bytesRead)178 File::Read(off_t pos, void* buffer, size_t size, size_t& _bytesRead)
179 {
180 	if (pos < 0)
181 		return B_BAD_VALUE;
182 
183 	if (size == 0) {
184 		_bytesRead = 0;
185 		return B_OK;
186 	}
187 
188 	NodeReadLocker locker(this);
189 
190 	uint64 fileSize = Size();
191 	if ((uint64)pos >= fileSize) {
192 		_bytesRead = 0;
193 		return B_OK;
194 	}
195 
196 	if (fileSize - pos < size)
197 		size = fileSize - pos;
198 
199 	locker.Unlock();
200 
201 	size_t bytesRead = size;
202 	status_t error = file_cache_read(fFileCache, NULL, pos, buffer, &bytesRead);
203 	if (error != B_OK)
204 		RETURN_ERROR(error);
205 
206 	_bytesRead = bytesRead;
207 	return B_OK;
208 }
209 
210 
211 status_t
Write(off_t pos,const void * buffer,size_t size,size_t & _bytesWritten,bool & _sizeChanged)212 File::Write(off_t pos, const void* buffer, size_t size, size_t& _bytesWritten,
213 	bool& _sizeChanged)
214 {
215 	_sizeChanged = false;
216 
217 	if (size == 0) {
218 		_bytesWritten = 0;
219 		return B_OK;
220 	}
221 
222 	NodeWriteLocker locker(this);
223 
224 	uint64 fileSize = Size();
225 	if (pos < 0)
226 		pos = fileSize;
227 
228 	uint64 newFileSize = (uint64)pos + size;
229 
230 	if (newFileSize > fileSize) {
231 		// we have to resize the file
232 		Transaction transaction(GetVolume());
233 		status_t error = transaction.Start();
234 		if (error != B_OK)
235 			RETURN_ERROR(error);
236 
237 		// attach the node to the transaction (write locks it, too)
238 		error = transaction.AddNode(this,
239 			TRANSACTION_NODE_ALREADY_LOCKED | TRANSACTION_KEEP_NODE_LOCKED);
240 		if (error != B_OK)
241 			RETURN_ERROR(error);
242 
243 		// resize
244 		error = Resize((uint64)pos + size, false, transaction);
245 		if (error != B_OK)
246 			RETURN_ERROR(error);
247 
248 		SetSize(newFileSize);
249 
250 		// commit the transaction
251 		error = transaction.Commit();
252 		if (error != B_OK)
253 			RETURN_ERROR(error);
254 
255 		_sizeChanged = true;
256 	}
257 
258 	// now the file has the right size -- do the write
259 	locker.Unlock();
260 
261 	if (fileSize < (uint64)pos) {
262 		// fill the gap between old file end and write position with zeroes
263 		_WriteZeroes(fileSize, pos - fileSize);
264 	}
265 
266 	size_t bytesWritten;
267 	status_t error = _WriteData(pos, buffer, size, bytesWritten);
268 	if (error != B_OK)
269 		RETURN_ERROR(error);
270 
271 	// update the file times
272 	Transaction transaction(GetVolume());
273 	if (transaction.Start() == B_OK && transaction.AddNode(this) == B_OK) {
274 		// note: we don't fail, if we only couldn't update the times
275 		Touched(NODE_MODIFIED);
276 		transaction.Commit();
277 	}
278 
279 	_bytesWritten = bytesWritten;
280 	return B_OK;
281 }
282 
283 
284 status_t
Sync()285 File::Sync()
286 {
287 	return file_cache_sync(fFileCache);
288 }
289 
290 
291 void
RevertNodeData(const checksumfs_node & nodeData)292 File::RevertNodeData(const checksumfs_node& nodeData)
293 {
294 	Node::RevertNodeData(nodeData);
295 
296 	// in case the file size was reverted, reset file cache and map
297 	uint64 size = Size();
298 	file_cache_set_size(fFileCache, size);
299 	file_map_set_size(fFileMap, size);
300 }
301 
302 
303 status_t
GetFileVecs(uint64 offset,size_t size,file_io_vec * vecs,size_t count,size_t & _count)304 File::GetFileVecs(uint64 offset, size_t size, file_io_vec* vecs, size_t count,
305 	size_t& _count)
306 {
307 	FUNCTION("offset: %" B_PRIu64 ", size: %" B_PRIuSIZE ", count: %" B_PRIuSIZE
308 		"\n", offset, size, count);
309 
310 	// Round size to block size, but restrict to file size. This semantics is
311 	// fine with the caller (the file map) and it will help avoiding partial
312 	// block I/O.
313 	uint32 inBlockOffset = offset % B_PAGE_SIZE;
314 
315 	uint64 firstBlock = offset / B_PAGE_SIZE;
316 	uint64 neededBlockCount = BLOCK_ROUND_UP((uint64)size + inBlockOffset)
317 		/ B_PAGE_SIZE;
318 	uint64 fileBlockCount = BLOCK_ROUND_UP(Size()) / B_PAGE_SIZE;
319 
320 	if (firstBlock >= fileBlockCount) {
321 		_count = 0;
322 		return B_OK;
323 	}
324 
325 	if (firstBlock + neededBlockCount > fileBlockCount)
326 		neededBlockCount = fileBlockCount - firstBlock;
327 
328 	// get the level infos
329 	int32 depth;
330 	LevelInfo* infos = _GetLevelInfos(fileBlockCount, depth);
331 	if (infos == NULL)
332 		RETURN_ERROR(B_NO_MEMORY);
333 	ArrayDeleter<LevelInfo> infosDeleter(infos);
334 
335 	// prepare for the iteration
336 	uint64 blockIndex = BlockIndex();
337 
338 	PRINT("  preparing iteration: firstBlock: %" B_PRIu64 ", blockIndex: %"
339 		B_PRIu64 "\n", firstBlock, blockIndex);
340 
341 	for (int32 i = 0; i < depth; i++) {
342 		LevelInfo& info = infos[i];
343 		if (!info.block.GetReadable(GetVolume(), blockIndex))
344 			RETURN_ERROR(B_ERROR);
345 
346 		if (i == 0) {
347 			info.blockData = (uint64*)((uint8*)info.block.Data()
348 				+ kFileRootBlockOffset);
349 		} else
350 			info.blockData = (uint64*)info.block.Data();
351 
352 		info.index = firstBlock >> info.addressableShift;
353 		firstBlock -= (uint64)info.index << info.addressableShift;
354 
355 		blockIndex = info.blockData[info.index];
356 
357 		PRINT("  preparing level %" B_PRId32 ": index: %" B_PRId32
358 			", firstBlock: %" B_PRIu64 ", blockIndex: %" B_PRIu64 "\n", i,
359 			info.index, firstBlock, blockIndex);
360 	}
361 
362 	// and iterate
363 	int32 level = depth - 1;
364 	size_t countAdded = 0;
365 
366 	while (true) {
367 		LevelInfo& info = infos[level];
368 
369 		if (info.index == (int32)kFileBlockMaxCount) {
370 			// end of block -- back track to next greater branch
371 			PRINT("  level: %" B_PRId32 ": index: %" B_PRId32 " -> back "
372 				"tracking\n", level, info.index);
373 
374 			level--;
375 			infos[level].index++;
376 			continue;
377 		}
378 
379 		blockIndex = info.blockData[info.index];
380 
381 		PRINT("  level: %" B_PRId32 ": index: %" B_PRId32 " -> blockIndex: %"
382 			B_PRIu64 "\n", level, info.index, blockIndex);
383 
384 		if (level < depth - 1) {
385 			// descend to next level
386 			level++;
387 
388 			if (!infos[level].block.GetReadable(GetVolume(), blockIndex))
389 				RETURN_ERROR(B_ERROR);
390 
391 			infos[level].blockData = (uint64*)infos[level].block.Data();
392 			infos[level].index = 0;
393 			continue;
394 		}
395 
396 		info.index++;
397 
398 		// add the block
399 		uint64 blockOffset = blockIndex * B_PAGE_SIZE;
400 		if (countAdded > 0
401 			&& blockOffset
402 				== (uint64)vecs[countAdded - 1].offset
403 					+ vecs[countAdded - 1].length) {
404 			// the block continues where the previous block ends -- just extend
405 			// the vector
406 			vecs[countAdded - 1].length += B_PAGE_SIZE;
407 
408 			PRINT("  -> extended vector %" B_PRIuSIZE ": offset: %"
409 				B_PRIdOFF " size: %" B_PRIdOFF "\n", countAdded - 1,
410 				vecs[countAdded - 1].offset, vecs[countAdded - 1].length);
411 		} else {
412 			// we need a new block
413 			if (countAdded == count)
414 				break;
415 
416 			vecs[countAdded].offset = blockOffset + inBlockOffset;
417 			vecs[countAdded].length = B_PAGE_SIZE - inBlockOffset;
418 			countAdded++;
419 			inBlockOffset = 0;
420 
421 			PRINT("  -> added vector %" B_PRIuSIZE ":    offset: %"
422 				B_PRIdOFF " size: %" B_PRIdOFF "\n", countAdded - 1,
423 				vecs[countAdded - 1].offset, vecs[countAdded - 1].length);
424 		}
425 
426 		if (--neededBlockCount == 0)
427 			break;
428 	}
429 
430 	_count = countAdded;
431 	return B_OK;
432 }
433 
434 
435 /*static*/ uint32
_DepthForBlockCount(uint64 blockCount)436 File::_DepthForBlockCount(uint64 blockCount)
437 {
438 	uint64 addressableBlocks = kFileRootBlockMaxCount;
439 
440 	uint32 depth = 1;
441 	while (blockCount > addressableBlocks) {
442 		addressableBlocks *= kFileBlockMaxCount;
443 		depth++;
444 	}
445 
446 	return depth;
447 }
448 
449 
450 /*static*/ void
_UpdateLevelInfos(LevelInfo * infos,int32 levelCount,uint64 blockCount)451 File::_UpdateLevelInfos(LevelInfo* infos, int32 levelCount, uint64 blockCount)
452 {
453 	if (blockCount == 0) {
454 		infos[0].addressableShift = 0;
455 		infos[0].childCount = 0;
456 		return;
457 	}
458 
459 	uint64 addressableShift = 0;
460 	for (int32 i = levelCount - 1; i >= 0; i--) {
461 		infos[i].addressableShift = addressableShift;
462 		infos[i].childCount = (blockCount - 1) % kFileBlockMaxCount + 1;
463 		addressableShift += kFileBlockShift;
464 		blockCount = (blockCount + kFileBlockMaxCount - 1) / kFileBlockMaxCount;
465 	}
466 }
467 
468 
469 /*static*/ File::LevelInfo*
_GetLevelInfos(uint64 blockCount,int32 & _levelCount)470 File::_GetLevelInfos(uint64 blockCount, int32& _levelCount)
471 {
472 	LevelInfo* infos = new(std::nothrow) LevelInfo[kFileMaxTreeDepth];
473 // TODO: We need to allocate differently, if requested by the page writer!
474 	if (infos == NULL)
475 		return NULL;
476 
477 	int32 levelCount = _DepthForBlockCount(blockCount);
478 	_UpdateLevelInfos(infos, levelCount, blockCount);
479 
480 	_levelCount = levelCount;
481 	return infos;
482 }
483 
484 
485 status_t
_ShrinkTree(uint64 blockCount,uint64 newBlockCount,Transaction & transaction)486 File::_ShrinkTree(uint64 blockCount, uint64 newBlockCount,
487 	Transaction& transaction)
488 {
489 	FUNCTION("blockCount: %" B_PRIu64 " -> %" B_PRIu64 "\n", blockCount,
490 		newBlockCount);
491 
492 	int32 depth;
493 	LevelInfo* infos = _GetLevelInfos(blockCount, depth);
494 	if (infos == NULL)
495 		return B_NO_MEMORY;
496 	ArrayDeleter<LevelInfo> infosDeleter(infos);
497 
498 	// load the root block
499 	if (!infos[0].block.GetWritable(GetVolume(), BlockIndex(), transaction))
500 		RETURN_ERROR(B_ERROR);
501 	infos[0].blockData = (uint64*)((uint8*)infos[0].block.Data()
502 		+ kFileRootBlockOffset);
503 
504 	int32 level = 0;
505 
506 	// remove blocks
507 	bool removeBlock = false;
508 	while (true) {
509 		PRINT("  level %" B_PRId32 ", child count: %" B_PRIu32 "\n", level,
510 			infos[level].childCount);
511 
512 		// If the block is empty, remove it.
513 		if (infos[level].childCount == 0) {
514 			if (level == 0)
515 				break;
516 
517 			// prepare for the next iteration
518 			infos[level].childCount = kFileBlockMaxCount;
519 
520 			removeBlock = true;
521 			level--;
522 			continue;
523 		}
524 
525 		// block not empty -- we might already be done
526 		if (blockCount == newBlockCount)
527 			break;
528 
529 		uint64 blockIndex = infos[level].blockData[infos[level].childCount - 1];
530 
531 		// unless we're in the last level or shall remove, descend
532 		if (level < depth - 1 && !removeBlock) {
533 			LevelInfo& info = infos[++level];
534 			if (!info.block.GetWritable(GetVolume(), blockIndex, transaction))
535 				RETURN_ERROR(B_ERROR);
536 			info.blockData = (uint64*)info.block.Data();
537 			continue;
538 		}
539 
540 		// remove the block
541 
542 		LevelInfo& info = infos[level];
543 
544 		PRINT("  freeing block: %" B_PRId64 "\n", blockIndex);
545 
546 		// clear the entry (not strictly necessary)
547 		info.blockData[info.childCount - 1] = 0;
548 
549 		// free the block
550 		status_t error = GetVolume()->GetBlockAllocator()->Free(blockIndex, 1,
551 			transaction);
552 		if (error != B_OK)
553 			RETURN_ERROR(error);
554 
555 		if (level == depth - 1)
556 			blockCount--;
557 
558 		infos[level].childCount--;
559 
560 		removeBlock = false;
561 	}
562 
563 	// We got rid of all unnecessary data blocks and empty node blocks. We might
564 	// need to cull the lower levels of the tree, now.
565 	int32 newDepth = _DepthForBlockCount(newBlockCount);
566 	if (newDepth == depth)
567 		return B_OK;
568 
569 	for (int32 i = 1; i <= depth - newDepth; i++) {
570 		uint64 blockIndex = infos[0].blockData[0];
571 
572 		PRINT("  removing block %" B_PRIu64 " at level %" B_PRIi32 "\n",
573 			blockIndex, i);
574 
575 		Block block;
576 		if (!block.GetReadable(GetVolume(), blockIndex))
577 			RETURN_ERROR(B_ERROR);
578 
579 		// copy to the root block
580 		const uint64* blockData = (uint64*)infos[i].block.Data();
581 		memcpy(infos[0].blockData, blockData, infos[i].childCount * 8);
582 
583 		// free the block
584 		block.Put();
585 		status_t error = GetVolume()->GetBlockAllocator()->Free(blockIndex, 1,
586 			transaction);
587 		if (error != B_OK)
588 			RETURN_ERROR(error);
589 	}
590 
591 	return B_OK;
592 }
593 
594 
595 status_t
_GrowTree(uint64 blockCount,uint64 newBlockCount,Transaction & transaction)596 File::_GrowTree(uint64 blockCount, uint64 newBlockCount,
597 	Transaction& transaction)
598 {
599 	FUNCTION("blockCount: %" B_PRIu64 " -> %" B_PRIu64 "\n", blockCount,
600 		newBlockCount);
601 
602 	int32 depth;
603 	LevelInfo* infos = _GetLevelInfos(blockCount, depth);
604 	if (infos == NULL)
605 		return B_NO_MEMORY;
606 	ArrayDeleter<LevelInfo> infosDeleter(infos);
607 
608 	int32 newDepth = _DepthForBlockCount(newBlockCount);
609 
610 	Block& rootBlock = infos[0].block;
611 	if (!rootBlock.GetWritable(GetVolume(), BlockIndex(), transaction))
612 		RETURN_ERROR(B_ERROR);
613 	infos[0].blockData = (uint64*)((uint8*)rootBlock.Data()
614 		+ kFileRootBlockOffset);
615 
616 	// add new levels, if necessary
617 	if (depth < newDepth) {
618 		uint32 childCount = infos[0].childCount;
619 
620 		// update the level infos
621 		_UpdateLevelInfos(infos, newDepth, blockCount);
622 
623 		// allocate a block per new level
624 		for (int32 i = newDepth - depth - 1; i >= 0; i--) {
625 			// allocate a new block
626 			AllocatedBlock allocatedBlock(GetVolume()->GetBlockAllocator(),
627 				transaction);
628 			status_t error = allocatedBlock.Allocate(BlockIndex());
629 			if (error != B_OK)
630 				RETURN_ERROR(error);
631 
632 			Block newBlock;
633 			if (!newBlock.GetZero(GetVolume(), allocatedBlock.Index(),
634 					transaction)) {
635 				RETURN_ERROR(B_ERROR);
636 			}
637 
638 			allocatedBlock.Detach();
639 
640 			PRINT("  inserting block %" B_PRIu64 " at level %" B_PRIi32
641 				"\n", newBlock.Index(), i + 1);
642 
643 			// copy the root block
644 			memcpy(newBlock.Data(), infos[0].blockData, childCount * 8);
645 
646 			// set the block in the root block
647 			infos[0].blockData[0] = newBlock.Index();
648 			childCount = 1;
649 		}
650 	}
651 
652 	depth = newDepth;
653 
654 	// prepare the iteration
655 	int32 level = depth - 1;
656 	for (int32 i = 0; i < level; i++) {
657 		// get the block for the next level
658 		LevelInfo& info = infos[i];
659 		if (!infos[i + 1].block.GetWritable(GetVolume(),
660 				info.blockData[info.childCount - 1], transaction)) {
661 			RETURN_ERROR(B_ERROR);
662 		}
663 		infos[i + 1].blockData = (uint64*)infos[i + 1].block.Data();
664 	}
665 
666 	// add the new blocks
667 	while (blockCount < newBlockCount) {
668 		PRINT("  level %" B_PRId32 ", child count: %" B_PRIu32 "\n", level,
669 			infos[level].childCount);
670 
671 		if (infos[level].childCount >= (int32)kFileBlockMaxCount) {
672 			// block is full -- back track
673 			level--;
674 		}
675 
676 		// allocate and insert block
677 		AllocatedBlock allocatedBlock(GetVolume()->GetBlockAllocator(),
678 			transaction);
679 		status_t error = allocatedBlock.Allocate(BlockIndex());
680 		if (error != B_OK)
681 			RETURN_ERROR(error);
682 
683 		uint64 blockIndex = allocatedBlock.Index();
684 		infos[level].blockData[infos[level].childCount++] = blockIndex;
685 
686 		PRINT("  allocated block: %" B_PRId64 "\n", blockIndex);
687 
688 		if (level < depth - 1) {
689 			// descend to the next level
690 			level++;
691 			infos[level].childCount = 0;
692 
693 			if (!infos[level].block.GetZero(GetVolume(), blockIndex,
694 					transaction)) {
695 				RETURN_ERROR(B_ERROR);
696 			}
697 
698 			infos[level].blockData = (uint64*)infos[level].block.Data();
699 		} else {
700 			// That's a data block -- make the block cache forget it, so it
701 			// doesn't conflict with the file cache.
702 			block_cache_discard(GetVolume()->BlockCache(), blockIndex, 1);
703 			blockCount++;
704 		}
705 
706 		allocatedBlock.Detach();
707 	}
708 
709 	return B_OK;
710 }
711 
712 
713 status_t
_WriteZeroes(uint64 offset,uint64 size)714 File::_WriteZeroes(uint64 offset, uint64 size)
715 {
716 	while (size > 0) {
717 		size_t bytesWritten;
718 		status_t error =  _WriteData(offset, NULL,
719 			std::min(size, (uint64)SIZE_MAX), bytesWritten);
720 		if (error != B_OK)
721 			RETURN_ERROR(error);
722 		if (bytesWritten == 0)
723 			RETURN_ERROR(B_ERROR);
724 
725 		size -= bytesWritten;
726 		offset += bytesWritten;
727 	}
728 
729 	return B_OK;
730 }
731 
732 
733 status_t
_WriteData(uint64 offset,const void * buffer,size_t size,size_t & _bytesWritten)734 File::_WriteData(uint64 offset, const void* buffer, size_t size,
735 	size_t& _bytesWritten)
736 {
737 	uint32 inBlockOffset = offset % B_PAGE_SIZE;
738 	uint64 blockCount = ((uint64)size + inBlockOffset + B_PAGE_SIZE - 1)
739 		/ B_PAGE_SIZE;
740 
741 	// allocate storage for the indices of the blocks
742 	uint64* blockIndices = new(std::nothrow) uint64[blockCount];
743 	if (blockIndices == NULL)
744 		RETURN_ERROR(B_NO_MEMORY);
745 	ArrayDeleter<uint64> blockIndicesDeleter(blockIndices);
746 
747 	// allocate temporary storage for the check sum computation
748 	WriteTempData* tempData = new(std::nothrow) WriteTempData;
749 	if (tempData == NULL)
750 		RETURN_ERROR(B_NO_MEMORY);
751 	ObjectDeleter<WriteTempData> tempDataDeleter(tempData);
752 
753 	// get the block indices
754 	uint64 firstBlockIndex = offset / B_PAGE_SIZE;
755 	for (uint64 i = 0; i < blockCount;) {
756 		size_t count;
757 		status_t error = GetFileVecs((firstBlockIndex + i) * B_PAGE_SIZE,
758 			size + inBlockOffset - i * B_PAGE_SIZE, tempData->fileVecs,
759 			sizeof(tempData->fileVecs) / sizeof(file_io_vec), count);
760 		if (error != B_OK)
761 			RETURN_ERROR(error);
762 
763 		for (size_t k = 0; k < count && i < blockCount; k++) {
764 			off_t vecBlockIndex = tempData->fileVecs[k].offset / B_PAGE_SIZE;
765 			off_t vecLength = tempData->fileVecs[k].length;
766 			while (vecLength > 0 && i < blockCount) {
767 				blockIndices[i++] = vecBlockIndex++;
768 				vecLength -= B_PAGE_SIZE;
769 			}
770 		}
771 	}
772 
773 	// clear the check sums of the affected blocks
774 	memset(&tempData->indexAndCheckSum.checkSum, 0, sizeof(CheckSum));
775 	for (uint64 i = 0; i < blockCount; i++) {
776 		tempData->indexAndCheckSum.blockIndex = blockIndices[i];
777 		if (ioctl(GetVolume()->FD(), CHECKSUM_DEVICE_IOCTL_SET_CHECK_SUM,
778 				&tempData->indexAndCheckSum,
779 				sizeof(tempData->indexAndCheckSum)) < 0) {
780 			RETURN_ERROR(errno);
781 		}
782 	}
783 
784 	// write
785 	size_t bytesWritten = size;
786 	status_t error = file_cache_write(fFileCache, NULL, offset, buffer,
787 		&bytesWritten);
788 	if (error != B_OK)
789 		RETURN_ERROR(error);
790 
791 	// compute and set the new check sums
792 	for (uint64 i = 0; i < blockCount; i++) {
793 		// copy the data to our temporary buffer
794 		if (i == 0 && inBlockOffset != 0) {
795 			// partial block -- read complete block from cache
796 			size_t bytesRead = B_PAGE_SIZE;
797 			error = file_cache_read(fFileCache, NULL, offset - inBlockOffset,
798 				tempData->blockData, &bytesRead);
799 			if (error != B_OK)
800 				RETURN_ERROR(error);
801 
802 			if (bytesRead < B_PAGE_SIZE) {
803 				// partial read (the file is possibly shorter) -- clear the rest
804 				memset(tempData->blockData + bytesRead, 0,
805 					B_PAGE_SIZE - bytesRead);
806 			}
807 
808 			// copy provided data
809 			size_t toCopy = std::min((size_t)B_PAGE_SIZE - inBlockOffset, size);
810 			if (buffer != NULL) {
811 				error = user_memcpy(tempData->blockData + inBlockOffset,
812 					buffer, toCopy);
813 				if (error != B_OK)
814 					RETURN_ERROR(error);
815 			} else
816 				memset(tempData->blockData + inBlockOffset, 0, toCopy);
817 		} else if (i == blockCount - 1
818 			&& (size + inBlockOffset) % B_PAGE_SIZE != 0) {
819 			// partial block -- read complete block from cache
820 			size_t bytesRead = B_PAGE_SIZE;
821 			error = file_cache_read(fFileCache, NULL,
822 				offset - inBlockOffset + i * B_PAGE_SIZE,
823 				tempData->blockData, &bytesRead);
824 			if (error != B_OK)
825 				RETURN_ERROR(error);
826 
827 			if (bytesRead < B_PAGE_SIZE) {
828 				// partial read (the file is possibly shorter) -- clear the rest
829 				memset(tempData->blockData + bytesRead, 0,
830 					B_PAGE_SIZE - bytesRead);
831 			}
832 
833 			// copy provided data
834 			size_t toCopy = (size + inBlockOffset) % B_PAGE_SIZE;
835 				// we start at the beginning of the block, since i > 0
836 			if (buffer != NULL) {
837 				error = user_memcpy(tempData->blockData,
838 					(const uint8*)buffer + i * B_PAGE_SIZE - inBlockOffset,
839 					toCopy);
840 				if (error != B_OK)
841 					RETURN_ERROR(error);
842 			} else
843 				memset(tempData->blockData, 0, toCopy);
844 		} else {
845 			// complete block
846 			if (buffer != NULL) {
847 				error = user_memcpy(tempData->blockData,
848 					(const uint8*)buffer + i * B_PAGE_SIZE - inBlockOffset,
849 					B_PAGE_SIZE);
850 				if (error != B_OK)
851 					RETURN_ERROR(error);
852 			} else if (i == 0 || (i == 1 && inBlockOffset != 0)) {
853 				// clear only once
854 				memset(tempData->blockData, 0, B_PAGE_SIZE);
855 			}
856 		}
857 
858 		// compute the check sum
859 		if (buffer != NULL || i == 0 || (i == 1 && inBlockOffset != 0)) {
860 			tempData->sha256.Init();
861 			tempData->sha256.Update(tempData->blockData, B_PAGE_SIZE);
862 			tempData->indexAndCheckSum.checkSum = tempData->sha256.Digest();
863 		}
864 
865 		// set it
866 		tempData->indexAndCheckSum.blockIndex = blockIndices[i];
867 
868 		if (ioctl(GetVolume()->FD(), CHECKSUM_DEVICE_IOCTL_SET_CHECK_SUM,
869 				&tempData->indexAndCheckSum,
870 				sizeof(tempData->indexAndCheckSum)) < 0) {
871 			RETURN_ERROR(errno);
872 		}
873 	}
874 
875 	_bytesWritten = bytesWritten;
876 	return B_OK;
877 }
878