xref: /haiku/src/add-ons/kernel/file_systems/ext2/Journal.cpp (revision a3e794ae459fec76826407f8ba8c94cd3535f128)
1 /*
2  * Copyright 2010, Haiku Inc. All rights reserved.
3  * Copyright 2001-2010, Axel Dörfler, axeld@pinc-software.de.
4  * This file may be used under the terms of the MIT License.
5  *
6  * Authors:
7  *		Janito V. Ferreira Filho
8  */
9 
10 
11 #include "Journal.h"
12 
13 #include <new>
14 #include <string.h>
15 #include <unistd.h>
16 
17 #include <fs_cache.h>
18 
19 #include "CachedBlock.h"
20 #include "HashRevokeManager.h"
21 
22 
// Uncomment the following define to turn the TRACE() calls in this file into
// dprintf() output; otherwise they expand to an empty statement.
//#define TRACE_EXT2
#ifdef TRACE_EXT2
#	define TRACE(x...) dprintf("\33[34mext2:\33[0m " x)
#else
#	define TRACE(x...) ;
#endif
// ERROR() is always forwarded to dprintf(), independent of TRACE_EXT2.
#define ERROR(x...) dprintf("\33[34mext2:\33[0m " x)
30 
31 
32 class LogEntry : public DoublyLinkedListLinkImpl<LogEntry> {
33 public:
34 							LogEntry(Journal* journal, uint32 logStart,
35 								uint32 length);
36 							~LogEntry();
37 
38 			uint32			Start() const { return fStart; }
39 			uint32			CommitID() const { return fCommitID; }
40 
41 			Journal*		GetJournal() { return fJournal; }
42 
43 private:
44 			Journal*		fJournal;
45 			uint32			fStart;
46 			uint32			fCommitID;
47 };
48 
49 
50 LogEntry::LogEntry(Journal* journal, uint32 logStart, uint32 commitID)
51 	:
52 	fJournal(journal),
53 	fStart(logStart),
54 	fCommitID(commitID)
55 {
56 }
57 
58 
59 LogEntry::~LogEntry()
60 {
61 }
62 
63 
64 void
65 JournalHeader::MakeDescriptor(uint32 sequence)
66 {
67 	this->magic = B_HOST_TO_BENDIAN_INT32(JOURNAL_MAGIC);
68 	this->sequence = B_HOST_TO_BENDIAN_INT32(sequence);
69 	this->block_type = B_HOST_TO_BENDIAN_INT32(JOURNAL_DESCRIPTOR_BLOCK);
70 }
71 
72 
73 void
74 JournalHeader::MakeCommit(uint32 sequence)
75 {
76 	this->magic = B_HOST_TO_BENDIAN_INT32(JOURNAL_MAGIC);
77 	this->sequence = B_HOST_TO_BENDIAN_INT32(sequence);
78 	this->block_type = B_HOST_TO_BENDIAN_INT32(JOURNAL_COMMIT_BLOCK);
79 }
80 
81 
82 Journal::Journal(Volume* fsVolume, Volume* jVolume)
83 	:
84 	fJournalVolume(jVolume),
85 	fJournalBlockCache(jVolume->BlockCache()),
86 	fFilesystemVolume(fsVolume),
87 	fFilesystemBlockCache(fsVolume->BlockCache()),
88 	fRevokeManager(NULL),
89 	fInitStatus(B_OK),
90 	fBlockSize(sizeof(JournalSuperBlock)),
91 	fFirstCommitID(0),
92 	fFirstCacheCommitID(0),
93 	fFirstLogBlock(1),
94 	fLogSize(0),
95 	fVersion(0),
96 	fLogStart(0),
97 	fLogEnd(0),
98 	fFreeBlocks(0),
99 	fMaxTransactionSize(0),
100 	fCurrentCommitID(0),
101 	fHasSubTransaction(false),
102 	fSeparateSubTransactions(false),
103 	fUnwrittenTransactions(0),
104 	fTransactionID(0)
105 {
106 	recursive_lock_init(&fLock, "ext2 journal");
107 	mutex_init(&fLogEntriesLock, "ext2 journal log entries");
108 
109 	HashRevokeManager* revokeManager = new(std::nothrow) HashRevokeManager;
110 	TRACE("Journal::Journal(): Allocated a hash revoke manager at %p\n",
111 		revokeManager);
112 
113 	if (revokeManager == NULL)
114 		fInitStatus = B_NO_MEMORY;
115 	else {
116 		fInitStatus = revokeManager->Init();
117 
118 		if (fInitStatus == B_OK) {
119 			fRevokeManager = revokeManager;
120 			fInitStatus = _LoadSuperBlock();
121 		} else
122 			delete revokeManager;
123 	}
124 }
125 
126 
127 Journal::Journal()
128 	:
129 	fJournalVolume(NULL),
130 	fJournalBlockCache(NULL),
131 	fFilesystemVolume(NULL),
132 	fFilesystemBlockCache(NULL),
133 	fRevokeManager(NULL),
134 	fInitStatus(B_OK),
135 	fBlockSize(sizeof(JournalSuperBlock)),
136 	fFirstCommitID(0),
137 	fFirstCacheCommitID(0),
138 	fFirstLogBlock(1),
139 	fLogSize(0),
140 	fVersion(0),
141 	fIsStarted(false),
142 	fLogStart(0),
143 	fLogEnd(0),
144 	fFreeBlocks(0),
145 	fMaxTransactionSize(0),
146 	fCurrentCommitID(0),
147 	fHasSubTransaction(false),
148 	fSeparateSubTransactions(false),
149 	fUnwrittenTransactions(0),
150 	fTransactionID(0)
151 {
152 	recursive_lock_init(&fLock, "ext2 journal");
153 	mutex_init(&fLogEntriesLock, "ext2 journal log entries");
154 }
155 
156 
157 Journal::~Journal()
158 {
159 	TRACE("Journal destructor.\n");
160 
161 	TRACE("Journal::~Journal(): Attempting to delete revoke manager at %p\n",
162 		fRevokeManager);
163 	delete fRevokeManager;
164 
165 	recursive_lock_destroy(&fLock);
166 	mutex_destroy(&fLogEntriesLock);
167 }
168 
169 
170 status_t
171 Journal::InitCheck()
172 {
173 	return fInitStatus;
174 }
175 
176 
177 status_t
178 Journal::Uninit()
179 {
180 	if (!fIsStarted)
181 		return B_OK;
182 
183 	status_t status = FlushLogAndBlocks();
184 
185 	if (status == B_OK) {
186 		// Mark journal as clean
187 		fLogStart = 0;
188 		status = _SaveSuperBlock();
189 	}
190 
191 	fIsStarted = false;
192 
193 	return status;
194 }
195 
196 
197 /*virtual*/ status_t
198 Journal::StartLog()
199 {
200 	fLogStart = fFirstLogBlock;
201 	fLogEnd = fFirstLogBlock;
202 	fFreeBlocks = 0;
203 	fIsStarted = true;
204 
205 	fCurrentCommitID = fFirstCommitID;
206 
207 	return _SaveSuperBlock();
208 }
209 
210 
211 status_t
212 Journal::RestartLog()
213 {
214 	fFirstCommitID = 1;
215 
216 	return B_OK;
217 }
218 
219 
220 /*virtual*/ status_t
221 Journal::Lock(Transaction* owner, bool separateSubTransactions)
222 {
223 	TRACE("Journal::Lock()\n");
224 	status_t status = recursive_lock_lock(&fLock);
225 	if (status != B_OK)
226 		return status;
227 
228 	TRACE("Journal::Lock(): Aquired lock\n");
229 
230 	if (!fSeparateSubTransactions && recursive_lock_get_recursion(&fLock) > 1) {
231 		// reuse current transaction
232 		TRACE("Journal::Lock(): Reusing current transaction\n");
233 		return B_OK;
234 	}
235 
236 	if(separateSubTransactions)
237 		fSeparateSubTransactions = true;
238 
239 	if (owner != NULL)
240 		owner->SetParent(fOwner);
241 
242 	fOwner = owner;
243 
244 	if (fOwner != NULL) {
245 		if (fUnwrittenTransactions > 0) {
246 			// start a sub transaction
247 			TRACE("Journal::Lock(): Starting sub transaction\n");
248 			cache_start_sub_transaction(fFilesystemBlockCache, fTransactionID);
249 			fHasSubTransaction = true;
250 		} else {
251 			TRACE("Journal::Lock(): Starting new transaction\n");
252 			fTransactionID = cache_start_transaction(fFilesystemBlockCache);
253 		}
254 
255 		if (fTransactionID < B_OK) {
256 			recursive_lock_unlock(&fLock);
257 			return fTransactionID;
258 		}
259 
260 		cache_add_transaction_listener(fFilesystemBlockCache, fTransactionID,
261 			TRANSACTION_IDLE, _TransactionIdle, this);
262 	}
263 
264 	return B_OK;
265 }
266 
267 
268 /*virtual*/ status_t
269 Journal::Unlock(Transaction* owner, bool success)
270 {
271 	TRACE("Journal::Unlock(): Lock recursion: %" B_PRId32 "\n",
272 		recursive_lock_get_recursion(&fLock));
273 	if (fSeparateSubTransactions
274 		|| recursive_lock_get_recursion(&fLock) == 1) {
275 		// we only end the transaction if we unlock it
276 		if (owner != NULL) {
277 			TRACE("Journal::Unlock(): Calling _TransactionDone\n");
278 			status_t status = _TransactionDone(success);
279 			if (status != B_OK)
280 				return status;
281 
282 			TRACE("Journal::Unlock(): Returned from _TransactionDone\n");
283 			bool separateSubTransactions = fSeparateSubTransactions;
284 			fSeparateSubTransactions = true;
285 			TRACE("Journal::Unlock(): Notifying listeners for: %p\n", owner);
286 			owner->NotifyListeners(success);
287 			TRACE("Journal::Unlock(): Done notifying listeners\n");
288 			fSeparateSubTransactions = separateSubTransactions;
289 
290 			fOwner = owner->Parent();
291 		} else
292 			fOwner = NULL;
293 
294 		if (fSeparateSubTransactions
295 			&& recursive_lock_get_recursion(&fLock) == 1)
296 			fSeparateSubTransactions = false;
297 	} else
298 		owner->MoveListenersTo(fOwner);
299 
300 	TRACE("Journal::Unlock(): Unlocking the lock\n");
301 
302 	recursive_lock_unlock(&fLock);
303 	return B_OK;
304 }
305 
306 
307 status_t
308 Journal::MapBlock(off_t logical, fsblock_t& physical)
309 {
310 	TRACE("Journal::MapBlock()\n");
311 	physical = logical;
312 
313 	return B_OK;
314 }
315 
316 
317 inline uint32
318 Journal::FreeLogBlocks() const
319 {
320 	TRACE("Journal::FreeLogBlocks(): start: %" B_PRIu32 ", end: %" B_PRIu32
321 		", size: %" B_PRIu32 "\n", fLogStart, fLogEnd, fLogSize);
322 	return fLogStart <= fLogEnd
323 		? fLogSize - fLogEnd + fLogStart - 1
324 		: fLogStart - fLogEnd;
325 }
326 
327 
328 status_t
329 Journal::FlushLogAndBlocks()
330 {
331 	return _FlushLog(true, true);
332 }
333 
334 
335 int32
336 Journal::TransactionID() const
337 {
338 	return fTransactionID;
339 }
340 
341 
342 status_t
343 Journal::_WritePartialTransactionToLog(JournalHeader* descriptorBlock,
344 	bool detached, uint8** _escapedData, uint32 &logBlock, off_t& blockNumber,
345 	long& cookie, ArrayDeleter<uint8>& escapedDataDeleter, uint32& blockCount,
346 	bool& finished)
347 {
348 	TRACE("Journal::_WritePartialTransactionToLog()\n");
349 
350 	uint32 descriptorBlockPos = logBlock;
351 	uint8* escapedData = *_escapedData;
352 
353 	JournalBlockTag* tag = (JournalBlockTag*)descriptorBlock->data;
354 	JournalBlockTag* lastTag = (JournalBlockTag*)((uint8*)descriptorBlock
355 		+ fBlockSize - sizeof(JournalHeader));
356 
357 	finished = false;
358 	status_t status = B_OK;
359 
360 	while (tag < lastTag && status == B_OK) {
361 		tag->SetBlockNumber(blockNumber);
362 		tag->SetFlags(0);
363 
364 		CachedBlock data(fFilesystemVolume);
365 		const JournalHeader* blockData = (JournalHeader*)data.SetTo(
366 			blockNumber);
367 		if (blockData == NULL) {
368 			panic("Got a NULL pointer while iterating through transaction "
369 				"blocks.\n");
370 			return B_ERROR;
371 		}
372 
373 		void* finalData;
374 
375 		if (blockData->CheckMagic()) {
376 			// The journaled block starts with the magic value
377 			// We must remove it to prevent confusion
378 			TRACE("Journal::_WritePartialTransactionToLog(): Block starts with "
379 				"magic number. Escaping it\n");
380 			tag->SetEscapedFlag();
381 
382 			if (escapedData == NULL) {
383 				TRACE("Journal::_WritePartialTransactionToLog(): Allocating "
384 					"space for escaped block (%" B_PRIu32 ")\n", fBlockSize);
385 				escapedData = new(std::nothrow) uint8[fBlockSize];
386 				if (escapedData == NULL) {
387 					TRACE("Journal::_WritePartialTransactionToLof(): Failed to "
388 						"allocate buffer for escaped data block\n");
389 					return B_NO_MEMORY;
390 				}
391 				escapedDataDeleter.SetTo(escapedData);
392 				*_escapedData = escapedData;
393 
394 				((int32*)escapedData)[0] = 0; // Remove magic
395 			}
396 
397 			memcpy(escapedData + 4, blockData->data, fBlockSize - 4);
398 			finalData = escapedData;
399 		} else
400 			finalData = (void*)blockData;
401 
402 		// TODO: use iovecs?
403 
404 		logBlock = _WrapAroundLog(logBlock + 1);
405 
406 		fsblock_t physicalBlock;
407 		status = MapBlock(logBlock, physicalBlock);
408 		if (status != B_OK)
409 			return status;
410 
411 		off_t logOffset = physicalBlock * fBlockSize;
412 
413 		TRACE("Journal::_WritePartialTransactionToLog(): Writing from memory: "
414 			"%p, to disk: %" B_PRIdOFF "\n", finalData, logOffset);
415 		size_t written = write_pos(fJournalVolume->Device(), logOffset,
416 			finalData, fBlockSize);
417 		if (written != fBlockSize) {
418 			TRACE("Failed to write journal block.\n");
419 			return B_IO_ERROR;
420 		}
421 
422 		TRACE("Journal::_WritePartialTransactionToLog(): Wrote a journal block "
423 			"at: %" B_PRIu32 "\n", logBlock);
424 
425 		blockCount++;
426 		tag++;
427 
428 		status = cache_next_block_in_transaction(fFilesystemBlockCache,
429 			fTransactionID, detached, &cookie, &blockNumber, NULL, NULL);
430 	}
431 
432 	finished = status != B_OK;
433 
434 	// Write descriptor block
435 	--tag;
436 	tag->SetLastTagFlag();
437 
438 	fsblock_t physicalBlock;
439 	status = MapBlock(descriptorBlockPos, physicalBlock);
440 	if (status != B_OK)
441 		return status;
442 
443 	off_t descriptorBlockOffset = physicalBlock * fBlockSize;
444 
445 	TRACE("Journal::_WritePartialTransactionToLog(): Writing to: %" B_PRIdOFF
446 		"\n", descriptorBlockOffset);
447 	size_t written = write_pos(fJournalVolume->Device(),
448 		descriptorBlockOffset, descriptorBlock, fBlockSize);
449 	if (written != fBlockSize) {
450 		TRACE("Failed to write journal descriptor block.\n");
451 		return B_IO_ERROR;
452 	}
453 
454 	blockCount++;
455 	logBlock = _WrapAroundLog(logBlock + 1);
456 
457 	return B_OK;
458 }
459 
460 
461 status_t
462 Journal::_WriteTransactionToLog()
463 {
464 	TRACE("Journal::_WriteTransactionToLog()\n");
465 	// Transaction enters the Flush state
466 	bool detached = false;
467 	TRACE("Journal::_WriteTransactionToLog(): Attempting to get transaction "
468 		"size\n");
469 	size_t size = _FullTransactionSize();
470 	TRACE("Journal::_WriteTransactionToLog(): transaction size: %" B_PRIuSIZE
471 		"\n", size);
472 
473 	if (size > fMaxTransactionSize) {
474 		TRACE("Journal::_WriteTransactionToLog(): not enough free space "
475 			"for the transaction. Attempting to free some space.\n");
476 		size = _MainTransactionSize();
477 		TRACE("Journal::_WriteTransactionToLog(): main transaction size: %"
478 			B_PRIuSIZE "\n", size);
479 
480 		if(fHasSubTransaction && size < fMaxTransactionSize) {
481 			TRACE("Journal::_WriteTransactionToLog(): transaction doesn't fit, "
482 				"but it can be separated\n");
483 			detached = true;
484 		} else {
485 			// Error: transaction can't fit in log
486 			panic("transaction too large (size: %" B_PRIuSIZE ", max size: %"
487 				B_PRIu32 ", log size: %" B_PRIu32 ")\n", size,
488 				fMaxTransactionSize, fLogSize);
489 			return B_BUFFER_OVERFLOW;
490 		}
491 	}
492 
493 	TRACE("Journal::_WriteTransactionToLog(): free log blocks: %" B_PRIu32
494 		"\n", FreeLogBlocks());
495 	if (size > FreeLogBlocks()) {
496 		TRACE("Journal::_WriteTransactionToLog(): Syncing block cache\n");
497 		cache_sync_transaction(fFilesystemBlockCache, fTransactionID);
498 
499 		if (size > FreeLogBlocks()) {
500 			panic("Transaction fits, but sync didn't result in enough"
501 				"free space.\n\tGot %" B_PRIu32 " when at least %" B_PRIuSIZE
502 				" was expected.", FreeLogBlocks(), size);
503 		}
504 	}
505 
506 	TRACE("Journal::_WriteTransactionToLog(): finished managing space for "
507 		"the transaction\n");
508 
509 	fHasSubTransaction = false;
510 	if (!fIsStarted)
511 		StartLog();
512 
513 	// Prepare Descriptor block
514 	TRACE("Journal::_WriteTransactionToLog(): attempting to allocate space for "
515 		"the descriptor block, block size %" B_PRIu32 "\n", fBlockSize);
516 	JournalHeader* descriptorBlock =
517 		(JournalHeader*)new(std::nothrow) uint8[fBlockSize];
518 	if (descriptorBlock == NULL) {
519 		TRACE("Journal::_WriteTransactionToLog(): Failed to allocate a buffer "
520 			"for the descriptor block\n");
521 		return B_NO_MEMORY;
522 	}
523 	ArrayDeleter<uint8> descriptorBlockDeleter((uint8*)descriptorBlock);
524 
525 	descriptorBlock->MakeDescriptor(fCurrentCommitID);
526 
527 	// Prepare Commit block
528 	TRACE("Journal::_WriteTransactionToLog(): attempting to allocate space for "
529 		"the commit block, block size %" B_PRIu32 "\n", fBlockSize);
530 	JournalHeader* commitBlock =
531 		(JournalHeader*)new(std::nothrow) uint8[fBlockSize];
532 	if (descriptorBlock == NULL) {
533 		TRACE("Journal::_WriteTransactionToLog(): Failed to allocate a buffer "
534 			"for the commit block\n");
535 		return B_NO_MEMORY;
536 	}
537 	ArrayDeleter<uint8> commitBlockDeleter((uint8*)commitBlock);
538 
539 	commitBlock->MakeCommit(fCurrentCommitID + 1);
540 	memset(commitBlock->data, 0, fBlockSize - sizeof(JournalHeader));
541 		// TODO: This probably isn't necessary
542 
543 	uint8* escapedData = NULL;
544 	ArrayDeleter<uint8> escapedDataDeleter;
545 
546 	off_t blockNumber;
547 	long cookie = 0;
548 
549 	status_t status = cache_next_block_in_transaction(fFilesystemBlockCache,
550 		fTransactionID, detached, &cookie, &blockNumber, NULL, NULL);
551 	if (status != B_OK) {
552 		TRACE("Journal::_WriteTransactionToLog(): Transaction has no blocks to "
553 			"write\n");
554 		return B_OK;
555 	}
556 
557 	uint32 blockCount = 0;
558 
559 	uint32 logBlock = _WrapAroundLog(fLogEnd);
560 
561 	bool finished = false;
562 
563 	status = _WritePartialTransactionToLog(descriptorBlock, detached,
564 		&escapedData, logBlock, blockNumber, cookie, escapedDataDeleter,
565 		blockCount, finished);
566 	if (!finished && status != B_OK)
567 		return status;
568 
569 	uint32 commitBlockPos = logBlock;
570 
571 	while (!finished) {
572 		descriptorBlock->IncrementSequence();
573 
574 		status = _WritePartialTransactionToLog(descriptorBlock, detached,
575 			&escapedData, logBlock, blockNumber, cookie, escapedDataDeleter,
576 			blockCount, finished);
577 		if (!finished && status != B_OK)
578 			return status;
579 
580 		// It is okay to write the commit blocks of the partial transactions
581 		// as long as the commit block of the first partial transaction isn't
582 		// written. When it recovery reaches where the first commit should be
583 		// and doesn't find it, it considers it found the end of the log.
584 
585 		fsblock_t physicalBlock;
586 		status = MapBlock(logBlock, physicalBlock);
587 		if (status != B_OK)
588 			return status;
589 
590 		off_t logOffset = physicalBlock * fBlockSize;
591 
592 		TRACE("Journal::_WriteTransactionToLog(): Writting commit block to "
593 			"%" B_PRIdOFF "\n", logOffset);
594 		off_t written = write_pos(fJournalVolume->Device(), logOffset,
595 			commitBlock, fBlockSize);
596 		if (written != fBlockSize) {
597 			TRACE("Failed to write journal commit block.\n");
598 			return B_IO_ERROR;
599 		}
600 
601 		commitBlock->IncrementSequence();
602 		blockCount++;
603 
604 		logBlock = _WrapAroundLog(logBlock + 1);
605 	}
606 
607 	// Transaction will enter the Commit state
608 	fsblock_t physicalBlock;
609 	status = MapBlock(commitBlockPos, physicalBlock);
610 	if (status != B_OK)
611 		return status;
612 
613 	off_t logOffset = physicalBlock * fBlockSize;
614 
615 	TRACE("Journal::_WriteTransactionToLog(): Writing to: %" B_PRIdOFF "\n",
616 		logOffset);
617 	off_t written = write_pos(fJournalVolume->Device(), logOffset, commitBlock,
618 		fBlockSize);
619 	if (written != fBlockSize) {
620 		TRACE("Failed to write journal commit block.\n");
621 		return B_IO_ERROR;
622 	}
623 
624 	blockCount++;
625 	fLogEnd = _WrapAroundLog(fLogEnd + blockCount);
626 
627 	status = _SaveSuperBlock();
628 
629 	// Transaction will enter Finished state
630 	LogEntry *logEntry = new LogEntry(this, fLogEnd, fCurrentCommitID++);
631 	TRACE("Journal::_WriteTransactionToLog(): Allocating log entry at %p\n",
632 		logEntry);
633 	if (logEntry == NULL) {
634 		panic("no memory to allocate log entries!");
635 		return B_NO_MEMORY;
636 	}
637 
638 	mutex_lock(&fLogEntriesLock);
639 	fLogEntries.Add(logEntry);
640 	mutex_unlock(&fLogEntriesLock);
641 
642 	if (detached) {
643 		fTransactionID = cache_detach_sub_transaction(fFilesystemBlockCache,
644 			fTransactionID, _TransactionWritten, logEntry);
645 		fUnwrittenTransactions = 1;
646 
647 		if (status == B_OK && _FullTransactionSize() > fLogSize) {
648 			// If the transaction is too large after writing, there is no way to
649 			// recover, so let this transaction fail.
650 			ERROR("transaction too large (%" B_PRIuSIZE " blocks, log size %"
651 				B_PRIu32 ")!\n", _FullTransactionSize(), fLogSize);
652 			return B_BUFFER_OVERFLOW;
653 		}
654 	} else {
655 		cache_end_transaction(fFilesystemBlockCache, fTransactionID,
656 			_TransactionWritten, logEntry);
657 		fUnwrittenTransactions = 0;
658 	}
659 
660 	return B_OK;
661 }
662 
663 
664 status_t
665 Journal::_SaveSuperBlock()
666 {
667 	TRACE("Journal::_SaveSuperBlock()\n");
668 	fsblock_t physicalBlock;
669 	status_t status = MapBlock(0, physicalBlock);
670 	if (status != B_OK)
671 		return status;
672 
673 	off_t superblockPos = physicalBlock * fBlockSize;
674 
675 	JournalSuperBlock superblock;
676 	size_t bytesRead = read_pos(fJournalVolume->Device(), superblockPos,
677 		&superblock, sizeof(superblock));
678 
679 	if (bytesRead != sizeof(superblock))
680 		return B_IO_ERROR;
681 
682 	superblock.SetFirstCommitID(fFirstCommitID);
683 	superblock.SetLogStart(fLogStart);
684 
685 	TRACE("Journal::SaveSuperBlock(): Write to %" B_PRIdOFF "\n",
686 		superblockPos);
687 	size_t bytesWritten = write_pos(fJournalVolume->Device(), superblockPos,
688 		&superblock, sizeof(superblock));
689 
690 	if (bytesWritten != sizeof(superblock))
691 		return B_IO_ERROR;
692 
693 	TRACE("Journal::_SaveSuperBlock(): Done\n");
694 
695 	return B_OK;
696 }
697 
698 
699 status_t
700 Journal::_LoadSuperBlock()
701 {
702 	TRACE("Journal::_LoadSuperBlock()\n");
703 	fsblock_t superblockPos;
704 
705 	status_t status = MapBlock(0, superblockPos);
706 	if (status != B_OK)
707 		return status;
708 
709 	TRACE("Journal::_LoadSuperBlock(): superblock physical block: %" B_PRIu64
710 		"\n", superblockPos);
711 
712 	JournalSuperBlock superblock;
713 	size_t bytesRead = read_pos(fJournalVolume->Device(), superblockPos
714 		* fJournalVolume->BlockSize(), &superblock, sizeof(superblock));
715 
716 	if (bytesRead != sizeof(superblock)) {
717 		ERROR("Journal::_LoadSuperBlock(): failed to read superblock\n");
718 		return B_IO_ERROR;
719 	}
720 
721 	if (!superblock.header.CheckMagic()) {
722 		ERROR("Journal::_LoadSuperBlock(): Invalid superblock magic %" B_PRIx32
723 			"\n", superblock.header.Magic());
724 		return B_BAD_VALUE;
725 	}
726 
727 	if (superblock.header.BlockType() == JOURNAL_SUPERBLOCK_V1) {
728 		TRACE("Journal::_LoadSuperBlock(): Journal superblock version 1\n");
729 		fVersion = 1;
730 	} else if (superblock.header.BlockType() == JOURNAL_SUPERBLOCK_V2) {
731 		TRACE("Journal::_LoadSuperBlock(): Journal superblock version 2\n");
732 		fVersion = 2;
733 	} else {
734 		ERROR("Journal::_LoadSuperBlock(): Invalid superblock version\n");
735 		return B_BAD_VALUE;
736 	}
737 
738 	if (fVersion >= 2) {
739 		status = _CheckFeatures(&superblock);
740 
741 		if (status != B_OK) {
742 			ERROR("Journal::_LoadSuperBlock(): Unsupported features\n");
743 			return status;
744 		}
745 	}
746 
747 	fBlockSize = superblock.BlockSize();
748 	fFirstCommitID = superblock.FirstCommitID();
749 	fFirstLogBlock = superblock.FirstLogBlock();
750 	fLogStart = superblock.LogStart();
751 	fLogSize = superblock.NumBlocks();
752 
753 	uint32 descriptorTags = (fBlockSize - sizeof(JournalHeader))
754 		/ sizeof(JournalBlockTag);
755 		// Maximum tags per descriptor block
756 	uint32 maxDescriptors = (fLogSize - 1) / (descriptorTags + 2);
757 		// Maximum number of full journal transactions
758 	fMaxTransactionSize = maxDescriptors * descriptorTags;
759 	fMaxTransactionSize += (fLogSize - 1) - fMaxTransactionSize - 2;
760 		// Maximum size of a "logical" transaction
761 		// TODO: Why is "superblock.MaxTransactionBlocks();" zero?
762 	//fFirstCacheCommitID = fFirstCommitID - fTransactionID /*+ 1*/;
763 
764 	TRACE("Journal::_LoadSuperBlock(): block size: %" B_PRIu32 ", first commit"
765 		" id: %" B_PRIu32 ", first log block: %" B_PRIu32 ", log start: %"
766 		B_PRIu32 ", log size: %" B_PRIu32 ", max transaction size: %" B_PRIu32
767 		"\n", fBlockSize, fFirstCommitID, fFirstLogBlock, fLogStart,
768 		fLogSize, fMaxTransactionSize);
769 
770 	return B_OK;
771 }
772 
773 
774 status_t
775 Journal::_CheckFeatures(JournalSuperBlock* superblock)
776 {
777 	if ((superblock->ReadOnlyCompatibleFeatures()
778 			& ~JOURNAL_KNOWN_READ_ONLY_COMPATIBLE_FEATURES) != 0
779 		|| (superblock->IncompatibleFeatures()
780 			& ~JOURNAL_KNOWN_INCOMPATIBLE_FEATURES) != 0)
781 		return B_UNSUPPORTED;
782 
783 	return B_OK;
784 }
785 
786 
787 uint32
788 Journal::_CountTags(JournalHeader* descriptorBlock)
789 {
790 	uint32 count = 0;
791 
792 	JournalBlockTag* tags = (JournalBlockTag*)descriptorBlock->data;
793 		// Skip the header
794 	JournalBlockTag* lastTag = (JournalBlockTag*)
795 		(descriptorBlock + fBlockSize - sizeof(JournalBlockTag));
796 
797 	while (tags < lastTag && (tags->Flags() & JOURNAL_FLAG_LAST_TAG) == 0) {
798 		if ((tags->Flags() & JOURNAL_FLAG_SAME_UUID) == 0) {
799 			// sizeof(UUID) = 16 = 2*sizeof(JournalBlockTag)
800 			tags += 2;	// Skip new UUID
801 		}
802 
803 		TRACE("Journal::_CountTags(): Tag block: %" B_PRIu32 "\n",
804 			tags->BlockNumber());
805 
806 		tags++; // Go to next tag
807 		count++;
808 	}
809 
810 	if ((tags->Flags() & JOURNAL_FLAG_LAST_TAG) != 0)
811 		count++;
812 
813 	TRACE("Journal::_CountTags(): counted tags: %" B_PRIu32 "\n", count);
814 
815 	return count;
816 }
817 
818 
819 /*virtual*/ status_t
820 Journal::Recover()
821 {
822 	TRACE("Journal::Recover()\n");
823 	if (fLogStart == 0) // Journal was cleanly unmounted
824 		return B_OK;
825 
826 	TRACE("Journal::Recover(): Journal needs recovery\n");
827 
828 	uint32 lastCommitID;
829 
830 	status_t status = _RecoverPassScan(lastCommitID);
831 	if (status != B_OK)
832 		return status;
833 
834 	status = _RecoverPassRevoke(lastCommitID);
835 	if (status != B_OK)
836 		return status;
837 
838 	return _RecoverPassReplay(lastCommitID);
839 }
840 
841 
842 // First pass: Find the end of the log
843 status_t
844 Journal::_RecoverPassScan(uint32& lastCommitID)
845 {
846 	TRACE("Journal Recover: 1st Pass: Scan\n");
847 
848 	CachedBlock cached(fJournalVolume);
849 	JournalHeader* header;
850 	uint32 nextCommitID = fFirstCommitID;
851 	uint32 nextBlock = fLogStart;
852 	fsblock_t nextBlockPos;
853 
854 	status_t status = MapBlock(nextBlock, nextBlockPos);
855 	if (status != B_OK)
856 		return status;
857 
858 	header = (JournalHeader*)cached.SetTo(nextBlockPos);
859 
860 	while (header->CheckMagic() && header->Sequence() == nextCommitID) {
861 		uint32 blockType = header->BlockType();
862 
863 		if (blockType == JOURNAL_DESCRIPTOR_BLOCK) {
864 			uint32 tags = _CountTags(header);
865 			nextBlock += tags;
866 			TRACE("Journal recover pass scan: Found a descriptor block with "
867 				"%" B_PRIu32 " tags\n", tags);
868 		} else if (blockType == JOURNAL_COMMIT_BLOCK) {
869 			nextCommitID++;
870 			TRACE("Journal recover pass scan: Found a commit block. Next "
871 				"commit ID: %" B_PRIu32 "\n", nextCommitID);
872 		} else if (blockType != JOURNAL_REVOKE_BLOCK) {
873 			TRACE("Journal recover pass scan: Reached an unrecognized block, "
874 				"assuming as log's end.\n");
875 			break;
876 		} else {
877 			TRACE("Journal recover pass scan: Found a revoke block, "
878 				"skipping it\n");
879 		}
880 
881 		nextBlock = _WrapAroundLog(nextBlock + 1);
882 
883 		status = MapBlock(nextBlock, nextBlockPos);
884 		if (status != B_OK)
885 			return status;
886 
887 		header = (JournalHeader*)cached.SetTo(nextBlockPos);
888 	}
889 
890 	TRACE("Journal Recovery pass scan: Last detected transaction ID: %"
891 		B_PRIu32 "\n", nextCommitID);
892 
893 	lastCommitID = nextCommitID;
894 	return B_OK;
895 }
896 
897 
898 // Second pass: Collect all revoked blocks
899 status_t
900 Journal::_RecoverPassRevoke(uint32 lastCommitID)
901 {
902 	TRACE("Journal Recover: 2nd Pass: Revoke\n");
903 
904 	CachedBlock cached(fJournalVolume);
905 	JournalHeader* header;
906 	uint32 nextCommitID = fFirstCommitID;
907 	uint32 nextBlock = fLogStart;
908 	fsblock_t nextBlockPos;
909 
910 	status_t status = MapBlock(nextBlock, nextBlockPos);
911 	if (status != B_OK)
912 		return status;
913 
914 	header = (JournalHeader*)cached.SetTo(nextBlockPos);
915 
916 	while (nextCommitID < lastCommitID) {
917 		if (!header->CheckMagic() || header->Sequence() != nextCommitID) {
918 			// Somehow the log is different than the expexted
919 			return B_ERROR;
920 		}
921 
922 		uint32 blockType = header->BlockType();
923 
924 		if (blockType == JOURNAL_DESCRIPTOR_BLOCK)
925 			nextBlock += _CountTags(header);
926 		else if (blockType == JOURNAL_COMMIT_BLOCK)
927 			nextCommitID++;
928 		else if (blockType == JOURNAL_REVOKE_BLOCK) {
929 			TRACE("Journal::_RecoverPassRevoke(): Found a revoke block\n");
930 			status = fRevokeManager->ScanRevokeBlock(
931 				(JournalRevokeHeader*)header, nextCommitID);
932 
933 			if (status != B_OK)
934 				return status;
935 		} else {
936 				// TODO: Warn that we found an unrecognized block
937 			break;
938 		}
939 
940 		nextBlock = _WrapAroundLog(nextBlock + 1);
941 
942 		status = MapBlock(nextBlock, nextBlockPos);
943 		if (status != B_OK)
944 			return status;
945 
946 		header = (JournalHeader*)cached.SetTo(nextBlockPos);
947 	}
948 
949 	if (nextCommitID != lastCommitID) {
950 		// Possibly because of some sort of IO error
951 		TRACE("Journal::_RecoverPassRevoke(): Incompatible commit IDs\n");
952 		return B_ERROR;
953 	}
954 
955 	TRACE("Journal recovery pass revoke: Revoked blocks: %" B_PRIu32 "\n",
956 		fRevokeManager->NumRevokes());
957 
958 	return B_OK;
959 }
960 
961 
962 // Third pass: Replay log
963 status_t
964 Journal::_RecoverPassReplay(uint32 lastCommitID)
965 {
966 	TRACE("Journal Recover: 3rd Pass: Replay\n");
967 
968 	uint32 nextCommitID = fFirstCommitID;
969 	uint32 nextBlock = fLogStart;
970 	fsblock_t nextBlockPos;
971 
972 	status_t status = MapBlock(nextBlock, nextBlockPos);
973 	if (status != B_OK)
974 		return status;
975 
976 	CachedBlock cached(fJournalVolume);
977 	JournalHeader* header = (JournalHeader*)cached.SetTo(nextBlockPos);
978 
979 	int count = 0;
980 
981 	uint8* data = new(std::nothrow) uint8[fBlockSize];
982 	if (data == NULL) {
983 		TRACE("Journal::_RecoverPassReplay(): Failed to allocate memory for "
984 			"data\n");
985 		return B_NO_MEMORY;
986 	}
987 
988 	ArrayDeleter<uint8> dataDeleter(data);
989 
990 	while (nextCommitID < lastCommitID) {
991 		if (!header->CheckMagic() || header->Sequence() != nextCommitID) {
992 			// Somehow the log is different than the expected
993 			ERROR("Journal::_RecoverPassReplay(): Weird problem with block\n");
994 			return B_ERROR;
995 		}
996 
997 		uint32 blockType = header->BlockType();
998 
999 		if (blockType == JOURNAL_DESCRIPTOR_BLOCK) {
1000 			JournalBlockTag* last_tag = (JournalBlockTag*)((uint8*)header
1001 				+ fBlockSize - sizeof(JournalBlockTag));
1002 
1003 			for (JournalBlockTag* tag = (JournalBlockTag*)header->data;
1004 				tag <= last_tag; ++tag) {
1005 				nextBlock = _WrapAroundLog(nextBlock + 1);
1006 
1007 				status = MapBlock(nextBlock, nextBlockPos);
1008 				if (status != B_OK)
1009 					return status;
1010 
1011 				if (!fRevokeManager->Lookup(tag->BlockNumber(),
1012 						nextCommitID)) {
1013 					// Block isn't revoked
1014 					size_t read = read_pos(fJournalVolume->Device(),
1015 						nextBlockPos * fBlockSize, data, fBlockSize);
1016 					if (read != fBlockSize)
1017 						return B_IO_ERROR;
1018 
1019 					if ((tag->Flags() & JOURNAL_FLAG_ESCAPED) != 0) {
1020 						// Block is escaped
1021 						((int32*)data)[0]
1022 							= B_HOST_TO_BENDIAN_INT32(JOURNAL_MAGIC);
1023 					}
1024 
1025 					TRACE("Journal::_RevoverPassReplay(): Write to %" B_PRIu32
1026 						"\n", tag->BlockNumber() * fBlockSize);
1027 					size_t written = write_pos(fFilesystemVolume->Device(),
1028 						tag->BlockNumber() * fBlockSize, data, fBlockSize);
1029 
1030 					if (written != fBlockSize)
1031 						return B_IO_ERROR;
1032 
1033 					++count;
1034 				}
1035 
1036 				if ((tag->Flags() & JOURNAL_FLAG_LAST_TAG) != 0)
1037 					break;
1038 				if ((tag->Flags() & JOURNAL_FLAG_SAME_UUID) == 0) {
1039 					// TODO: Check new UUID with file system UUID
1040 					tag += 2;
1041 						// sizeof(JournalBlockTag) = 8
1042 						// sizeof(UUID) = 16
1043 				}
1044 			}
1045 		} else if (blockType == JOURNAL_COMMIT_BLOCK)
1046 			nextCommitID++;
1047 		else if (blockType != JOURNAL_REVOKE_BLOCK) {
1048 				// TODO: Warn that we found an unrecognized block
1049 			break;
1050 		} // If blockType == JOURNAL_REVOKE_BLOCK we just skip it
1051 
1052 		nextBlock = _WrapAroundLog(nextBlock + 1);
1053 
1054 		status = MapBlock(nextBlock, nextBlockPos);
1055 		if (status != B_OK)
1056 			return status;
1057 
1058 		header = (JournalHeader*)cached.SetTo(nextBlockPos);
1059 	}
1060 
1061 	if (nextCommitID != lastCommitID) {
1062 		// Possibly because of some sort of IO error
1063 		return B_ERROR;
1064 	}
1065 
1066 	TRACE("Journal recovery pass replay: Replayed blocks: %u\n", count);
1067 
1068 	return B_OK;
1069 }
1070 
1071 
1072 status_t
1073 Journal::_FlushLog(bool canWait, bool flushBlocks)
1074 {
1075 	TRACE("Journal::_FlushLog()\n");
1076 	status_t status = canWait ? recursive_lock_lock(&fLock)
1077 		: recursive_lock_trylock(&fLock);
1078 
1079 	TRACE("Journal::_FlushLog(): Acquired fLock, recursion: %" B_PRId32 "\n",
1080 		recursive_lock_get_recursion(&fLock));
1081 	if (status != B_OK)
1082 		return status;
1083 
1084 	if (recursive_lock_get_recursion(&fLock) > 1) {
1085 		// Called from inside a transaction
1086 		recursive_lock_unlock(&fLock);
1087 		TRACE("Journal::_FlushLog(): Called from a transaction. Leaving...\n");
1088 		return B_OK;
1089 	}
1090 
1091 	if (fUnwrittenTransactions != 0 && _FullTransactionSize() != 0) {
1092 		status = _WriteTransactionToLog();
1093 		if (status < B_OK)
1094 			panic("Failed flushing transaction: %s\n", strerror(status));
1095 	}
1096 
1097 	TRACE("Journal::_FlushLog(): Attempting to flush journal volume at %p\n",
1098 		fJournalVolume);
1099 
1100 	// TODO: Not sure this is correct. Need to review...
1101 	// NOTE: Not correct. Causes double lock of a block cache mutex
1102 	// TODO: Need some other way to synchronize the journal...
1103 	/*status = fJournalVolume->FlushDevice();
1104 	if (status != B_OK)
1105 		return status;*/
1106 
1107 	TRACE("Journal::_FlushLog(): Flushed journal volume\n");
1108 
1109 	if (flushBlocks) {
1110 		TRACE("Journal::_FlushLog(): Attempting to flush file system volume "
1111 			"at %p\n", fFilesystemVolume);
1112 		status = fFilesystemVolume->FlushDevice();
1113 		if (status == B_OK)
1114 			TRACE("Journal::_FlushLog(): Flushed file system volume\n");
1115 	}
1116 
1117 	TRACE("Journal::_FlushLog(): Finished. Releasing lock\n");
1118 
1119 	recursive_lock_unlock(&fLock);
1120 
1121 	TRACE("Journal::_FlushLog(): Done, final status: %s\n", strerror(status));
1122 	return status;
1123 }
1124 
1125 
1126 inline uint32
1127 Journal::_WrapAroundLog(uint32 block)
1128 {
1129 	TRACE("Journal::_WrapAroundLog()\n");
1130 	if (block >= fLogSize)
1131 		return block - fLogSize + fFirstLogBlock;
1132 	else
1133 		return block;
1134 }
1135 
1136 
1137 size_t
1138 Journal::_CurrentTransactionSize() const
1139 {
1140 	TRACE("Journal::_CurrentTransactionSize(): transaction %" B_PRIu32 "\n",
1141 		fTransactionID);
1142 
1143 	size_t count;
1144 
1145 	if (fHasSubTransaction) {
1146 		count = cache_blocks_in_sub_transaction(fFilesystemBlockCache,
1147 			fTransactionID);
1148 
1149 		TRACE("\tSub transaction size: %" B_PRIuSIZE "\n", count);
1150 	} else {
1151 		count =  cache_blocks_in_transaction(fFilesystemBlockCache,
1152 			fTransactionID);
1153 
1154 		TRACE("\tTransaction size: %" B_PRIuSIZE "\n", count);
1155 	}
1156 
1157 	return count;
1158 }
1159 
1160 
1161 size_t
1162 Journal::_FullTransactionSize() const
1163 {
1164 	TRACE("Journal::_FullTransactionSize(): transaction %" B_PRIu32 "\n",
1165 		fTransactionID);
1166 	TRACE("\tFile sytem block cache: %p\n", fFilesystemBlockCache);
1167 
1168 	size_t count = cache_blocks_in_transaction(fFilesystemBlockCache,
1169 		 fTransactionID);
1170 
1171 	TRACE("\tFull transaction size: %" B_PRIuSIZE "\n", count);
1172 
1173 	return count;
1174 }
1175 
1176 
1177 size_t
1178 Journal::_MainTransactionSize() const
1179 {
1180 	TRACE("Journal::_MainTransactionSize(): transaction %" B_PRIu32 "\n",
1181 		fTransactionID);
1182 
1183 	size_t count =  cache_blocks_in_main_transaction(fFilesystemBlockCache,
1184 		fTransactionID);
1185 
1186 	TRACE("\tMain transaction size: %" B_PRIuSIZE "\n", count);
1187 
1188 	return count;
1189 }
1190 
1191 
1192 status_t
1193 Journal::_TransactionDone(bool success)
1194 {
1195 	if (!success) {
1196 		if (fHasSubTransaction) {
1197 			TRACE("Journal::_TransactionDone(): transaction %" B_PRIu32
1198 				" failed, aborting subtransaction\n", fTransactionID);
1199 			cache_abort_sub_transaction(fFilesystemBlockCache, fTransactionID);
1200 			// parent is unaffected
1201 		} else {
1202 			TRACE("Journal::_TransactionDone(): transaction %" B_PRIu32
1203 				" failed, aborting\n", fTransactionID);
1204 			cache_abort_transaction(fFilesystemBlockCache, fTransactionID);
1205 			fUnwrittenTransactions = 0;
1206 		}
1207 
1208 		TRACE("Journal::_TransactionDone(): returning B_OK\n");
1209 		return B_OK;
1210 	}
1211 
1212 	// If possible, delay flushing the transaction
1213 	uint32 size = _FullTransactionSize();
1214 	TRACE("Journal::_TransactionDone(): full transaction size: %" B_PRIu32
1215 		", max transaction size: %" B_PRIu32 ", free log blocks: %" B_PRIu32
1216 		"\n", size, fMaxTransactionSize, FreeLogBlocks());
1217 	if (fMaxTransactionSize > 0 && size < fMaxTransactionSize) {
1218 		TRACE("Journal::_TransactionDone(): delaying flush of transaction "
1219 			"%" B_PRIu32 "\n", fTransactionID);
1220 
1221 		// Make sure the transaction fits in the log
1222 		if (size < FreeLogBlocks())
1223 			cache_sync_transaction(fFilesystemBlockCache, fTransactionID);
1224 
1225 		fUnwrittenTransactions++;
1226 		TRACE("Journal::_TransactionDone(): returning B_OK\n");
1227 		return B_OK;
1228 	}
1229 
1230 	return _WriteTransactionToLog();
1231 }
1232 
1233 
1234 /*static*/ void
1235 Journal::_TransactionWritten(int32 transactionID, int32 event, void* _logEntry)
1236 {
1237 	LogEntry* logEntry = (LogEntry*)_logEntry;
1238 
1239 	TRACE("Journal::_TransactionWritten(): Transaction %" B_PRIu32
1240 		" checkpointed\n", transactionID);
1241 
1242 	Journal* journal = logEntry->GetJournal();
1243 
1244 	TRACE("Journal::_TransactionWritten(): log entry: %p, journal: %p\n",
1245 		logEntry, journal);
1246 	TRACE("Journal::_TransactionWritten(): log entries: %p\n",
1247 		&journal->fLogEntries);
1248 
1249 	mutex_lock(&journal->fLogEntriesLock);
1250 
1251 	TRACE("Journal::_TransactionWritten(): first log entry: %p\n",
1252 		journal->fLogEntries.First());
1253 	if (logEntry == journal->fLogEntries.First()) {
1254 		TRACE("Journal::_TransactionWritten(): Moving start of log to %"
1255 			B_PRIu32 "\n", logEntry->Start());
1256 		journal->fLogStart = logEntry->Start();
1257 		journal->fFirstCommitID = logEntry->CommitID();
1258 		TRACE("Journal::_TransactionWritten(): Setting commit ID to %" B_PRIu32
1259 			"\n", logEntry->CommitID());
1260 
1261 		if (journal->_SaveSuperBlock() != B_OK)
1262 			panic("ext2: Failed to write journal superblock\n");
1263 	}
1264 
1265 	TRACE("Journal::_TransactionWritten(): Removing log entry\n");
1266 	journal->fLogEntries.Remove(logEntry);
1267 
1268 	TRACE("Journal::_TransactionWritten(): Unlocking entries list\n");
1269 	mutex_unlock(&journal->fLogEntriesLock);
1270 
1271 	TRACE("Journal::_TransactionWritten(): Deleting log entry at %p\n", logEntry);
1272 	delete logEntry;
1273 }
1274 
1275 
1276 /*static*/ void
1277 Journal::_TransactionIdle(int32 transactionID, int32 event, void* _journal)
1278 {
1279 	Journal* journal = (Journal*)_journal;
1280 	journal->_FlushLog(false, false);
1281 }
1282