xref: /haiku/src/add-ons/kernel/file_systems/ext2/Journal.cpp (revision 5ac9b506412b11afb993bb52d161efe7666958a5)
1 /*
2  * Copyright 2010, Haiku Inc. All rights reserved.
3  * Copyright 2001-2010, Axel Dörfler, axeld@pinc-software.de.
4  * This file may be used under the terms of the MIT License.
5  *
6  * Authors:
7  *		Janito V. Ferreira Filho
8  */
9 
10 
11 #include "Journal.h"
12 
13 #include <new>
14 #include <string.h>
15 #include <unistd.h>
16 
17 #include <fs_cache.h>
18 
19 #include "CachedBlock.h"
20 #include "HashRevokeManager.h"
21 
22 
// Uncomment the following line to enable verbose tracing in this file.
//#define TRACE_EXT2
#ifdef TRACE_EXT2
#	define TRACE(x...) dprintf("\33[34mext2:\33[0m " x)
#else
	// Expand to a real (empty) statement instead of a bare ';', so that
	// "TRACE(...);" stays a single statement, e.g. as the body of an "if"
	// that is followed by an "else".
#	define TRACE(x...) do { } while (0)
#endif
// Errors and warnings are always printed, with the same prefix as traces.
#define ERROR(x...) dprintf("\33[34mext2:\33[0m " x)
#define WARN(x...) dprintf("\33[34mext2:\33[0m " x)
31 
32 
33 class LogEntry : public DoublyLinkedListLinkImpl<LogEntry> {
34 public:
35 							LogEntry(Journal* journal, uint32 logStart,
36 								uint32 length);
37 							~LogEntry();
38 
39 			uint32			Start() const { return fStart; }
40 			uint32			CommitID() const { return fCommitID; }
41 
42 			Journal*		GetJournal() { return fJournal; }
43 
44 private:
45 			Journal*		fJournal;
46 			uint32			fStart;
47 			uint32			fCommitID;
48 };
49 
50 
51 LogEntry::LogEntry(Journal* journal, uint32 logStart, uint32 commitID)
52 	:
53 	fJournal(journal),
54 	fStart(logStart),
55 	fCommitID(commitID)
56 {
57 }
58 
59 
60 LogEntry::~LogEntry()
61 {
62 }
63 
64 
65 void
66 JournalHeader::MakeDescriptor(uint32 sequence)
67 {
68 	this->magic = B_HOST_TO_BENDIAN_INT32(JOURNAL_MAGIC);
69 	this->sequence = B_HOST_TO_BENDIAN_INT32(sequence);
70 	this->block_type = B_HOST_TO_BENDIAN_INT32(JOURNAL_DESCRIPTOR_BLOCK);
71 }
72 
73 
74 void
75 JournalHeader::MakeCommit(uint32 sequence)
76 {
77 	this->magic = B_HOST_TO_BENDIAN_INT32(JOURNAL_MAGIC);
78 	this->sequence = B_HOST_TO_BENDIAN_INT32(sequence);
79 	this->block_type = B_HOST_TO_BENDIAN_INT32(JOURNAL_COMMIT_BLOCK);
80 }
81 
82 
83 Journal::Journal(Volume* fsVolume, Volume* jVolume)
84 	:
85 	fJournalVolume(jVolume),
86 	fJournalBlockCache(jVolume->BlockCache()),
87 	fFilesystemVolume(fsVolume),
88 	fFilesystemBlockCache(fsVolume->BlockCache()),
89 	fRevokeManager(NULL),
90 	fInitStatus(B_OK),
91 	fBlockSize(sizeof(JournalSuperBlock)),
92 	fFirstCommitID(0),
93 	fFirstCacheCommitID(0),
94 	fFirstLogBlock(1),
95 	fLogSize(0),
96 	fVersion(0),
97 	fLogStart(0),
98 	fLogEnd(0),
99 	fFreeBlocks(0),
100 	fMaxTransactionSize(0),
101 	fCurrentCommitID(0),
102 	fHasSubTransaction(false),
103 	fSeparateSubTransactions(false),
104 	fUnwrittenTransactions(0),
105 	fTransactionID(0)
106 {
107 	recursive_lock_init(&fLock, "ext2 journal");
108 	mutex_init(&fLogEntriesLock, "ext2 journal log entries");
109 
110 	HashRevokeManager* revokeManager = new(std::nothrow) HashRevokeManager;
111 	TRACE("Journal::Journal(): Allocated a hash revoke manager at %p\n",
112 		revokeManager);
113 
114 	if (revokeManager == NULL)
115 		fInitStatus = B_NO_MEMORY;
116 	else {
117 		fInitStatus = revokeManager->Init();
118 
119 		if (fInitStatus == B_OK) {
120 			fRevokeManager = revokeManager;
121 			fInitStatus = _LoadSuperBlock();
122 		} else
123 			delete revokeManager;
124 	}
125 }
126 
127 
128 Journal::Journal()
129 	:
130 	fJournalVolume(NULL),
131 	fJournalBlockCache(NULL),
132 	fFilesystemVolume(NULL),
133 	fFilesystemBlockCache(NULL),
134 	fRevokeManager(NULL),
135 	fInitStatus(B_OK),
136 	fBlockSize(sizeof(JournalSuperBlock)),
137 	fFirstCommitID(0),
138 	fFirstCacheCommitID(0),
139 	fFirstLogBlock(1),
140 	fLogSize(0),
141 	fVersion(0),
142 	fIsStarted(false),
143 	fLogStart(0),
144 	fLogEnd(0),
145 	fFreeBlocks(0),
146 	fMaxTransactionSize(0),
147 	fCurrentCommitID(0),
148 	fHasSubTransaction(false),
149 	fSeparateSubTransactions(false),
150 	fUnwrittenTransactions(0),
151 	fTransactionID(0)
152 {
153 	recursive_lock_init(&fLock, "ext2 journal");
154 	mutex_init(&fLogEntriesLock, "ext2 journal log entries");
155 }
156 
157 
158 Journal::~Journal()
159 {
160 	TRACE("Journal destructor.\n");
161 
162 	TRACE("Journal::~Journal(): Attempting to delete revoke manager at %p\n",
163 		fRevokeManager);
164 	delete fRevokeManager;
165 
166 	recursive_lock_destroy(&fLock);
167 	mutex_destroy(&fLogEntriesLock);
168 }
169 
170 
171 status_t
172 Journal::InitCheck()
173 {
174 	return fInitStatus;
175 }
176 
177 
178 status_t
179 Journal::Uninit()
180 {
181 	if (!fIsStarted)
182 		return B_OK;
183 
184 	status_t status = FlushLogAndBlocks();
185 
186 	if (status == B_OK) {
187 		// Mark journal as clean
188 		fLogStart = 0;
189 		status = _SaveSuperBlock();
190 	}
191 
192 	fIsStarted = false;
193 
194 	return status;
195 }
196 
197 
198 /*virtual*/ status_t
199 Journal::StartLog()
200 {
201 	fLogStart = fFirstLogBlock;
202 	fLogEnd = fFirstLogBlock;
203 	fFreeBlocks = 0;
204 	fIsStarted = true;
205 
206 	fCurrentCommitID = fFirstCommitID;
207 
208 	return _SaveSuperBlock();
209 }
210 
211 
212 status_t
213 Journal::RestartLog()
214 {
215 	fFirstCommitID = 1;
216 
217 	return B_OK;
218 }
219 
220 
221 /*virtual*/ status_t
222 Journal::Lock(Transaction* owner, bool separateSubTransactions)
223 {
224 	TRACE("Journal::Lock()\n");
225 	status_t status = recursive_lock_lock(&fLock);
226 	if (status != B_OK)
227 		return status;
228 
229 	TRACE("Journal::Lock(): Aquired lock\n");
230 
231 	if (!fSeparateSubTransactions && recursive_lock_get_recursion(&fLock) > 1) {
232 		// reuse current transaction
233 		TRACE("Journal::Lock(): Reusing current transaction\n");
234 		return B_OK;
235 	}
236 
237 	if (separateSubTransactions)
238 		fSeparateSubTransactions = true;
239 
240 	if (owner != NULL)
241 		owner->SetParent(fOwner);
242 
243 	fOwner = owner;
244 
245 	if (fOwner != NULL) {
246 		if (fUnwrittenTransactions > 0) {
247 			// start a sub transaction
248 			TRACE("Journal::Lock(): Starting sub transaction\n");
249 			cache_start_sub_transaction(fFilesystemBlockCache, fTransactionID);
250 			fHasSubTransaction = true;
251 		} else {
252 			TRACE("Journal::Lock(): Starting new transaction\n");
253 			fTransactionID = cache_start_transaction(fFilesystemBlockCache);
254 		}
255 
256 		if (fTransactionID < B_OK) {
257 			recursive_lock_unlock(&fLock);
258 			return fTransactionID;
259 		}
260 
261 		cache_add_transaction_listener(fFilesystemBlockCache, fTransactionID,
262 			TRANSACTION_IDLE, _TransactionIdle, this);
263 	}
264 
265 	return B_OK;
266 }
267 
268 
269 /*virtual*/ status_t
270 Journal::Unlock(Transaction* owner, bool success)
271 {
272 	TRACE("Journal::Unlock(): Lock recursion: %" B_PRId32 "\n",
273 		recursive_lock_get_recursion(&fLock));
274 	if (fSeparateSubTransactions
275 		|| recursive_lock_get_recursion(&fLock) == 1) {
276 		// we only end the transaction if we unlock it
277 		if (owner != NULL) {
278 			TRACE("Journal::Unlock(): Calling _TransactionDone\n");
279 			status_t status = _TransactionDone(success);
280 			if (status != B_OK)
281 				return status;
282 
283 			TRACE("Journal::Unlock(): Returned from _TransactionDone\n");
284 			bool separateSubTransactions = fSeparateSubTransactions;
285 			fSeparateSubTransactions = true;
286 			TRACE("Journal::Unlock(): Notifying listeners for: %p\n", owner);
287 			owner->NotifyListeners(success);
288 			TRACE("Journal::Unlock(): Done notifying listeners\n");
289 			fSeparateSubTransactions = separateSubTransactions;
290 
291 			fOwner = owner->Parent();
292 		} else
293 			fOwner = NULL;
294 
295 		if (fSeparateSubTransactions
296 			&& recursive_lock_get_recursion(&fLock) == 1)
297 			fSeparateSubTransactions = false;
298 	} else
299 		owner->MoveListenersTo(fOwner);
300 
301 	TRACE("Journal::Unlock(): Unlocking the lock\n");
302 
303 	recursive_lock_unlock(&fLock);
304 	return B_OK;
305 }
306 
307 
308 status_t
309 Journal::MapBlock(off_t logical, fsblock_t& physical)
310 {
311 	TRACE("Journal::MapBlock()\n");
312 	physical = logical;
313 
314 	return B_OK;
315 }
316 
317 
318 inline uint32
319 Journal::FreeLogBlocks() const
320 {
321 	TRACE("Journal::FreeLogBlocks(): start: %" B_PRIu32 ", end: %" B_PRIu32
322 		", size: %" B_PRIu32 "\n", fLogStart, fLogEnd, fLogSize);
323 	return fLogStart <= fLogEnd
324 		? fLogSize - fLogEnd + fLogStart - 1
325 		: fLogStart - fLogEnd;
326 }
327 
328 
329 status_t
330 Journal::FlushLogAndBlocks()
331 {
332 	return _FlushLog(true, true);
333 }
334 
335 
336 int32
337 Journal::TransactionID() const
338 {
339 	return fTransactionID;
340 }
341 
342 
343 status_t
344 Journal::_WritePartialTransactionToLog(JournalHeader* descriptorBlock,
345 	bool detached, uint8** _escapedData, uint32 &logBlock, off_t& blockNumber,
346 	long& cookie, ArrayDeleter<uint8>& escapedDataDeleter, uint32& blockCount,
347 	bool& finished)
348 {
349 	TRACE("Journal::_WritePartialTransactionToLog()\n");
350 
351 	uint32 descriptorBlockPos = logBlock;
352 	uint8* escapedData = *_escapedData;
353 
354 	JournalBlockTag* tag = (JournalBlockTag*)descriptorBlock->data;
355 	JournalBlockTag* lastTag = (JournalBlockTag*)((uint8*)descriptorBlock
356 		+ fBlockSize - sizeof(JournalHeader));
357 
358 	finished = false;
359 	status_t status = B_OK;
360 
361 	while (tag < lastTag && status == B_OK) {
362 		tag->SetBlockNumber(blockNumber);
363 		tag->SetFlags(0);
364 
365 		CachedBlock data(fFilesystemVolume);
366 		const JournalHeader* blockData = (JournalHeader*)data.SetTo(
367 			blockNumber);
368 		if (blockData == NULL) {
369 			panic("Got a NULL pointer while iterating through transaction "
370 				"blocks.\n");
371 			return B_ERROR;
372 		}
373 
374 		void* finalData;
375 
376 		if (blockData->CheckMagic()) {
377 			// The journaled block starts with the magic value
378 			// We must remove it to prevent confusion
379 			TRACE("Journal::_WritePartialTransactionToLog(): Block starts with "
380 				"magic number. Escaping it\n");
381 			tag->SetEscapedFlag();
382 
383 			if (escapedData == NULL) {
384 				TRACE("Journal::_WritePartialTransactionToLog(): Allocating "
385 					"space for escaped block (%" B_PRIu32 ")\n", fBlockSize);
386 				escapedData = new(std::nothrow) uint8[fBlockSize];
387 				if (escapedData == NULL) {
388 					TRACE("Journal::_WritePartialTransactionToLof(): Failed to "
389 						"allocate buffer for escaped data block\n");
390 					return B_NO_MEMORY;
391 				}
392 				escapedDataDeleter.SetTo(escapedData);
393 				*_escapedData = escapedData;
394 
395 				((int32*)escapedData)[0] = 0; // Remove magic
396 			}
397 
398 			memcpy(escapedData + 4, blockData->data, fBlockSize - 4);
399 			finalData = escapedData;
400 		} else
401 			finalData = (void*)blockData;
402 
403 		// TODO: use iovecs?
404 
405 		logBlock = _WrapAroundLog(logBlock + 1);
406 
407 		fsblock_t physicalBlock;
408 		status = MapBlock(logBlock, physicalBlock);
409 		if (status != B_OK)
410 			return status;
411 
412 		off_t logOffset = physicalBlock * fBlockSize;
413 
414 		TRACE("Journal::_WritePartialTransactionToLog(): Writing from memory: "
415 			"%p, to disk: %" B_PRIdOFF "\n", finalData, logOffset);
416 		size_t written = write_pos(fJournalVolume->Device(), logOffset,
417 			finalData, fBlockSize);
418 		if (written != fBlockSize) {
419 			TRACE("Failed to write journal block.\n");
420 			return B_IO_ERROR;
421 		}
422 
423 		TRACE("Journal::_WritePartialTransactionToLog(): Wrote a journal block "
424 			"at: %" B_PRIu32 "\n", logBlock);
425 
426 		blockCount++;
427 		tag++;
428 
429 		status = cache_next_block_in_transaction(fFilesystemBlockCache,
430 			fTransactionID, detached, &cookie, &blockNumber, NULL, NULL);
431 	}
432 
433 	finished = status != B_OK;
434 
435 	// Write descriptor block
436 	--tag;
437 	tag->SetLastTagFlag();
438 
439 	fsblock_t physicalBlock;
440 	status = MapBlock(descriptorBlockPos, physicalBlock);
441 	if (status != B_OK)
442 		return status;
443 
444 	off_t descriptorBlockOffset = physicalBlock * fBlockSize;
445 
446 	TRACE("Journal::_WritePartialTransactionToLog(): Writing to: %" B_PRIdOFF
447 		"\n", descriptorBlockOffset);
448 	size_t written = write_pos(fJournalVolume->Device(),
449 		descriptorBlockOffset, descriptorBlock, fBlockSize);
450 	if (written != fBlockSize) {
451 		TRACE("Failed to write journal descriptor block.\n");
452 		return B_IO_ERROR;
453 	}
454 
455 	blockCount++;
456 	logBlock = _WrapAroundLog(logBlock + 1);
457 
458 	return B_OK;
459 }
460 
461 
462 status_t
463 Journal::_WriteTransactionToLog()
464 {
465 	TRACE("Journal::_WriteTransactionToLog()\n");
466 	// Transaction enters the Flush state
467 	bool detached = false;
468 	TRACE("Journal::_WriteTransactionToLog(): Attempting to get transaction "
469 		"size\n");
470 	size_t size = _FullTransactionSize();
471 	TRACE("Journal::_WriteTransactionToLog(): transaction size: %" B_PRIuSIZE
472 		"\n", size);
473 
474 	if (size > fMaxTransactionSize) {
475 		TRACE("Journal::_WriteTransactionToLog(): not enough free space "
476 			"for the transaction. Attempting to free some space.\n");
477 		size = _MainTransactionSize();
478 		TRACE("Journal::_WriteTransactionToLog(): main transaction size: %"
479 			B_PRIuSIZE "\n", size);
480 
481 		if (fHasSubTransaction && size < fMaxTransactionSize) {
482 			TRACE("Journal::_WriteTransactionToLog(): transaction doesn't fit, "
483 				"but it can be separated\n");
484 			detached = true;
485 		} else {
486 			// Error: transaction can't fit in log
487 			panic("transaction too large (size: %" B_PRIuSIZE ", max size: %"
488 				B_PRIu32 ", log size: %" B_PRIu32 ")\n", size,
489 				fMaxTransactionSize, fLogSize);
490 			return B_BUFFER_OVERFLOW;
491 		}
492 	}
493 
494 	TRACE("Journal::_WriteTransactionToLog(): free log blocks: %" B_PRIu32
495 		"\n", FreeLogBlocks());
496 	if (size > FreeLogBlocks()) {
497 		TRACE("Journal::_WriteTransactionToLog(): Syncing block cache\n");
498 		cache_sync_transaction(fFilesystemBlockCache, fTransactionID);
499 
500 		if (size > FreeLogBlocks()) {
501 			panic("Transaction fits, but sync didn't result in enough"
502 				"free space.\n\tGot %" B_PRIu32 " when at least %" B_PRIuSIZE
503 				" was expected.", FreeLogBlocks(), size);
504 		}
505 	}
506 
507 	TRACE("Journal::_WriteTransactionToLog(): finished managing space for "
508 		"the transaction\n");
509 
510 	fHasSubTransaction = false;
511 	if (!fIsStarted)
512 		StartLog();
513 
514 	// Prepare Descriptor block
515 	TRACE("Journal::_WriteTransactionToLog(): attempting to allocate space for "
516 		"the descriptor block, block size %" B_PRIu32 "\n", fBlockSize);
517 	JournalHeader* descriptorBlock =
518 		(JournalHeader*)new(std::nothrow) uint8[fBlockSize];
519 	if (descriptorBlock == NULL) {
520 		TRACE("Journal::_WriteTransactionToLog(): Failed to allocate a buffer "
521 			"for the descriptor block\n");
522 		return B_NO_MEMORY;
523 	}
524 	ArrayDeleter<uint8> descriptorBlockDeleter((uint8*)descriptorBlock);
525 
526 	descriptorBlock->MakeDescriptor(fCurrentCommitID);
527 
528 	// Prepare Commit block
529 	TRACE("Journal::_WriteTransactionToLog(): attempting to allocate space for "
530 		"the commit block, block size %" B_PRIu32 "\n", fBlockSize);
531 	JournalHeader* commitBlock =
532 		(JournalHeader*)new(std::nothrow) uint8[fBlockSize];
533 	if (commitBlock == NULL) {
534 		TRACE("Journal::_WriteTransactionToLog(): Failed to allocate a buffer "
535 			"for the commit block\n");
536 		return B_NO_MEMORY;
537 	}
538 	ArrayDeleter<uint8> commitBlockDeleter((uint8*)commitBlock);
539 
540 	commitBlock->MakeCommit(fCurrentCommitID + 1);
541 	memset(commitBlock->data, 0, fBlockSize - sizeof(JournalHeader));
542 		// TODO: This probably isn't necessary
543 
544 	uint8* escapedData = NULL;
545 	ArrayDeleter<uint8> escapedDataDeleter;
546 
547 	off_t blockNumber;
548 	long cookie = 0;
549 
550 	status_t status = cache_next_block_in_transaction(fFilesystemBlockCache,
551 		fTransactionID, detached, &cookie, &blockNumber, NULL, NULL);
552 	if (status != B_OK) {
553 		TRACE("Journal::_WriteTransactionToLog(): Transaction has no blocks to "
554 			"write\n");
555 		return B_OK;
556 	}
557 
558 	uint32 blockCount = 0;
559 
560 	uint32 logBlock = _WrapAroundLog(fLogEnd);
561 
562 	bool finished = false;
563 
564 	status = _WritePartialTransactionToLog(descriptorBlock, detached,
565 		&escapedData, logBlock, blockNumber, cookie, escapedDataDeleter,
566 		blockCount, finished);
567 	if (!finished && status != B_OK)
568 		return status;
569 
570 	uint32 commitBlockPos = logBlock;
571 
572 	while (!finished) {
573 		descriptorBlock->IncrementSequence();
574 
575 		status = _WritePartialTransactionToLog(descriptorBlock, detached,
576 			&escapedData, logBlock, blockNumber, cookie, escapedDataDeleter,
577 			blockCount, finished);
578 		if (!finished && status != B_OK)
579 			return status;
580 
581 		// It is okay to write the commit blocks of the partial transactions
582 		// as long as the commit block of the first partial transaction isn't
583 		// written. When it recovery reaches where the first commit should be
584 		// and doesn't find it, it considers it found the end of the log.
585 
586 		fsblock_t physicalBlock;
587 		status = MapBlock(logBlock, physicalBlock);
588 		if (status != B_OK)
589 			return status;
590 
591 		off_t logOffset = physicalBlock * fBlockSize;
592 
593 		TRACE("Journal::_WriteTransactionToLog(): Writting commit block to "
594 			"%" B_PRIdOFF "\n", logOffset);
595 		off_t written = write_pos(fJournalVolume->Device(), logOffset,
596 			commitBlock, fBlockSize);
597 		if (written != fBlockSize) {
598 			TRACE("Failed to write journal commit block.\n");
599 			return B_IO_ERROR;
600 		}
601 
602 		commitBlock->IncrementSequence();
603 		blockCount++;
604 
605 		logBlock = _WrapAroundLog(logBlock + 1);
606 	}
607 
608 	// Transaction will enter the Commit state
609 	fsblock_t physicalBlock;
610 	status = MapBlock(commitBlockPos, physicalBlock);
611 	if (status != B_OK)
612 		return status;
613 
614 	off_t logOffset = physicalBlock * fBlockSize;
615 
616 	TRACE("Journal::_WriteTransactionToLog(): Writing to: %" B_PRIdOFF "\n",
617 		logOffset);
618 	off_t written = write_pos(fJournalVolume->Device(), logOffset, commitBlock,
619 		fBlockSize);
620 	if (written != fBlockSize) {
621 		TRACE("Failed to write journal commit block.\n");
622 		return B_IO_ERROR;
623 	}
624 
625 	blockCount++;
626 	fLogEnd = _WrapAroundLog(fLogEnd + blockCount);
627 
628 	status = _SaveSuperBlock();
629 
630 	// Transaction will enter Finished state
631 	LogEntry *logEntry = new LogEntry(this, fLogEnd, fCurrentCommitID++);
632 	TRACE("Journal::_WriteTransactionToLog(): Allocating log entry at %p\n",
633 		logEntry);
634 	if (logEntry == NULL) {
635 		panic("no memory to allocate log entries!");
636 		return B_NO_MEMORY;
637 	}
638 
639 	mutex_lock(&fLogEntriesLock);
640 	fLogEntries.Add(logEntry);
641 	mutex_unlock(&fLogEntriesLock);
642 
643 	if (detached) {
644 		fTransactionID = cache_detach_sub_transaction(fFilesystemBlockCache,
645 			fTransactionID, _TransactionWritten, logEntry);
646 		fUnwrittenTransactions = 1;
647 
648 		if (status == B_OK && _FullTransactionSize() > fLogSize) {
649 			// If the transaction is too large after writing, there is no way to
650 			// recover, so let this transaction fail.
651 			ERROR("transaction too large (%" B_PRIuSIZE " blocks, log size %"
652 				B_PRIu32 ")!\n", _FullTransactionSize(), fLogSize);
653 			return B_BUFFER_OVERFLOW;
654 		}
655 	} else {
656 		cache_end_transaction(fFilesystemBlockCache, fTransactionID,
657 			_TransactionWritten, logEntry);
658 		fUnwrittenTransactions = 0;
659 	}
660 
661 	return B_OK;
662 }
663 
664 
665 status_t
666 Journal::_SaveSuperBlock()
667 {
668 	TRACE("Journal::_SaveSuperBlock()\n");
669 	fsblock_t physicalBlock;
670 	status_t status = MapBlock(0, physicalBlock);
671 	if (status != B_OK)
672 		return status;
673 
674 	off_t superblockPos = physicalBlock * fBlockSize;
675 
676 	JournalSuperBlock superblock;
677 	size_t bytesRead = read_pos(fJournalVolume->Device(), superblockPos,
678 		&superblock, sizeof(superblock));
679 
680 	if (bytesRead != sizeof(superblock))
681 		return B_IO_ERROR;
682 
683 	superblock.SetFirstCommitID(fFirstCommitID);
684 	superblock.SetLogStart(fLogStart);
685 
686 	TRACE("Journal::SaveSuperBlock(): Write to %" B_PRIdOFF "\n",
687 		superblockPos);
688 	size_t bytesWritten = write_pos(fJournalVolume->Device(), superblockPos,
689 		&superblock, sizeof(superblock));
690 
691 	if (bytesWritten != sizeof(superblock))
692 		return B_IO_ERROR;
693 
694 	TRACE("Journal::_SaveSuperBlock(): Done\n");
695 
696 	return B_OK;
697 }
698 
699 
700 status_t
701 Journal::_LoadSuperBlock()
702 {
703 	TRACE("Journal::_LoadSuperBlock()\n");
704 	fsblock_t superblockPos;
705 
706 	status_t status = MapBlock(0, superblockPos);
707 	if (status != B_OK)
708 		return status;
709 
710 	TRACE("Journal::_LoadSuperBlock(): superblock physical block: %" B_PRIu64
711 		"\n", superblockPos);
712 
713 	JournalSuperBlock superblock;
714 	size_t bytesRead = read_pos(fJournalVolume->Device(), superblockPos
715 		* fJournalVolume->BlockSize(), &superblock, sizeof(superblock));
716 
717 	if (bytesRead != sizeof(superblock)) {
718 		ERROR("Journal::_LoadSuperBlock(): failed to read superblock\n");
719 		return B_IO_ERROR;
720 	}
721 
722 	if (!superblock.header.CheckMagic()) {
723 		ERROR("Journal::_LoadSuperBlock(): Invalid superblock magic %" B_PRIx32
724 			"\n", superblock.header.Magic());
725 		return B_BAD_VALUE;
726 	}
727 
728 	if (superblock.header.BlockType() == JOURNAL_SUPERBLOCK_V1) {
729 		TRACE("Journal::_LoadSuperBlock(): Journal superblock version 1\n");
730 		fVersion = 1;
731 	} else if (superblock.header.BlockType() == JOURNAL_SUPERBLOCK_V2) {
732 		TRACE("Journal::_LoadSuperBlock(): Journal superblock version 2\n");
733 		fVersion = 2;
734 	} else {
735 		ERROR("Journal::_LoadSuperBlock(): Invalid superblock version\n");
736 		return B_BAD_VALUE;
737 	}
738 
739 	if (fVersion >= 2) {
740 		status = _CheckFeatures(&superblock);
741 
742 		if (status != B_OK) {
743 			ERROR("Journal::_LoadSuperBlock(): Unsupported features\n");
744 			return status;
745 		}
746 	}
747 
748 	fBlockSize = superblock.BlockSize();
749 	fFirstCommitID = superblock.FirstCommitID();
750 	fFirstLogBlock = superblock.FirstLogBlock();
751 	fLogStart = superblock.LogStart();
752 	fLogSize = superblock.NumBlocks();
753 
754 	uint32 descriptorTags = (fBlockSize - sizeof(JournalHeader))
755 		/ sizeof(JournalBlockTag);
756 		// Maximum tags per descriptor block
757 	uint32 maxDescriptors = (fLogSize - 1) / (descriptorTags + 2);
758 		// Maximum number of full journal transactions
759 	fMaxTransactionSize = maxDescriptors * descriptorTags;
760 	fMaxTransactionSize += (fLogSize - 1) - fMaxTransactionSize - 2;
761 		// Maximum size of a "logical" transaction
762 		// TODO: Why is "superblock.MaxTransactionBlocks();" zero?
763 	//fFirstCacheCommitID = fFirstCommitID - fTransactionID /*+ 1*/;
764 
765 	TRACE("Journal::_LoadSuperBlock(): block size: %" B_PRIu32 ", first commit"
766 		" id: %" B_PRIu32 ", first log block: %" B_PRIu32 ", log start: %"
767 		B_PRIu32 ", log size: %" B_PRIu32 ", max transaction size: %" B_PRIu32
768 		"\n", fBlockSize, fFirstCommitID, fFirstLogBlock, fLogStart,
769 		fLogSize, fMaxTransactionSize);
770 
771 	return B_OK;
772 }
773 
774 
775 status_t
776 Journal::_CheckFeatures(JournalSuperBlock* superblock)
777 {
778 	if ((superblock->ReadOnlyCompatibleFeatures()
779 			& ~JOURNAL_KNOWN_READ_ONLY_COMPATIBLE_FEATURES) != 0
780 		|| (superblock->IncompatibleFeatures()
781 			& ~JOURNAL_KNOWN_INCOMPATIBLE_FEATURES) != 0)
782 		return B_UNSUPPORTED;
783 
784 	return B_OK;
785 }
786 
787 
788 uint32
789 Journal::_CountTags(JournalHeader* descriptorBlock)
790 {
791 	uint32 count = 0;
792 
793 	JournalBlockTag* tags = (JournalBlockTag*)descriptorBlock->data;
794 		// Skip the header
795 	JournalBlockTag* lastTag = (JournalBlockTag*)
796 		(descriptorBlock + fBlockSize - sizeof(JournalBlockTag));
797 
798 	while (tags < lastTag && (tags->Flags() & JOURNAL_FLAG_LAST_TAG) == 0) {
799 		if ((tags->Flags() & JOURNAL_FLAG_SAME_UUID) == 0) {
800 			// sizeof(UUID) = 16 = 2*sizeof(JournalBlockTag)
801 			tags += 2;	// Skip new UUID
802 		}
803 
804 		TRACE("Journal::_CountTags(): Tag block: %" B_PRIu32 "\n",
805 			tags->BlockNumber());
806 
807 		tags++; // Go to next tag
808 		count++;
809 	}
810 
811 	if ((tags->Flags() & JOURNAL_FLAG_LAST_TAG) != 0)
812 		count++;
813 
814 	TRACE("Journal::_CountTags(): counted tags: %" B_PRIu32 "\n", count);
815 
816 	return count;
817 }
818 
819 
820 /*virtual*/ status_t
821 Journal::Recover()
822 {
823 	TRACE("Journal::Recover()\n");
824 	if (fLogStart == 0) // Journal was cleanly unmounted
825 		return B_OK;
826 
827 	TRACE("Journal::Recover(): Journal needs recovery\n");
828 
829 	uint32 lastCommitID;
830 
831 	status_t status = _RecoverPassScan(lastCommitID);
832 	if (status != B_OK)
833 		return status;
834 
835 	status = _RecoverPassRevoke(lastCommitID);
836 	if (status != B_OK)
837 		return status;
838 
839 	return _RecoverPassReplay(lastCommitID);
840 }
841 
842 
843 // First pass: Find the end of the log
844 status_t
845 Journal::_RecoverPassScan(uint32& lastCommitID)
846 {
847 	TRACE("Journal Recover: 1st Pass: Scan\n");
848 
849 	CachedBlock cached(fJournalVolume);
850 	JournalHeader* header;
851 	uint32 nextCommitID = fFirstCommitID;
852 	uint32 nextBlock = fLogStart;
853 	fsblock_t nextBlockPos;
854 
855 	status_t status = MapBlock(nextBlock, nextBlockPos);
856 	if (status != B_OK)
857 		return status;
858 
859 	header = (JournalHeader*)cached.SetTo(nextBlockPos);
860 
861 	while (header->CheckMagic() && header->Sequence() == nextCommitID) {
862 		uint32 blockType = header->BlockType();
863 
864 		if (blockType == JOURNAL_DESCRIPTOR_BLOCK) {
865 			uint32 tags = _CountTags(header);
866 			nextBlock += tags;
867 			TRACE("Journal recover pass scan: Found a descriptor block with "
868 				"%" B_PRIu32 " tags\n", tags);
869 		} else if (blockType == JOURNAL_COMMIT_BLOCK) {
870 			nextCommitID++;
871 			TRACE("Journal recover pass scan: Found a commit block. Next "
872 				"commit ID: %" B_PRIu32 "\n", nextCommitID);
873 		} else if (blockType != JOURNAL_REVOKE_BLOCK) {
874 			TRACE("Journal recover pass scan: Reached an unrecognized block, "
875 				"assuming as log's end.\n");
876 			break;
877 		} else {
878 			TRACE("Journal recover pass scan: Found a revoke block, "
879 				"skipping it\n");
880 		}
881 
882 		nextBlock = _WrapAroundLog(nextBlock + 1);
883 
884 		status = MapBlock(nextBlock, nextBlockPos);
885 		if (status != B_OK)
886 			return status;
887 
888 		header = (JournalHeader*)cached.SetTo(nextBlockPos);
889 	}
890 
891 	TRACE("Journal Recovery pass scan: Last detected transaction ID: %"
892 		B_PRIu32 "\n", nextCommitID);
893 
894 	lastCommitID = nextCommitID;
895 	return B_OK;
896 }
897 
898 
899 // Second pass: Collect all revoked blocks
900 status_t
901 Journal::_RecoverPassRevoke(uint32 lastCommitID)
902 {
903 	TRACE("Journal Recover: 2nd Pass: Revoke\n");
904 
905 	CachedBlock cached(fJournalVolume);
906 	JournalHeader* header;
907 	uint32 nextCommitID = fFirstCommitID;
908 	uint32 nextBlock = fLogStart;
909 	fsblock_t nextBlockPos;
910 
911 	status_t status = MapBlock(nextBlock, nextBlockPos);
912 	if (status != B_OK)
913 		return status;
914 
915 	header = (JournalHeader*)cached.SetTo(nextBlockPos);
916 
917 	while (nextCommitID < lastCommitID) {
918 		if (!header->CheckMagic() || header->Sequence() != nextCommitID) {
919 			// Somehow the log is different than the expexted
920 			return B_ERROR;
921 		}
922 
923 		uint32 blockType = header->BlockType();
924 
925 		if (blockType == JOURNAL_DESCRIPTOR_BLOCK)
926 			nextBlock += _CountTags(header);
927 		else if (blockType == JOURNAL_COMMIT_BLOCK)
928 			nextCommitID++;
929 		else if (blockType == JOURNAL_REVOKE_BLOCK) {
930 			TRACE("Journal::_RecoverPassRevoke(): Found a revoke block\n");
931 			status = fRevokeManager->ScanRevokeBlock(
932 				(JournalRevokeHeader*)header, nextCommitID);
933 
934 			if (status != B_OK)
935 				return status;
936 		} else {
937 			WARN("Journal::_RecoverPassRevoke(): Found an unrecognized block\n");
938 			break;
939 		}
940 
941 		nextBlock = _WrapAroundLog(nextBlock + 1);
942 
943 		status = MapBlock(nextBlock, nextBlockPos);
944 		if (status != B_OK)
945 			return status;
946 
947 		header = (JournalHeader*)cached.SetTo(nextBlockPos);
948 	}
949 
950 	if (nextCommitID != lastCommitID) {
951 		// Possibly because of some sort of IO error
952 		TRACE("Journal::_RecoverPassRevoke(): Incompatible commit IDs\n");
953 		return B_ERROR;
954 	}
955 
956 	TRACE("Journal recovery pass revoke: Revoked blocks: %" B_PRIu32 "\n",
957 		fRevokeManager->NumRevokes());
958 
959 	return B_OK;
960 }
961 
962 
963 // Third pass: Replay log
964 status_t
965 Journal::_RecoverPassReplay(uint32 lastCommitID)
966 {
967 	TRACE("Journal Recover: 3rd Pass: Replay\n");
968 
969 	uint32 nextCommitID = fFirstCommitID;
970 	uint32 nextBlock = fLogStart;
971 	fsblock_t nextBlockPos;
972 
973 	status_t status = MapBlock(nextBlock, nextBlockPos);
974 	if (status != B_OK)
975 		return status;
976 
977 	CachedBlock cached(fJournalVolume);
978 	JournalHeader* header = (JournalHeader*)cached.SetTo(nextBlockPos);
979 
980 	int count = 0;
981 
982 	uint8* data = new(std::nothrow) uint8[fBlockSize];
983 	if (data == NULL) {
984 		TRACE("Journal::_RecoverPassReplay(): Failed to allocate memory for "
985 			"data\n");
986 		return B_NO_MEMORY;
987 	}
988 
989 	ArrayDeleter<uint8> dataDeleter(data);
990 
991 	while (nextCommitID < lastCommitID) {
992 		if (!header->CheckMagic() || header->Sequence() != nextCommitID) {
993 			// Somehow the log is different than the expected
994 			ERROR("Journal::_RecoverPassReplay(): Weird problem with block\n");
995 			return B_ERROR;
996 		}
997 
998 		uint32 blockType = header->BlockType();
999 
1000 		if (blockType == JOURNAL_DESCRIPTOR_BLOCK) {
1001 			JournalBlockTag* last_tag = (JournalBlockTag*)((uint8*)header
1002 				+ fBlockSize - sizeof(JournalBlockTag));
1003 
1004 			for (JournalBlockTag* tag = (JournalBlockTag*)header->data;
1005 				tag <= last_tag; ++tag) {
1006 				nextBlock = _WrapAroundLog(nextBlock + 1);
1007 
1008 				status = MapBlock(nextBlock, nextBlockPos);
1009 				if (status != B_OK)
1010 					return status;
1011 
1012 				if (!fRevokeManager->Lookup(tag->BlockNumber(),
1013 						nextCommitID)) {
1014 					// Block isn't revoked
1015 					size_t read = read_pos(fJournalVolume->Device(),
1016 						nextBlockPos * fBlockSize, data, fBlockSize);
1017 					if (read != fBlockSize)
1018 						return B_IO_ERROR;
1019 
1020 					if ((tag->Flags() & JOURNAL_FLAG_ESCAPED) != 0) {
1021 						// Block is escaped
1022 						((int32*)data)[0]
1023 							= B_HOST_TO_BENDIAN_INT32(JOURNAL_MAGIC);
1024 					}
1025 
1026 					TRACE("Journal::_RevoverPassReplay(): Write to %" B_PRIu32
1027 						"\n", tag->BlockNumber() * fBlockSize);
1028 					size_t written = write_pos(fFilesystemVolume->Device(),
1029 						tag->BlockNumber() * fBlockSize, data, fBlockSize);
1030 
1031 					if (written != fBlockSize)
1032 						return B_IO_ERROR;
1033 
1034 					++count;
1035 				}
1036 
1037 				if ((tag->Flags() & JOURNAL_FLAG_LAST_TAG) != 0)
1038 					break;
1039 				if ((tag->Flags() & JOURNAL_FLAG_SAME_UUID) == 0) {
1040 					// TODO: Check new UUID with file system UUID
1041 					tag += 2;
1042 						// sizeof(JournalBlockTag) = 8
1043 						// sizeof(UUID) = 16
1044 				}
1045 			}
1046 		} else if (blockType == JOURNAL_COMMIT_BLOCK)
1047 			nextCommitID++;
1048 		else if (blockType != JOURNAL_REVOKE_BLOCK) {
1049 			WARN("Journal::_RecoverPassReplay(): Found an unrecognized block\n");
1050 			break;
1051 		} // If blockType == JOURNAL_REVOKE_BLOCK we just skip it
1052 
1053 		nextBlock = _WrapAroundLog(nextBlock + 1);
1054 
1055 		status = MapBlock(nextBlock, nextBlockPos);
1056 		if (status != B_OK)
1057 			return status;
1058 
1059 		header = (JournalHeader*)cached.SetTo(nextBlockPos);
1060 	}
1061 
1062 	if (nextCommitID != lastCommitID) {
1063 		// Possibly because of some sort of IO error
1064 		return B_ERROR;
1065 	}
1066 
1067 	TRACE("Journal recovery pass replay: Replayed blocks: %u\n", count);
1068 
1069 	return B_OK;
1070 }
1071 
1072 
1073 status_t
1074 Journal::_FlushLog(bool canWait, bool flushBlocks)
1075 {
1076 	TRACE("Journal::_FlushLog()\n");
1077 	status_t status = canWait ? recursive_lock_lock(&fLock)
1078 		: recursive_lock_trylock(&fLock);
1079 
1080 	TRACE("Journal::_FlushLog(): Acquired fLock, recursion: %" B_PRId32 "\n",
1081 		recursive_lock_get_recursion(&fLock));
1082 	if (status != B_OK)
1083 		return status;
1084 
1085 	if (recursive_lock_get_recursion(&fLock) > 1) {
1086 		// Called from inside a transaction
1087 		recursive_lock_unlock(&fLock);
1088 		TRACE("Journal::_FlushLog(): Called from a transaction. Leaving...\n");
1089 		return B_OK;
1090 	}
1091 
1092 	if (fUnwrittenTransactions != 0 && _FullTransactionSize() != 0) {
1093 		status = _WriteTransactionToLog();
1094 		if (status < B_OK)
1095 			panic("Failed flushing transaction: %s\n", strerror(status));
1096 	}
1097 
1098 	TRACE("Journal::_FlushLog(): Attempting to flush journal volume at %p\n",
1099 		fJournalVolume);
1100 
1101 	// TODO: Not sure this is correct. Need to review...
1102 	// NOTE: Not correct. Causes double lock of a block cache mutex
1103 	// TODO: Need some other way to synchronize the journal...
1104 	/*status = fJournalVolume->FlushDevice();
1105 	if (status != B_OK)
1106 		return status;*/
1107 
1108 	TRACE("Journal::_FlushLog(): Flushed journal volume\n");
1109 
1110 	if (flushBlocks) {
1111 		TRACE("Journal::_FlushLog(): Attempting to flush file system volume "
1112 			"at %p\n", fFilesystemVolume);
1113 		status = fFilesystemVolume->FlushDevice();
1114 		if (status == B_OK)
1115 			TRACE("Journal::_FlushLog(): Flushed file system volume\n");
1116 	}
1117 
1118 	TRACE("Journal::_FlushLog(): Finished. Releasing lock\n");
1119 
1120 	recursive_lock_unlock(&fLock);
1121 
1122 	TRACE("Journal::_FlushLog(): Done, final status: %s\n", strerror(status));
1123 	return status;
1124 }
1125 
1126 
1127 inline uint32
1128 Journal::_WrapAroundLog(uint32 block)
1129 {
1130 	TRACE("Journal::_WrapAroundLog()\n");
1131 	if (block >= fLogSize)
1132 		return block - fLogSize + fFirstLogBlock;
1133 	else
1134 		return block;
1135 }
1136 
1137 
1138 size_t
1139 Journal::_CurrentTransactionSize() const
1140 {
1141 	TRACE("Journal::_CurrentTransactionSize(): transaction %" B_PRIu32 "\n",
1142 		fTransactionID);
1143 
1144 	size_t count;
1145 
1146 	if (fHasSubTransaction) {
1147 		count = cache_blocks_in_sub_transaction(fFilesystemBlockCache,
1148 			fTransactionID);
1149 
1150 		TRACE("\tSub transaction size: %" B_PRIuSIZE "\n", count);
1151 	} else {
1152 		count =  cache_blocks_in_transaction(fFilesystemBlockCache,
1153 			fTransactionID);
1154 
1155 		TRACE("\tTransaction size: %" B_PRIuSIZE "\n", count);
1156 	}
1157 
1158 	return count;
1159 }
1160 
1161 
1162 size_t
1163 Journal::_FullTransactionSize() const
1164 {
1165 	TRACE("Journal::_FullTransactionSize(): transaction %" B_PRIu32 "\n",
1166 		fTransactionID);
1167 	TRACE("\tFile sytem block cache: %p\n", fFilesystemBlockCache);
1168 
1169 	size_t count = cache_blocks_in_transaction(fFilesystemBlockCache,
1170 		 fTransactionID);
1171 
1172 	TRACE("\tFull transaction size: %" B_PRIuSIZE "\n", count);
1173 
1174 	return count;
1175 }
1176 
1177 
1178 size_t
1179 Journal::_MainTransactionSize() const
1180 {
1181 	TRACE("Journal::_MainTransactionSize(): transaction %" B_PRIu32 "\n",
1182 		fTransactionID);
1183 
1184 	size_t count =  cache_blocks_in_main_transaction(fFilesystemBlockCache,
1185 		fTransactionID);
1186 
1187 	TRACE("\tMain transaction size: %" B_PRIuSIZE "\n", count);
1188 
1189 	return count;
1190 }
1191 
1192 
1193 status_t
1194 Journal::_TransactionDone(bool success)
1195 {
1196 	if (!success) {
1197 		if (fHasSubTransaction) {
1198 			TRACE("Journal::_TransactionDone(): transaction %" B_PRIu32
1199 				" failed, aborting subtransaction\n", fTransactionID);
1200 			cache_abort_sub_transaction(fFilesystemBlockCache, fTransactionID);
1201 			// parent is unaffected
1202 		} else {
1203 			TRACE("Journal::_TransactionDone(): transaction %" B_PRIu32
1204 				" failed, aborting\n", fTransactionID);
1205 			cache_abort_transaction(fFilesystemBlockCache, fTransactionID);
1206 			fUnwrittenTransactions = 0;
1207 		}
1208 
1209 		TRACE("Journal::_TransactionDone(): returning B_OK\n");
1210 		return B_OK;
1211 	}
1212 
1213 	// If possible, delay flushing the transaction
1214 	uint32 size = _FullTransactionSize();
1215 	TRACE("Journal::_TransactionDone(): full transaction size: %" B_PRIu32
1216 		", max transaction size: %" B_PRIu32 ", free log blocks: %" B_PRIu32
1217 		"\n", size, fMaxTransactionSize, FreeLogBlocks());
1218 	if (fMaxTransactionSize > 0 && size < fMaxTransactionSize) {
1219 		TRACE("Journal::_TransactionDone(): delaying flush of transaction "
1220 			"%" B_PRIu32 "\n", fTransactionID);
1221 
1222 		// Make sure the transaction fits in the log
1223 		if (size < FreeLogBlocks())
1224 			cache_sync_transaction(fFilesystemBlockCache, fTransactionID);
1225 
1226 		fUnwrittenTransactions++;
1227 		TRACE("Journal::_TransactionDone(): returning B_OK\n");
1228 		return B_OK;
1229 	}
1230 
1231 	return _WriteTransactionToLog();
1232 }
1233 
1234 
1235 /*static*/ void
1236 Journal::_TransactionWritten(int32 transactionID, int32 event, void* _logEntry)
1237 {
1238 	LogEntry* logEntry = (LogEntry*)_logEntry;
1239 
1240 	TRACE("Journal::_TransactionWritten(): Transaction %" B_PRIu32
1241 		" checkpointed\n", transactionID);
1242 
1243 	Journal* journal = logEntry->GetJournal();
1244 
1245 	TRACE("Journal::_TransactionWritten(): log entry: %p, journal: %p\n",
1246 		logEntry, journal);
1247 	TRACE("Journal::_TransactionWritten(): log entries: %p\n",
1248 		&journal->fLogEntries);
1249 
1250 	mutex_lock(&journal->fLogEntriesLock);
1251 
1252 	TRACE("Journal::_TransactionWritten(): first log entry: %p\n",
1253 		journal->fLogEntries.First());
1254 	if (logEntry == journal->fLogEntries.First()) {
1255 		TRACE("Journal::_TransactionWritten(): Moving start of log to %"
1256 			B_PRIu32 "\n", logEntry->Start());
1257 		journal->fLogStart = logEntry->Start();
1258 		journal->fFirstCommitID = logEntry->CommitID();
1259 		TRACE("Journal::_TransactionWritten(): Setting commit ID to %" B_PRIu32
1260 			"\n", logEntry->CommitID());
1261 
1262 		if (journal->_SaveSuperBlock() != B_OK)
1263 			panic("ext2: Failed to write journal superblock\n");
1264 	}
1265 
1266 	TRACE("Journal::_TransactionWritten(): Removing log entry\n");
1267 	journal->fLogEntries.Remove(logEntry);
1268 
1269 	TRACE("Journal::_TransactionWritten(): Unlocking entries list\n");
1270 	mutex_unlock(&journal->fLogEntriesLock);
1271 
1272 	TRACE("Journal::_TransactionWritten(): Deleting log entry at %p\n", logEntry);
1273 	delete logEntry;
1274 }
1275 
1276 
1277 /*static*/ void
1278 Journal::_TransactionIdle(int32 transactionID, int32 event, void* _journal)
1279 {
1280 	Journal* journal = (Journal*)_journal;
1281 	journal->_FlushLog(false, false);
1282 }
1283