xref: /haiku/src/add-ons/kernel/file_systems/bfs/Journal.cpp (revision 020cbad9d40235a2c50a81a42d69912a5ff8fbc4)
1 /*
2  * Copyright 2001-2008, Axel Dörfler, axeld@pinc-software.de.
3  * This file may be used under the terms of the MIT License.
4  */
5 
6 //! Transaction and logging
7 
8 
9 #include "Journal.h"
10 #include "Inode.h"
11 #include "Debug.h"
12 
13 
// On-disk header of a log entry: a sorted array of the block_runs that
// belong to one transaction. The structure occupies exactly one volume
// block; "runs" extends to the end of that block.
struct run_array {
	int32		count;
		// number of valid entries in runs[], stored in BFS (disk) endianness
	int32		max_runs;
		// capacity of runs[], stored in BFS (disk) endianness
	block_run	runs[0];
		// variable sized array filling the rest of the block

	void Init(int32 blockSize);
	void Insert(block_run &run);

	int32 CountRuns() const { return BFS_ENDIAN_TO_HOST_INT32(count); }
	int32 MaxRuns() const { return BFS_ENDIAN_TO_HOST_INT32(max_runs) - 1; }
		// that -1 accounts for an off-by-one error in Be's BFS implementation
	const block_run &RunAt(int32 i) const { return runs[i]; }

	// number of block_runs that fit into one block of the given size
	static int32 MaxRuns(int32 blockSize)
		{ return (blockSize - sizeof(run_array)) / sizeof(block_run); }

private:
	static int _Compare(block_run &a, block_run &b);
	int32 _FindInsertionIndex(block_run &run);
};
34 
// Collects the blocks of a transaction into one or more run_array
// structures (one per log block) before the log entry is written out.
class RunArrays {
	public:
		RunArrays(Journal *journal);
		~RunArrays();

		// total number of runs inserted over all arrays
		uint32 Length() const { return fLength; }

		status_t Insert(off_t blockNumber);

		run_array *ArrayAt(int32 i) { return fArrays.Array()[i]; }
		int32 CountArrays() const { return fArrays.CountItems(); }

		int32 MaxArrayLength();

	private:
		status_t _AddArray();
		bool _ContainsRun(block_run &run);
		bool _AddRun(block_run &run);

		Journal		*fJournal;
		uint32		fLength;		// see Length()
		Stack<run_array *> fArrays;	// all arrays, in insertion order
		run_array	*fLastArray;	// the array new runs are added to
};
59 
// Describes one entry already written to the on-disk log: its start
// block within the log area and its length in blocks. Entries live in
// the journal's list until the block cache reports them as written
// back to their final location (see Journal::_BlockNotify()).
class LogEntry : public DoublyLinkedListLinkImpl<LogEntry> {
	public:
		LogEntry(Journal *journal, uint32 logStart, uint32 length);
		~LogEntry();

		uint32 Start() const { return fStart; }
		uint32 Length() const { return fLength; }

		Journal *GetJournal() { return fJournal; }

	private:
		Journal		*fJournal;
		uint32		fStart;		// start block within the log
		uint32		fLength;	// length in blocks
};
75 
76 
77 //	#pragma mark -
78 
79 
80 static void
81 add_to_iovec(iovec *vecs, int32 &index, int32 max, const void *address,
82 	size_t size)
83 {
84 	if (index > 0 && (addr_t)vecs[index - 1].iov_base
85 			+ vecs[index - 1].iov_len == (addr_t)address) {
86 		// the iovec can be combined with the previous one
87 		vecs[index - 1].iov_len += size;
88 		return;
89 	}
90 
91 	if (index == max)
92 		panic("no more space for iovecs!");
93 
94 	// we need to start a new iovec
95 	vecs[index].iov_base = const_cast<void *>(address);
96 	vecs[index].iov_len = size;
97 	index++;
98 }
99 
100 
101 //	#pragma mark - LogEntry
102 
103 
104 LogEntry::LogEntry(Journal *journal, uint32 start, uint32 length)
105 	:
106 	fJournal(journal),
107 	fStart(start),
108 	fLength(length)
109 {
110 }
111 
112 
LogEntry::~LogEntry()
{
	// nothing to free; the entry is unlinked from the journal's list
	// before it gets deleted (see Journal::_BlockNotify())
}
116 
117 
118 //	#pragma mark - run_array
119 
120 
/*!	The run_array's size equals the block size of the BFS volume, so we
	cannot use a (non-overridden) new.
	This makes a freshly allocated run_array ready to run: the whole
	block is cleared, and the capacity is stored in disk endianness.
*/
void
run_array::Init(int32 blockSize)
{
	memset(this, 0, blockSize);
	count = 0;
	max_runs = HOST_ENDIAN_TO_BFS_INT32(MaxRuns(blockSize));
}
132 
133 
134 /*!	Inserts the block_run into the array. You will have to make sure the
135 	array is large enough to contain the entry before calling this function.
136 */
137 void
138 run_array::Insert(block_run &run)
139 {
140 	int32 index = _FindInsertionIndex(run);
141 	if (index == -1) {
142 		// add to the end
143 		runs[CountRuns()] = run;
144 	} else {
145 		// insert at index
146 		memmove(&runs[index + 1], &runs[index],
147 			(CountRuns() - index) * sizeof(off_t));
148 		runs[index] = run;
149 	}
150 
151 	count = HOST_ENDIAN_TO_BFS_INT32(CountRuns() + 1);
152 }
153 
154 
155 /*static*/ int
156 run_array::_Compare(block_run &a, block_run &b)
157 {
158 	int cmp = a.AllocationGroup() - b.AllocationGroup();
159 	if (cmp == 0)
160 		return a.Start() - b.Start();
161 
162 	return cmp;
163 }
164 
165 
166 int32
167 run_array::_FindInsertionIndex(block_run &run)
168 {
169 	int32 min = 0, max = CountRuns() - 1;
170 	int32 i = 0;
171 	if (max >= 8) {
172 		while (min <= max) {
173 			i = (min + max) / 2;
174 
175 			int cmp = _Compare(runs[i], run);
176 			if (cmp < 0)
177 				min = i + 1;
178 			else if (cmp > 0)
179 				max = i - 1;
180 			else
181 				return -1;
182 		}
183 
184 		if (_Compare(runs[i], run) < 0)
185 			i++;
186 	} else {
187 		for (; i <= max; i++) {
188 			if (_Compare(runs[i], run) > 0)
189 				break;
190 		}
191 		if (i == count)
192 			return -1;
193 	}
194 
195 	return i;
196 }
197 
198 
199 //	#pragma mark - RunArrays
200 
201 
// Starts out with no arrays allocated; the first one is created lazily
// by _AddArray() when the first run is inserted.
RunArrays::RunArrays(Journal *journal)
	:
	fJournal(journal),
	fLength(0),
	fArrays(),
	fLastArray(NULL)
{
}
210 
211 
212 RunArrays::~RunArrays()
213 {
214 	run_array *array;
215 	while (fArrays.Pop(&array))
216 		free(array);
217 }
218 
219 
220 bool
221 RunArrays::_ContainsRun(block_run &run)
222 {
223 	for (int32 i = 0; i < CountArrays(); i++) {
224 		run_array *array = ArrayAt(i);
225 
226 		for (int32 j = 0; j < array->CountRuns(); j++) {
227 			block_run &arrayRun = array->runs[j];
228 			if (run.AllocationGroup() != arrayRun.AllocationGroup())
229 				continue;
230 
231 			if (run.Start() >= arrayRun.Start()
232 				&& run.Start() + run.Length()
233 					<= arrayRun.Start() + arrayRun.Length())
234 				return true;
235 		}
236 	}
237 
238 	return false;
239 }
240 
241 
242 /*!	Adds the specified block_run into the array.
243 	Note: it doesn't support overlapping - it must only be used
244 	with block_runs of length 1!
245 */
246 bool
247 RunArrays::_AddRun(block_run &run)
248 {
249 	ASSERT(run.length == 1);
250 
251 	// Be's BFS log replay routine can only deal with block_runs of size 1
252 	// A pity, isn't it? Too sad we have to be compatible.
253 
254 	if (fLastArray == NULL || fLastArray->CountRuns() == fLastArray->MaxRuns())
255 		return false;
256 
257 	fLastArray->Insert(run);
258 	fLength++;
259 	return true;
260 }
261 
262 
263 status_t
264 RunArrays::_AddArray()
265 {
266 	int32 blockSize = fJournal->GetVolume()->BlockSize();
267 
268 	run_array *array = (run_array *)malloc(blockSize);
269 	if (array == NULL)
270 		return B_NO_MEMORY;
271 
272 	if (fArrays.Push(array) != B_OK) {
273 		free(array);
274 		return B_NO_MEMORY;
275 	}
276 
277 	array->Init(blockSize);
278 	fLastArray = array;
279 	return B_OK;
280 }
281 
282 
283 status_t
284 RunArrays::Insert(off_t blockNumber)
285 {
286 	Volume *volume = fJournal->GetVolume();
287 	block_run run = volume->ToBlockRun(blockNumber);
288 
289 	if (fLastArray != NULL) {
290 		// check if the block is already in the array
291 		if (_ContainsRun(run))
292 			return B_OK;
293 	}
294 
295 	// insert block into array
296 
297 	if (!_AddRun(run)) {
298 		// array is full
299 		if (_AddArray() != B_OK || !_AddRun(run))
300 			return B_NO_MEMORY;
301 	}
302 
303 	return B_OK;
304 }
305 
306 
307 int32
308 RunArrays::MaxArrayLength()
309 {
310 	int32 max = 0;
311 	for (int32 i = 0; i < CountArrays(); i++) {
312 		if (ArrayAt(i)->CountRuns() > max)
313 			max = ArrayAt(i)->CountRuns();
314 	}
315 
316 	return max;
317 }
318 
319 
320 //	#pragma mark - Journal
321 
322 
Journal::Journal(Volume *volume)
	:
	fVolume(volume),
	fLock("bfs journal"),
	fOwner(NULL),
	fLogSize(volume->Log().Length()),
	fMaxTransactionSize(fLogSize / 2 - 5),
		// batch transactions up to half the log size; the -5 leaves a
		// bit of headroom (exact rationale not documented here)
	fUsed(0),
	fUnwrittenTransactions(0),
	fHasSubtransaction(false)
{
}
335 
336 
Journal::~Journal()
{
	// flush any pending transaction and all blocks before going away
	FlushLogAndBlocks();
}
341 
342 
343 status_t
344 Journal::InitCheck()
345 {
346 	// TODO: this logic won't work whenever the size of the pending transaction
347 	//	equals the size of the log (happens with the original BFS only)
348 	if (fVolume->LogStart() != fVolume->LogEnd()) {
349 		if (fVolume->SuperBlock().flags != SUPER_BLOCK_DISK_DIRTY)
350 			FATAL(("log_start and log_end differ, but disk is marked clean - trying to replay log...\n"));
351 
352 		return ReplayLog();
353 	}
354 
355 	return B_OK;
356 }
357 
358 
359 status_t
360 Journal::_CheckRunArray(const run_array *array)
361 {
362 	int32 maxRuns = run_array::MaxRuns(fVolume->BlockSize()) - 1;
363 		// the -1 works around an off-by-one bug in Be's BFS implementation,
364 		// same as in run_array::MaxRuns()
365 	if (array->MaxRuns() != maxRuns
366 		|| array->CountRuns() > maxRuns
367 		|| array->CountRuns() <= 0) {
368 		dprintf("run count: %d, array max: %d, max runs: %d\n",
369 			(int)array->CountRuns(), (int)array->MaxRuns(), (int)maxRuns);
370 		FATAL(("Log entry has broken header!\n"));
371 		return B_ERROR;
372 	}
373 
374 	for (int32 i = 0; i < array->CountRuns(); i++) {
375 		if (fVolume->ValidateBlockRun(array->RunAt(i)) != B_OK)
376 			return B_ERROR;
377 	}
378 
379 	PRINT(("Log entry has %ld entries\n", array->CountRuns()));
380 	return B_OK;
381 }
382 
383 
/*!	Replays an entry in the log.
	\a _start points to the entry in the log, and will be bumped to the next
	one if replaying succeeded.
	The entry consists of one run_array block followed by the logged
	block contents; each is copied back to its final on-disk position.
*/
status_t
Journal::_ReplayRunArray(int32 *_start)
{
	PRINT(("ReplayRunArray(start = %ld)\n", *_start));

	off_t logOffset = fVolume->ToBlock(fVolume->Log());
	off_t blockNumber = *_start % fLogSize;
	int32 blockSize = fVolume->BlockSize();
	int32 count = 1;
		// counts the run_array block itself, plus every data block below

	CachedBlock cachedArray(fVolume);

	// the first block of the entry holds the run_array
	const run_array *array = (const run_array *)cachedArray.SetTo(logOffset
		+ blockNumber);
	if (array == NULL)
		return B_IO_ERROR;

	if (_CheckRunArray(array) < B_OK)
		return B_BAD_DATA;

	// the data blocks follow the run_array, wrapping around at the
	// end of the log area
	blockNumber = (blockNumber + 1) % fLogSize;

	CachedBlock cached(fVolume);
	for (int32 index = 0; index < array->CountRuns(); index++) {
		const block_run &run = array->RunAt(index);
		INFORM(("replay block run %u:%u:%u in log at %Ld!\n",
			(int)run.AllocationGroup(), run.Start(), run.Length(), blockNumber));

		off_t offset = fVolume->ToOffset(run);
		for (int32 i = 0; i < run.Length(); i++) {
			// copy the logged block back to its real location on disk
			const uint8 *data = cached.SetTo(logOffset + blockNumber);
			if (data == NULL)
				RETURN_ERROR(B_IO_ERROR);

			ssize_t written = write_pos(fVolume->Device(),
				offset + (i * blockSize), data, blockSize);
			if (written != blockSize)
				RETURN_ERROR(B_IO_ERROR);

			blockNumber = (blockNumber + 1) % fLogSize;
			count++;
		}
	}

	// advance the caller's log position past this entry
	*_start += count;
	return B_OK;
}
435 
436 
437 /*!	Replays all log entries - this will put the disk into a
438 	consistent and clean state, if it was not correctly unmounted
439 	before.
440 	This method is called by Journal::InitCheck() if the log start
441 	and end pointer don't match.
442 */
443 status_t
444 Journal::ReplayLog()
445 {
446 	INFORM(("Replay log, disk was not correctly unmounted...\n"));
447 
448 	int32 start = fVolume->LogStart();
449 	int32 lastStart = -1;
450 	while (true) {
451 		// stop if the log is completely flushed
452 		if (start == fVolume->LogEnd())
453 			break;
454 
455 		if (start == lastStart) {
456 			// strange, flushing the log hasn't changed the log_start pointer
457 			return B_ERROR;
458 		}
459 		lastStart = start;
460 
461 		status_t status = _ReplayRunArray(&start);
462 		if (status < B_OK) {
463 			FATAL(("replaying log entry from %d failed: %s\n", (int)start, strerror(status)));
464 			return B_ERROR;
465 		}
466 		start = start % fLogSize;
467 	}
468 
469 	PRINT(("replaying worked fine!\n"));
470 	fVolume->SuperBlock().log_start = fVolume->LogEnd();
471 	fVolume->LogStart() = fVolume->LogEnd();
472 	fVolume->SuperBlock().flags = SUPER_BLOCK_DISK_CLEAN;
473 
474 	return fVolume->WriteSuperBlock();
475 }
476 
477 
/*!	This is a callback function that is called by the cache, whenever
	a block is flushed to disk that was updated as part of a transaction.
	This is necessary to keep track of completed transactions, to be
	able to update the log start pointer.
	\param transactionID the cache transaction that completed
	\param event the cache event; only TRANSACTION_WRITTEN is handled
	\param arg the LogEntry registered for the transaction; it is
		removed from the journal's list and deleted here
*/
void
Journal::_BlockNotify(int32 transactionID, int32 event, void *arg)
{
	LogEntry *logEntry = (LogEntry *)arg;

	if (event != TRANSACTION_WRITTEN)
		return;

	PRINT(("Log entry %p has been finished, transaction ID = %ld\n", logEntry, transactionID));

	Journal *journal = logEntry->GetJournal();
	disk_super_block &superBlock = journal->fVolume->SuperBlock();
	bool update = false;

	// Set log_start pointer if possible...

	journal->fEntriesLock.Lock();

	// only the oldest entry can advance log_start; newer entries have
	// to wait until everything before them has been written back
	if (logEntry == journal->fEntries.First()) {
		LogEntry *next = journal->fEntries.GetNext(logEntry);
		if (next != NULL) {
			int32 length = next->Start() - logEntry->Start();
				// log entries inbetween could have been already released, so
				// we can't just use LogEntry::Length() here
			superBlock.log_start = superBlock.log_start + length;
		} else
			superBlock.log_start = journal->fVolume->LogEnd();

		superBlock.log_start %= journal->fLogSize;
		update = true;
	}

	journal->fUsed -= logEntry->Length();
	journal->fEntries.Remove(logEntry);
	journal->fEntriesLock.Unlock();

	delete logEntry;

	// update the super block, and change the disk's state, if necessary

	if (update) {
		journal->fVolume->LogStart() = superBlock.log_start;

		// the log is empty again once start has caught up with end
		if (superBlock.log_start == superBlock.log_end)
			superBlock.flags = SUPER_BLOCK_DISK_CLEAN;

		status_t status = journal->fVolume->WriteSuperBlock();
		if (status != B_OK) {
			FATAL(("_BlockNotify: could not write back super block: %s\n",
				strerror(status)));
		}
	}
}
536 
537 
/*!	Writes the blocks that are part of current transaction into the log,
	and ends the current transaction.
	If the current transaction is too large to fit into the log, it will
	try to detach an existing sub-transaction.
	The log entry is laid out as one run_array block followed by the
	contents of all blocks it lists; writes wrap around at the end of
	the log area.
*/
status_t
Journal::_WriteTransactionToLog()
{
	// ToDo: in case of a failure, we need a backup plan like writing all
	//	changed blocks back to disk immediately (hello disk corruption!)

	bool detached = false;

	if (_TransactionSize() > fLogSize) {
		// The current transaction won't fit into the log anymore, try to
		// detach the current sub-transaction
		if (_HasSubTransaction() && cache_blocks_in_main_transaction(
				fVolume->BlockCache(), fTransactionID) < (int32)fLogSize) {
			detached = true;
		} else {
			// TODO: what are our options here?
			// a) abort the transaction - bad, because all changes are lost
			// b) carry out the changes, but don't use the log - even worse,
			//    as it potentially creates a corrupted disk.
			dprintf("transaction too large (%d blocks, %d main, log size %d)!\n",
				(int)_TransactionSize(), (int)cache_blocks_in_main_transaction(
				fVolume->BlockCache(), fTransactionID), (int)fLogSize);
			return B_BUFFER_OVERFLOW;
		}
	}

	fHasSubtransaction = false;

	int32 blockShift = fVolume->BlockShift();
	off_t logOffset = fVolume->ToBlock(fVolume->Log()) << blockShift;
	off_t logStart = fVolume->LogEnd() % fLogSize;
	off_t logPosition = logStart;
	status_t status;

	// create run_array structures for all changed blocks

	RunArrays runArrays(this);

	off_t blockNumber;
	long cookie = 0;
	while (cache_next_block_in_transaction(fVolume->BlockCache(),
			fTransactionID, detached, &cookie, &blockNumber, NULL,
			NULL) == B_OK) {
		status = runArrays.Insert(blockNumber);
		if (status < B_OK) {
			FATAL(("filling log entry failed!"));
			return status;
		}
	}

	if (runArrays.Length() == 0) {
		// nothing has changed during this transaction
		if (detached) {
			fTransactionID = cache_detach_sub_transaction(fVolume->BlockCache(),
				fTransactionID, NULL, NULL);
			fUnwrittenTransactions = 1;
		} else {
			cache_end_transaction(fVolume->BlockCache(), fTransactionID, NULL,
				NULL);
			fUnwrittenTransactions = 0;
		}
		return B_OK;
	}

	// Write log entries to disk

	int32 maxVecs = runArrays.MaxArrayLength() + 1;
		// one extra for the index block

	iovec *vecs = (iovec *)malloc(sizeof(iovec) * maxVecs);
	if (vecs == NULL) {
		// ToDo: write back log entries directly?
		return B_NO_MEMORY;
	}

	// each run_array fills one log entry: first its own block, then the
	// contents of every block it lists
	for (int32 k = 0; k < runArrays.CountArrays(); k++) {
		run_array *array = runArrays.ArrayAt(k);
		int32 index = 0, count = 1;
		int32 wrap = fLogSize - logStart;
			// number of blocks until the log area wraps around

		add_to_iovec(vecs, index, maxVecs, (void *)array, fVolume->BlockSize());

		// add block runs

		for (int32 i = 0; i < array->CountRuns(); i++) {
			const block_run &run = array->RunAt(i);
			off_t blockNumber = fVolume->ToBlock(run);

			for (int32 j = 0; j < run.Length(); j++) {
				if (count >= wrap) {
					// We need to write back the first half of the entry
					// directly as the log wraps around
					if (writev_pos(fVolume->Device(), logOffset
						+ (logStart << blockShift), vecs, index) < 0)
						FATAL(("could not write log area!\n"));

					logPosition = logStart + count;
					logStart = 0;
					wrap = fLogSize;
					count = 0;
					index = 0;
				}

				// make blocks available in the cache
				const void *data = block_cache_get(fVolume->BlockCache(),
					blockNumber + j);
				if (data == NULL) {
					free(vecs);
					return B_IO_ERROR;
				}

				add_to_iovec(vecs, index, maxVecs, data, fVolume->BlockSize());
				count++;
			}
		}

		// write back the rest of the log entry
		if (count > 0) {
			logPosition = logStart + count;
			if (writev_pos(fVolume->Device(), logOffset
					+ (logStart << blockShift), vecs, index) < 0)
				FATAL(("could not write log area: %s!\n", strerror(errno)));
		}

		// release blocks again
		for (int32 i = 0; i < array->CountRuns(); i++) {
			const block_run &run = array->RunAt(i);
			off_t blockNumber = fVolume->ToBlock(run);

			for (int32 j = 0; j < run.Length(); j++) {
				block_cache_put(fVolume->BlockCache(), blockNumber + j);
			}
		}

		logStart = logPosition % fLogSize;
	}

	free(vecs);

	LogEntry *logEntry = new LogEntry(this, fVolume->LogEnd(),
		runArrays.Length());
	if (logEntry == NULL) {
		FATAL(("no memory to allocate log entries!"));
		return B_NO_MEMORY;
	}

	// Update the log end pointer in the super block

	fVolume->SuperBlock().flags = SUPER_BLOCK_DISK_DIRTY;
	fVolume->SuperBlock().log_end = logPosition;
	fVolume->LogEnd() = logPosition;

	status = fVolume->WriteSuperBlock();

	// We need to flush the drives own cache here to ensure
	// disk consistency.
	// If that call fails, we can't do anything about it anyway
	ioctl(fVolume->Device(), B_FLUSH_DRIVE_CACHE);

	// at this point, we can finally end the transaction - we're in
	// a guaranteed valid state

	fEntriesLock.Lock();
	fEntries.Add(logEntry);
	fUsed += logEntry->Length();
	fEntriesLock.Unlock();

	// _BlockNotify() will release the log entry once the cache has
	// written the transaction's blocks back to their final position
	if (detached) {
		fTransactionID = cache_detach_sub_transaction(fVolume->BlockCache(),
			fTransactionID, _BlockNotify, logEntry);
		fUnwrittenTransactions = 1;
	} else {
		cache_end_transaction(fVolume->BlockCache(), fTransactionID,
			_BlockNotify, logEntry);
		fUnwrittenTransactions = 0;
	}

	return status;
}
722 
723 
/*!	Writes the current transaction (if any) to the log and flushes the
	underlying device. Does nothing if called from within a transaction
	(the journal lock is held recursively in that case).
*/
status_t
Journal::FlushLogAndBlocks()
{
	status_t status = fLock.Lock();
	if (status != B_OK)
		return status;

	if (fLock.OwnerCount() > 1) {
		// whoa, FlushLogAndBlocks() was called from inside a transaction
		fLock.Unlock();
		return B_OK;
	}

	// write the current log entry to disk

	if (fUnwrittenTransactions != 0 && _TransactionSize() != 0) {
		status = _WriteTransactionToLog();
		if (status < B_OK)
			FATAL(("writing current log entry failed: %s\n", strerror(status)));
	}

	status = fVolume->FlushDevice();

	fLock.Unlock();
	return status;
}
750 
751 
/*!	Enters a transaction on behalf of \a owner. The journal lock is
	recursive: if it is already held, the caller joins the running
	transaction; otherwise a new cache transaction (or sub-transaction,
	if unwritten transactions are being batched) is started.
*/
status_t
Journal::Lock(Transaction *owner)
{
	status_t status = fLock.Lock();
	if (status != B_OK)
		return status;

/*	ToDo:
	// if the last transaction is older than 2 secs, start a new one
	if (fTransactionsInEntry != 0 && system_time() - fTimestamp > 2000000L)
		WriteLogEntry();
*/

	if (fLock.OwnerCount() > 1) {
		// we'll just use the current transaction again
		return B_OK;
	}

	fOwner = owner;

	// ToDo: we need a way to find out how big the current transaction is;
	//	we need to be able to either detach the latest sub transaction on
	//	demand, as well as having some kind of fall back plan in case the
	//	sub transaction itself grows bigger than the log.
	//	For that, it would be nice to have some call-back interface in the
	//	cache transaction API...

	if (fUnwrittenTransactions > 0) {
		// start a sub transaction
		cache_start_sub_transaction(fVolume->BlockCache(), fTransactionID);
		fHasSubtransaction = true;
	} else
		fTransactionID = cache_start_transaction(fVolume->BlockCache());

	// cache_start_transaction() returns a negative error code on failure
	if (fTransactionID < B_OK) {
		fLock.Unlock();
		return fTransactionID;
	}

	return B_OK;
}
793 
794 
/*!	Leaves the transaction held by \a owner. Only the outermost unlock
	actually ends the transaction (committing or aborting it depending
	on \a success); nested unlocks just release the recursive lock.
*/
void
Journal::Unlock(Transaction *owner, bool success)
{
	if (fLock.OwnerCount() == 1) {
		// we only end the transaction if we would really unlock it
		// ToDo: what about failing transactions that do not unlock?
		_TransactionDone(success);

		fTimestamp = system_time();
		fOwner = NULL;
	}

	fLock.Unlock();
}
809 
810 
811 uint32
812 Journal::_TransactionSize() const
813 {
814 	int32 count = cache_blocks_in_transaction(fVolume->BlockCache(),
815 		fTransactionID);
816 	if (count < 0)
817 		return 0;
818 
819 	return count;
820 }
821 
822 
/*!	Ends the current transaction: aborts it on failure, and otherwise
	either batches it with later transactions (while the combined size
	stays below fMaxTransactionSize) or writes it out to the log.
*/
status_t
Journal::_TransactionDone(bool success)
{
	if (!success) {
		// roll back all changes made by this (sub-)transaction
		if (_HasSubTransaction())
			cache_abort_sub_transaction(fVolume->BlockCache(), fTransactionID);
		else
			cache_abort_transaction(fVolume->BlockCache(), fTransactionID);

		return B_OK;
	}

	// If necessary, flush the log, so that we have enough space for this
	// transaction
	if (_TransactionSize() > FreeLogBlocks())
		cache_sync_transaction(fVolume->BlockCache(), fTransactionID);

	// Up to a maximum size, we will just batch several
	// transactions together to improve speed
	if (_TransactionSize() < fMaxTransactionSize) {
		fUnwrittenTransactions++;
		return B_OK;
	}

	return _WriteTransactionToLog();
}
849 
850 
851 //	#pragma mark - Transaction
852 
853 
854 status_t
855 Transaction::Start(Volume *volume, off_t refBlock)
856 {
857 	// has it already been started?
858 	if (fJournal != NULL)
859 		return B_OK;
860 
861 	fJournal = volume->GetJournal(refBlock);
862 	if (fJournal != NULL && fJournal->Lock(this) == B_OK)
863 		return B_OK;
864 
865 	fJournal = NULL;
866 	return B_ERROR;
867 }
868 
869