xref: /haiku/src/add-ons/kernel/file_systems/bfs/kernel_interface.cpp (revision 4a55cc230cf7566cadcbb23b1928eefff8aea9a2)
1 /*
2  * Copyright 2001-2020, Axel Dörfler, axeld@pinc-software.de.
3  * This file may be used under the terms of the MIT License.
4  */
5 
6 
7 //!	file system interface to Haiku's vnode layer
8 
9 
10 #include "Attribute.h"
11 #include "CheckVisitor.h"
12 #include "Debug.h"
13 #include "Volume.h"
14 #include "Inode.h"
15 #include "Index.h"
16 #include "BPlusTree.h"
17 #include "Query.h"
18 #include "ResizeVisitor.h"
19 #include "bfs_control.h"
20 #include "bfs_disk_system.h"
21 
22 #include <file_systems/fs_ops_support.h>
23 
24 // TODO: temporary solution as long as there is no public I/O requests API
25 #ifndef FS_SHELL
26 #	include <io_requests.h>
27 #	include <util/fs_trim_support.h>
28 #endif
29 
30 
// Transfer size reported via stat::st_blksize and fs_info::io_size.
#define BFS_IO_SIZE	65536

// Suffix strings: empty when BFS_LITTLE_ENDIAN_ONLY is defined, otherwise
// they mark the build as the big endian variant.
#ifdef BFS_LITTLE_ENDIAN_ONLY
#	define BFS_ENDIAN_SUFFIX ""
#	define BFS_ENDIAN_PRETTY_SUFFIX ""
#else
#	define BFS_ENDIAN_SUFFIX "_big"
#	define BFS_ENDIAN_PRETTY_SUFFIX " (Big Endian)"
#endif
40 
41 
42 struct identify_cookie {
43 	disk_super_block super_block;
44 };
45 
46 extern void fill_stat_buffer(Inode* inode, struct stat& stat);
47 
48 
49 static void
50 fill_stat_time(const bfs_inode& node, struct stat& stat)
51 {
52 	bigtime_t now = real_time_clock_usecs();
53 	stat.st_atim.tv_sec = now / 1000000LL;
54 	stat.st_atim.tv_nsec = (now % 1000000LL) * 1000;
55 
56 	stat.st_mtim.tv_sec = bfs_inode::ToSecs(node.LastModifiedTime());
57 	stat.st_mtim.tv_nsec = bfs_inode::ToNsecs(node.LastModifiedTime());
58 	stat.st_crtim.tv_sec = bfs_inode::ToSecs(node.CreateTime());
59 	stat.st_crtim.tv_nsec = bfs_inode::ToNsecs(node.CreateTime());
60 
61 	// For BeOS compatibility, if on-disk ctime is invalid, fall back to mtime:
62 	bigtime_t changeTime = node.StatusChangeTime();
63 	if (changeTime < node.LastModifiedTime())
64 		stat.st_ctim = stat.st_mtim;
65 	else {
66 		stat.st_ctim.tv_sec = bfs_inode::ToSecs(changeTime);
67 		stat.st_ctim.tv_nsec = bfs_inode::ToNsecs(changeTime);
68 	}
69 }
70 
71 
72 void
73 fill_stat_buffer(Inode* inode, struct stat& stat)
74 {
75 	const bfs_inode& node = inode->Node();
76 
77 	stat.st_dev = inode->GetVolume()->ID();
78 	stat.st_ino = inode->ID();
79 	stat.st_nlink = 1;
80 	stat.st_blksize = BFS_IO_SIZE;
81 
82 	stat.st_uid = node.UserID();
83 	stat.st_gid = node.GroupID();
84 	stat.st_mode = node.Mode();
85 	stat.st_type = node.Type();
86 
87 	fill_stat_time(node, stat);
88 
89 	if (inode->IsSymLink() && (inode->Flags() & INODE_LONG_SYMLINK) == 0) {
90 		// symlinks report the size of the link here
91 		stat.st_size = strlen(node.short_symlink);
92 	} else
93 		stat.st_size = inode->Size();
94 
95 	stat.st_blocks = inode->AllocatedSize() / 512;
96 }
97 
98 
99 //!	bfs_io() callback hook
100 static status_t
101 iterative_io_get_vecs_hook(void* cookie, io_request* request, off_t offset,
102 	size_t size, struct file_io_vec* vecs, size_t* _count)
103 {
104 	Inode* inode = (Inode*)cookie;
105 
106 	return file_map_translate(inode->Map(), offset, size, vecs, _count,
107 		inode->GetVolume()->BlockSize());
108 }
109 
110 
111 //!	bfs_io() callback hook
112 static status_t
113 iterative_io_finished_hook(void* cookie, io_request* request, status_t status,
114 	bool partialTransfer, size_t bytesTransferred)
115 {
116 	Inode* inode = (Inode*)cookie;
117 	rw_lock_read_unlock(&inode->Lock());
118 	return B_OK;
119 }
120 
121 
122 //	#pragma mark - Scanning
123 
124 
125 static float
126 bfs_identify_partition(int fd, partition_data* partition, void** _cookie)
127 {
128 	disk_super_block superBlock;
129 	status_t status = Volume::Identify(fd, &superBlock);
130 	if (status != B_OK)
131 		return -1;
132 
133 	identify_cookie* cookie = new(std::nothrow) identify_cookie;
134 	if (cookie == NULL)
135 		return -1;
136 
137 	memcpy(&cookie->super_block, &superBlock, sizeof(disk_super_block));
138 
139 	*_cookie = cookie;
140 	return 0.85f;
141 }
142 
143 
144 static status_t
145 bfs_scan_partition(int fd, partition_data* partition, void* _cookie)
146 {
147 	identify_cookie* cookie = (identify_cookie*)_cookie;
148 
149 	partition->status = B_PARTITION_VALID;
150 	partition->flags |= B_PARTITION_FILE_SYSTEM;
151 	partition->content_size = cookie->super_block.NumBlocks()
152 		* cookie->super_block.BlockSize();
153 	partition->block_size = cookie->super_block.BlockSize();
154 	partition->content_name = strdup(cookie->super_block.name);
155 	if (partition->content_name == NULL)
156 		return B_NO_MEMORY;
157 
158 	return B_OK;
159 }
160 
161 
162 static void
163 bfs_free_identify_partition_cookie(partition_data* partition, void* _cookie)
164 {
165 	identify_cookie* cookie = (identify_cookie*)_cookie;
166 	delete cookie;
167 }
168 
169 
170 //	#pragma mark -
171 
172 
173 static status_t
174 bfs_mount(fs_volume* _volume, const char* device, uint32 flags,
175 	const char* args, ino_t* _rootID)
176 {
177 	FUNCTION();
178 
179 	Volume* volume = new(std::nothrow) Volume(_volume);
180 	if (volume == NULL)
181 		return B_NO_MEMORY;
182 
183 	status_t status = volume->Mount(device, flags);
184 	if (status != B_OK) {
185 		delete volume;
186 		RETURN_ERROR(status);
187 	}
188 
189 	_volume->private_volume = volume;
190 	_volume->ops = &gBFSVolumeOps;
191 	*_rootID = volume->ToVnode(volume->Root());
192 
193 	INFORM(("mounted \"%s\" (root node at %" B_PRIdINO ", device = %s)\n",
194 		volume->Name(), *_rootID, device));
195 	return B_OK;
196 }
197 
198 
199 static status_t
200 bfs_unmount(fs_volume* _volume)
201 {
202 	FUNCTION();
203 	Volume* volume = (Volume*)_volume->private_volume;
204 
205 	status_t status = volume->Unmount();
206 	delete volume;
207 
208 	RETURN_ERROR(status);
209 }
210 
211 
212 static status_t
213 bfs_read_fs_stat(fs_volume* _volume, struct fs_info* info)
214 {
215 	FUNCTION();
216 
217 	Volume* volume = (Volume*)_volume->private_volume;
218 	MutexLocker locker(volume->Lock());
219 
220 	// File system flags.
221 	info->flags = B_FS_IS_PERSISTENT | B_FS_HAS_ATTR | B_FS_HAS_MIME
222 		| (volume->IndicesNode() != NULL ? B_FS_HAS_QUERY : 0)
223 		| (volume->IsReadOnly() ? B_FS_IS_READONLY : 0)
224 		| B_FS_SUPPORTS_MONITOR_CHILDREN;
225 
226 	info->io_size = BFS_IO_SIZE;
227 		// whatever is appropriate here?
228 
229 	info->block_size = volume->BlockSize();
230 	info->total_blocks = volume->NumBlocks();
231 	info->free_blocks = volume->FreeBlocks();
232 
233 	// Volume name
234 	strlcpy(info->volume_name, volume->Name(), sizeof(info->volume_name));
235 
236 	// File system name
237 	strlcpy(info->fsh_name, "bfs", sizeof(info->fsh_name));
238 
239 	return B_OK;
240 }
241 
242 
243 static status_t
244 bfs_write_fs_stat(fs_volume* _volume, const struct fs_info* info, uint32 mask)
245 {
246 	FUNCTION_START(("mask = %" B_PRId32 "\n", mask));
247 
248 	Volume* volume = (Volume*)_volume->private_volume;
249 	if (volume->IsReadOnly())
250 		return B_READ_ONLY_DEVICE;
251 
252 	MutexLocker locker(volume->Lock());
253 
254 	status_t status = B_BAD_VALUE;
255 
256 	if (mask & FS_WRITE_FSINFO_NAME) {
257 		disk_super_block& superBlock = volume->SuperBlock();
258 
259 		strncpy(superBlock.name, info->volume_name,
260 			sizeof(superBlock.name) - 1);
261 		superBlock.name[sizeof(superBlock.name) - 1] = '\0';
262 
263 		status = volume->WriteSuperBlock();
264 	}
265 	return status;
266 }
267 
268 
269 static status_t
270 bfs_sync(fs_volume* _volume)
271 {
272 	FUNCTION();
273 
274 	Volume* volume = (Volume*)_volume->private_volume;
275 	return volume->Sync();
276 }
277 
278 
279 //	#pragma mark -
280 
281 
282 /*!	Reads in the node from disk and creates an inode object from it.
283 */
284 static status_t
285 bfs_get_vnode(fs_volume* _volume, ino_t id, fs_vnode* _node, int* _type,
286 	uint32* _flags, bool reenter)
287 {
288 	//FUNCTION_START(("ino_t = %Ld\n", id));
289 	Volume* volume = (Volume*)_volume->private_volume;
290 
291 	// first inode may be after the log area, we don't go through
292 	// the hassle and try to load an earlier block from disk
293 	if (id < volume->ToBlock(volume->Log()) + volume->Log().Length()
294 		|| id > volume->NumBlocks()) {
295 		INFORM(("inode at %" B_PRIdINO " requested!\n", id));
296 		return B_ERROR;
297 	}
298 
299 	CachedBlock cached(volume);
300 	status_t status = cached.SetTo(id);
301 	if (status != B_OK) {
302 		FATAL(("could not read inode: %" B_PRIdINO ": %s\n", id,
303 			strerror(status)));
304 		return status;
305 	}
306 	bfs_inode* node = (bfs_inode*)cached.Block();
307 
308 	status = node->InitCheck(volume);
309 	if (status != B_OK) {
310 		if ((node->Flags() & INODE_DELETED) != 0) {
311 			INFORM(("inode at %" B_PRIdINO " is already deleted!\n", id));
312 		} else {
313 			FATAL(("inode at %" B_PRIdINO " could not be read: %s!\n", id,
314 				strerror(status)));
315 		}
316 		return status;
317 	}
318 
319 	Inode* inode = new(std::nothrow) Inode(volume, id);
320 	if (inode == NULL)
321 		return B_NO_MEMORY;
322 
323 	status = inode->InitCheck(false);
324 	if (status != B_OK)
325 		delete inode;
326 
327 	if (status == B_OK) {
328 		_node->private_node = inode;
329 		_node->ops = &gBFSVnodeOps;
330 		*_type = inode->Mode();
331 		*_flags = 0;
332 	}
333 
334 	return status;
335 }
336 
337 
338 static status_t
339 bfs_put_vnode(fs_volume* _volume, fs_vnode* _node, bool reenter)
340 {
341 	Volume* volume = (Volume*)_volume->private_volume;
342 	Inode* inode = (Inode*)_node->private_node;
343 
344 	// since a directory's size can be changed without having it opened,
345 	// we need to take care about their preallocated blocks here
346 	if (!volume->IsReadOnly() && !volume->IsCheckingThread()
347 		&& inode->NeedsTrimming()) {
348 		Transaction transaction(volume, inode->BlockNumber());
349 
350 		if (inode->TrimPreallocation(transaction) == B_OK)
351 			transaction.Done();
352 		else if (transaction.HasParent()) {
353 			// TODO: for now, we don't let sub-transactions fail
354 			transaction.Done();
355 		}
356 	}
357 
358 	delete inode;
359 	return B_OK;
360 }
361 
362 
363 static status_t
364 bfs_remove_vnode(fs_volume* _volume, fs_vnode* _node, bool reenter)
365 {
366 	FUNCTION();
367 
368 	Volume* volume = (Volume*)_volume->private_volume;
369 	Inode* inode = (Inode*)_node->private_node;
370 
371 	// If the inode isn't in use anymore, we were called before
372 	// bfs_unlink() returns - in this case, we can just use the
373 	// transaction which has already deleted the inode.
374 	Transaction transaction(volume, volume->ToBlock(inode->Parent()));
375 
376 	// The file system check functionality uses this flag to prevent the space
377 	// used up by the inode from being freed - this flag is set only in
378 	// situations where this does not cause any harm as the block bitmap will
379 	// get fixed anyway in this case).
380 	if ((inode->Flags() & INODE_DONT_FREE_SPACE) != 0) {
381 		delete inode;
382 		return B_OK;
383 	}
384 
385 	ASSERT((inode->Flags() & INODE_DELETED) != 0);
386 
387 	status_t status = inode->Free(transaction);
388 	if (status == B_OK) {
389 		status = transaction.Done();
390 	} else if (transaction.HasParent()) {
391 		// TODO: for now, we don't let sub-transactions fail
392 		status = transaction.Done();
393 	}
394 
395 	volume->RemovedInodes().Remove(inode);
396 
397 	// TODO: the VFS currently does not allow this to fail
398 	delete inode;
399 
400 	return status;
401 }
402 
403 
404 static bool
405 bfs_can_page(fs_volume* _volume, fs_vnode* _v, void* _cookie)
406 {
407 	// TODO: we're obviously not even asked...
408 	return false;
409 }
410 
411 
412 static status_t
413 bfs_read_pages(fs_volume* _volume, fs_vnode* _node, void* _cookie,
414 	off_t pos, const iovec* vecs, size_t count, size_t* _numBytes)
415 {
416 	Volume* volume = (Volume*)_volume->private_volume;
417 	Inode* inode = (Inode*)_node->private_node;
418 
419 	if (inode->FileCache() == NULL)
420 		RETURN_ERROR(B_BAD_VALUE);
421 
422 	InodeReadLocker _(inode);
423 
424 	uint32 vecIndex = 0;
425 	size_t vecOffset = 0;
426 	size_t bytesLeft = *_numBytes;
427 	status_t status;
428 
429 	while (true) {
430 		file_io_vec fileVecs[8];
431 		size_t fileVecCount = 8;
432 
433 		status = file_map_translate(inode->Map(), pos, bytesLeft, fileVecs,
434 			&fileVecCount, 0);
435 		if (status != B_OK && status != B_BUFFER_OVERFLOW)
436 			break;
437 
438 		bool bufferOverflow = status == B_BUFFER_OVERFLOW;
439 
440 		size_t bytes = bytesLeft;
441 		status = read_file_io_vec_pages(volume->Device(), fileVecs,
442 			fileVecCount, vecs, count, &vecIndex, &vecOffset, &bytes);
443 		if (status != B_OK || !bufferOverflow)
444 			break;
445 
446 		pos += bytes;
447 		bytesLeft -= bytes;
448 	}
449 
450 	return status;
451 }
452 
453 
454 static status_t
455 bfs_write_pages(fs_volume* _volume, fs_vnode* _node, void* _cookie,
456 	off_t pos, const iovec* vecs, size_t count, size_t* _numBytes)
457 {
458 	Volume* volume = (Volume*)_volume->private_volume;
459 	Inode* inode = (Inode*)_node->private_node;
460 
461 	if (volume->IsReadOnly())
462 		return B_READ_ONLY_DEVICE;
463 
464 	if (inode->FileCache() == NULL)
465 		RETURN_ERROR(B_BAD_VALUE);
466 
467 	InodeReadLocker _(inode);
468 
469 	uint32 vecIndex = 0;
470 	size_t vecOffset = 0;
471 	size_t bytesLeft = *_numBytes;
472 	status_t status;
473 
474 	while (true) {
475 		file_io_vec fileVecs[8];
476 		size_t fileVecCount = 8;
477 
478 		status = file_map_translate(inode->Map(), pos, bytesLeft, fileVecs,
479 			&fileVecCount, 0);
480 		if (status != B_OK && status != B_BUFFER_OVERFLOW)
481 			break;
482 
483 		bool bufferOverflow = status == B_BUFFER_OVERFLOW;
484 
485 		size_t bytes = bytesLeft;
486 		status = write_file_io_vec_pages(volume->Device(), fileVecs,
487 			fileVecCount, vecs, count, &vecIndex, &vecOffset, &bytes);
488 		if (status != B_OK || !bufferOverflow)
489 			break;
490 
491 		pos += bytes;
492 		bytesLeft -= bytes;
493 	}
494 
495 	return status;
496 }
497 
498 
499 static status_t
500 bfs_io(fs_volume* _volume, fs_vnode* _node, void* _cookie, io_request* request)
501 {
502 	Volume* volume = (Volume*)_volume->private_volume;
503 	Inode* inode = (Inode*)_node->private_node;
504 
505 #ifndef FS_SHELL
506 	if (io_request_is_write(request) && volume->IsReadOnly()) {
507 		notify_io_request(request, B_READ_ONLY_DEVICE);
508 		return B_READ_ONLY_DEVICE;
509 	}
510 #endif
511 
512 	if (inode->FileCache() == NULL) {
513 #ifndef FS_SHELL
514 		notify_io_request(request, B_BAD_VALUE);
515 #endif
516 		RETURN_ERROR(B_BAD_VALUE);
517 	}
518 
519 	// We lock the node here and will unlock it in the "finished" hook.
520 	rw_lock_read_lock(&inode->Lock());
521 
522 	return do_iterative_fd_io(volume->Device(), request,
523 		iterative_io_get_vecs_hook, iterative_io_finished_hook, inode);
524 }
525 
526 
527 static status_t
528 bfs_get_file_map(fs_volume* _volume, fs_vnode* _node, off_t offset, size_t size,
529 	struct file_io_vec* vecs, size_t* _count)
530 {
531 	Volume* volume = (Volume*)_volume->private_volume;
532 	Inode* inode = (Inode*)_node->private_node;
533 
534 	int32 blockShift = volume->BlockShift();
535 	uint32 index = 0, max = *_count;
536 	block_run run;
537 	off_t fileOffset;
538 
539 	//FUNCTION_START(("offset = %Ld, size = %lu\n", offset, size));
540 
541 	while (true) {
542 		status_t status = inode->FindBlockRun(offset, run, fileOffset);
543 		if (status != B_OK)
544 			return status;
545 
546 		vecs[index].offset = volume->ToOffset(run) + offset - fileOffset;
547 		vecs[index].length = ((uint32)run.Length() << blockShift)
548 			- offset + fileOffset;
549 
550 		// are we already done?
551 		if ((uint64)size <= (uint64)vecs[index].length
552 			|| (uint64)offset + (uint64)vecs[index].length
553 				>= (uint64)inode->Size()) {
554 			if ((uint64)offset + (uint64)vecs[index].length
555 					> (uint64)inode->Size()) {
556 				// make sure the extent ends with the last official file
557 				// block (without taking any preallocations into account)
558 				vecs[index].length = round_up(inode->Size() - offset,
559 					volume->BlockSize());
560 			}
561 			*_count = index + 1;
562 			return B_OK;
563 		}
564 
565 		offset += vecs[index].length;
566 		size -= vecs[index].length;
567 		index++;
568 
569 		if (index >= max) {
570 			// we're out of file_io_vecs; let's bail out
571 			*_count = index;
572 			return B_BUFFER_OVERFLOW;
573 		}
574 	}
575 
576 	// can never get here
577 	return B_ERROR;
578 }
579 
580 
581 //	#pragma mark -
582 
583 
584 static status_t
585 bfs_lookup(fs_volume* _volume, fs_vnode* _directory, const char* file,
586 	ino_t* _vnodeID)
587 {
588 	Volume* volume = (Volume*)_volume->private_volume;
589 	Inode* directory = (Inode*)_directory->private_node;
590 
591 	InodeReadLocker locker(directory);
592 
593 	// check access permissions
594 	status_t status = directory->CheckPermissions(X_OK);
595 	if (status != B_OK)
596 		RETURN_ERROR(status);
597 
598 	BPlusTree* tree = directory->Tree();
599 	if (tree == NULL)
600 		RETURN_ERROR(B_BAD_VALUE);
601 
602 	status = tree->Find((uint8*)file, (uint16)strlen(file), _vnodeID);
603 	if (status != B_OK) {
604 		//PRINT(("bfs_walk() could not find %Ld:\"%s\": %s\n", directory->BlockNumber(), file, strerror(status)));
605 		if (status == B_ENTRY_NOT_FOUND)
606 			entry_cache_add_missing(volume->ID(), directory->ID(), file);
607 
608 		return status;
609 	}
610 
611 	entry_cache_add(volume->ID(), directory->ID(), file, *_vnodeID);
612 
613 	locker.Unlock();
614 
615 	Inode* inode;
616 	status = get_vnode(volume->FSVolume(), *_vnodeID, (void**)&inode);
617 	if (status != B_OK) {
618 		REPORT_ERROR(status);
619 		return B_ENTRY_NOT_FOUND;
620 	}
621 
622 	return B_OK;
623 }
624 
625 
626 static status_t
627 bfs_get_vnode_name(fs_volume* _volume, fs_vnode* _node, char* buffer,
628 	size_t bufferSize)
629 {
630 	Inode* inode = (Inode*)_node->private_node;
631 
632 	return inode->GetName(buffer, bufferSize);
633 }
634 
635 
636 static status_t
637 bfs_ioctl(fs_volume* _volume, fs_vnode* _node, void* _cookie, uint32 cmd,
638 	void* buffer, size_t bufferLength)
639 {
640 	FUNCTION_START(("node = %p, cmd = %" B_PRIu32 ", buf = %p"
641 		", len = %" B_PRIuSIZE "\n", _node, cmd, buffer, bufferLength));
642 
643 	Volume* volume = (Volume*)_volume->private_volume;
644 
645 	switch (cmd) {
646 #ifndef FS_SHELL
647 		case B_TRIM_DEVICE:
648 		{
649 			fs_trim_data* trimData;
650 			MemoryDeleter deleter;
651 			status_t status = get_trim_data_from_user(buffer, bufferLength,
652 				deleter, trimData);
653 			if (status != B_OK)
654 				return status;
655 
656 			trimData->trimmed_size = 0;
657 
658 			for (uint32 i = 0; i < trimData->range_count; i++) {
659 				uint64 trimmedSize = 0;
660 				status_t status = volume->Allocator().Trim(
661 					trimData->ranges[i].offset, trimData->ranges[i].size,
662 					trimmedSize);
663 				if (status != B_OK)
664 					return status;
665 
666 				trimData->trimmed_size += trimmedSize;
667 			}
668 
669 			return copy_trim_data_to_user(buffer, trimData);
670 		}
671 #endif
672 
673 		case BFS_IOCTL_VERSION:
674 		{
675 			uint32 version = 0x10000;
676 			return user_memcpy(buffer, &version, sizeof(uint32));
677 		}
678 		case BFS_IOCTL_START_CHECKING:
679 		{
680 			// start checking
681 			status_t status = volume->CreateCheckVisitor();
682 			if (status != B_OK)
683 				return status;
684 
685 			CheckVisitor* checker = volume->CheckVisitor();
686 
687 			if (user_memcpy(&checker->Control(), buffer,
688 					sizeof(check_control)) != B_OK) {
689 				return B_BAD_ADDRESS;
690 			}
691 
692 			status = checker->StartBitmapPass();
693 			if (status == B_OK) {
694 				file_cookie* cookie = (file_cookie*)_cookie;
695 				cookie->open_mode |= BFS_OPEN_MODE_CHECKING;
696 			}
697 
698 			return status;
699 		}
700 		case BFS_IOCTL_STOP_CHECKING:
701 		{
702 			// stop checking
703 			CheckVisitor* checker = volume->CheckVisitor();
704 			if (checker == NULL)
705 				return B_NO_INIT;
706 
707 			status_t status = checker->StopChecking();
708 
709 			if (status == B_OK) {
710 				file_cookie* cookie = (file_cookie*)_cookie;
711 				cookie->open_mode &= ~BFS_OPEN_MODE_CHECKING;
712 
713 				status = user_memcpy(buffer, &checker->Control(),
714 					sizeof(check_control));
715 			}
716 
717 			volume->DeleteCheckVisitor();
718 			volume->SetCheckingThread(-1);
719 
720 			return status;
721 		}
722 		case BFS_IOCTL_CHECK_NEXT_NODE:
723 		{
724 			// check next
725 			CheckVisitor* checker = volume->CheckVisitor();
726 			if (checker == NULL)
727 				return B_NO_INIT;
728 
729 			volume->SetCheckingThread(find_thread(NULL));
730 
731 			checker->Control().errors = 0;
732 
733 			status_t status = checker->Next();
734 			if (status == B_ENTRY_NOT_FOUND) {
735 				checker->Control().status = B_ENTRY_NOT_FOUND;
736 					// tells StopChecking() that we finished the pass
737 
738 				if (checker->Pass() == BFS_CHECK_PASS_BITMAP) {
739 					if (checker->WriteBackCheckBitmap() == B_OK)
740 						status = checker->StartIndexPass();
741 				}
742 			}
743 
744 			if (status == B_OK) {
745 				status = user_memcpy(buffer, &checker->Control(),
746 					sizeof(check_control));
747 			}
748 
749 			return status;
750 		}
751 		case BFS_IOCTL_UPDATE_BOOT_BLOCK:
752 		{
753 			// let's makebootable (or anyone else) update the boot block
754 			// while BFS is mounted
755 			update_boot_block update;
756 			if (bufferLength != sizeof(update_boot_block))
757 				return B_BAD_VALUE;
758 			if (user_memcpy(&update, buffer, sizeof(update_boot_block)) != B_OK)
759 				return B_BAD_ADDRESS;
760 
761 			uint32 minOffset = offsetof(disk_super_block, pad_to_block);
762 			if (update.offset < minOffset
763 				|| update.offset >= 512 || update.length > 512 - minOffset
764 				|| update.length + update.offset > 512) {
765 				return B_BAD_VALUE;
766 			}
767 			if (user_memcpy((uint8*)&volume->SuperBlock() + update.offset,
768 					update.data, update.length) != B_OK) {
769 				return B_BAD_ADDRESS;
770 			}
771 
772 			return volume->WriteSuperBlock();
773 		}
774 		case BFS_IOCTL_RESIZE:
775 		{
776 			if (bufferLength != sizeof(uint64))
777 				return B_BAD_VALUE;
778 
779 			uint64 size;
780 			if (user_memcpy((uint8*)&size, buffer, sizeof(uint64)) != B_OK)
781 				return B_BAD_ADDRESS;
782 
783 			ResizeVisitor resizer(volume);
784 			return resizer.Resize(size, -1);
785 		}
786 
787 #ifdef DEBUG_FRAGMENTER
788 		case 56741:
789 		{
790 			BlockAllocator& allocator = volume->Allocator();
791 			allocator.Fragment();
792 			return B_OK;
793 		}
794 #endif
795 
796 #ifdef DEBUG
797 		case 56742:
798 		{
799 			// allocate all free blocks and zero them out
800 			// (a test for the BlockAllocator)!
801 			BlockAllocator& allocator = volume->Allocator();
802 			Transaction transaction(volume, 0);
803 			CachedBlock cached(volume);
804 			block_run run;
805 			while (allocator.AllocateBlocks(transaction, 8, 0, 64, 1, run)
806 					== B_OK) {
807 				PRINT(("write block_run(%" B_PRId32 ", %" B_PRIu16
808 					", %" B_PRIu16 ")\n", run.allocation_group, run.start,
809 					run.length));
810 
811 				for (int32 i = 0;i < run.length;i++) {
812 					status_t status = cached.SetToWritable(transaction, run);
813 					if (status == B_OK)
814 						memset(cached.WritableBlock(), 0, volume->BlockSize());
815 				}
816 			}
817 			return B_OK;
818 		}
819 #endif
820 	}
821 	return B_DEV_INVALID_IOCTL;
822 }
823 
824 
825 /*!	Sets the open-mode flags for the open file cookie - only
826 	supports O_APPEND currently, but that should be sufficient
827 	for a file system.
828 */
829 static status_t
830 bfs_set_flags(fs_volume* _volume, fs_vnode* _node, void* _cookie, int flags)
831 {
832 	FUNCTION_START(("node = %p, flags = %d", _node, flags));
833 
834 	file_cookie* cookie = (file_cookie*)_cookie;
835 	cookie->open_mode = (cookie->open_mode & ~O_APPEND) | (flags & O_APPEND);
836 
837 	return B_OK;
838 }
839 
840 
841 static status_t
842 bfs_fsync(fs_volume* _volume, fs_vnode* _node)
843 {
844 	FUNCTION();
845 
846 	Inode* inode = (Inode*)_node->private_node;
847 	return inode->Sync();
848 }
849 
850 
851 static status_t
852 bfs_read_stat(fs_volume* _volume, fs_vnode* _node, struct stat* stat)
853 {
854 	FUNCTION();
855 
856 	Inode* inode = (Inode*)_node->private_node;
857 	fill_stat_buffer(inode, *stat);
858 	return B_OK;
859 }
860 
861 
862 static status_t
863 bfs_write_stat(fs_volume* _volume, fs_vnode* _node, const struct stat* stat,
864 	uint32 mask)
865 {
866 	FUNCTION();
867 
868 	Volume* volume = (Volume*)_volume->private_volume;
869 	Inode* inode = (Inode*)_node->private_node;
870 
871 	if (volume->IsReadOnly())
872 		return B_READ_ONLY_DEVICE;
873 
874 	// TODO: we should definitely check a bit more if the new stats are
875 	//	valid - or even better, the VFS should check this before calling us
876 
877 	bfs_inode& node = inode->Node();
878 	bool updateTime = false;
879 	uid_t uid = geteuid();
880 
881 	bool isOwnerOrRoot = uid == 0 || uid == (uid_t)node.UserID();
882 	bool hasWriteAccess = inode->CheckPermissions(W_OK) == B_OK;
883 
884 	Transaction transaction(volume, inode->BlockNumber());
885 	inode->WriteLockInTransaction(transaction);
886 
887 	if ((mask & B_STAT_SIZE) != 0 && inode->Size() != stat->st_size) {
888 		// Since B_STAT_SIZE is the only thing that can fail directly, we
889 		// do it first, so that the inode state will still be consistent
890 		// with the on-disk version
891 		if (inode->IsDirectory())
892 			return B_IS_A_DIRECTORY;
893 		if (!inode->IsFile())
894 			return B_BAD_VALUE;
895 		if (!hasWriteAccess)
896 			RETURN_ERROR(B_NOT_ALLOWED);
897 
898 		off_t oldSize = inode->Size();
899 
900 		status_t status = inode->SetFileSize(transaction, stat->st_size);
901 		if (status != B_OK)
902 			return status;
903 
904 		// fill the new blocks (if any) with zeros
905 		if ((mask & B_STAT_SIZE_INSECURE) == 0) {
906 			// We must not keep the inode locked during a write operation,
907 			// or else we might deadlock.
908 			rw_lock_write_unlock(&inode->Lock());
909 			inode->FillGapWithZeros(oldSize, inode->Size());
910 			rw_lock_write_lock(&inode->Lock());
911 		}
912 
913 		if (!inode->IsDeleted()) {
914 			Index index(volume);
915 			index.UpdateSize(transaction, inode);
916 
917 			updateTime = true;
918 		}
919 	}
920 
921 	if ((mask & B_STAT_UID) != 0) {
922 		// only root should be allowed
923 		if (uid != 0)
924 			RETURN_ERROR(B_NOT_ALLOWED);
925 		node.uid = HOST_ENDIAN_TO_BFS_INT32(stat->st_uid);
926 		updateTime = true;
927 	}
928 
929 	if ((mask & B_STAT_GID) != 0) {
930 		// only the user or root can do that
931 		if (!isOwnerOrRoot)
932 			RETURN_ERROR(B_NOT_ALLOWED);
933 		node.gid = HOST_ENDIAN_TO_BFS_INT32(stat->st_gid);
934 		updateTime = true;
935 	}
936 
937 	if ((mask & B_STAT_MODE) != 0) {
938 		// only the user or root can do that
939 		if (!isOwnerOrRoot)
940 			RETURN_ERROR(B_NOT_ALLOWED);
941 		PRINT(("original mode = %u, stat->st_mode = %u\n",
942 			(unsigned int)node.Mode(), (unsigned int)stat->st_mode));
943 		node.mode = HOST_ENDIAN_TO_BFS_INT32((node.Mode() & ~S_IUMSK)
944 			| (stat->st_mode & S_IUMSK));
945 		updateTime = true;
946 	}
947 
948 	if ((mask & B_STAT_CREATION_TIME) != 0) {
949 		// the user or root can do that or any user with write access
950 		if (!isOwnerOrRoot && !hasWriteAccess)
951 			RETURN_ERROR(B_NOT_ALLOWED);
952 		node.create_time
953 			= HOST_ENDIAN_TO_BFS_INT64(bfs_inode::ToInode(stat->st_crtim));
954 	}
955 
956 	if ((mask & B_STAT_MODIFICATION_TIME) != 0) {
957 		// the user or root can do that or any user with write access
958 		if (!isOwnerOrRoot && !hasWriteAccess)
959 			RETURN_ERROR(B_NOT_ALLOWED);
960 		if (!inode->InLastModifiedIndex()) {
961 			// directory modification times are not part of the index
962 			node.last_modified_time
963 				= HOST_ENDIAN_TO_BFS_INT64(bfs_inode::ToInode(stat->st_mtim));
964 		} else if (!inode->IsDeleted()) {
965 			// Index::UpdateLastModified() will set the new time in the inode
966 			Index index(volume);
967 			index.UpdateLastModified(transaction, inode,
968 				bfs_inode::ToInode(stat->st_mtim));
969 		}
970 	}
971 
972 	if ((mask & B_STAT_CHANGE_TIME) != 0 || updateTime) {
973 		// the user or root can do that or any user with write access
974 		if (!isOwnerOrRoot && !hasWriteAccess)
975 			RETURN_ERROR(B_NOT_ALLOWED);
976 		bigtime_t newTime;
977 		if ((mask & B_STAT_CHANGE_TIME) == 0)
978 			newTime = bfs_inode::ToInode(real_time_clock_usecs());
979 		else
980 			newTime = bfs_inode::ToInode(stat->st_ctim);
981 
982 		node.status_change_time = HOST_ENDIAN_TO_BFS_INT64(newTime);
983 	}
984 
985 	status_t status = inode->WriteBack(transaction);
986 	if (status == B_OK)
987 		status = transaction.Done();
988 	if (status == B_OK)
989 		notify_stat_changed(volume->ID(), inode->ParentID(), inode->ID(), mask);
990 
991 	return status;
992 }
993 
994 
995 status_t
996 bfs_create(fs_volume* _volume, fs_vnode* _directory, const char* name,
997 	int openMode, int mode, void** _cookie, ino_t* _vnodeID)
998 {
999 	FUNCTION_START(("name = \"%s\", perms = %d, openMode = %d\n", name, mode,
1000 		openMode));
1001 
1002 	Volume* volume = (Volume*)_volume->private_volume;
1003 	Inode* directory = (Inode*)_directory->private_node;
1004 
1005 	if (volume->IsReadOnly())
1006 		return B_READ_ONLY_DEVICE;
1007 
1008 	if (!directory->IsDirectory())
1009 		RETURN_ERROR(B_BAD_TYPE);
1010 
1011 	// We are creating the cookie at this point, so that we don't have
1012 	// to remove the inode if we don't have enough free memory later...
1013 	file_cookie* cookie = new(std::nothrow) file_cookie;
1014 	if (cookie == NULL)
1015 		RETURN_ERROR(B_NO_MEMORY);
1016 
1017 	// initialize the cookie
1018 	cookie->open_mode = openMode;
1019 	cookie->last_size = 0;
1020 	cookie->last_notification = system_time();
1021 
1022 	Transaction transaction(volume, directory->BlockNumber());
1023 
1024 	Inode* inode;
1025 	bool created;
1026 	status_t status = Inode::Create(transaction, directory, name,
1027 		S_FILE | (mode & S_IUMSK), openMode, 0, &created, _vnodeID, &inode);
1028 
1029 	// Disable the file cache, if requested?
1030 	if (status == B_OK && (openMode & O_NOCACHE) != 0
1031 		&& inode->FileCache() != NULL) {
1032 		status = file_cache_disable(inode->FileCache());
1033 	}
1034 
1035 	entry_cache_add(volume->ID(), directory->ID(), name, *_vnodeID);
1036 
1037 	if (status == B_OK)
1038 		status = transaction.Done();
1039 
1040 	if (status == B_OK) {
1041 		// register the cookie
1042 		*_cookie = cookie;
1043 
1044 		if (created) {
1045 			notify_entry_created(volume->ID(), directory->ID(), name,
1046 				*_vnodeID);
1047 		}
1048 	} else {
1049 		entry_cache_remove(volume->ID(), directory->ID(), name);
1050 		delete cookie;
1051 	}
1052 
1053 	return status;
1054 }
1055 
1056 
1057 static status_t
1058 bfs_create_symlink(fs_volume* _volume, fs_vnode* _directory, const char* name,
1059 	const char* path, int mode)
1060 {
1061 	FUNCTION_START(("name = \"%s\", path = \"%s\"\n", name, path));
1062 
1063 	Volume* volume = (Volume*)_volume->private_volume;
1064 	Inode* directory = (Inode*)_directory->private_node;
1065 
1066 	if (volume->IsReadOnly())
1067 		return B_READ_ONLY_DEVICE;
1068 
1069 	if (!directory->IsDirectory())
1070 		RETURN_ERROR(B_BAD_TYPE);
1071 
1072 	status_t status = directory->CheckPermissions(W_OK);
1073 	if (status < B_OK)
1074 		RETURN_ERROR(status);
1075 
1076 	Transaction transaction(volume, directory->BlockNumber());
1077 
1078 	Inode* link;
1079 	off_t id;
1080 	status = Inode::Create(transaction, directory, name, S_SYMLINK | 0777,
1081 		0, 0, NULL, &id, &link);
1082 	if (status < B_OK)
1083 		RETURN_ERROR(status);
1084 
1085 	size_t length = strlen(path);
1086 	if (length < SHORT_SYMLINK_NAME_LENGTH) {
1087 		strcpy(link->Node().short_symlink, path);
1088 	} else {
1089 		link->Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_LONG_SYMLINK
1090 			| INODE_LOGGED);
1091 
1092 		// links usually don't have a file cache attached - but we now need one
1093 		link->SetFileCache(file_cache_create(volume->ID(), link->ID(), 0));
1094 		link->SetMap(file_map_create(volume->ID(), link->ID(), 0));
1095 
1096 		// The following call will have to write the inode back, so
1097 		// we don't have to do that here...
1098 		status = link->WriteAt(transaction, 0, (const uint8*)path, &length);
1099 	}
1100 
1101 	if (status == B_OK)
1102 		status = link->WriteBack(transaction);
1103 
1104 	// Inode::Create() left the inode locked in memory, and also doesn't
1105 	// publish links
1106 	publish_vnode(volume->FSVolume(), id, link, &gBFSVnodeOps, link->Mode(), 0);
1107 	put_vnode(volume->FSVolume(), id);
1108 
1109 	if (status == B_OK) {
1110 		entry_cache_add(volume->ID(), directory->ID(), name, id);
1111 
1112 		status = transaction.Done();
1113 		if (status == B_OK)
1114 			notify_entry_created(volume->ID(), directory->ID(), name, id);
1115 		else
1116 			entry_cache_remove(volume->ID(), directory->ID(), name);
1117 	}
1118 
1119 	return status;
1120 }
1121 
1122 
1123 status_t
1124 bfs_link(fs_volume* _volume, fs_vnode* dir, const char* name, fs_vnode* node)
1125 {
1126 	FUNCTION_START(("name = \"%s\"\n", name));
1127 
1128 	// This one won't be implemented in a binary compatible BFS
1129 	return B_UNSUPPORTED;
1130 }
1131 
1132 
1133 status_t
1134 bfs_unlink(fs_volume* _volume, fs_vnode* _directory, const char* name)
1135 {
1136 	FUNCTION_START(("name = \"%s\"\n", name));
1137 
1138 	if (!strcmp(name, "..") || !strcmp(name, "."))
1139 		return B_NOT_ALLOWED;
1140 
1141 	Volume* volume = (Volume*)_volume->private_volume;
1142 	Inode* directory = (Inode*)_directory->private_node;
1143 
1144 	status_t status = directory->CheckPermissions(W_OK);
1145 	if (status < B_OK)
1146 		return status;
1147 
1148 	Transaction transaction(volume, directory->BlockNumber());
1149 
1150 	off_t id;
1151 	status = directory->Remove(transaction, name, &id);
1152 	if (status == B_OK) {
1153 		entry_cache_remove(volume->ID(), directory->ID(), name);
1154 
1155 		status = transaction.Done();
1156 		if (status == B_OK)
1157 			notify_entry_removed(volume->ID(), directory->ID(), name, id);
1158 		else
1159 			entry_cache_add(volume->ID(), directory->ID(), name, id);
1160 	}
1161 	return status;
1162 }
1163 
1164 
1165 status_t
1166 bfs_rename(fs_volume* _volume, fs_vnode* _oldDir, const char* oldName,
1167 	fs_vnode* _newDir, const char* newName)
1168 {
1169 	FUNCTION_START(("oldDir = %p, oldName = \"%s\", newDir = %p, newName = "
1170 		"\"%s\"\n", _oldDir, oldName, _newDir, newName));
1171 
1172 	Volume* volume = (Volume*)_volume->private_volume;
1173 	Inode* oldDirectory = (Inode*)_oldDir->private_node;
1174 	Inode* newDirectory = (Inode*)_newDir->private_node;
1175 
1176 	// are we already done?
1177 	if (oldDirectory == newDirectory && !strcmp(oldName, newName))
1178 		return B_OK;
1179 
1180 	Transaction transaction(volume, oldDirectory->BlockNumber());
1181 
1182 	oldDirectory->WriteLockInTransaction(transaction);
1183 	if (oldDirectory != newDirectory)
1184 		newDirectory->WriteLockInTransaction(transaction);
1185 
1186 	// are we allowed to do what we've been told?
1187 	status_t status = oldDirectory->CheckPermissions(W_OK);
1188 	if (status == B_OK)
1189 		status = newDirectory->CheckPermissions(W_OK);
1190 	if (status != B_OK)
1191 		return status;
1192 
1193 	// Get the directory's tree, and a pointer to the inode which should be
1194 	// changed
1195 	BPlusTree* tree = oldDirectory->Tree();
1196 	if (tree == NULL)
1197 		RETURN_ERROR(B_BAD_VALUE);
1198 
1199 	off_t id;
1200 	status = tree->Find((const uint8*)oldName, strlen(oldName), &id);
1201 	if (status != B_OK)
1202 		RETURN_ERROR(status);
1203 
1204 	Vnode vnode(volume, id);
1205 	Inode* inode;
1206 	if (vnode.Get(&inode) != B_OK)
1207 		return B_IO_ERROR;
1208 
1209 	// Don't move a directory into one of its children - we soar up
1210 	// from the newDirectory to either the root node or the old
1211 	// directory, whichever comes first.
1212 	// If we meet our inode on that way, we have to bail out.
1213 
1214 	if (oldDirectory != newDirectory) {
1215 		ino_t parent = newDirectory->ID();
1216 		ino_t root = volume->RootNode()->ID();
1217 
1218 		while (true) {
1219 			if (parent == id)
1220 				return B_BAD_VALUE;
1221 			else if (parent == root || parent == oldDirectory->ID())
1222 				break;
1223 
1224 			Vnode vnode(volume, parent);
1225 			Inode* parentNode;
1226 			if (vnode.Get(&parentNode) != B_OK)
1227 				return B_ERROR;
1228 
1229 			parent = volume->ToVnode(parentNode->Parent());
1230 		}
1231 	}
1232 
1233 	// Everything okay? Then lets get to work...
1234 
1235 	// First, try to make sure there is nothing that will stop us in
1236 	// the target directory - since this is the only non-critical
1237 	// failure, we will test this case first
1238 	BPlusTree* newTree = tree;
1239 	if (newDirectory != oldDirectory) {
1240 		newTree = newDirectory->Tree();
1241 		if (newTree == NULL)
1242 			RETURN_ERROR(B_BAD_VALUE);
1243 	}
1244 
1245 	status = newTree->Insert(transaction, (const uint8*)newName,
1246 		strlen(newName), id);
1247 	if (status == B_NAME_IN_USE) {
1248 		// If there is already a file with that name, we have to remove
1249 		// it, as long it's not a directory with files in it
1250 		off_t clobber;
1251 		if (newTree->Find((const uint8*)newName, strlen(newName), &clobber)
1252 				< B_OK)
1253 			return B_NAME_IN_USE;
1254 		if (clobber == id)
1255 			return B_BAD_VALUE;
1256 
1257 		Vnode vnode(volume, clobber);
1258 		Inode* other;
1259 		if (vnode.Get(&other) < B_OK)
1260 			return B_NAME_IN_USE;
1261 
1262 		// only allowed, if either both nodes are directories or neither is
1263 		if (inode->IsDirectory() != other->IsDirectory())
1264 			return other->IsDirectory() ? B_IS_A_DIRECTORY : B_NOT_A_DIRECTORY;
1265 
1266 		status = newDirectory->Remove(transaction, newName, NULL,
1267 			other->IsDirectory());
1268 		if (status < B_OK)
1269 			return status;
1270 
1271 		entry_cache_remove(volume->ID(), newDirectory->ID(), newName);
1272 
1273 		notify_entry_removed(volume->ID(), newDirectory->ID(), newName,
1274 			clobber);
1275 
1276 		status = newTree->Insert(transaction, (const uint8*)newName,
1277 			strlen(newName), id);
1278 	}
1279 	if (status != B_OK)
1280 		return status;
1281 
1282 	inode->WriteLockInTransaction(transaction);
1283 
1284 	volume->UpdateLiveQueriesRenameMove(inode, oldDirectory->ID(), oldName,
1285 		newDirectory->ID(), newName);
1286 
1287 	// update the name only when they differ
1288 	if (strcmp(oldName, newName)) {
1289 		status = inode->SetName(transaction, newName);
1290 		if (status == B_OK) {
1291 			Index index(volume);
1292 			index.UpdateName(transaction, oldName, newName, inode);
1293 		}
1294 	}
1295 
1296 	if (status == B_OK) {
1297 		status = tree->Remove(transaction, (const uint8*)oldName,
1298 			strlen(oldName), id);
1299 		if (status == B_OK) {
1300 			inode->Parent() = newDirectory->BlockRun();
1301 
1302 			// if it's a directory, update the parent directory pointer
1303 			// in its tree if necessary
1304 			BPlusTree* movedTree = inode->Tree();
1305 			if (oldDirectory != newDirectory
1306 				&& inode->IsDirectory()
1307 				&& movedTree != NULL) {
1308 				status = movedTree->Replace(transaction, (const uint8*)"..",
1309 					2, newDirectory->ID());
1310 
1311 				if (status == B_OK) {
1312 					// update/add the cache entry for the parent
1313 					entry_cache_add(volume->ID(), id, "..", newDirectory->ID());
1314 				}
1315 			}
1316 
1317 			if (status == B_OK && newDirectory != oldDirectory)
1318 				status = oldDirectory->ContainerContentsChanged(transaction);
1319 			if (status == B_OK)
1320 				status = newDirectory->ContainerContentsChanged(transaction);
1321 
1322 			if (status == B_OK)
1323 				status = inode->WriteBack(transaction);
1324 
1325 			if (status == B_OK) {
1326 				entry_cache_remove(volume->ID(), oldDirectory->ID(), oldName);
1327 				entry_cache_add(volume->ID(), newDirectory->ID(), newName, id);
1328 
1329 				status = transaction.Done();
1330 				if (status == B_OK) {
1331 					notify_entry_moved(volume->ID(), oldDirectory->ID(),
1332 						oldName, newDirectory->ID(), newName, id);
1333 					return B_OK;
1334 				}
1335 
1336 				entry_cache_remove(volume->ID(), newDirectory->ID(), newName);
1337 				entry_cache_add(volume->ID(), oldDirectory->ID(), oldName, id);
1338 			}
1339 		}
1340 	}
1341 
1342 	return status;
1343 }
1344 
1345 
1346 static status_t
1347 bfs_open(fs_volume* _volume, fs_vnode* _node, int openMode, void** _cookie)
1348 {
1349 	FUNCTION();
1350 
1351 	Volume* volume = (Volume*)_volume->private_volume;
1352 	Inode* inode = (Inode*)_node->private_node;
1353 
1354 	// Opening a directory read-only is allowed, although you can't read
1355 	// any data from it.
1356 	if (inode->IsDirectory() && (openMode & O_RWMASK) != O_RDONLY)
1357 		return B_IS_A_DIRECTORY;
1358 	if ((openMode & O_DIRECTORY) != 0 && !inode->IsDirectory())
1359 		return B_NOT_A_DIRECTORY;
1360 
1361 	status_t status = inode->CheckPermissions(open_mode_to_access(openMode)
1362 		| ((openMode & O_TRUNC) != 0 ? W_OK : 0));
1363 	if (status != B_OK)
1364 		RETURN_ERROR(status);
1365 
1366 	file_cookie* cookie = new(std::nothrow) file_cookie;
1367 	if (cookie == NULL)
1368 		RETURN_ERROR(B_NO_MEMORY);
1369 	ObjectDeleter<file_cookie> cookieDeleter(cookie);
1370 
1371 	// initialize the cookie
1372 	cookie->open_mode = openMode & BFS_OPEN_MODE_USER_MASK;
1373 	cookie->last_size = inode->Size();
1374 	cookie->last_notification = system_time();
1375 
1376 	// Disable the file cache, if requested?
1377 	CObjectDeleter<void, void, file_cache_enable> fileCacheEnabler;
1378 	if ((openMode & O_NOCACHE) != 0 && inode->FileCache() != NULL) {
1379 		status = file_cache_disable(inode->FileCache());
1380 		if (status != B_OK)
1381 			return status;
1382 		fileCacheEnabler.SetTo(inode->FileCache());
1383 	}
1384 
1385 	// Should we truncate the file?
1386 	if ((openMode & O_TRUNC) != 0) {
1387 		if ((openMode & O_RWMASK) == O_RDONLY)
1388 			return B_NOT_ALLOWED;
1389 
1390 		Transaction transaction(volume, inode->BlockNumber());
1391 		inode->WriteLockInTransaction(transaction);
1392 
1393 		status_t status = inode->SetFileSize(transaction, 0);
1394 		if (status == B_OK)
1395 			status = inode->WriteBack(transaction);
1396 		if (status == B_OK)
1397 			status = transaction.Done();
1398 		if (status != B_OK)
1399 			return status;
1400 	}
1401 
1402 	fileCacheEnabler.Detach();
1403 	cookieDeleter.Detach();
1404 	*_cookie = cookie;
1405 	return B_OK;
1406 }
1407 
1408 
1409 static status_t
1410 bfs_read(fs_volume* _volume, fs_vnode* _node, void* _cookie, off_t pos,
1411 	void* buffer, size_t* _length)
1412 {
1413 	//FUNCTION();
1414 	Inode* inode = (Inode*)_node->private_node;
1415 
1416 	if (!inode->HasUserAccessableStream()) {
1417 		*_length = 0;
1418 		return inode->IsDirectory() ? B_IS_A_DIRECTORY : B_BAD_VALUE;
1419 	}
1420 
1421 	return inode->ReadAt(pos, (uint8*)buffer, _length);
1422 }
1423 
1424 
1425 static status_t
1426 bfs_write(fs_volume* _volume, fs_vnode* _node, void* _cookie, off_t pos,
1427 	const void* buffer, size_t* _length)
1428 {
1429 	//FUNCTION();
1430 	Volume* volume = (Volume*)_volume->private_volume;
1431 	Inode* inode = (Inode*)_node->private_node;
1432 
1433 	if (volume->IsReadOnly())
1434 		return B_READ_ONLY_DEVICE;
1435 
1436 	if (!inode->HasUserAccessableStream()) {
1437 		*_length = 0;
1438 		return inode->IsDirectory() ? B_IS_A_DIRECTORY : B_BAD_VALUE;
1439 	}
1440 
1441 	file_cookie* cookie = (file_cookie*)_cookie;
1442 
1443 	if (cookie->open_mode & O_APPEND)
1444 		pos = inode->Size();
1445 
1446 	Transaction transaction;
1447 		// We are not starting the transaction here, since
1448 		// it might not be needed at all (the contents of
1449 		// regular files aren't logged)
1450 
1451 	status_t status = inode->WriteAt(transaction, pos, (const uint8*)buffer,
1452 		_length);
1453 	if (status == B_OK)
1454 		status = transaction.Done();
1455 	if (status == B_OK) {
1456 		InodeReadLocker locker(inode);
1457 
1458 		// periodically notify if the file size has changed
1459 		// TODO: should we better test for a change in the last_modified time only?
1460 		if (!inode->IsDeleted() && cookie->last_size != inode->Size()
1461 			&& system_time() > cookie->last_notification
1462 					+ INODE_NOTIFICATION_INTERVAL) {
1463 			notify_stat_changed(volume->ID(), inode->ParentID(), inode->ID(),
1464 				B_STAT_MODIFICATION_TIME | B_STAT_SIZE | B_STAT_INTERIM_UPDATE);
1465 			cookie->last_size = inode->Size();
1466 			cookie->last_notification = system_time();
1467 		}
1468 	}
1469 
1470 	return status;
1471 }
1472 
1473 
1474 static status_t
1475 bfs_close(fs_volume* _volume, fs_vnode* _node, void* _cookie)
1476 {
1477 	FUNCTION();
1478 	return B_OK;
1479 }
1480 
1481 
1482 static status_t
1483 bfs_free_cookie(fs_volume* _volume, fs_vnode* _node, void* _cookie)
1484 {
1485 	FUNCTION();
1486 
1487 	file_cookie* cookie = (file_cookie*)_cookie;
1488 	Volume* volume = (Volume*)_volume->private_volume;
1489 	Inode* inode = (Inode*)_node->private_node;
1490 
1491 	Transaction transaction;
1492 	bool needsTrimming = false;
1493 
1494 	if (!volume->IsReadOnly() && !volume->IsCheckingThread()) {
1495 		InodeReadLocker locker(inode);
1496 		needsTrimming = inode->NeedsTrimming();
1497 
1498 		if ((cookie->open_mode & O_RWMASK) != 0
1499 			&& !inode->IsDeleted()
1500 			&& (needsTrimming
1501 				|| inode->OldLastModified() != inode->LastModified()
1502 				|| (inode->InSizeIndex()
1503 					// TODO: this can prevent the size update notification
1504 					// for nodes not in the index!
1505 					&& inode->OldSize() != inode->Size()))) {
1506 			locker.Unlock();
1507 			transaction.Start(volume, inode->BlockNumber());
1508 		}
1509 	}
1510 
1511 	status_t status = transaction.IsStarted() ? B_OK : B_ERROR;
1512 
1513 	if (status == B_OK) {
1514 		inode->WriteLockInTransaction(transaction);
1515 
1516 		// trim the preallocated blocks and update the size,
1517 		// and last_modified indices if needed
1518 		bool changedSize = false, changedTime = false;
1519 		Index index(volume);
1520 
1521 		if (needsTrimming) {
1522 			status = inode->TrimPreallocation(transaction);
1523 			if (status < B_OK) {
1524 				FATAL(("Could not trim preallocated blocks: inode %" B_PRIdINO
1525 					", transaction %d: %s!\n", inode->ID(),
1526 					(int)transaction.ID(), strerror(status)));
1527 
1528 				// we still want this transaction to succeed
1529 				status = B_OK;
1530 			}
1531 		}
1532 		if (inode->OldSize() != inode->Size()) {
1533 			if (inode->InSizeIndex())
1534 				index.UpdateSize(transaction, inode);
1535 			changedSize = true;
1536 		}
1537 		if (inode->OldLastModified() != inode->LastModified()) {
1538 			if (inode->InLastModifiedIndex()) {
1539 				index.UpdateLastModified(transaction, inode,
1540 					inode->LastModified());
1541 			}
1542 			changedTime = true;
1543 
1544 			// updating the index doesn't write back the inode
1545 			inode->WriteBack(transaction);
1546 		}
1547 
1548 		if (changedSize || changedTime) {
1549 			notify_stat_changed(volume->ID(), inode->ParentID(), inode->ID(),
1550 				(changedTime ? B_STAT_MODIFICATION_TIME : 0)
1551 				| (changedSize ? B_STAT_SIZE : 0));
1552 		}
1553 	}
1554 	if (status == B_OK)
1555 		transaction.Done();
1556 
1557 	if ((cookie->open_mode & BFS_OPEN_MODE_CHECKING) != 0) {
1558 		// "chkbfs" exited abnormally, so we have to stop it here...
1559 		FATAL(("check process was aborted!\n"));
1560 		volume->CheckVisitor()->StopChecking();
1561 		volume->DeleteCheckVisitor();
1562 	}
1563 
1564 	if ((cookie->open_mode & O_NOCACHE) != 0 && inode->FileCache() != NULL)
1565 		file_cache_enable(inode->FileCache());
1566 
1567 	delete cookie;
1568 	return B_OK;
1569 }
1570 
1571 
1572 /*!	Checks access permissions, return B_NOT_ALLOWED if the action
1573 	is not allowed.
1574 */
1575 static status_t
1576 bfs_access(fs_volume* _volume, fs_vnode* _node, int accessMode)
1577 {
1578 	//FUNCTION();
1579 
1580 	Inode* inode = (Inode*)_node->private_node;
1581 	status_t status = inode->CheckPermissions(accessMode);
1582 	if (status < B_OK)
1583 		RETURN_ERROR(status);
1584 
1585 	return B_OK;
1586 }
1587 
1588 
1589 static status_t
1590 bfs_read_link(fs_volume* _volume, fs_vnode* _node, char* buffer,
1591 	size_t* _bufferSize)
1592 {
1593 	FUNCTION();
1594 
1595 	Inode* inode = (Inode*)_node->private_node;
1596 
1597 	if (!inode->IsSymLink())
1598 		RETURN_ERROR(B_BAD_VALUE);
1599 
1600 	if ((inode->Flags() & INODE_LONG_SYMLINK) != 0) {
1601 		status_t status = inode->ReadAt(0, (uint8*)buffer, _bufferSize);
1602 		if (status < B_OK)
1603 			RETURN_ERROR(status);
1604 
1605 		*_bufferSize = inode->Size();
1606 		return B_OK;
1607 	}
1608 
1609 	size_t linkLength = strlen(inode->Node().short_symlink);
1610 
1611 	size_t bytesToCopy = min_c(linkLength, *_bufferSize);
1612 
1613 	*_bufferSize = linkLength;
1614 
1615 	memcpy(buffer, inode->Node().short_symlink, bytesToCopy);
1616 	return B_OK;
1617 }
1618 
1619 
1620 //	#pragma mark - Directory functions
1621 
1622 
1623 static status_t
1624 bfs_create_dir(fs_volume* _volume, fs_vnode* _directory, const char* name,
1625 	int mode)
1626 {
1627 	FUNCTION_START(("name = \"%s\", perms = %d\n", name, mode));
1628 
1629 	Volume* volume = (Volume*)_volume->private_volume;
1630 	Inode* directory = (Inode*)_directory->private_node;
1631 
1632 	if (volume->IsReadOnly())
1633 		return B_READ_ONLY_DEVICE;
1634 
1635 	if (!directory->IsDirectory())
1636 		RETURN_ERROR(B_BAD_TYPE);
1637 
1638 	status_t status = directory->CheckPermissions(W_OK);
1639 	if (status < B_OK)
1640 		RETURN_ERROR(status);
1641 
1642 	Transaction transaction(volume, directory->BlockNumber());
1643 
1644 	// Inode::Create() locks the inode if we pass the "id" parameter, but we
1645 	// need it anyway
1646 	off_t id;
1647 	status = Inode::Create(transaction, directory, name,
1648 		S_DIRECTORY | (mode & S_IUMSK), 0, 0, NULL, &id);
1649 	if (status == B_OK) {
1650 		put_vnode(volume->FSVolume(), id);
1651 
1652 		entry_cache_add(volume->ID(), directory->ID(), name, id);
1653 
1654 		status = transaction.Done();
1655 		if (status == B_OK)
1656 			notify_entry_created(volume->ID(), directory->ID(), name, id);
1657 		else
1658 			entry_cache_remove(volume->ID(), directory->ID(), name);
1659 	}
1660 
1661 	return status;
1662 }
1663 
1664 
1665 static status_t
1666 bfs_remove_dir(fs_volume* _volume, fs_vnode* _directory, const char* name)
1667 {
1668 	FUNCTION_START(("name = \"%s\"\n", name));
1669 
1670 	Volume* volume = (Volume*)_volume->private_volume;
1671 	Inode* directory = (Inode*)_directory->private_node;
1672 
1673 	Transaction transaction(volume, directory->BlockNumber());
1674 
1675 	off_t id;
1676 	status_t status = directory->Remove(transaction, name, &id, true);
1677 	if (status == B_OK) {
1678 		// Remove the cache entry for the directory and potentially also
1679 		// the parent entry still belonging to the directory
1680 		entry_cache_remove(volume->ID(), directory->ID(), name);
1681 		entry_cache_remove(volume->ID(), id, "..");
1682 
1683 		status = transaction.Done();
1684 		if (status == B_OK)
1685 			notify_entry_removed(volume->ID(), directory->ID(), name, id);
1686 		else {
1687 			entry_cache_add(volume->ID(), directory->ID(), name, id);
1688 			entry_cache_add(volume->ID(), id, "..", id);
1689 		}
1690 	}
1691 
1692 	return status;
1693 }
1694 
1695 
1696 /*!	Opens a directory ready to be traversed.
1697 	bfs_open_dir() is also used by bfs_open_index_dir().
1698 */
1699 static status_t
1700 bfs_open_dir(fs_volume* _volume, fs_vnode* _node, void** _cookie)
1701 {
1702 	FUNCTION();
1703 
1704 	Inode* inode = (Inode*)_node->private_node;
1705 	status_t status = inode->CheckPermissions(R_OK);
1706 	if (status < B_OK)
1707 		RETURN_ERROR(status);
1708 
1709 	// we don't ask here for directories only, because the bfs_open_index_dir()
1710 	// function utilizes us (so we must be able to open indices as well)
1711 	if (!inode->IsContainer())
1712 		RETURN_ERROR(B_NOT_A_DIRECTORY);
1713 
1714 	BPlusTree* tree = inode->Tree();
1715 	if (tree == NULL)
1716 		RETURN_ERROR(B_BAD_VALUE);
1717 
1718 	TreeIterator* iterator = new(std::nothrow) TreeIterator(tree);
1719 	if (iterator == NULL)
1720 		RETURN_ERROR(B_NO_MEMORY);
1721 
1722 	*_cookie = iterator;
1723 	return B_OK;
1724 }
1725 
1726 
1727 static status_t
1728 bfs_read_dir(fs_volume* _volume, fs_vnode* _node, void* _cookie,
1729 	struct dirent* dirent, size_t bufferSize, uint32* _num)
1730 {
1731 	FUNCTION();
1732 
1733 	TreeIterator* iterator = (TreeIterator*)_cookie;
1734 	Volume* volume = (Volume*)_volume->private_volume;
1735 
1736 	uint32 maxCount = *_num;
1737 	uint32 count = 0;
1738 
1739 	while (count < maxCount && bufferSize > sizeof(struct dirent)) {
1740 		ino_t id;
1741 		uint16 length;
1742 		size_t nameBufferSize = bufferSize - offsetof(struct dirent, d_name);
1743 
1744 		status_t status = iterator->GetNextEntry(dirent->d_name, &length,
1745 			nameBufferSize, &id);
1746 
1747 		if (status == B_ENTRY_NOT_FOUND)
1748 			break;
1749 
1750 		if (status == B_BUFFER_OVERFLOW) {
1751 			// the remaining name buffer length was too small
1752 			if (count == 0)
1753 				RETURN_ERROR(B_BUFFER_OVERFLOW);
1754 			break;
1755 		}
1756 
1757 		if (status != B_OK)
1758 			RETURN_ERROR(status);
1759 
1760 		dirent->d_dev = volume->ID();
1761 		dirent->d_ino = id;
1762 
1763 		dirent = next_dirent(dirent, length, bufferSize);
1764 		count++;
1765 	}
1766 
1767 	*_num = count;
1768 	return B_OK;
1769 }
1770 
1771 
1772 /*!	Sets the TreeIterator back to the beginning of the directory. */
1773 static status_t
1774 bfs_rewind_dir(fs_volume* /*_volume*/, fs_vnode* /*node*/, void* _cookie)
1775 {
1776 	FUNCTION();
1777 	TreeIterator* iterator = (TreeIterator*)_cookie;
1778 
1779 	return iterator->Rewind();
1780 }
1781 
1782 
1783 static status_t
1784 bfs_close_dir(fs_volume* /*_volume*/, fs_vnode* /*node*/, void* /*_cookie*/)
1785 {
1786 	FUNCTION();
1787 	return B_OK;
1788 }
1789 
1790 
1791 static status_t
1792 bfs_free_dir_cookie(fs_volume* _volume, fs_vnode* node, void* _cookie)
1793 {
1794 	delete (TreeIterator*)_cookie;
1795 	return B_OK;
1796 }
1797 
1798 
1799 //	#pragma mark - Attribute functions
1800 
1801 
1802 static status_t
1803 bfs_open_attr_dir(fs_volume* _volume, fs_vnode* _node, void** _cookie)
1804 {
1805 	Inode* inode = (Inode*)_node->private_node;
1806 
1807 	FUNCTION();
1808 
1809 	AttributeIterator* iterator = new(std::nothrow) AttributeIterator(inode);
1810 	if (iterator == NULL)
1811 		RETURN_ERROR(B_NO_MEMORY);
1812 
1813 	*_cookie = iterator;
1814 	return B_OK;
1815 }
1816 
1817 
1818 static status_t
1819 bfs_close_attr_dir(fs_volume* _volume, fs_vnode* node, void* cookie)
1820 {
1821 	FUNCTION();
1822 	return B_OK;
1823 }
1824 
1825 
1826 static status_t
1827 bfs_free_attr_dir_cookie(fs_volume* _volume, fs_vnode* node, void* _cookie)
1828 {
1829 	FUNCTION();
1830 	AttributeIterator* iterator = (AttributeIterator*)_cookie;
1831 
1832 	delete iterator;
1833 	return B_OK;
1834 }
1835 
1836 
1837 static status_t
1838 bfs_rewind_attr_dir(fs_volume* _volume, fs_vnode* _node, void* _cookie)
1839 {
1840 	FUNCTION();
1841 
1842 	AttributeIterator* iterator = (AttributeIterator*)_cookie;
1843 	RETURN_ERROR(iterator->Rewind());
1844 }
1845 
1846 
1847 static status_t
1848 bfs_read_attr_dir(fs_volume* _volume, fs_vnode* node, void* _cookie,
1849 	struct dirent* dirent, size_t bufferSize, uint32* _num)
1850 {
1851 	FUNCTION();
1852 	AttributeIterator* iterator = (AttributeIterator*)_cookie;
1853 
1854 	uint32 type;
1855 	size_t length;
1856 	status_t status = iterator->GetNext(dirent->d_name, &length, &type,
1857 		&dirent->d_ino);
1858 	if (status == B_ENTRY_NOT_FOUND) {
1859 		*_num = 0;
1860 		return B_OK;
1861 	} else if (status != B_OK) {
1862 		RETURN_ERROR(status);
1863 	}
1864 
1865 	Volume* volume = (Volume*)_volume->private_volume;
1866 
1867 	dirent->d_dev = volume->ID();
1868 	dirent->d_reclen = offsetof(struct dirent, d_name) + length + 1;
1869 
1870 	*_num = 1;
1871 	return B_OK;
1872 }
1873 
1874 
1875 static status_t
1876 bfs_create_attr(fs_volume* _volume, fs_vnode* _node, const char* name,
1877 	uint32 type, int openMode, void** _cookie)
1878 {
1879 	FUNCTION();
1880 
1881 	Volume* volume = (Volume*)_volume->private_volume;
1882 	if (volume->IsReadOnly())
1883 		return B_READ_ONLY_DEVICE;
1884 
1885 	Inode* inode = (Inode*)_node->private_node;
1886 	Attribute attribute(inode);
1887 
1888 	return attribute.Create(name, type, openMode, (attr_cookie**)_cookie);
1889 }
1890 
1891 
1892 static status_t
1893 bfs_open_attr(fs_volume* _volume, fs_vnode* _node, const char* name,
1894 	int openMode, void** _cookie)
1895 {
1896 	FUNCTION();
1897 
1898 	Inode* inode = (Inode*)_node->private_node;
1899 	Attribute attribute(inode);
1900 
1901 	return attribute.Open(name, openMode, (attr_cookie**)_cookie);
1902 }
1903 
1904 
1905 static status_t
1906 bfs_close_attr(fs_volume* _volume, fs_vnode* _file, void* cookie)
1907 {
1908 	return B_OK;
1909 }
1910 
1911 
1912 static status_t
1913 bfs_free_attr_cookie(fs_volume* _volume, fs_vnode* _file, void* cookie)
1914 {
1915 	delete (attr_cookie*)cookie;
1916 	return B_OK;
1917 }
1918 
1919 
1920 static status_t
1921 bfs_read_attr(fs_volume* _volume, fs_vnode* _file, void* _cookie, off_t pos,
1922 	void* buffer, size_t* _length)
1923 {
1924 	FUNCTION();
1925 
1926 	attr_cookie* cookie = (attr_cookie*)_cookie;
1927 	Inode* inode = (Inode*)_file->private_node;
1928 
1929 	Attribute attribute(inode, cookie);
1930 
1931 	return attribute.Read(cookie, pos, (uint8*)buffer, _length);
1932 }
1933 
1934 
1935 static status_t
1936 bfs_write_attr(fs_volume* _volume, fs_vnode* _file, void* _cookie,
1937 	off_t pos, const void* buffer, size_t* _length)
1938 {
1939 	FUNCTION();
1940 
1941 	attr_cookie* cookie = (attr_cookie*)_cookie;
1942 	Volume* volume = (Volume*)_volume->private_volume;
1943 	Inode* inode = (Inode*)_file->private_node;
1944 
1945 	Transaction transaction(volume, inode->BlockNumber());
1946 	Attribute attribute(inode, cookie);
1947 
1948 	bool created;
1949 	status_t status = attribute.Write(transaction, cookie, pos,
1950 		(const uint8*)buffer, _length, &created);
1951 	if (status == B_OK) {
1952 		status = transaction.Done();
1953 		if (status == B_OK) {
1954 			notify_attribute_changed(volume->ID(), inode->ParentID(),
1955 				inode->ID(), cookie->name,
1956 				created ? B_ATTR_CREATED : B_ATTR_CHANGED);
1957 			notify_stat_changed(volume->ID(), inode->ParentID(), inode->ID(),
1958 				B_STAT_CHANGE_TIME);
1959 		}
1960 	}
1961 
1962 	return status;
1963 }
1964 
1965 
1966 static status_t
1967 bfs_read_attr_stat(fs_volume* _volume, fs_vnode* _file, void* _cookie,
1968 	struct stat* stat)
1969 {
1970 	FUNCTION();
1971 
1972 	attr_cookie* cookie = (attr_cookie*)_cookie;
1973 	Inode* inode = (Inode*)_file->private_node;
1974 
1975 	Attribute attribute(inode, cookie);
1976 
1977 	return attribute.Stat(*stat);
1978 }
1979 
1980 
1981 static status_t
1982 bfs_write_attr_stat(fs_volume* _volume, fs_vnode* file, void* cookie,
1983 	const struct stat* stat, int statMask)
1984 {
1985 	// TODO: Implement (at least setting the size)!
1986 	return EOPNOTSUPP;
1987 }
1988 
1989 
1990 static status_t
1991 bfs_rename_attr(fs_volume* _volume, fs_vnode* fromFile, const char* fromName,
1992 	fs_vnode* toFile, const char* toName)
1993 {
1994 	FUNCTION_START(("name = \"%s\", to = \"%s\"\n", fromName, toName));
1995 
1996 	// TODO: implement bfs_rename_attr()!
1997 	// There will probably be an API to move one attribute to another file,
1998 	// making that function much more complicated - oh joy ;-)
1999 
2000 	return EOPNOTSUPP;
2001 }
2002 
2003 
2004 static status_t
2005 bfs_remove_attr(fs_volume* _volume, fs_vnode* _node, const char* name)
2006 {
2007 	FUNCTION_START(("name = \"%s\"\n", name));
2008 
2009 	Volume* volume = (Volume*)_volume->private_volume;
2010 	Inode* inode = (Inode*)_node->private_node;
2011 
2012 	status_t status = inode->CheckPermissions(W_OK);
2013 	if (status != B_OK)
2014 		return status;
2015 
2016 	Transaction transaction(volume, inode->BlockNumber());
2017 
2018 	status = inode->RemoveAttribute(transaction, name);
2019 	if (status == B_OK)
2020 		status = transaction.Done();
2021 	if (status == B_OK) {
2022 		notify_attribute_changed(volume->ID(), inode->ParentID(), inode->ID(),
2023 			name, B_ATTR_REMOVED);
2024 	}
2025 
2026 	return status;
2027 }
2028 
2029 
2030 //	#pragma mark - Special Nodes
2031 
2032 
2033 status_t
2034 bfs_create_special_node(fs_volume* _volume, fs_vnode* _directory,
2035 	const char* name, fs_vnode* subVnode, mode_t mode, uint32 flags,
2036 	fs_vnode* _superVnode, ino_t* _nodeID)
2037 {
2038 	// no need to support entry-less nodes
2039 	if (name == NULL)
2040 		return B_UNSUPPORTED;
2041 
2042 	FUNCTION_START(("name = \"%s\", mode = %u, flags = 0x%" B_PRIx32
2043 		", subVnode: %p\n", name, (unsigned int)mode, flags, subVnode));
2044 
2045 	Volume* volume = (Volume*)_volume->private_volume;
2046 	Inode* directory = (Inode*)_directory->private_node;
2047 
2048 	if (volume->IsReadOnly())
2049 		return B_READ_ONLY_DEVICE;
2050 
2051 	if (!directory->IsDirectory())
2052 		RETURN_ERROR(B_BAD_TYPE);
2053 
2054 	status_t status = directory->CheckPermissions(W_OK);
2055 	if (status < B_OK)
2056 		RETURN_ERROR(status);
2057 
2058 	Transaction transaction(volume, directory->BlockNumber());
2059 
2060 	off_t id;
2061 	Inode* inode;
2062 	status = Inode::Create(transaction, directory, name, mode, O_EXCL, 0, NULL,
2063 		&id, &inode, subVnode ? subVnode->ops : NULL, flags);
2064 	if (status == B_OK) {
2065 		_superVnode->private_node = inode;
2066 		_superVnode->ops = &gBFSVnodeOps;
2067 		*_nodeID = id;
2068 
2069 		entry_cache_add(volume->ID(), directory->ID(), name, id);
2070 
2071 		status = transaction.Done();
2072 		if (status == B_OK)
2073 			notify_entry_created(volume->ID(), directory->ID(), name, id);
2074 		else
2075 			entry_cache_remove(volume->ID(), directory->ID(), name);
2076 	}
2077 
2078 	return status;
2079 }
2080 
2081 
2082 //	#pragma mark - Index functions
2083 
2084 
2085 static status_t
2086 bfs_open_index_dir(fs_volume* _volume, void** _cookie)
2087 {
2088 	FUNCTION();
2089 
2090 	Volume* volume = (Volume*)_volume->private_volume;
2091 
2092 	if (volume->IndicesNode() == NULL) {
2093 		// This volume does not have any indices
2094 		RETURN_ERROR(B_ENTRY_NOT_FOUND);
2095 	}
2096 
2097 	// Since the indices root node is just a directory, and we are storing
2098 	// a pointer to it in our Volume object, we can just use the directory
2099 	// traversal functions.
2100 	// In fact we're storing it in the Volume object for that reason.
2101 
2102 	fs_vnode indicesNode;
2103 	indicesNode.private_node = volume->IndicesNode();
2104 
2105 	RETURN_ERROR(bfs_open_dir(_volume, &indicesNode, _cookie));
2106 }
2107 
2108 
2109 static status_t
2110 bfs_close_index_dir(fs_volume* _volume, void* _cookie)
2111 {
2112 	FUNCTION();
2113 
2114 	Volume* volume = (Volume*)_volume->private_volume;
2115 
2116 	fs_vnode indicesNode;
2117 	indicesNode.private_node = volume->IndicesNode();
2118 
2119 	RETURN_ERROR(bfs_close_dir(_volume, &indicesNode, _cookie));
2120 }
2121 
2122 
2123 static status_t
2124 bfs_free_index_dir_cookie(fs_volume* _volume, void* _cookie)
2125 {
2126 	FUNCTION();
2127 
2128 	Volume* volume = (Volume*)_volume->private_volume;
2129 
2130 	fs_vnode indicesNode;
2131 	indicesNode.private_node = volume->IndicesNode();
2132 
2133 	RETURN_ERROR(bfs_free_dir_cookie(_volume, &indicesNode, _cookie));
2134 }
2135 
2136 
2137 static status_t
2138 bfs_rewind_index_dir(fs_volume* _volume, void* _cookie)
2139 {
2140 	FUNCTION();
2141 
2142 	Volume* volume = (Volume*)_volume->private_volume;
2143 
2144 	fs_vnode indicesNode;
2145 	indicesNode.private_node = volume->IndicesNode();
2146 
2147 	RETURN_ERROR(bfs_rewind_dir(_volume, &indicesNode, _cookie));
2148 }
2149 
2150 
2151 static status_t
2152 bfs_read_index_dir(fs_volume* _volume, void* _cookie, struct dirent* dirent,
2153 	size_t bufferSize, uint32* _num)
2154 {
2155 	FUNCTION();
2156 
2157 	Volume* volume = (Volume*)_volume->private_volume;
2158 
2159 	fs_vnode indicesNode;
2160 	indicesNode.private_node = volume->IndicesNode();
2161 
2162 	RETURN_ERROR(bfs_read_dir(_volume, &indicesNode, _cookie, dirent,
2163 		bufferSize, _num));
2164 }
2165 
2166 
2167 static status_t
2168 bfs_create_index(fs_volume* _volume, const char* name, uint32 type,
2169 	uint32 flags)
2170 {
2171 	FUNCTION_START(("name = \"%s\", type = %" B_PRIu32
2172 		", flags = %" B_PRIu32 "\n", name, type, flags));
2173 
2174 	Volume* volume = (Volume*)_volume->private_volume;
2175 
2176 	if (volume->IsReadOnly())
2177 		return B_READ_ONLY_DEVICE;
2178 
2179 	// only root users are allowed to create indices
2180 	if (geteuid() != 0)
2181 		return B_NOT_ALLOWED;
2182 
2183 	Transaction transaction(volume, volume->Indices());
2184 
2185 	Index index(volume);
2186 	status_t status = index.Create(transaction, name, type);
2187 
2188 	if (status == B_OK)
2189 		status = transaction.Done();
2190 
2191 	RETURN_ERROR(status);
2192 }
2193 
2194 
2195 static status_t
2196 bfs_remove_index(fs_volume* _volume, const char* name)
2197 {
2198 	FUNCTION();
2199 
2200 	Volume* volume = (Volume*)_volume->private_volume;
2201 
2202 	if (volume->IsReadOnly())
2203 		return B_READ_ONLY_DEVICE;
2204 
2205 	// only root users are allowed to remove indices
2206 	if (geteuid() != 0)
2207 		return B_NOT_ALLOWED;
2208 
2209 	Inode* indices = volume->IndicesNode();
2210 	if (indices == NULL)
2211 		return B_ENTRY_NOT_FOUND;
2212 
2213 	Transaction transaction(volume, volume->Indices());
2214 
2215 	status_t status = indices->Remove(transaction, name);
2216 	if (status == B_OK)
2217 		status = transaction.Done();
2218 
2219 	RETURN_ERROR(status);
2220 }
2221 
2222 
2223 static status_t
2224 bfs_stat_index(fs_volume* _volume, const char* name, struct stat* stat)
2225 {
2226 	FUNCTION_START(("name = %s\n", name));
2227 
2228 	Volume* volume = (Volume*)_volume->private_volume;
2229 
2230 	Index index(volume);
2231 	status_t status = index.SetTo(name);
2232 	if (status < B_OK)
2233 		RETURN_ERROR(status);
2234 
2235 	bfs_inode& node = index.Node()->Node();
2236 
2237 	stat->st_type = index.Type();
2238 	stat->st_mode = node.Mode();
2239 
2240 	stat->st_size = node.data.Size();
2241 	stat->st_blocks = index.Node()->AllocatedSize() / 512;
2242 
2243 	stat->st_nlink = 1;
2244 	stat->st_blksize = 65536;
2245 
2246 	stat->st_uid = node.UserID();
2247 	stat->st_gid = node.GroupID();
2248 
2249 	fill_stat_time(node, *stat);
2250 
2251 	return B_OK;
2252 }
2253 
2254 
2255 //	#pragma mark - Query functions
2256 
2257 
2258 static status_t
2259 bfs_open_query(fs_volume* _volume, const char* queryString, uint32 flags,
2260 	port_id port, uint32 token, void** _cookie)
2261 {
2262 	FUNCTION_START(("bfs_open_query(\"%s\", flags = %" B_PRIu32
2263 		", port_id = %" B_PRId32 ", token = %" B_PRIu32 ")\n",
2264 		queryString, flags, port, token));
2265 
2266 	Volume* volume = (Volume*)_volume->private_volume;
2267 
2268 	Expression* expression = new(std::nothrow) Expression((char*)queryString);
2269 	if (expression == NULL)
2270 		RETURN_ERROR(B_NO_MEMORY);
2271 
2272 	if (expression->InitCheck() < B_OK) {
2273 		INFORM(("Could not parse query \"%s\", stopped at: \"%s\"\n",
2274 			queryString, expression->Position()));
2275 
2276 		delete expression;
2277 		RETURN_ERROR(B_BAD_VALUE);
2278 	}
2279 
2280 	Query* query = new(std::nothrow) Query(volume, expression, flags);
2281 	if (query == NULL) {
2282 		delete expression;
2283 		RETURN_ERROR(B_NO_MEMORY);
2284 	}
2285 
2286 	if (flags & B_LIVE_QUERY)
2287 		query->SetLiveMode(port, token);
2288 
2289 	*_cookie = (void*)query;
2290 
2291 	return B_OK;
2292 }
2293 
2294 
2295 static status_t
2296 bfs_close_query(fs_volume* _volume, void* cookie)
2297 {
2298 	FUNCTION();
2299 	return B_OK;
2300 }
2301 
2302 
2303 static status_t
2304 bfs_free_query_cookie(fs_volume* _volume, void* cookie)
2305 {
2306 	FUNCTION();
2307 
2308 	Query* query = (Query*)cookie;
2309 	Expression* expression = query->GetExpression();
2310 	delete query;
2311 	delete expression;
2312 
2313 	return B_OK;
2314 }
2315 
2316 
2317 static status_t
2318 bfs_read_query(fs_volume* /*_volume*/, void* cookie, struct dirent* dirent,
2319 	size_t bufferSize, uint32* _num)
2320 {
2321 	FUNCTION();
2322 	Query* query = (Query*)cookie;
2323 	status_t status = query->GetNextEntry(dirent, bufferSize);
2324 	if (status == B_OK)
2325 		*_num = 1;
2326 	else if (status == B_ENTRY_NOT_FOUND)
2327 		*_num = 0;
2328 	else
2329 		return status;
2330 
2331 	return B_OK;
2332 }
2333 
2334 
2335 static status_t
2336 bfs_rewind_query(fs_volume* /*_volume*/, void* cookie)
2337 {
2338 	FUNCTION();
2339 
2340 	Query* query = (Query*)cookie;
2341 	return query->Rewind();
2342 }
2343 
2344 
2345 //	#pragma mark -
2346 
2347 
2348 static uint32
2349 bfs_get_supported_operations(partition_data* partition, uint32 mask)
2350 {
2351 	// TODO: We should at least check the partition size.
2352 	return B_DISK_SYSTEM_SUPPORTS_INITIALIZING
2353 		| B_DISK_SYSTEM_SUPPORTS_CONTENT_NAME
2354 		| B_DISK_SYSTEM_SUPPORTS_WRITING;
2355 }
2356 
2357 
2358 static status_t
2359 bfs_initialize(int fd, partition_id partitionID, const char* name,
2360 	const char* parameterString, off_t /*partitionSize*/, disk_job_id job)
2361 {
2362 	// check name
2363 	status_t status = check_volume_name(name);
2364 	if (status != B_OK)
2365 		return status;
2366 
2367 	// parse parameters
2368 	initialize_parameters parameters;
2369 	status = parse_initialize_parameters(parameterString, parameters);
2370 	if (status != B_OK)
2371 		return status;
2372 
2373 	update_disk_device_job_progress(job, 0);
2374 
2375 	// initialize the volume
2376 	Volume volume(NULL);
2377 	status = volume.Initialize(fd, name, parameters.blockSize,
2378 		parameters.flags);
2379 	if (status < B_OK) {
2380 		INFORM(("Initializing volume failed: %s\n", strerror(status)));
2381 		return status;
2382 	}
2383 
2384 	// rescan partition
2385 	status = scan_partition(partitionID);
2386 	if (status != B_OK)
2387 		return status;
2388 
2389 	update_disk_device_job_progress(job, 1);
2390 
2391 	// print some info, if desired
2392 	if (parameters.verbose) {
2393 		disk_super_block super = volume.SuperBlock();
2394 
2395 		INFORM(("Disk was initialized successfully.\n"));
2396 		INFORM(("\tname: \"%s\"\n", super.name));
2397 		INFORM(("\tnum blocks: %" B_PRIdOFF "\n", super.NumBlocks()));
2398 		INFORM(("\tused blocks: %" B_PRIdOFF "\n", super.UsedBlocks()));
2399 		INFORM(("\tblock size: %u bytes\n", (unsigned)super.BlockSize()));
2400 		INFORM(("\tnum allocation groups: %d\n",
2401 			(int)super.AllocationGroups()));
2402 		INFORM(("\tallocation group size: %ld blocks\n",
2403 			1L << super.AllocationGroupShift()));
2404 		INFORM(("\tlog size: %u blocks\n", super.log_blocks.Length()));
2405 	}
2406 
2407 	return B_OK;
2408 }
2409 
2410 
2411 static status_t
2412 bfs_uninitialize(int fd, partition_id partitionID, off_t partitionSize,
2413 	uint32 blockSize, disk_job_id job)
2414 {
2415 	if (blockSize == 0)
2416 		return B_BAD_VALUE;
2417 
2418 	update_disk_device_job_progress(job, 0.0);
2419 
2420 	// just overwrite the superblock
2421 	disk_super_block superBlock;
2422 	memset(&superBlock, 0, sizeof(superBlock));
2423 
2424 	if (write_pos(fd, 512, &superBlock, sizeof(superBlock)) < 0)
2425 		return errno;
2426 
2427 	update_disk_device_job_progress(job, 1.0);
2428 
2429 	return B_OK;
2430 }
2431 
2432 
2433 //	#pragma mark -
2434 
2435 
2436 static status_t
2437 bfs_std_ops(int32 op, ...)
2438 {
2439 	switch (op) {
2440 		case B_MODULE_INIT:
2441 #ifdef BFS_DEBUGGER_COMMANDS
2442 			add_debugger_commands();
2443 #endif
2444 			return B_OK;
2445 		case B_MODULE_UNINIT:
2446 #ifdef BFS_DEBUGGER_COMMANDS
2447 			remove_debugger_commands();
2448 #endif
2449 			return B_OK;
2450 
2451 		default:
2452 			return B_ERROR;
2453 	}
2454 }
2455 
2456 fs_volume_ops gBFSVolumeOps = {
2457 	&bfs_unmount,
2458 	&bfs_read_fs_stat,
2459 	&bfs_write_fs_stat,
2460 	&bfs_sync,
2461 	&bfs_get_vnode,
2462 
2463 	/* index directory & index operations */
2464 	&bfs_open_index_dir,
2465 	&bfs_close_index_dir,
2466 	&bfs_free_index_dir_cookie,
2467 	&bfs_read_index_dir,
2468 	&bfs_rewind_index_dir,
2469 
2470 	&bfs_create_index,
2471 	&bfs_remove_index,
2472 	&bfs_stat_index,
2473 
2474 	/* query operations */
2475 	&bfs_open_query,
2476 	&bfs_close_query,
2477 	&bfs_free_query_cookie,
2478 	&bfs_read_query,
2479 	&bfs_rewind_query,
2480 };
2481 
2482 fs_vnode_ops gBFSVnodeOps = {
2483 	/* vnode operations */
2484 	&bfs_lookup,
2485 	&bfs_get_vnode_name,
2486 	&bfs_put_vnode,
2487 	&bfs_remove_vnode,
2488 
2489 	/* VM file access */
2490 	&bfs_can_page,
2491 	&bfs_read_pages,
2492 	&bfs_write_pages,
2493 
2494 	&bfs_io,
2495 	NULL,	// cancel_io()
2496 
2497 	&bfs_get_file_map,
2498 
2499 	&bfs_ioctl,
2500 	&bfs_set_flags,
2501 	NULL,	// fs_select
2502 	NULL,	// fs_deselect
2503 	&bfs_fsync,
2504 
2505 	&bfs_read_link,
2506 	&bfs_create_symlink,
2507 
2508 	&bfs_link,
2509 	&bfs_unlink,
2510 	&bfs_rename,
2511 
2512 	&bfs_access,
2513 	&bfs_read_stat,
2514 	&bfs_write_stat,
2515 	NULL,	// fs_preallocate
2516 
2517 	/* file operations */
2518 	&bfs_create,
2519 	&bfs_open,
2520 	&bfs_close,
2521 	&bfs_free_cookie,
2522 	&bfs_read,
2523 	&bfs_write,
2524 
2525 	/* directory operations */
2526 	&bfs_create_dir,
2527 	&bfs_remove_dir,
2528 	&bfs_open_dir,
2529 	&bfs_close_dir,
2530 	&bfs_free_dir_cookie,
2531 	&bfs_read_dir,
2532 	&bfs_rewind_dir,
2533 
2534 	/* attribute directory operations */
2535 	&bfs_open_attr_dir,
2536 	&bfs_close_attr_dir,
2537 	&bfs_free_attr_dir_cookie,
2538 	&bfs_read_attr_dir,
2539 	&bfs_rewind_attr_dir,
2540 
2541 	/* attribute operations */
2542 	&bfs_create_attr,
2543 	&bfs_open_attr,
2544 	&bfs_close_attr,
2545 	&bfs_free_attr_cookie,
2546 	&bfs_read_attr,
2547 	&bfs_write_attr,
2548 
2549 	&bfs_read_attr_stat,
2550 	&bfs_write_attr_stat,
2551 	&bfs_rename_attr,
2552 	&bfs_remove_attr,
2553 
2554 	/* special nodes */
2555 	&bfs_create_special_node
2556 };
2557 
2558 static file_system_module_info sBeFileSystem = {
2559 	{
2560 		"file_systems/bfs" BFS_ENDIAN_SUFFIX B_CURRENT_FS_API_VERSION,
2561 		0,
2562 		bfs_std_ops,
2563 	},
2564 
2565 	"bfs" BFS_ENDIAN_SUFFIX,						// short_name
2566 	"Be File System" BFS_ENDIAN_PRETTY_SUFFIX,		// pretty_name
2567 
2568 	// DDM flags
2569 	0
2570 //	| B_DISK_SYSTEM_SUPPORTS_CHECKING
2571 //	| B_DISK_SYSTEM_SUPPORTS_REPAIRING
2572 //	| B_DISK_SYSTEM_SUPPORTS_RESIZING
2573 //	| B_DISK_SYSTEM_SUPPORTS_MOVING
2574 //	| B_DISK_SYSTEM_SUPPORTS_SETTING_CONTENT_NAME
2575 //	| B_DISK_SYSTEM_SUPPORTS_SETTING_CONTENT_PARAMETERS
2576 	| B_DISK_SYSTEM_SUPPORTS_INITIALIZING
2577 	| B_DISK_SYSTEM_SUPPORTS_CONTENT_NAME
2578 //	| B_DISK_SYSTEM_SUPPORTS_DEFRAGMENTING
2579 //	| B_DISK_SYSTEM_SUPPORTS_DEFRAGMENTING_WHILE_MOUNTED
2580 //	| B_DISK_SYSTEM_SUPPORTS_CHECKING_WHILE_MOUNTED
2581 //	| B_DISK_SYSTEM_SUPPORTS_REPAIRING_WHILE_MOUNTED
2582 //	| B_DISK_SYSTEM_SUPPORTS_RESIZING_WHILE_MOUNTED
2583 //	| B_DISK_SYSTEM_SUPPORTS_MOVING_WHILE_MOUNTED
2584 //	| B_DISK_SYSTEM_SUPPORTS_SETTING_CONTENT_NAME_WHILE_MOUNTED
2585 //	| B_DISK_SYSTEM_SUPPORTS_SETTING_CONTENT_PARAMETERS_WHILE_MOUNTED
2586 	| B_DISK_SYSTEM_SUPPORTS_WRITING
2587 	,
2588 
2589 	// scanning
2590 	bfs_identify_partition,
2591 	bfs_scan_partition,
2592 	bfs_free_identify_partition_cookie,
2593 	NULL,	// free_partition_content_cookie()
2594 
2595 	&bfs_mount,
2596 
2597 	/* capability querying operations */
2598 	&bfs_get_supported_operations,
2599 
2600 	NULL,	// validate_resize
2601 	NULL,	// validate_move
2602 	NULL,	// validate_set_content_name
2603 	NULL,	// validate_set_content_parameters
2604 	NULL,	// validate_initialize,
2605 
2606 	/* shadow partition modification */
2607 	NULL,	// shadow_changed
2608 
2609 	/* writing */
2610 	NULL,	// defragment
2611 	NULL,	// repair
2612 	NULL,	// resize
2613 	NULL,	// move
2614 	NULL,	// set_content_name
2615 	NULL,	// set_content_parameters
2616 	bfs_initialize,
2617 	bfs_uninitialize
2618 };
2619 
2620 module_info* modules[] = {
2621 	(module_info*)&sBeFileSystem,
2622 	NULL,
2623 };
2624