xref: /haiku/src/add-ons/kernel/file_systems/bfs/kernel_interface.cpp (revision aa3083e086e5a929c061c72983e09d916c548a38)
1 /*
2  * Copyright 2001-2017, Axel Dörfler, axeld@pinc-software.de.
3  * This file may be used under the terms of the MIT License.
4  */
5 
6 
7 //!	file system interface to Haiku's vnode layer
8 
9 
10 #include "Debug.h"
11 #include "Volume.h"
12 #include "Inode.h"
13 #include "Index.h"
14 #include "BPlusTree.h"
15 #include "Query.h"
16 #include "Attribute.h"
17 #include "bfs_control.h"
18 #include "bfs_disk_system.h"
19 
20 // TODO: temporary solution as long as there is no public I/O requests API
21 #ifndef FS_SHELL
22 #	include <io_requests.h>
23 #	include <util/fs_trim_support.h>
24 #endif
25 
26 
27 #define BFS_IO_SIZE	65536
28 
29 
30 struct identify_cookie {
31 	disk_super_block super_block;
32 };
33 
34 extern void fill_stat_buffer(Inode* inode, struct stat& stat);
35 
36 
37 static void
38 fill_stat_time(const bfs_inode& node, struct stat& stat)
39 {
40 	bigtime_t now = real_time_clock_usecs();
41 	stat.st_atim.tv_sec = now / 1000000LL;
42 	stat.st_atim.tv_nsec = (now % 1000000LL) * 1000;
43 
44 	stat.st_mtim.tv_sec = bfs_inode::ToSecs(node.LastModifiedTime());
45 	stat.st_mtim.tv_nsec = bfs_inode::ToNsecs(node.LastModifiedTime());
46 	stat.st_crtim.tv_sec = bfs_inode::ToSecs(node.CreateTime());
47 	stat.st_crtim.tv_nsec = bfs_inode::ToNsecs(node.CreateTime());
48 
49 	// For BeOS compatibility, if on-disk ctime is invalid, fall back to mtime:
50 	bigtime_t changeTime = node.StatusChangeTime();
51 	if (changeTime < node.LastModifiedTime())
52 		stat.st_ctim = stat.st_mtim;
53 	else {
54 		stat.st_ctim.tv_sec = bfs_inode::ToSecs(changeTime);
55 		stat.st_ctim.tv_nsec = bfs_inode::ToNsecs(changeTime);
56 	}
57 }
58 
59 
60 void
61 fill_stat_buffer(Inode* inode, struct stat& stat)
62 {
63 	const bfs_inode& node = inode->Node();
64 
65 	stat.st_dev = inode->GetVolume()->ID();
66 	stat.st_ino = inode->ID();
67 	stat.st_nlink = 1;
68 	stat.st_blksize = BFS_IO_SIZE;
69 
70 	stat.st_uid = node.UserID();
71 	stat.st_gid = node.GroupID();
72 	stat.st_mode = node.Mode();
73 	stat.st_type = node.Type();
74 
75 	fill_stat_time(node, stat);
76 
77 	if (inode->IsSymLink() && (inode->Flags() & INODE_LONG_SYMLINK) == 0) {
78 		// symlinks report the size of the link here
79 		stat.st_size = strlen(node.short_symlink);
80 	} else
81 		stat.st_size = inode->Size();
82 
83 	stat.st_blocks = inode->AllocatedSize() / 512;
84 }
85 
86 
87 //!	bfs_io() callback hook
88 static status_t
89 iterative_io_get_vecs_hook(void* cookie, io_request* request, off_t offset,
90 	size_t size, struct file_io_vec* vecs, size_t* _count)
91 {
92 	Inode* inode = (Inode*)cookie;
93 
94 	return file_map_translate(inode->Map(), offset, size, vecs, _count,
95 		inode->GetVolume()->BlockSize());
96 }
97 
98 
99 //!	bfs_io() callback hook
100 static status_t
101 iterative_io_finished_hook(void* cookie, io_request* request, status_t status,
102 	bool partialTransfer, size_t bytesTransferred)
103 {
104 	Inode* inode = (Inode*)cookie;
105 	rw_lock_read_unlock(&inode->Lock());
106 	return B_OK;
107 }
108 
109 
110 //	#pragma mark - Scanning
111 
112 
113 static float
114 bfs_identify_partition(int fd, partition_data* partition, void** _cookie)
115 {
116 	disk_super_block superBlock;
117 	status_t status = Volume::Identify(fd, &superBlock);
118 	if (status != B_OK)
119 		return -1;
120 
121 	identify_cookie* cookie = new(std::nothrow) identify_cookie;
122 	if (cookie == NULL)
123 		return -1;
124 
125 	memcpy(&cookie->super_block, &superBlock, sizeof(disk_super_block));
126 
127 	*_cookie = cookie;
128 	return 0.8f;
129 }
130 
131 
132 static status_t
133 bfs_scan_partition(int fd, partition_data* partition, void* _cookie)
134 {
135 	identify_cookie* cookie = (identify_cookie*)_cookie;
136 
137 	partition->status = B_PARTITION_VALID;
138 	partition->flags |= B_PARTITION_FILE_SYSTEM;
139 	partition->content_size = cookie->super_block.NumBlocks()
140 		* cookie->super_block.BlockSize();
141 	partition->block_size = cookie->super_block.BlockSize();
142 	partition->content_name = strdup(cookie->super_block.name);
143 	if (partition->content_name == NULL)
144 		return B_NO_MEMORY;
145 
146 	return B_OK;
147 }
148 
149 
150 static void
151 bfs_free_identify_partition_cookie(partition_data* partition, void* _cookie)
152 {
153 	identify_cookie* cookie = (identify_cookie*)_cookie;
154 	delete cookie;
155 }
156 
157 
158 //	#pragma mark -
159 
160 
161 static status_t
162 bfs_mount(fs_volume* _volume, const char* device, uint32 flags,
163 	const char* args, ino_t* _rootID)
164 {
165 	FUNCTION();
166 
167 	Volume* volume = new(std::nothrow) Volume(_volume);
168 	if (volume == NULL)
169 		return B_NO_MEMORY;
170 
171 	status_t status = volume->Mount(device, flags);
172 	if (status != B_OK) {
173 		delete volume;
174 		RETURN_ERROR(status);
175 	}
176 
177 	_volume->private_volume = volume;
178 	_volume->ops = &gBFSVolumeOps;
179 	*_rootID = volume->ToVnode(volume->Root());
180 
181 	INFORM(("mounted \"%s\" (root node at %" B_PRIdINO ", device = %s)\n",
182 		volume->Name(), *_rootID, device));
183 	return B_OK;
184 }
185 
186 
187 static status_t
188 bfs_unmount(fs_volume* _volume)
189 {
190 	FUNCTION();
191 	Volume* volume = (Volume*)_volume->private_volume;
192 
193 	status_t status = volume->Unmount();
194 	delete volume;
195 
196 	RETURN_ERROR(status);
197 }
198 
199 
200 static status_t
201 bfs_read_fs_stat(fs_volume* _volume, struct fs_info* info)
202 {
203 	FUNCTION();
204 
205 	Volume* volume = (Volume*)_volume->private_volume;
206 	MutexLocker locker(volume->Lock());
207 
208 	// File system flags.
209 	info->flags = B_FS_IS_PERSISTENT | B_FS_HAS_ATTR | B_FS_HAS_MIME
210 		| (volume->IndicesNode() != NULL ? B_FS_HAS_QUERY : 0)
211 		| (volume->IsReadOnly() ? B_FS_IS_READONLY : 0)
212 		| B_FS_SUPPORTS_MONITOR_CHILDREN;
213 
214 	info->io_size = BFS_IO_SIZE;
215 		// whatever is appropriate here?
216 
217 	info->block_size = volume->BlockSize();
218 	info->total_blocks = volume->NumBlocks();
219 	info->free_blocks = volume->FreeBlocks();
220 
221 	// Volume name
222 	strlcpy(info->volume_name, volume->Name(), sizeof(info->volume_name));
223 
224 	// File system name
225 	strlcpy(info->fsh_name, "bfs", sizeof(info->fsh_name));
226 
227 	return B_OK;
228 }
229 
230 
231 static status_t
232 bfs_write_fs_stat(fs_volume* _volume, const struct fs_info* info, uint32 mask)
233 {
234 	FUNCTION_START(("mask = %ld\n", mask));
235 
236 	Volume* volume = (Volume*)_volume->private_volume;
237 	if (volume->IsReadOnly())
238 		return B_READ_ONLY_DEVICE;
239 
240 	MutexLocker locker(volume->Lock());
241 
242 	status_t status = B_BAD_VALUE;
243 
244 	if (mask & FS_WRITE_FSINFO_NAME) {
245 		disk_super_block& superBlock = volume->SuperBlock();
246 
247 		strncpy(superBlock.name, info->volume_name,
248 			sizeof(superBlock.name) - 1);
249 		superBlock.name[sizeof(superBlock.name) - 1] = '\0';
250 
251 		status = volume->WriteSuperBlock();
252 	}
253 	return status;
254 }
255 
256 
257 static status_t
258 bfs_sync(fs_volume* _volume)
259 {
260 	FUNCTION();
261 
262 	Volume* volume = (Volume*)_volume->private_volume;
263 	return volume->Sync();
264 }
265 
266 
267 //	#pragma mark -
268 
269 
270 /*!	Reads in the node from disk and creates an inode object from it.
271 */
272 static status_t
273 bfs_get_vnode(fs_volume* _volume, ino_t id, fs_vnode* _node, int* _type,
274 	uint32* _flags, bool reenter)
275 {
276 	//FUNCTION_START(("ino_t = %Ld\n", id));
277 	Volume* volume = (Volume*)_volume->private_volume;
278 
279 	// first inode may be after the log area, we don't go through
280 	// the hassle and try to load an earlier block from disk
281 	if (id < volume->ToBlock(volume->Log()) + volume->Log().Length()
282 		|| id > volume->NumBlocks()) {
283 		INFORM(("inode at %" B_PRIdINO " requested!\n", id));
284 		return B_ERROR;
285 	}
286 
287 	CachedBlock cached(volume, id);
288 	bfs_inode* node = (bfs_inode*)cached.Block();
289 	if (node == NULL) {
290 		FATAL(("could not read inode: %" B_PRIdINO "\n", id));
291 		return B_IO_ERROR;
292 	}
293 
294 	status_t status = node->InitCheck(volume);
295 	if (status != B_OK) {
296 		if ((node->Flags() & INODE_DELETED) != 0) {
297 			INFORM(("inode at %" B_PRIdINO " is already deleted!\n", id));
298 		} else {
299 			FATAL(("inode at %" B_PRIdINO " could not be read: %s!\n", id,
300 				strerror(status)));
301 		}
302 		return status;
303 	}
304 
305 	Inode* inode = new(std::nothrow) Inode(volume, id);
306 	if (inode == NULL)
307 		return B_NO_MEMORY;
308 
309 	status = inode->InitCheck(false);
310 	if (status != B_OK)
311 		delete inode;
312 
313 	if (status == B_OK) {
314 		_node->private_node = inode;
315 		_node->ops = &gBFSVnodeOps;
316 		*_type = inode->Mode();
317 		*_flags = 0;
318 	}
319 
320 	return status;
321 }
322 
323 
324 static status_t
325 bfs_put_vnode(fs_volume* _volume, fs_vnode* _node, bool reenter)
326 {
327 	Volume* volume = (Volume*)_volume->private_volume;
328 	Inode* inode = (Inode*)_node->private_node;
329 
330 	// since a directory's size can be changed without having it opened,
331 	// we need to take care about their preallocated blocks here
332 	if (!volume->IsReadOnly() && !volume->IsCheckingThread()
333 		&& inode->NeedsTrimming()) {
334 		Transaction transaction(volume, inode->BlockNumber());
335 
336 		if (inode->TrimPreallocation(transaction) == B_OK)
337 			transaction.Done();
338 		else if (transaction.HasParent()) {
339 			// TODO: for now, we don't let sub-transactions fail
340 			transaction.Done();
341 		}
342 	}
343 
344 	delete inode;
345 	return B_OK;
346 }
347 
348 
349 static status_t
350 bfs_remove_vnode(fs_volume* _volume, fs_vnode* _node, bool reenter)
351 {
352 	FUNCTION();
353 
354 	Volume* volume = (Volume*)_volume->private_volume;
355 	Inode* inode = (Inode*)_node->private_node;
356 
357 	// If the inode isn't in use anymore, we were called before
358 	// bfs_unlink() returns - in this case, we can just use the
359 	// transaction which has already deleted the inode.
360 	Transaction transaction(volume, volume->ToBlock(inode->Parent()));
361 
362 	// The file system check functionality uses this flag to prevent the space
363 	// used up by the inode from being freed - this flag is set only in
364 	// situations where this does not cause any harm as the block bitmap will
365 	// get fixed anyway in this case).
366 	if ((inode->Flags() & INODE_DONT_FREE_SPACE) != 0) {
367 		delete inode;
368 		return B_OK;
369 	}
370 
371 	ASSERT((inode->Flags() & INODE_DELETED) != 0);
372 
373 	status_t status = inode->Free(transaction);
374 	if (status == B_OK) {
375 		status = transaction.Done();
376 	} else if (transaction.HasParent()) {
377 		// TODO: for now, we don't let sub-transactions fail
378 		status = transaction.Done();
379 	}
380 
381 	volume->RemovedInodes().Remove(inode);
382 
383 	// TODO: the VFS currently does not allow this to fail
384 	delete inode;
385 
386 	return status;
387 }
388 
389 
390 static bool
391 bfs_can_page(fs_volume* _volume, fs_vnode* _v, void* _cookie)
392 {
393 	// TODO: we're obviously not even asked...
394 	return false;
395 }
396 
397 
398 static status_t
399 bfs_read_pages(fs_volume* _volume, fs_vnode* _node, void* _cookie,
400 	off_t pos, const iovec* vecs, size_t count, size_t* _numBytes)
401 {
402 	Volume* volume = (Volume*)_volume->private_volume;
403 	Inode* inode = (Inode*)_node->private_node;
404 
405 	if (inode->FileCache() == NULL)
406 		RETURN_ERROR(B_BAD_VALUE);
407 
408 	InodeReadLocker _(inode);
409 
410 	uint32 vecIndex = 0;
411 	size_t vecOffset = 0;
412 	size_t bytesLeft = *_numBytes;
413 	status_t status;
414 
415 	while (true) {
416 		file_io_vec fileVecs[8];
417 		size_t fileVecCount = 8;
418 
419 		status = file_map_translate(inode->Map(), pos, bytesLeft, fileVecs,
420 			&fileVecCount, 0);
421 		if (status != B_OK && status != B_BUFFER_OVERFLOW)
422 			break;
423 
424 		bool bufferOverflow = status == B_BUFFER_OVERFLOW;
425 
426 		size_t bytes = bytesLeft;
427 		status = read_file_io_vec_pages(volume->Device(), fileVecs,
428 			fileVecCount, vecs, count, &vecIndex, &vecOffset, &bytes);
429 		if (status != B_OK || !bufferOverflow)
430 			break;
431 
432 		pos += bytes;
433 		bytesLeft -= bytes;
434 	}
435 
436 	return status;
437 }
438 
439 
440 static status_t
441 bfs_write_pages(fs_volume* _volume, fs_vnode* _node, void* _cookie,
442 	off_t pos, const iovec* vecs, size_t count, size_t* _numBytes)
443 {
444 	Volume* volume = (Volume*)_volume->private_volume;
445 	Inode* inode = (Inode*)_node->private_node;
446 
447 	if (volume->IsReadOnly())
448 		return B_READ_ONLY_DEVICE;
449 
450 	if (inode->FileCache() == NULL)
451 		RETURN_ERROR(B_BAD_VALUE);
452 
453 	InodeReadLocker _(inode);
454 
455 	uint32 vecIndex = 0;
456 	size_t vecOffset = 0;
457 	size_t bytesLeft = *_numBytes;
458 	status_t status;
459 
460 	while (true) {
461 		file_io_vec fileVecs[8];
462 		size_t fileVecCount = 8;
463 
464 		status = file_map_translate(inode->Map(), pos, bytesLeft, fileVecs,
465 			&fileVecCount, 0);
466 		if (status != B_OK && status != B_BUFFER_OVERFLOW)
467 			break;
468 
469 		bool bufferOverflow = status == B_BUFFER_OVERFLOW;
470 
471 		size_t bytes = bytesLeft;
472 		status = write_file_io_vec_pages(volume->Device(), fileVecs,
473 			fileVecCount, vecs, count, &vecIndex, &vecOffset, &bytes);
474 		if (status != B_OK || !bufferOverflow)
475 			break;
476 
477 		pos += bytes;
478 		bytesLeft -= bytes;
479 	}
480 
481 	return status;
482 }
483 
484 
485 static status_t
486 bfs_io(fs_volume* _volume, fs_vnode* _node, void* _cookie, io_request* request)
487 {
488 	Volume* volume = (Volume*)_volume->private_volume;
489 	Inode* inode = (Inode*)_node->private_node;
490 
491 #ifndef FS_SHELL
492 	if (io_request_is_write(request) && volume->IsReadOnly()) {
493 		notify_io_request(request, B_READ_ONLY_DEVICE);
494 		return B_READ_ONLY_DEVICE;
495 	}
496 #endif
497 
498 	if (inode->FileCache() == NULL) {
499 #ifndef FS_SHELL
500 		notify_io_request(request, B_BAD_VALUE);
501 #endif
502 		RETURN_ERROR(B_BAD_VALUE);
503 	}
504 
505 	// We lock the node here and will unlock it in the "finished" hook.
506 	rw_lock_read_lock(&inode->Lock());
507 
508 	return do_iterative_fd_io(volume->Device(), request,
509 		iterative_io_get_vecs_hook, iterative_io_finished_hook, inode);
510 }
511 
512 
513 static status_t
514 bfs_get_file_map(fs_volume* _volume, fs_vnode* _node, off_t offset, size_t size,
515 	struct file_io_vec* vecs, size_t* _count)
516 {
517 	Volume* volume = (Volume*)_volume->private_volume;
518 	Inode* inode = (Inode*)_node->private_node;
519 
520 	int32 blockShift = volume->BlockShift();
521 	uint32 index = 0, max = *_count;
522 	block_run run;
523 	off_t fileOffset;
524 
525 	//FUNCTION_START(("offset = %Ld, size = %lu\n", offset, size));
526 
527 	while (true) {
528 		status_t status = inode->FindBlockRun(offset, run, fileOffset);
529 		if (status != B_OK)
530 			return status;
531 
532 		vecs[index].offset = volume->ToOffset(run) + offset - fileOffset;
533 		vecs[index].length = ((uint32)run.Length() << blockShift)
534 			- offset + fileOffset;
535 
536 		// are we already done?
537 		if ((uint64)size <= (uint64)vecs[index].length
538 			|| (uint64)offset + (uint64)vecs[index].length
539 				>= (uint64)inode->Size()) {
540 			if ((uint64)offset + (uint64)vecs[index].length
541 					> (uint64)inode->Size()) {
542 				// make sure the extent ends with the last official file
543 				// block (without taking any preallocations into account)
544 				vecs[index].length = round_up(inode->Size() - offset,
545 					volume->BlockSize());
546 			}
547 			*_count = index + 1;
548 			return B_OK;
549 		}
550 
551 		offset += vecs[index].length;
552 		size -= vecs[index].length;
553 		index++;
554 
555 		if (index >= max) {
556 			// we're out of file_io_vecs; let's bail out
557 			*_count = index;
558 			return B_BUFFER_OVERFLOW;
559 		}
560 	}
561 
562 	// can never get here
563 	return B_ERROR;
564 }
565 
566 
567 //	#pragma mark -
568 
569 
570 static status_t
571 bfs_lookup(fs_volume* _volume, fs_vnode* _directory, const char* file,
572 	ino_t* _vnodeID)
573 {
574 	Volume* volume = (Volume*)_volume->private_volume;
575 	Inode* directory = (Inode*)_directory->private_node;
576 
577 	InodeReadLocker locker(directory);
578 
579 	// check access permissions
580 	status_t status = directory->CheckPermissions(X_OK);
581 	if (status != B_OK)
582 		RETURN_ERROR(status);
583 
584 	BPlusTree* tree = directory->Tree();
585 	if (tree == NULL)
586 		RETURN_ERROR(B_BAD_VALUE);
587 
588 	status = tree->Find((uint8*)file, (uint16)strlen(file), _vnodeID);
589 	if (status != B_OK) {
590 		//PRINT(("bfs_walk() could not find %Ld:\"%s\": %s\n", directory->BlockNumber(), file, strerror(status)));
591 		if (status == B_ENTRY_NOT_FOUND)
592 			entry_cache_add_missing(volume->ID(), directory->ID(), file);
593 
594 		return status;
595 	}
596 
597 	entry_cache_add(volume->ID(), directory->ID(), file, *_vnodeID);
598 
599 	locker.Unlock();
600 
601 	Inode* inode;
602 	status = get_vnode(volume->FSVolume(), *_vnodeID, (void**)&inode);
603 	if (status != B_OK) {
604 		REPORT_ERROR(status);
605 		return B_ENTRY_NOT_FOUND;
606 	}
607 
608 	return B_OK;
609 }
610 
611 
612 static status_t
613 bfs_get_vnode_name(fs_volume* _volume, fs_vnode* _node, char* buffer,
614 	size_t bufferSize)
615 {
616 	Inode* inode = (Inode*)_node->private_node;
617 
618 	return inode->GetName(buffer, bufferSize);
619 }
620 
621 
622 static status_t
623 bfs_ioctl(fs_volume* _volume, fs_vnode* _node, void* _cookie, uint32 cmd,
624 	void* buffer, size_t bufferLength)
625 {
626 	FUNCTION_START(("node = %p, cmd = %lu, buf = %p, len = %ld\n", _node, cmd,
627 		buffer, bufferLength));
628 
629 	Volume* volume = (Volume*)_volume->private_volume;
630 
631 	switch (cmd) {
632 #ifndef FS_SHELL
633 		case B_TRIM_DEVICE:
634 		{
635 			fs_trim_data* trimData;
636 			MemoryDeleter deleter;
637 			status_t status = get_trim_data_from_user(buffer, bufferLength,
638 				deleter, trimData);
639 			if (status != B_OK)
640 				return status;
641 
642 			trimData->trimmed_size = 0;
643 
644 			for (uint32 i = 0; i < trimData->range_count; i++) {
645 				uint64 trimmedSize = 0;
646 				status_t status = volume->Allocator().Trim(
647 					trimData->ranges[i].offset, trimData->ranges[i].size,
648 					trimmedSize);
649 				if (status != B_OK)
650 					return status;
651 
652 				trimData->trimmed_size += trimmedSize;
653 			}
654 
655 			return copy_trim_data_to_user(buffer, trimData);
656 		}
657 #endif
658 
659 		case BFS_IOCTL_VERSION:
660 		{
661 			uint32 version = 0x10000;
662 			return user_memcpy(buffer, &version, sizeof(uint32));
663 		}
664 		case BFS_IOCTL_START_CHECKING:
665 		{
666 			// start checking
667 			BlockAllocator& allocator = volume->Allocator();
668 			check_control control;
669 			if (user_memcpy(&control, buffer, sizeof(check_control)) != B_OK)
670 				return B_BAD_ADDRESS;
671 
672 			status_t status = allocator.StartChecking(&control);
673 			if (status == B_OK) {
674 				file_cookie* cookie = (file_cookie*)_cookie;
675 				cookie->open_mode |= BFS_OPEN_MODE_CHECKING;
676 			}
677 
678 			return status;
679 		}
680 		case BFS_IOCTL_STOP_CHECKING:
681 		{
682 			// stop checking
683 			BlockAllocator& allocator = volume->Allocator();
684 			check_control control;
685 
686 			status_t status = allocator.StopChecking(&control);
687 			if (status == B_OK) {
688 				file_cookie* cookie = (file_cookie*)_cookie;
689 				cookie->open_mode &= ~BFS_OPEN_MODE_CHECKING;
690 			}
691 			if (status == B_OK)
692 				status = user_memcpy(buffer, &control, sizeof(check_control));
693 
694 			return status;
695 		}
696 		case BFS_IOCTL_CHECK_NEXT_NODE:
697 		{
698 			// check next
699 			BlockAllocator& allocator = volume->Allocator();
700 			check_control control;
701 
702 			status_t status = allocator.CheckNextNode(&control);
703 			if (status == B_OK)
704 				status = user_memcpy(buffer, &control, sizeof(check_control));
705 
706 			return status;
707 		}
708 		case BFS_IOCTL_UPDATE_BOOT_BLOCK:
709 		{
710 			// let's makebootable (or anyone else) update the boot block
711 			// while BFS is mounted
712 			update_boot_block update;
713 			if (bufferLength != sizeof(update_boot_block))
714 				return B_BAD_VALUE;
715 			if (user_memcpy(&update, buffer, sizeof(update_boot_block)) != B_OK)
716 				return B_BAD_ADDRESS;
717 
718 			uint32 minOffset = offsetof(disk_super_block, pad_to_block);
719 			if (update.offset < minOffset
720 				|| update.offset >= 512 || update.length > 512 - minOffset
721 				|| update.length + update.offset > 512) {
722 				return B_BAD_VALUE;
723 			}
724 			if (user_memcpy((uint8*)&volume->SuperBlock() + update.offset,
725 					update.data, update.length) != B_OK) {
726 				return B_BAD_ADDRESS;
727 			}
728 
729 			return volume->WriteSuperBlock();
730 		}
731 
732 #ifdef DEBUG_FRAGMENTER
733 		case 56741:
734 		{
735 			BlockAllocator& allocator = volume->Allocator();
736 			allocator.Fragment();
737 			return B_OK;
738 		}
739 #endif
740 
741 #ifdef DEBUG
742 		case 56742:
743 		{
744 			// allocate all free blocks and zero them out
745 			// (a test for the BlockAllocator)!
746 			BlockAllocator& allocator = volume->Allocator();
747 			Transaction transaction(volume, 0);
748 			CachedBlock cached(volume);
749 			block_run run;
750 			while (allocator.AllocateBlocks(transaction, 8, 0, 64, 1, run)
751 					== B_OK) {
752 				PRINT(("write block_run(%ld, %d, %d)\n", run.allocation_group,
753 					run.start, run.length));
754 				for (int32 i = 0;i < run.length;i++) {
755 					uint8* block = cached.SetToWritable(transaction, run);
756 					if (block != NULL)
757 						memset(block, 0, volume->BlockSize());
758 				}
759 			}
760 			return B_OK;
761 		}
762 #endif
763 	}
764 	return B_DEV_INVALID_IOCTL;
765 }
766 
767 
768 /*!	Sets the open-mode flags for the open file cookie - only
769 	supports O_APPEND currently, but that should be sufficient
770 	for a file system.
771 */
772 static status_t
773 bfs_set_flags(fs_volume* _volume, fs_vnode* _node, void* _cookie, int flags)
774 {
775 	FUNCTION_START(("node = %p, flags = %d", _node, flags));
776 
777 	file_cookie* cookie = (file_cookie*)_cookie;
778 	cookie->open_mode = (cookie->open_mode & ~O_APPEND) | (flags & O_APPEND);
779 
780 	return B_OK;
781 }
782 
783 
784 static status_t
785 bfs_fsync(fs_volume* _volume, fs_vnode* _node)
786 {
787 	FUNCTION();
788 
789 	Inode* inode = (Inode*)_node->private_node;
790 	return inode->Sync();
791 }
792 
793 
794 static status_t
795 bfs_read_stat(fs_volume* _volume, fs_vnode* _node, struct stat* stat)
796 {
797 	FUNCTION();
798 
799 	Inode* inode = (Inode*)_node->private_node;
800 	fill_stat_buffer(inode, *stat);
801 	return B_OK;
802 }
803 
804 
805 static status_t
806 bfs_write_stat(fs_volume* _volume, fs_vnode* _node, const struct stat* stat,
807 	uint32 mask)
808 {
809 	FUNCTION();
810 
811 	Volume* volume = (Volume*)_volume->private_volume;
812 	Inode* inode = (Inode*)_node->private_node;
813 
814 	if (volume->IsReadOnly())
815 		return B_READ_ONLY_DEVICE;
816 
817 	// TODO: we should definitely check a bit more if the new stats are
818 	//	valid - or even better, the VFS should check this before calling us
819 
820 	bfs_inode& node = inode->Node();
821 	bool updateTime = false;
822 	uid_t uid = geteuid();
823 
824 	bool isOwnerOrRoot = uid == 0 || uid == (uid_t)node.UserID();
825 	bool hasWriteAccess = inode->CheckPermissions(W_OK) == B_OK;
826 
827 	Transaction transaction(volume, inode->BlockNumber());
828 	inode->WriteLockInTransaction(transaction);
829 
830 	if ((mask & B_STAT_SIZE) != 0 && inode->Size() != stat->st_size) {
831 		// Since B_STAT_SIZE is the only thing that can fail directly, we
832 		// do it first, so that the inode state will still be consistent
833 		// with the on-disk version
834 		if (inode->IsDirectory())
835 			return B_IS_A_DIRECTORY;
836 		if (!inode->IsFile())
837 			return B_BAD_VALUE;
838 		if (!hasWriteAccess)
839 			RETURN_ERROR(B_NOT_ALLOWED);
840 
841 		off_t oldSize = inode->Size();
842 
843 		status_t status = inode->SetFileSize(transaction, stat->st_size);
844 		if (status != B_OK)
845 			return status;
846 
847 		// fill the new blocks (if any) with zeros
848 		if ((mask & B_STAT_SIZE_INSECURE) == 0) {
849 			// We must not keep the inode locked during a write operation,
850 			// or else we might deadlock.
851 			rw_lock_write_unlock(&inode->Lock());
852 			inode->FillGapWithZeros(oldSize, inode->Size());
853 			rw_lock_write_lock(&inode->Lock());
854 		}
855 
856 		if (!inode->IsDeleted()) {
857 			Index index(volume);
858 			index.UpdateSize(transaction, inode);
859 
860 			updateTime = true;
861 		}
862 	}
863 
864 	if ((mask & B_STAT_UID) != 0) {
865 		// only root should be allowed
866 		if (uid != 0)
867 			RETURN_ERROR(B_NOT_ALLOWED);
868 		node.uid = HOST_ENDIAN_TO_BFS_INT32(stat->st_uid);
869 		updateTime = true;
870 	}
871 
872 	if ((mask & B_STAT_GID) != 0) {
873 		// only the user or root can do that
874 		if (!isOwnerOrRoot)
875 			RETURN_ERROR(B_NOT_ALLOWED);
876 		node.gid = HOST_ENDIAN_TO_BFS_INT32(stat->st_gid);
877 		updateTime = true;
878 	}
879 
880 	if ((mask & B_STAT_MODE) != 0) {
881 		// only the user or root can do that
882 		if (!isOwnerOrRoot)
883 			RETURN_ERROR(B_NOT_ALLOWED);
884 		PRINT(("original mode = %ld, stat->st_mode = %d\n", node.Mode(),
885 			stat->st_mode));
886 		node.mode = HOST_ENDIAN_TO_BFS_INT32((node.Mode() & ~S_IUMSK)
887 			| (stat->st_mode & S_IUMSK));
888 		updateTime = true;
889 	}
890 
891 	if ((mask & B_STAT_CREATION_TIME) != 0) {
892 		// the user or root can do that or any user with write access
893 		if (!isOwnerOrRoot && !hasWriteAccess)
894 			RETURN_ERROR(B_NOT_ALLOWED);
895 		node.create_time
896 			= HOST_ENDIAN_TO_BFS_INT64(bfs_inode::ToInode(stat->st_crtim));
897 	}
898 
899 	if ((mask & B_STAT_MODIFICATION_TIME) != 0) {
900 		// the user or root can do that or any user with write access
901 		if (!isOwnerOrRoot && !hasWriteAccess)
902 			RETURN_ERROR(B_NOT_ALLOWED);
903 		if (!inode->InLastModifiedIndex()) {
904 			// directory modification times are not part of the index
905 			node.last_modified_time
906 				= HOST_ENDIAN_TO_BFS_INT64(bfs_inode::ToInode(stat->st_mtim));
907 		} else if (!inode->IsDeleted()) {
908 			// Index::UpdateLastModified() will set the new time in the inode
909 			Index index(volume);
910 			index.UpdateLastModified(transaction, inode,
911 				bfs_inode::ToInode(stat->st_mtim));
912 		}
913 	}
914 
915 	if ((mask & B_STAT_CHANGE_TIME) != 0 || updateTime) {
916 		// the user or root can do that or any user with write access
917 		if (!isOwnerOrRoot && !hasWriteAccess)
918 			RETURN_ERROR(B_NOT_ALLOWED);
919 		bigtime_t newTime;
920 		if ((mask & B_STAT_CHANGE_TIME) == 0)
921 			newTime = bfs_inode::ToInode(real_time_clock_usecs());
922 		else
923 			newTime = bfs_inode::ToInode(stat->st_ctim);
924 
925 		node.status_change_time = HOST_ENDIAN_TO_BFS_INT64(newTime);
926 	}
927 
928 	status_t status = inode->WriteBack(transaction);
929 	if (status == B_OK)
930 		status = transaction.Done();
931 	if (status == B_OK)
932 		notify_stat_changed(volume->ID(), inode->ParentID(), inode->ID(), mask);
933 
934 	return status;
935 }
936 
937 
938 status_t
939 bfs_create(fs_volume* _volume, fs_vnode* _directory, const char* name,
940 	int openMode, int mode, void** _cookie, ino_t* _vnodeID)
941 {
942 	FUNCTION_START(("name = \"%s\", perms = %d, openMode = %d\n", name, mode,
943 		openMode));
944 
945 	Volume* volume = (Volume*)_volume->private_volume;
946 	Inode* directory = (Inode*)_directory->private_node;
947 
948 	if (volume->IsReadOnly())
949 		return B_READ_ONLY_DEVICE;
950 
951 	if (!directory->IsDirectory())
952 		RETURN_ERROR(B_BAD_TYPE);
953 
954 	// We are creating the cookie at this point, so that we don't have
955 	// to remove the inode if we don't have enough free memory later...
956 	file_cookie* cookie = new(std::nothrow) file_cookie;
957 	if (cookie == NULL)
958 		RETURN_ERROR(B_NO_MEMORY);
959 
960 	// initialize the cookie
961 	cookie->open_mode = openMode;
962 	cookie->last_size = 0;
963 	cookie->last_notification = system_time();
964 
965 	Transaction transaction(volume, directory->BlockNumber());
966 
967 	Inode* inode;
968 	bool created;
969 	status_t status = Inode::Create(transaction, directory, name,
970 		S_FILE | (mode & S_IUMSK), openMode, 0, &created, _vnodeID, &inode);
971 
972 	// Disable the file cache, if requested?
973 	if (status == B_OK && (openMode & O_NOCACHE) != 0
974 		&& inode->FileCache() != NULL) {
975 		status = file_cache_disable(inode->FileCache());
976 	}
977 
978 	entry_cache_add(volume->ID(), directory->ID(), name, *_vnodeID);
979 
980 	if (status == B_OK)
981 		status = transaction.Done();
982 
983 	if (status == B_OK) {
984 		// register the cookie
985 		*_cookie = cookie;
986 
987 		if (created) {
988 			notify_entry_created(volume->ID(), directory->ID(), name,
989 				*_vnodeID);
990 		}
991 	} else {
992 		entry_cache_remove(volume->ID(), directory->ID(), name);
993 		delete cookie;
994 	}
995 
996 	return status;
997 }
998 
999 
1000 static status_t
1001 bfs_create_symlink(fs_volume* _volume, fs_vnode* _directory, const char* name,
1002 	const char* path, int mode)
1003 {
1004 	FUNCTION_START(("name = \"%s\", path = \"%s\"\n", name, path));
1005 
1006 	Volume* volume = (Volume*)_volume->private_volume;
1007 	Inode* directory = (Inode*)_directory->private_node;
1008 
1009 	if (volume->IsReadOnly())
1010 		return B_READ_ONLY_DEVICE;
1011 
1012 	if (!directory->IsDirectory())
1013 		RETURN_ERROR(B_BAD_TYPE);
1014 
1015 	status_t status = directory->CheckPermissions(W_OK);
1016 	if (status < B_OK)
1017 		RETURN_ERROR(status);
1018 
1019 	Transaction transaction(volume, directory->BlockNumber());
1020 
1021 	Inode* link;
1022 	off_t id;
1023 	status = Inode::Create(transaction, directory, name, S_SYMLINK | 0777,
1024 		0, 0, NULL, &id, &link);
1025 	if (status < B_OK)
1026 		RETURN_ERROR(status);
1027 
1028 	size_t length = strlen(path);
1029 	if (length < SHORT_SYMLINK_NAME_LENGTH) {
1030 		strcpy(link->Node().short_symlink, path);
1031 	} else {
1032 		link->Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_LONG_SYMLINK
1033 			| INODE_LOGGED);
1034 
1035 		// links usually don't have a file cache attached - but we now need one
1036 		link->SetFileCache(file_cache_create(volume->ID(), link->ID(), 0));
1037 		link->SetMap(file_map_create(volume->ID(), link->ID(), 0));
1038 
1039 		// The following call will have to write the inode back, so
1040 		// we don't have to do that here...
1041 		status = link->WriteAt(transaction, 0, (const uint8*)path, &length);
1042 	}
1043 
1044 	if (status == B_OK)
1045 		status = link->WriteBack(transaction);
1046 
1047 	// Inode::Create() left the inode locked in memory, and also doesn't
1048 	// publish links
1049 	publish_vnode(volume->FSVolume(), id, link, &gBFSVnodeOps, link->Mode(), 0);
1050 	put_vnode(volume->FSVolume(), id);
1051 
1052 	if (status == B_OK) {
1053 		entry_cache_add(volume->ID(), directory->ID(), name, id);
1054 
1055 		status = transaction.Done();
1056 		if (status == B_OK)
1057 			notify_entry_created(volume->ID(), directory->ID(), name, id);
1058 		else
1059 			entry_cache_remove(volume->ID(), directory->ID(), name);
1060 	}
1061 
1062 	return status;
1063 }
1064 
1065 
1066 status_t
1067 bfs_link(fs_volume* _volume, fs_vnode* dir, const char* name, fs_vnode* node)
1068 {
1069 	FUNCTION_START(("name = \"%s\"\n", name));
1070 
1071 	// This one won't be implemented in a binary compatible BFS
1072 	return B_UNSUPPORTED;
1073 }
1074 
1075 
1076 status_t
1077 bfs_unlink(fs_volume* _volume, fs_vnode* _directory, const char* name)
1078 {
1079 	FUNCTION_START(("name = \"%s\"\n", name));
1080 
1081 	if (!strcmp(name, "..") || !strcmp(name, "."))
1082 		return B_NOT_ALLOWED;
1083 
1084 	Volume* volume = (Volume*)_volume->private_volume;
1085 	Inode* directory = (Inode*)_directory->private_node;
1086 
1087 	status_t status = directory->CheckPermissions(W_OK);
1088 	if (status < B_OK)
1089 		return status;
1090 
1091 	Transaction transaction(volume, directory->BlockNumber());
1092 
1093 	off_t id;
1094 	status = directory->Remove(transaction, name, &id);
1095 	if (status == B_OK) {
1096 		entry_cache_remove(volume->ID(), directory->ID(), name);
1097 
1098 		status = transaction.Done();
1099 		if (status == B_OK)
1100 			notify_entry_removed(volume->ID(), directory->ID(), name, id);
1101 		else
1102 			entry_cache_add(volume->ID(), directory->ID(), name, id);
1103 	}
1104 	return status;
1105 }
1106 
1107 
/*!	Moves the entry \a oldName of \a _oldDir to \a newName in \a _newDir.
	Returns B_OK right away if directory and name are both unchanged.
	The caller needs write permission on both directories. If \a newName
	already exists in the target directory, the existing entry is removed
	first - but only if it and the moved node are either both directories
	or both non-directories. Moving a node into itself or into one of its
	descendants fails with B_BAD_VALUE.
	On success the entry cache is updated and notify_entry_moved() is sent;
	if the final transaction.Done() fails, the entry cache changes for the
	old and new name are reverted.
*/
status_t
bfs_rename(fs_volume* _volume, fs_vnode* _oldDir, const char* oldName,
	fs_vnode* _newDir, const char* newName)
{
	FUNCTION_START(("oldDir = %p, oldName = \"%s\", newDir = %p, newName = "
		"\"%s\"\n", _oldDir, oldName, _newDir, newName));

	Volume* volume = (Volume*)_volume->private_volume;
	Inode* oldDirectory = (Inode*)_oldDir->private_node;
	Inode* newDirectory = (Inode*)_newDir->private_node;

	// are we already done?
	if (oldDirectory == newDirectory && !strcmp(oldName, newName))
		return B_OK;

	Transaction transaction(volume, oldDirectory->BlockNumber());

	// Lock the directories via the transaction; the second lock is skipped
	// when both are the same inode.
	oldDirectory->WriteLockInTransaction(transaction);
	if (oldDirectory != newDirectory)
		newDirectory->WriteLockInTransaction(transaction);

	// are we allowed to do what we've been told?
	// NOTE(review): all early returns from here on rely on the Transaction
	// object cleaning up when it goes out of scope - presumably it aborts
	// and unlocks; confirm against the Transaction class.
	status_t status = oldDirectory->CheckPermissions(W_OK);
	if (status == B_OK)
		status = newDirectory->CheckPermissions(W_OK);
	if (status != B_OK)
		return status;

	// Get the directory's tree, and a pointer to the inode which should be
	// changed
	BPlusTree* tree = oldDirectory->Tree();
	if (tree == NULL)
		RETURN_ERROR(B_BAD_VALUE);

	off_t id;
	status = tree->Find((const uint8*)oldName, strlen(oldName), &id);
	if (status != B_OK)
		RETURN_ERROR(status);

	Vnode vnode(volume, id);
	Inode* inode;
	if (vnode.Get(&inode) != B_OK)
		return B_IO_ERROR;

	// Don't move a directory into one of its children - we soar up
	// from the newDirectory to either the root node or the old
	// directory, whichever comes first.
	// If we meet our inode on that way, we have to bail out.

	if (oldDirectory != newDirectory) {
		ino_t parent = newDirectory->ID();
		ino_t root = volume->RootNode()->ID();

		while (true) {
			if (parent == id)
				return B_BAD_VALUE;
			else if (parent == root || parent == oldDirectory->ID())
				break;

			// This "vnode" intentionally shadows the outer one; it only
			// lives for one loop iteration.
			Vnode vnode(volume, parent);
			Inode* parentNode;
			if (vnode.Get(&parentNode) != B_OK)
				return B_ERROR;

			parent = volume->ToVnode(parentNode->Parent());
		}
	}

	// Everything okay? Then lets get to work...

	// First, try to make sure there is nothing that will stop us in
	// the target directory - since this is the only non-critical
	// failure, we will test this case first
	BPlusTree* newTree = tree;
	if (newDirectory != oldDirectory) {
		newTree = newDirectory->Tree();
		if (newTree == NULL)
			RETURN_ERROR(B_BAD_VALUE);
	}

	status = newTree->Insert(transaction, (const uint8*)newName,
		strlen(newName), id);
	if (status == B_NAME_IN_USE) {
		// If there is already a file with that name, we have to remove
		// it, as long it's not a directory with files in it
		off_t clobber;
		if (newTree->Find((const uint8*)newName, strlen(newName), &clobber)
				< B_OK)
			return B_NAME_IN_USE;
		// the existing entry already refers to the node we are renaming
		if (clobber == id)
			return B_BAD_VALUE;

		// again shadows the outer "vnode", limited to this block
		Vnode vnode(volume, clobber);
		Inode* other;
		if (vnode.Get(&other) < B_OK)
			return B_NAME_IN_USE;

		// only allowed, if either both nodes are directories or neither is
		if (inode->IsDirectory() != other->IsDirectory())
			return other->IsDirectory() ? B_IS_A_DIRECTORY : B_NOT_A_DIRECTORY;

		status = newDirectory->Remove(transaction, newName, NULL,
			other->IsDirectory());
		if (status < B_OK)
			return status;

		entry_cache_remove(volume->ID(), newDirectory->ID(), newName);

		// NOTE(review): this notification is sent before the transaction
		// has been committed.
		notify_entry_removed(volume->ID(), newDirectory->ID(), newName,
			clobber);

		// the name is free now, so retry the insertion
		status = newTree->Insert(transaction, (const uint8*)newName,
			strlen(newName), id);
	}
	if (status != B_OK)
		return status;

	inode->WriteLockInTransaction(transaction);

	volume->UpdateLiveQueriesRenameMove(inode, oldDirectory->ID(), oldName,
		newDirectory->ID(), newName);

	// update the name only when they differ
	if (strcmp(oldName, newName)) {
		status = inode->SetName(transaction, newName);
		if (status == B_OK) {
			// the result of UpdateName() is not checked here
			Index index(volume);
			index.UpdateName(transaction, oldName, newName, inode);
		}
	}

	if (status == B_OK) {
		status = tree->Remove(transaction, (const uint8*)oldName,
			strlen(oldName), id);
		if (status == B_OK) {
			inode->Parent() = newDirectory->BlockRun();

			// if it's a directory, update the parent directory pointer
			// in its tree if necessary
			BPlusTree* movedTree = inode->Tree();
			if (oldDirectory != newDirectory
				&& inode->IsDirectory()
				&& movedTree != NULL) {
				status = movedTree->Replace(transaction, (const uint8*)"..",
					2, newDirectory->ID());

				if (status == B_OK) {
					// update/add the cache entry for the parent
					entry_cache_add(volume->ID(), id, "..", newDirectory->ID());
				}
			}

			if (status == B_OK && newDirectory != oldDirectory)
				status = oldDirectory->ContainerContentsChanged(transaction);
			if (status == B_OK)
				status = newDirectory->ContainerContentsChanged(transaction);

			if (status == B_OK)
				status = inode->WriteBack(transaction);

			if (status == B_OK) {
				entry_cache_remove(volume->ID(), oldDirectory->ID(), oldName);
				entry_cache_add(volume->ID(), newDirectory->ID(), newName, id);

				status = transaction.Done();
				if (status == B_OK) {
					notify_entry_moved(volume->ID(), oldDirectory->ID(),
						oldName, newDirectory->ID(), newName, id);
					return B_OK;
				}

				// committing failed: revert the two entry cache changes
				// made just above
				entry_cache_remove(volume->ID(), newDirectory->ID(), newName);
				entry_cache_add(volume->ID(), oldDirectory->ID(), oldName, id);
			}
		}
	}

	return status;
}
1287 
1288 
1289 static status_t
1290 bfs_open(fs_volume* _volume, fs_vnode* _node, int openMode, void** _cookie)
1291 {
1292 	FUNCTION();
1293 
1294 	Volume* volume = (Volume*)_volume->private_volume;
1295 	Inode* inode = (Inode*)_node->private_node;
1296 
1297 	// Opening a directory read-only is allowed, although you can't read
1298 	// any data from it.
1299 	if (inode->IsDirectory() && (openMode & O_RWMASK) != O_RDONLY)
1300 		return B_IS_A_DIRECTORY;
1301 	if ((openMode & O_DIRECTORY) != 0 && !inode->IsDirectory())
1302 		return B_NOT_A_DIRECTORY;
1303 
1304 	status_t status = inode->CheckPermissions(open_mode_to_access(openMode)
1305 		| ((openMode & O_TRUNC) != 0 ? W_OK : 0));
1306 	if (status != B_OK)
1307 		RETURN_ERROR(status);
1308 
1309 	file_cookie* cookie = new(std::nothrow) file_cookie;
1310 	if (cookie == NULL)
1311 		RETURN_ERROR(B_NO_MEMORY);
1312 	ObjectDeleter<file_cookie> cookieDeleter(cookie);
1313 
1314 	// initialize the cookie
1315 	cookie->open_mode = openMode & BFS_OPEN_MODE_USER_MASK;
1316 	cookie->last_size = inode->Size();
1317 	cookie->last_notification = system_time();
1318 
1319 	// Disable the file cache, if requested?
1320 	CObjectDeleter<void> fileCacheEnabler(file_cache_enable);
1321 	if ((openMode & O_NOCACHE) != 0 && inode->FileCache() != NULL) {
1322 		status = file_cache_disable(inode->FileCache());
1323 		if (status != B_OK)
1324 			return status;
1325 		fileCacheEnabler.SetTo(inode->FileCache());
1326 	}
1327 
1328 	// Should we truncate the file?
1329 	if ((openMode & O_TRUNC) != 0) {
1330 		if ((openMode & O_RWMASK) == O_RDONLY)
1331 			return B_NOT_ALLOWED;
1332 
1333 		Transaction transaction(volume, inode->BlockNumber());
1334 		inode->WriteLockInTransaction(transaction);
1335 
1336 		status_t status = inode->SetFileSize(transaction, 0);
1337 		if (status == B_OK)
1338 			status = inode->WriteBack(transaction);
1339 		if (status == B_OK)
1340 			status = transaction.Done();
1341 		if (status != B_OK)
1342 			return status;
1343 	}
1344 
1345 	fileCacheEnabler.Detach();
1346 	cookieDeleter.Detach();
1347 	*_cookie = cookie;
1348 	return B_OK;
1349 }
1350 
1351 
1352 static status_t
1353 bfs_read(fs_volume* _volume, fs_vnode* _node, void* _cookie, off_t pos,
1354 	void* buffer, size_t* _length)
1355 {
1356 	//FUNCTION();
1357 	Inode* inode = (Inode*)_node->private_node;
1358 
1359 	if (!inode->HasUserAccessableStream()) {
1360 		*_length = 0;
1361 		return inode->IsDirectory() ? B_IS_A_DIRECTORY : B_BAD_VALUE;
1362 	}
1363 
1364 	return inode->ReadAt(pos, (uint8*)buffer, _length);
1365 }
1366 
1367 
1368 static status_t
1369 bfs_write(fs_volume* _volume, fs_vnode* _node, void* _cookie, off_t pos,
1370 	const void* buffer, size_t* _length)
1371 {
1372 	//FUNCTION();
1373 	Volume* volume = (Volume*)_volume->private_volume;
1374 	Inode* inode = (Inode*)_node->private_node;
1375 
1376 	if (volume->IsReadOnly())
1377 		return B_READ_ONLY_DEVICE;
1378 
1379 	if (!inode->HasUserAccessableStream()) {
1380 		*_length = 0;
1381 		return inode->IsDirectory() ? B_IS_A_DIRECTORY : B_BAD_VALUE;
1382 	}
1383 
1384 	file_cookie* cookie = (file_cookie*)_cookie;
1385 
1386 	if (cookie->open_mode & O_APPEND)
1387 		pos = inode->Size();
1388 
1389 	Transaction transaction;
1390 		// We are not starting the transaction here, since
1391 		// it might not be needed at all (the contents of
1392 		// regular files aren't logged)
1393 
1394 	status_t status = inode->WriteAt(transaction, pos, (const uint8*)buffer,
1395 		_length);
1396 	if (status == B_OK)
1397 		status = transaction.Done();
1398 	if (status == B_OK) {
1399 		InodeReadLocker locker(inode);
1400 
1401 		// periodically notify if the file size has changed
1402 		// TODO: should we better test for a change in the last_modified time only?
1403 		if (!inode->IsDeleted() && cookie->last_size != inode->Size()
1404 			&& system_time() > cookie->last_notification
1405 					+ INODE_NOTIFICATION_INTERVAL) {
1406 			notify_stat_changed(volume->ID(), inode->ParentID(), inode->ID(),
1407 				B_STAT_MODIFICATION_TIME | B_STAT_SIZE | B_STAT_INTERIM_UPDATE);
1408 			cookie->last_size = inode->Size();
1409 			cookie->last_notification = system_time();
1410 		}
1411 	}
1412 
1413 	return status;
1414 }
1415 
1416 
/*!	Close hook for files - nothing to do here, always returns B_OK.
	The cookie itself is released in bfs_free_cookie().
*/
static status_t
bfs_close(fs_volume* _volume, fs_vnode* _node, void* _cookie)
{
	FUNCTION();
	return B_OK;
}
1423 
1424 
/*!	Frees the file_cookie passed in \a _cookie and finishes pending work
	for the node.
	Unless the volume is read-only or the caller is the checking thread, a
	transaction is started when the cookie's open mode has any O_RWMASK bit
	set, the inode is not deleted, and either preallocated blocks need
	trimming, the modification time changed, or the size of a node in the
	size index changed. Within that transaction the preallocation is
	trimmed, the size and last_modified indices are updated, and a stat
	notification is sent.
	Independently of that, an aborted check run is stopped, and the file
	cache is enabled again if the cookie was opened with O_NOCACHE.
	Always returns B_OK.
*/
static status_t
bfs_free_cookie(fs_volume* _volume, fs_vnode* _node, void* _cookie)
{
	FUNCTION();

	file_cookie* cookie = (file_cookie*)_cookie;
	Volume* volume = (Volume*)_volume->private_volume;
	Inode* inode = (Inode*)_node->private_node;

	// only started below if there is actually something to update
	Transaction transaction;
	bool needsTrimming = false;

	if (!volume->IsReadOnly() && !volume->IsCheckingThread()) {
		InodeReadLocker locker(inode);
		needsTrimming = inode->NeedsTrimming();

		if ((cookie->open_mode & O_RWMASK) != 0
			&& !inode->IsDeleted()
			&& (needsTrimming
				|| inode->OldLastModified() != inode->LastModified()
				|| (inode->InSizeIndex()
					// TODO: this can prevent the size update notification
					// for nodes not in the index!
					&& inode->OldSize() != inode->Size()))) {
			// the read lock is released before the transaction is started
			locker.Unlock();
			transaction.Start(volume, inode->BlockNumber());
		}
	}

	// B_ERROR only means "no transaction, nothing to do" here; it is never
	// returned to the caller
	status_t status = transaction.IsStarted() ? B_OK : B_ERROR;

	if (status == B_OK) {
		inode->WriteLockInTransaction(transaction);

		// trim the preallocated blocks and update the size,
		// and last_modified indices if needed
		bool changedSize = false, changedTime = false;
		Index index(volume);

		if (needsTrimming) {
			status = inode->TrimPreallocation(transaction);
			if (status < B_OK) {
				FATAL(("Could not trim preallocated blocks: inode %" B_PRIdINO
					", transaction %d: %s!\n", inode->ID(),
					(int)transaction.ID(), strerror(status)));

				// we still want this transaction to succeed
				status = B_OK;
			}
		}
		if (inode->OldSize() != inode->Size()) {
			if (inode->InSizeIndex())
				index.UpdateSize(transaction, inode);
			changedSize = true;
		}
		if (inode->OldLastModified() != inode->LastModified()) {
			if (inode->InLastModifiedIndex()) {
				index.UpdateLastModified(transaction, inode,
					inode->LastModified());
			}
			changedTime = true;

			// updating the index doesn't write back the inode
			// (the result of WriteBack() is not checked)
			inode->WriteBack(transaction);
		}

		if (changedSize || changedTime) {
			notify_stat_changed(volume->ID(), inode->ParentID(), inode->ID(),
				(changedTime ? B_STAT_MODIFICATION_TIME : 0)
				| (changedSize ? B_STAT_SIZE : 0));
		}
	}
	// the result of Done() is ignored - this hook reports B_OK regardless
	if (status == B_OK)
		transaction.Done();

	if ((cookie->open_mode & BFS_OPEN_MODE_CHECKING) != 0) {
		// "chkbfs" exited abnormally, so we have to stop it here...
		FATAL(("check process was aborted!\n"));
		volume->Allocator().StopChecking(NULL);
	}

	// counterpart to the file_cache_disable() call done for O_NOCACHE opens
	if ((cookie->open_mode & O_NOCACHE) != 0 && inode->FileCache() != NULL)
		file_cache_enable(inode->FileCache());

	delete cookie;
	return B_OK;
}
1512 
1513 
1514 /*!	Checks access permissions, return B_NOT_ALLOWED if the action
1515 	is not allowed.
1516 */
1517 static status_t
1518 bfs_access(fs_volume* _volume, fs_vnode* _node, int accessMode)
1519 {
1520 	//FUNCTION();
1521 
1522 	Inode* inode = (Inode*)_node->private_node;
1523 	status_t status = inode->CheckPermissions(accessMode);
1524 	if (status < B_OK)
1525 		RETURN_ERROR(status);
1526 
1527 	return B_OK;
1528 }
1529 
1530 
1531 static status_t
1532 bfs_read_link(fs_volume* _volume, fs_vnode* _node, char* buffer,
1533 	size_t* _bufferSize)
1534 {
1535 	FUNCTION();
1536 
1537 	Inode* inode = (Inode*)_node->private_node;
1538 
1539 	if (!inode->IsSymLink())
1540 		RETURN_ERROR(B_BAD_VALUE);
1541 
1542 	if ((inode->Flags() & INODE_LONG_SYMLINK) != 0) {
1543 		if ((uint64)inode->Size() < (uint64)*_bufferSize)
1544 			*_bufferSize = inode->Size();
1545 
1546 		status_t status = inode->ReadAt(0, (uint8*)buffer, _bufferSize);
1547 		if (status < B_OK)
1548 			RETURN_ERROR(status);
1549 
1550 		return B_OK;
1551 	}
1552 
1553 	size_t linkLen = strlen(inode->Node().short_symlink);
1554 	if (linkLen < *_bufferSize)
1555 		*_bufferSize = linkLen;
1556 
1557 	return user_memcpy(buffer, inode->Node().short_symlink, *_bufferSize);
1558 }
1559 
1560 
1561 //	#pragma mark - Directory functions
1562 
1563 
1564 static status_t
1565 bfs_create_dir(fs_volume* _volume, fs_vnode* _directory, const char* name,
1566 	int mode)
1567 {
1568 	FUNCTION_START(("name = \"%s\", perms = %d\n", name, mode));
1569 
1570 	Volume* volume = (Volume*)_volume->private_volume;
1571 	Inode* directory = (Inode*)_directory->private_node;
1572 
1573 	if (volume->IsReadOnly())
1574 		return B_READ_ONLY_DEVICE;
1575 
1576 	if (!directory->IsDirectory())
1577 		RETURN_ERROR(B_BAD_TYPE);
1578 
1579 	status_t status = directory->CheckPermissions(W_OK);
1580 	if (status < B_OK)
1581 		RETURN_ERROR(status);
1582 
1583 	Transaction transaction(volume, directory->BlockNumber());
1584 
1585 	// Inode::Create() locks the inode if we pass the "id" parameter, but we
1586 	// need it anyway
1587 	off_t id;
1588 	status = Inode::Create(transaction, directory, name,
1589 		S_DIRECTORY | (mode & S_IUMSK), 0, 0, NULL, &id);
1590 	if (status == B_OK) {
1591 		put_vnode(volume->FSVolume(), id);
1592 
1593 		entry_cache_add(volume->ID(), directory->ID(), name, id);
1594 
1595 		status = transaction.Done();
1596 		if (status == B_OK)
1597 			notify_entry_created(volume->ID(), directory->ID(), name, id);
1598 		else
1599 			entry_cache_remove(volume->ID(), directory->ID(), name);
1600 	}
1601 
1602 	return status;
1603 }
1604 
1605 
1606 static status_t
1607 bfs_remove_dir(fs_volume* _volume, fs_vnode* _directory, const char* name)
1608 {
1609 	FUNCTION_START(("name = \"%s\"\n", name));
1610 
1611 	Volume* volume = (Volume*)_volume->private_volume;
1612 	Inode* directory = (Inode*)_directory->private_node;
1613 
1614 	Transaction transaction(volume, directory->BlockNumber());
1615 
1616 	off_t id;
1617 	status_t status = directory->Remove(transaction, name, &id, true);
1618 	if (status == B_OK) {
1619 		// Remove the cache entry for the directory and potentially also
1620 		// the parent entry still belonging to the directory
1621 		entry_cache_remove(volume->ID(), directory->ID(), name);
1622 		entry_cache_remove(volume->ID(), id, "..");
1623 
1624 		status = transaction.Done();
1625 		if (status == B_OK)
1626 			notify_entry_removed(volume->ID(), directory->ID(), name, id);
1627 		else {
1628 			entry_cache_add(volume->ID(), directory->ID(), name, id);
1629 			entry_cache_add(volume->ID(), id, "..", id);
1630 		}
1631 	}
1632 
1633 	return status;
1634 }
1635 
1636 
1637 /*!	Opens a directory ready to be traversed.
1638 	bfs_open_dir() is also used by bfs_open_index_dir().
1639 */
1640 static status_t
1641 bfs_open_dir(fs_volume* _volume, fs_vnode* _node, void** _cookie)
1642 {
1643 	FUNCTION();
1644 
1645 	Inode* inode = (Inode*)_node->private_node;
1646 	status_t status = inode->CheckPermissions(R_OK);
1647 	if (status < B_OK)
1648 		RETURN_ERROR(status);
1649 
1650 	// we don't ask here for directories only, because the bfs_open_index_dir()
1651 	// function utilizes us (so we must be able to open indices as well)
1652 	if (!inode->IsContainer())
1653 		RETURN_ERROR(B_NOT_A_DIRECTORY);
1654 
1655 	BPlusTree* tree = inode->Tree();
1656 	if (tree == NULL)
1657 		RETURN_ERROR(B_BAD_VALUE);
1658 
1659 	TreeIterator* iterator = new(std::nothrow) TreeIterator(tree);
1660 	if (iterator == NULL)
1661 		RETURN_ERROR(B_NO_MEMORY);
1662 
1663 	*_cookie = iterator;
1664 	return B_OK;
1665 }
1666 
1667 
1668 static status_t
1669 bfs_read_dir(fs_volume* _volume, fs_vnode* _node, void* _cookie,
1670 	struct dirent* dirent, size_t bufferSize, uint32* _num)
1671 {
1672 	FUNCTION();
1673 
1674 	TreeIterator* iterator = (TreeIterator*)_cookie;
1675 	Volume* volume = (Volume*)_volume->private_volume;
1676 
1677 	uint32 maxCount = *_num;
1678 	uint32 count = 0;
1679 
1680 	while (count < maxCount && bufferSize > sizeof(struct dirent)) {
1681 		ino_t id;
1682 		uint16 length;
1683 		size_t nameBufferSize = bufferSize - sizeof(struct dirent) + 1;
1684 
1685 		status_t status = iterator->GetNextEntry(dirent->d_name, &length,
1686 			nameBufferSize, &id);
1687 
1688 		if (status == B_ENTRY_NOT_FOUND)
1689 			break;
1690 
1691 		if (status == B_BUFFER_OVERFLOW) {
1692 			// the remaining name buffer length was too small
1693 			if (count == 0)
1694 				RETURN_ERROR(B_BUFFER_OVERFLOW);
1695 			break;
1696 		}
1697 
1698 		if (status != B_OK)
1699 			RETURN_ERROR(status);
1700 
1701 		ASSERT(length < nameBufferSize);
1702 
1703 		dirent->d_dev = volume->ID();
1704 		dirent->d_ino = id;
1705 		dirent->d_reclen = sizeof(struct dirent) + length;
1706 
1707 		bufferSize -= dirent->d_reclen;
1708 		dirent = (struct dirent*)((uint8*)dirent + dirent->d_reclen);
1709 		count++;
1710 	}
1711 
1712 	*_num = count;
1713 	return B_OK;
1714 }
1715 
1716 
1717 /*!	Sets the TreeIterator back to the beginning of the directory. */
1718 static status_t
1719 bfs_rewind_dir(fs_volume* /*_volume*/, fs_vnode* /*node*/, void* _cookie)
1720 {
1721 	FUNCTION();
1722 	TreeIterator* iterator = (TreeIterator*)_cookie;
1723 
1724 	return iterator->Rewind();
1725 }
1726 
1727 
/*!	Close hook for directories - nothing to do here, always returns B_OK.
	The iterator is deleted in bfs_free_dir_cookie().
*/
static status_t
bfs_close_dir(fs_volume* /*_volume*/, fs_vnode* /*node*/, void* /*_cookie*/)
{
	FUNCTION();
	return B_OK;
}
1734 
1735 
1736 static status_t
1737 bfs_free_dir_cookie(fs_volume* _volume, fs_vnode* node, void* _cookie)
1738 {
1739 	delete (TreeIterator*)_cookie;
1740 	return B_OK;
1741 }
1742 
1743 
1744 //	#pragma mark - Attribute functions
1745 
1746 
1747 static status_t
1748 bfs_open_attr_dir(fs_volume* _volume, fs_vnode* _node, void** _cookie)
1749 {
1750 	Inode* inode = (Inode*)_node->private_node;
1751 
1752 	FUNCTION();
1753 
1754 	AttributeIterator* iterator = new(std::nothrow) AttributeIterator(inode);
1755 	if (iterator == NULL)
1756 		RETURN_ERROR(B_NO_MEMORY);
1757 
1758 	*_cookie = iterator;
1759 	return B_OK;
1760 }
1761 
1762 
/*!	Close hook for attribute directories - nothing to do here, always
	returns B_OK. The iterator is deleted in bfs_free_attr_dir_cookie().
*/
static status_t
bfs_close_attr_dir(fs_volume* _volume, fs_vnode* node, void* cookie)
{
	FUNCTION();
	return B_OK;
}
1769 
1770 
1771 static status_t
1772 bfs_free_attr_dir_cookie(fs_volume* _volume, fs_vnode* node, void* _cookie)
1773 {
1774 	FUNCTION();
1775 	AttributeIterator* iterator = (AttributeIterator*)_cookie;
1776 
1777 	delete iterator;
1778 	return B_OK;
1779 }
1780 
1781 
1782 static status_t
1783 bfs_rewind_attr_dir(fs_volume* _volume, fs_vnode* _node, void* _cookie)
1784 {
1785 	FUNCTION();
1786 
1787 	AttributeIterator* iterator = (AttributeIterator*)_cookie;
1788 	RETURN_ERROR(iterator->Rewind());
1789 }
1790 
1791 
1792 static status_t
1793 bfs_read_attr_dir(fs_volume* _volume, fs_vnode* node, void* _cookie,
1794 	struct dirent* dirent, size_t bufferSize, uint32* _num)
1795 {
1796 	FUNCTION();
1797 	AttributeIterator* iterator = (AttributeIterator*)_cookie;
1798 
1799 	uint32 type;
1800 	size_t length;
1801 	status_t status = iterator->GetNext(dirent->d_name, &length, &type,
1802 		&dirent->d_ino);
1803 	if (status == B_ENTRY_NOT_FOUND) {
1804 		*_num = 0;
1805 		return B_OK;
1806 	} else if (status != B_OK) {
1807 		RETURN_ERROR(status);
1808 	}
1809 
1810 	Volume* volume = (Volume*)_volume->private_volume;
1811 
1812 	dirent->d_dev = volume->ID();
1813 	dirent->d_reclen = sizeof(struct dirent) + length;
1814 
1815 	*_num = 1;
1816 	return B_OK;
1817 }
1818 
1819 
1820 static status_t
1821 bfs_create_attr(fs_volume* _volume, fs_vnode* _node, const char* name,
1822 	uint32 type, int openMode, void** _cookie)
1823 {
1824 	FUNCTION();
1825 
1826 	Volume* volume = (Volume*)_volume->private_volume;
1827 	if (volume->IsReadOnly())
1828 		return B_READ_ONLY_DEVICE;
1829 
1830 	Inode* inode = (Inode*)_node->private_node;
1831 	Attribute attribute(inode);
1832 
1833 	return attribute.Create(name, type, openMode, (attr_cookie**)_cookie);
1834 }
1835 
1836 
1837 static status_t
1838 bfs_open_attr(fs_volume* _volume, fs_vnode* _node, const char* name,
1839 	int openMode, void** _cookie)
1840 {
1841 	FUNCTION();
1842 
1843 	Inode* inode = (Inode*)_node->private_node;
1844 	Attribute attribute(inode);
1845 
1846 	return attribute.Open(name, openMode, (attr_cookie**)_cookie);
1847 }
1848 
1849 
/*!	Close hook for attributes - nothing to do here, always returns B_OK.
	The cookie is deleted in bfs_free_attr_cookie().
*/
static status_t
bfs_close_attr(fs_volume* _volume, fs_vnode* _file, void* cookie)
{
	return B_OK;
}
1855 
1856 
1857 static status_t
1858 bfs_free_attr_cookie(fs_volume* _volume, fs_vnode* _file, void* cookie)
1859 {
1860 	delete (attr_cookie*)cookie;
1861 	return B_OK;
1862 }
1863 
1864 
1865 static status_t
1866 bfs_read_attr(fs_volume* _volume, fs_vnode* _file, void* _cookie, off_t pos,
1867 	void* buffer, size_t* _length)
1868 {
1869 	FUNCTION();
1870 
1871 	attr_cookie* cookie = (attr_cookie*)_cookie;
1872 	Inode* inode = (Inode*)_file->private_node;
1873 
1874 	Attribute attribute(inode, cookie);
1875 
1876 	return attribute.Read(cookie, pos, (uint8*)buffer, _length);
1877 }
1878 
1879 
1880 static status_t
1881 bfs_write_attr(fs_volume* _volume, fs_vnode* _file, void* _cookie,
1882 	off_t pos, const void* buffer, size_t* _length)
1883 {
1884 	FUNCTION();
1885 
1886 	attr_cookie* cookie = (attr_cookie*)_cookie;
1887 	Volume* volume = (Volume*)_volume->private_volume;
1888 	Inode* inode = (Inode*)_file->private_node;
1889 
1890 	Transaction transaction(volume, inode->BlockNumber());
1891 	Attribute attribute(inode, cookie);
1892 
1893 	bool created;
1894 	status_t status = attribute.Write(transaction, cookie, pos,
1895 		(const uint8*)buffer, _length, &created);
1896 	if (status == B_OK) {
1897 		status = transaction.Done();
1898 		if (status == B_OK) {
1899 			notify_attribute_changed(volume->ID(), inode->ParentID(),
1900 				inode->ID(), cookie->name,
1901 				created ? B_ATTR_CREATED : B_ATTR_CHANGED);
1902 			notify_stat_changed(volume->ID(), inode->ParentID(), inode->ID(),
1903 				B_STAT_CHANGE_TIME);
1904 		}
1905 	}
1906 
1907 	return status;
1908 }
1909 
1910 
1911 static status_t
1912 bfs_read_attr_stat(fs_volume* _volume, fs_vnode* _file, void* _cookie,
1913 	struct stat* stat)
1914 {
1915 	FUNCTION();
1916 
1917 	attr_cookie* cookie = (attr_cookie*)_cookie;
1918 	Inode* inode = (Inode*)_file->private_node;
1919 
1920 	Attribute attribute(inode, cookie);
1921 
1922 	return attribute.Stat(*stat);
1923 }
1924 
1925 
/*!	Hook for changing the stat data of an attribute - not implemented,
	always returns EOPNOTSUPP without looking at any of its arguments.
*/
static status_t
bfs_write_attr_stat(fs_volume* _volume, fs_vnode* file, void* cookie,
	const struct stat* stat, int statMask)
{
	// TODO: Implement (at least setting the size)!
	return EOPNOTSUPP;
}
1933 
1934 
/*!	Hook for renaming an attribute - not implemented.
	Only logs \a fromName and \a toName, and always returns EOPNOTSUPP.
*/
static status_t
bfs_rename_attr(fs_volume* _volume, fs_vnode* fromFile, const char* fromName,
	fs_vnode* toFile, const char* toName)
{
	FUNCTION_START(("name = \"%s\", to = \"%s\"\n", fromName, toName));

	// TODO: implement bfs_rename_attr()!
	// There will probably be an API to move one attribute to another file,
	// making that function much more complicated - oh joy ;-)

	return EOPNOTSUPP;
}
1947 
1948 
1949 static status_t
1950 bfs_remove_attr(fs_volume* _volume, fs_vnode* _node, const char* name)
1951 {
1952 	FUNCTION_START(("name = \"%s\"\n", name));
1953 
1954 	Volume* volume = (Volume*)_volume->private_volume;
1955 	Inode* inode = (Inode*)_node->private_node;
1956 
1957 	status_t status = inode->CheckPermissions(W_OK);
1958 	if (status != B_OK)
1959 		return status;
1960 
1961 	Transaction transaction(volume, inode->BlockNumber());
1962 
1963 	status = inode->RemoveAttribute(transaction, name);
1964 	if (status == B_OK)
1965 		status = transaction.Done();
1966 	if (status == B_OK) {
1967 		notify_attribute_changed(volume->ID(), inode->ParentID(), inode->ID(),
1968 			name, B_ATTR_REMOVED);
1969 	}
1970 
1971 	return status;
1972 }
1973 
1974 
1975 //	#pragma mark - Special Nodes
1976 
1977 
1978 status_t
1979 bfs_create_special_node(fs_volume* _volume, fs_vnode* _directory,
1980 	const char* name, fs_vnode* subVnode, mode_t mode, uint32 flags,
1981 	fs_vnode* _superVnode, ino_t* _nodeID)
1982 {
1983 	// no need to support entry-less nodes
1984 	if (name == NULL)
1985 		return B_UNSUPPORTED;
1986 
1987 	FUNCTION_START(("name = \"%s\", mode = %d, flags = 0x%lx, subVnode: %p\n",
1988 		name, mode, flags, subVnode));
1989 
1990 	Volume* volume = (Volume*)_volume->private_volume;
1991 	Inode* directory = (Inode*)_directory->private_node;
1992 
1993 	if (volume->IsReadOnly())
1994 		return B_READ_ONLY_DEVICE;
1995 
1996 	if (!directory->IsDirectory())
1997 		RETURN_ERROR(B_BAD_TYPE);
1998 
1999 	status_t status = directory->CheckPermissions(W_OK);
2000 	if (status < B_OK)
2001 		RETURN_ERROR(status);
2002 
2003 	Transaction transaction(volume, directory->BlockNumber());
2004 
2005 	off_t id;
2006 	Inode* inode;
2007 	status = Inode::Create(transaction, directory, name, mode, O_EXCL, 0, NULL,
2008 		&id, &inode, subVnode ? subVnode->ops : NULL, flags);
2009 	if (status == B_OK) {
2010 		_superVnode->private_node = inode;
2011 		_superVnode->ops = &gBFSVnodeOps;
2012 		*_nodeID = id;
2013 
2014 		entry_cache_add(volume->ID(), directory->ID(), name, id);
2015 
2016 		status = transaction.Done();
2017 		if (status == B_OK)
2018 			notify_entry_created(volume->ID(), directory->ID(), name, id);
2019 		else
2020 			entry_cache_remove(volume->ID(), directory->ID(), name);
2021 	}
2022 
2023 	return status;
2024 }
2025 
2026 
2027 //	#pragma mark - Index functions
2028 
2029 
2030 static status_t
2031 bfs_open_index_dir(fs_volume* _volume, void** _cookie)
2032 {
2033 	FUNCTION();
2034 
2035 	Volume* volume = (Volume*)_volume->private_volume;
2036 
2037 	if (volume->IndicesNode() == NULL) {
2038 		// This volume does not have any indices
2039 		RETURN_ERROR(B_ENTRY_NOT_FOUND);
2040 	}
2041 
2042 	// Since the indices root node is just a directory, and we are storing
2043 	// a pointer to it in our Volume object, we can just use the directory
2044 	// traversal functions.
2045 	// In fact we're storing it in the Volume object for that reason.
2046 
2047 	fs_vnode indicesNode;
2048 	indicesNode.private_node = volume->IndicesNode();
2049 
2050 	RETURN_ERROR(bfs_open_dir(_volume, &indicesNode, _cookie));
2051 }
2052 
2053 
2054 static status_t
2055 bfs_close_index_dir(fs_volume* _volume, void* _cookie)
2056 {
2057 	FUNCTION();
2058 
2059 	Volume* volume = (Volume*)_volume->private_volume;
2060 
2061 	fs_vnode indicesNode;
2062 	indicesNode.private_node = volume->IndicesNode();
2063 
2064 	RETURN_ERROR(bfs_close_dir(_volume, &indicesNode, _cookie));
2065 }
2066 
2067 
2068 static status_t
2069 bfs_free_index_dir_cookie(fs_volume* _volume, void* _cookie)
2070 {
2071 	FUNCTION();
2072 
2073 	Volume* volume = (Volume*)_volume->private_volume;
2074 
2075 	fs_vnode indicesNode;
2076 	indicesNode.private_node = volume->IndicesNode();
2077 
2078 	RETURN_ERROR(bfs_free_dir_cookie(_volume, &indicesNode, _cookie));
2079 }
2080 
2081 
2082 static status_t
2083 bfs_rewind_index_dir(fs_volume* _volume, void* _cookie)
2084 {
2085 	FUNCTION();
2086 
2087 	Volume* volume = (Volume*)_volume->private_volume;
2088 
2089 	fs_vnode indicesNode;
2090 	indicesNode.private_node = volume->IndicesNode();
2091 
2092 	RETURN_ERROR(bfs_rewind_dir(_volume, &indicesNode, _cookie));
2093 }
2094 
2095 
2096 static status_t
2097 bfs_read_index_dir(fs_volume* _volume, void* _cookie, struct dirent* dirent,
2098 	size_t bufferSize, uint32* _num)
2099 {
2100 	FUNCTION();
2101 
2102 	Volume* volume = (Volume*)_volume->private_volume;
2103 
2104 	fs_vnode indicesNode;
2105 	indicesNode.private_node = volume->IndicesNode();
2106 
2107 	RETURN_ERROR(bfs_read_dir(_volume, &indicesNode, _cookie, dirent,
2108 		bufferSize, _num));
2109 }
2110 
2111 
2112 static status_t
2113 bfs_create_index(fs_volume* _volume, const char* name, uint32 type,
2114 	uint32 flags)
2115 {
2116 	FUNCTION_START(("name = \"%s\", type = %ld, flags = %ld\n", name, type, flags));
2117 
2118 	Volume* volume = (Volume*)_volume->private_volume;
2119 
2120 	if (volume->IsReadOnly())
2121 		return B_READ_ONLY_DEVICE;
2122 
2123 	// only root users are allowed to create indices
2124 	if (geteuid() != 0)
2125 		return B_NOT_ALLOWED;
2126 
2127 	Transaction transaction(volume, volume->Indices());
2128 
2129 	Index index(volume);
2130 	status_t status = index.Create(transaction, name, type);
2131 
2132 	if (status == B_OK)
2133 		status = transaction.Done();
2134 
2135 	RETURN_ERROR(status);
2136 }
2137 
2138 
2139 static status_t
2140 bfs_remove_index(fs_volume* _volume, const char* name)
2141 {
2142 	FUNCTION();
2143 
2144 	Volume* volume = (Volume*)_volume->private_volume;
2145 
2146 	if (volume->IsReadOnly())
2147 		return B_READ_ONLY_DEVICE;
2148 
2149 	// only root users are allowed to remove indices
2150 	if (geteuid() != 0)
2151 		return B_NOT_ALLOWED;
2152 
2153 	Inode* indices = volume->IndicesNode();
2154 	if (indices == NULL)
2155 		return B_ENTRY_NOT_FOUND;
2156 
2157 	Transaction transaction(volume, volume->Indices());
2158 
2159 	status_t status = indices->Remove(transaction, name);
2160 	if (status == B_OK)
2161 		status = transaction.Done();
2162 
2163 	RETURN_ERROR(status);
2164 }
2165 
2166 
2167 static status_t
2168 bfs_stat_index(fs_volume* _volume, const char* name, struct stat* stat)
2169 {
2170 	FUNCTION_START(("name = %s\n", name));
2171 
2172 	Volume* volume = (Volume*)_volume->private_volume;
2173 
2174 	Index index(volume);
2175 	status_t status = index.SetTo(name);
2176 	if (status < B_OK)
2177 		RETURN_ERROR(status);
2178 
2179 	bfs_inode& node = index.Node()->Node();
2180 
2181 	stat->st_type = index.Type();
2182 	stat->st_mode = node.Mode();
2183 
2184 	stat->st_size = node.data.Size();
2185 	stat->st_blocks = index.Node()->AllocatedSize() / 512;
2186 
2187 	stat->st_nlink = 1;
2188 	stat->st_blksize = 65536;
2189 
2190 	stat->st_uid = node.UserID();
2191 	stat->st_gid = node.GroupID();
2192 
2193 	fill_stat_time(node, *stat);
2194 
2195 	return B_OK;
2196 }
2197 
2198 
2199 //	#pragma mark - Query functions
2200 
2201 
2202 static status_t
2203 bfs_open_query(fs_volume* _volume, const char* queryString, uint32 flags,
2204 	port_id port, uint32 token, void** _cookie)
2205 {
2206 	FUNCTION_START(("bfs_open_query(\"%s\", flags = %lu, port_id = %ld, token = %ld)\n",
2207 		queryString, flags, port, token));
2208 
2209 	Volume* volume = (Volume*)_volume->private_volume;
2210 
2211 	Expression* expression = new(std::nothrow) Expression((char*)queryString);
2212 	if (expression == NULL)
2213 		RETURN_ERROR(B_NO_MEMORY);
2214 
2215 	if (expression->InitCheck() < B_OK) {
2216 		INFORM(("Could not parse query \"%s\", stopped at: \"%s\"\n",
2217 			queryString, expression->Position()));
2218 
2219 		delete expression;
2220 		RETURN_ERROR(B_BAD_VALUE);
2221 	}
2222 
2223 	Query* query = new(std::nothrow) Query(volume, expression, flags);
2224 	if (query == NULL) {
2225 		delete expression;
2226 		RETURN_ERROR(B_NO_MEMORY);
2227 	}
2228 
2229 	if (flags & B_LIVE_QUERY)
2230 		query->SetLiveMode(port, token);
2231 
2232 	*_cookie = (void*)query;
2233 
2234 	return B_OK;
2235 }
2236 
2237 
2238 static status_t
2239 bfs_close_query(fs_volume* _volume, void* cookie)
2240 {
2241 	FUNCTION();
2242 	return B_OK;
2243 }
2244 
2245 
2246 static status_t
2247 bfs_free_query_cookie(fs_volume* _volume, void* cookie)
2248 {
2249 	FUNCTION();
2250 
2251 	Query* query = (Query*)cookie;
2252 	Expression* expression = query->GetExpression();
2253 	delete query;
2254 	delete expression;
2255 
2256 	return B_OK;
2257 }
2258 
2259 
2260 static status_t
2261 bfs_read_query(fs_volume* /*_volume*/, void* cookie, struct dirent* dirent,
2262 	size_t bufferSize, uint32* _num)
2263 {
2264 	FUNCTION();
2265 	Query* query = (Query*)cookie;
2266 	status_t status = query->GetNextEntry(dirent, bufferSize);
2267 	if (status == B_OK)
2268 		*_num = 1;
2269 	else if (status == B_ENTRY_NOT_FOUND)
2270 		*_num = 0;
2271 	else
2272 		return status;
2273 
2274 	return B_OK;
2275 }
2276 
2277 
2278 static status_t
2279 bfs_rewind_query(fs_volume* /*_volume*/, void* cookie)
2280 {
2281 	FUNCTION();
2282 
2283 	Query* query = (Query*)cookie;
2284 	return query->Rewind();
2285 }
2286 
2287 
2288 //	#pragma mark -
2289 
2290 
2291 static uint32
2292 bfs_get_supported_operations(partition_data* partition, uint32 mask)
2293 {
2294 	// TODO: We should at least check the partition size.
2295 	return B_DISK_SYSTEM_SUPPORTS_INITIALIZING
2296 		| B_DISK_SYSTEM_SUPPORTS_CONTENT_NAME
2297 		| B_DISK_SYSTEM_SUPPORTS_WRITING;
2298 }
2299 
2300 
2301 static status_t
2302 bfs_initialize(int fd, partition_id partitionID, const char* name,
2303 	const char* parameterString, off_t /*partitionSize*/, disk_job_id job)
2304 {
2305 	// check name
2306 	status_t status = check_volume_name(name);
2307 	if (status != B_OK)
2308 		return status;
2309 
2310 	// parse parameters
2311 	initialize_parameters parameters;
2312 	status = parse_initialize_parameters(parameterString, parameters);
2313 	if (status != B_OK)
2314 		return status;
2315 
2316 	update_disk_device_job_progress(job, 0);
2317 
2318 	// initialize the volume
2319 	Volume volume(NULL);
2320 	status = volume.Initialize(fd, name, parameters.blockSize,
2321 		parameters.flags);
2322 	if (status < B_OK) {
2323 		INFORM(("Initializing volume failed: %s\n", strerror(status)));
2324 		return status;
2325 	}
2326 
2327 	// rescan partition
2328 	status = scan_partition(partitionID);
2329 	if (status != B_OK)
2330 		return status;
2331 
2332 	update_disk_device_job_progress(job, 1);
2333 
2334 	// print some info, if desired
2335 	if (parameters.verbose) {
2336 		disk_super_block super = volume.SuperBlock();
2337 
2338 		INFORM(("Disk was initialized successfully.\n"));
2339 		INFORM(("\tname: \"%s\"\n", super.name));
2340 		INFORM(("\tnum blocks: %" B_PRIdOFF "\n", super.NumBlocks()));
2341 		INFORM(("\tused blocks: %" B_PRIdOFF "\n", super.UsedBlocks()));
2342 		INFORM(("\tblock size: %u bytes\n", (unsigned)super.BlockSize()));
2343 		INFORM(("\tnum allocation groups: %d\n",
2344 			(int)super.AllocationGroups()));
2345 		INFORM(("\tallocation group size: %ld blocks\n",
2346 			1L << super.AllocationGroupShift()));
2347 		INFORM(("\tlog size: %u blocks\n", super.log_blocks.Length()));
2348 	}
2349 
2350 	return B_OK;
2351 }
2352 
2353 
2354 static status_t
2355 bfs_uninitialize(int fd, partition_id partitionID, off_t partitionSize,
2356 	uint32 blockSize, disk_job_id job)
2357 {
2358 	if (blockSize == 0)
2359 		return B_BAD_VALUE;
2360 
2361 	update_disk_device_job_progress(job, 0.0);
2362 
2363 	// just overwrite the superblock
2364 	disk_super_block superBlock;
2365 	memset(&superBlock, 0, sizeof(superBlock));
2366 
2367 	if (write_pos(fd, 512, &superBlock, sizeof(superBlock)) < 0)
2368 		return errno;
2369 
2370 	update_disk_device_job_progress(job, 1.0);
2371 
2372 	return B_OK;
2373 }
2374 
2375 
2376 //	#pragma mark -
2377 
2378 
2379 static status_t
2380 bfs_std_ops(int32 op, ...)
2381 {
2382 	switch (op) {
2383 		case B_MODULE_INIT:
2384 #ifdef BFS_DEBUGGER_COMMANDS
2385 			add_debugger_commands();
2386 #endif
2387 			return B_OK;
2388 		case B_MODULE_UNINIT:
2389 #ifdef BFS_DEBUGGER_COMMANDS
2390 			remove_debugger_commands();
2391 #endif
2392 			return B_OK;
2393 
2394 		default:
2395 			return B_ERROR;
2396 	}
2397 }
2398 
2399 fs_volume_ops gBFSVolumeOps = {
2400 	&bfs_unmount,
2401 	&bfs_read_fs_stat,
2402 	&bfs_write_fs_stat,
2403 	&bfs_sync,
2404 	&bfs_get_vnode,
2405 
2406 	/* index directory & index operations */
2407 	&bfs_open_index_dir,
2408 	&bfs_close_index_dir,
2409 	&bfs_free_index_dir_cookie,
2410 	&bfs_read_index_dir,
2411 	&bfs_rewind_index_dir,
2412 
2413 	&bfs_create_index,
2414 	&bfs_remove_index,
2415 	&bfs_stat_index,
2416 
2417 	/* query operations */
2418 	&bfs_open_query,
2419 	&bfs_close_query,
2420 	&bfs_free_query_cookie,
2421 	&bfs_read_query,
2422 	&bfs_rewind_query,
2423 };
2424 
2425 fs_vnode_ops gBFSVnodeOps = {
2426 	/* vnode operations */
2427 	&bfs_lookup,
2428 	&bfs_get_vnode_name,
2429 	&bfs_put_vnode,
2430 	&bfs_remove_vnode,
2431 
2432 	/* VM file access */
2433 	&bfs_can_page,
2434 	&bfs_read_pages,
2435 	&bfs_write_pages,
2436 
2437 	&bfs_io,
2438 	NULL,	// cancel_io()
2439 
2440 	&bfs_get_file_map,
2441 
2442 	&bfs_ioctl,
2443 	&bfs_set_flags,
2444 	NULL,	// fs_select
2445 	NULL,	// fs_deselect
2446 	&bfs_fsync,
2447 
2448 	&bfs_read_link,
2449 	&bfs_create_symlink,
2450 
2451 	&bfs_link,
2452 	&bfs_unlink,
2453 	&bfs_rename,
2454 
2455 	&bfs_access,
2456 	&bfs_read_stat,
2457 	&bfs_write_stat,
2458 	NULL,	// fs_preallocate
2459 
2460 	/* file operations */
2461 	&bfs_create,
2462 	&bfs_open,
2463 	&bfs_close,
2464 	&bfs_free_cookie,
2465 	&bfs_read,
2466 	&bfs_write,
2467 
2468 	/* directory operations */
2469 	&bfs_create_dir,
2470 	&bfs_remove_dir,
2471 	&bfs_open_dir,
2472 	&bfs_close_dir,
2473 	&bfs_free_dir_cookie,
2474 	&bfs_read_dir,
2475 	&bfs_rewind_dir,
2476 
2477 	/* attribute directory operations */
2478 	&bfs_open_attr_dir,
2479 	&bfs_close_attr_dir,
2480 	&bfs_free_attr_dir_cookie,
2481 	&bfs_read_attr_dir,
2482 	&bfs_rewind_attr_dir,
2483 
2484 	/* attribute operations */
2485 	&bfs_create_attr,
2486 	&bfs_open_attr,
2487 	&bfs_close_attr,
2488 	&bfs_free_attr_cookie,
2489 	&bfs_read_attr,
2490 	&bfs_write_attr,
2491 
2492 	&bfs_read_attr_stat,
2493 	&bfs_write_attr_stat,
2494 	&bfs_rename_attr,
2495 	&bfs_remove_attr,
2496 
2497 	/* special nodes */
2498 	&bfs_create_special_node
2499 };
2500 
2501 static file_system_module_info sBeFileSystem = {
2502 	{
2503 		"file_systems/bfs" B_CURRENT_FS_API_VERSION,
2504 		0,
2505 		bfs_std_ops,
2506 	},
2507 
2508 	"bfs",						// short_name
2509 	"Be File System",			// pretty_name
2510 
2511 	// DDM flags
2512 	0
2513 //	| B_DISK_SYSTEM_SUPPORTS_CHECKING
2514 //	| B_DISK_SYSTEM_SUPPORTS_REPAIRING
2515 //	| B_DISK_SYSTEM_SUPPORTS_RESIZING
2516 //	| B_DISK_SYSTEM_SUPPORTS_MOVING
2517 //	| B_DISK_SYSTEM_SUPPORTS_SETTING_CONTENT_NAME
2518 //	| B_DISK_SYSTEM_SUPPORTS_SETTING_CONTENT_PARAMETERS
2519 	| B_DISK_SYSTEM_SUPPORTS_INITIALIZING
2520 	| B_DISK_SYSTEM_SUPPORTS_CONTENT_NAME
2521 //	| B_DISK_SYSTEM_SUPPORTS_DEFRAGMENTING
2522 //	| B_DISK_SYSTEM_SUPPORTS_DEFRAGMENTING_WHILE_MOUNTED
2523 //	| B_DISK_SYSTEM_SUPPORTS_CHECKING_WHILE_MOUNTED
2524 //	| B_DISK_SYSTEM_SUPPORTS_REPAIRING_WHILE_MOUNTED
2525 //	| B_DISK_SYSTEM_SUPPORTS_RESIZING_WHILE_MOUNTED
2526 //	| B_DISK_SYSTEM_SUPPORTS_MOVING_WHILE_MOUNTED
2527 //	| B_DISK_SYSTEM_SUPPORTS_SETTING_CONTENT_NAME_WHILE_MOUNTED
2528 //	| B_DISK_SYSTEM_SUPPORTS_SETTING_CONTENT_PARAMETERS_WHILE_MOUNTED
2529 	| B_DISK_SYSTEM_SUPPORTS_WRITING
2530 	,
2531 
2532 	// scanning
2533 	bfs_identify_partition,
2534 	bfs_scan_partition,
2535 	bfs_free_identify_partition_cookie,
2536 	NULL,	// free_partition_content_cookie()
2537 
2538 	&bfs_mount,
2539 
2540 	/* capability querying operations */
2541 	&bfs_get_supported_operations,
2542 
2543 	NULL,	// validate_resize
2544 	NULL,	// validate_move
2545 	NULL,	// validate_set_content_name
2546 	NULL,	// validate_set_content_parameters
2547 	NULL,	// validate_initialize,
2548 
2549 	/* shadow partition modification */
2550 	NULL,	// shadow_changed
2551 
2552 	/* writing */
2553 	NULL,	// defragment
2554 	NULL,	// repair
2555 	NULL,	// resize
2556 	NULL,	// move
2557 	NULL,	// set_content_name
2558 	NULL,	// set_content_parameters
2559 	bfs_initialize,
2560 	bfs_uninitialize
2561 };
2562 
2563 module_info* modules[] = {
2564 	(module_info*)&sBeFileSystem,
2565 	NULL,
2566 };
2567