xref: /haiku/src/add-ons/kernel/file_systems/bfs/kernel_interface.cpp (revision 4c8e85b316c35a9161f5a1c50ad70bc91c83a76f)
1 /*
2  * Copyright 2001-2020, Axel Dörfler, axeld@pinc-software.de.
3  * This file may be used under the terms of the MIT License.
4  */
5 
6 
7 //!	file system interface to Haiku's vnode layer
8 
9 
10 #include "Attribute.h"
11 #include "CheckVisitor.h"
12 #include "Debug.h"
13 #include "Volume.h"
14 #include "Inode.h"
15 #include "Index.h"
16 #include "BPlusTree.h"
17 #include "Query.h"
18 #include "ResizeVisitor.h"
19 #include "bfs_control.h"
20 #include "bfs_disk_system.h"
21 
22 // TODO: temporary solution as long as there is no public I/O requests API
23 #ifndef FS_SHELL
24 #	include <io_requests.h>
25 #	include <util/fs_trim_support.h>
26 #endif
27 
28 
// I/O size reported to callers through stat::st_blksize and fs_info::io_size.
#define BFS_IO_SIZE	65536

// Suffixes for the file system names; both are empty when the build defines
// BFS_LITTLE_ENDIAN_ONLY.
#ifdef BFS_LITTLE_ENDIAN_ONLY
#	define BFS_ENDIAN_SUFFIX ""
#	define BFS_ENDIAN_PRETTY_SUFFIX ""
#else
#	define BFS_ENDIAN_SUFFIX "_big"
#	define BFS_ENDIAN_PRETTY_SUFFIX " (Big Endian)"
#endif
38 
39 
40 struct identify_cookie {
41 	disk_super_block super_block;
42 };
43 
44 extern void fill_stat_buffer(Inode* inode, struct stat& stat);
45 
46 
47 static void
48 fill_stat_time(const bfs_inode& node, struct stat& stat)
49 {
50 	bigtime_t now = real_time_clock_usecs();
51 	stat.st_atim.tv_sec = now / 1000000LL;
52 	stat.st_atim.tv_nsec = (now % 1000000LL) * 1000;
53 
54 	stat.st_mtim.tv_sec = bfs_inode::ToSecs(node.LastModifiedTime());
55 	stat.st_mtim.tv_nsec = bfs_inode::ToNsecs(node.LastModifiedTime());
56 	stat.st_crtim.tv_sec = bfs_inode::ToSecs(node.CreateTime());
57 	stat.st_crtim.tv_nsec = bfs_inode::ToNsecs(node.CreateTime());
58 
59 	// For BeOS compatibility, if on-disk ctime is invalid, fall back to mtime:
60 	bigtime_t changeTime = node.StatusChangeTime();
61 	if (changeTime < node.LastModifiedTime())
62 		stat.st_ctim = stat.st_mtim;
63 	else {
64 		stat.st_ctim.tv_sec = bfs_inode::ToSecs(changeTime);
65 		stat.st_ctim.tv_nsec = bfs_inode::ToNsecs(changeTime);
66 	}
67 }
68 
69 
70 void
71 fill_stat_buffer(Inode* inode, struct stat& stat)
72 {
73 	const bfs_inode& node = inode->Node();
74 
75 	stat.st_dev = inode->GetVolume()->ID();
76 	stat.st_ino = inode->ID();
77 	stat.st_nlink = 1;
78 	stat.st_blksize = BFS_IO_SIZE;
79 
80 	stat.st_uid = node.UserID();
81 	stat.st_gid = node.GroupID();
82 	stat.st_mode = node.Mode();
83 	stat.st_type = node.Type();
84 
85 	fill_stat_time(node, stat);
86 
87 	if (inode->IsSymLink() && (inode->Flags() & INODE_LONG_SYMLINK) == 0) {
88 		// symlinks report the size of the link here
89 		stat.st_size = strlen(node.short_symlink);
90 	} else
91 		stat.st_size = inode->Size();
92 
93 	stat.st_blocks = inode->AllocatedSize() / 512;
94 }
95 
96 
97 //!	bfs_io() callback hook
98 static status_t
99 iterative_io_get_vecs_hook(void* cookie, io_request* request, off_t offset,
100 	size_t size, struct file_io_vec* vecs, size_t* _count)
101 {
102 	Inode* inode = (Inode*)cookie;
103 
104 	return file_map_translate(inode->Map(), offset, size, vecs, _count,
105 		inode->GetVolume()->BlockSize());
106 }
107 
108 
109 //!	bfs_io() callback hook
110 static status_t
111 iterative_io_finished_hook(void* cookie, io_request* request, status_t status,
112 	bool partialTransfer, size_t bytesTransferred)
113 {
114 	Inode* inode = (Inode*)cookie;
115 	rw_lock_read_unlock(&inode->Lock());
116 	return B_OK;
117 }
118 
119 
120 //	#pragma mark - Scanning
121 
122 
123 static float
124 bfs_identify_partition(int fd, partition_data* partition, void** _cookie)
125 {
126 	disk_super_block superBlock;
127 	status_t status = Volume::Identify(fd, &superBlock);
128 	if (status != B_OK)
129 		return -1;
130 
131 	identify_cookie* cookie = new(std::nothrow) identify_cookie;
132 	if (cookie == NULL)
133 		return -1;
134 
135 	memcpy(&cookie->super_block, &superBlock, sizeof(disk_super_block));
136 
137 	*_cookie = cookie;
138 	return 0.85f;
139 }
140 
141 
142 static status_t
143 bfs_scan_partition(int fd, partition_data* partition, void* _cookie)
144 {
145 	identify_cookie* cookie = (identify_cookie*)_cookie;
146 
147 	partition->status = B_PARTITION_VALID;
148 	partition->flags |= B_PARTITION_FILE_SYSTEM;
149 	partition->content_size = cookie->super_block.NumBlocks()
150 		* cookie->super_block.BlockSize();
151 	partition->block_size = cookie->super_block.BlockSize();
152 	partition->content_name = strdup(cookie->super_block.name);
153 	if (partition->content_name == NULL)
154 		return B_NO_MEMORY;
155 
156 	return B_OK;
157 }
158 
159 
160 static void
161 bfs_free_identify_partition_cookie(partition_data* partition, void* _cookie)
162 {
163 	identify_cookie* cookie = (identify_cookie*)_cookie;
164 	delete cookie;
165 }
166 
167 
168 //	#pragma mark -
169 
170 
171 static status_t
172 bfs_mount(fs_volume* _volume, const char* device, uint32 flags,
173 	const char* args, ino_t* _rootID)
174 {
175 	FUNCTION();
176 
177 	Volume* volume = new(std::nothrow) Volume(_volume);
178 	if (volume == NULL)
179 		return B_NO_MEMORY;
180 
181 	status_t status = volume->Mount(device, flags);
182 	if (status != B_OK) {
183 		delete volume;
184 		RETURN_ERROR(status);
185 	}
186 
187 	_volume->private_volume = volume;
188 	_volume->ops = &gBFSVolumeOps;
189 	*_rootID = volume->ToVnode(volume->Root());
190 
191 	INFORM(("mounted \"%s\" (root node at %" B_PRIdINO ", device = %s)\n",
192 		volume->Name(), *_rootID, device));
193 	return B_OK;
194 }
195 
196 
197 static status_t
198 bfs_unmount(fs_volume* _volume)
199 {
200 	FUNCTION();
201 	Volume* volume = (Volume*)_volume->private_volume;
202 
203 	status_t status = volume->Unmount();
204 	delete volume;
205 
206 	RETURN_ERROR(status);
207 }
208 
209 
210 static status_t
211 bfs_read_fs_stat(fs_volume* _volume, struct fs_info* info)
212 {
213 	FUNCTION();
214 
215 	Volume* volume = (Volume*)_volume->private_volume;
216 	MutexLocker locker(volume->Lock());
217 
218 	// File system flags.
219 	info->flags = B_FS_IS_PERSISTENT | B_FS_HAS_ATTR | B_FS_HAS_MIME
220 		| (volume->IndicesNode() != NULL ? B_FS_HAS_QUERY : 0)
221 		| (volume->IsReadOnly() ? B_FS_IS_READONLY : 0)
222 		| B_FS_SUPPORTS_MONITOR_CHILDREN;
223 
224 	info->io_size = BFS_IO_SIZE;
225 		// whatever is appropriate here?
226 
227 	info->block_size = volume->BlockSize();
228 	info->total_blocks = volume->NumBlocks();
229 	info->free_blocks = volume->FreeBlocks();
230 
231 	// Volume name
232 	strlcpy(info->volume_name, volume->Name(), sizeof(info->volume_name));
233 
234 	// File system name
235 	strlcpy(info->fsh_name, "bfs", sizeof(info->fsh_name));
236 
237 	return B_OK;
238 }
239 
240 
241 static status_t
242 bfs_write_fs_stat(fs_volume* _volume, const struct fs_info* info, uint32 mask)
243 {
244 	FUNCTION_START(("mask = %" B_PRId32 "\n", mask));
245 
246 	Volume* volume = (Volume*)_volume->private_volume;
247 	if (volume->IsReadOnly())
248 		return B_READ_ONLY_DEVICE;
249 
250 	MutexLocker locker(volume->Lock());
251 
252 	status_t status = B_BAD_VALUE;
253 
254 	if (mask & FS_WRITE_FSINFO_NAME) {
255 		disk_super_block& superBlock = volume->SuperBlock();
256 
257 		strncpy(superBlock.name, info->volume_name,
258 			sizeof(superBlock.name) - 1);
259 		superBlock.name[sizeof(superBlock.name) - 1] = '\0';
260 
261 		status = volume->WriteSuperBlock();
262 	}
263 	return status;
264 }
265 
266 
267 static status_t
268 bfs_sync(fs_volume* _volume)
269 {
270 	FUNCTION();
271 
272 	Volume* volume = (Volume*)_volume->private_volume;
273 	return volume->Sync();
274 }
275 
276 
277 //	#pragma mark -
278 
279 
280 /*!	Reads in the node from disk and creates an inode object from it.
281 */
282 static status_t
283 bfs_get_vnode(fs_volume* _volume, ino_t id, fs_vnode* _node, int* _type,
284 	uint32* _flags, bool reenter)
285 {
286 	//FUNCTION_START(("ino_t = %Ld\n", id));
287 	Volume* volume = (Volume*)_volume->private_volume;
288 
289 	// first inode may be after the log area, we don't go through
290 	// the hassle and try to load an earlier block from disk
291 	if (id < volume->ToBlock(volume->Log()) + volume->Log().Length()
292 		|| id > volume->NumBlocks()) {
293 		INFORM(("inode at %" B_PRIdINO " requested!\n", id));
294 		return B_ERROR;
295 	}
296 
297 	CachedBlock cached(volume);
298 	status_t status = cached.SetTo(id);
299 	if (status != B_OK) {
300 		FATAL(("could not read inode: %" B_PRIdINO ": %s\n", id,
301 			strerror(status)));
302 		return status;
303 	}
304 	bfs_inode* node = (bfs_inode*)cached.Block();
305 
306 	status = node->InitCheck(volume);
307 	if (status != B_OK) {
308 		if ((node->Flags() & INODE_DELETED) != 0) {
309 			INFORM(("inode at %" B_PRIdINO " is already deleted!\n", id));
310 		} else {
311 			FATAL(("inode at %" B_PRIdINO " could not be read: %s!\n", id,
312 				strerror(status)));
313 		}
314 		return status;
315 	}
316 
317 	Inode* inode = new(std::nothrow) Inode(volume, id);
318 	if (inode == NULL)
319 		return B_NO_MEMORY;
320 
321 	status = inode->InitCheck(false);
322 	if (status != B_OK)
323 		delete inode;
324 
325 	if (status == B_OK) {
326 		_node->private_node = inode;
327 		_node->ops = &gBFSVnodeOps;
328 		*_type = inode->Mode();
329 		*_flags = 0;
330 	}
331 
332 	return status;
333 }
334 
335 
336 static status_t
337 bfs_put_vnode(fs_volume* _volume, fs_vnode* _node, bool reenter)
338 {
339 	Volume* volume = (Volume*)_volume->private_volume;
340 	Inode* inode = (Inode*)_node->private_node;
341 
342 	// since a directory's size can be changed without having it opened,
343 	// we need to take care about their preallocated blocks here
344 	if (!volume->IsReadOnly() && !volume->IsCheckingThread()
345 		&& inode->NeedsTrimming()) {
346 		Transaction transaction(volume, inode->BlockNumber());
347 
348 		if (inode->TrimPreallocation(transaction) == B_OK)
349 			transaction.Done();
350 		else if (transaction.HasParent()) {
351 			// TODO: for now, we don't let sub-transactions fail
352 			transaction.Done();
353 		}
354 	}
355 
356 	delete inode;
357 	return B_OK;
358 }
359 
360 
361 static status_t
362 bfs_remove_vnode(fs_volume* _volume, fs_vnode* _node, bool reenter)
363 {
364 	FUNCTION();
365 
366 	Volume* volume = (Volume*)_volume->private_volume;
367 	Inode* inode = (Inode*)_node->private_node;
368 
369 	// If the inode isn't in use anymore, we were called before
370 	// bfs_unlink() returns - in this case, we can just use the
371 	// transaction which has already deleted the inode.
372 	Transaction transaction(volume, volume->ToBlock(inode->Parent()));
373 
374 	// The file system check functionality uses this flag to prevent the space
375 	// used up by the inode from being freed - this flag is set only in
376 	// situations where this does not cause any harm as the block bitmap will
377 	// get fixed anyway in this case).
378 	if ((inode->Flags() & INODE_DONT_FREE_SPACE) != 0) {
379 		delete inode;
380 		return B_OK;
381 	}
382 
383 	ASSERT((inode->Flags() & INODE_DELETED) != 0);
384 
385 	status_t status = inode->Free(transaction);
386 	if (status == B_OK) {
387 		status = transaction.Done();
388 	} else if (transaction.HasParent()) {
389 		// TODO: for now, we don't let sub-transactions fail
390 		status = transaction.Done();
391 	}
392 
393 	volume->RemovedInodes().Remove(inode);
394 
395 	// TODO: the VFS currently does not allow this to fail
396 	delete inode;
397 
398 	return status;
399 }
400 
401 
402 static bool
403 bfs_can_page(fs_volume* _volume, fs_vnode* _v, void* _cookie)
404 {
405 	// TODO: we're obviously not even asked...
406 	return false;
407 }
408 
409 
410 static status_t
411 bfs_read_pages(fs_volume* _volume, fs_vnode* _node, void* _cookie,
412 	off_t pos, const iovec* vecs, size_t count, size_t* _numBytes)
413 {
414 	Volume* volume = (Volume*)_volume->private_volume;
415 	Inode* inode = (Inode*)_node->private_node;
416 
417 	if (inode->FileCache() == NULL)
418 		RETURN_ERROR(B_BAD_VALUE);
419 
420 	InodeReadLocker _(inode);
421 
422 	uint32 vecIndex = 0;
423 	size_t vecOffset = 0;
424 	size_t bytesLeft = *_numBytes;
425 	status_t status;
426 
427 	while (true) {
428 		file_io_vec fileVecs[8];
429 		size_t fileVecCount = 8;
430 
431 		status = file_map_translate(inode->Map(), pos, bytesLeft, fileVecs,
432 			&fileVecCount, 0);
433 		if (status != B_OK && status != B_BUFFER_OVERFLOW)
434 			break;
435 
436 		bool bufferOverflow = status == B_BUFFER_OVERFLOW;
437 
438 		size_t bytes = bytesLeft;
439 		status = read_file_io_vec_pages(volume->Device(), fileVecs,
440 			fileVecCount, vecs, count, &vecIndex, &vecOffset, &bytes);
441 		if (status != B_OK || !bufferOverflow)
442 			break;
443 
444 		pos += bytes;
445 		bytesLeft -= bytes;
446 	}
447 
448 	return status;
449 }
450 
451 
452 static status_t
453 bfs_write_pages(fs_volume* _volume, fs_vnode* _node, void* _cookie,
454 	off_t pos, const iovec* vecs, size_t count, size_t* _numBytes)
455 {
456 	Volume* volume = (Volume*)_volume->private_volume;
457 	Inode* inode = (Inode*)_node->private_node;
458 
459 	if (volume->IsReadOnly())
460 		return B_READ_ONLY_DEVICE;
461 
462 	if (inode->FileCache() == NULL)
463 		RETURN_ERROR(B_BAD_VALUE);
464 
465 	InodeReadLocker _(inode);
466 
467 	uint32 vecIndex = 0;
468 	size_t vecOffset = 0;
469 	size_t bytesLeft = *_numBytes;
470 	status_t status;
471 
472 	while (true) {
473 		file_io_vec fileVecs[8];
474 		size_t fileVecCount = 8;
475 
476 		status = file_map_translate(inode->Map(), pos, bytesLeft, fileVecs,
477 			&fileVecCount, 0);
478 		if (status != B_OK && status != B_BUFFER_OVERFLOW)
479 			break;
480 
481 		bool bufferOverflow = status == B_BUFFER_OVERFLOW;
482 
483 		size_t bytes = bytesLeft;
484 		status = write_file_io_vec_pages(volume->Device(), fileVecs,
485 			fileVecCount, vecs, count, &vecIndex, &vecOffset, &bytes);
486 		if (status != B_OK || !bufferOverflow)
487 			break;
488 
489 		pos += bytes;
490 		bytesLeft -= bytes;
491 	}
492 
493 	return status;
494 }
495 
496 
497 static status_t
498 bfs_io(fs_volume* _volume, fs_vnode* _node, void* _cookie, io_request* request)
499 {
500 	Volume* volume = (Volume*)_volume->private_volume;
501 	Inode* inode = (Inode*)_node->private_node;
502 
503 #ifndef FS_SHELL
504 	if (io_request_is_write(request) && volume->IsReadOnly()) {
505 		notify_io_request(request, B_READ_ONLY_DEVICE);
506 		return B_READ_ONLY_DEVICE;
507 	}
508 #endif
509 
510 	if (inode->FileCache() == NULL) {
511 #ifndef FS_SHELL
512 		notify_io_request(request, B_BAD_VALUE);
513 #endif
514 		RETURN_ERROR(B_BAD_VALUE);
515 	}
516 
517 	// We lock the node here and will unlock it in the "finished" hook.
518 	rw_lock_read_lock(&inode->Lock());
519 
520 	return do_iterative_fd_io(volume->Device(), request,
521 		iterative_io_get_vecs_hook, iterative_io_finished_hook, inode);
522 }
523 
524 
525 static status_t
526 bfs_get_file_map(fs_volume* _volume, fs_vnode* _node, off_t offset, size_t size,
527 	struct file_io_vec* vecs, size_t* _count)
528 {
529 	Volume* volume = (Volume*)_volume->private_volume;
530 	Inode* inode = (Inode*)_node->private_node;
531 
532 	int32 blockShift = volume->BlockShift();
533 	uint32 index = 0, max = *_count;
534 	block_run run;
535 	off_t fileOffset;
536 
537 	//FUNCTION_START(("offset = %Ld, size = %lu\n", offset, size));
538 
539 	while (true) {
540 		status_t status = inode->FindBlockRun(offset, run, fileOffset);
541 		if (status != B_OK)
542 			return status;
543 
544 		vecs[index].offset = volume->ToOffset(run) + offset - fileOffset;
545 		vecs[index].length = ((uint32)run.Length() << blockShift)
546 			- offset + fileOffset;
547 
548 		// are we already done?
549 		if ((uint64)size <= (uint64)vecs[index].length
550 			|| (uint64)offset + (uint64)vecs[index].length
551 				>= (uint64)inode->Size()) {
552 			if ((uint64)offset + (uint64)vecs[index].length
553 					> (uint64)inode->Size()) {
554 				// make sure the extent ends with the last official file
555 				// block (without taking any preallocations into account)
556 				vecs[index].length = round_up(inode->Size() - offset,
557 					volume->BlockSize());
558 			}
559 			*_count = index + 1;
560 			return B_OK;
561 		}
562 
563 		offset += vecs[index].length;
564 		size -= vecs[index].length;
565 		index++;
566 
567 		if (index >= max) {
568 			// we're out of file_io_vecs; let's bail out
569 			*_count = index;
570 			return B_BUFFER_OVERFLOW;
571 		}
572 	}
573 
574 	// can never get here
575 	return B_ERROR;
576 }
577 
578 
579 //	#pragma mark -
580 
581 
582 static status_t
583 bfs_lookup(fs_volume* _volume, fs_vnode* _directory, const char* file,
584 	ino_t* _vnodeID)
585 {
586 	Volume* volume = (Volume*)_volume->private_volume;
587 	Inode* directory = (Inode*)_directory->private_node;
588 
589 	InodeReadLocker locker(directory);
590 
591 	// check access permissions
592 	status_t status = directory->CheckPermissions(X_OK);
593 	if (status != B_OK)
594 		RETURN_ERROR(status);
595 
596 	BPlusTree* tree = directory->Tree();
597 	if (tree == NULL)
598 		RETURN_ERROR(B_BAD_VALUE);
599 
600 	status = tree->Find((uint8*)file, (uint16)strlen(file), _vnodeID);
601 	if (status != B_OK) {
602 		//PRINT(("bfs_walk() could not find %Ld:\"%s\": %s\n", directory->BlockNumber(), file, strerror(status)));
603 		if (status == B_ENTRY_NOT_FOUND)
604 			entry_cache_add_missing(volume->ID(), directory->ID(), file);
605 
606 		return status;
607 	}
608 
609 	entry_cache_add(volume->ID(), directory->ID(), file, *_vnodeID);
610 
611 	locker.Unlock();
612 
613 	Inode* inode;
614 	status = get_vnode(volume->FSVolume(), *_vnodeID, (void**)&inode);
615 	if (status != B_OK) {
616 		REPORT_ERROR(status);
617 		return B_ENTRY_NOT_FOUND;
618 	}
619 
620 	return B_OK;
621 }
622 
623 
624 static status_t
625 bfs_get_vnode_name(fs_volume* _volume, fs_vnode* _node, char* buffer,
626 	size_t bufferSize)
627 {
628 	Inode* inode = (Inode*)_node->private_node;
629 
630 	return inode->GetName(buffer, bufferSize);
631 }
632 
633 
634 static status_t
635 bfs_ioctl(fs_volume* _volume, fs_vnode* _node, void* _cookie, uint32 cmd,
636 	void* buffer, size_t bufferLength)
637 {
638 	FUNCTION_START(("node = %p, cmd = %" B_PRIu32 ", buf = %p"
639 		", len = %" B_PRIuSIZE "\n", _node, cmd, buffer, bufferLength));
640 
641 	Volume* volume = (Volume*)_volume->private_volume;
642 
643 	switch (cmd) {
644 #ifndef FS_SHELL
645 		case B_TRIM_DEVICE:
646 		{
647 			fs_trim_data* trimData;
648 			MemoryDeleter deleter;
649 			status_t status = get_trim_data_from_user(buffer, bufferLength,
650 				deleter, trimData);
651 			if (status != B_OK)
652 				return status;
653 
654 			trimData->trimmed_size = 0;
655 
656 			for (uint32 i = 0; i < trimData->range_count; i++) {
657 				uint64 trimmedSize = 0;
658 				status_t status = volume->Allocator().Trim(
659 					trimData->ranges[i].offset, trimData->ranges[i].size,
660 					trimmedSize);
661 				if (status != B_OK)
662 					return status;
663 
664 				trimData->trimmed_size += trimmedSize;
665 			}
666 
667 			return copy_trim_data_to_user(buffer, trimData);
668 		}
669 #endif
670 
671 		case BFS_IOCTL_VERSION:
672 		{
673 			uint32 version = 0x10000;
674 			return user_memcpy(buffer, &version, sizeof(uint32));
675 		}
676 		case BFS_IOCTL_START_CHECKING:
677 		{
678 			// start checking
679 			status_t status = volume->CreateCheckVisitor();
680 			if (status != B_OK)
681 				return status;
682 
683 			CheckVisitor* checker = volume->CheckVisitor();
684 
685 			if (user_memcpy(&checker->Control(), buffer,
686 					sizeof(check_control)) != B_OK) {
687 				return B_BAD_ADDRESS;
688 			}
689 
690 			status = checker->StartBitmapPass();
691 			if (status == B_OK) {
692 				file_cookie* cookie = (file_cookie*)_cookie;
693 				cookie->open_mode |= BFS_OPEN_MODE_CHECKING;
694 			}
695 
696 			return status;
697 		}
698 		case BFS_IOCTL_STOP_CHECKING:
699 		{
700 			// stop checking
701 			CheckVisitor* checker = volume->CheckVisitor();
702 			if (checker == NULL)
703 				return B_NO_INIT;
704 
705 			status_t status = checker->StopChecking();
706 
707 			if (status == B_OK) {
708 				file_cookie* cookie = (file_cookie*)_cookie;
709 				cookie->open_mode &= ~BFS_OPEN_MODE_CHECKING;
710 
711 				status = user_memcpy(buffer, &checker->Control(),
712 					sizeof(check_control));
713 			}
714 
715 			volume->DeleteCheckVisitor();
716 			volume->SetCheckingThread(-1);
717 
718 			return status;
719 		}
720 		case BFS_IOCTL_CHECK_NEXT_NODE:
721 		{
722 			// check next
723 			CheckVisitor* checker = volume->CheckVisitor();
724 			if (checker == NULL)
725 				return B_NO_INIT;
726 
727 			volume->SetCheckingThread(find_thread(NULL));
728 
729 			checker->Control().errors = 0;
730 
731 			status_t status = checker->Next();
732 			if (status == B_ENTRY_NOT_FOUND) {
733 				checker->Control().status = B_ENTRY_NOT_FOUND;
734 					// tells StopChecking() that we finished the pass
735 
736 				if (checker->Pass() == BFS_CHECK_PASS_BITMAP) {
737 					if (checker->WriteBackCheckBitmap() == B_OK)
738 						status = checker->StartIndexPass();
739 				}
740 			}
741 
742 			if (status == B_OK) {
743 				status = user_memcpy(buffer, &checker->Control(),
744 					sizeof(check_control));
745 			}
746 
747 			return status;
748 		}
749 		case BFS_IOCTL_UPDATE_BOOT_BLOCK:
750 		{
751 			// let's makebootable (or anyone else) update the boot block
752 			// while BFS is mounted
753 			update_boot_block update;
754 			if (bufferLength != sizeof(update_boot_block))
755 				return B_BAD_VALUE;
756 			if (user_memcpy(&update, buffer, sizeof(update_boot_block)) != B_OK)
757 				return B_BAD_ADDRESS;
758 
759 			uint32 minOffset = offsetof(disk_super_block, pad_to_block);
760 			if (update.offset < minOffset
761 				|| update.offset >= 512 || update.length > 512 - minOffset
762 				|| update.length + update.offset > 512) {
763 				return B_BAD_VALUE;
764 			}
765 			if (user_memcpy((uint8*)&volume->SuperBlock() + update.offset,
766 					update.data, update.length) != B_OK) {
767 				return B_BAD_ADDRESS;
768 			}
769 
770 			return volume->WriteSuperBlock();
771 		}
772 		case BFS_IOCTL_RESIZE:
773 		{
774 			if (bufferLength != sizeof(uint64))
775 				return B_BAD_VALUE;
776 
777 			uint64 size;
778 			if (user_memcpy((uint8*)&size, buffer, sizeof(uint64)) != B_OK)
779 				return B_BAD_ADDRESS;
780 
781 			ResizeVisitor resizer(volume);
782 			return resizer.Resize(size, -1);
783 		}
784 
785 #ifdef DEBUG_FRAGMENTER
786 		case 56741:
787 		{
788 			BlockAllocator& allocator = volume->Allocator();
789 			allocator.Fragment();
790 			return B_OK;
791 		}
792 #endif
793 
794 #ifdef DEBUG
795 		case 56742:
796 		{
797 			// allocate all free blocks and zero them out
798 			// (a test for the BlockAllocator)!
799 			BlockAllocator& allocator = volume->Allocator();
800 			Transaction transaction(volume, 0);
801 			CachedBlock cached(volume);
802 			block_run run;
803 			while (allocator.AllocateBlocks(transaction, 8, 0, 64, 1, run)
804 					== B_OK) {
805 				PRINT(("write block_run(%" B_PRId32 ", %" B_PRIu16
806 					", %" B_PRIu16 ")\n", run.allocation_group, run.start,
807 					run.length));
808 
809 				for (int32 i = 0;i < run.length;i++) {
810 					status_t status = cached.SetToWritable(transaction, run);
811 					if (status == B_OK)
812 						memset(cached.WritableBlock(), 0, volume->BlockSize());
813 				}
814 			}
815 			return B_OK;
816 		}
817 #endif
818 	}
819 	return B_DEV_INVALID_IOCTL;
820 }
821 
822 
823 /*!	Sets the open-mode flags for the open file cookie - only
824 	supports O_APPEND currently, but that should be sufficient
825 	for a file system.
826 */
827 static status_t
828 bfs_set_flags(fs_volume* _volume, fs_vnode* _node, void* _cookie, int flags)
829 {
830 	FUNCTION_START(("node = %p, flags = %d", _node, flags));
831 
832 	file_cookie* cookie = (file_cookie*)_cookie;
833 	cookie->open_mode = (cookie->open_mode & ~O_APPEND) | (flags & O_APPEND);
834 
835 	return B_OK;
836 }
837 
838 
839 static status_t
840 bfs_fsync(fs_volume* _volume, fs_vnode* _node)
841 {
842 	FUNCTION();
843 
844 	Inode* inode = (Inode*)_node->private_node;
845 	return inode->Sync();
846 }
847 
848 
849 static status_t
850 bfs_read_stat(fs_volume* _volume, fs_vnode* _node, struct stat* stat)
851 {
852 	FUNCTION();
853 
854 	Inode* inode = (Inode*)_node->private_node;
855 	fill_stat_buffer(inode, *stat);
856 	return B_OK;
857 }
858 
859 
860 static status_t
861 bfs_write_stat(fs_volume* _volume, fs_vnode* _node, const struct stat* stat,
862 	uint32 mask)
863 {
864 	FUNCTION();
865 
866 	Volume* volume = (Volume*)_volume->private_volume;
867 	Inode* inode = (Inode*)_node->private_node;
868 
869 	if (volume->IsReadOnly())
870 		return B_READ_ONLY_DEVICE;
871 
872 	// TODO: we should definitely check a bit more if the new stats are
873 	//	valid - or even better, the VFS should check this before calling us
874 
875 	bfs_inode& node = inode->Node();
876 	bool updateTime = false;
877 	uid_t uid = geteuid();
878 
879 	bool isOwnerOrRoot = uid == 0 || uid == (uid_t)node.UserID();
880 	bool hasWriteAccess = inode->CheckPermissions(W_OK) == B_OK;
881 
882 	Transaction transaction(volume, inode->BlockNumber());
883 	inode->WriteLockInTransaction(transaction);
884 
885 	if ((mask & B_STAT_SIZE) != 0 && inode->Size() != stat->st_size) {
886 		// Since B_STAT_SIZE is the only thing that can fail directly, we
887 		// do it first, so that the inode state will still be consistent
888 		// with the on-disk version
889 		if (inode->IsDirectory())
890 			return B_IS_A_DIRECTORY;
891 		if (!inode->IsFile())
892 			return B_BAD_VALUE;
893 		if (!hasWriteAccess)
894 			RETURN_ERROR(B_NOT_ALLOWED);
895 
896 		off_t oldSize = inode->Size();
897 
898 		status_t status = inode->SetFileSize(transaction, stat->st_size);
899 		if (status != B_OK)
900 			return status;
901 
902 		// fill the new blocks (if any) with zeros
903 		if ((mask & B_STAT_SIZE_INSECURE) == 0) {
904 			// We must not keep the inode locked during a write operation,
905 			// or else we might deadlock.
906 			rw_lock_write_unlock(&inode->Lock());
907 			inode->FillGapWithZeros(oldSize, inode->Size());
908 			rw_lock_write_lock(&inode->Lock());
909 		}
910 
911 		if (!inode->IsDeleted()) {
912 			Index index(volume);
913 			index.UpdateSize(transaction, inode);
914 
915 			updateTime = true;
916 		}
917 	}
918 
919 	if ((mask & B_STAT_UID) != 0) {
920 		// only root should be allowed
921 		if (uid != 0)
922 			RETURN_ERROR(B_NOT_ALLOWED);
923 		node.uid = HOST_ENDIAN_TO_BFS_INT32(stat->st_uid);
924 		updateTime = true;
925 	}
926 
927 	if ((mask & B_STAT_GID) != 0) {
928 		// only the user or root can do that
929 		if (!isOwnerOrRoot)
930 			RETURN_ERROR(B_NOT_ALLOWED);
931 		node.gid = HOST_ENDIAN_TO_BFS_INT32(stat->st_gid);
932 		updateTime = true;
933 	}
934 
935 	if ((mask & B_STAT_MODE) != 0) {
936 		// only the user or root can do that
937 		if (!isOwnerOrRoot)
938 			RETURN_ERROR(B_NOT_ALLOWED);
939 		PRINT(("original mode = %u, stat->st_mode = %u\n",
940 			(unsigned int)node.Mode(), (unsigned int)stat->st_mode));
941 		node.mode = HOST_ENDIAN_TO_BFS_INT32((node.Mode() & ~S_IUMSK)
942 			| (stat->st_mode & S_IUMSK));
943 		updateTime = true;
944 	}
945 
946 	if ((mask & B_STAT_CREATION_TIME) != 0) {
947 		// the user or root can do that or any user with write access
948 		if (!isOwnerOrRoot && !hasWriteAccess)
949 			RETURN_ERROR(B_NOT_ALLOWED);
950 		node.create_time
951 			= HOST_ENDIAN_TO_BFS_INT64(bfs_inode::ToInode(stat->st_crtim));
952 	}
953 
954 	if ((mask & B_STAT_MODIFICATION_TIME) != 0) {
955 		// the user or root can do that or any user with write access
956 		if (!isOwnerOrRoot && !hasWriteAccess)
957 			RETURN_ERROR(B_NOT_ALLOWED);
958 		if (!inode->InLastModifiedIndex()) {
959 			// directory modification times are not part of the index
960 			node.last_modified_time
961 				= HOST_ENDIAN_TO_BFS_INT64(bfs_inode::ToInode(stat->st_mtim));
962 		} else if (!inode->IsDeleted()) {
963 			// Index::UpdateLastModified() will set the new time in the inode
964 			Index index(volume);
965 			index.UpdateLastModified(transaction, inode,
966 				bfs_inode::ToInode(stat->st_mtim));
967 		}
968 	}
969 
970 	if ((mask & B_STAT_CHANGE_TIME) != 0 || updateTime) {
971 		// the user or root can do that or any user with write access
972 		if (!isOwnerOrRoot && !hasWriteAccess)
973 			RETURN_ERROR(B_NOT_ALLOWED);
974 		bigtime_t newTime;
975 		if ((mask & B_STAT_CHANGE_TIME) == 0)
976 			newTime = bfs_inode::ToInode(real_time_clock_usecs());
977 		else
978 			newTime = bfs_inode::ToInode(stat->st_ctim);
979 
980 		node.status_change_time = HOST_ENDIAN_TO_BFS_INT64(newTime);
981 	}
982 
983 	status_t status = inode->WriteBack(transaction);
984 	if (status == B_OK)
985 		status = transaction.Done();
986 	if (status == B_OK)
987 		notify_stat_changed(volume->ID(), inode->ParentID(), inode->ID(), mask);
988 
989 	return status;
990 }
991 
992 
993 status_t
994 bfs_create(fs_volume* _volume, fs_vnode* _directory, const char* name,
995 	int openMode, int mode, void** _cookie, ino_t* _vnodeID)
996 {
997 	FUNCTION_START(("name = \"%s\", perms = %d, openMode = %d\n", name, mode,
998 		openMode));
999 
1000 	Volume* volume = (Volume*)_volume->private_volume;
1001 	Inode* directory = (Inode*)_directory->private_node;
1002 
1003 	if (volume->IsReadOnly())
1004 		return B_READ_ONLY_DEVICE;
1005 
1006 	if (!directory->IsDirectory())
1007 		RETURN_ERROR(B_BAD_TYPE);
1008 
1009 	// We are creating the cookie at this point, so that we don't have
1010 	// to remove the inode if we don't have enough free memory later...
1011 	file_cookie* cookie = new(std::nothrow) file_cookie;
1012 	if (cookie == NULL)
1013 		RETURN_ERROR(B_NO_MEMORY);
1014 
1015 	// initialize the cookie
1016 	cookie->open_mode = openMode;
1017 	cookie->last_size = 0;
1018 	cookie->last_notification = system_time();
1019 
1020 	Transaction transaction(volume, directory->BlockNumber());
1021 
1022 	Inode* inode;
1023 	bool created;
1024 	status_t status = Inode::Create(transaction, directory, name,
1025 		S_FILE | (mode & S_IUMSK), openMode, 0, &created, _vnodeID, &inode);
1026 
1027 	// Disable the file cache, if requested?
1028 	if (status == B_OK && (openMode & O_NOCACHE) != 0
1029 		&& inode->FileCache() != NULL) {
1030 		status = file_cache_disable(inode->FileCache());
1031 	}
1032 
1033 	entry_cache_add(volume->ID(), directory->ID(), name, *_vnodeID);
1034 
1035 	if (status == B_OK)
1036 		status = transaction.Done();
1037 
1038 	if (status == B_OK) {
1039 		// register the cookie
1040 		*_cookie = cookie;
1041 
1042 		if (created) {
1043 			notify_entry_created(volume->ID(), directory->ID(), name,
1044 				*_vnodeID);
1045 		}
1046 	} else {
1047 		entry_cache_remove(volume->ID(), directory->ID(), name);
1048 		delete cookie;
1049 	}
1050 
1051 	return status;
1052 }
1053 
1054 
1055 static status_t
1056 bfs_create_symlink(fs_volume* _volume, fs_vnode* _directory, const char* name,
1057 	const char* path, int mode)
1058 {
1059 	FUNCTION_START(("name = \"%s\", path = \"%s\"\n", name, path));
1060 
1061 	Volume* volume = (Volume*)_volume->private_volume;
1062 	Inode* directory = (Inode*)_directory->private_node;
1063 
1064 	if (volume->IsReadOnly())
1065 		return B_READ_ONLY_DEVICE;
1066 
1067 	if (!directory->IsDirectory())
1068 		RETURN_ERROR(B_BAD_TYPE);
1069 
1070 	status_t status = directory->CheckPermissions(W_OK);
1071 	if (status < B_OK)
1072 		RETURN_ERROR(status);
1073 
1074 	Transaction transaction(volume, directory->BlockNumber());
1075 
1076 	Inode* link;
1077 	off_t id;
1078 	status = Inode::Create(transaction, directory, name, S_SYMLINK | 0777,
1079 		0, 0, NULL, &id, &link);
1080 	if (status < B_OK)
1081 		RETURN_ERROR(status);
1082 
1083 	size_t length = strlen(path);
1084 	if (length < SHORT_SYMLINK_NAME_LENGTH) {
1085 		strcpy(link->Node().short_symlink, path);
1086 	} else {
1087 		link->Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_LONG_SYMLINK
1088 			| INODE_LOGGED);
1089 
1090 		// links usually don't have a file cache attached - but we now need one
1091 		link->SetFileCache(file_cache_create(volume->ID(), link->ID(), 0));
1092 		link->SetMap(file_map_create(volume->ID(), link->ID(), 0));
1093 
1094 		// The following call will have to write the inode back, so
1095 		// we don't have to do that here...
1096 		status = link->WriteAt(transaction, 0, (const uint8*)path, &length);
1097 	}
1098 
1099 	if (status == B_OK)
1100 		status = link->WriteBack(transaction);
1101 
1102 	// Inode::Create() left the inode locked in memory, and also doesn't
1103 	// publish links
1104 	publish_vnode(volume->FSVolume(), id, link, &gBFSVnodeOps, link->Mode(), 0);
1105 	put_vnode(volume->FSVolume(), id);
1106 
1107 	if (status == B_OK) {
1108 		entry_cache_add(volume->ID(), directory->ID(), name, id);
1109 
1110 		status = transaction.Done();
1111 		if (status == B_OK)
1112 			notify_entry_created(volume->ID(), directory->ID(), name, id);
1113 		else
1114 			entry_cache_remove(volume->ID(), directory->ID(), name);
1115 	}
1116 
1117 	return status;
1118 }
1119 
1120 
1121 status_t
1122 bfs_link(fs_volume* _volume, fs_vnode* dir, const char* name, fs_vnode* node)
1123 {
1124 	FUNCTION_START(("name = \"%s\"\n", name));
1125 
1126 	// This one won't be implemented in a binary compatible BFS
1127 	return B_UNSUPPORTED;
1128 }
1129 
1130 
1131 status_t
1132 bfs_unlink(fs_volume* _volume, fs_vnode* _directory, const char* name)
1133 {
1134 	FUNCTION_START(("name = \"%s\"\n", name));
1135 
1136 	if (!strcmp(name, "..") || !strcmp(name, "."))
1137 		return B_NOT_ALLOWED;
1138 
1139 	Volume* volume = (Volume*)_volume->private_volume;
1140 	Inode* directory = (Inode*)_directory->private_node;
1141 
1142 	status_t status = directory->CheckPermissions(W_OK);
1143 	if (status < B_OK)
1144 		return status;
1145 
1146 	Transaction transaction(volume, directory->BlockNumber());
1147 
1148 	off_t id;
1149 	status = directory->Remove(transaction, name, &id);
1150 	if (status == B_OK) {
1151 		entry_cache_remove(volume->ID(), directory->ID(), name);
1152 
1153 		status = transaction.Done();
1154 		if (status == B_OK)
1155 			notify_entry_removed(volume->ID(), directory->ID(), name, id);
1156 		else
1157 			entry_cache_add(volume->ID(), directory->ID(), name, id);
1158 	}
1159 	return status;
1160 }
1161 
1162 
1163 status_t
1164 bfs_rename(fs_volume* _volume, fs_vnode* _oldDir, const char* oldName,
1165 	fs_vnode* _newDir, const char* newName)
1166 {
1167 	FUNCTION_START(("oldDir = %p, oldName = \"%s\", newDir = %p, newName = "
1168 		"\"%s\"\n", _oldDir, oldName, _newDir, newName));
1169 
1170 	Volume* volume = (Volume*)_volume->private_volume;
1171 	Inode* oldDirectory = (Inode*)_oldDir->private_node;
1172 	Inode* newDirectory = (Inode*)_newDir->private_node;
1173 
1174 	// are we already done?
1175 	if (oldDirectory == newDirectory && !strcmp(oldName, newName))
1176 		return B_OK;
1177 
1178 	Transaction transaction(volume, oldDirectory->BlockNumber());
1179 
1180 	oldDirectory->WriteLockInTransaction(transaction);
1181 	if (oldDirectory != newDirectory)
1182 		newDirectory->WriteLockInTransaction(transaction);
1183 
1184 	// are we allowed to do what we've been told?
1185 	status_t status = oldDirectory->CheckPermissions(W_OK);
1186 	if (status == B_OK)
1187 		status = newDirectory->CheckPermissions(W_OK);
1188 	if (status != B_OK)
1189 		return status;
1190 
1191 	// Get the directory's tree, and a pointer to the inode which should be
1192 	// changed
1193 	BPlusTree* tree = oldDirectory->Tree();
1194 	if (tree == NULL)
1195 		RETURN_ERROR(B_BAD_VALUE);
1196 
1197 	off_t id;
1198 	status = tree->Find((const uint8*)oldName, strlen(oldName), &id);
1199 	if (status != B_OK)
1200 		RETURN_ERROR(status);
1201 
1202 	Vnode vnode(volume, id);
1203 	Inode* inode;
1204 	if (vnode.Get(&inode) != B_OK)
1205 		return B_IO_ERROR;
1206 
1207 	// Don't move a directory into one of its children - we soar up
1208 	// from the newDirectory to either the root node or the old
1209 	// directory, whichever comes first.
1210 	// If we meet our inode on that way, we have to bail out.
1211 
1212 	if (oldDirectory != newDirectory) {
1213 		ino_t parent = newDirectory->ID();
1214 		ino_t root = volume->RootNode()->ID();
1215 
1216 		while (true) {
1217 			if (parent == id)
1218 				return B_BAD_VALUE;
1219 			else if (parent == root || parent == oldDirectory->ID())
1220 				break;
1221 
1222 			Vnode vnode(volume, parent);
1223 			Inode* parentNode;
1224 			if (vnode.Get(&parentNode) != B_OK)
1225 				return B_ERROR;
1226 
1227 			parent = volume->ToVnode(parentNode->Parent());
1228 		}
1229 	}
1230 
1231 	// Everything okay? Then lets get to work...
1232 
1233 	// First, try to make sure there is nothing that will stop us in
1234 	// the target directory - since this is the only non-critical
1235 	// failure, we will test this case first
1236 	BPlusTree* newTree = tree;
1237 	if (newDirectory != oldDirectory) {
1238 		newTree = newDirectory->Tree();
1239 		if (newTree == NULL)
1240 			RETURN_ERROR(B_BAD_VALUE);
1241 	}
1242 
1243 	status = newTree->Insert(transaction, (const uint8*)newName,
1244 		strlen(newName), id);
1245 	if (status == B_NAME_IN_USE) {
1246 		// If there is already a file with that name, we have to remove
1247 		// it, as long it's not a directory with files in it
1248 		off_t clobber;
1249 		if (newTree->Find((const uint8*)newName, strlen(newName), &clobber)
1250 				< B_OK)
1251 			return B_NAME_IN_USE;
1252 		if (clobber == id)
1253 			return B_BAD_VALUE;
1254 
1255 		Vnode vnode(volume, clobber);
1256 		Inode* other;
1257 		if (vnode.Get(&other) < B_OK)
1258 			return B_NAME_IN_USE;
1259 
1260 		// only allowed, if either both nodes are directories or neither is
1261 		if (inode->IsDirectory() != other->IsDirectory())
1262 			return other->IsDirectory() ? B_IS_A_DIRECTORY : B_NOT_A_DIRECTORY;
1263 
1264 		status = newDirectory->Remove(transaction, newName, NULL,
1265 			other->IsDirectory());
1266 		if (status < B_OK)
1267 			return status;
1268 
1269 		entry_cache_remove(volume->ID(), newDirectory->ID(), newName);
1270 
1271 		notify_entry_removed(volume->ID(), newDirectory->ID(), newName,
1272 			clobber);
1273 
1274 		status = newTree->Insert(transaction, (const uint8*)newName,
1275 			strlen(newName), id);
1276 	}
1277 	if (status != B_OK)
1278 		return status;
1279 
1280 	inode->WriteLockInTransaction(transaction);
1281 
1282 	volume->UpdateLiveQueriesRenameMove(inode, oldDirectory->ID(), oldName,
1283 		newDirectory->ID(), newName);
1284 
1285 	// update the name only when they differ
1286 	if (strcmp(oldName, newName)) {
1287 		status = inode->SetName(transaction, newName);
1288 		if (status == B_OK) {
1289 			Index index(volume);
1290 			index.UpdateName(transaction, oldName, newName, inode);
1291 		}
1292 	}
1293 
1294 	if (status == B_OK) {
1295 		status = tree->Remove(transaction, (const uint8*)oldName,
1296 			strlen(oldName), id);
1297 		if (status == B_OK) {
1298 			inode->Parent() = newDirectory->BlockRun();
1299 
1300 			// if it's a directory, update the parent directory pointer
1301 			// in its tree if necessary
1302 			BPlusTree* movedTree = inode->Tree();
1303 			if (oldDirectory != newDirectory
1304 				&& inode->IsDirectory()
1305 				&& movedTree != NULL) {
1306 				status = movedTree->Replace(transaction, (const uint8*)"..",
1307 					2, newDirectory->ID());
1308 
1309 				if (status == B_OK) {
1310 					// update/add the cache entry for the parent
1311 					entry_cache_add(volume->ID(), id, "..", newDirectory->ID());
1312 				}
1313 			}
1314 
1315 			if (status == B_OK && newDirectory != oldDirectory)
1316 				status = oldDirectory->ContainerContentsChanged(transaction);
1317 			if (status == B_OK)
1318 				status = newDirectory->ContainerContentsChanged(transaction);
1319 
1320 			if (status == B_OK)
1321 				status = inode->WriteBack(transaction);
1322 
1323 			if (status == B_OK) {
1324 				entry_cache_remove(volume->ID(), oldDirectory->ID(), oldName);
1325 				entry_cache_add(volume->ID(), newDirectory->ID(), newName, id);
1326 
1327 				status = transaction.Done();
1328 				if (status == B_OK) {
1329 					notify_entry_moved(volume->ID(), oldDirectory->ID(),
1330 						oldName, newDirectory->ID(), newName, id);
1331 					return B_OK;
1332 				}
1333 
1334 				entry_cache_remove(volume->ID(), newDirectory->ID(), newName);
1335 				entry_cache_add(volume->ID(), oldDirectory->ID(), oldName, id);
1336 			}
1337 		}
1338 	}
1339 
1340 	return status;
1341 }
1342 
1343 
1344 static status_t
1345 bfs_open(fs_volume* _volume, fs_vnode* _node, int openMode, void** _cookie)
1346 {
1347 	FUNCTION();
1348 
1349 	Volume* volume = (Volume*)_volume->private_volume;
1350 	Inode* inode = (Inode*)_node->private_node;
1351 
1352 	// Opening a directory read-only is allowed, although you can't read
1353 	// any data from it.
1354 	if (inode->IsDirectory() && (openMode & O_RWMASK) != O_RDONLY)
1355 		return B_IS_A_DIRECTORY;
1356 	if ((openMode & O_DIRECTORY) != 0 && !inode->IsDirectory())
1357 		return B_NOT_A_DIRECTORY;
1358 
1359 	status_t status = inode->CheckPermissions(open_mode_to_access(openMode)
1360 		| ((openMode & O_TRUNC) != 0 ? W_OK : 0));
1361 	if (status != B_OK)
1362 		RETURN_ERROR(status);
1363 
1364 	file_cookie* cookie = new(std::nothrow) file_cookie;
1365 	if (cookie == NULL)
1366 		RETURN_ERROR(B_NO_MEMORY);
1367 	ObjectDeleter<file_cookie> cookieDeleter(cookie);
1368 
1369 	// initialize the cookie
1370 	cookie->open_mode = openMode & BFS_OPEN_MODE_USER_MASK;
1371 	cookie->last_size = inode->Size();
1372 	cookie->last_notification = system_time();
1373 
1374 	// Disable the file cache, if requested?
1375 	CObjectDeleter<void, void, file_cache_enable> fileCacheEnabler;
1376 	if ((openMode & O_NOCACHE) != 0 && inode->FileCache() != NULL) {
1377 		status = file_cache_disable(inode->FileCache());
1378 		if (status != B_OK)
1379 			return status;
1380 		fileCacheEnabler.SetTo(inode->FileCache());
1381 	}
1382 
1383 	// Should we truncate the file?
1384 	if ((openMode & O_TRUNC) != 0) {
1385 		if ((openMode & O_RWMASK) == O_RDONLY)
1386 			return B_NOT_ALLOWED;
1387 
1388 		Transaction transaction(volume, inode->BlockNumber());
1389 		inode->WriteLockInTransaction(transaction);
1390 
1391 		status_t status = inode->SetFileSize(transaction, 0);
1392 		if (status == B_OK)
1393 			status = inode->WriteBack(transaction);
1394 		if (status == B_OK)
1395 			status = transaction.Done();
1396 		if (status != B_OK)
1397 			return status;
1398 	}
1399 
1400 	fileCacheEnabler.Detach();
1401 	cookieDeleter.Detach();
1402 	*_cookie = cookie;
1403 	return B_OK;
1404 }
1405 
1406 
1407 static status_t
1408 bfs_read(fs_volume* _volume, fs_vnode* _node, void* _cookie, off_t pos,
1409 	void* buffer, size_t* _length)
1410 {
1411 	//FUNCTION();
1412 	Inode* inode = (Inode*)_node->private_node;
1413 
1414 	if (!inode->HasUserAccessableStream()) {
1415 		*_length = 0;
1416 		return inode->IsDirectory() ? B_IS_A_DIRECTORY : B_BAD_VALUE;
1417 	}
1418 
1419 	return inode->ReadAt(pos, (uint8*)buffer, _length);
1420 }
1421 
1422 
1423 static status_t
1424 bfs_write(fs_volume* _volume, fs_vnode* _node, void* _cookie, off_t pos,
1425 	const void* buffer, size_t* _length)
1426 {
1427 	//FUNCTION();
1428 	Volume* volume = (Volume*)_volume->private_volume;
1429 	Inode* inode = (Inode*)_node->private_node;
1430 
1431 	if (volume->IsReadOnly())
1432 		return B_READ_ONLY_DEVICE;
1433 
1434 	if (!inode->HasUserAccessableStream()) {
1435 		*_length = 0;
1436 		return inode->IsDirectory() ? B_IS_A_DIRECTORY : B_BAD_VALUE;
1437 	}
1438 
1439 	file_cookie* cookie = (file_cookie*)_cookie;
1440 
1441 	if (cookie->open_mode & O_APPEND)
1442 		pos = inode->Size();
1443 
1444 	Transaction transaction;
1445 		// We are not starting the transaction here, since
1446 		// it might not be needed at all (the contents of
1447 		// regular files aren't logged)
1448 
1449 	status_t status = inode->WriteAt(transaction, pos, (const uint8*)buffer,
1450 		_length);
1451 	if (status == B_OK)
1452 		status = transaction.Done();
1453 	if (status == B_OK) {
1454 		InodeReadLocker locker(inode);
1455 
1456 		// periodically notify if the file size has changed
1457 		// TODO: should we better test for a change in the last_modified time only?
1458 		if (!inode->IsDeleted() && cookie->last_size != inode->Size()
1459 			&& system_time() > cookie->last_notification
1460 					+ INODE_NOTIFICATION_INTERVAL) {
1461 			notify_stat_changed(volume->ID(), inode->ParentID(), inode->ID(),
1462 				B_STAT_MODIFICATION_TIME | B_STAT_SIZE | B_STAT_INTERIM_UPDATE);
1463 			cookie->last_size = inode->Size();
1464 			cookie->last_notification = system_time();
1465 		}
1466 	}
1467 
1468 	return status;
1469 }
1470 
1471 
1472 static status_t
1473 bfs_close(fs_volume* _volume, fs_vnode* _node, void* _cookie)
1474 {
1475 	FUNCTION();
1476 	return B_OK;
1477 }
1478 
1479 
1480 static status_t
1481 bfs_free_cookie(fs_volume* _volume, fs_vnode* _node, void* _cookie)
1482 {
1483 	FUNCTION();
1484 
1485 	file_cookie* cookie = (file_cookie*)_cookie;
1486 	Volume* volume = (Volume*)_volume->private_volume;
1487 	Inode* inode = (Inode*)_node->private_node;
1488 
1489 	Transaction transaction;
1490 	bool needsTrimming = false;
1491 
1492 	if (!volume->IsReadOnly() && !volume->IsCheckingThread()) {
1493 		InodeReadLocker locker(inode);
1494 		needsTrimming = inode->NeedsTrimming();
1495 
1496 		if ((cookie->open_mode & O_RWMASK) != 0
1497 			&& !inode->IsDeleted()
1498 			&& (needsTrimming
1499 				|| inode->OldLastModified() != inode->LastModified()
1500 				|| (inode->InSizeIndex()
1501 					// TODO: this can prevent the size update notification
1502 					// for nodes not in the index!
1503 					&& inode->OldSize() != inode->Size()))) {
1504 			locker.Unlock();
1505 			transaction.Start(volume, inode->BlockNumber());
1506 		}
1507 	}
1508 
1509 	status_t status = transaction.IsStarted() ? B_OK : B_ERROR;
1510 
1511 	if (status == B_OK) {
1512 		inode->WriteLockInTransaction(transaction);
1513 
1514 		// trim the preallocated blocks and update the size,
1515 		// and last_modified indices if needed
1516 		bool changedSize = false, changedTime = false;
1517 		Index index(volume);
1518 
1519 		if (needsTrimming) {
1520 			status = inode->TrimPreallocation(transaction);
1521 			if (status < B_OK) {
1522 				FATAL(("Could not trim preallocated blocks: inode %" B_PRIdINO
1523 					", transaction %d: %s!\n", inode->ID(),
1524 					(int)transaction.ID(), strerror(status)));
1525 
1526 				// we still want this transaction to succeed
1527 				status = B_OK;
1528 			}
1529 		}
1530 		if (inode->OldSize() != inode->Size()) {
1531 			if (inode->InSizeIndex())
1532 				index.UpdateSize(transaction, inode);
1533 			changedSize = true;
1534 		}
1535 		if (inode->OldLastModified() != inode->LastModified()) {
1536 			if (inode->InLastModifiedIndex()) {
1537 				index.UpdateLastModified(transaction, inode,
1538 					inode->LastModified());
1539 			}
1540 			changedTime = true;
1541 
1542 			// updating the index doesn't write back the inode
1543 			inode->WriteBack(transaction);
1544 		}
1545 
1546 		if (changedSize || changedTime) {
1547 			notify_stat_changed(volume->ID(), inode->ParentID(), inode->ID(),
1548 				(changedTime ? B_STAT_MODIFICATION_TIME : 0)
1549 				| (changedSize ? B_STAT_SIZE : 0));
1550 		}
1551 	}
1552 	if (status == B_OK)
1553 		transaction.Done();
1554 
1555 	if ((cookie->open_mode & BFS_OPEN_MODE_CHECKING) != 0) {
1556 		// "chkbfs" exited abnormally, so we have to stop it here...
1557 		FATAL(("check process was aborted!\n"));
1558 		volume->CheckVisitor()->StopChecking();
1559 		volume->DeleteCheckVisitor();
1560 	}
1561 
1562 	if ((cookie->open_mode & O_NOCACHE) != 0 && inode->FileCache() != NULL)
1563 		file_cache_enable(inode->FileCache());
1564 
1565 	delete cookie;
1566 	return B_OK;
1567 }
1568 
1569 
1570 /*!	Checks access permissions, return B_NOT_ALLOWED if the action
1571 	is not allowed.
1572 */
1573 static status_t
1574 bfs_access(fs_volume* _volume, fs_vnode* _node, int accessMode)
1575 {
1576 	//FUNCTION();
1577 
1578 	Inode* inode = (Inode*)_node->private_node;
1579 	status_t status = inode->CheckPermissions(accessMode);
1580 	if (status < B_OK)
1581 		RETURN_ERROR(status);
1582 
1583 	return B_OK;
1584 }
1585 
1586 
1587 static status_t
1588 bfs_read_link(fs_volume* _volume, fs_vnode* _node, char* buffer,
1589 	size_t* _bufferSize)
1590 {
1591 	FUNCTION();
1592 
1593 	Inode* inode = (Inode*)_node->private_node;
1594 
1595 	if (!inode->IsSymLink())
1596 		RETURN_ERROR(B_BAD_VALUE);
1597 
1598 	if ((inode->Flags() & INODE_LONG_SYMLINK) != 0) {
1599 		status_t status = inode->ReadAt(0, (uint8*)buffer, _bufferSize);
1600 		if (status < B_OK)
1601 			RETURN_ERROR(status);
1602 
1603 		*_bufferSize = inode->Size();
1604 		return B_OK;
1605 	}
1606 
1607 	size_t linkLength = strlen(inode->Node().short_symlink);
1608 
1609 	size_t bytesToCopy = min_c(linkLength, *_bufferSize);
1610 
1611 	*_bufferSize = linkLength;
1612 
1613 	memcpy(buffer, inode->Node().short_symlink, bytesToCopy);
1614 	return B_OK;
1615 }
1616 
1617 
1618 //	#pragma mark - Directory functions
1619 
1620 
1621 static status_t
1622 bfs_create_dir(fs_volume* _volume, fs_vnode* _directory, const char* name,
1623 	int mode)
1624 {
1625 	FUNCTION_START(("name = \"%s\", perms = %d\n", name, mode));
1626 
1627 	Volume* volume = (Volume*)_volume->private_volume;
1628 	Inode* directory = (Inode*)_directory->private_node;
1629 
1630 	if (volume->IsReadOnly())
1631 		return B_READ_ONLY_DEVICE;
1632 
1633 	if (!directory->IsDirectory())
1634 		RETURN_ERROR(B_BAD_TYPE);
1635 
1636 	status_t status = directory->CheckPermissions(W_OK);
1637 	if (status < B_OK)
1638 		RETURN_ERROR(status);
1639 
1640 	Transaction transaction(volume, directory->BlockNumber());
1641 
1642 	// Inode::Create() locks the inode if we pass the "id" parameter, but we
1643 	// need it anyway
1644 	off_t id;
1645 	status = Inode::Create(transaction, directory, name,
1646 		S_DIRECTORY | (mode & S_IUMSK), 0, 0, NULL, &id);
1647 	if (status == B_OK) {
1648 		put_vnode(volume->FSVolume(), id);
1649 
1650 		entry_cache_add(volume->ID(), directory->ID(), name, id);
1651 
1652 		status = transaction.Done();
1653 		if (status == B_OK)
1654 			notify_entry_created(volume->ID(), directory->ID(), name, id);
1655 		else
1656 			entry_cache_remove(volume->ID(), directory->ID(), name);
1657 	}
1658 
1659 	return status;
1660 }
1661 
1662 
1663 static status_t
1664 bfs_remove_dir(fs_volume* _volume, fs_vnode* _directory, const char* name)
1665 {
1666 	FUNCTION_START(("name = \"%s\"\n", name));
1667 
1668 	Volume* volume = (Volume*)_volume->private_volume;
1669 	Inode* directory = (Inode*)_directory->private_node;
1670 
1671 	Transaction transaction(volume, directory->BlockNumber());
1672 
1673 	off_t id;
1674 	status_t status = directory->Remove(transaction, name, &id, true);
1675 	if (status == B_OK) {
1676 		// Remove the cache entry for the directory and potentially also
1677 		// the parent entry still belonging to the directory
1678 		entry_cache_remove(volume->ID(), directory->ID(), name);
1679 		entry_cache_remove(volume->ID(), id, "..");
1680 
1681 		status = transaction.Done();
1682 		if (status == B_OK)
1683 			notify_entry_removed(volume->ID(), directory->ID(), name, id);
1684 		else {
1685 			entry_cache_add(volume->ID(), directory->ID(), name, id);
1686 			entry_cache_add(volume->ID(), id, "..", id);
1687 		}
1688 	}
1689 
1690 	return status;
1691 }
1692 
1693 
1694 /*!	Opens a directory ready to be traversed.
1695 	bfs_open_dir() is also used by bfs_open_index_dir().
1696 */
1697 static status_t
1698 bfs_open_dir(fs_volume* _volume, fs_vnode* _node, void** _cookie)
1699 {
1700 	FUNCTION();
1701 
1702 	Inode* inode = (Inode*)_node->private_node;
1703 	status_t status = inode->CheckPermissions(R_OK);
1704 	if (status < B_OK)
1705 		RETURN_ERROR(status);
1706 
1707 	// we don't ask here for directories only, because the bfs_open_index_dir()
1708 	// function utilizes us (so we must be able to open indices as well)
1709 	if (!inode->IsContainer())
1710 		RETURN_ERROR(B_NOT_A_DIRECTORY);
1711 
1712 	BPlusTree* tree = inode->Tree();
1713 	if (tree == NULL)
1714 		RETURN_ERROR(B_BAD_VALUE);
1715 
1716 	TreeIterator* iterator = new(std::nothrow) TreeIterator(tree);
1717 	if (iterator == NULL)
1718 		RETURN_ERROR(B_NO_MEMORY);
1719 
1720 	*_cookie = iterator;
1721 	return B_OK;
1722 }
1723 
1724 
1725 static status_t
1726 bfs_read_dir(fs_volume* _volume, fs_vnode* _node, void* _cookie,
1727 	struct dirent* dirent, size_t bufferSize, uint32* _num)
1728 {
1729 	FUNCTION();
1730 
1731 	TreeIterator* iterator = (TreeIterator*)_cookie;
1732 	Volume* volume = (Volume*)_volume->private_volume;
1733 
1734 	uint32 maxCount = *_num;
1735 	uint32 count = 0;
1736 
1737 	while (count < maxCount && bufferSize > sizeof(struct dirent)) {
1738 		ino_t id;
1739 		uint16 length;
1740 		size_t nameBufferSize = bufferSize - offsetof(struct dirent, d_name);
1741 
1742 		status_t status = iterator->GetNextEntry(dirent->d_name, &length,
1743 			nameBufferSize, &id);
1744 
1745 		if (status == B_ENTRY_NOT_FOUND)
1746 			break;
1747 
1748 		if (status == B_BUFFER_OVERFLOW) {
1749 			// the remaining name buffer length was too small
1750 			if (count == 0)
1751 				RETURN_ERROR(B_BUFFER_OVERFLOW);
1752 			break;
1753 		}
1754 
1755 		if (status != B_OK)
1756 			RETURN_ERROR(status);
1757 
1758 		ASSERT(length < nameBufferSize);
1759 
1760 		dirent->d_dev = volume->ID();
1761 		dirent->d_ino = id;
1762 		dirent->d_reclen = offsetof(struct dirent, d_name) + length + 1;
1763 
1764 		bufferSize -= dirent->d_reclen;
1765 		dirent = (struct dirent*)((uint8*)dirent + dirent->d_reclen);
1766 		count++;
1767 	}
1768 
1769 	*_num = count;
1770 	return B_OK;
1771 }
1772 
1773 
1774 /*!	Sets the TreeIterator back to the beginning of the directory. */
1775 static status_t
1776 bfs_rewind_dir(fs_volume* /*_volume*/, fs_vnode* /*node*/, void* _cookie)
1777 {
1778 	FUNCTION();
1779 	TreeIterator* iterator = (TreeIterator*)_cookie;
1780 
1781 	return iterator->Rewind();
1782 }
1783 
1784 
1785 static status_t
1786 bfs_close_dir(fs_volume* /*_volume*/, fs_vnode* /*node*/, void* /*_cookie*/)
1787 {
1788 	FUNCTION();
1789 	return B_OK;
1790 }
1791 
1792 
1793 static status_t
1794 bfs_free_dir_cookie(fs_volume* _volume, fs_vnode* node, void* _cookie)
1795 {
1796 	delete (TreeIterator*)_cookie;
1797 	return B_OK;
1798 }
1799 
1800 
1801 //	#pragma mark - Attribute functions
1802 
1803 
1804 static status_t
1805 bfs_open_attr_dir(fs_volume* _volume, fs_vnode* _node, void** _cookie)
1806 {
1807 	Inode* inode = (Inode*)_node->private_node;
1808 
1809 	FUNCTION();
1810 
1811 	AttributeIterator* iterator = new(std::nothrow) AttributeIterator(inode);
1812 	if (iterator == NULL)
1813 		RETURN_ERROR(B_NO_MEMORY);
1814 
1815 	*_cookie = iterator;
1816 	return B_OK;
1817 }
1818 
1819 
1820 static status_t
1821 bfs_close_attr_dir(fs_volume* _volume, fs_vnode* node, void* cookie)
1822 {
1823 	FUNCTION();
1824 	return B_OK;
1825 }
1826 
1827 
1828 static status_t
1829 bfs_free_attr_dir_cookie(fs_volume* _volume, fs_vnode* node, void* _cookie)
1830 {
1831 	FUNCTION();
1832 	AttributeIterator* iterator = (AttributeIterator*)_cookie;
1833 
1834 	delete iterator;
1835 	return B_OK;
1836 }
1837 
1838 
1839 static status_t
1840 bfs_rewind_attr_dir(fs_volume* _volume, fs_vnode* _node, void* _cookie)
1841 {
1842 	FUNCTION();
1843 
1844 	AttributeIterator* iterator = (AttributeIterator*)_cookie;
1845 	RETURN_ERROR(iterator->Rewind());
1846 }
1847 
1848 
1849 static status_t
1850 bfs_read_attr_dir(fs_volume* _volume, fs_vnode* node, void* _cookie,
1851 	struct dirent* dirent, size_t bufferSize, uint32* _num)
1852 {
1853 	FUNCTION();
1854 	AttributeIterator* iterator = (AttributeIterator*)_cookie;
1855 
1856 	uint32 type;
1857 	size_t length;
1858 	status_t status = iterator->GetNext(dirent->d_name, &length, &type,
1859 		&dirent->d_ino);
1860 	if (status == B_ENTRY_NOT_FOUND) {
1861 		*_num = 0;
1862 		return B_OK;
1863 	} else if (status != B_OK) {
1864 		RETURN_ERROR(status);
1865 	}
1866 
1867 	Volume* volume = (Volume*)_volume->private_volume;
1868 
1869 	dirent->d_dev = volume->ID();
1870 	dirent->d_reclen = offsetof(struct dirent, d_name) + length + 1;
1871 
1872 	*_num = 1;
1873 	return B_OK;
1874 }
1875 
1876 
1877 static status_t
1878 bfs_create_attr(fs_volume* _volume, fs_vnode* _node, const char* name,
1879 	uint32 type, int openMode, void** _cookie)
1880 {
1881 	FUNCTION();
1882 
1883 	Volume* volume = (Volume*)_volume->private_volume;
1884 	if (volume->IsReadOnly())
1885 		return B_READ_ONLY_DEVICE;
1886 
1887 	Inode* inode = (Inode*)_node->private_node;
1888 	Attribute attribute(inode);
1889 
1890 	return attribute.Create(name, type, openMode, (attr_cookie**)_cookie);
1891 }
1892 
1893 
1894 static status_t
1895 bfs_open_attr(fs_volume* _volume, fs_vnode* _node, const char* name,
1896 	int openMode, void** _cookie)
1897 {
1898 	FUNCTION();
1899 
1900 	Inode* inode = (Inode*)_node->private_node;
1901 	Attribute attribute(inode);
1902 
1903 	return attribute.Open(name, openMode, (attr_cookie**)_cookie);
1904 }
1905 
1906 
1907 static status_t
1908 bfs_close_attr(fs_volume* _volume, fs_vnode* _file, void* cookie)
1909 {
1910 	return B_OK;
1911 }
1912 
1913 
1914 static status_t
1915 bfs_free_attr_cookie(fs_volume* _volume, fs_vnode* _file, void* cookie)
1916 {
1917 	delete (attr_cookie*)cookie;
1918 	return B_OK;
1919 }
1920 
1921 
1922 static status_t
1923 bfs_read_attr(fs_volume* _volume, fs_vnode* _file, void* _cookie, off_t pos,
1924 	void* buffer, size_t* _length)
1925 {
1926 	FUNCTION();
1927 
1928 	attr_cookie* cookie = (attr_cookie*)_cookie;
1929 	Inode* inode = (Inode*)_file->private_node;
1930 
1931 	Attribute attribute(inode, cookie);
1932 
1933 	return attribute.Read(cookie, pos, (uint8*)buffer, _length);
1934 }
1935 
1936 
1937 static status_t
1938 bfs_write_attr(fs_volume* _volume, fs_vnode* _file, void* _cookie,
1939 	off_t pos, const void* buffer, size_t* _length)
1940 {
1941 	FUNCTION();
1942 
1943 	attr_cookie* cookie = (attr_cookie*)_cookie;
1944 	Volume* volume = (Volume*)_volume->private_volume;
1945 	Inode* inode = (Inode*)_file->private_node;
1946 
1947 	Transaction transaction(volume, inode->BlockNumber());
1948 	Attribute attribute(inode, cookie);
1949 
1950 	bool created;
1951 	status_t status = attribute.Write(transaction, cookie, pos,
1952 		(const uint8*)buffer, _length, &created);
1953 	if (status == B_OK) {
1954 		status = transaction.Done();
1955 		if (status == B_OK) {
1956 			notify_attribute_changed(volume->ID(), inode->ParentID(),
1957 				inode->ID(), cookie->name,
1958 				created ? B_ATTR_CREATED : B_ATTR_CHANGED);
1959 			notify_stat_changed(volume->ID(), inode->ParentID(), inode->ID(),
1960 				B_STAT_CHANGE_TIME);
1961 		}
1962 	}
1963 
1964 	return status;
1965 }
1966 
1967 
1968 static status_t
1969 bfs_read_attr_stat(fs_volume* _volume, fs_vnode* _file, void* _cookie,
1970 	struct stat* stat)
1971 {
1972 	FUNCTION();
1973 
1974 	attr_cookie* cookie = (attr_cookie*)_cookie;
1975 	Inode* inode = (Inode*)_file->private_node;
1976 
1977 	Attribute attribute(inode, cookie);
1978 
1979 	return attribute.Stat(*stat);
1980 }
1981 
1982 
1983 static status_t
1984 bfs_write_attr_stat(fs_volume* _volume, fs_vnode* file, void* cookie,
1985 	const struct stat* stat, int statMask)
1986 {
1987 	// TODO: Implement (at least setting the size)!
1988 	return EOPNOTSUPP;
1989 }
1990 
1991 
1992 static status_t
1993 bfs_rename_attr(fs_volume* _volume, fs_vnode* fromFile, const char* fromName,
1994 	fs_vnode* toFile, const char* toName)
1995 {
1996 	FUNCTION_START(("name = \"%s\", to = \"%s\"\n", fromName, toName));
1997 
1998 	// TODO: implement bfs_rename_attr()!
1999 	// There will probably be an API to move one attribute to another file,
2000 	// making that function much more complicated - oh joy ;-)
2001 
2002 	return EOPNOTSUPP;
2003 }
2004 
2005 
2006 static status_t
2007 bfs_remove_attr(fs_volume* _volume, fs_vnode* _node, const char* name)
2008 {
2009 	FUNCTION_START(("name = \"%s\"\n", name));
2010 
2011 	Volume* volume = (Volume*)_volume->private_volume;
2012 	Inode* inode = (Inode*)_node->private_node;
2013 
2014 	status_t status = inode->CheckPermissions(W_OK);
2015 	if (status != B_OK)
2016 		return status;
2017 
2018 	Transaction transaction(volume, inode->BlockNumber());
2019 
2020 	status = inode->RemoveAttribute(transaction, name);
2021 	if (status == B_OK)
2022 		status = transaction.Done();
2023 	if (status == B_OK) {
2024 		notify_attribute_changed(volume->ID(), inode->ParentID(), inode->ID(),
2025 			name, B_ATTR_REMOVED);
2026 	}
2027 
2028 	return status;
2029 }
2030 
2031 
2032 //	#pragma mark - Special Nodes
2033 
2034 
2035 status_t
2036 bfs_create_special_node(fs_volume* _volume, fs_vnode* _directory,
2037 	const char* name, fs_vnode* subVnode, mode_t mode, uint32 flags,
2038 	fs_vnode* _superVnode, ino_t* _nodeID)
2039 {
2040 	// no need to support entry-less nodes
2041 	if (name == NULL)
2042 		return B_UNSUPPORTED;
2043 
2044 	FUNCTION_START(("name = \"%s\", mode = %u, flags = 0x%" B_PRIx32
2045 		", subVnode: %p\n", name, (unsigned int)mode, flags, subVnode));
2046 
2047 	Volume* volume = (Volume*)_volume->private_volume;
2048 	Inode* directory = (Inode*)_directory->private_node;
2049 
2050 	if (volume->IsReadOnly())
2051 		return B_READ_ONLY_DEVICE;
2052 
2053 	if (!directory->IsDirectory())
2054 		RETURN_ERROR(B_BAD_TYPE);
2055 
2056 	status_t status = directory->CheckPermissions(W_OK);
2057 	if (status < B_OK)
2058 		RETURN_ERROR(status);
2059 
2060 	Transaction transaction(volume, directory->BlockNumber());
2061 
2062 	off_t id;
2063 	Inode* inode;
2064 	status = Inode::Create(transaction, directory, name, mode, O_EXCL, 0, NULL,
2065 		&id, &inode, subVnode ? subVnode->ops : NULL, flags);
2066 	if (status == B_OK) {
2067 		_superVnode->private_node = inode;
2068 		_superVnode->ops = &gBFSVnodeOps;
2069 		*_nodeID = id;
2070 
2071 		entry_cache_add(volume->ID(), directory->ID(), name, id);
2072 
2073 		status = transaction.Done();
2074 		if (status == B_OK)
2075 			notify_entry_created(volume->ID(), directory->ID(), name, id);
2076 		else
2077 			entry_cache_remove(volume->ID(), directory->ID(), name);
2078 	}
2079 
2080 	return status;
2081 }
2082 
2083 
2084 //	#pragma mark - Index functions
2085 
2086 
2087 static status_t
2088 bfs_open_index_dir(fs_volume* _volume, void** _cookie)
2089 {
2090 	FUNCTION();
2091 
2092 	Volume* volume = (Volume*)_volume->private_volume;
2093 
2094 	if (volume->IndicesNode() == NULL) {
2095 		// This volume does not have any indices
2096 		RETURN_ERROR(B_ENTRY_NOT_FOUND);
2097 	}
2098 
2099 	// Since the indices root node is just a directory, and we are storing
2100 	// a pointer to it in our Volume object, we can just use the directory
2101 	// traversal functions.
2102 	// In fact we're storing it in the Volume object for that reason.
2103 
2104 	fs_vnode indicesNode;
2105 	indicesNode.private_node = volume->IndicesNode();
2106 
2107 	RETURN_ERROR(bfs_open_dir(_volume, &indicesNode, _cookie));
2108 }
2109 
2110 
2111 static status_t
2112 bfs_close_index_dir(fs_volume* _volume, void* _cookie)
2113 {
2114 	FUNCTION();
2115 
2116 	Volume* volume = (Volume*)_volume->private_volume;
2117 
2118 	fs_vnode indicesNode;
2119 	indicesNode.private_node = volume->IndicesNode();
2120 
2121 	RETURN_ERROR(bfs_close_dir(_volume, &indicesNode, _cookie));
2122 }
2123 
2124 
2125 static status_t
2126 bfs_free_index_dir_cookie(fs_volume* _volume, void* _cookie)
2127 {
2128 	FUNCTION();
2129 
2130 	Volume* volume = (Volume*)_volume->private_volume;
2131 
2132 	fs_vnode indicesNode;
2133 	indicesNode.private_node = volume->IndicesNode();
2134 
2135 	RETURN_ERROR(bfs_free_dir_cookie(_volume, &indicesNode, _cookie));
2136 }
2137 
2138 
2139 static status_t
2140 bfs_rewind_index_dir(fs_volume* _volume, void* _cookie)
2141 {
2142 	FUNCTION();
2143 
2144 	Volume* volume = (Volume*)_volume->private_volume;
2145 
2146 	fs_vnode indicesNode;
2147 	indicesNode.private_node = volume->IndicesNode();
2148 
2149 	RETURN_ERROR(bfs_rewind_dir(_volume, &indicesNode, _cookie));
2150 }
2151 
2152 
2153 static status_t
2154 bfs_read_index_dir(fs_volume* _volume, void* _cookie, struct dirent* dirent,
2155 	size_t bufferSize, uint32* _num)
2156 {
2157 	FUNCTION();
2158 
2159 	Volume* volume = (Volume*)_volume->private_volume;
2160 
2161 	fs_vnode indicesNode;
2162 	indicesNode.private_node = volume->IndicesNode();
2163 
2164 	RETURN_ERROR(bfs_read_dir(_volume, &indicesNode, _cookie, dirent,
2165 		bufferSize, _num));
2166 }
2167 
2168 
2169 static status_t
2170 bfs_create_index(fs_volume* _volume, const char* name, uint32 type,
2171 	uint32 flags)
2172 {
2173 	FUNCTION_START(("name = \"%s\", type = %" B_PRIu32
2174 		", flags = %" B_PRIu32 "\n", name, type, flags));
2175 
2176 	Volume* volume = (Volume*)_volume->private_volume;
2177 
2178 	if (volume->IsReadOnly())
2179 		return B_READ_ONLY_DEVICE;
2180 
2181 	// only root users are allowed to create indices
2182 	if (geteuid() != 0)
2183 		return B_NOT_ALLOWED;
2184 
2185 	Transaction transaction(volume, volume->Indices());
2186 
2187 	Index index(volume);
2188 	status_t status = index.Create(transaction, name, type);
2189 
2190 	if (status == B_OK)
2191 		status = transaction.Done();
2192 
2193 	RETURN_ERROR(status);
2194 }
2195 
2196 
2197 static status_t
2198 bfs_remove_index(fs_volume* _volume, const char* name)
2199 {
2200 	FUNCTION();
2201 
2202 	Volume* volume = (Volume*)_volume->private_volume;
2203 
2204 	if (volume->IsReadOnly())
2205 		return B_READ_ONLY_DEVICE;
2206 
2207 	// only root users are allowed to remove indices
2208 	if (geteuid() != 0)
2209 		return B_NOT_ALLOWED;
2210 
2211 	Inode* indices = volume->IndicesNode();
2212 	if (indices == NULL)
2213 		return B_ENTRY_NOT_FOUND;
2214 
2215 	Transaction transaction(volume, volume->Indices());
2216 
2217 	status_t status = indices->Remove(transaction, name);
2218 	if (status == B_OK)
2219 		status = transaction.Done();
2220 
2221 	RETURN_ERROR(status);
2222 }
2223 
2224 
2225 static status_t
2226 bfs_stat_index(fs_volume* _volume, const char* name, struct stat* stat)
2227 {
2228 	FUNCTION_START(("name = %s\n", name));
2229 
2230 	Volume* volume = (Volume*)_volume->private_volume;
2231 
2232 	Index index(volume);
2233 	status_t status = index.SetTo(name);
2234 	if (status < B_OK)
2235 		RETURN_ERROR(status);
2236 
2237 	bfs_inode& node = index.Node()->Node();
2238 
2239 	stat->st_type = index.Type();
2240 	stat->st_mode = node.Mode();
2241 
2242 	stat->st_size = node.data.Size();
2243 	stat->st_blocks = index.Node()->AllocatedSize() / 512;
2244 
2245 	stat->st_nlink = 1;
2246 	stat->st_blksize = 65536;
2247 
2248 	stat->st_uid = node.UserID();
2249 	stat->st_gid = node.GroupID();
2250 
2251 	fill_stat_time(node, *stat);
2252 
2253 	return B_OK;
2254 }
2255 
2256 
2257 //	#pragma mark - Query functions
2258 
2259 
2260 static status_t
2261 bfs_open_query(fs_volume* _volume, const char* queryString, uint32 flags,
2262 	port_id port, uint32 token, void** _cookie)
2263 {
2264 	FUNCTION_START(("bfs_open_query(\"%s\", flags = %" B_PRIu32
2265 		", port_id = %" B_PRId32 ", token = %" B_PRIu32 ")\n",
2266 		queryString, flags, port, token));
2267 
2268 	Volume* volume = (Volume*)_volume->private_volume;
2269 
2270 	Expression* expression = new(std::nothrow) Expression((char*)queryString);
2271 	if (expression == NULL)
2272 		RETURN_ERROR(B_NO_MEMORY);
2273 
2274 	if (expression->InitCheck() < B_OK) {
2275 		INFORM(("Could not parse query \"%s\", stopped at: \"%s\"\n",
2276 			queryString, expression->Position()));
2277 
2278 		delete expression;
2279 		RETURN_ERROR(B_BAD_VALUE);
2280 	}
2281 
2282 	Query* query = new(std::nothrow) Query(volume, expression, flags);
2283 	if (query == NULL) {
2284 		delete expression;
2285 		RETURN_ERROR(B_NO_MEMORY);
2286 	}
2287 
2288 	if (flags & B_LIVE_QUERY)
2289 		query->SetLiveMode(port, token);
2290 
2291 	*_cookie = (void*)query;
2292 
2293 	return B_OK;
2294 }
2295 
2296 
/*!	Close hook for queries.
	Nothing is done here and B_OK is always returned; the query object is
	released in bfs_free_query_cookie().
*/
static status_t
bfs_close_query(fs_volume* _volume, void* cookie)
{
	FUNCTION();
	return B_OK;
}
2303 
2304 
2305 static status_t
2306 bfs_free_query_cookie(fs_volume* _volume, void* cookie)
2307 {
2308 	FUNCTION();
2309 
2310 	Query* query = (Query*)cookie;
2311 	Expression* expression = query->GetExpression();
2312 	delete query;
2313 	delete expression;
2314 
2315 	return B_OK;
2316 }
2317 
2318 
2319 static status_t
2320 bfs_read_query(fs_volume* /*_volume*/, void* cookie, struct dirent* dirent,
2321 	size_t bufferSize, uint32* _num)
2322 {
2323 	FUNCTION();
2324 	Query* query = (Query*)cookie;
2325 	status_t status = query->GetNextEntry(dirent, bufferSize);
2326 	if (status == B_OK)
2327 		*_num = 1;
2328 	else if (status == B_ENTRY_NOT_FOUND)
2329 		*_num = 0;
2330 	else
2331 		return status;
2332 
2333 	return B_OK;
2334 }
2335 
2336 
2337 static status_t
2338 bfs_rewind_query(fs_volume* /*_volume*/, void* cookie)
2339 {
2340 	FUNCTION();
2341 
2342 	Query* query = (Query*)cookie;
2343 	return query->Rewind();
2344 }
2345 
2346 
2347 //	#pragma mark -
2348 
2349 
2350 static uint32
2351 bfs_get_supported_operations(partition_data* partition, uint32 mask)
2352 {
2353 	// TODO: We should at least check the partition size.
2354 	return B_DISK_SYSTEM_SUPPORTS_INITIALIZING
2355 		| B_DISK_SYSTEM_SUPPORTS_CONTENT_NAME
2356 		| B_DISK_SYSTEM_SUPPORTS_WRITING;
2357 }
2358 
2359 
2360 static status_t
2361 bfs_initialize(int fd, partition_id partitionID, const char* name,
2362 	const char* parameterString, off_t /*partitionSize*/, disk_job_id job)
2363 {
2364 	// check name
2365 	status_t status = check_volume_name(name);
2366 	if (status != B_OK)
2367 		return status;
2368 
2369 	// parse parameters
2370 	initialize_parameters parameters;
2371 	status = parse_initialize_parameters(parameterString, parameters);
2372 	if (status != B_OK)
2373 		return status;
2374 
2375 	update_disk_device_job_progress(job, 0);
2376 
2377 	// initialize the volume
2378 	Volume volume(NULL);
2379 	status = volume.Initialize(fd, name, parameters.blockSize,
2380 		parameters.flags);
2381 	if (status < B_OK) {
2382 		INFORM(("Initializing volume failed: %s\n", strerror(status)));
2383 		return status;
2384 	}
2385 
2386 	// rescan partition
2387 	status = scan_partition(partitionID);
2388 	if (status != B_OK)
2389 		return status;
2390 
2391 	update_disk_device_job_progress(job, 1);
2392 
2393 	// print some info, if desired
2394 	if (parameters.verbose) {
2395 		disk_super_block super = volume.SuperBlock();
2396 
2397 		INFORM(("Disk was initialized successfully.\n"));
2398 		INFORM(("\tname: \"%s\"\n", super.name));
2399 		INFORM(("\tnum blocks: %" B_PRIdOFF "\n", super.NumBlocks()));
2400 		INFORM(("\tused blocks: %" B_PRIdOFF "\n", super.UsedBlocks()));
2401 		INFORM(("\tblock size: %u bytes\n", (unsigned)super.BlockSize()));
2402 		INFORM(("\tnum allocation groups: %d\n",
2403 			(int)super.AllocationGroups()));
2404 		INFORM(("\tallocation group size: %ld blocks\n",
2405 			1L << super.AllocationGroupShift()));
2406 		INFORM(("\tlog size: %u blocks\n", super.log_blocks.Length()));
2407 	}
2408 
2409 	return B_OK;
2410 }
2411 
2412 
2413 static status_t
2414 bfs_uninitialize(int fd, partition_id partitionID, off_t partitionSize,
2415 	uint32 blockSize, disk_job_id job)
2416 {
2417 	if (blockSize == 0)
2418 		return B_BAD_VALUE;
2419 
2420 	update_disk_device_job_progress(job, 0.0);
2421 
2422 	// just overwrite the superblock
2423 	disk_super_block superBlock;
2424 	memset(&superBlock, 0, sizeof(superBlock));
2425 
2426 	if (write_pos(fd, 512, &superBlock, sizeof(superBlock)) < 0)
2427 		return errno;
2428 
2429 	update_disk_device_job_progress(job, 1.0);
2430 
2431 	return B_OK;
2432 }
2433 
2434 
2435 //	#pragma mark -
2436 
2437 
2438 static status_t
2439 bfs_std_ops(int32 op, ...)
2440 {
2441 	switch (op) {
2442 		case B_MODULE_INIT:
2443 #ifdef BFS_DEBUGGER_COMMANDS
2444 			add_debugger_commands();
2445 #endif
2446 			return B_OK;
2447 		case B_MODULE_UNINIT:
2448 #ifdef BFS_DEBUGGER_COMMANDS
2449 			remove_debugger_commands();
2450 #endif
2451 			return B_OK;
2452 
2453 		default:
2454 			return B_ERROR;
2455 	}
2456 }
2457 
2458 fs_volume_ops gBFSVolumeOps = {
2459 	&bfs_unmount,
2460 	&bfs_read_fs_stat,
2461 	&bfs_write_fs_stat,
2462 	&bfs_sync,
2463 	&bfs_get_vnode,
2464 
2465 	/* index directory & index operations */
2466 	&bfs_open_index_dir,
2467 	&bfs_close_index_dir,
2468 	&bfs_free_index_dir_cookie,
2469 	&bfs_read_index_dir,
2470 	&bfs_rewind_index_dir,
2471 
2472 	&bfs_create_index,
2473 	&bfs_remove_index,
2474 	&bfs_stat_index,
2475 
2476 	/* query operations */
2477 	&bfs_open_query,
2478 	&bfs_close_query,
2479 	&bfs_free_query_cookie,
2480 	&bfs_read_query,
2481 	&bfs_rewind_query,
2482 };
2483 
2484 fs_vnode_ops gBFSVnodeOps = {
2485 	/* vnode operations */
2486 	&bfs_lookup,
2487 	&bfs_get_vnode_name,
2488 	&bfs_put_vnode,
2489 	&bfs_remove_vnode,
2490 
2491 	/* VM file access */
2492 	&bfs_can_page,
2493 	&bfs_read_pages,
2494 	&bfs_write_pages,
2495 
2496 	&bfs_io,
2497 	NULL,	// cancel_io()
2498 
2499 	&bfs_get_file_map,
2500 
2501 	&bfs_ioctl,
2502 	&bfs_set_flags,
2503 	NULL,	// fs_select
2504 	NULL,	// fs_deselect
2505 	&bfs_fsync,
2506 
2507 	&bfs_read_link,
2508 	&bfs_create_symlink,
2509 
2510 	&bfs_link,
2511 	&bfs_unlink,
2512 	&bfs_rename,
2513 
2514 	&bfs_access,
2515 	&bfs_read_stat,
2516 	&bfs_write_stat,
2517 	NULL,	// fs_preallocate
2518 
2519 	/* file operations */
2520 	&bfs_create,
2521 	&bfs_open,
2522 	&bfs_close,
2523 	&bfs_free_cookie,
2524 	&bfs_read,
2525 	&bfs_write,
2526 
2527 	/* directory operations */
2528 	&bfs_create_dir,
2529 	&bfs_remove_dir,
2530 	&bfs_open_dir,
2531 	&bfs_close_dir,
2532 	&bfs_free_dir_cookie,
2533 	&bfs_read_dir,
2534 	&bfs_rewind_dir,
2535 
2536 	/* attribute directory operations */
2537 	&bfs_open_attr_dir,
2538 	&bfs_close_attr_dir,
2539 	&bfs_free_attr_dir_cookie,
2540 	&bfs_read_attr_dir,
2541 	&bfs_rewind_attr_dir,
2542 
2543 	/* attribute operations */
2544 	&bfs_create_attr,
2545 	&bfs_open_attr,
2546 	&bfs_close_attr,
2547 	&bfs_free_attr_cookie,
2548 	&bfs_read_attr,
2549 	&bfs_write_attr,
2550 
2551 	&bfs_read_attr_stat,
2552 	&bfs_write_attr_stat,
2553 	&bfs_rename_attr,
2554 	&bfs_remove_attr,
2555 
2556 	/* special nodes */
2557 	&bfs_create_special_node
2558 };
2559 
2560 static file_system_module_info sBeFileSystem = {
2561 	{
2562 		"file_systems/bfs" BFS_ENDIAN_SUFFIX B_CURRENT_FS_API_VERSION,
2563 		0,
2564 		bfs_std_ops,
2565 	},
2566 
2567 	"bfs" BFS_ENDIAN_SUFFIX,						// short_name
2568 	"Be File System" BFS_ENDIAN_PRETTY_SUFFIX,		// pretty_name
2569 
2570 	// DDM flags
2571 	0
2572 //	| B_DISK_SYSTEM_SUPPORTS_CHECKING
2573 //	| B_DISK_SYSTEM_SUPPORTS_REPAIRING
2574 //	| B_DISK_SYSTEM_SUPPORTS_RESIZING
2575 //	| B_DISK_SYSTEM_SUPPORTS_MOVING
2576 //	| B_DISK_SYSTEM_SUPPORTS_SETTING_CONTENT_NAME
2577 //	| B_DISK_SYSTEM_SUPPORTS_SETTING_CONTENT_PARAMETERS
2578 	| B_DISK_SYSTEM_SUPPORTS_INITIALIZING
2579 	| B_DISK_SYSTEM_SUPPORTS_CONTENT_NAME
2580 //	| B_DISK_SYSTEM_SUPPORTS_DEFRAGMENTING
2581 //	| B_DISK_SYSTEM_SUPPORTS_DEFRAGMENTING_WHILE_MOUNTED
2582 //	| B_DISK_SYSTEM_SUPPORTS_CHECKING_WHILE_MOUNTED
2583 //	| B_DISK_SYSTEM_SUPPORTS_REPAIRING_WHILE_MOUNTED
2584 //	| B_DISK_SYSTEM_SUPPORTS_RESIZING_WHILE_MOUNTED
2585 //	| B_DISK_SYSTEM_SUPPORTS_MOVING_WHILE_MOUNTED
2586 //	| B_DISK_SYSTEM_SUPPORTS_SETTING_CONTENT_NAME_WHILE_MOUNTED
2587 //	| B_DISK_SYSTEM_SUPPORTS_SETTING_CONTENT_PARAMETERS_WHILE_MOUNTED
2588 	| B_DISK_SYSTEM_SUPPORTS_WRITING
2589 	,
2590 
2591 	// scanning
2592 	bfs_identify_partition,
2593 	bfs_scan_partition,
2594 	bfs_free_identify_partition_cookie,
2595 	NULL,	// free_partition_content_cookie()
2596 
2597 	&bfs_mount,
2598 
2599 	/* capability querying operations */
2600 	&bfs_get_supported_operations,
2601 
2602 	NULL,	// validate_resize
2603 	NULL,	// validate_move
2604 	NULL,	// validate_set_content_name
2605 	NULL,	// validate_set_content_parameters
2606 	NULL,	// validate_initialize,
2607 
2608 	/* shadow partition modification */
2609 	NULL,	// shadow_changed
2610 
2611 	/* writing */
2612 	NULL,	// defragment
2613 	NULL,	// repair
2614 	NULL,	// resize
2615 	NULL,	// move
2616 	NULL,	// set_content_name
2617 	NULL,	// set_content_parameters
2618 	bfs_initialize,
2619 	bfs_uninitialize
2620 };
2621 
//!	NULL-terminated list of the modules defined in this file; it contains
//	only the BFS file system module.
module_info* modules[] = {
	(module_info*)&sBeFileSystem,
	NULL,
};
2626