xref: /haiku/src/add-ons/kernel/file_systems/bfs/kernel_interface.cpp (revision e1c4049fed1047bdb957b0529e1921e97ef94770)
1 /*
2  * Copyright 2001-2020, Axel Dörfler, axeld@pinc-software.de.
3  * This file may be used under the terms of the MIT License.
4  */
5 
6 
7 //!	file system interface to Haiku's vnode layer
8 
9 
10 #include "Attribute.h"
11 #include "CheckVisitor.h"
12 #include "Debug.h"
13 #include "Volume.h"
14 #include "Inode.h"
15 #include "Index.h"
16 #include "BPlusTree.h"
17 #include "Query.h"
18 #include "ResizeVisitor.h"
19 #include "bfs_control.h"
20 #include "bfs_disk_system.h"
21 
22 // TODO: temporary solution as long as there is no public I/O requests API
23 #ifndef FS_SHELL
24 #	include <io_requests.h>
25 #	include <util/fs_trim_support.h>
26 #endif
27 
28 
// Preferred I/O size in bytes; reported as st_blksize by fill_stat_buffer()
// and as fs_info::io_size by bfs_read_fs_stat().
#define BFS_IO_SIZE	65536

// String suffixes that differ between a build with BFS_LITTLE_ENDIAN_ONLY
// defined (empty suffixes) and one without it ("_big" / " (Big Endian)").
// NOTE(review): presumably appended to the module and pretty names of the
// file system further down in this file - confirm.
#if defined(BFS_LITTLE_ENDIAN_ONLY)
#define BFS_ENDIAN_SUFFIX ""
#define BFS_ENDIAN_PRETTY_SUFFIX ""
#else
#define BFS_ENDIAN_SUFFIX "_big"
#define BFS_ENDIAN_PRETTY_SUFFIX " (Big Endian)"
#endif
38 
39 
//!	Cookie created by bfs_identify_partition() and consumed by
//	bfs_scan_partition(); it carries a copy of the super block that was
//	read while identifying the partition.
struct identify_cookie {
	disk_super_block super_block;
};
43 
44 extern void fill_stat_buffer(Inode* inode, struct stat& stat);
45 
46 
47 static void
48 fill_stat_time(const bfs_inode& node, struct stat& stat)
49 {
50 	bigtime_t now = real_time_clock_usecs();
51 	stat.st_atim.tv_sec = now / 1000000LL;
52 	stat.st_atim.tv_nsec = (now % 1000000LL) * 1000;
53 
54 	stat.st_mtim.tv_sec = bfs_inode::ToSecs(node.LastModifiedTime());
55 	stat.st_mtim.tv_nsec = bfs_inode::ToNsecs(node.LastModifiedTime());
56 	stat.st_crtim.tv_sec = bfs_inode::ToSecs(node.CreateTime());
57 	stat.st_crtim.tv_nsec = bfs_inode::ToNsecs(node.CreateTime());
58 
59 	// For BeOS compatibility, if on-disk ctime is invalid, fall back to mtime:
60 	bigtime_t changeTime = node.StatusChangeTime();
61 	if (changeTime < node.LastModifiedTime())
62 		stat.st_ctim = stat.st_mtim;
63 	else {
64 		stat.st_ctim.tv_sec = bfs_inode::ToSecs(changeTime);
65 		stat.st_ctim.tv_nsec = bfs_inode::ToNsecs(changeTime);
66 	}
67 }
68 
69 
70 void
71 fill_stat_buffer(Inode* inode, struct stat& stat)
72 {
73 	const bfs_inode& node = inode->Node();
74 
75 	stat.st_dev = inode->GetVolume()->ID();
76 	stat.st_ino = inode->ID();
77 	stat.st_nlink = 1;
78 	stat.st_blksize = BFS_IO_SIZE;
79 
80 	stat.st_uid = node.UserID();
81 	stat.st_gid = node.GroupID();
82 	stat.st_mode = node.Mode();
83 	stat.st_type = node.Type();
84 
85 	fill_stat_time(node, stat);
86 
87 	if (inode->IsSymLink() && (inode->Flags() & INODE_LONG_SYMLINK) == 0) {
88 		// symlinks report the size of the link here
89 		stat.st_size = strlen(node.short_symlink);
90 	} else
91 		stat.st_size = inode->Size();
92 
93 	stat.st_blocks = inode->AllocatedSize() / 512;
94 }
95 
96 
97 //!	bfs_io() callback hook
98 static status_t
99 iterative_io_get_vecs_hook(void* cookie, io_request* request, off_t offset,
100 	size_t size, struct file_io_vec* vecs, size_t* _count)
101 {
102 	Inode* inode = (Inode*)cookie;
103 
104 	return file_map_translate(inode->Map(), offset, size, vecs, _count,
105 		inode->GetVolume()->BlockSize());
106 }
107 
108 
109 //!	bfs_io() callback hook
110 static status_t
111 iterative_io_finished_hook(void* cookie, io_request* request, status_t status,
112 	bool partialTransfer, size_t bytesTransferred)
113 {
114 	Inode* inode = (Inode*)cookie;
115 	rw_lock_read_unlock(&inode->Lock());
116 	return B_OK;
117 }
118 
119 
120 //	#pragma mark - Scanning
121 
122 
123 static float
124 bfs_identify_partition(int fd, partition_data* partition, void** _cookie)
125 {
126 	disk_super_block superBlock;
127 	status_t status = Volume::Identify(fd, &superBlock);
128 	if (status != B_OK)
129 		return -1;
130 
131 	identify_cookie* cookie = new(std::nothrow) identify_cookie;
132 	if (cookie == NULL)
133 		return -1;
134 
135 	memcpy(&cookie->super_block, &superBlock, sizeof(disk_super_block));
136 
137 	*_cookie = cookie;
138 	return 0.85f;
139 }
140 
141 
142 static status_t
143 bfs_scan_partition(int fd, partition_data* partition, void* _cookie)
144 {
145 	identify_cookie* cookie = (identify_cookie*)_cookie;
146 
147 	partition->status = B_PARTITION_VALID;
148 	partition->flags |= B_PARTITION_FILE_SYSTEM;
149 	partition->content_size = cookie->super_block.NumBlocks()
150 		* cookie->super_block.BlockSize();
151 	partition->block_size = cookie->super_block.BlockSize();
152 	partition->content_name = strdup(cookie->super_block.name);
153 	if (partition->content_name == NULL)
154 		return B_NO_MEMORY;
155 
156 	return B_OK;
157 }
158 
159 
160 static void
161 bfs_free_identify_partition_cookie(partition_data* partition, void* _cookie)
162 {
163 	identify_cookie* cookie = (identify_cookie*)_cookie;
164 	delete cookie;
165 }
166 
167 
168 //	#pragma mark -
169 
170 
171 static status_t
172 bfs_mount(fs_volume* _volume, const char* device, uint32 flags,
173 	const char* args, ino_t* _rootID)
174 {
175 	FUNCTION();
176 
177 	Volume* volume = new(std::nothrow) Volume(_volume);
178 	if (volume == NULL)
179 		return B_NO_MEMORY;
180 
181 	status_t status = volume->Mount(device, flags);
182 	if (status != B_OK) {
183 		delete volume;
184 		RETURN_ERROR(status);
185 	}
186 
187 	_volume->private_volume = volume;
188 	_volume->ops = &gBFSVolumeOps;
189 	*_rootID = volume->ToVnode(volume->Root());
190 
191 	INFORM(("mounted \"%s\" (root node at %" B_PRIdINO ", device = %s)\n",
192 		volume->Name(), *_rootID, device));
193 	return B_OK;
194 }
195 
196 
197 static status_t
198 bfs_unmount(fs_volume* _volume)
199 {
200 	FUNCTION();
201 	Volume* volume = (Volume*)_volume->private_volume;
202 
203 	status_t status = volume->Unmount();
204 	delete volume;
205 
206 	RETURN_ERROR(status);
207 }
208 
209 
210 static status_t
211 bfs_read_fs_stat(fs_volume* _volume, struct fs_info* info)
212 {
213 	FUNCTION();
214 
215 	Volume* volume = (Volume*)_volume->private_volume;
216 	MutexLocker locker(volume->Lock());
217 
218 	// File system flags.
219 	info->flags = B_FS_IS_PERSISTENT | B_FS_HAS_ATTR | B_FS_HAS_MIME
220 		| (volume->IndicesNode() != NULL ? B_FS_HAS_QUERY : 0)
221 		| (volume->IsReadOnly() ? B_FS_IS_READONLY : 0)
222 		| B_FS_SUPPORTS_MONITOR_CHILDREN;
223 
224 	info->io_size = BFS_IO_SIZE;
225 		// whatever is appropriate here?
226 
227 	info->block_size = volume->BlockSize();
228 	info->total_blocks = volume->NumBlocks();
229 	info->free_blocks = volume->FreeBlocks();
230 
231 	// Volume name
232 	strlcpy(info->volume_name, volume->Name(), sizeof(info->volume_name));
233 
234 	// File system name
235 	strlcpy(info->fsh_name, "bfs", sizeof(info->fsh_name));
236 
237 	return B_OK;
238 }
239 
240 
241 static status_t
242 bfs_write_fs_stat(fs_volume* _volume, const struct fs_info* info, uint32 mask)
243 {
244 	FUNCTION_START(("mask = %" B_PRId32 "\n", mask));
245 
246 	Volume* volume = (Volume*)_volume->private_volume;
247 	if (volume->IsReadOnly())
248 		return B_READ_ONLY_DEVICE;
249 
250 	MutexLocker locker(volume->Lock());
251 
252 	status_t status = B_BAD_VALUE;
253 
254 	if (mask & FS_WRITE_FSINFO_NAME) {
255 		disk_super_block& superBlock = volume->SuperBlock();
256 
257 		strncpy(superBlock.name, info->volume_name,
258 			sizeof(superBlock.name) - 1);
259 		superBlock.name[sizeof(superBlock.name) - 1] = '\0';
260 
261 		status = volume->WriteSuperBlock();
262 	}
263 	return status;
264 }
265 
266 
267 static status_t
268 bfs_sync(fs_volume* _volume)
269 {
270 	FUNCTION();
271 
272 	Volume* volume = (Volume*)_volume->private_volume;
273 	return volume->Sync();
274 }
275 
276 
277 //	#pragma mark -
278 
279 
280 /*!	Reads in the node from disk and creates an inode object from it.
281 */
282 static status_t
283 bfs_get_vnode(fs_volume* _volume, ino_t id, fs_vnode* _node, int* _type,
284 	uint32* _flags, bool reenter)
285 {
286 	//FUNCTION_START(("ino_t = %lld\n", id));
287 	Volume* volume = (Volume*)_volume->private_volume;
288 
289 	// first inode may be after the log area, we don't go through
290 	// the hassle and try to load an earlier block from disk
291 	if (id < volume->ToBlock(volume->Log()) + volume->Log().Length()
292 		|| id > volume->NumBlocks()) {
293 		INFORM(("inode at %" B_PRIdINO " requested!\n", id));
294 		return B_ERROR;
295 	}
296 
297 	CachedBlock cached(volume);
298 	status_t status = cached.SetTo(id);
299 	if (status != B_OK) {
300 		FATAL(("could not read inode: %" B_PRIdINO ": %s\n", id,
301 			strerror(status)));
302 		return status;
303 	}
304 	bfs_inode* node = (bfs_inode*)cached.Block();
305 
306 	status = node->InitCheck(volume);
307 	if (status != B_OK) {
308 		if ((node->Flags() & INODE_DELETED) != 0) {
309 			INFORM(("inode at %" B_PRIdINO " is already deleted!\n", id));
310 		} else {
311 			FATAL(("inode at %" B_PRIdINO " could not be read: %s!\n", id,
312 				strerror(status)));
313 		}
314 		return status;
315 	}
316 
317 	Inode* inode = new(std::nothrow) Inode(volume, id);
318 	if (inode == NULL)
319 		return B_NO_MEMORY;
320 
321 	status = inode->InitCheck(false);
322 	if (status != B_OK)
323 		delete inode;
324 
325 	if (status == B_OK) {
326 		_node->private_node = inode;
327 		_node->ops = &gBFSVnodeOps;
328 		*_type = inode->Mode();
329 		*_flags = 0;
330 	}
331 
332 	return status;
333 }
334 
335 
336 static status_t
337 bfs_put_vnode(fs_volume* _volume, fs_vnode* _node, bool reenter)
338 {
339 	Volume* volume = (Volume*)_volume->private_volume;
340 	Inode* inode = (Inode*)_node->private_node;
341 
342 	// since a directory's size can be changed without having it opened,
343 	// we need to take care about their preallocated blocks here
344 	if (!volume->IsReadOnly() && !volume->IsCheckingThread()
345 		&& inode->NeedsTrimming()) {
346 		Transaction transaction(volume, inode->BlockNumber());
347 
348 		if (inode->TrimPreallocation(transaction) == B_OK)
349 			transaction.Done();
350 		else if (transaction.HasParent()) {
351 			// TODO: for now, we don't let sub-transactions fail
352 			transaction.Done();
353 		}
354 	}
355 
356 	delete inode;
357 	return B_OK;
358 }
359 
360 
361 static status_t
362 bfs_remove_vnode(fs_volume* _volume, fs_vnode* _node, bool reenter)
363 {
364 	FUNCTION();
365 
366 	Volume* volume = (Volume*)_volume->private_volume;
367 	Inode* inode = (Inode*)_node->private_node;
368 
369 	// If the inode isn't in use anymore, we were called before
370 	// bfs_unlink() returns - in this case, we can just use the
371 	// transaction which has already deleted the inode.
372 	Transaction transaction(volume, volume->ToBlock(inode->Parent()));
373 
374 	// The file system check functionality uses this flag to prevent the space
375 	// used up by the inode from being freed - this flag is set only in
376 	// situations where this does not cause any harm as the block bitmap will
377 	// get fixed anyway in this case).
378 	if ((inode->Flags() & INODE_DONT_FREE_SPACE) != 0) {
379 		delete inode;
380 		return B_OK;
381 	}
382 
383 	ASSERT((inode->Flags() & INODE_DELETED) != 0);
384 
385 	status_t status = inode->Free(transaction);
386 	if (status == B_OK) {
387 		status = transaction.Done();
388 	} else if (transaction.HasParent()) {
389 		// TODO: for now, we don't let sub-transactions fail
390 		status = transaction.Done();
391 	}
392 
393 	volume->RemovedInodes().Remove(inode);
394 
395 	// TODO: the VFS currently does not allow this to fail
396 	delete inode;
397 
398 	return status;
399 }
400 
401 
//!	Paging hook; unconditionally answers \c false for every node.
static bool
bfs_can_page(fs_volume* _volume, fs_vnode* _v, void* _cookie)
{
	// TODO: we're obviously not even asked...
	return false;
}
408 
409 
410 static status_t
411 bfs_read_pages(fs_volume* _volume, fs_vnode* _node, void* _cookie,
412 	off_t pos, const iovec* vecs, size_t count, size_t* _numBytes)
413 {
414 	Volume* volume = (Volume*)_volume->private_volume;
415 	Inode* inode = (Inode*)_node->private_node;
416 
417 	if (inode->FileCache() == NULL)
418 		RETURN_ERROR(B_BAD_VALUE);
419 
420 	InodeReadLocker _(inode);
421 
422 	uint32 vecIndex = 0;
423 	size_t vecOffset = 0;
424 	size_t bytesLeft = *_numBytes;
425 	status_t status;
426 
427 	while (true) {
428 		file_io_vec fileVecs[8];
429 		size_t fileVecCount = 8;
430 
431 		status = file_map_translate(inode->Map(), pos, bytesLeft, fileVecs,
432 			&fileVecCount, 0);
433 		if (status != B_OK && status != B_BUFFER_OVERFLOW)
434 			break;
435 
436 		bool bufferOverflow = status == B_BUFFER_OVERFLOW;
437 
438 		size_t bytes = bytesLeft;
439 		status = read_file_io_vec_pages(volume->Device(), fileVecs,
440 			fileVecCount, vecs, count, &vecIndex, &vecOffset, &bytes);
441 		if (status != B_OK || !bufferOverflow)
442 			break;
443 
444 		pos += bytes;
445 		bytesLeft -= bytes;
446 	}
447 
448 	return status;
449 }
450 
451 
452 static status_t
453 bfs_write_pages(fs_volume* _volume, fs_vnode* _node, void* _cookie,
454 	off_t pos, const iovec* vecs, size_t count, size_t* _numBytes)
455 {
456 	Volume* volume = (Volume*)_volume->private_volume;
457 	Inode* inode = (Inode*)_node->private_node;
458 
459 	if (volume->IsReadOnly())
460 		return B_READ_ONLY_DEVICE;
461 
462 	if (inode->FileCache() == NULL)
463 		RETURN_ERROR(B_BAD_VALUE);
464 
465 	InodeReadLocker _(inode);
466 
467 	uint32 vecIndex = 0;
468 	size_t vecOffset = 0;
469 	size_t bytesLeft = *_numBytes;
470 	status_t status;
471 
472 	while (true) {
473 		file_io_vec fileVecs[8];
474 		size_t fileVecCount = 8;
475 
476 		status = file_map_translate(inode->Map(), pos, bytesLeft, fileVecs,
477 			&fileVecCount, 0);
478 		if (status != B_OK && status != B_BUFFER_OVERFLOW)
479 			break;
480 
481 		bool bufferOverflow = status == B_BUFFER_OVERFLOW;
482 
483 		size_t bytes = bytesLeft;
484 		status = write_file_io_vec_pages(volume->Device(), fileVecs,
485 			fileVecCount, vecs, count, &vecIndex, &vecOffset, &bytes);
486 		if (status != B_OK || !bufferOverflow)
487 			break;
488 
489 		pos += bytes;
490 		bytesLeft -= bytes;
491 	}
492 
493 	return status;
494 }
495 
496 
497 static status_t
498 bfs_io(fs_volume* _volume, fs_vnode* _node, void* _cookie, io_request* request)
499 {
500 #if KDEBUG_RW_LOCK_DEBUG
501 	// bfs_io depends on read-locks being implicitly transferrable across threads.
502 	return B_UNSUPPORTED;
503 #endif
504 
505 	Volume* volume = (Volume*)_volume->private_volume;
506 	Inode* inode = (Inode*)_node->private_node;
507 
508 #ifndef FS_SHELL
509 	if (io_request_is_write(request) && volume->IsReadOnly()) {
510 		notify_io_request(request, B_READ_ONLY_DEVICE);
511 		return B_READ_ONLY_DEVICE;
512 	}
513 #endif
514 
515 	if (inode->FileCache() == NULL) {
516 #ifndef FS_SHELL
517 		notify_io_request(request, B_BAD_VALUE);
518 #endif
519 		RETURN_ERROR(B_BAD_VALUE);
520 	}
521 
522 	// We lock the node here and will unlock it in the "finished" hook.
523 	rw_lock_read_lock(&inode->Lock());
524 
525 	return do_iterative_fd_io(volume->Device(), request,
526 		iterative_io_get_vecs_hook, iterative_io_finished_hook, inode);
527 }
528 
529 
530 static status_t
531 bfs_get_file_map(fs_volume* _volume, fs_vnode* _node, off_t offset, size_t size,
532 	struct file_io_vec* vecs, size_t* _count)
533 {
534 	Volume* volume = (Volume*)_volume->private_volume;
535 	Inode* inode = (Inode*)_node->private_node;
536 
537 	int32 blockShift = volume->BlockShift();
538 	uint32 index = 0, max = *_count;
539 	block_run run;
540 	off_t fileOffset;
541 
542 	//FUNCTION_START(("offset = %lld, size = %lu\n", offset, size));
543 
544 	while (true) {
545 		status_t status = inode->FindBlockRun(offset, run, fileOffset);
546 		if (status != B_OK)
547 			return status;
548 
549 		vecs[index].offset = volume->ToOffset(run) + offset - fileOffset;
550 		vecs[index].length = ((uint32)run.Length() << blockShift)
551 			- offset + fileOffset;
552 
553 		// are we already done?
554 		if ((uint64)size <= (uint64)vecs[index].length
555 			|| (uint64)offset + (uint64)vecs[index].length
556 				>= (uint64)inode->Size()) {
557 			if ((uint64)offset + (uint64)vecs[index].length
558 					> (uint64)inode->Size()) {
559 				// make sure the extent ends with the last official file
560 				// block (without taking any preallocations into account)
561 				vecs[index].length = round_up(inode->Size() - offset,
562 					volume->BlockSize());
563 			}
564 			*_count = index + 1;
565 			return B_OK;
566 		}
567 
568 		offset += vecs[index].length;
569 		size -= vecs[index].length;
570 		index++;
571 
572 		if (index >= max) {
573 			// we're out of file_io_vecs; let's bail out
574 			*_count = index;
575 			return B_BUFFER_OVERFLOW;
576 		}
577 	}
578 
579 	// can never get here
580 	return B_ERROR;
581 }
582 
583 
584 //	#pragma mark -
585 
586 
587 static status_t
588 bfs_lookup(fs_volume* _volume, fs_vnode* _directory, const char* file,
589 	ino_t* _vnodeID)
590 {
591 	Volume* volume = (Volume*)_volume->private_volume;
592 	Inode* directory = (Inode*)_directory->private_node;
593 
594 	InodeReadLocker locker(directory);
595 
596 	// check access permissions
597 	status_t status = directory->CheckPermissions(X_OK);
598 	if (status != B_OK)
599 		RETURN_ERROR(status);
600 
601 	BPlusTree* tree = directory->Tree();
602 	if (tree == NULL)
603 		RETURN_ERROR(B_BAD_VALUE);
604 
605 	status = tree->Find((uint8*)file, (uint16)strlen(file), _vnodeID);
606 	if (status != B_OK) {
607 		//PRINT(("bfs_walk() could not find %lld:\"%s\": %s\n", directory->BlockNumber(), file, strerror(status)));
608 		if (status == B_ENTRY_NOT_FOUND)
609 			entry_cache_add_missing(volume->ID(), directory->ID(), file);
610 
611 		return status;
612 	}
613 
614 	entry_cache_add(volume->ID(), directory->ID(), file, *_vnodeID);
615 
616 	locker.Unlock();
617 
618 	Inode* inode;
619 	status = get_vnode(volume->FSVolume(), *_vnodeID, (void**)&inode);
620 	if (status != B_OK) {
621 		REPORT_ERROR(status);
622 		return B_ENTRY_NOT_FOUND;
623 	}
624 
625 	return B_OK;
626 }
627 
628 
629 static status_t
630 bfs_get_vnode_name(fs_volume* _volume, fs_vnode* _node, char* buffer,
631 	size_t bufferSize)
632 {
633 	Inode* inode = (Inode*)_node->private_node;
634 
635 	return inode->GetName(buffer, bufferSize);
636 }
637 
638 
639 static status_t
640 bfs_ioctl(fs_volume* _volume, fs_vnode* _node, void* _cookie, uint32 cmd,
641 	void* buffer, size_t bufferLength)
642 {
643 	FUNCTION_START(("node = %p, cmd = %" B_PRIu32 ", buf = %p"
644 		", len = %" B_PRIuSIZE "\n", _node, cmd, buffer, bufferLength));
645 
646 	Volume* volume = (Volume*)_volume->private_volume;
647 
648 	switch (cmd) {
649 #ifndef FS_SHELL
650 		case B_TRIM_DEVICE:
651 		{
652 			fs_trim_data* trimData;
653 			MemoryDeleter deleter;
654 			status_t status = get_trim_data_from_user(buffer, bufferLength,
655 				deleter, trimData);
656 			if (status != B_OK)
657 				return status;
658 
659 			trimData->trimmed_size = 0;
660 
661 			for (uint32 i = 0; i < trimData->range_count; i++) {
662 				uint64 trimmedSize = 0;
663 				status_t status = volume->Allocator().Trim(
664 					trimData->ranges[i].offset, trimData->ranges[i].size,
665 					trimmedSize);
666 				if (status != B_OK)
667 					return status;
668 
669 				trimData->trimmed_size += trimmedSize;
670 			}
671 
672 			return copy_trim_data_to_user(buffer, trimData);
673 		}
674 #endif
675 
676 		case BFS_IOCTL_VERSION:
677 		{
678 			uint32 version = 0x10000;
679 			return user_memcpy(buffer, &version, sizeof(uint32));
680 		}
681 		case BFS_IOCTL_START_CHECKING:
682 		{
683 			// start checking
684 			status_t status = volume->CreateCheckVisitor();
685 			if (status != B_OK)
686 				return status;
687 
688 			CheckVisitor* checker = volume->CheckVisitor();
689 
690 			if (user_memcpy(&checker->Control(), buffer,
691 					sizeof(check_control)) != B_OK) {
692 				return B_BAD_ADDRESS;
693 			}
694 
695 			status = checker->StartBitmapPass();
696 			if (status == B_OK) {
697 				file_cookie* cookie = (file_cookie*)_cookie;
698 				cookie->open_mode |= BFS_OPEN_MODE_CHECKING;
699 			}
700 
701 			return status;
702 		}
703 		case BFS_IOCTL_STOP_CHECKING:
704 		{
705 			// stop checking
706 			CheckVisitor* checker = volume->CheckVisitor();
707 			if (checker == NULL)
708 				return B_NO_INIT;
709 
710 			status_t status = checker->StopChecking();
711 
712 			if (status == B_OK) {
713 				file_cookie* cookie = (file_cookie*)_cookie;
714 				cookie->open_mode &= ~BFS_OPEN_MODE_CHECKING;
715 
716 				status = user_memcpy(buffer, &checker->Control(),
717 					sizeof(check_control));
718 			}
719 
720 			volume->DeleteCheckVisitor();
721 			volume->SetCheckingThread(-1);
722 
723 			return status;
724 		}
725 		case BFS_IOCTL_CHECK_NEXT_NODE:
726 		{
727 			// check next
728 			CheckVisitor* checker = volume->CheckVisitor();
729 			if (checker == NULL)
730 				return B_NO_INIT;
731 
732 			volume->SetCheckingThread(find_thread(NULL));
733 
734 			checker->Control().errors = 0;
735 
736 			status_t status = checker->Next();
737 			if (status == B_ENTRY_NOT_FOUND) {
738 				checker->Control().status = B_ENTRY_NOT_FOUND;
739 					// tells StopChecking() that we finished the pass
740 
741 				if (checker->Pass() == BFS_CHECK_PASS_BITMAP) {
742 					if (checker->WriteBackCheckBitmap() == B_OK)
743 						status = checker->StartIndexPass();
744 				}
745 			}
746 
747 			if (status == B_OK) {
748 				status = user_memcpy(buffer, &checker->Control(),
749 					sizeof(check_control));
750 			}
751 
752 			return status;
753 		}
754 		case BFS_IOCTL_UPDATE_BOOT_BLOCK:
755 		{
756 			// let's makebootable (or anyone else) update the boot block
757 			// while BFS is mounted
758 			update_boot_block update;
759 			if (bufferLength != sizeof(update_boot_block))
760 				return B_BAD_VALUE;
761 			if (user_memcpy(&update, buffer, sizeof(update_boot_block)) != B_OK)
762 				return B_BAD_ADDRESS;
763 
764 			uint32 minOffset = offsetof(disk_super_block, pad_to_block);
765 			if (update.offset < minOffset
766 				|| update.offset >= 512 || update.length > 512 - minOffset
767 				|| update.length + update.offset > 512) {
768 				return B_BAD_VALUE;
769 			}
770 			if (user_memcpy((uint8*)&volume->SuperBlock() + update.offset,
771 					update.data, update.length) != B_OK) {
772 				return B_BAD_ADDRESS;
773 			}
774 
775 			return volume->WriteSuperBlock();
776 		}
777 		case BFS_IOCTL_RESIZE:
778 		{
779 			if (bufferLength != sizeof(uint64))
780 				return B_BAD_VALUE;
781 
782 			uint64 size;
783 			if (user_memcpy((uint8*)&size, buffer, sizeof(uint64)) != B_OK)
784 				return B_BAD_ADDRESS;
785 
786 			ResizeVisitor resizer(volume);
787 			return resizer.Resize(size, -1);
788 		}
789 
790 #ifdef DEBUG_FRAGMENTER
791 		case 56741:
792 		{
793 			BlockAllocator& allocator = volume->Allocator();
794 			allocator.Fragment();
795 			return B_OK;
796 		}
797 #endif
798 
799 #ifdef DEBUG
800 		case 56742:
801 		{
802 			// allocate all free blocks and zero them out
803 			// (a test for the BlockAllocator)!
804 			BlockAllocator& allocator = volume->Allocator();
805 			Transaction transaction(volume, 0);
806 			CachedBlock cached(volume);
807 			block_run run;
808 			while (allocator.AllocateBlocks(transaction, 8, 0, 64, 1, run)
809 					== B_OK) {
810 				PRINT(("write block_run(%" B_PRId32 ", %" B_PRIu16
811 					", %" B_PRIu16 ")\n", run.allocation_group, run.start,
812 					run.length));
813 
814 				for (int32 i = 0;i < run.length;i++) {
815 					status_t status = cached.SetToWritable(transaction, run);
816 					if (status == B_OK)
817 						memset(cached.WritableBlock(), 0, volume->BlockSize());
818 				}
819 			}
820 			return B_OK;
821 		}
822 #endif
823 	}
824 	return B_DEV_INVALID_IOCTL;
825 }
826 
827 
828 /*!	Sets the open-mode flags for the open file cookie - only
829 	supports O_APPEND currently, but that should be sufficient
830 	for a file system.
831 */
832 static status_t
833 bfs_set_flags(fs_volume* _volume, fs_vnode* _node, void* _cookie, int flags)
834 {
835 	FUNCTION_START(("node = %p, flags = %d", _node, flags));
836 
837 	file_cookie* cookie = (file_cookie*)_cookie;
838 	cookie->open_mode = (cookie->open_mode & ~O_APPEND) | (flags & O_APPEND);
839 
840 	return B_OK;
841 }
842 
843 
844 static status_t
845 bfs_fsync(fs_volume* _volume, fs_vnode* _node)
846 {
847 	FUNCTION();
848 
849 	Inode* inode = (Inode*)_node->private_node;
850 	return inode->Sync();
851 }
852 
853 
854 static status_t
855 bfs_read_stat(fs_volume* _volume, fs_vnode* _node, struct stat* stat)
856 {
857 	FUNCTION();
858 
859 	Inode* inode = (Inode*)_node->private_node;
860 	fill_stat_buffer(inode, *stat);
861 	return B_OK;
862 }
863 
864 
/*!	Applies the fields of \a stat selected by \a mask to the node: size,
	owner, group, mode, and the creation, modification, and change times.
	All changes are made within a single transaction; on success, listeners
	are notified via notify_stat_changed().

	\return B_READ_ONLY_DEVICE on read-only volumes, B_NOT_ALLOWED if the
		caller lacks the permission for one of the selected fields,
		B_IS_A_DIRECTORY/B_BAD_VALUE when trying to resize a non-file, or
		the error of the failing operation.
*/
static status_t
bfs_write_stat(fs_volume* _volume, fs_vnode* _node, const struct stat* stat,
	uint32 mask)
{
	FUNCTION();

	Volume* volume = (Volume*)_volume->private_volume;
	Inode* inode = (Inode*)_node->private_node;

	if (volume->IsReadOnly())
		return B_READ_ONLY_DEVICE;

	// TODO: we should definitely check a bit more if the new stats are
	//	valid - or even better, the VFS should check this before calling us

	bfs_inode& node = inode->Node();
	bool updateTime = false;
		// set when a change should also update the status change time
	uid_t uid = geteuid();

	// the two permission levels the checks below are based on
	bool isOwnerOrRoot = uid == 0 || uid == (uid_t)node.UserID();
	bool hasWriteAccess = inode->CheckPermissions(W_OK) == B_OK;

	Transaction transaction(volume, inode->BlockNumber());
	inode->WriteLockInTransaction(transaction);

	if ((mask & B_STAT_SIZE) != 0 && inode->Size() != stat->st_size) {
		// Since B_STAT_SIZE is the only thing that can fail directly, we
		// do it first, so that the inode state will still be consistent
		// with the on-disk version
		if (inode->IsDirectory())
			return B_IS_A_DIRECTORY;
		if (!inode->IsFile())
			return B_BAD_VALUE;
		if (!hasWriteAccess)
			RETURN_ERROR(B_NOT_ALLOWED);

		off_t oldSize = inode->Size();

		status_t status = inode->SetFileSize(transaction, stat->st_size);
		if (status != B_OK)
			return status;

		// fill the new blocks (if any) with zeros
		if ((mask & B_STAT_SIZE_INSECURE) == 0) {
			// We must not keep the inode locked during a write operation,
			// or else we might deadlock.
			rw_lock_write_unlock(&inode->Lock());
			inode->FillGapWithZeros(oldSize, inode->Size());
			rw_lock_write_lock(&inode->Lock());
		}

		// the size index is only updated for inodes that still exist
		if (!inode->IsDeleted()) {
			Index index(volume);
			index.UpdateSize(transaction, inode);

			updateTime = true;
		}
	}

	if ((mask & B_STAT_UID) != 0) {
		// only root should be allowed
		if (uid != 0)
			RETURN_ERROR(B_NOT_ALLOWED);
		node.uid = HOST_ENDIAN_TO_BFS_INT32(stat->st_uid);
		updateTime = true;
	}

	if ((mask & B_STAT_GID) != 0) {
		// only the user or root can do that
		if (!isOwnerOrRoot)
			RETURN_ERROR(B_NOT_ALLOWED);
		node.gid = HOST_ENDIAN_TO_BFS_INT32(stat->st_gid);
		updateTime = true;
	}

	if ((mask & B_STAT_MODE) != 0) {
		// only the user or root can do that
		if (!isOwnerOrRoot)
			RETURN_ERROR(B_NOT_ALLOWED);
		PRINT(("original mode = %u, stat->st_mode = %u\n",
			(unsigned int)node.Mode(), (unsigned int)stat->st_mode));
		// only the bits covered by S_IUMSK are taken over from the new mode
		node.mode = HOST_ENDIAN_TO_BFS_INT32((node.Mode() & ~S_IUMSK)
			| (stat->st_mode & S_IUMSK));
		updateTime = true;
	}

	if ((mask & B_STAT_CREATION_TIME) != 0) {
		// the user or root can do that or any user with write access
		if (!isOwnerOrRoot && !hasWriteAccess)
			RETURN_ERROR(B_NOT_ALLOWED);
		node.create_time
			= HOST_ENDIAN_TO_BFS_INT64(bfs_inode::ToInode(stat->st_crtim));
	}

	if ((mask & B_STAT_MODIFICATION_TIME) != 0) {
		// the user or root can do that or any user with write access
		if (!isOwnerOrRoot && !hasWriteAccess)
			RETURN_ERROR(B_NOT_ALLOWED);
		if (!inode->InLastModifiedIndex()) {
			// directory modification times are not part of the index
			node.last_modified_time
				= HOST_ENDIAN_TO_BFS_INT64(bfs_inode::ToInode(stat->st_mtim));
		} else if (!inode->IsDeleted()) {
			// Index::UpdateLastModified() will set the new time in the inode
			Index index(volume);
			index.UpdateLastModified(transaction, inode,
				bfs_inode::ToInode(stat->st_mtim));
		}
	}

	if ((mask & B_STAT_CHANGE_TIME) != 0 || updateTime) {
		// the user or root can do that or any user with write access
		if (!isOwnerOrRoot && !hasWriteAccess)
			RETURN_ERROR(B_NOT_ALLOWED);
		// an explicitly requested change time wins over the current time
		bigtime_t newTime;
		if ((mask & B_STAT_CHANGE_TIME) == 0)
			newTime = bfs_inode::ToInode(real_time_clock_usecs());
		else
			newTime = bfs_inode::ToInode(stat->st_ctim);

		node.status_change_time = HOST_ENDIAN_TO_BFS_INT64(newTime);
	}

	// write the inode back, complete the transaction, and only then
	// tell listeners about the change
	status_t status = inode->WriteBack(transaction);
	if (status == B_OK)
		status = transaction.Done();
	if (status == B_OK)
		notify_stat_changed(volume->ID(), inode->ParentID(), inode->ID(), mask);

	return status;
}
996 
997 
998 status_t
999 bfs_create(fs_volume* _volume, fs_vnode* _directory, const char* name,
1000 	int openMode, int mode, void** _cookie, ino_t* _vnodeID)
1001 {
1002 	FUNCTION_START(("name = \"%s\", perms = %d, openMode = %d\n", name, mode,
1003 		openMode));
1004 
1005 	Volume* volume = (Volume*)_volume->private_volume;
1006 	Inode* directory = (Inode*)_directory->private_node;
1007 
1008 	if (volume->IsReadOnly())
1009 		return B_READ_ONLY_DEVICE;
1010 
1011 	if (!directory->IsDirectory())
1012 		RETURN_ERROR(B_BAD_TYPE);
1013 
1014 	// We are creating the cookie at this point, so that we don't have
1015 	// to remove the inode if we don't have enough free memory later...
1016 	file_cookie* cookie = new(std::nothrow) file_cookie;
1017 	if (cookie == NULL)
1018 		RETURN_ERROR(B_NO_MEMORY);
1019 
1020 	// initialize the cookie
1021 	cookie->open_mode = openMode;
1022 	cookie->last_size = 0;
1023 	cookie->last_notification = system_time();
1024 
1025 	Transaction transaction(volume, directory->BlockNumber());
1026 
1027 	Inode* inode;
1028 	bool created;
1029 	status_t status = Inode::Create(transaction, directory, name,
1030 		S_FILE | (mode & S_IUMSK), openMode, 0, &created, _vnodeID, &inode);
1031 
1032 	// Disable the file cache, if requested?
1033 	if (status == B_OK && (openMode & O_NOCACHE) != 0
1034 		&& inode->FileCache() != NULL) {
1035 		status = file_cache_disable(inode->FileCache());
1036 	}
1037 
1038 	entry_cache_add(volume->ID(), directory->ID(), name, *_vnodeID);
1039 
1040 	if (status == B_OK)
1041 		status = transaction.Done();
1042 
1043 	if (status == B_OK) {
1044 		// register the cookie
1045 		*_cookie = cookie;
1046 
1047 		if (created) {
1048 			notify_entry_created(volume->ID(), directory->ID(), name,
1049 				*_vnodeID);
1050 		}
1051 	} else {
1052 		entry_cache_remove(volume->ID(), directory->ID(), name);
1053 		delete cookie;
1054 	}
1055 
1056 	return status;
1057 }
1058 
1059 
/*!	Creates the symbolic link \a name pointing to \a path in \a _directory.
	Paths shorter than SHORT_SYMLINK_NAME_LENGTH are stored in the inode
	itself; longer ones are written to the link's data stream, and the
	inode is flagged INODE_LONG_SYMLINK.

	\return B_READ_ONLY_DEVICE on read-only volumes, B_BAD_TYPE if
		\a _directory is not a directory, or the error of the failing
		permission check, creation, write, or transaction.
*/
static status_t
bfs_create_symlink(fs_volume* _volume, fs_vnode* _directory, const char* name,
	const char* path, int mode)
{
	FUNCTION_START(("name = \"%s\", path = \"%s\"\n", name, path));

	Volume* volume = (Volume*)_volume->private_volume;
	Inode* directory = (Inode*)_directory->private_node;

	if (volume->IsReadOnly())
		return B_READ_ONLY_DEVICE;

	if (!directory->IsDirectory())
		RETURN_ERROR(B_BAD_TYPE);

	// the caller needs write permission for the directory
	status_t status = directory->CheckPermissions(W_OK);
	if (status < B_OK)
		RETURN_ERROR(status);

	Transaction transaction(volume, directory->BlockNumber());

	// the link is always created with mode 0777, "mode" is not used
	Inode* link;
	off_t id;
	status = Inode::Create(transaction, directory, name, S_SYMLINK | 0777,
		0, 0, NULL, &id, &link);
	if (status < B_OK)
		RETURN_ERROR(status);

	size_t length = strlen(path);
	if (length < SHORT_SYMLINK_NAME_LENGTH) {
		// the path, including its terminating null, fits into the inode
		strcpy(link->Node().short_symlink, path);
	} else {
		link->Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_LONG_SYMLINK
			| INODE_LOGGED);

		// links usually don't have a file cache attached - but we now need one
		link->SetFileCache(file_cache_create(volume->ID(), link->ID(), 0));
		link->SetMap(file_map_create(volume->ID(), link->ID(), 0));

		// The following call will have to write the inode back, so
		// we don't have to do that here...
		status = link->WriteAt(transaction, 0, (const uint8*)path, &length);
	}

	if (status == B_OK)
		status = link->WriteBack(transaction);

	// Inode::Create() left the inode locked in memory, and also doesn't
	// publish links
	publish_vnode(volume->FSVolume(), id, link, &gBFSVnodeOps, link->Mode(), 0);
	put_vnode(volume->FSVolume(), id);

	if (status == B_OK) {
		entry_cache_add(volume->ID(), directory->ID(), name, id);

		// listeners are only notified once the transaction went through;
		// otherwise the cache entry is taken back
		status = transaction.Done();
		if (status == B_OK)
			notify_entry_created(volume->ID(), directory->ID(), name, id);
		else
			entry_cache_remove(volume->ID(), directory->ID(), name);
	}

	return status;
}
1124 
1125 
1126 status_t
1127 bfs_link(fs_volume* _volume, fs_vnode* dir, const char* name, fs_vnode* node)
1128 {
1129 	FUNCTION_START(("name = \"%s\"\n", name));
1130 
1131 	// This one won't be implemented in a binary compatible BFS
1132 	return B_UNSUPPORTED;
1133 }
1134 
1135 
1136 status_t
1137 bfs_unlink(fs_volume* _volume, fs_vnode* _directory, const char* name)
1138 {
1139 	FUNCTION_START(("name = \"%s\"\n", name));
1140 
1141 	if (!strcmp(name, "..") || !strcmp(name, "."))
1142 		return B_NOT_ALLOWED;
1143 
1144 	Volume* volume = (Volume*)_volume->private_volume;
1145 	Inode* directory = (Inode*)_directory->private_node;
1146 
1147 	status_t status = directory->CheckPermissions(W_OK);
1148 	if (status < B_OK)
1149 		return status;
1150 
1151 	Transaction transaction(volume, directory->BlockNumber());
1152 
1153 	off_t id;
1154 	status = directory->Remove(transaction, name, &id);
1155 	if (status == B_OK) {
1156 		entry_cache_remove(volume->ID(), directory->ID(), name);
1157 
1158 		status = transaction.Done();
1159 		if (status == B_OK)
1160 			notify_entry_removed(volume->ID(), directory->ID(), name, id);
1161 		else
1162 			entry_cache_add(volume->ID(), directory->ID(), name, id);
1163 	}
1164 	return status;
1165 }
1166 
1167 
/*!	Renames and/or moves the entry \a oldName of \a _oldDir to \a newName in
	\a _newDir within a single transaction.
	If the target name is already in use, the existing entry is removed first,
	which is only attempted when it and the moved node are either both
	directories or both non-directories.
	Returns B_OK right away if source and target are identical, and
	B_BAD_VALUE if the node is found on the way up from \a _newDir to the root
	(or the old directory), or if the target entry already refers to the node.
*/
status_t
bfs_rename(fs_volume* _volume, fs_vnode* _oldDir, const char* oldName,
	fs_vnode* _newDir, const char* newName)
{
	FUNCTION_START(("oldDir = %p, oldName = \"%s\", newDir = %p, newName = "
		"\"%s\"\n", _oldDir, oldName, _newDir, newName));

	Volume* volume = (Volume*)_volume->private_volume;
	Inode* oldDirectory = (Inode*)_oldDir->private_node;
	Inode* newDirectory = (Inode*)_newDir->private_node;

	// are we already done?
	if (oldDirectory == newDirectory && !strcmp(oldName, newName))
		return B_OK;

	Transaction transaction(volume, oldDirectory->BlockNumber());

	// write-lock both directories as part of the transaction (only once if
	// they are the same inode)
	oldDirectory->WriteLockInTransaction(transaction);
	if (oldDirectory != newDirectory)
		newDirectory->WriteLockInTransaction(transaction);

	// are we allowed to do what we've been told?
	status_t status = oldDirectory->CheckPermissions(W_OK);
	if (status == B_OK)
		status = newDirectory->CheckPermissions(W_OK);
	if (status != B_OK)
		return status;

	// Get the directory's tree, and a pointer to the inode which should be
	// changed
	BPlusTree* tree = oldDirectory->Tree();
	if (tree == NULL)
		RETURN_ERROR(B_BAD_VALUE);

	off_t id;
	status = tree->Find((const uint8*)oldName, strlen(oldName), &id);
	if (status != B_OK)
		RETURN_ERROR(status);

	Vnode vnode(volume, id);
	Inode* inode;
	if (vnode.Get(&inode) != B_OK)
		return B_IO_ERROR;

	// Don't move a directory into one of its children - we soar up
	// from the newDirectory to either the root node or the old
	// directory, whichever comes first.
	// If we meet our inode on that way, we have to bail out.

	if (oldDirectory != newDirectory) {
		ino_t parent = newDirectory->ID();
		ino_t root = volume->RootNode()->ID();

		while (true) {
			if (parent == id)
				return B_BAD_VALUE;
			else if (parent == root || parent == oldDirectory->ID())
				break;

			Vnode vnode(volume, parent);
			Inode* parentNode;
			if (vnode.Get(&parentNode) != B_OK)
				return B_ERROR;

			// continue with the next directory up the hierarchy
			parent = volume->ToVnode(parentNode->Parent());
		}
	}

	// Everything okay? Then lets get to work...

	// First, try to make sure there is nothing that will stop us in
	// the target directory - since this is the only non-critical
	// failure, we will test this case first
	BPlusTree* newTree = tree;
	if (newDirectory != oldDirectory) {
		newTree = newDirectory->Tree();
		if (newTree == NULL)
			RETURN_ERROR(B_BAD_VALUE);
	}

	status = newTree->Insert(transaction, (const uint8*)newName,
		strlen(newName), id);
	if (status == B_NAME_IN_USE) {
		// If there is already a file with that name, we have to remove
		// it, as long it's not a directory with files in it
		off_t clobber;
		if (newTree->Find((const uint8*)newName, strlen(newName), &clobber)
				< B_OK)
			return B_NAME_IN_USE;
		if (clobber == id)
			return B_BAD_VALUE;

		Vnode vnode(volume, clobber);
		Inode* other;
		if (vnode.Get(&other) < B_OK)
			return B_NAME_IN_USE;

		// only allowed, if either both nodes are directories or neither is
		if (inode->IsDirectory() != other->IsDirectory())
			return other->IsDirectory() ? B_IS_A_DIRECTORY : B_NOT_A_DIRECTORY;

		status = newDirectory->Remove(transaction, newName, NULL,
			other->IsDirectory());
		if (status < B_OK)
			return status;

		// NOTE(review): the cache removal and the notification below are
		// issued before the transaction is done, and are not taken back on
		// the failure paths further down in this function.
		entry_cache_remove(volume->ID(), newDirectory->ID(), newName);

		notify_entry_removed(volume->ID(), newDirectory->ID(), newName,
			clobber);

		// the name is free now, so try to insert the entry again
		status = newTree->Insert(transaction, (const uint8*)newName,
			strlen(newName), id);
	}
	if (status != B_OK)
		return status;

	inode->WriteLockInTransaction(transaction);

	volume->UpdateLiveQueriesRenameMove(inode, oldDirectory->ID(), oldName,
		newDirectory->ID(), newName);

	// update the name only when they differ
	if (strcmp(oldName, newName)) {
		status = inode->SetName(transaction, newName);
		if (status == B_OK) {
			// the result of the index update is not checked here
			Index index(volume);
			index.UpdateName(transaction, oldName, newName, inode);
		}
	}

	if (status == B_OK) {
		// remove the old entry from the source directory
		status = tree->Remove(transaction, (const uint8*)oldName,
			strlen(oldName), id);
		if (status == B_OK) {
			// let the moved node refer to its new parent directory
			inode->Parent() = newDirectory->BlockRun();

			// if it's a directory, update the parent directory pointer
			// in its tree if necessary
			BPlusTree* movedTree = inode->Tree();
			if (oldDirectory != newDirectory
				&& inode->IsDirectory()
				&& movedTree != NULL) {
				status = movedTree->Replace(transaction, (const uint8*)"..",
					2, newDirectory->ID());

				if (status == B_OK) {
					// update/add the cache entry for the parent
					entry_cache_add(volume->ID(), id, "..", newDirectory->ID());
				}
			}

			if (status == B_OK && newDirectory != oldDirectory)
				status = oldDirectory->ContainerContentsChanged(transaction);
			if (status == B_OK)
				status = newDirectory->ContainerContentsChanged(transaction);

			if (status == B_OK)
				status = inode->WriteBack(transaction);

			if (status == B_OK) {
				// switch the entry cache over to the new location before
				// finishing the transaction
				entry_cache_remove(volume->ID(), oldDirectory->ID(), oldName);
				entry_cache_add(volume->ID(), newDirectory->ID(), newName, id);

				status = transaction.Done();
				if (status == B_OK) {
					notify_entry_moved(volume->ID(), oldDirectory->ID(),
						oldName, newDirectory->ID(), newName, id);
					return B_OK;
				}

				// finishing the transaction failed: restore the entry cache
				// entries for the old location
				entry_cache_remove(volume->ID(), newDirectory->ID(), newName);
				entry_cache_add(volume->ID(), oldDirectory->ID(), oldName, id);
			}
		}
	}

	return status;
}
1347 
1348 
1349 static status_t
1350 bfs_open(fs_volume* _volume, fs_vnode* _node, int openMode, void** _cookie)
1351 {
1352 	FUNCTION();
1353 
1354 	Volume* volume = (Volume*)_volume->private_volume;
1355 	Inode* inode = (Inode*)_node->private_node;
1356 
1357 	// Opening a directory read-only is allowed, although you can't read
1358 	// any data from it.
1359 	if (inode->IsDirectory() && (openMode & O_RWMASK) != O_RDONLY)
1360 		return B_IS_A_DIRECTORY;
1361 	if ((openMode & O_DIRECTORY) != 0 && !inode->IsDirectory())
1362 		return B_NOT_A_DIRECTORY;
1363 
1364 	status_t status = inode->CheckPermissions(open_mode_to_access(openMode)
1365 		| ((openMode & O_TRUNC) != 0 ? W_OK : 0));
1366 	if (status != B_OK)
1367 		RETURN_ERROR(status);
1368 
1369 	file_cookie* cookie = new(std::nothrow) file_cookie;
1370 	if (cookie == NULL)
1371 		RETURN_ERROR(B_NO_MEMORY);
1372 	ObjectDeleter<file_cookie> cookieDeleter(cookie);
1373 
1374 	// initialize the cookie
1375 	cookie->open_mode = openMode & BFS_OPEN_MODE_USER_MASK;
1376 	cookie->last_size = inode->Size();
1377 	cookie->last_notification = system_time();
1378 
1379 	// Disable the file cache, if requested?
1380 	CObjectDeleter<void, void, file_cache_enable> fileCacheEnabler;
1381 	if ((openMode & O_NOCACHE) != 0 && inode->FileCache() != NULL) {
1382 		status = file_cache_disable(inode->FileCache());
1383 		if (status != B_OK)
1384 			return status;
1385 		fileCacheEnabler.SetTo(inode->FileCache());
1386 	}
1387 
1388 	// Should we truncate the file?
1389 	if ((openMode & O_TRUNC) != 0) {
1390 		if ((openMode & O_RWMASK) == O_RDONLY)
1391 			return B_NOT_ALLOWED;
1392 
1393 		Transaction transaction(volume, inode->BlockNumber());
1394 		inode->WriteLockInTransaction(transaction);
1395 
1396 		status_t status = inode->SetFileSize(transaction, 0);
1397 		if (status == B_OK)
1398 			status = inode->WriteBack(transaction);
1399 		if (status == B_OK)
1400 			status = transaction.Done();
1401 		if (status != B_OK)
1402 			return status;
1403 	}
1404 
1405 	fileCacheEnabler.Detach();
1406 	cookieDeleter.Detach();
1407 	*_cookie = cookie;
1408 	return B_OK;
1409 }
1410 
1411 
1412 static status_t
1413 bfs_read(fs_volume* _volume, fs_vnode* _node, void* _cookie, off_t pos,
1414 	void* buffer, size_t* _length)
1415 {
1416 	//FUNCTION();
1417 	Inode* inode = (Inode*)_node->private_node;
1418 
1419 	if (!inode->HasUserAccessableStream()) {
1420 		*_length = 0;
1421 		return inode->IsDirectory() ? B_IS_A_DIRECTORY : B_BAD_VALUE;
1422 	}
1423 
1424 	return inode->ReadAt(pos, (uint8*)buffer, _length);
1425 }
1426 
1427 
1428 static status_t
1429 bfs_write(fs_volume* _volume, fs_vnode* _node, void* _cookie, off_t pos,
1430 	const void* buffer, size_t* _length)
1431 {
1432 	//FUNCTION();
1433 	Volume* volume = (Volume*)_volume->private_volume;
1434 	Inode* inode = (Inode*)_node->private_node;
1435 
1436 	if (volume->IsReadOnly())
1437 		return B_READ_ONLY_DEVICE;
1438 
1439 	if (!inode->HasUserAccessableStream()) {
1440 		*_length = 0;
1441 		return inode->IsDirectory() ? B_IS_A_DIRECTORY : B_BAD_VALUE;
1442 	}
1443 
1444 	file_cookie* cookie = (file_cookie*)_cookie;
1445 
1446 	if (cookie->open_mode & O_APPEND)
1447 		pos = inode->Size();
1448 
1449 	Transaction transaction;
1450 		// We are not starting the transaction here, since
1451 		// it might not be needed at all (the contents of
1452 		// regular files aren't logged)
1453 
1454 	status_t status = inode->WriteAt(transaction, pos, (const uint8*)buffer,
1455 		_length);
1456 	if (status == B_OK)
1457 		status = transaction.Done();
1458 	if (status == B_OK) {
1459 		InodeReadLocker locker(inode);
1460 
1461 		// periodically notify if the file size has changed
1462 		// TODO: should we better test for a change in the last_modified time only?
1463 		if (!inode->IsDeleted() && cookie->last_size != inode->Size()
1464 			&& system_time() > cookie->last_notification
1465 					+ INODE_NOTIFICATION_INTERVAL) {
1466 			notify_stat_changed(volume->ID(), inode->ParentID(), inode->ID(),
1467 				B_STAT_MODIFICATION_TIME | B_STAT_SIZE | B_STAT_INTERIM_UPDATE);
1468 			cookie->last_size = inode->Size();
1469 			cookie->last_notification = system_time();
1470 		}
1471 	}
1472 
1473 	return status;
1474 }
1475 
1476 
1477 static status_t
1478 bfs_close(fs_volume* _volume, fs_vnode* _node, void* _cookie)
1479 {
1480 	FUNCTION();
1481 	return B_OK;
1482 }
1483 
1484 
/*!	Frees the file_cookie of an opened node, and does the work that was
	deferred until the file is closed.
	A transaction is only started if the volume is neither read-only nor
	reports IsCheckingThread(), the cookie's access mode bits (O_RWMASK) are
	non-zero, the inode is not deleted, and the inode either needs trimming,
	has a changed last modified time, or is in the size index and has a
	changed size. In that transaction, preallocated blocks are trimmed, and
	the size and last_modified indices are updated.
	Afterwards, a still running check is stopped if the cookie belongs to it,
	and the file cache is enabled again if the file was opened with O_NOCACHE.
	Always returns B_OK.
*/
static status_t
bfs_free_cookie(fs_volume* _volume, fs_vnode* _node, void* _cookie)
{
	FUNCTION();

	file_cookie* cookie = (file_cookie*)_cookie;
	Volume* volume = (Volume*)_volume->private_volume;
	Inode* inode = (Inode*)_node->private_node;

	// the transaction is only started below if there is something to do
	Transaction transaction;
	bool needsTrimming = false;

	if (!volume->IsReadOnly() && !volume->IsCheckingThread()) {
		InodeReadLocker locker(inode);
		needsTrimming = inode->NeedsTrimming();

		if ((cookie->open_mode & O_RWMASK) != 0
			&& !inode->IsDeleted()
			&& (needsTrimming
				|| inode->OldLastModified() != inode->LastModified()
				|| (inode->InSizeIndex()
					// TODO: this can prevent the size update notification
					// for nodes not in the index!
					&& inode->OldSize() != inode->Size()))) {
			// release the read lock before starting the transaction; the
			// inode is write-locked in the transaction further below
			locker.Unlock();
			transaction.Start(volume, inode->BlockNumber());
		}
	}

	// B_ERROR only serves as "no transaction was started" marker here, it is
	// never returned to the caller
	status_t status = transaction.IsStarted() ? B_OK : B_ERROR;

	if (status == B_OK) {
		inode->WriteLockInTransaction(transaction);

		// trim the preallocated blocks and update the size,
		// and last_modified indices if needed
		bool changedSize = false, changedTime = false;
		Index index(volume);

		if (needsTrimming) {
			status = inode->TrimPreallocation(transaction);
			if (status < B_OK) {
				FATAL(("Could not trim preallocated blocks: inode %" B_PRIdINO
					", transaction %d: %s!\n", inode->ID(),
					(int)transaction.ID(), strerror(status)));

				// we still want this transaction to succeed
				status = B_OK;
			}
		}
		if (inode->OldSize() != inode->Size()) {
			// the index is only updated for inodes that are in the size index
			if (inode->InSizeIndex())
				index.UpdateSize(transaction, inode);
			changedSize = true;
		}
		if (inode->OldLastModified() != inode->LastModified()) {
			if (inode->InLastModifiedIndex()) {
				index.UpdateLastModified(transaction, inode,
					inode->LastModified());
			}
			changedTime = true;

			// updating the index doesn't write back the inode
			// (the result of WriteBack() is not checked)
			inode->WriteBack(transaction);
		}

		if (changedSize || changedTime) {
			notify_stat_changed(volume->ID(), inode->ParentID(), inode->ID(),
				(changedTime ? B_STAT_MODIFICATION_TIME : 0)
				| (changedSize ? B_STAT_SIZE : 0));
		}
	}
	// the result of finishing the transaction is ignored as well
	if (status == B_OK)
		transaction.Done();

	if ((cookie->open_mode & BFS_OPEN_MODE_CHECKING) != 0) {
		// "chkbfs" exited abnormally, so we have to stop it here...
		FATAL(("check process was aborted!\n"));
		volume->CheckVisitor()->StopChecking();
		volume->DeleteCheckVisitor();
	}

	// counterpart to the file_cache_disable() call for O_NOCACHE
	if ((cookie->open_mode & O_NOCACHE) != 0 && inode->FileCache() != NULL)
		file_cache_enable(inode->FileCache());

	delete cookie;
	return B_OK;
}
1573 
1574 
1575 /*!	Checks access permissions, return B_NOT_ALLOWED if the action
1576 	is not allowed.
1577 */
1578 static status_t
1579 bfs_access(fs_volume* _volume, fs_vnode* _node, int accessMode)
1580 {
1581 	//FUNCTION();
1582 
1583 	Inode* inode = (Inode*)_node->private_node;
1584 	status_t status = inode->CheckPermissions(accessMode);
1585 	if (status < B_OK)
1586 		RETURN_ERROR(status);
1587 
1588 	return B_OK;
1589 }
1590 
1591 
1592 static status_t
1593 bfs_read_link(fs_volume* _volume, fs_vnode* _node, char* buffer,
1594 	size_t* _bufferSize)
1595 {
1596 	FUNCTION();
1597 
1598 	Inode* inode = (Inode*)_node->private_node;
1599 
1600 	if (!inode->IsSymLink())
1601 		RETURN_ERROR(B_BAD_VALUE);
1602 
1603 	if ((inode->Flags() & INODE_LONG_SYMLINK) != 0) {
1604 		status_t status = inode->ReadAt(0, (uint8*)buffer, _bufferSize);
1605 		if (status < B_OK)
1606 			RETURN_ERROR(status);
1607 
1608 		*_bufferSize = inode->Size();
1609 		return B_OK;
1610 	}
1611 
1612 	size_t linkLength = strlen(inode->Node().short_symlink);
1613 
1614 	size_t bytesToCopy = min_c(linkLength, *_bufferSize);
1615 
1616 	*_bufferSize = linkLength;
1617 
1618 	memcpy(buffer, inode->Node().short_symlink, bytesToCopy);
1619 	return B_OK;
1620 }
1621 
1622 
1623 //	#pragma mark - Directory functions
1624 
1625 
1626 static status_t
1627 bfs_create_dir(fs_volume* _volume, fs_vnode* _directory, const char* name,
1628 	int mode)
1629 {
1630 	FUNCTION_START(("name = \"%s\", perms = %d\n", name, mode));
1631 
1632 	Volume* volume = (Volume*)_volume->private_volume;
1633 	Inode* directory = (Inode*)_directory->private_node;
1634 
1635 	if (volume->IsReadOnly())
1636 		return B_READ_ONLY_DEVICE;
1637 
1638 	if (!directory->IsDirectory())
1639 		RETURN_ERROR(B_BAD_TYPE);
1640 
1641 	status_t status = directory->CheckPermissions(W_OK);
1642 	if (status < B_OK)
1643 		RETURN_ERROR(status);
1644 
1645 	Transaction transaction(volume, directory->BlockNumber());
1646 
1647 	// Inode::Create() locks the inode if we pass the "id" parameter, but we
1648 	// need it anyway
1649 	off_t id;
1650 	status = Inode::Create(transaction, directory, name,
1651 		S_DIRECTORY | (mode & S_IUMSK), 0, 0, NULL, &id);
1652 	if (status == B_OK) {
1653 		put_vnode(volume->FSVolume(), id);
1654 
1655 		entry_cache_add(volume->ID(), directory->ID(), name, id);
1656 
1657 		status = transaction.Done();
1658 		if (status == B_OK)
1659 			notify_entry_created(volume->ID(), directory->ID(), name, id);
1660 		else
1661 			entry_cache_remove(volume->ID(), directory->ID(), name);
1662 	}
1663 
1664 	return status;
1665 }
1666 
1667 
1668 static status_t
1669 bfs_remove_dir(fs_volume* _volume, fs_vnode* _directory, const char* name)
1670 {
1671 	FUNCTION_START(("name = \"%s\"\n", name));
1672 
1673 	Volume* volume = (Volume*)_volume->private_volume;
1674 	Inode* directory = (Inode*)_directory->private_node;
1675 
1676 	Transaction transaction(volume, directory->BlockNumber());
1677 
1678 	off_t id;
1679 	status_t status = directory->Remove(transaction, name, &id, true);
1680 	if (status == B_OK) {
1681 		// Remove the cache entry for the directory and potentially also
1682 		// the parent entry still belonging to the directory
1683 		entry_cache_remove(volume->ID(), directory->ID(), name);
1684 		entry_cache_remove(volume->ID(), id, "..");
1685 
1686 		status = transaction.Done();
1687 		if (status == B_OK)
1688 			notify_entry_removed(volume->ID(), directory->ID(), name, id);
1689 		else {
1690 			entry_cache_add(volume->ID(), directory->ID(), name, id);
1691 			entry_cache_add(volume->ID(), id, "..", id);
1692 		}
1693 	}
1694 
1695 	return status;
1696 }
1697 
1698 
1699 /*!	Opens a directory ready to be traversed.
1700 	bfs_open_dir() is also used by bfs_open_index_dir().
1701 */
1702 static status_t
1703 bfs_open_dir(fs_volume* _volume, fs_vnode* _node, void** _cookie)
1704 {
1705 	FUNCTION();
1706 
1707 	Inode* inode = (Inode*)_node->private_node;
1708 	status_t status = inode->CheckPermissions(R_OK);
1709 	if (status < B_OK)
1710 		RETURN_ERROR(status);
1711 
1712 	// we don't ask here for directories only, because the bfs_open_index_dir()
1713 	// function utilizes us (so we must be able to open indices as well)
1714 	if (!inode->IsContainer())
1715 		RETURN_ERROR(B_NOT_A_DIRECTORY);
1716 
1717 	BPlusTree* tree = inode->Tree();
1718 	if (tree == NULL)
1719 		RETURN_ERROR(B_BAD_VALUE);
1720 
1721 	TreeIterator* iterator = new(std::nothrow) TreeIterator(tree);
1722 	if (iterator == NULL)
1723 		RETURN_ERROR(B_NO_MEMORY);
1724 
1725 	*_cookie = iterator;
1726 	return B_OK;
1727 }
1728 
1729 
1730 static status_t
1731 bfs_read_dir(fs_volume* _volume, fs_vnode* _node, void* _cookie,
1732 	struct dirent* dirent, size_t bufferSize, uint32* _num)
1733 {
1734 	FUNCTION();
1735 
1736 	TreeIterator* iterator = (TreeIterator*)_cookie;
1737 	Volume* volume = (Volume*)_volume->private_volume;
1738 
1739 	uint32 maxCount = *_num;
1740 	uint32 count = 0;
1741 
1742 	while (count < maxCount && bufferSize > sizeof(struct dirent)) {
1743 		ino_t id;
1744 		uint16 length;
1745 		size_t nameBufferSize = bufferSize - offsetof(struct dirent, d_name);
1746 
1747 		status_t status = iterator->GetNextEntry(dirent->d_name, &length,
1748 			nameBufferSize, &id);
1749 
1750 		if (status == B_ENTRY_NOT_FOUND)
1751 			break;
1752 
1753 		if (status == B_BUFFER_OVERFLOW) {
1754 			// the remaining name buffer length was too small
1755 			if (count == 0)
1756 				RETURN_ERROR(B_BUFFER_OVERFLOW);
1757 			break;
1758 		}
1759 
1760 		if (status != B_OK)
1761 			RETURN_ERROR(status);
1762 
1763 		dirent->d_dev = volume->ID();
1764 		dirent->d_ino = id;
1765 
1766 		dirent = next_dirent(dirent, length, bufferSize);
1767 		count++;
1768 	}
1769 
1770 	*_num = count;
1771 	return B_OK;
1772 }
1773 
1774 
1775 /*!	Sets the TreeIterator back to the beginning of the directory. */
1776 static status_t
1777 bfs_rewind_dir(fs_volume* /*_volume*/, fs_vnode* /*node*/, void* _cookie)
1778 {
1779 	FUNCTION();
1780 	TreeIterator* iterator = (TreeIterator*)_cookie;
1781 
1782 	return iterator->Rewind();
1783 }
1784 
1785 
1786 static status_t
1787 bfs_close_dir(fs_volume* /*_volume*/, fs_vnode* /*node*/, void* /*_cookie*/)
1788 {
1789 	FUNCTION();
1790 	return B_OK;
1791 }
1792 
1793 
1794 static status_t
1795 bfs_free_dir_cookie(fs_volume* _volume, fs_vnode* node, void* _cookie)
1796 {
1797 	delete (TreeIterator*)_cookie;
1798 	return B_OK;
1799 }
1800 
1801 
1802 //	#pragma mark - Attribute functions
1803 
1804 
1805 static status_t
1806 bfs_open_attr_dir(fs_volume* _volume, fs_vnode* _node, void** _cookie)
1807 {
1808 	Inode* inode = (Inode*)_node->private_node;
1809 
1810 	FUNCTION();
1811 
1812 	AttributeIterator* iterator = new(std::nothrow) AttributeIterator(inode);
1813 	if (iterator == NULL)
1814 		RETURN_ERROR(B_NO_MEMORY);
1815 
1816 	*_cookie = iterator;
1817 	return B_OK;
1818 }
1819 
1820 
1821 static status_t
1822 bfs_close_attr_dir(fs_volume* _volume, fs_vnode* node, void* cookie)
1823 {
1824 	FUNCTION();
1825 	return B_OK;
1826 }
1827 
1828 
1829 static status_t
1830 bfs_free_attr_dir_cookie(fs_volume* _volume, fs_vnode* node, void* _cookie)
1831 {
1832 	FUNCTION();
1833 	AttributeIterator* iterator = (AttributeIterator*)_cookie;
1834 
1835 	delete iterator;
1836 	return B_OK;
1837 }
1838 
1839 
1840 static status_t
1841 bfs_rewind_attr_dir(fs_volume* _volume, fs_vnode* _node, void* _cookie)
1842 {
1843 	FUNCTION();
1844 
1845 	AttributeIterator* iterator = (AttributeIterator*)_cookie;
1846 	RETURN_ERROR(iterator->Rewind());
1847 }
1848 
1849 
1850 static status_t
1851 bfs_read_attr_dir(fs_volume* _volume, fs_vnode* node, void* _cookie,
1852 	struct dirent* dirent, size_t bufferSize, uint32* _num)
1853 {
1854 	FUNCTION();
1855 	AttributeIterator* iterator = (AttributeIterator*)_cookie;
1856 
1857 	uint32 type;
1858 	size_t length;
1859 	status_t status = iterator->GetNext(dirent->d_name, &length, &type,
1860 		&dirent->d_ino);
1861 	if (status == B_ENTRY_NOT_FOUND) {
1862 		*_num = 0;
1863 		return B_OK;
1864 	} else if (status != B_OK) {
1865 		RETURN_ERROR(status);
1866 	}
1867 
1868 	Volume* volume = (Volume*)_volume->private_volume;
1869 
1870 	dirent->d_dev = volume->ID();
1871 	dirent->d_reclen = offsetof(struct dirent, d_name) + length + 1;
1872 
1873 	*_num = 1;
1874 	return B_OK;
1875 }
1876 
1877 
1878 static status_t
1879 bfs_create_attr(fs_volume* _volume, fs_vnode* _node, const char* name,
1880 	uint32 type, int openMode, void** _cookie)
1881 {
1882 	FUNCTION();
1883 
1884 	Volume* volume = (Volume*)_volume->private_volume;
1885 	if (volume->IsReadOnly())
1886 		return B_READ_ONLY_DEVICE;
1887 
1888 	Inode* inode = (Inode*)_node->private_node;
1889 	Attribute attribute(inode);
1890 
1891 	return attribute.Create(name, type, openMode, (attr_cookie**)_cookie);
1892 }
1893 
1894 
1895 static status_t
1896 bfs_open_attr(fs_volume* _volume, fs_vnode* _node, const char* name,
1897 	int openMode, void** _cookie)
1898 {
1899 	FUNCTION();
1900 
1901 	Inode* inode = (Inode*)_node->private_node;
1902 	Attribute attribute(inode);
1903 
1904 	return attribute.Open(name, openMode, (attr_cookie**)_cookie);
1905 }
1906 
1907 
1908 static status_t
1909 bfs_close_attr(fs_volume* _volume, fs_vnode* _file, void* cookie)
1910 {
1911 	return B_OK;
1912 }
1913 
1914 
1915 static status_t
1916 bfs_free_attr_cookie(fs_volume* _volume, fs_vnode* _file, void* cookie)
1917 {
1918 	delete (attr_cookie*)cookie;
1919 	return B_OK;
1920 }
1921 
1922 
1923 static status_t
1924 bfs_read_attr(fs_volume* _volume, fs_vnode* _file, void* _cookie, off_t pos,
1925 	void* buffer, size_t* _length)
1926 {
1927 	FUNCTION();
1928 
1929 	attr_cookie* cookie = (attr_cookie*)_cookie;
1930 	Inode* inode = (Inode*)_file->private_node;
1931 
1932 	Attribute attribute(inode, cookie);
1933 
1934 	return attribute.Read(cookie, pos, (uint8*)buffer, _length);
1935 }
1936 
1937 
1938 static status_t
1939 bfs_write_attr(fs_volume* _volume, fs_vnode* _file, void* _cookie,
1940 	off_t pos, const void* buffer, size_t* _length)
1941 {
1942 	FUNCTION();
1943 
1944 	attr_cookie* cookie = (attr_cookie*)_cookie;
1945 	Volume* volume = (Volume*)_volume->private_volume;
1946 	Inode* inode = (Inode*)_file->private_node;
1947 
1948 	Transaction transaction(volume, inode->BlockNumber());
1949 	Attribute attribute(inode, cookie);
1950 
1951 	bool created;
1952 	status_t status = attribute.Write(transaction, cookie, pos,
1953 		(const uint8*)buffer, _length, &created);
1954 	if (status == B_OK) {
1955 		status = transaction.Done();
1956 		if (status == B_OK) {
1957 			notify_attribute_changed(volume->ID(), inode->ParentID(),
1958 				inode->ID(), cookie->name,
1959 				created ? B_ATTR_CREATED : B_ATTR_CHANGED);
1960 			notify_stat_changed(volume->ID(), inode->ParentID(), inode->ID(),
1961 				B_STAT_CHANGE_TIME);
1962 		}
1963 	}
1964 
1965 	return status;
1966 }
1967 
1968 
1969 static status_t
1970 bfs_read_attr_stat(fs_volume* _volume, fs_vnode* _file, void* _cookie,
1971 	struct stat* stat)
1972 {
1973 	FUNCTION();
1974 
1975 	attr_cookie* cookie = (attr_cookie*)_cookie;
1976 	Inode* inode = (Inode*)_file->private_node;
1977 
1978 	Attribute attribute(inode, cookie);
1979 
1980 	return attribute.Stat(*stat);
1981 }
1982 
1983 
1984 static status_t
1985 bfs_write_attr_stat(fs_volume* _volume, fs_vnode* file, void* cookie,
1986 	const struct stat* stat, int statMask)
1987 {
1988 	// TODO: Implement (at least setting the size)!
1989 	return EOPNOTSUPP;
1990 }
1991 
1992 
1993 static status_t
1994 bfs_rename_attr(fs_volume* _volume, fs_vnode* fromFile, const char* fromName,
1995 	fs_vnode* toFile, const char* toName)
1996 {
1997 	FUNCTION_START(("name = \"%s\", to = \"%s\"\n", fromName, toName));
1998 
1999 	// TODO: implement bfs_rename_attr()!
2000 	// There will probably be an API to move one attribute to another file,
2001 	// making that function much more complicated - oh joy ;-)
2002 
2003 	return EOPNOTSUPP;
2004 }
2005 
2006 
2007 static status_t
2008 bfs_remove_attr(fs_volume* _volume, fs_vnode* _node, const char* name)
2009 {
2010 	FUNCTION_START(("name = \"%s\"\n", name));
2011 
2012 	Volume* volume = (Volume*)_volume->private_volume;
2013 	Inode* inode = (Inode*)_node->private_node;
2014 
2015 	status_t status = inode->CheckPermissions(W_OK);
2016 	if (status != B_OK)
2017 		return status;
2018 
2019 	Transaction transaction(volume, inode->BlockNumber());
2020 
2021 	status = inode->RemoveAttribute(transaction, name);
2022 	if (status == B_OK)
2023 		status = transaction.Done();
2024 	if (status == B_OK) {
2025 		notify_attribute_changed(volume->ID(), inode->ParentID(), inode->ID(),
2026 			name, B_ATTR_REMOVED);
2027 	}
2028 
2029 	return status;
2030 }
2031 
2032 
2033 //	#pragma mark - Special Nodes
2034 
2035 
2036 status_t
2037 bfs_create_special_node(fs_volume* _volume, fs_vnode* _directory,
2038 	const char* name, fs_vnode* subVnode, mode_t mode, uint32 flags,
2039 	fs_vnode* _superVnode, ino_t* _nodeID)
2040 {
2041 	// no need to support entry-less nodes
2042 	if (name == NULL)
2043 		return B_UNSUPPORTED;
2044 
2045 	FUNCTION_START(("name = \"%s\", mode = %u, flags = 0x%" B_PRIx32
2046 		", subVnode: %p\n", name, (unsigned int)mode, flags, subVnode));
2047 
2048 	Volume* volume = (Volume*)_volume->private_volume;
2049 	Inode* directory = (Inode*)_directory->private_node;
2050 
2051 	if (volume->IsReadOnly())
2052 		return B_READ_ONLY_DEVICE;
2053 
2054 	if (!directory->IsDirectory())
2055 		RETURN_ERROR(B_BAD_TYPE);
2056 
2057 	status_t status = directory->CheckPermissions(W_OK);
2058 	if (status < B_OK)
2059 		RETURN_ERROR(status);
2060 
2061 	Transaction transaction(volume, directory->BlockNumber());
2062 
2063 	off_t id;
2064 	Inode* inode;
2065 	status = Inode::Create(transaction, directory, name, mode, O_EXCL, 0, NULL,
2066 		&id, &inode, subVnode ? subVnode->ops : NULL, flags);
2067 	if (status == B_OK) {
2068 		_superVnode->private_node = inode;
2069 		_superVnode->ops = &gBFSVnodeOps;
2070 		*_nodeID = id;
2071 
2072 		entry_cache_add(volume->ID(), directory->ID(), name, id);
2073 
2074 		status = transaction.Done();
2075 		if (status == B_OK)
2076 			notify_entry_created(volume->ID(), directory->ID(), name, id);
2077 		else
2078 			entry_cache_remove(volume->ID(), directory->ID(), name);
2079 	}
2080 
2081 	return status;
2082 }
2083 
2084 
2085 //	#pragma mark - Index functions
2086 
2087 
2088 static status_t
2089 bfs_open_index_dir(fs_volume* _volume, void** _cookie)
2090 {
2091 	FUNCTION();
2092 
2093 	Volume* volume = (Volume*)_volume->private_volume;
2094 
2095 	if (volume->IndicesNode() == NULL) {
2096 		// This volume does not have any indices
2097 		RETURN_ERROR(B_ENTRY_NOT_FOUND);
2098 	}
2099 
2100 	// Since the indices root node is just a directory, and we are storing
2101 	// a pointer to it in our Volume object, we can just use the directory
2102 	// traversal functions.
2103 	// In fact we're storing it in the Volume object for that reason.
2104 
2105 	fs_vnode indicesNode;
2106 	indicesNode.private_node = volume->IndicesNode();
2107 
2108 	RETURN_ERROR(bfs_open_dir(_volume, &indicesNode, _cookie));
2109 }
2110 
2111 
2112 static status_t
2113 bfs_close_index_dir(fs_volume* _volume, void* _cookie)
2114 {
2115 	FUNCTION();
2116 
2117 	Volume* volume = (Volume*)_volume->private_volume;
2118 
2119 	fs_vnode indicesNode;
2120 	indicesNode.private_node = volume->IndicesNode();
2121 
2122 	RETURN_ERROR(bfs_close_dir(_volume, &indicesNode, _cookie));
2123 }
2124 
2125 
2126 static status_t
2127 bfs_free_index_dir_cookie(fs_volume* _volume, void* _cookie)
2128 {
2129 	FUNCTION();
2130 
2131 	Volume* volume = (Volume*)_volume->private_volume;
2132 
2133 	fs_vnode indicesNode;
2134 	indicesNode.private_node = volume->IndicesNode();
2135 
2136 	RETURN_ERROR(bfs_free_dir_cookie(_volume, &indicesNode, _cookie));
2137 }
2138 
2139 
2140 static status_t
2141 bfs_rewind_index_dir(fs_volume* _volume, void* _cookie)
2142 {
2143 	FUNCTION();
2144 
2145 	Volume* volume = (Volume*)_volume->private_volume;
2146 
2147 	fs_vnode indicesNode;
2148 	indicesNode.private_node = volume->IndicesNode();
2149 
2150 	RETURN_ERROR(bfs_rewind_dir(_volume, &indicesNode, _cookie));
2151 }
2152 
2153 
2154 static status_t
2155 bfs_read_index_dir(fs_volume* _volume, void* _cookie, struct dirent* dirent,
2156 	size_t bufferSize, uint32* _num)
2157 {
2158 	FUNCTION();
2159 
2160 	Volume* volume = (Volume*)_volume->private_volume;
2161 
2162 	fs_vnode indicesNode;
2163 	indicesNode.private_node = volume->IndicesNode();
2164 
2165 	RETURN_ERROR(bfs_read_dir(_volume, &indicesNode, _cookie, dirent,
2166 		bufferSize, _num));
2167 }
2168 
2169 
2170 static status_t
2171 bfs_create_index(fs_volume* _volume, const char* name, uint32 type,
2172 	uint32 flags)
2173 {
2174 	FUNCTION_START(("name = \"%s\", type = %" B_PRIu32
2175 		", flags = %" B_PRIu32 "\n", name, type, flags));
2176 
2177 	Volume* volume = (Volume*)_volume->private_volume;
2178 
2179 	if (volume->IsReadOnly())
2180 		return B_READ_ONLY_DEVICE;
2181 
2182 	// only root users are allowed to create indices
2183 	if (geteuid() != 0)
2184 		return B_NOT_ALLOWED;
2185 
2186 	Transaction transaction(volume, volume->Indices());
2187 
2188 	Index index(volume);
2189 	status_t status = index.Create(transaction, name, type);
2190 
2191 	if (status == B_OK)
2192 		status = transaction.Done();
2193 
2194 	RETURN_ERROR(status);
2195 }
2196 
2197 
2198 static status_t
2199 bfs_remove_index(fs_volume* _volume, const char* name)
2200 {
2201 	FUNCTION();
2202 
2203 	Volume* volume = (Volume*)_volume->private_volume;
2204 
2205 	if (volume->IsReadOnly())
2206 		return B_READ_ONLY_DEVICE;
2207 
2208 	// only root users are allowed to remove indices
2209 	if (geteuid() != 0)
2210 		return B_NOT_ALLOWED;
2211 
2212 	Inode* indices = volume->IndicesNode();
2213 	if (indices == NULL)
2214 		return B_ENTRY_NOT_FOUND;
2215 
2216 	Transaction transaction(volume, volume->Indices());
2217 
2218 	status_t status = indices->Remove(transaction, name);
2219 	if (status == B_OK)
2220 		status = transaction.Done();
2221 
2222 	RETURN_ERROR(status);
2223 }
2224 
2225 
2226 static status_t
2227 bfs_stat_index(fs_volume* _volume, const char* name, struct stat* stat)
2228 {
2229 	FUNCTION_START(("name = %s\n", name));
2230 
2231 	Volume* volume = (Volume*)_volume->private_volume;
2232 
2233 	Index index(volume);
2234 	status_t status = index.SetTo(name);
2235 	if (status < B_OK)
2236 		RETURN_ERROR(status);
2237 
2238 	bfs_inode& node = index.Node()->Node();
2239 
2240 	stat->st_type = index.Type();
2241 	stat->st_mode = node.Mode();
2242 
2243 	stat->st_size = node.data.Size();
2244 	stat->st_blocks = index.Node()->AllocatedSize() / 512;
2245 
2246 	stat->st_nlink = 1;
2247 	stat->st_blksize = 65536;
2248 
2249 	stat->st_uid = node.UserID();
2250 	stat->st_gid = node.GroupID();
2251 
2252 	fill_stat_time(node, *stat);
2253 
2254 	return B_OK;
2255 }
2256 
2257 
2258 //	#pragma mark - Query functions
2259 
2260 
2261 static status_t
2262 bfs_open_query(fs_volume* _volume, const char* queryString, uint32 flags,
2263 	port_id port, uint32 token, void** _cookie)
2264 {
2265 	FUNCTION_START(("bfs_open_query(\"%s\", flags = %" B_PRIu32
2266 		", port_id = %" B_PRId32 ", token = %" B_PRIu32 ")\n",
2267 		queryString, flags, port, token));
2268 
2269 	Volume* volume = (Volume*)_volume->private_volume;
2270 
2271 	Expression* expression = new(std::nothrow) Expression((char*)queryString);
2272 	if (expression == NULL)
2273 		RETURN_ERROR(B_NO_MEMORY);
2274 
2275 	if (expression->InitCheck() < B_OK) {
2276 		INFORM(("Could not parse query \"%s\", stopped at: \"%s\"\n",
2277 			queryString, expression->Position()));
2278 
2279 		delete expression;
2280 		RETURN_ERROR(B_BAD_VALUE);
2281 	}
2282 
2283 	Query* query = new(std::nothrow) Query(volume, expression, flags);
2284 	if (query == NULL) {
2285 		delete expression;
2286 		RETURN_ERROR(B_NO_MEMORY);
2287 	}
2288 
2289 	if (flags & B_LIVE_QUERY)
2290 		query->SetLiveMode(port, token);
2291 
2292 	*_cookie = (void*)query;
2293 
2294 	return B_OK;
2295 }
2296 
2297 
2298 static status_t
2299 bfs_close_query(fs_volume* _volume, void* cookie)
2300 {
2301 	FUNCTION();
2302 	return B_OK;
2303 }
2304 
2305 
2306 static status_t
2307 bfs_free_query_cookie(fs_volume* _volume, void* cookie)
2308 {
2309 	FUNCTION();
2310 
2311 	Query* query = (Query*)cookie;
2312 	Expression* expression = query->GetExpression();
2313 	delete query;
2314 	delete expression;
2315 
2316 	return B_OK;
2317 }
2318 
2319 
2320 static status_t
2321 bfs_read_query(fs_volume* /*_volume*/, void* cookie, struct dirent* dirent,
2322 	size_t bufferSize, uint32* _num)
2323 {
2324 	FUNCTION();
2325 	Query* query = (Query*)cookie;
2326 	status_t status = query->GetNextEntry(dirent, bufferSize);
2327 	if (status == B_OK)
2328 		*_num = 1;
2329 	else if (status == B_ENTRY_NOT_FOUND)
2330 		*_num = 0;
2331 	else
2332 		return status;
2333 
2334 	return B_OK;
2335 }
2336 
2337 
2338 static status_t
2339 bfs_rewind_query(fs_volume* /*_volume*/, void* cookie)
2340 {
2341 	FUNCTION();
2342 
2343 	Query* query = (Query*)cookie;
2344 	return query->Rewind();
2345 }
2346 
2347 
2348 //	#pragma mark -
2349 
2350 
2351 static uint32
2352 bfs_get_supported_operations(partition_data* partition, uint32 mask)
2353 {
2354 	// TODO: We should at least check the partition size.
2355 	return B_DISK_SYSTEM_SUPPORTS_INITIALIZING
2356 		| B_DISK_SYSTEM_SUPPORTS_CONTENT_NAME
2357 		| B_DISK_SYSTEM_SUPPORTS_WRITING;
2358 }
2359 
2360 
2361 static status_t
2362 bfs_initialize(int fd, partition_id partitionID, const char* name,
2363 	const char* parameterString, off_t /*partitionSize*/, disk_job_id job)
2364 {
2365 	// check name
2366 	status_t status = check_volume_name(name);
2367 	if (status != B_OK)
2368 		return status;
2369 
2370 	// parse parameters
2371 	initialize_parameters parameters;
2372 	status = parse_initialize_parameters(parameterString, parameters);
2373 	if (status != B_OK)
2374 		return status;
2375 
2376 	update_disk_device_job_progress(job, 0);
2377 
2378 	// initialize the volume
2379 	Volume volume(NULL);
2380 	status = volume.Initialize(fd, name, parameters.blockSize,
2381 		parameters.flags);
2382 	if (status < B_OK) {
2383 		INFORM(("Initializing volume failed: %s\n", strerror(status)));
2384 		return status;
2385 	}
2386 
2387 	// rescan partition
2388 	status = scan_partition(partitionID);
2389 	if (status != B_OK)
2390 		return status;
2391 
2392 	update_disk_device_job_progress(job, 1);
2393 
2394 	// print some info, if desired
2395 	if (parameters.verbose) {
2396 		disk_super_block super = volume.SuperBlock();
2397 
2398 		INFORM(("Disk was initialized successfully.\n"));
2399 		INFORM(("\tname: \"%s\"\n", super.name));
2400 		INFORM(("\tnum blocks: %" B_PRIdOFF "\n", super.NumBlocks()));
2401 		INFORM(("\tused blocks: %" B_PRIdOFF "\n", super.UsedBlocks()));
2402 		INFORM(("\tblock size: %u bytes\n", (unsigned)super.BlockSize()));
2403 		INFORM(("\tnum allocation groups: %d\n",
2404 			(int)super.AllocationGroups()));
2405 		INFORM(("\tallocation group size: %ld blocks\n",
2406 			1L << super.AllocationGroupShift()));
2407 		INFORM(("\tlog size: %u blocks\n", super.log_blocks.Length()));
2408 	}
2409 
2410 	return B_OK;
2411 }
2412 
2413 
2414 static status_t
2415 bfs_uninitialize(int fd, partition_id partitionID, off_t partitionSize,
2416 	uint32 blockSize, disk_job_id job)
2417 {
2418 	if (blockSize == 0)
2419 		return B_BAD_VALUE;
2420 
2421 	update_disk_device_job_progress(job, 0.0);
2422 
2423 	// just overwrite the superblock
2424 	disk_super_block superBlock;
2425 	memset(&superBlock, 0, sizeof(superBlock));
2426 
2427 	if (write_pos(fd, 512, &superBlock, sizeof(superBlock)) < 0)
2428 		return errno;
2429 
2430 	update_disk_device_job_progress(job, 1.0);
2431 
2432 	return B_OK;
2433 }
2434 
2435 
2436 //	#pragma mark -
2437 
2438 
2439 static status_t
2440 bfs_std_ops(int32 op, ...)
2441 {
2442 	switch (op) {
2443 		case B_MODULE_INIT:
2444 #ifdef BFS_DEBUGGER_COMMANDS
2445 			add_debugger_commands();
2446 #endif
2447 			return B_OK;
2448 		case B_MODULE_UNINIT:
2449 #ifdef BFS_DEBUGGER_COMMANDS
2450 			remove_debugger_commands();
2451 #endif
2452 			return B_OK;
2453 
2454 		default:
2455 			return B_ERROR;
2456 	}
2457 }
2458 
2459 fs_volume_ops gBFSVolumeOps = {
2460 	&bfs_unmount,
2461 	&bfs_read_fs_stat,
2462 	&bfs_write_fs_stat,
2463 	&bfs_sync,
2464 	&bfs_get_vnode,
2465 
2466 	/* index directory & index operations */
2467 	&bfs_open_index_dir,
2468 	&bfs_close_index_dir,
2469 	&bfs_free_index_dir_cookie,
2470 	&bfs_read_index_dir,
2471 	&bfs_rewind_index_dir,
2472 
2473 	&bfs_create_index,
2474 	&bfs_remove_index,
2475 	&bfs_stat_index,
2476 
2477 	/* query operations */
2478 	&bfs_open_query,
2479 	&bfs_close_query,
2480 	&bfs_free_query_cookie,
2481 	&bfs_read_query,
2482 	&bfs_rewind_query,
2483 };
2484 
2485 fs_vnode_ops gBFSVnodeOps = {
2486 	/* vnode operations */
2487 	&bfs_lookup,
2488 	&bfs_get_vnode_name,
2489 	&bfs_put_vnode,
2490 	&bfs_remove_vnode,
2491 
2492 	/* VM file access */
2493 	&bfs_can_page,
2494 	&bfs_read_pages,
2495 	&bfs_write_pages,
2496 
2497 	&bfs_io,
2498 	NULL,	// cancel_io()
2499 
2500 	&bfs_get_file_map,
2501 
2502 	&bfs_ioctl,
2503 	&bfs_set_flags,
2504 	NULL,	// fs_select
2505 	NULL,	// fs_deselect
2506 	&bfs_fsync,
2507 
2508 	&bfs_read_link,
2509 	&bfs_create_symlink,
2510 
2511 	&bfs_link,
2512 	&bfs_unlink,
2513 	&bfs_rename,
2514 
2515 	&bfs_access,
2516 	&bfs_read_stat,
2517 	&bfs_write_stat,
2518 	NULL,	// fs_preallocate
2519 
2520 	/* file operations */
2521 	&bfs_create,
2522 	&bfs_open,
2523 	&bfs_close,
2524 	&bfs_free_cookie,
2525 	&bfs_read,
2526 	&bfs_write,
2527 
2528 	/* directory operations */
2529 	&bfs_create_dir,
2530 	&bfs_remove_dir,
2531 	&bfs_open_dir,
2532 	&bfs_close_dir,
2533 	&bfs_free_dir_cookie,
2534 	&bfs_read_dir,
2535 	&bfs_rewind_dir,
2536 
2537 	/* attribute directory operations */
2538 	&bfs_open_attr_dir,
2539 	&bfs_close_attr_dir,
2540 	&bfs_free_attr_dir_cookie,
2541 	&bfs_read_attr_dir,
2542 	&bfs_rewind_attr_dir,
2543 
2544 	/* attribute operations */
2545 	&bfs_create_attr,
2546 	&bfs_open_attr,
2547 	&bfs_close_attr,
2548 	&bfs_free_attr_cookie,
2549 	&bfs_read_attr,
2550 	&bfs_write_attr,
2551 
2552 	&bfs_read_attr_stat,
2553 	&bfs_write_attr_stat,
2554 	&bfs_rename_attr,
2555 	&bfs_remove_attr,
2556 
2557 	/* special nodes */
2558 	&bfs_create_special_node
2559 };
2560 
2561 static file_system_module_info sBeFileSystem = {
2562 	{
2563 		"file_systems/bfs" BFS_ENDIAN_SUFFIX B_CURRENT_FS_API_VERSION,
2564 		0,
2565 		bfs_std_ops,
2566 	},
2567 
2568 	"bfs" BFS_ENDIAN_SUFFIX,						// short_name
2569 	"Be File System" BFS_ENDIAN_PRETTY_SUFFIX,		// pretty_name
2570 
2571 	// DDM flags
2572 	0
2573 //	| B_DISK_SYSTEM_SUPPORTS_CHECKING
2574 //	| B_DISK_SYSTEM_SUPPORTS_REPAIRING
2575 //	| B_DISK_SYSTEM_SUPPORTS_RESIZING
2576 //	| B_DISK_SYSTEM_SUPPORTS_MOVING
2577 //	| B_DISK_SYSTEM_SUPPORTS_SETTING_CONTENT_NAME
2578 //	| B_DISK_SYSTEM_SUPPORTS_SETTING_CONTENT_PARAMETERS
2579 	| B_DISK_SYSTEM_SUPPORTS_INITIALIZING
2580 	| B_DISK_SYSTEM_SUPPORTS_CONTENT_NAME
2581 //	| B_DISK_SYSTEM_SUPPORTS_DEFRAGMENTING
2582 //	| B_DISK_SYSTEM_SUPPORTS_DEFRAGMENTING_WHILE_MOUNTED
2583 //	| B_DISK_SYSTEM_SUPPORTS_CHECKING_WHILE_MOUNTED
2584 //	| B_DISK_SYSTEM_SUPPORTS_REPAIRING_WHILE_MOUNTED
2585 //	| B_DISK_SYSTEM_SUPPORTS_RESIZING_WHILE_MOUNTED
2586 //	| B_DISK_SYSTEM_SUPPORTS_MOVING_WHILE_MOUNTED
2587 //	| B_DISK_SYSTEM_SUPPORTS_SETTING_CONTENT_NAME_WHILE_MOUNTED
2588 //	| B_DISK_SYSTEM_SUPPORTS_SETTING_CONTENT_PARAMETERS_WHILE_MOUNTED
2589 	| B_DISK_SYSTEM_SUPPORTS_WRITING
2590 	,
2591 
2592 	// scanning
2593 	bfs_identify_partition,
2594 	bfs_scan_partition,
2595 	bfs_free_identify_partition_cookie,
2596 	NULL,	// free_partition_content_cookie()
2597 
2598 	&bfs_mount,
2599 
2600 	/* capability querying operations */
2601 	&bfs_get_supported_operations,
2602 
2603 	NULL,	// validate_resize
2604 	NULL,	// validate_move
2605 	NULL,	// validate_set_content_name
2606 	NULL,	// validate_set_content_parameters
2607 	NULL,	// validate_initialize,
2608 
2609 	/* shadow partition modification */
2610 	NULL,	// shadow_changed
2611 
2612 	/* writing */
2613 	NULL,	// defragment
2614 	NULL,	// repair
2615 	NULL,	// resize
2616 	NULL,	// move
2617 	NULL,	// set_content_name
2618 	NULL,	// set_content_parameters
2619 	bfs_initialize,
2620 	bfs_uninitialize
2621 };
2622 
2623 module_info* modules[] = {
2624 	(module_info*)&sBeFileSystem,
2625 	NULL,
2626 };
2627