xref: /haiku/src/add-ons/kernel/file_systems/bfs/kernel_interface.cpp (revision 5ac9b506412b11afb993bb52d161efe7666958a5)
1 /*
2  * Copyright 2001-2017, Axel Dörfler, axeld@pinc-software.de.
3  * This file may be used under the terms of the MIT License.
4  */
5 
6 
7 //!	file system interface to Haiku's vnode layer
8 
9 
10 #include "Debug.h"
11 #include "Volume.h"
12 #include "Inode.h"
13 #include "Index.h"
14 #include "BPlusTree.h"
15 #include "Query.h"
16 #include "Attribute.h"
17 #include "bfs_control.h"
18 #include "bfs_disk_system.h"
19 
20 // TODO: temporary solution as long as there is no public I/O requests API
21 #ifndef FS_SHELL
22 #	include <io_requests.h>
23 #	include <util/fs_trim_support.h>
24 #endif
25 
26 
// Preferred I/O size reported to callers (used for stat::st_blksize and
// fs_info::io_size below).
#define BFS_IO_SIZE	65536

// Name suffixes that tell the little endian only build apart from the
// other (big endian) variant.
// NOTE(review): presumably appended to the module/pretty names defined
// further down in this file - confirm.
#if defined(BFS_LITTLE_ENDIAN_ONLY)
#define BFS_ENDIAN_SUFFIX ""
#define BFS_ENDIAN_PRETTY_SUFFIX ""
#else
#define BFS_ENDIAN_SUFFIX "_big"
#define BFS_ENDIAN_PRETTY_SUFFIX " (Big Endian)"
#endif
36 
37 
/*!	Cookie passed from bfs_identify_partition() to bfs_scan_partition();
	it is released again by bfs_free_identify_partition_cookie().
*/
struct identify_cookie {
	disk_super_block super_block;
		// copy of the super block filled in by Volume::Identify()
};
41 
// Fills in \a stat from the given inode; the definition follows below.
extern void fill_stat_buffer(Inode* inode, struct stat& stat);
43 
44 
45 static void
46 fill_stat_time(const bfs_inode& node, struct stat& stat)
47 {
48 	bigtime_t now = real_time_clock_usecs();
49 	stat.st_atim.tv_sec = now / 1000000LL;
50 	stat.st_atim.tv_nsec = (now % 1000000LL) * 1000;
51 
52 	stat.st_mtim.tv_sec = bfs_inode::ToSecs(node.LastModifiedTime());
53 	stat.st_mtim.tv_nsec = bfs_inode::ToNsecs(node.LastModifiedTime());
54 	stat.st_crtim.tv_sec = bfs_inode::ToSecs(node.CreateTime());
55 	stat.st_crtim.tv_nsec = bfs_inode::ToNsecs(node.CreateTime());
56 
57 	// For BeOS compatibility, if on-disk ctime is invalid, fall back to mtime:
58 	bigtime_t changeTime = node.StatusChangeTime();
59 	if (changeTime < node.LastModifiedTime())
60 		stat.st_ctim = stat.st_mtim;
61 	else {
62 		stat.st_ctim.tv_sec = bfs_inode::ToSecs(changeTime);
63 		stat.st_ctim.tv_nsec = bfs_inode::ToNsecs(changeTime);
64 	}
65 }
66 
67 
68 void
69 fill_stat_buffer(Inode* inode, struct stat& stat)
70 {
71 	const bfs_inode& node = inode->Node();
72 
73 	stat.st_dev = inode->GetVolume()->ID();
74 	stat.st_ino = inode->ID();
75 	stat.st_nlink = 1;
76 	stat.st_blksize = BFS_IO_SIZE;
77 
78 	stat.st_uid = node.UserID();
79 	stat.st_gid = node.GroupID();
80 	stat.st_mode = node.Mode();
81 	stat.st_type = node.Type();
82 
83 	fill_stat_time(node, stat);
84 
85 	if (inode->IsSymLink() && (inode->Flags() & INODE_LONG_SYMLINK) == 0) {
86 		// symlinks report the size of the link here
87 		stat.st_size = strlen(node.short_symlink);
88 	} else
89 		stat.st_size = inode->Size();
90 
91 	stat.st_blocks = inode->AllocatedSize() / 512;
92 }
93 
94 
95 //!	bfs_io() callback hook
96 static status_t
97 iterative_io_get_vecs_hook(void* cookie, io_request* request, off_t offset,
98 	size_t size, struct file_io_vec* vecs, size_t* _count)
99 {
100 	Inode* inode = (Inode*)cookie;
101 
102 	return file_map_translate(inode->Map(), offset, size, vecs, _count,
103 		inode->GetVolume()->BlockSize());
104 }
105 
106 
107 //!	bfs_io() callback hook
108 static status_t
109 iterative_io_finished_hook(void* cookie, io_request* request, status_t status,
110 	bool partialTransfer, size_t bytesTransferred)
111 {
112 	Inode* inode = (Inode*)cookie;
113 	rw_lock_read_unlock(&inode->Lock());
114 	return B_OK;
115 }
116 
117 
118 //	#pragma mark - Scanning
119 
120 
121 static float
122 bfs_identify_partition(int fd, partition_data* partition, void** _cookie)
123 {
124 	disk_super_block superBlock;
125 	status_t status = Volume::Identify(fd, &superBlock);
126 	if (status != B_OK)
127 		return -1;
128 
129 	identify_cookie* cookie = new(std::nothrow) identify_cookie;
130 	if (cookie == NULL)
131 		return -1;
132 
133 	memcpy(&cookie->super_block, &superBlock, sizeof(disk_super_block));
134 
135 	*_cookie = cookie;
136 	return 0.8f;
137 }
138 
139 
140 static status_t
141 bfs_scan_partition(int fd, partition_data* partition, void* _cookie)
142 {
143 	identify_cookie* cookie = (identify_cookie*)_cookie;
144 
145 	partition->status = B_PARTITION_VALID;
146 	partition->flags |= B_PARTITION_FILE_SYSTEM;
147 	partition->content_size = cookie->super_block.NumBlocks()
148 		* cookie->super_block.BlockSize();
149 	partition->block_size = cookie->super_block.BlockSize();
150 	partition->content_name = strdup(cookie->super_block.name);
151 	if (partition->content_name == NULL)
152 		return B_NO_MEMORY;
153 
154 	return B_OK;
155 }
156 
157 
158 static void
159 bfs_free_identify_partition_cookie(partition_data* partition, void* _cookie)
160 {
161 	identify_cookie* cookie = (identify_cookie*)_cookie;
162 	delete cookie;
163 }
164 
165 
166 //	#pragma mark -
167 
168 
169 static status_t
170 bfs_mount(fs_volume* _volume, const char* device, uint32 flags,
171 	const char* args, ino_t* _rootID)
172 {
173 	FUNCTION();
174 
175 	Volume* volume = new(std::nothrow) Volume(_volume);
176 	if (volume == NULL)
177 		return B_NO_MEMORY;
178 
179 	status_t status = volume->Mount(device, flags);
180 	if (status != B_OK) {
181 		delete volume;
182 		RETURN_ERROR(status);
183 	}
184 
185 	_volume->private_volume = volume;
186 	_volume->ops = &gBFSVolumeOps;
187 	*_rootID = volume->ToVnode(volume->Root());
188 
189 	INFORM(("mounted \"%s\" (root node at %" B_PRIdINO ", device = %s)\n",
190 		volume->Name(), *_rootID, device));
191 	return B_OK;
192 }
193 
194 
195 static status_t
196 bfs_unmount(fs_volume* _volume)
197 {
198 	FUNCTION();
199 	Volume* volume = (Volume*)_volume->private_volume;
200 
201 	status_t status = volume->Unmount();
202 	delete volume;
203 
204 	RETURN_ERROR(status);
205 }
206 
207 
208 static status_t
209 bfs_read_fs_stat(fs_volume* _volume, struct fs_info* info)
210 {
211 	FUNCTION();
212 
213 	Volume* volume = (Volume*)_volume->private_volume;
214 	MutexLocker locker(volume->Lock());
215 
216 	// File system flags.
217 	info->flags = B_FS_IS_PERSISTENT | B_FS_HAS_ATTR | B_FS_HAS_MIME
218 		| (volume->IndicesNode() != NULL ? B_FS_HAS_QUERY : 0)
219 		| (volume->IsReadOnly() ? B_FS_IS_READONLY : 0)
220 		| B_FS_SUPPORTS_MONITOR_CHILDREN;
221 
222 	info->io_size = BFS_IO_SIZE;
223 		// whatever is appropriate here?
224 
225 	info->block_size = volume->BlockSize();
226 	info->total_blocks = volume->NumBlocks();
227 	info->free_blocks = volume->FreeBlocks();
228 
229 	// Volume name
230 	strlcpy(info->volume_name, volume->Name(), sizeof(info->volume_name));
231 
232 	// File system name
233 	strlcpy(info->fsh_name, "bfs", sizeof(info->fsh_name));
234 
235 	return B_OK;
236 }
237 
238 
239 static status_t
240 bfs_write_fs_stat(fs_volume* _volume, const struct fs_info* info, uint32 mask)
241 {
242 	FUNCTION_START(("mask = %ld\n", mask));
243 
244 	Volume* volume = (Volume*)_volume->private_volume;
245 	if (volume->IsReadOnly())
246 		return B_READ_ONLY_DEVICE;
247 
248 	MutexLocker locker(volume->Lock());
249 
250 	status_t status = B_BAD_VALUE;
251 
252 	if (mask & FS_WRITE_FSINFO_NAME) {
253 		disk_super_block& superBlock = volume->SuperBlock();
254 
255 		strncpy(superBlock.name, info->volume_name,
256 			sizeof(superBlock.name) - 1);
257 		superBlock.name[sizeof(superBlock.name) - 1] = '\0';
258 
259 		status = volume->WriteSuperBlock();
260 	}
261 	return status;
262 }
263 
264 
265 static status_t
266 bfs_sync(fs_volume* _volume)
267 {
268 	FUNCTION();
269 
270 	Volume* volume = (Volume*)_volume->private_volume;
271 	return volume->Sync();
272 }
273 
274 
275 //	#pragma mark -
276 
277 
278 /*!	Reads in the node from disk and creates an inode object from it.
279 */
280 static status_t
281 bfs_get_vnode(fs_volume* _volume, ino_t id, fs_vnode* _node, int* _type,
282 	uint32* _flags, bool reenter)
283 {
284 	//FUNCTION_START(("ino_t = %Ld\n", id));
285 	Volume* volume = (Volume*)_volume->private_volume;
286 
287 	// first inode may be after the log area, we don't go through
288 	// the hassle and try to load an earlier block from disk
289 	if (id < volume->ToBlock(volume->Log()) + volume->Log().Length()
290 		|| id > volume->NumBlocks()) {
291 		INFORM(("inode at %" B_PRIdINO " requested!\n", id));
292 		return B_ERROR;
293 	}
294 
295 	CachedBlock cached(volume, id);
296 	bfs_inode* node = (bfs_inode*)cached.Block();
297 	if (node == NULL) {
298 		FATAL(("could not read inode: %" B_PRIdINO "\n", id));
299 		return B_IO_ERROR;
300 	}
301 
302 	status_t status = node->InitCheck(volume);
303 	if (status != B_OK) {
304 		if ((node->Flags() & INODE_DELETED) != 0) {
305 			INFORM(("inode at %" B_PRIdINO " is already deleted!\n", id));
306 		} else {
307 			FATAL(("inode at %" B_PRIdINO " could not be read: %s!\n", id,
308 				strerror(status)));
309 		}
310 		return status;
311 	}
312 
313 	Inode* inode = new(std::nothrow) Inode(volume, id);
314 	if (inode == NULL)
315 		return B_NO_MEMORY;
316 
317 	status = inode->InitCheck(false);
318 	if (status != B_OK)
319 		delete inode;
320 
321 	if (status == B_OK) {
322 		_node->private_node = inode;
323 		_node->ops = &gBFSVnodeOps;
324 		*_type = inode->Mode();
325 		*_flags = 0;
326 	}
327 
328 	return status;
329 }
330 
331 
332 static status_t
333 bfs_put_vnode(fs_volume* _volume, fs_vnode* _node, bool reenter)
334 {
335 	Volume* volume = (Volume*)_volume->private_volume;
336 	Inode* inode = (Inode*)_node->private_node;
337 
338 	// since a directory's size can be changed without having it opened,
339 	// we need to take care about their preallocated blocks here
340 	if (!volume->IsReadOnly() && !volume->IsCheckingThread()
341 		&& inode->NeedsTrimming()) {
342 		Transaction transaction(volume, inode->BlockNumber());
343 
344 		if (inode->TrimPreallocation(transaction) == B_OK)
345 			transaction.Done();
346 		else if (transaction.HasParent()) {
347 			// TODO: for now, we don't let sub-transactions fail
348 			transaction.Done();
349 		}
350 	}
351 
352 	delete inode;
353 	return B_OK;
354 }
355 
356 
357 static status_t
358 bfs_remove_vnode(fs_volume* _volume, fs_vnode* _node, bool reenter)
359 {
360 	FUNCTION();
361 
362 	Volume* volume = (Volume*)_volume->private_volume;
363 	Inode* inode = (Inode*)_node->private_node;
364 
365 	// If the inode isn't in use anymore, we were called before
366 	// bfs_unlink() returns - in this case, we can just use the
367 	// transaction which has already deleted the inode.
368 	Transaction transaction(volume, volume->ToBlock(inode->Parent()));
369 
370 	// The file system check functionality uses this flag to prevent the space
371 	// used up by the inode from being freed - this flag is set only in
372 	// situations where this does not cause any harm as the block bitmap will
373 	// get fixed anyway in this case).
374 	if ((inode->Flags() & INODE_DONT_FREE_SPACE) != 0) {
375 		delete inode;
376 		return B_OK;
377 	}
378 
379 	ASSERT((inode->Flags() & INODE_DELETED) != 0);
380 
381 	status_t status = inode->Free(transaction);
382 	if (status == B_OK) {
383 		status = transaction.Done();
384 	} else if (transaction.HasParent()) {
385 		// TODO: for now, we don't let sub-transactions fail
386 		status = transaction.Done();
387 	}
388 
389 	volume->RemovedInodes().Remove(inode);
390 
391 	// TODO: the VFS currently does not allow this to fail
392 	delete inode;
393 
394 	return status;
395 }
396 
397 
/*!	Paging hook; this implementation ignores all of its arguments and
	always returns \c false.
*/
static bool
bfs_can_page(fs_volume* _volume, fs_vnode* _v, void* _cookie)
{
	// TODO: we're obviously not even asked...
	return false;
}
404 
405 
406 static status_t
407 bfs_read_pages(fs_volume* _volume, fs_vnode* _node, void* _cookie,
408 	off_t pos, const iovec* vecs, size_t count, size_t* _numBytes)
409 {
410 	Volume* volume = (Volume*)_volume->private_volume;
411 	Inode* inode = (Inode*)_node->private_node;
412 
413 	if (inode->FileCache() == NULL)
414 		RETURN_ERROR(B_BAD_VALUE);
415 
416 	InodeReadLocker _(inode);
417 
418 	uint32 vecIndex = 0;
419 	size_t vecOffset = 0;
420 	size_t bytesLeft = *_numBytes;
421 	status_t status;
422 
423 	while (true) {
424 		file_io_vec fileVecs[8];
425 		size_t fileVecCount = 8;
426 
427 		status = file_map_translate(inode->Map(), pos, bytesLeft, fileVecs,
428 			&fileVecCount, 0);
429 		if (status != B_OK && status != B_BUFFER_OVERFLOW)
430 			break;
431 
432 		bool bufferOverflow = status == B_BUFFER_OVERFLOW;
433 
434 		size_t bytes = bytesLeft;
435 		status = read_file_io_vec_pages(volume->Device(), fileVecs,
436 			fileVecCount, vecs, count, &vecIndex, &vecOffset, &bytes);
437 		if (status != B_OK || !bufferOverflow)
438 			break;
439 
440 		pos += bytes;
441 		bytesLeft -= bytes;
442 	}
443 
444 	return status;
445 }
446 
447 
448 static status_t
449 bfs_write_pages(fs_volume* _volume, fs_vnode* _node, void* _cookie,
450 	off_t pos, const iovec* vecs, size_t count, size_t* _numBytes)
451 {
452 	Volume* volume = (Volume*)_volume->private_volume;
453 	Inode* inode = (Inode*)_node->private_node;
454 
455 	if (volume->IsReadOnly())
456 		return B_READ_ONLY_DEVICE;
457 
458 	if (inode->FileCache() == NULL)
459 		RETURN_ERROR(B_BAD_VALUE);
460 
461 	InodeReadLocker _(inode);
462 
463 	uint32 vecIndex = 0;
464 	size_t vecOffset = 0;
465 	size_t bytesLeft = *_numBytes;
466 	status_t status;
467 
468 	while (true) {
469 		file_io_vec fileVecs[8];
470 		size_t fileVecCount = 8;
471 
472 		status = file_map_translate(inode->Map(), pos, bytesLeft, fileVecs,
473 			&fileVecCount, 0);
474 		if (status != B_OK && status != B_BUFFER_OVERFLOW)
475 			break;
476 
477 		bool bufferOverflow = status == B_BUFFER_OVERFLOW;
478 
479 		size_t bytes = bytesLeft;
480 		status = write_file_io_vec_pages(volume->Device(), fileVecs,
481 			fileVecCount, vecs, count, &vecIndex, &vecOffset, &bytes);
482 		if (status != B_OK || !bufferOverflow)
483 			break;
484 
485 		pos += bytes;
486 		bytesLeft -= bytes;
487 	}
488 
489 	return status;
490 }
491 
492 
493 static status_t
494 bfs_io(fs_volume* _volume, fs_vnode* _node, void* _cookie, io_request* request)
495 {
496 	Volume* volume = (Volume*)_volume->private_volume;
497 	Inode* inode = (Inode*)_node->private_node;
498 
499 #ifndef FS_SHELL
500 	if (io_request_is_write(request) && volume->IsReadOnly()) {
501 		notify_io_request(request, B_READ_ONLY_DEVICE);
502 		return B_READ_ONLY_DEVICE;
503 	}
504 #endif
505 
506 	if (inode->FileCache() == NULL) {
507 #ifndef FS_SHELL
508 		notify_io_request(request, B_BAD_VALUE);
509 #endif
510 		RETURN_ERROR(B_BAD_VALUE);
511 	}
512 
513 	// We lock the node here and will unlock it in the "finished" hook.
514 	rw_lock_read_lock(&inode->Lock());
515 
516 	return do_iterative_fd_io(volume->Device(), request,
517 		iterative_io_get_vecs_hook, iterative_io_finished_hook, inode);
518 }
519 
520 
/*!	Translates the file range starting at \a offset with length \a size
	into on-disk extents.

	\param vecs Array that receives the extents (device offset and length).
	\param _count On input, the number of entries available in \a vecs; on
		return, the number of entries that have been filled in.
	\return B_OK if the whole range (or everything up to the end of the
		file) has been translated, B_BUFFER_OVERFLOW if \a vecs was too small,
		or the error returned by Inode::FindBlockRun().
*/
static status_t
bfs_get_file_map(fs_volume* _volume, fs_vnode* _node, off_t offset, size_t size,
	struct file_io_vec* vecs, size_t* _count)
{
	Volume* volume = (Volume*)_volume->private_volume;
	Inode* inode = (Inode*)_node->private_node;

	int32 blockShift = volume->BlockShift();
	uint32 index = 0, max = *_count;
	block_run run;
	off_t fileOffset;

	//FUNCTION_START(("offset = %Ld, size = %lu\n", offset, size));

	while (true) {
		// find the block run that contains the current offset; fileOffset
		// is set by FindBlockRun() and used below as the file position at
		// which that run starts
		status_t status = inode->FindBlockRun(offset, run, fileOffset);
		if (status != B_OK)
			return status;

		// the extent starts at "offset" within the run, and covers the
		// remainder of the run
		vecs[index].offset = volume->ToOffset(run) + offset - fileOffset;
		vecs[index].length = ((uint32)run.Length() << blockShift)
			- offset + fileOffset;

		// are we already done?
		if ((uint64)size <= (uint64)vecs[index].length
			|| (uint64)offset + (uint64)vecs[index].length
				>= (uint64)inode->Size()) {
			if ((uint64)offset + (uint64)vecs[index].length
					> (uint64)inode->Size()) {
				// make sure the extent ends with the last official file
				// block (without taking any preallocations into account)
				vecs[index].length = round_up(inode->Size() - offset,
					volume->BlockSize());
			}
			*_count = index + 1;
			return B_OK;
		}

		// continue with the next run
		offset += vecs[index].length;
		size -= vecs[index].length;
		index++;

		if (index >= max) {
			// we're out of file_io_vecs; let's bail out
			*_count = index;
			return B_BUFFER_OVERFLOW;
		}
	}

	// can never get here
	return B_ERROR;
}
573 
574 
575 //	#pragma mark -
576 
577 
578 static status_t
579 bfs_lookup(fs_volume* _volume, fs_vnode* _directory, const char* file,
580 	ino_t* _vnodeID)
581 {
582 	Volume* volume = (Volume*)_volume->private_volume;
583 	Inode* directory = (Inode*)_directory->private_node;
584 
585 	InodeReadLocker locker(directory);
586 
587 	// check access permissions
588 	status_t status = directory->CheckPermissions(X_OK);
589 	if (status != B_OK)
590 		RETURN_ERROR(status);
591 
592 	BPlusTree* tree = directory->Tree();
593 	if (tree == NULL)
594 		RETURN_ERROR(B_BAD_VALUE);
595 
596 	status = tree->Find((uint8*)file, (uint16)strlen(file), _vnodeID);
597 	if (status != B_OK) {
598 		//PRINT(("bfs_walk() could not find %Ld:\"%s\": %s\n", directory->BlockNumber(), file, strerror(status)));
599 		if (status == B_ENTRY_NOT_FOUND)
600 			entry_cache_add_missing(volume->ID(), directory->ID(), file);
601 
602 		return status;
603 	}
604 
605 	entry_cache_add(volume->ID(), directory->ID(), file, *_vnodeID);
606 
607 	locker.Unlock();
608 
609 	Inode* inode;
610 	status = get_vnode(volume->FSVolume(), *_vnodeID, (void**)&inode);
611 	if (status != B_OK) {
612 		REPORT_ERROR(status);
613 		return B_ENTRY_NOT_FOUND;
614 	}
615 
616 	return B_OK;
617 }
618 
619 
620 static status_t
621 bfs_get_vnode_name(fs_volume* _volume, fs_vnode* _node, char* buffer,
622 	size_t bufferSize)
623 {
624 	Inode* inode = (Inode*)_node->private_node;
625 
626 	return inode->GetName(buffer, bufferSize);
627 }
628 
629 
/*!	Handles the ioctl() commands supported by BFS:
	- B_TRIM_DEVICE (not in the FS shell): trims the requested ranges via
	  the block allocator, and reports the trimmed size back.
	- BFS_IOCTL_VERSION: copies the version number 0x10000 to \a buffer.
	- BFS_IOCTL_START_CHECKING, BFS_IOCTL_STOP_CHECKING, and
	  BFS_IOCTL_CHECK_NEXT_NODE: drive the block allocator's checking
	  functions, exchanging a check_control structure with the caller.
	- BFS_IOCTL_UPDATE_BOOT_BLOCK: overwrites part of the first 512 bytes
	  of the in-memory super block structure, and writes it back.
	- 56741 (DEBUG_FRAGMENTER only) and 56742 (DEBUG only): test helpers
	  for the block allocator.

	\return The status of the respective command, or B_DEV_INVALID_IOCTL if
		\a cmd is not one of the commands above.
*/
static status_t
bfs_ioctl(fs_volume* _volume, fs_vnode* _node, void* _cookie, uint32 cmd,
	void* buffer, size_t bufferLength)
{
	FUNCTION_START(("node = %p, cmd = %lu, buf = %p, len = %ld\n", _node, cmd,
		buffer, bufferLength));

	Volume* volume = (Volume*)_volume->private_volume;

	switch (cmd) {
#ifndef FS_SHELL
		case B_TRIM_DEVICE:
		{
			// retrieve the trim request from the caller's buffer
			fs_trim_data* trimData;
			MemoryDeleter deleter;
			status_t status = get_trim_data_from_user(buffer, bufferLength,
				deleter, trimData);
			if (status != B_OK)
				return status;

			trimData->trimmed_size = 0;

			// trim range by range, summing up what has been trimmed; the
			// first failing range aborts the request without reporting
			// the size trimmed so far
			for (uint32 i = 0; i < trimData->range_count; i++) {
				uint64 trimmedSize = 0;
				status_t status = volume->Allocator().Trim(
					trimData->ranges[i].offset, trimData->ranges[i].size,
					trimmedSize);
				if (status != B_OK)
					return status;

				trimData->trimmed_size += trimmedSize;
			}

			return copy_trim_data_to_user(buffer, trimData);
		}
#endif

		case BFS_IOCTL_VERSION:
		{
			uint32 version = 0x10000;
			return user_memcpy(buffer, &version, sizeof(uint32));
		}
		case BFS_IOCTL_START_CHECKING:
		{
			// start checking
			BlockAllocator& allocator = volume->Allocator();
			check_control control;
			if (user_memcpy(&control, buffer, sizeof(check_control)) != B_OK)
				return B_BAD_ADDRESS;

			status_t status = allocator.StartChecking(&control);
			if (status == B_OK) {
				// mark the cookie as belonging to the checking session
				file_cookie* cookie = (file_cookie*)_cookie;
				cookie->open_mode |= BFS_OPEN_MODE_CHECKING;
			}

			return status;
		}
		case BFS_IOCTL_STOP_CHECKING:
		{
			// stop checking
			BlockAllocator& allocator = volume->Allocator();
			check_control control;

			status_t status = allocator.StopChecking(&control);
			if (status == B_OK) {
				file_cookie* cookie = (file_cookie*)_cookie;
				cookie->open_mode &= ~BFS_OPEN_MODE_CHECKING;
			}
			// hand the final check_control back to the caller
			if (status == B_OK)
				status = user_memcpy(buffer, &control, sizeof(check_control));

			return status;
		}
		case BFS_IOCTL_CHECK_NEXT_NODE:
		{
			// check next
			BlockAllocator& allocator = volume->Allocator();
			check_control control;

			status_t status = allocator.CheckNextNode(&control);
			if (status == B_OK)
				status = user_memcpy(buffer, &control, sizeof(check_control));

			return status;
		}
		case BFS_IOCTL_UPDATE_BOOT_BLOCK:
		{
			// let's makebootable (or anyone else) update the boot block
			// while BFS is mounted
			update_boot_block update;
			if (bufferLength != sizeof(update_boot_block))
				return B_BAD_VALUE;
			if (user_memcpy(&update, buffer, sizeof(update_boot_block)) != B_OK)
				return B_BAD_ADDRESS;

			// only the area from pad_to_block up to byte 512 may be
			// overwritten, the super block fields before it are off-limits
			uint32 minOffset = offsetof(disk_super_block, pad_to_block);
			if (update.offset < minOffset
				|| update.offset >= 512 || update.length > 512 - minOffset
				|| update.length + update.offset > 512) {
				return B_BAD_VALUE;
			}
			if (user_memcpy((uint8*)&volume->SuperBlock() + update.offset,
					update.data, update.length) != B_OK) {
				return B_BAD_ADDRESS;
			}

			return volume->WriteSuperBlock();
		}

#ifdef DEBUG_FRAGMENTER
		case 56741:
		{
			BlockAllocator& allocator = volume->Allocator();
			allocator.Fragment();
			return B_OK;
		}
#endif

#ifdef DEBUG
		case 56742:
		{
			// allocate all free blocks and zero them out
			// (a test for the BlockAllocator)!
			BlockAllocator& allocator = volume->Allocator();
			Transaction transaction(volume, 0);
			CachedBlock cached(volume);
			block_run run;
			while (allocator.AllocateBlocks(transaction, 8, 0, 64, 1, run)
					== B_OK) {
				PRINT(("write block_run(%ld, %d, %d)\n", run.allocation_group,
					run.start, run.length));
				for (int32 i = 0;i < run.length;i++) {
					uint8* block = cached.SetToWritable(transaction, run);
					if (block != NULL)
						memset(block, 0, volume->BlockSize());
				}
			}
			return B_OK;
		}
#endif
	}
	return B_DEV_INVALID_IOCTL;
}
774 
775 
776 /*!	Sets the open-mode flags for the open file cookie - only
777 	supports O_APPEND currently, but that should be sufficient
778 	for a file system.
779 */
780 static status_t
781 bfs_set_flags(fs_volume* _volume, fs_vnode* _node, void* _cookie, int flags)
782 {
783 	FUNCTION_START(("node = %p, flags = %d", _node, flags));
784 
785 	file_cookie* cookie = (file_cookie*)_cookie;
786 	cookie->open_mode = (cookie->open_mode & ~O_APPEND) | (flags & O_APPEND);
787 
788 	return B_OK;
789 }
790 
791 
792 static status_t
793 bfs_fsync(fs_volume* _volume, fs_vnode* _node)
794 {
795 	FUNCTION();
796 
797 	Inode* inode = (Inode*)_node->private_node;
798 	return inode->Sync();
799 }
800 
801 
802 static status_t
803 bfs_read_stat(fs_volume* _volume, fs_vnode* _node, struct stat* stat)
804 {
805 	FUNCTION();
806 
807 	Inode* inode = (Inode*)_node->private_node;
808 	fill_stat_buffer(inode, *stat);
809 	return B_OK;
810 }
811 
812 
/*!	Changes the stat fields selected by \a mask to the values found in
	\a stat, and writes the inode back within a single transaction.

	Permission rules as implemented below:
	- size: requires write access to the node
	- user ID: only root
	- group ID and mode: only the owner of the node or root
	- creation, modification, and change time: the owner, root, or anyone
	  with write access

	The change time is also updated to the current time whenever the size,
	user ID, group ID, or mode has been changed, unless B_STAT_CHANGE_TIME
	is part of \a mask.

	\return B_OK on success, B_READ_ONLY_DEVICE on a read-only volume,
		B_IS_A_DIRECTORY or B_BAD_VALUE when trying to resize something that
		is not a file, B_NOT_ALLOWED on insufficient permissions, or the
		error of the failing operation.
*/
static status_t
bfs_write_stat(fs_volume* _volume, fs_vnode* _node, const struct stat* stat,
	uint32 mask)
{
	FUNCTION();

	Volume* volume = (Volume*)_volume->private_volume;
	Inode* inode = (Inode*)_node->private_node;

	if (volume->IsReadOnly())
		return B_READ_ONLY_DEVICE;

	// TODO: we should definitely check a bit more if the new stats are
	//	valid - or even better, the VFS should check this before calling us

	bfs_inode& node = inode->Node();
	bool updateTime = false;
	uid_t uid = geteuid();

	// note that the permissions are evaluated before the inode is locked
	bool isOwnerOrRoot = uid == 0 || uid == (uid_t)node.UserID();
	bool hasWriteAccess = inode->CheckPermissions(W_OK) == B_OK;

	Transaction transaction(volume, inode->BlockNumber());
	inode->WriteLockInTransaction(transaction);

	if ((mask & B_STAT_SIZE) != 0 && inode->Size() != stat->st_size) {
		// Since B_STAT_SIZE is the only thing that can fail directly, we
		// do it first, so that the inode state will still be consistent
		// with the on-disk version
		if (inode->IsDirectory())
			return B_IS_A_DIRECTORY;
		if (!inode->IsFile())
			return B_BAD_VALUE;
		if (!hasWriteAccess)
			RETURN_ERROR(B_NOT_ALLOWED);

		off_t oldSize = inode->Size();

		status_t status = inode->SetFileSize(transaction, stat->st_size);
		if (status != B_OK)
			return status;

		// fill the new blocks (if any) with zeros
		if ((mask & B_STAT_SIZE_INSECURE) == 0) {
			// We must not keep the inode locked during a write operation,
			// or else we might deadlock.
			rw_lock_write_unlock(&inode->Lock());
			inode->FillGapWithZeros(oldSize, inode->Size());
			rw_lock_write_lock(&inode->Lock());
		}

		// the size index is only updated for inodes that still exist
		if (!inode->IsDeleted()) {
			Index index(volume);
			index.UpdateSize(transaction, inode);

			updateTime = true;
		}
	}

	if ((mask & B_STAT_UID) != 0) {
		// only root should be allowed
		if (uid != 0)
			RETURN_ERROR(B_NOT_ALLOWED);
		node.uid = HOST_ENDIAN_TO_BFS_INT32(stat->st_uid);
		updateTime = true;
	}

	if ((mask & B_STAT_GID) != 0) {
		// only the user or root can do that
		if (!isOwnerOrRoot)
			RETURN_ERROR(B_NOT_ALLOWED);
		node.gid = HOST_ENDIAN_TO_BFS_INT32(stat->st_gid);
		updateTime = true;
	}

	if ((mask & B_STAT_MODE) != 0) {
		// only the user or root can do that
		if (!isOwnerOrRoot)
			RETURN_ERROR(B_NOT_ALLOWED);
		PRINT(("original mode = %ld, stat->st_mode = %d\n", node.Mode(),
			stat->st_mode));
		// only the bits covered by S_IUMSK are replaced, all other mode
		// bits are retained
		node.mode = HOST_ENDIAN_TO_BFS_INT32((node.Mode() & ~S_IUMSK)
			| (stat->st_mode & S_IUMSK));
		updateTime = true;
	}

	if ((mask & B_STAT_CREATION_TIME) != 0) {
		// the user or root can do that or any user with write access
		if (!isOwnerOrRoot && !hasWriteAccess)
			RETURN_ERROR(B_NOT_ALLOWED);
		node.create_time
			= HOST_ENDIAN_TO_BFS_INT64(bfs_inode::ToInode(stat->st_crtim));
	}

	if ((mask & B_STAT_MODIFICATION_TIME) != 0) {
		// the user or root can do that or any user with write access
		if (!isOwnerOrRoot && !hasWriteAccess)
			RETURN_ERROR(B_NOT_ALLOWED);
		if (!inode->InLastModifiedIndex()) {
			// directory modification times are not part of the index
			node.last_modified_time
				= HOST_ENDIAN_TO_BFS_INT64(bfs_inode::ToInode(stat->st_mtim));
		} else if (!inode->IsDeleted()) {
			// Index::UpdateLastModified() will set the new time in the inode
			Index index(volume);
			index.UpdateLastModified(transaction, inode,
				bfs_inode::ToInode(stat->st_mtim));
		}
	}

	if ((mask & B_STAT_CHANGE_TIME) != 0 || updateTime) {
		// the user or root can do that or any user with write access
		if (!isOwnerOrRoot && !hasWriteAccess)
			RETURN_ERROR(B_NOT_ALLOWED);
		// an explicitly requested change time takes precedence over the
		// implicit update to the current time
		bigtime_t newTime;
		if ((mask & B_STAT_CHANGE_TIME) == 0)
			newTime = bfs_inode::ToInode(real_time_clock_usecs());
		else
			newTime = bfs_inode::ToInode(stat->st_ctim);

		node.status_change_time = HOST_ENDIAN_TO_BFS_INT64(newTime);
	}

	// listeners are only notified once the transaction went through
	status_t status = inode->WriteBack(transaction);
	if (status == B_OK)
		status = transaction.Done();
	if (status == B_OK)
		notify_stat_changed(volume->ID(), inode->ParentID(), inode->ID(), mask);

	return status;
}
944 
945 
946 status_t
947 bfs_create(fs_volume* _volume, fs_vnode* _directory, const char* name,
948 	int openMode, int mode, void** _cookie, ino_t* _vnodeID)
949 {
950 	FUNCTION_START(("name = \"%s\", perms = %d, openMode = %d\n", name, mode,
951 		openMode));
952 
953 	Volume* volume = (Volume*)_volume->private_volume;
954 	Inode* directory = (Inode*)_directory->private_node;
955 
956 	if (volume->IsReadOnly())
957 		return B_READ_ONLY_DEVICE;
958 
959 	if (!directory->IsDirectory())
960 		RETURN_ERROR(B_BAD_TYPE);
961 
962 	// We are creating the cookie at this point, so that we don't have
963 	// to remove the inode if we don't have enough free memory later...
964 	file_cookie* cookie = new(std::nothrow) file_cookie;
965 	if (cookie == NULL)
966 		RETURN_ERROR(B_NO_MEMORY);
967 
968 	// initialize the cookie
969 	cookie->open_mode = openMode;
970 	cookie->last_size = 0;
971 	cookie->last_notification = system_time();
972 
973 	Transaction transaction(volume, directory->BlockNumber());
974 
975 	Inode* inode;
976 	bool created;
977 	status_t status = Inode::Create(transaction, directory, name,
978 		S_FILE | (mode & S_IUMSK), openMode, 0, &created, _vnodeID, &inode);
979 
980 	// Disable the file cache, if requested?
981 	if (status == B_OK && (openMode & O_NOCACHE) != 0
982 		&& inode->FileCache() != NULL) {
983 		status = file_cache_disable(inode->FileCache());
984 	}
985 
986 	entry_cache_add(volume->ID(), directory->ID(), name, *_vnodeID);
987 
988 	if (status == B_OK)
989 		status = transaction.Done();
990 
991 	if (status == B_OK) {
992 		// register the cookie
993 		*_cookie = cookie;
994 
995 		if (created) {
996 			notify_entry_created(volume->ID(), directory->ID(), name,
997 				*_vnodeID);
998 		}
999 	} else {
1000 		entry_cache_remove(volume->ID(), directory->ID(), name);
1001 		delete cookie;
1002 	}
1003 
1004 	return status;
1005 }
1006 
1007 
1008 static status_t
1009 bfs_create_symlink(fs_volume* _volume, fs_vnode* _directory, const char* name,
1010 	const char* path, int mode)
1011 {
1012 	FUNCTION_START(("name = \"%s\", path = \"%s\"\n", name, path));
1013 
1014 	Volume* volume = (Volume*)_volume->private_volume;
1015 	Inode* directory = (Inode*)_directory->private_node;
1016 
1017 	if (volume->IsReadOnly())
1018 		return B_READ_ONLY_DEVICE;
1019 
1020 	if (!directory->IsDirectory())
1021 		RETURN_ERROR(B_BAD_TYPE);
1022 
1023 	status_t status = directory->CheckPermissions(W_OK);
1024 	if (status < B_OK)
1025 		RETURN_ERROR(status);
1026 
1027 	Transaction transaction(volume, directory->BlockNumber());
1028 
1029 	Inode* link;
1030 	off_t id;
1031 	status = Inode::Create(transaction, directory, name, S_SYMLINK | 0777,
1032 		0, 0, NULL, &id, &link);
1033 	if (status < B_OK)
1034 		RETURN_ERROR(status);
1035 
1036 	size_t length = strlen(path);
1037 	if (length < SHORT_SYMLINK_NAME_LENGTH) {
1038 		strcpy(link->Node().short_symlink, path);
1039 	} else {
1040 		link->Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_LONG_SYMLINK
1041 			| INODE_LOGGED);
1042 
1043 		// links usually don't have a file cache attached - but we now need one
1044 		link->SetFileCache(file_cache_create(volume->ID(), link->ID(), 0));
1045 		link->SetMap(file_map_create(volume->ID(), link->ID(), 0));
1046 
1047 		// The following call will have to write the inode back, so
1048 		// we don't have to do that here...
1049 		status = link->WriteAt(transaction, 0, (const uint8*)path, &length);
1050 	}
1051 
1052 	if (status == B_OK)
1053 		status = link->WriteBack(transaction);
1054 
1055 	// Inode::Create() left the inode locked in memory, and also doesn't
1056 	// publish links
1057 	publish_vnode(volume->FSVolume(), id, link, &gBFSVnodeOps, link->Mode(), 0);
1058 	put_vnode(volume->FSVolume(), id);
1059 
1060 	if (status == B_OK) {
1061 		entry_cache_add(volume->ID(), directory->ID(), name, id);
1062 
1063 		status = transaction.Done();
1064 		if (status == B_OK)
1065 			notify_entry_created(volume->ID(), directory->ID(), name, id);
1066 		else
1067 			entry_cache_remove(volume->ID(), directory->ID(), name);
1068 	}
1069 
1070 	return status;
1071 }
1072 
1073 
/*!	Hard links are not supported by BFS; this hook ignores its arguments
	(apart from tracing \a name) and always returns B_UNSUPPORTED.
*/
status_t
bfs_link(fs_volume* _volume, fs_vnode* dir, const char* name, fs_vnode* node)
{
	FUNCTION_START(("name = \"%s\"\n", name));

	// This one won't be implemented in a binary compatible BFS
	return B_UNSUPPORTED;
}
1082 
1083 
1084 status_t
1085 bfs_unlink(fs_volume* _volume, fs_vnode* _directory, const char* name)
1086 {
1087 	FUNCTION_START(("name = \"%s\"\n", name));
1088 
1089 	if (!strcmp(name, "..") || !strcmp(name, "."))
1090 		return B_NOT_ALLOWED;
1091 
1092 	Volume* volume = (Volume*)_volume->private_volume;
1093 	Inode* directory = (Inode*)_directory->private_node;
1094 
1095 	status_t status = directory->CheckPermissions(W_OK);
1096 	if (status < B_OK)
1097 		return status;
1098 
1099 	Transaction transaction(volume, directory->BlockNumber());
1100 
1101 	off_t id;
1102 	status = directory->Remove(transaction, name, &id);
1103 	if (status == B_OK) {
1104 		entry_cache_remove(volume->ID(), directory->ID(), name);
1105 
1106 		status = transaction.Done();
1107 		if (status == B_OK)
1108 			notify_entry_removed(volume->ID(), directory->ID(), name, id);
1109 		else
1110 			entry_cache_add(volume->ID(), directory->ID(), name, id);
1111 	}
1112 	return status;
1113 }
1114 
1115 
1116 status_t
1117 bfs_rename(fs_volume* _volume, fs_vnode* _oldDir, const char* oldName,
1118 	fs_vnode* _newDir, const char* newName)
1119 {
1120 	FUNCTION_START(("oldDir = %p, oldName = \"%s\", newDir = %p, newName = "
1121 		"\"%s\"\n", _oldDir, oldName, _newDir, newName));
1122 
1123 	Volume* volume = (Volume*)_volume->private_volume;
1124 	Inode* oldDirectory = (Inode*)_oldDir->private_node;
1125 	Inode* newDirectory = (Inode*)_newDir->private_node;
1126 
1127 	// are we already done?
1128 	if (oldDirectory == newDirectory && !strcmp(oldName, newName))
1129 		return B_OK;
1130 
1131 	Transaction transaction(volume, oldDirectory->BlockNumber());
1132 
1133 	oldDirectory->WriteLockInTransaction(transaction);
1134 	if (oldDirectory != newDirectory)
1135 		newDirectory->WriteLockInTransaction(transaction);
1136 
1137 	// are we allowed to do what we've been told?
1138 	status_t status = oldDirectory->CheckPermissions(W_OK);
1139 	if (status == B_OK)
1140 		status = newDirectory->CheckPermissions(W_OK);
1141 	if (status != B_OK)
1142 		return status;
1143 
1144 	// Get the directory's tree, and a pointer to the inode which should be
1145 	// changed
1146 	BPlusTree* tree = oldDirectory->Tree();
1147 	if (tree == NULL)
1148 		RETURN_ERROR(B_BAD_VALUE);
1149 
1150 	off_t id;
1151 	status = tree->Find((const uint8*)oldName, strlen(oldName), &id);
1152 	if (status != B_OK)
1153 		RETURN_ERROR(status);
1154 
1155 	Vnode vnode(volume, id);
1156 	Inode* inode;
1157 	if (vnode.Get(&inode) != B_OK)
1158 		return B_IO_ERROR;
1159 
1160 	// Don't move a directory into one of its children - we soar up
1161 	// from the newDirectory to either the root node or the old
1162 	// directory, whichever comes first.
1163 	// If we meet our inode on that way, we have to bail out.
1164 
1165 	if (oldDirectory != newDirectory) {
1166 		ino_t parent = newDirectory->ID();
1167 		ino_t root = volume->RootNode()->ID();
1168 
1169 		while (true) {
1170 			if (parent == id)
1171 				return B_BAD_VALUE;
1172 			else if (parent == root || parent == oldDirectory->ID())
1173 				break;
1174 
1175 			Vnode vnode(volume, parent);
1176 			Inode* parentNode;
1177 			if (vnode.Get(&parentNode) != B_OK)
1178 				return B_ERROR;
1179 
1180 			parent = volume->ToVnode(parentNode->Parent());
1181 		}
1182 	}
1183 
1184 	// Everything okay? Then lets get to work...
1185 
1186 	// First, try to make sure there is nothing that will stop us in
1187 	// the target directory - since this is the only non-critical
1188 	// failure, we will test this case first
1189 	BPlusTree* newTree = tree;
1190 	if (newDirectory != oldDirectory) {
1191 		newTree = newDirectory->Tree();
1192 		if (newTree == NULL)
1193 			RETURN_ERROR(B_BAD_VALUE);
1194 	}
1195 
1196 	status = newTree->Insert(transaction, (const uint8*)newName,
1197 		strlen(newName), id);
1198 	if (status == B_NAME_IN_USE) {
1199 		// If there is already a file with that name, we have to remove
1200 		// it, as long it's not a directory with files in it
1201 		off_t clobber;
1202 		if (newTree->Find((const uint8*)newName, strlen(newName), &clobber)
1203 				< B_OK)
1204 			return B_NAME_IN_USE;
1205 		if (clobber == id)
1206 			return B_BAD_VALUE;
1207 
1208 		Vnode vnode(volume, clobber);
1209 		Inode* other;
1210 		if (vnode.Get(&other) < B_OK)
1211 			return B_NAME_IN_USE;
1212 
1213 		// only allowed, if either both nodes are directories or neither is
1214 		if (inode->IsDirectory() != other->IsDirectory())
1215 			return other->IsDirectory() ? B_IS_A_DIRECTORY : B_NOT_A_DIRECTORY;
1216 
1217 		status = newDirectory->Remove(transaction, newName, NULL,
1218 			other->IsDirectory());
1219 		if (status < B_OK)
1220 			return status;
1221 
1222 		entry_cache_remove(volume->ID(), newDirectory->ID(), newName);
1223 
1224 		notify_entry_removed(volume->ID(), newDirectory->ID(), newName,
1225 			clobber);
1226 
1227 		status = newTree->Insert(transaction, (const uint8*)newName,
1228 			strlen(newName), id);
1229 	}
1230 	if (status != B_OK)
1231 		return status;
1232 
1233 	inode->WriteLockInTransaction(transaction);
1234 
1235 	volume->UpdateLiveQueriesRenameMove(inode, oldDirectory->ID(), oldName,
1236 		newDirectory->ID(), newName);
1237 
1238 	// update the name only when they differ
1239 	if (strcmp(oldName, newName)) {
1240 		status = inode->SetName(transaction, newName);
1241 		if (status == B_OK) {
1242 			Index index(volume);
1243 			index.UpdateName(transaction, oldName, newName, inode);
1244 		}
1245 	}
1246 
1247 	if (status == B_OK) {
1248 		status = tree->Remove(transaction, (const uint8*)oldName,
1249 			strlen(oldName), id);
1250 		if (status == B_OK) {
1251 			inode->Parent() = newDirectory->BlockRun();
1252 
1253 			// if it's a directory, update the parent directory pointer
1254 			// in its tree if necessary
1255 			BPlusTree* movedTree = inode->Tree();
1256 			if (oldDirectory != newDirectory
1257 				&& inode->IsDirectory()
1258 				&& movedTree != NULL) {
1259 				status = movedTree->Replace(transaction, (const uint8*)"..",
1260 					2, newDirectory->ID());
1261 
1262 				if (status == B_OK) {
1263 					// update/add the cache entry for the parent
1264 					entry_cache_add(volume->ID(), id, "..", newDirectory->ID());
1265 				}
1266 			}
1267 
1268 			if (status == B_OK && newDirectory != oldDirectory)
1269 				status = oldDirectory->ContainerContentsChanged(transaction);
1270 			if (status == B_OK)
1271 				status = newDirectory->ContainerContentsChanged(transaction);
1272 
1273 			if (status == B_OK)
1274 				status = inode->WriteBack(transaction);
1275 
1276 			if (status == B_OK) {
1277 				entry_cache_remove(volume->ID(), oldDirectory->ID(), oldName);
1278 				entry_cache_add(volume->ID(), newDirectory->ID(), newName, id);
1279 
1280 				status = transaction.Done();
1281 				if (status == B_OK) {
1282 					notify_entry_moved(volume->ID(), oldDirectory->ID(),
1283 						oldName, newDirectory->ID(), newName, id);
1284 					return B_OK;
1285 				}
1286 
1287 				entry_cache_remove(volume->ID(), newDirectory->ID(), newName);
1288 				entry_cache_add(volume->ID(), oldDirectory->ID(), oldName, id);
1289 			}
1290 		}
1291 	}
1292 
1293 	return status;
1294 }
1295 
1296 
1297 static status_t
1298 bfs_open(fs_volume* _volume, fs_vnode* _node, int openMode, void** _cookie)
1299 {
1300 	FUNCTION();
1301 
1302 	Volume* volume = (Volume*)_volume->private_volume;
1303 	Inode* inode = (Inode*)_node->private_node;
1304 
1305 	// Opening a directory read-only is allowed, although you can't read
1306 	// any data from it.
1307 	if (inode->IsDirectory() && (openMode & O_RWMASK) != O_RDONLY)
1308 		return B_IS_A_DIRECTORY;
1309 	if ((openMode & O_DIRECTORY) != 0 && !inode->IsDirectory())
1310 		return B_NOT_A_DIRECTORY;
1311 
1312 	status_t status = inode->CheckPermissions(open_mode_to_access(openMode)
1313 		| ((openMode & O_TRUNC) != 0 ? W_OK : 0));
1314 	if (status != B_OK)
1315 		RETURN_ERROR(status);
1316 
1317 	file_cookie* cookie = new(std::nothrow) file_cookie;
1318 	if (cookie == NULL)
1319 		RETURN_ERROR(B_NO_MEMORY);
1320 	ObjectDeleter<file_cookie> cookieDeleter(cookie);
1321 
1322 	// initialize the cookie
1323 	cookie->open_mode = openMode & BFS_OPEN_MODE_USER_MASK;
1324 	cookie->last_size = inode->Size();
1325 	cookie->last_notification = system_time();
1326 
1327 	// Disable the file cache, if requested?
1328 	CObjectDeleter<void> fileCacheEnabler(file_cache_enable);
1329 	if ((openMode & O_NOCACHE) != 0 && inode->FileCache() != NULL) {
1330 		status = file_cache_disable(inode->FileCache());
1331 		if (status != B_OK)
1332 			return status;
1333 		fileCacheEnabler.SetTo(inode->FileCache());
1334 	}
1335 
1336 	// Should we truncate the file?
1337 	if ((openMode & O_TRUNC) != 0) {
1338 		if ((openMode & O_RWMASK) == O_RDONLY)
1339 			return B_NOT_ALLOWED;
1340 
1341 		Transaction transaction(volume, inode->BlockNumber());
1342 		inode->WriteLockInTransaction(transaction);
1343 
1344 		status_t status = inode->SetFileSize(transaction, 0);
1345 		if (status == B_OK)
1346 			status = inode->WriteBack(transaction);
1347 		if (status == B_OK)
1348 			status = transaction.Done();
1349 		if (status != B_OK)
1350 			return status;
1351 	}
1352 
1353 	fileCacheEnabler.Detach();
1354 	cookieDeleter.Detach();
1355 	*_cookie = cookie;
1356 	return B_OK;
1357 }
1358 
1359 
1360 static status_t
1361 bfs_read(fs_volume* _volume, fs_vnode* _node, void* _cookie, off_t pos,
1362 	void* buffer, size_t* _length)
1363 {
1364 	//FUNCTION();
1365 	Inode* inode = (Inode*)_node->private_node;
1366 
1367 	if (!inode->HasUserAccessableStream()) {
1368 		*_length = 0;
1369 		return inode->IsDirectory() ? B_IS_A_DIRECTORY : B_BAD_VALUE;
1370 	}
1371 
1372 	return inode->ReadAt(pos, (uint8*)buffer, _length);
1373 }
1374 
1375 
1376 static status_t
1377 bfs_write(fs_volume* _volume, fs_vnode* _node, void* _cookie, off_t pos,
1378 	const void* buffer, size_t* _length)
1379 {
1380 	//FUNCTION();
1381 	Volume* volume = (Volume*)_volume->private_volume;
1382 	Inode* inode = (Inode*)_node->private_node;
1383 
1384 	if (volume->IsReadOnly())
1385 		return B_READ_ONLY_DEVICE;
1386 
1387 	if (!inode->HasUserAccessableStream()) {
1388 		*_length = 0;
1389 		return inode->IsDirectory() ? B_IS_A_DIRECTORY : B_BAD_VALUE;
1390 	}
1391 
1392 	file_cookie* cookie = (file_cookie*)_cookie;
1393 
1394 	if (cookie->open_mode & O_APPEND)
1395 		pos = inode->Size();
1396 
1397 	Transaction transaction;
1398 		// We are not starting the transaction here, since
1399 		// it might not be needed at all (the contents of
1400 		// regular files aren't logged)
1401 
1402 	status_t status = inode->WriteAt(transaction, pos, (const uint8*)buffer,
1403 		_length);
1404 	if (status == B_OK)
1405 		status = transaction.Done();
1406 	if (status == B_OK) {
1407 		InodeReadLocker locker(inode);
1408 
1409 		// periodically notify if the file size has changed
1410 		// TODO: should we better test for a change in the last_modified time only?
1411 		if (!inode->IsDeleted() && cookie->last_size != inode->Size()
1412 			&& system_time() > cookie->last_notification
1413 					+ INODE_NOTIFICATION_INTERVAL) {
1414 			notify_stat_changed(volume->ID(), inode->ParentID(), inode->ID(),
1415 				B_STAT_MODIFICATION_TIME | B_STAT_SIZE | B_STAT_INTERIM_UPDATE);
1416 			cookie->last_size = inode->Size();
1417 			cookie->last_notification = system_time();
1418 		}
1419 	}
1420 
1421 	return status;
1422 }
1423 
1424 
1425 static status_t
1426 bfs_close(fs_volume* _volume, fs_vnode* _node, void* _cookie)
1427 {
1428 	FUNCTION();
1429 	return B_OK;
1430 }
1431 
1432 
1433 static status_t
1434 bfs_free_cookie(fs_volume* _volume, fs_vnode* _node, void* _cookie)
1435 {
1436 	FUNCTION();
1437 
1438 	file_cookie* cookie = (file_cookie*)_cookie;
1439 	Volume* volume = (Volume*)_volume->private_volume;
1440 	Inode* inode = (Inode*)_node->private_node;
1441 
1442 	Transaction transaction;
1443 	bool needsTrimming = false;
1444 
1445 	if (!volume->IsReadOnly() && !volume->IsCheckingThread()) {
1446 		InodeReadLocker locker(inode);
1447 		needsTrimming = inode->NeedsTrimming();
1448 
1449 		if ((cookie->open_mode & O_RWMASK) != 0
1450 			&& !inode->IsDeleted()
1451 			&& (needsTrimming
1452 				|| inode->OldLastModified() != inode->LastModified()
1453 				|| (inode->InSizeIndex()
1454 					// TODO: this can prevent the size update notification
1455 					// for nodes not in the index!
1456 					&& inode->OldSize() != inode->Size()))) {
1457 			locker.Unlock();
1458 			transaction.Start(volume, inode->BlockNumber());
1459 		}
1460 	}
1461 
1462 	status_t status = transaction.IsStarted() ? B_OK : B_ERROR;
1463 
1464 	if (status == B_OK) {
1465 		inode->WriteLockInTransaction(transaction);
1466 
1467 		// trim the preallocated blocks and update the size,
1468 		// and last_modified indices if needed
1469 		bool changedSize = false, changedTime = false;
1470 		Index index(volume);
1471 
1472 		if (needsTrimming) {
1473 			status = inode->TrimPreallocation(transaction);
1474 			if (status < B_OK) {
1475 				FATAL(("Could not trim preallocated blocks: inode %" B_PRIdINO
1476 					", transaction %d: %s!\n", inode->ID(),
1477 					(int)transaction.ID(), strerror(status)));
1478 
1479 				// we still want this transaction to succeed
1480 				status = B_OK;
1481 			}
1482 		}
1483 		if (inode->OldSize() != inode->Size()) {
1484 			if (inode->InSizeIndex())
1485 				index.UpdateSize(transaction, inode);
1486 			changedSize = true;
1487 		}
1488 		if (inode->OldLastModified() != inode->LastModified()) {
1489 			if (inode->InLastModifiedIndex()) {
1490 				index.UpdateLastModified(transaction, inode,
1491 					inode->LastModified());
1492 			}
1493 			changedTime = true;
1494 
1495 			// updating the index doesn't write back the inode
1496 			inode->WriteBack(transaction);
1497 		}
1498 
1499 		if (changedSize || changedTime) {
1500 			notify_stat_changed(volume->ID(), inode->ParentID(), inode->ID(),
1501 				(changedTime ? B_STAT_MODIFICATION_TIME : 0)
1502 				| (changedSize ? B_STAT_SIZE : 0));
1503 		}
1504 	}
1505 	if (status == B_OK)
1506 		transaction.Done();
1507 
1508 	if ((cookie->open_mode & BFS_OPEN_MODE_CHECKING) != 0) {
1509 		// "chkbfs" exited abnormally, so we have to stop it here...
1510 		FATAL(("check process was aborted!\n"));
1511 		volume->Allocator().StopChecking(NULL);
1512 	}
1513 
1514 	if ((cookie->open_mode & O_NOCACHE) != 0 && inode->FileCache() != NULL)
1515 		file_cache_enable(inode->FileCache());
1516 
1517 	delete cookie;
1518 	return B_OK;
1519 }
1520 
1521 
1522 /*!	Checks access permissions, return B_NOT_ALLOWED if the action
1523 	is not allowed.
1524 */
1525 static status_t
1526 bfs_access(fs_volume* _volume, fs_vnode* _node, int accessMode)
1527 {
1528 	//FUNCTION();
1529 
1530 	Inode* inode = (Inode*)_node->private_node;
1531 	status_t status = inode->CheckPermissions(accessMode);
1532 	if (status < B_OK)
1533 		RETURN_ERROR(status);
1534 
1535 	return B_OK;
1536 }
1537 
1538 
1539 static status_t
1540 bfs_read_link(fs_volume* _volume, fs_vnode* _node, char* buffer,
1541 	size_t* _bufferSize)
1542 {
1543 	FUNCTION();
1544 
1545 	Inode* inode = (Inode*)_node->private_node;
1546 
1547 	if (!inode->IsSymLink())
1548 		RETURN_ERROR(B_BAD_VALUE);
1549 
1550 	if ((inode->Flags() & INODE_LONG_SYMLINK) != 0) {
1551 		if ((uint64)inode->Size() < (uint64)*_bufferSize)
1552 			*_bufferSize = inode->Size();
1553 
1554 		status_t status = inode->ReadAt(0, (uint8*)buffer, _bufferSize);
1555 		if (status < B_OK)
1556 			RETURN_ERROR(status);
1557 
1558 		return B_OK;
1559 	}
1560 
1561 	size_t linkLen = strlen(inode->Node().short_symlink);
1562 	if (linkLen < *_bufferSize)
1563 		*_bufferSize = linkLen;
1564 
1565 	return user_memcpy(buffer, inode->Node().short_symlink, *_bufferSize);
1566 }
1567 
1568 
1569 //	#pragma mark - Directory functions
1570 
1571 
1572 static status_t
1573 bfs_create_dir(fs_volume* _volume, fs_vnode* _directory, const char* name,
1574 	int mode)
1575 {
1576 	FUNCTION_START(("name = \"%s\", perms = %d\n", name, mode));
1577 
1578 	Volume* volume = (Volume*)_volume->private_volume;
1579 	Inode* directory = (Inode*)_directory->private_node;
1580 
1581 	if (volume->IsReadOnly())
1582 		return B_READ_ONLY_DEVICE;
1583 
1584 	if (!directory->IsDirectory())
1585 		RETURN_ERROR(B_BAD_TYPE);
1586 
1587 	status_t status = directory->CheckPermissions(W_OK);
1588 	if (status < B_OK)
1589 		RETURN_ERROR(status);
1590 
1591 	Transaction transaction(volume, directory->BlockNumber());
1592 
1593 	// Inode::Create() locks the inode if we pass the "id" parameter, but we
1594 	// need it anyway
1595 	off_t id;
1596 	status = Inode::Create(transaction, directory, name,
1597 		S_DIRECTORY | (mode & S_IUMSK), 0, 0, NULL, &id);
1598 	if (status == B_OK) {
1599 		put_vnode(volume->FSVolume(), id);
1600 
1601 		entry_cache_add(volume->ID(), directory->ID(), name, id);
1602 
1603 		status = transaction.Done();
1604 		if (status == B_OK)
1605 			notify_entry_created(volume->ID(), directory->ID(), name, id);
1606 		else
1607 			entry_cache_remove(volume->ID(), directory->ID(), name);
1608 	}
1609 
1610 	return status;
1611 }
1612 
1613 
1614 static status_t
1615 bfs_remove_dir(fs_volume* _volume, fs_vnode* _directory, const char* name)
1616 {
1617 	FUNCTION_START(("name = \"%s\"\n", name));
1618 
1619 	Volume* volume = (Volume*)_volume->private_volume;
1620 	Inode* directory = (Inode*)_directory->private_node;
1621 
1622 	Transaction transaction(volume, directory->BlockNumber());
1623 
1624 	off_t id;
1625 	status_t status = directory->Remove(transaction, name, &id, true);
1626 	if (status == B_OK) {
1627 		// Remove the cache entry for the directory and potentially also
1628 		// the parent entry still belonging to the directory
1629 		entry_cache_remove(volume->ID(), directory->ID(), name);
1630 		entry_cache_remove(volume->ID(), id, "..");
1631 
1632 		status = transaction.Done();
1633 		if (status == B_OK)
1634 			notify_entry_removed(volume->ID(), directory->ID(), name, id);
1635 		else {
1636 			entry_cache_add(volume->ID(), directory->ID(), name, id);
1637 			entry_cache_add(volume->ID(), id, "..", id);
1638 		}
1639 	}
1640 
1641 	return status;
1642 }
1643 
1644 
1645 /*!	Opens a directory ready to be traversed.
1646 	bfs_open_dir() is also used by bfs_open_index_dir().
1647 */
1648 static status_t
1649 bfs_open_dir(fs_volume* _volume, fs_vnode* _node, void** _cookie)
1650 {
1651 	FUNCTION();
1652 
1653 	Inode* inode = (Inode*)_node->private_node;
1654 	status_t status = inode->CheckPermissions(R_OK);
1655 	if (status < B_OK)
1656 		RETURN_ERROR(status);
1657 
1658 	// we don't ask here for directories only, because the bfs_open_index_dir()
1659 	// function utilizes us (so we must be able to open indices as well)
1660 	if (!inode->IsContainer())
1661 		RETURN_ERROR(B_NOT_A_DIRECTORY);
1662 
1663 	BPlusTree* tree = inode->Tree();
1664 	if (tree == NULL)
1665 		RETURN_ERROR(B_BAD_VALUE);
1666 
1667 	TreeIterator* iterator = new(std::nothrow) TreeIterator(tree);
1668 	if (iterator == NULL)
1669 		RETURN_ERROR(B_NO_MEMORY);
1670 
1671 	*_cookie = iterator;
1672 	return B_OK;
1673 }
1674 
1675 
1676 static status_t
1677 bfs_read_dir(fs_volume* _volume, fs_vnode* _node, void* _cookie,
1678 	struct dirent* dirent, size_t bufferSize, uint32* _num)
1679 {
1680 	FUNCTION();
1681 
1682 	TreeIterator* iterator = (TreeIterator*)_cookie;
1683 	Volume* volume = (Volume*)_volume->private_volume;
1684 
1685 	uint32 maxCount = *_num;
1686 	uint32 count = 0;
1687 
1688 	while (count < maxCount && bufferSize > sizeof(struct dirent)) {
1689 		ino_t id;
1690 		uint16 length;
1691 		size_t nameBufferSize = bufferSize - sizeof(struct dirent) + 1;
1692 
1693 		status_t status = iterator->GetNextEntry(dirent->d_name, &length,
1694 			nameBufferSize, &id);
1695 
1696 		if (status == B_ENTRY_NOT_FOUND)
1697 			break;
1698 
1699 		if (status == B_BUFFER_OVERFLOW) {
1700 			// the remaining name buffer length was too small
1701 			if (count == 0)
1702 				RETURN_ERROR(B_BUFFER_OVERFLOW);
1703 			break;
1704 		}
1705 
1706 		if (status != B_OK)
1707 			RETURN_ERROR(status);
1708 
1709 		ASSERT(length < nameBufferSize);
1710 
1711 		dirent->d_dev = volume->ID();
1712 		dirent->d_ino = id;
1713 		dirent->d_reclen = sizeof(struct dirent) + length;
1714 
1715 		bufferSize -= dirent->d_reclen;
1716 		dirent = (struct dirent*)((uint8*)dirent + dirent->d_reclen);
1717 		count++;
1718 	}
1719 
1720 	*_num = count;
1721 	return B_OK;
1722 }
1723 
1724 
1725 /*!	Sets the TreeIterator back to the beginning of the directory. */
1726 static status_t
1727 bfs_rewind_dir(fs_volume* /*_volume*/, fs_vnode* /*node*/, void* _cookie)
1728 {
1729 	FUNCTION();
1730 	TreeIterator* iterator = (TreeIterator*)_cookie;
1731 
1732 	return iterator->Rewind();
1733 }
1734 
1735 
1736 static status_t
1737 bfs_close_dir(fs_volume* /*_volume*/, fs_vnode* /*node*/, void* /*_cookie*/)
1738 {
1739 	FUNCTION();
1740 	return B_OK;
1741 }
1742 
1743 
1744 static status_t
1745 bfs_free_dir_cookie(fs_volume* _volume, fs_vnode* node, void* _cookie)
1746 {
1747 	delete (TreeIterator*)_cookie;
1748 	return B_OK;
1749 }
1750 
1751 
1752 //	#pragma mark - Attribute functions
1753 
1754 
1755 static status_t
1756 bfs_open_attr_dir(fs_volume* _volume, fs_vnode* _node, void** _cookie)
1757 {
1758 	Inode* inode = (Inode*)_node->private_node;
1759 
1760 	FUNCTION();
1761 
1762 	AttributeIterator* iterator = new(std::nothrow) AttributeIterator(inode);
1763 	if (iterator == NULL)
1764 		RETURN_ERROR(B_NO_MEMORY);
1765 
1766 	*_cookie = iterator;
1767 	return B_OK;
1768 }
1769 
1770 
1771 static status_t
1772 bfs_close_attr_dir(fs_volume* _volume, fs_vnode* node, void* cookie)
1773 {
1774 	FUNCTION();
1775 	return B_OK;
1776 }
1777 
1778 
1779 static status_t
1780 bfs_free_attr_dir_cookie(fs_volume* _volume, fs_vnode* node, void* _cookie)
1781 {
1782 	FUNCTION();
1783 	AttributeIterator* iterator = (AttributeIterator*)_cookie;
1784 
1785 	delete iterator;
1786 	return B_OK;
1787 }
1788 
1789 
1790 static status_t
1791 bfs_rewind_attr_dir(fs_volume* _volume, fs_vnode* _node, void* _cookie)
1792 {
1793 	FUNCTION();
1794 
1795 	AttributeIterator* iterator = (AttributeIterator*)_cookie;
1796 	RETURN_ERROR(iterator->Rewind());
1797 }
1798 
1799 
1800 static status_t
1801 bfs_read_attr_dir(fs_volume* _volume, fs_vnode* node, void* _cookie,
1802 	struct dirent* dirent, size_t bufferSize, uint32* _num)
1803 {
1804 	FUNCTION();
1805 	AttributeIterator* iterator = (AttributeIterator*)_cookie;
1806 
1807 	uint32 type;
1808 	size_t length;
1809 	status_t status = iterator->GetNext(dirent->d_name, &length, &type,
1810 		&dirent->d_ino);
1811 	if (status == B_ENTRY_NOT_FOUND) {
1812 		*_num = 0;
1813 		return B_OK;
1814 	} else if (status != B_OK) {
1815 		RETURN_ERROR(status);
1816 	}
1817 
1818 	Volume* volume = (Volume*)_volume->private_volume;
1819 
1820 	dirent->d_dev = volume->ID();
1821 	dirent->d_reclen = sizeof(struct dirent) + length;
1822 
1823 	*_num = 1;
1824 	return B_OK;
1825 }
1826 
1827 
1828 static status_t
1829 bfs_create_attr(fs_volume* _volume, fs_vnode* _node, const char* name,
1830 	uint32 type, int openMode, void** _cookie)
1831 {
1832 	FUNCTION();
1833 
1834 	Volume* volume = (Volume*)_volume->private_volume;
1835 	if (volume->IsReadOnly())
1836 		return B_READ_ONLY_DEVICE;
1837 
1838 	Inode* inode = (Inode*)_node->private_node;
1839 	Attribute attribute(inode);
1840 
1841 	return attribute.Create(name, type, openMode, (attr_cookie**)_cookie);
1842 }
1843 
1844 
1845 static status_t
1846 bfs_open_attr(fs_volume* _volume, fs_vnode* _node, const char* name,
1847 	int openMode, void** _cookie)
1848 {
1849 	FUNCTION();
1850 
1851 	Inode* inode = (Inode*)_node->private_node;
1852 	Attribute attribute(inode);
1853 
1854 	return attribute.Open(name, openMode, (attr_cookie**)_cookie);
1855 }
1856 
1857 
1858 static status_t
1859 bfs_close_attr(fs_volume* _volume, fs_vnode* _file, void* cookie)
1860 {
1861 	return B_OK;
1862 }
1863 
1864 
1865 static status_t
1866 bfs_free_attr_cookie(fs_volume* _volume, fs_vnode* _file, void* cookie)
1867 {
1868 	delete (attr_cookie*)cookie;
1869 	return B_OK;
1870 }
1871 
1872 
1873 static status_t
1874 bfs_read_attr(fs_volume* _volume, fs_vnode* _file, void* _cookie, off_t pos,
1875 	void* buffer, size_t* _length)
1876 {
1877 	FUNCTION();
1878 
1879 	attr_cookie* cookie = (attr_cookie*)_cookie;
1880 	Inode* inode = (Inode*)_file->private_node;
1881 
1882 	Attribute attribute(inode, cookie);
1883 
1884 	return attribute.Read(cookie, pos, (uint8*)buffer, _length);
1885 }
1886 
1887 
1888 static status_t
1889 bfs_write_attr(fs_volume* _volume, fs_vnode* _file, void* _cookie,
1890 	off_t pos, const void* buffer, size_t* _length)
1891 {
1892 	FUNCTION();
1893 
1894 	attr_cookie* cookie = (attr_cookie*)_cookie;
1895 	Volume* volume = (Volume*)_volume->private_volume;
1896 	Inode* inode = (Inode*)_file->private_node;
1897 
1898 	Transaction transaction(volume, inode->BlockNumber());
1899 	Attribute attribute(inode, cookie);
1900 
1901 	bool created;
1902 	status_t status = attribute.Write(transaction, cookie, pos,
1903 		(const uint8*)buffer, _length, &created);
1904 	if (status == B_OK) {
1905 		status = transaction.Done();
1906 		if (status == B_OK) {
1907 			notify_attribute_changed(volume->ID(), inode->ParentID(),
1908 				inode->ID(), cookie->name,
1909 				created ? B_ATTR_CREATED : B_ATTR_CHANGED);
1910 			notify_stat_changed(volume->ID(), inode->ParentID(), inode->ID(),
1911 				B_STAT_CHANGE_TIME);
1912 		}
1913 	}
1914 
1915 	return status;
1916 }
1917 
1918 
1919 static status_t
1920 bfs_read_attr_stat(fs_volume* _volume, fs_vnode* _file, void* _cookie,
1921 	struct stat* stat)
1922 {
1923 	FUNCTION();
1924 
1925 	attr_cookie* cookie = (attr_cookie*)_cookie;
1926 	Inode* inode = (Inode*)_file->private_node;
1927 
1928 	Attribute attribute(inode, cookie);
1929 
1930 	return attribute.Stat(*stat);
1931 }
1932 
1933 
1934 static status_t
1935 bfs_write_attr_stat(fs_volume* _volume, fs_vnode* file, void* cookie,
1936 	const struct stat* stat, int statMask)
1937 {
1938 	// TODO: Implement (at least setting the size)!
1939 	return EOPNOTSUPP;
1940 }
1941 
1942 
1943 static status_t
1944 bfs_rename_attr(fs_volume* _volume, fs_vnode* fromFile, const char* fromName,
1945 	fs_vnode* toFile, const char* toName)
1946 {
1947 	FUNCTION_START(("name = \"%s\", to = \"%s\"\n", fromName, toName));
1948 
1949 	// TODO: implement bfs_rename_attr()!
1950 	// There will probably be an API to move one attribute to another file,
1951 	// making that function much more complicated - oh joy ;-)
1952 
1953 	return EOPNOTSUPP;
1954 }
1955 
1956 
1957 static status_t
1958 bfs_remove_attr(fs_volume* _volume, fs_vnode* _node, const char* name)
1959 {
1960 	FUNCTION_START(("name = \"%s\"\n", name));
1961 
1962 	Volume* volume = (Volume*)_volume->private_volume;
1963 	Inode* inode = (Inode*)_node->private_node;
1964 
1965 	status_t status = inode->CheckPermissions(W_OK);
1966 	if (status != B_OK)
1967 		return status;
1968 
1969 	Transaction transaction(volume, inode->BlockNumber());
1970 
1971 	status = inode->RemoveAttribute(transaction, name);
1972 	if (status == B_OK)
1973 		status = transaction.Done();
1974 	if (status == B_OK) {
1975 		notify_attribute_changed(volume->ID(), inode->ParentID(), inode->ID(),
1976 			name, B_ATTR_REMOVED);
1977 	}
1978 
1979 	return status;
1980 }
1981 
1982 
1983 //	#pragma mark - Special Nodes
1984 
1985 
1986 status_t
1987 bfs_create_special_node(fs_volume* _volume, fs_vnode* _directory,
1988 	const char* name, fs_vnode* subVnode, mode_t mode, uint32 flags,
1989 	fs_vnode* _superVnode, ino_t* _nodeID)
1990 {
1991 	// no need to support entry-less nodes
1992 	if (name == NULL)
1993 		return B_UNSUPPORTED;
1994 
1995 	FUNCTION_START(("name = \"%s\", mode = %d, flags = 0x%lx, subVnode: %p\n",
1996 		name, mode, flags, subVnode));
1997 
1998 	Volume* volume = (Volume*)_volume->private_volume;
1999 	Inode* directory = (Inode*)_directory->private_node;
2000 
2001 	if (volume->IsReadOnly())
2002 		return B_READ_ONLY_DEVICE;
2003 
2004 	if (!directory->IsDirectory())
2005 		RETURN_ERROR(B_BAD_TYPE);
2006 
2007 	status_t status = directory->CheckPermissions(W_OK);
2008 	if (status < B_OK)
2009 		RETURN_ERROR(status);
2010 
2011 	Transaction transaction(volume, directory->BlockNumber());
2012 
2013 	off_t id;
2014 	Inode* inode;
2015 	status = Inode::Create(transaction, directory, name, mode, O_EXCL, 0, NULL,
2016 		&id, &inode, subVnode ? subVnode->ops : NULL, flags);
2017 	if (status == B_OK) {
2018 		_superVnode->private_node = inode;
2019 		_superVnode->ops = &gBFSVnodeOps;
2020 		*_nodeID = id;
2021 
2022 		entry_cache_add(volume->ID(), directory->ID(), name, id);
2023 
2024 		status = transaction.Done();
2025 		if (status == B_OK)
2026 			notify_entry_created(volume->ID(), directory->ID(), name, id);
2027 		else
2028 			entry_cache_remove(volume->ID(), directory->ID(), name);
2029 	}
2030 
2031 	return status;
2032 }
2033 
2034 
2035 //	#pragma mark - Index functions
2036 
2037 
2038 static status_t
2039 bfs_open_index_dir(fs_volume* _volume, void** _cookie)
2040 {
2041 	FUNCTION();
2042 
2043 	Volume* volume = (Volume*)_volume->private_volume;
2044 
2045 	if (volume->IndicesNode() == NULL) {
2046 		// This volume does not have any indices
2047 		RETURN_ERROR(B_ENTRY_NOT_FOUND);
2048 	}
2049 
2050 	// Since the indices root node is just a directory, and we are storing
2051 	// a pointer to it in our Volume object, we can just use the directory
2052 	// traversal functions.
2053 	// In fact we're storing it in the Volume object for that reason.
2054 
2055 	fs_vnode indicesNode;
2056 	indicesNode.private_node = volume->IndicesNode();
2057 
2058 	RETURN_ERROR(bfs_open_dir(_volume, &indicesNode, _cookie));
2059 }
2060 
2061 
2062 static status_t
2063 bfs_close_index_dir(fs_volume* _volume, void* _cookie)
2064 {
2065 	FUNCTION();
2066 
2067 	Volume* volume = (Volume*)_volume->private_volume;
2068 
2069 	fs_vnode indicesNode;
2070 	indicesNode.private_node = volume->IndicesNode();
2071 
2072 	RETURN_ERROR(bfs_close_dir(_volume, &indicesNode, _cookie));
2073 }
2074 
2075 
2076 static status_t
2077 bfs_free_index_dir_cookie(fs_volume* _volume, void* _cookie)
2078 {
2079 	FUNCTION();
2080 
2081 	Volume* volume = (Volume*)_volume->private_volume;
2082 
2083 	fs_vnode indicesNode;
2084 	indicesNode.private_node = volume->IndicesNode();
2085 
2086 	RETURN_ERROR(bfs_free_dir_cookie(_volume, &indicesNode, _cookie));
2087 }
2088 
2089 
2090 static status_t
2091 bfs_rewind_index_dir(fs_volume* _volume, void* _cookie)
2092 {
2093 	FUNCTION();
2094 
2095 	Volume* volume = (Volume*)_volume->private_volume;
2096 
2097 	fs_vnode indicesNode;
2098 	indicesNode.private_node = volume->IndicesNode();
2099 
2100 	RETURN_ERROR(bfs_rewind_dir(_volume, &indicesNode, _cookie));
2101 }
2102 
2103 
2104 static status_t
2105 bfs_read_index_dir(fs_volume* _volume, void* _cookie, struct dirent* dirent,
2106 	size_t bufferSize, uint32* _num)
2107 {
2108 	FUNCTION();
2109 
2110 	Volume* volume = (Volume*)_volume->private_volume;
2111 
2112 	fs_vnode indicesNode;
2113 	indicesNode.private_node = volume->IndicesNode();
2114 
2115 	RETURN_ERROR(bfs_read_dir(_volume, &indicesNode, _cookie, dirent,
2116 		bufferSize, _num));
2117 }
2118 
2119 
2120 static status_t
2121 bfs_create_index(fs_volume* _volume, const char* name, uint32 type,
2122 	uint32 flags)
2123 {
2124 	FUNCTION_START(("name = \"%s\", type = %ld, flags = %ld\n", name, type, flags));
2125 
2126 	Volume* volume = (Volume*)_volume->private_volume;
2127 
2128 	if (volume->IsReadOnly())
2129 		return B_READ_ONLY_DEVICE;
2130 
2131 	// only root users are allowed to create indices
2132 	if (geteuid() != 0)
2133 		return B_NOT_ALLOWED;
2134 
2135 	Transaction transaction(volume, volume->Indices());
2136 
2137 	Index index(volume);
2138 	status_t status = index.Create(transaction, name, type);
2139 
2140 	if (status == B_OK)
2141 		status = transaction.Done();
2142 
2143 	RETURN_ERROR(status);
2144 }
2145 
2146 
2147 static status_t
2148 bfs_remove_index(fs_volume* _volume, const char* name)
2149 {
2150 	FUNCTION();
2151 
2152 	Volume* volume = (Volume*)_volume->private_volume;
2153 
2154 	if (volume->IsReadOnly())
2155 		return B_READ_ONLY_DEVICE;
2156 
2157 	// only root users are allowed to remove indices
2158 	if (geteuid() != 0)
2159 		return B_NOT_ALLOWED;
2160 
2161 	Inode* indices = volume->IndicesNode();
2162 	if (indices == NULL)
2163 		return B_ENTRY_NOT_FOUND;
2164 
2165 	Transaction transaction(volume, volume->Indices());
2166 
2167 	status_t status = indices->Remove(transaction, name);
2168 	if (status == B_OK)
2169 		status = transaction.Done();
2170 
2171 	RETURN_ERROR(status);
2172 }
2173 
2174 
2175 static status_t
2176 bfs_stat_index(fs_volume* _volume, const char* name, struct stat* stat)
2177 {
2178 	FUNCTION_START(("name = %s\n", name));
2179 
2180 	Volume* volume = (Volume*)_volume->private_volume;
2181 
2182 	Index index(volume);
2183 	status_t status = index.SetTo(name);
2184 	if (status < B_OK)
2185 		RETURN_ERROR(status);
2186 
2187 	bfs_inode& node = index.Node()->Node();
2188 
2189 	stat->st_type = index.Type();
2190 	stat->st_mode = node.Mode();
2191 
2192 	stat->st_size = node.data.Size();
2193 	stat->st_blocks = index.Node()->AllocatedSize() / 512;
2194 
2195 	stat->st_nlink = 1;
2196 	stat->st_blksize = 65536;
2197 
2198 	stat->st_uid = node.UserID();
2199 	stat->st_gid = node.GroupID();
2200 
2201 	fill_stat_time(node, *stat);
2202 
2203 	return B_OK;
2204 }
2205 
2206 
2207 //	#pragma mark - Query functions
2208 
2209 
2210 static status_t
2211 bfs_open_query(fs_volume* _volume, const char* queryString, uint32 flags,
2212 	port_id port, uint32 token, void** _cookie)
2213 {
2214 	FUNCTION_START(("bfs_open_query(\"%s\", flags = %lu, port_id = %ld, token = %ld)\n",
2215 		queryString, flags, port, token));
2216 
2217 	Volume* volume = (Volume*)_volume->private_volume;
2218 
2219 	Expression* expression = new(std::nothrow) Expression((char*)queryString);
2220 	if (expression == NULL)
2221 		RETURN_ERROR(B_NO_MEMORY);
2222 
2223 	if (expression->InitCheck() < B_OK) {
2224 		INFORM(("Could not parse query \"%s\", stopped at: \"%s\"\n",
2225 			queryString, expression->Position()));
2226 
2227 		delete expression;
2228 		RETURN_ERROR(B_BAD_VALUE);
2229 	}
2230 
2231 	Query* query = new(std::nothrow) Query(volume, expression, flags);
2232 	if (query == NULL) {
2233 		delete expression;
2234 		RETURN_ERROR(B_NO_MEMORY);
2235 	}
2236 
2237 	if (flags & B_LIVE_QUERY)
2238 		query->SetLiveMode(port, token);
2239 
2240 	*_cookie = (void*)query;
2241 
2242 	return B_OK;
2243 }
2244 
2245 
2246 static status_t
2247 bfs_close_query(fs_volume* _volume, void* cookie)
2248 {
2249 	FUNCTION();
2250 	return B_OK;
2251 }
2252 
2253 
2254 static status_t
2255 bfs_free_query_cookie(fs_volume* _volume, void* cookie)
2256 {
2257 	FUNCTION();
2258 
2259 	Query* query = (Query*)cookie;
2260 	Expression* expression = query->GetExpression();
2261 	delete query;
2262 	delete expression;
2263 
2264 	return B_OK;
2265 }
2266 
2267 
2268 static status_t
2269 bfs_read_query(fs_volume* /*_volume*/, void* cookie, struct dirent* dirent,
2270 	size_t bufferSize, uint32* _num)
2271 {
2272 	FUNCTION();
2273 	Query* query = (Query*)cookie;
2274 	status_t status = query->GetNextEntry(dirent, bufferSize);
2275 	if (status == B_OK)
2276 		*_num = 1;
2277 	else if (status == B_ENTRY_NOT_FOUND)
2278 		*_num = 0;
2279 	else
2280 		return status;
2281 
2282 	return B_OK;
2283 }
2284 
2285 
2286 static status_t
2287 bfs_rewind_query(fs_volume* /*_volume*/, void* cookie)
2288 {
2289 	FUNCTION();
2290 
2291 	Query* query = (Query*)cookie;
2292 	return query->Rewind();
2293 }
2294 
2295 
2296 //	#pragma mark -
2297 
2298 
2299 static uint32
2300 bfs_get_supported_operations(partition_data* partition, uint32 mask)
2301 {
2302 	// TODO: We should at least check the partition size.
2303 	return B_DISK_SYSTEM_SUPPORTS_INITIALIZING
2304 		| B_DISK_SYSTEM_SUPPORTS_CONTENT_NAME
2305 		| B_DISK_SYSTEM_SUPPORTS_WRITING;
2306 }
2307 
2308 
2309 static status_t
2310 bfs_initialize(int fd, partition_id partitionID, const char* name,
2311 	const char* parameterString, off_t /*partitionSize*/, disk_job_id job)
2312 {
2313 	// check name
2314 	status_t status = check_volume_name(name);
2315 	if (status != B_OK)
2316 		return status;
2317 
2318 	// parse parameters
2319 	initialize_parameters parameters;
2320 	status = parse_initialize_parameters(parameterString, parameters);
2321 	if (status != B_OK)
2322 		return status;
2323 
2324 	update_disk_device_job_progress(job, 0);
2325 
2326 	// initialize the volume
2327 	Volume volume(NULL);
2328 	status = volume.Initialize(fd, name, parameters.blockSize,
2329 		parameters.flags);
2330 	if (status < B_OK) {
2331 		INFORM(("Initializing volume failed: %s\n", strerror(status)));
2332 		return status;
2333 	}
2334 
2335 	// rescan partition
2336 	status = scan_partition(partitionID);
2337 	if (status != B_OK)
2338 		return status;
2339 
2340 	update_disk_device_job_progress(job, 1);
2341 
2342 	// print some info, if desired
2343 	if (parameters.verbose) {
2344 		disk_super_block super = volume.SuperBlock();
2345 
2346 		INFORM(("Disk was initialized successfully.\n"));
2347 		INFORM(("\tname: \"%s\"\n", super.name));
2348 		INFORM(("\tnum blocks: %" B_PRIdOFF "\n", super.NumBlocks()));
2349 		INFORM(("\tused blocks: %" B_PRIdOFF "\n", super.UsedBlocks()));
2350 		INFORM(("\tblock size: %u bytes\n", (unsigned)super.BlockSize()));
2351 		INFORM(("\tnum allocation groups: %d\n",
2352 			(int)super.AllocationGroups()));
2353 		INFORM(("\tallocation group size: %ld blocks\n",
2354 			1L << super.AllocationGroupShift()));
2355 		INFORM(("\tlog size: %u blocks\n", super.log_blocks.Length()));
2356 	}
2357 
2358 	return B_OK;
2359 }
2360 
2361 
2362 static status_t
2363 bfs_uninitialize(int fd, partition_id partitionID, off_t partitionSize,
2364 	uint32 blockSize, disk_job_id job)
2365 {
2366 	if (blockSize == 0)
2367 		return B_BAD_VALUE;
2368 
2369 	update_disk_device_job_progress(job, 0.0);
2370 
2371 	// just overwrite the superblock
2372 	disk_super_block superBlock;
2373 	memset(&superBlock, 0, sizeof(superBlock));
2374 
2375 	if (write_pos(fd, 512, &superBlock, sizeof(superBlock)) < 0)
2376 		return errno;
2377 
2378 	update_disk_device_job_progress(job, 1.0);
2379 
2380 	return B_OK;
2381 }
2382 
2383 
2384 //	#pragma mark -
2385 
2386 
2387 static status_t
2388 bfs_std_ops(int32 op, ...)
2389 {
2390 	switch (op) {
2391 		case B_MODULE_INIT:
2392 #ifdef BFS_DEBUGGER_COMMANDS
2393 			add_debugger_commands();
2394 #endif
2395 			return B_OK;
2396 		case B_MODULE_UNINIT:
2397 #ifdef BFS_DEBUGGER_COMMANDS
2398 			remove_debugger_commands();
2399 #endif
2400 			return B_OK;
2401 
2402 		default:
2403 			return B_ERROR;
2404 	}
2405 }
2406 
2407 fs_volume_ops gBFSVolumeOps = {
2408 	&bfs_unmount,
2409 	&bfs_read_fs_stat,
2410 	&bfs_write_fs_stat,
2411 	&bfs_sync,
2412 	&bfs_get_vnode,
2413 
2414 	/* index directory & index operations */
2415 	&bfs_open_index_dir,
2416 	&bfs_close_index_dir,
2417 	&bfs_free_index_dir_cookie,
2418 	&bfs_read_index_dir,
2419 	&bfs_rewind_index_dir,
2420 
2421 	&bfs_create_index,
2422 	&bfs_remove_index,
2423 	&bfs_stat_index,
2424 
2425 	/* query operations */
2426 	&bfs_open_query,
2427 	&bfs_close_query,
2428 	&bfs_free_query_cookie,
2429 	&bfs_read_query,
2430 	&bfs_rewind_query,
2431 };
2432 
2433 fs_vnode_ops gBFSVnodeOps = {
2434 	/* vnode operations */
2435 	&bfs_lookup,
2436 	&bfs_get_vnode_name,
2437 	&bfs_put_vnode,
2438 	&bfs_remove_vnode,
2439 
2440 	/* VM file access */
2441 	&bfs_can_page,
2442 	&bfs_read_pages,
2443 	&bfs_write_pages,
2444 
2445 	&bfs_io,
2446 	NULL,	// cancel_io()
2447 
2448 	&bfs_get_file_map,
2449 
2450 	&bfs_ioctl,
2451 	&bfs_set_flags,
2452 	NULL,	// fs_select
2453 	NULL,	// fs_deselect
2454 	&bfs_fsync,
2455 
2456 	&bfs_read_link,
2457 	&bfs_create_symlink,
2458 
2459 	&bfs_link,
2460 	&bfs_unlink,
2461 	&bfs_rename,
2462 
2463 	&bfs_access,
2464 	&bfs_read_stat,
2465 	&bfs_write_stat,
2466 	NULL,	// fs_preallocate
2467 
2468 	/* file operations */
2469 	&bfs_create,
2470 	&bfs_open,
2471 	&bfs_close,
2472 	&bfs_free_cookie,
2473 	&bfs_read,
2474 	&bfs_write,
2475 
2476 	/* directory operations */
2477 	&bfs_create_dir,
2478 	&bfs_remove_dir,
2479 	&bfs_open_dir,
2480 	&bfs_close_dir,
2481 	&bfs_free_dir_cookie,
2482 	&bfs_read_dir,
2483 	&bfs_rewind_dir,
2484 
2485 	/* attribute directory operations */
2486 	&bfs_open_attr_dir,
2487 	&bfs_close_attr_dir,
2488 	&bfs_free_attr_dir_cookie,
2489 	&bfs_read_attr_dir,
2490 	&bfs_rewind_attr_dir,
2491 
2492 	/* attribute operations */
2493 	&bfs_create_attr,
2494 	&bfs_open_attr,
2495 	&bfs_close_attr,
2496 	&bfs_free_attr_cookie,
2497 	&bfs_read_attr,
2498 	&bfs_write_attr,
2499 
2500 	&bfs_read_attr_stat,
2501 	&bfs_write_attr_stat,
2502 	&bfs_rename_attr,
2503 	&bfs_remove_attr,
2504 
2505 	/* special nodes */
2506 	&bfs_create_special_node
2507 };
2508 
2509 static file_system_module_info sBeFileSystem = {
2510 	{
2511 		"file_systems/bfs" BFS_ENDIAN_SUFFIX B_CURRENT_FS_API_VERSION,
2512 		0,
2513 		bfs_std_ops,
2514 	},
2515 
2516 	"bfs" BFS_ENDIAN_SUFFIX,						// short_name
2517 	"Be File System" BFS_ENDIAN_PRETTY_SUFFIX,		// pretty_name
2518 
2519 	// DDM flags
2520 	0
2521 //	| B_DISK_SYSTEM_SUPPORTS_CHECKING
2522 //	| B_DISK_SYSTEM_SUPPORTS_REPAIRING
2523 //	| B_DISK_SYSTEM_SUPPORTS_RESIZING
2524 //	| B_DISK_SYSTEM_SUPPORTS_MOVING
2525 //	| B_DISK_SYSTEM_SUPPORTS_SETTING_CONTENT_NAME
2526 //	| B_DISK_SYSTEM_SUPPORTS_SETTING_CONTENT_PARAMETERS
2527 	| B_DISK_SYSTEM_SUPPORTS_INITIALIZING
2528 	| B_DISK_SYSTEM_SUPPORTS_CONTENT_NAME
2529 //	| B_DISK_SYSTEM_SUPPORTS_DEFRAGMENTING
2530 //	| B_DISK_SYSTEM_SUPPORTS_DEFRAGMENTING_WHILE_MOUNTED
2531 //	| B_DISK_SYSTEM_SUPPORTS_CHECKING_WHILE_MOUNTED
2532 //	| B_DISK_SYSTEM_SUPPORTS_REPAIRING_WHILE_MOUNTED
2533 //	| B_DISK_SYSTEM_SUPPORTS_RESIZING_WHILE_MOUNTED
2534 //	| B_DISK_SYSTEM_SUPPORTS_MOVING_WHILE_MOUNTED
2535 //	| B_DISK_SYSTEM_SUPPORTS_SETTING_CONTENT_NAME_WHILE_MOUNTED
2536 //	| B_DISK_SYSTEM_SUPPORTS_SETTING_CONTENT_PARAMETERS_WHILE_MOUNTED
2537 	| B_DISK_SYSTEM_SUPPORTS_WRITING
2538 	,
2539 
2540 	// scanning
2541 	bfs_identify_partition,
2542 	bfs_scan_partition,
2543 	bfs_free_identify_partition_cookie,
2544 	NULL,	// free_partition_content_cookie()
2545 
2546 	&bfs_mount,
2547 
2548 	/* capability querying operations */
2549 	&bfs_get_supported_operations,
2550 
2551 	NULL,	// validate_resize
2552 	NULL,	// validate_move
2553 	NULL,	// validate_set_content_name
2554 	NULL,	// validate_set_content_parameters
2555 	NULL,	// validate_initialize,
2556 
2557 	/* shadow partition modification */
2558 	NULL,	// shadow_changed
2559 
2560 	/* writing */
2561 	NULL,	// defragment
2562 	NULL,	// repair
2563 	NULL,	// resize
2564 	NULL,	// move
2565 	NULL,	// set_content_name
2566 	NULL,	// set_content_parameters
2567 	bfs_initialize,
2568 	bfs_uninitialize
2569 };
2570 
//!	NULL-terminated list of the modules defined in this file; it only
//	contains the BFS file system module.
module_info* modules[] = {
	(module_info*)&sBeFileSystem,
	NULL,
};
2575