xref: /haiku/src/add-ons/kernel/file_systems/bfs/kernel_interface.cpp (revision 6a2d53e7237764eab0c7b6d121772f26d636fb60)
1  /*
2   * Copyright 2001-2020, Axel Dörfler, axeld@pinc-software.de.
3   * This file may be used under the terms of the MIT License.
4   */
5  
6  
7  //!	file system interface to Haiku's vnode layer
8  
9  
10  #include "Attribute.h"
11  #include "CheckVisitor.h"
12  #include "Debug.h"
13  #include "Volume.h"
14  #include "Inode.h"
15  #include "Index.h"
16  #include "BPlusTree.h"
17  #include "Query.h"
18  #include "ResizeVisitor.h"
19  #include "bfs_control.h"
20  #include "bfs_disk_system.h"
21  
22  // TODO: temporary solution as long as there is no public I/O requests API
23  #ifndef FS_SHELL
24  #	include <io_requests.h>
25  #	include <util/fs_trim_support.h>
26  #endif
27  
28  
29  #define BFS_IO_SIZE	65536
30  
31  #if defined(BFS_LITTLE_ENDIAN_ONLY)
32  #define BFS_ENDIAN_SUFFIX ""
33  #define BFS_ENDIAN_PRETTY_SUFFIX ""
34  #else
35  #define BFS_ENDIAN_SUFFIX "_big"
36  #define BFS_ENDIAN_PRETTY_SUFFIX " (Big Endian)"
37  #endif
38  
39  
40  struct identify_cookie {
41  	disk_super_block super_block;
42  };
43  
44  extern void fill_stat_buffer(Inode* inode, struct stat& stat);
45  
46  
47  static void
48  fill_stat_time(const bfs_inode& node, struct stat& stat)
49  {
50  	bigtime_t now = real_time_clock_usecs();
51  	stat.st_atim.tv_sec = now / 1000000LL;
52  	stat.st_atim.tv_nsec = (now % 1000000LL) * 1000;
53  
54  	stat.st_mtim.tv_sec = bfs_inode::ToSecs(node.LastModifiedTime());
55  	stat.st_mtim.tv_nsec = bfs_inode::ToNsecs(node.LastModifiedTime());
56  	stat.st_crtim.tv_sec = bfs_inode::ToSecs(node.CreateTime());
57  	stat.st_crtim.tv_nsec = bfs_inode::ToNsecs(node.CreateTime());
58  
59  	// For BeOS compatibility, if on-disk ctime is invalid, fall back to mtime:
60  	bigtime_t changeTime = node.StatusChangeTime();
61  	if (changeTime < node.LastModifiedTime())
62  		stat.st_ctim = stat.st_mtim;
63  	else {
64  		stat.st_ctim.tv_sec = bfs_inode::ToSecs(changeTime);
65  		stat.st_ctim.tv_nsec = bfs_inode::ToNsecs(changeTime);
66  	}
67  }
68  
69  
70  void
71  fill_stat_buffer(Inode* inode, struct stat& stat)
72  {
73  	const bfs_inode& node = inode->Node();
74  
75  	stat.st_dev = inode->GetVolume()->ID();
76  	stat.st_ino = inode->ID();
77  	stat.st_nlink = 1;
78  	stat.st_blksize = BFS_IO_SIZE;
79  
80  	stat.st_uid = node.UserID();
81  	stat.st_gid = node.GroupID();
82  	stat.st_mode = node.Mode();
83  	stat.st_type = node.Type();
84  
85  	fill_stat_time(node, stat);
86  
87  	if (inode->IsSymLink() && (inode->Flags() & INODE_LONG_SYMLINK) == 0) {
88  		// symlinks report the size of the link here
89  		stat.st_size = strlen(node.short_symlink);
90  	} else
91  		stat.st_size = inode->Size();
92  
93  	stat.st_blocks = inode->AllocatedSize() / 512;
94  }
95  
96  
97  //!	bfs_io() callback hook
98  static status_t
99  iterative_io_get_vecs_hook(void* cookie, io_request* request, off_t offset,
100  	size_t size, struct file_io_vec* vecs, size_t* _count)
101  {
102  	Inode* inode = (Inode*)cookie;
103  
104  	return file_map_translate(inode->Map(), offset, size, vecs, _count,
105  		inode->GetVolume()->BlockSize());
106  }
107  
108  
109  //!	bfs_io() callback hook
110  static status_t
111  iterative_io_finished_hook(void* cookie, io_request* request, status_t status,
112  	bool partialTransfer, size_t bytesTransferred)
113  {
114  	Inode* inode = (Inode*)cookie;
115  	rw_lock_read_unlock(&inode->Lock());
116  	return B_OK;
117  }
118  
119  
120  //	#pragma mark - Scanning
121  
122  
123  static float
124  bfs_identify_partition(int fd, partition_data* partition, void** _cookie)
125  {
126  	disk_super_block superBlock;
127  	status_t status = Volume::Identify(fd, &superBlock);
128  	if (status != B_OK)
129  		return -1;
130  
131  	identify_cookie* cookie = new(std::nothrow) identify_cookie;
132  	if (cookie == NULL)
133  		return -1;
134  
135  	memcpy(&cookie->super_block, &superBlock, sizeof(disk_super_block));
136  
137  	*_cookie = cookie;
138  	return 0.85f;
139  }
140  
141  
142  static status_t
143  bfs_scan_partition(int fd, partition_data* partition, void* _cookie)
144  {
145  	identify_cookie* cookie = (identify_cookie*)_cookie;
146  
147  	partition->status = B_PARTITION_VALID;
148  	partition->flags |= B_PARTITION_FILE_SYSTEM;
149  	partition->content_size = cookie->super_block.NumBlocks()
150  		* cookie->super_block.BlockSize();
151  	partition->block_size = cookie->super_block.BlockSize();
152  	partition->content_name = strdup(cookie->super_block.name);
153  	if (partition->content_name == NULL)
154  		return B_NO_MEMORY;
155  
156  	return B_OK;
157  }
158  
159  
160  static void
161  bfs_free_identify_partition_cookie(partition_data* partition, void* _cookie)
162  {
163  	identify_cookie* cookie = (identify_cookie*)_cookie;
164  	delete cookie;
165  }
166  
167  
168  //	#pragma mark -
169  
170  
171  static status_t
172  bfs_mount(fs_volume* _volume, const char* device, uint32 flags,
173  	const char* args, ino_t* _rootID)
174  {
175  	FUNCTION();
176  
177  	Volume* volume = new(std::nothrow) Volume(_volume);
178  	if (volume == NULL)
179  		return B_NO_MEMORY;
180  
181  	status_t status = volume->Mount(device, flags);
182  	if (status != B_OK) {
183  		delete volume;
184  		RETURN_ERROR(status);
185  	}
186  
187  	_volume->private_volume = volume;
188  	_volume->ops = &gBFSVolumeOps;
189  	*_rootID = volume->ToVnode(volume->Root());
190  
191  	INFORM(("mounted \"%s\" (root node at %" B_PRIdINO ", device = %s)\n",
192  		volume->Name(), *_rootID, device));
193  	return B_OK;
194  }
195  
196  
197  static status_t
198  bfs_unmount(fs_volume* _volume)
199  {
200  	FUNCTION();
201  	Volume* volume = (Volume*)_volume->private_volume;
202  
203  	status_t status = volume->Unmount();
204  	delete volume;
205  
206  	RETURN_ERROR(status);
207  }
208  
209  
210  static status_t
211  bfs_read_fs_stat(fs_volume* _volume, struct fs_info* info)
212  {
213  	FUNCTION();
214  
215  	Volume* volume = (Volume*)_volume->private_volume;
216  	MutexLocker locker(volume->Lock());
217  
218  	// File system flags.
219  	info->flags = B_FS_IS_PERSISTENT | B_FS_HAS_ATTR | B_FS_HAS_MIME
220  		| (volume->IndicesNode() != NULL ? B_FS_HAS_QUERY : 0)
221  		| (volume->IsReadOnly() ? B_FS_IS_READONLY : 0)
222  		| B_FS_SUPPORTS_MONITOR_CHILDREN;
223  
224  	info->io_size = BFS_IO_SIZE;
225  		// whatever is appropriate here?
226  
227  	info->block_size = volume->BlockSize();
228  	info->total_blocks = volume->NumBlocks();
229  	info->free_blocks = volume->FreeBlocks();
230  
231  	// Volume name
232  	strlcpy(info->volume_name, volume->Name(), sizeof(info->volume_name));
233  
234  	// File system name
235  	strlcpy(info->fsh_name, "bfs", sizeof(info->fsh_name));
236  
237  	return B_OK;
238  }
239  
240  
241  static status_t
242  bfs_write_fs_stat(fs_volume* _volume, const struct fs_info* info, uint32 mask)
243  {
244  	FUNCTION_START(("mask = %" B_PRId32 "\n", mask));
245  
246  	Volume* volume = (Volume*)_volume->private_volume;
247  	if (volume->IsReadOnly())
248  		return B_READ_ONLY_DEVICE;
249  
250  	MutexLocker locker(volume->Lock());
251  
252  	status_t status = B_BAD_VALUE;
253  
254  	if (mask & FS_WRITE_FSINFO_NAME) {
255  		disk_super_block& superBlock = volume->SuperBlock();
256  
257  		strncpy(superBlock.name, info->volume_name,
258  			sizeof(superBlock.name) - 1);
259  		superBlock.name[sizeof(superBlock.name) - 1] = '\0';
260  
261  		status = volume->WriteSuperBlock();
262  	}
263  	return status;
264  }
265  
266  
267  static status_t
268  bfs_sync(fs_volume* _volume)
269  {
270  	FUNCTION();
271  
272  	Volume* volume = (Volume*)_volume->private_volume;
273  	return volume->Sync();
274  }
275  
276  
277  //	#pragma mark -
278  
279  
280  /*!	Reads in the node from disk and creates an inode object from it.
281  */
282  static status_t
283  bfs_get_vnode(fs_volume* _volume, ino_t id, fs_vnode* _node, int* _type,
284  	uint32* _flags, bool reenter)
285  {
286  	//FUNCTION_START(("ino_t = %lld\n", id));
287  	Volume* volume = (Volume*)_volume->private_volume;
288  
289  	// first inode may be after the log area, we don't go through
290  	// the hassle and try to load an earlier block from disk
291  	if (id < volume->ToBlock(volume->Log()) + volume->Log().Length()
292  		|| id > volume->NumBlocks()) {
293  		INFORM(("inode at %" B_PRIdINO " requested!\n", id));
294  		return B_ERROR;
295  	}
296  
297  	CachedBlock cached(volume);
298  	status_t status = cached.SetTo(id);
299  	if (status != B_OK) {
300  		FATAL(("could not read inode: %" B_PRIdINO ": %s\n", id,
301  			strerror(status)));
302  		return status;
303  	}
304  	bfs_inode* node = (bfs_inode*)cached.Block();
305  
306  	status = node->InitCheck(volume);
307  	if (status != B_OK) {
308  		if ((node->Flags() & INODE_DELETED) != 0) {
309  			INFORM(("inode at %" B_PRIdINO " is already deleted!\n", id));
310  		} else {
311  			FATAL(("inode at %" B_PRIdINO " could not be read: %s!\n", id,
312  				strerror(status)));
313  		}
314  		return status;
315  	}
316  
317  	Inode* inode = new(std::nothrow) Inode(volume, id);
318  	if (inode == NULL)
319  		return B_NO_MEMORY;
320  
321  	status = inode->InitCheck(false);
322  	if (status != B_OK)
323  		delete inode;
324  
325  	if (status == B_OK) {
326  		_node->private_node = inode;
327  		_node->ops = &gBFSVnodeOps;
328  		*_type = inode->Mode();
329  		*_flags = 0;
330  	}
331  
332  	return status;
333  }
334  
335  
336  static status_t
337  bfs_put_vnode(fs_volume* _volume, fs_vnode* _node, bool reenter)
338  {
339  	Volume* volume = (Volume*)_volume->private_volume;
340  	Inode* inode = (Inode*)_node->private_node;
341  
342  	// since a directory's size can be changed without having it opened,
343  	// we need to take care about their preallocated blocks here
344  	if (!volume->IsReadOnly() && !volume->IsCheckingThread()
345  		&& inode->NeedsTrimming()) {
346  		Transaction transaction(volume, inode->BlockNumber());
347  
348  		if (inode->TrimPreallocation(transaction) == B_OK)
349  			transaction.Done();
350  		else if (transaction.HasParent()) {
351  			// TODO: for now, we don't let sub-transactions fail
352  			transaction.Done();
353  		}
354  	}
355  
356  	delete inode;
357  	return B_OK;
358  }
359  
360  
361  static status_t
362  bfs_remove_vnode(fs_volume* _volume, fs_vnode* _node, bool reenter)
363  {
364  	FUNCTION();
365  
366  	Volume* volume = (Volume*)_volume->private_volume;
367  	Inode* inode = (Inode*)_node->private_node;
368  
369  	// If the inode isn't in use anymore, we were called before
370  	// bfs_unlink() returns - in this case, we can just use the
371  	// transaction which has already deleted the inode.
372  	Transaction transaction(volume, volume->ToBlock(inode->Parent()));
373  
374  	// The file system check functionality uses this flag to prevent the space
375  	// used up by the inode from being freed - this flag is set only in
376  	// situations where this does not cause any harm as the block bitmap will
377  	// get fixed anyway in this case).
378  	if ((inode->Flags() & INODE_DONT_FREE_SPACE) != 0) {
379  		delete inode;
380  		return B_OK;
381  	}
382  
383  	ASSERT((inode->Flags() & INODE_DELETED) != 0);
384  
385  	status_t status = inode->Free(transaction);
386  	if (status == B_OK) {
387  		status = transaction.Done();
388  	} else if (transaction.HasParent()) {
389  		// TODO: for now, we don't let sub-transactions fail
390  		status = transaction.Done();
391  	}
392  
393  	volume->RemovedInodes().Remove(inode);
394  
395  	// TODO: the VFS currently does not allow this to fail
396  	delete inode;
397  
398  	return status;
399  }
400  
401  
402  static bool
403  bfs_can_page(fs_volume* _volume, fs_vnode* _v, void* _cookie)
404  {
405  	// TODO: we're obviously not even asked...
406  	return false;
407  }
408  
409  
410  static status_t
411  bfs_read_pages(fs_volume* _volume, fs_vnode* _node, void* _cookie,
412  	off_t pos, const iovec* vecs, size_t count, size_t* _numBytes)
413  {
414  	Volume* volume = (Volume*)_volume->private_volume;
415  	Inode* inode = (Inode*)_node->private_node;
416  
417  	if (inode->FileCache() == NULL)
418  		RETURN_ERROR(B_BAD_VALUE);
419  
420  	InodeReadLocker _(inode);
421  
422  	uint32 vecIndex = 0;
423  	size_t vecOffset = 0;
424  	size_t bytesLeft = *_numBytes;
425  	status_t status;
426  
427  	while (true) {
428  		file_io_vec fileVecs[8];
429  		size_t fileVecCount = 8;
430  
431  		status = file_map_translate(inode->Map(), pos, bytesLeft, fileVecs,
432  			&fileVecCount, 0);
433  		if (status != B_OK && status != B_BUFFER_OVERFLOW)
434  			break;
435  
436  		bool bufferOverflow = status == B_BUFFER_OVERFLOW;
437  
438  		size_t bytes = bytesLeft;
439  		status = read_file_io_vec_pages(volume->Device(), fileVecs,
440  			fileVecCount, vecs, count, &vecIndex, &vecOffset, &bytes);
441  		if (status != B_OK || !bufferOverflow)
442  			break;
443  
444  		pos += bytes;
445  		bytesLeft -= bytes;
446  	}
447  
448  	return status;
449  }
450  
451  
452  static status_t
453  bfs_write_pages(fs_volume* _volume, fs_vnode* _node, void* _cookie,
454  	off_t pos, const iovec* vecs, size_t count, size_t* _numBytes)
455  {
456  	Volume* volume = (Volume*)_volume->private_volume;
457  	Inode* inode = (Inode*)_node->private_node;
458  
459  	if (volume->IsReadOnly())
460  		return B_READ_ONLY_DEVICE;
461  
462  	if (inode->FileCache() == NULL)
463  		RETURN_ERROR(B_BAD_VALUE);
464  
465  	InodeReadLocker _(inode);
466  
467  	uint32 vecIndex = 0;
468  	size_t vecOffset = 0;
469  	size_t bytesLeft = *_numBytes;
470  	status_t status;
471  
472  	while (true) {
473  		file_io_vec fileVecs[8];
474  		size_t fileVecCount = 8;
475  
476  		status = file_map_translate(inode->Map(), pos, bytesLeft, fileVecs,
477  			&fileVecCount, 0);
478  		if (status != B_OK && status != B_BUFFER_OVERFLOW)
479  			break;
480  
481  		bool bufferOverflow = status == B_BUFFER_OVERFLOW;
482  
483  		size_t bytes = bytesLeft;
484  		status = write_file_io_vec_pages(volume->Device(), fileVecs,
485  			fileVecCount, vecs, count, &vecIndex, &vecOffset, &bytes);
486  		if (status != B_OK || !bufferOverflow)
487  			break;
488  
489  		pos += bytes;
490  		bytesLeft -= bytes;
491  	}
492  
493  	return status;
494  }
495  
496  
497  static status_t
498  bfs_io(fs_volume* _volume, fs_vnode* _node, void* _cookie, io_request* request)
499  {
500  #if KDEBUG_RW_LOCK_DEBUG
501  	// bfs_io depends on read-locks being implicitly transferrable across threads.
502  	return B_UNSUPPORTED;
503  #endif
504  
505  	Volume* volume = (Volume*)_volume->private_volume;
506  	Inode* inode = (Inode*)_node->private_node;
507  
508  #ifndef FS_SHELL
509  	if (io_request_is_write(request) && volume->IsReadOnly()) {
510  		notify_io_request(request, B_READ_ONLY_DEVICE);
511  		return B_READ_ONLY_DEVICE;
512  	}
513  #endif
514  
515  	if (inode->FileCache() == NULL) {
516  #ifndef FS_SHELL
517  		notify_io_request(request, B_BAD_VALUE);
518  #endif
519  		RETURN_ERROR(B_BAD_VALUE);
520  	}
521  
522  	// We lock the node here and will unlock it in the "finished" hook.
523  	rw_lock_read_lock(&inode->Lock());
524  
525  	return do_iterative_fd_io(volume->Device(), request,
526  		iterative_io_get_vecs_hook, iterative_io_finished_hook, inode);
527  }
528  
529  
530  static status_t
531  bfs_get_file_map(fs_volume* _volume, fs_vnode* _node, off_t offset, size_t size,
532  	struct file_io_vec* vecs, size_t* _count)
533  {
534  	Volume* volume = (Volume*)_volume->private_volume;
535  	Inode* inode = (Inode*)_node->private_node;
536  
537  	int32 blockShift = volume->BlockShift();
538  	uint32 index = 0, max = *_count;
539  	block_run run;
540  	off_t fileOffset;
541  
542  	//FUNCTION_START(("offset = %lld, size = %lu\n", offset, size));
543  
544  	while (true) {
545  		status_t status = inode->FindBlockRun(offset, run, fileOffset);
546  		if (status != B_OK)
547  			return status;
548  
549  		vecs[index].offset = volume->ToOffset(run) + offset - fileOffset;
550  		vecs[index].length = ((uint32)run.Length() << blockShift)
551  			- offset + fileOffset;
552  
553  		// are we already done?
554  		if ((uint64)size <= (uint64)vecs[index].length
555  			|| (uint64)offset + (uint64)vecs[index].length
556  				>= (uint64)inode->Size()) {
557  			if ((uint64)offset + (uint64)vecs[index].length
558  					> (uint64)inode->Size()) {
559  				// make sure the extent ends with the last official file
560  				// block (without taking any preallocations into account)
561  				vecs[index].length = round_up(inode->Size() - offset,
562  					volume->BlockSize());
563  			}
564  			*_count = index + 1;
565  			return B_OK;
566  		}
567  
568  		offset += vecs[index].length;
569  		size -= vecs[index].length;
570  		index++;
571  
572  		if (index >= max) {
573  			// we're out of file_io_vecs; let's bail out
574  			*_count = index;
575  			return B_BUFFER_OVERFLOW;
576  		}
577  	}
578  
579  	// can never get here
580  	return B_ERROR;
581  }
582  
583  
584  //	#pragma mark -
585  
586  
587  static status_t
588  bfs_lookup(fs_volume* _volume, fs_vnode* _directory, const char* file,
589  	ino_t* _vnodeID)
590  {
591  	Volume* volume = (Volume*)_volume->private_volume;
592  	Inode* directory = (Inode*)_directory->private_node;
593  
594  	InodeReadLocker locker(directory);
595  
596  	// check access permissions
597  	status_t status = directory->CheckPermissions(X_OK);
598  	if (status != B_OK)
599  		RETURN_ERROR(status);
600  
601  	BPlusTree* tree = directory->Tree();
602  	if (tree == NULL)
603  		RETURN_ERROR(B_BAD_VALUE);
604  
605  	status = tree->Find((uint8*)file, (uint16)strlen(file), _vnodeID);
606  	if (status != B_OK) {
607  		//PRINT(("bfs_walk() could not find %lld:\"%s\": %s\n", directory->BlockNumber(), file, strerror(status)));
608  		if (status == B_ENTRY_NOT_FOUND)
609  			entry_cache_add_missing(volume->ID(), directory->ID(), file);
610  
611  		return status;
612  	}
613  
614  	entry_cache_add(volume->ID(), directory->ID(), file, *_vnodeID);
615  
616  	locker.Unlock();
617  
618  	Inode* inode;
619  	status = get_vnode(volume->FSVolume(), *_vnodeID, (void**)&inode);
620  	if (status != B_OK) {
621  		REPORT_ERROR(status);
622  		return B_ENTRY_NOT_FOUND;
623  	}
624  
625  	return B_OK;
626  }
627  
628  
629  static status_t
630  bfs_get_vnode_name(fs_volume* _volume, fs_vnode* _node, char* buffer,
631  	size_t bufferSize)
632  {
633  	Inode* inode = (Inode*)_node->private_node;
634  
635  	return inode->GetName(buffer, bufferSize);
636  }
637  
638  
639  static status_t
640  bfs_ioctl(fs_volume* _volume, fs_vnode* _node, void* _cookie, uint32 cmd,
641  	void* buffer, size_t bufferLength)
642  {
643  	FUNCTION_START(("node = %p, cmd = %" B_PRIu32 ", buf = %p"
644  		", len = %" B_PRIuSIZE "\n", _node, cmd, buffer, bufferLength));
645  
646  	Volume* volume = (Volume*)_volume->private_volume;
647  
648  	switch (cmd) {
649  #ifndef FS_SHELL
650  		case B_TRIM_DEVICE:
651  		{
652  			fs_trim_data* trimData;
653  			MemoryDeleter deleter;
654  			status_t status = get_trim_data_from_user(buffer, bufferLength,
655  				deleter, trimData);
656  			if (status != B_OK)
657  				return status;
658  
659  			trimData->trimmed_size = 0;
660  
661  			for (uint32 i = 0; i < trimData->range_count; i++) {
662  				uint64 trimmedSize = 0;
663  				status_t status = volume->Allocator().Trim(
664  					trimData->ranges[i].offset, trimData->ranges[i].size,
665  					trimmedSize);
666  				if (status != B_OK)
667  					return status;
668  
669  				trimData->trimmed_size += trimmedSize;
670  			}
671  
672  			return copy_trim_data_to_user(buffer, trimData);
673  		}
674  #endif
675  
676  		case BFS_IOCTL_VERSION:
677  		{
678  			uint32 version = 0x10000;
679  			return user_memcpy(buffer, &version, sizeof(uint32));
680  		}
681  		case BFS_IOCTL_START_CHECKING:
682  		{
683  			// start checking
684  			status_t status = volume->CreateCheckVisitor();
685  			if (status != B_OK)
686  				return status;
687  
688  			CheckVisitor* checker = volume->CheckVisitor();
689  
690  			if (user_memcpy(&checker->Control(), buffer,
691  					sizeof(check_control)) != B_OK) {
692  				return B_BAD_ADDRESS;
693  			}
694  
695  			status = checker->StartBitmapPass();
696  			if (status == B_OK) {
697  				file_cookie* cookie = (file_cookie*)_cookie;
698  				cookie->open_mode |= BFS_OPEN_MODE_CHECKING;
699  			}
700  
701  			return status;
702  		}
703  		case BFS_IOCTL_STOP_CHECKING:
704  		{
705  			// stop checking
706  			CheckVisitor* checker = volume->CheckVisitor();
707  			if (checker == NULL)
708  				return B_NO_INIT;
709  
710  			status_t status = checker->StopChecking();
711  
712  			if (status == B_OK) {
713  				file_cookie* cookie = (file_cookie*)_cookie;
714  				cookie->open_mode &= ~BFS_OPEN_MODE_CHECKING;
715  
716  				status = user_memcpy(buffer, &checker->Control(),
717  					sizeof(check_control));
718  			}
719  
720  			volume->DeleteCheckVisitor();
721  			volume->SetCheckingThread(-1);
722  
723  			return status;
724  		}
725  		case BFS_IOCTL_CHECK_NEXT_NODE:
726  		{
727  			// check next
728  			CheckVisitor* checker = volume->CheckVisitor();
729  			if (checker == NULL)
730  				return B_NO_INIT;
731  
732  			volume->SetCheckingThread(find_thread(NULL));
733  
734  			checker->Control().errors = 0;
735  
736  			status_t status = checker->Next();
737  			if (status == B_ENTRY_NOT_FOUND) {
738  				checker->Control().status = B_ENTRY_NOT_FOUND;
739  					// tells StopChecking() that we finished the pass
740  
741  				if (checker->Pass() == BFS_CHECK_PASS_BITMAP) {
742  					if (checker->WriteBackCheckBitmap() == B_OK)
743  						status = checker->StartIndexPass();
744  				}
745  			}
746  
747  			if (status == B_OK) {
748  				status = user_memcpy(buffer, &checker->Control(),
749  					sizeof(check_control));
750  			}
751  
752  			return status;
753  		}
754  		case BFS_IOCTL_UPDATE_BOOT_BLOCK:
755  		{
756  			// let's makebootable (or anyone else) update the boot block
757  			// while BFS is mounted
758  			update_boot_block update;
759  			if (bufferLength != sizeof(update_boot_block))
760  				return B_BAD_VALUE;
761  			if (user_memcpy(&update, buffer, sizeof(update_boot_block)) != B_OK)
762  				return B_BAD_ADDRESS;
763  
764  			uint32 minOffset = offsetof(disk_super_block, pad_to_block);
765  			if (update.offset < minOffset
766  				|| update.offset >= 512 || update.length > 512 - minOffset
767  				|| update.length + update.offset > 512) {
768  				return B_BAD_VALUE;
769  			}
770  			if (user_memcpy((uint8*)&volume->SuperBlock() + update.offset,
771  					update.data, update.length) != B_OK) {
772  				return B_BAD_ADDRESS;
773  			}
774  
775  			return volume->WriteSuperBlock();
776  		}
777  		case BFS_IOCTL_RESIZE:
778  		{
779  			if (bufferLength != sizeof(uint64))
780  				return B_BAD_VALUE;
781  
782  			uint64 size;
783  			if (user_memcpy((uint8*)&size, buffer, sizeof(uint64)) != B_OK)
784  				return B_BAD_ADDRESS;
785  
786  			ResizeVisitor resizer(volume);
787  			return resizer.Resize(size, -1);
788  		}
789  
790  #ifdef DEBUG_FRAGMENTER
791  		case 56741:
792  		{
793  			BlockAllocator& allocator = volume->Allocator();
794  			allocator.Fragment();
795  			return B_OK;
796  		}
797  #endif
798  
799  #ifdef DEBUG
800  		case 56742:
801  		{
802  			// allocate all free blocks and zero them out
803  			// (a test for the BlockAllocator)!
804  			BlockAllocator& allocator = volume->Allocator();
805  			Transaction transaction(volume, 0);
806  			CachedBlock cached(volume);
807  			block_run run;
808  			while (allocator.AllocateBlocks(transaction, 8, 0, 64, 1, run)
809  					== B_OK) {
810  				PRINT(("write block_run(%" B_PRId32 ", %" B_PRIu16
811  					", %" B_PRIu16 ")\n", run.allocation_group, run.start,
812  					run.length));
813  
814  				for (int32 i = 0;i < run.length;i++) {
815  					status_t status = cached.SetToWritable(transaction, run);
816  					if (status == B_OK)
817  						memset(cached.WritableBlock(), 0, volume->BlockSize());
818  				}
819  			}
820  			return B_OK;
821  		}
822  #endif
823  	}
824  	return B_DEV_INVALID_IOCTL;
825  }
826  
827  
828  /*!	Sets the open-mode flags for the open file cookie - only
829  	supports O_APPEND currently, but that should be sufficient
830  	for a file system.
831  */
832  static status_t
833  bfs_set_flags(fs_volume* _volume, fs_vnode* _node, void* _cookie, int flags)
834  {
835  	FUNCTION_START(("node = %p, flags = %d", _node, flags));
836  
837  	file_cookie* cookie = (file_cookie*)_cookie;
838  	cookie->open_mode = (cookie->open_mode & ~O_APPEND) | (flags & O_APPEND);
839  
840  	return B_OK;
841  }
842  
843  
844  static status_t
845  bfs_fsync(fs_volume* _volume, fs_vnode* _node)
846  {
847  	FUNCTION();
848  
849  	Inode* inode = (Inode*)_node->private_node;
850  	return inode->Sync();
851  }
852  
853  
854  static status_t
855  bfs_read_stat(fs_volume* _volume, fs_vnode* _node, struct stat* stat)
856  {
857  	FUNCTION();
858  
859  	Inode* inode = (Inode*)_node->private_node;
860  	fill_stat_buffer(inode, *stat);
861  	return B_OK;
862  }
863  
864  
865  static status_t
866  bfs_write_stat(fs_volume* _volume, fs_vnode* _node, const struct stat* stat,
867  	uint32 mask)
868  {
869  	FUNCTION();
870  
871  	Volume* volume = (Volume*)_volume->private_volume;
872  	Inode* inode = (Inode*)_node->private_node;
873  
874  	if (volume->IsReadOnly())
875  		return B_READ_ONLY_DEVICE;
876  
877  	// TODO: we should definitely check a bit more if the new stats are
878  	//	valid - or even better, the VFS should check this before calling us
879  
880  	bfs_inode& node = inode->Node();
881  	bool updateTime = false;
882  	uid_t uid = geteuid();
883  
884  	bool isOwnerOrRoot = uid == 0 || uid == (uid_t)node.UserID();
885  	bool hasWriteAccess = inode->CheckPermissions(W_OK) == B_OK;
886  
887  	Transaction transaction(volume, inode->BlockNumber());
888  	inode->WriteLockInTransaction(transaction);
889  
890  	if ((mask & B_STAT_SIZE) != 0 && inode->Size() != stat->st_size) {
891  		// Since B_STAT_SIZE is the only thing that can fail directly, we
892  		// do it first, so that the inode state will still be consistent
893  		// with the on-disk version
894  		if (inode->IsDirectory())
895  			return B_IS_A_DIRECTORY;
896  		if (!inode->IsFile())
897  			return B_BAD_VALUE;
898  		if (!hasWriteAccess)
899  			RETURN_ERROR(B_NOT_ALLOWED);
900  
901  		off_t oldSize = inode->Size();
902  
903  		status_t status = inode->SetFileSize(transaction, stat->st_size);
904  		if (status != B_OK)
905  			return status;
906  
907  		// fill the new blocks (if any) with zeros
908  		if ((mask & B_STAT_SIZE_INSECURE) == 0) {
909  			// We must not keep the inode locked during a write operation,
910  			// or else we might deadlock.
911  			rw_lock_write_unlock(&inode->Lock());
912  			inode->FillGapWithZeros(oldSize, inode->Size());
913  			rw_lock_write_lock(&inode->Lock());
914  		}
915  
916  		if (!inode->IsDeleted()) {
917  			Index index(volume);
918  			index.UpdateSize(transaction, inode);
919  
920  			updateTime = true;
921  		}
922  	}
923  
924  	if ((mask & B_STAT_UID) != 0) {
925  		// only root should be allowed
926  		if (uid != 0)
927  			RETURN_ERROR(B_NOT_ALLOWED);
928  		node.uid = HOST_ENDIAN_TO_BFS_INT32(stat->st_uid);
929  		updateTime = true;
930  	}
931  
932  	if ((mask & B_STAT_GID) != 0) {
933  		// only the user or root can do that
934  		if (!isOwnerOrRoot)
935  			RETURN_ERROR(B_NOT_ALLOWED);
936  		node.gid = HOST_ENDIAN_TO_BFS_INT32(stat->st_gid);
937  		updateTime = true;
938  	}
939  
940  	if ((mask & B_STAT_MODE) != 0) {
941  		// only the user or root can do that
942  		if (!isOwnerOrRoot)
943  			RETURN_ERROR(B_NOT_ALLOWED);
944  		PRINT(("original mode = %u, stat->st_mode = %u\n",
945  			(unsigned int)node.Mode(), (unsigned int)stat->st_mode));
946  		node.mode = HOST_ENDIAN_TO_BFS_INT32((node.Mode() & ~S_IUMSK)
947  			| (stat->st_mode & S_IUMSK));
948  		updateTime = true;
949  	}
950  
951  	if ((mask & B_STAT_CREATION_TIME) != 0) {
952  		// the user or root can do that or any user with write access
953  		if (!isOwnerOrRoot && !hasWriteAccess)
954  			RETURN_ERROR(B_NOT_ALLOWED);
955  		node.create_time
956  			= HOST_ENDIAN_TO_BFS_INT64(bfs_inode::ToInode(stat->st_crtim));
957  	}
958  
959  	if ((mask & B_STAT_MODIFICATION_TIME) != 0) {
960  		// the user or root can do that or any user with write access
961  		if (!isOwnerOrRoot && !hasWriteAccess)
962  			RETURN_ERROR(B_NOT_ALLOWED);
963  		if (!inode->InLastModifiedIndex()) {
964  			// directory modification times are not part of the index
965  			node.last_modified_time
966  				= HOST_ENDIAN_TO_BFS_INT64(bfs_inode::ToInode(stat->st_mtim));
967  		} else if (!inode->IsDeleted()) {
968  			// Index::UpdateLastModified() will set the new time in the inode
969  			Index index(volume);
970  			index.UpdateLastModified(transaction, inode,
971  				bfs_inode::ToInode(stat->st_mtim));
972  		}
973  	}
974  
975  	if ((mask & B_STAT_CHANGE_TIME) != 0 || updateTime) {
976  		// the user or root can do that or any user with write access
977  		if (!isOwnerOrRoot && !hasWriteAccess)
978  			RETURN_ERROR(B_NOT_ALLOWED);
979  		bigtime_t newTime;
980  		if ((mask & B_STAT_CHANGE_TIME) == 0)
981  			newTime = bfs_inode::ToInode(real_time_clock_usecs());
982  		else
983  			newTime = bfs_inode::ToInode(stat->st_ctim);
984  
985  		node.status_change_time = HOST_ENDIAN_TO_BFS_INT64(newTime);
986  	}
987  
988  	status_t status = inode->WriteBack(transaction);
989  	if (status == B_OK)
990  		status = transaction.Done();
991  	if (status == B_OK)
992  		notify_stat_changed(volume->ID(), inode->ParentID(), inode->ID(), mask);
993  
994  	return status;
995  }
996  
997  
998  status_t
999  bfs_create(fs_volume* _volume, fs_vnode* _directory, const char* name,
1000  	int openMode, int mode, void** _cookie, ino_t* _vnodeID)
1001  {
1002  	FUNCTION_START(("name = \"%s\", perms = %d, openMode = %d\n", name, mode,
1003  		openMode));
1004  
1005  	Volume* volume = (Volume*)_volume->private_volume;
1006  	Inode* directory = (Inode*)_directory->private_node;
1007  
1008  	if (volume->IsReadOnly())
1009  		return B_READ_ONLY_DEVICE;
1010  
1011  	if (!directory->IsDirectory())
1012  		RETURN_ERROR(B_BAD_TYPE);
1013  
1014  	// We are creating the cookie at this point, so that we don't have
1015  	// to remove the inode if we don't have enough free memory later...
1016  	file_cookie* cookie = new(std::nothrow) file_cookie;
1017  	if (cookie == NULL)
1018  		RETURN_ERROR(B_NO_MEMORY);
1019  
1020  	// initialize the cookie
1021  	cookie->open_mode = openMode;
1022  	cookie->last_size = 0;
1023  	cookie->last_notification = system_time();
1024  
1025  	Transaction transaction(volume, directory->BlockNumber());
1026  
1027  	Inode* inode;
1028  	bool created;
1029  	status_t status = Inode::Create(transaction, directory, name,
1030  		S_FILE | (mode & S_IUMSK), openMode, 0, &created, _vnodeID, &inode);
1031  
1032  	// Disable the file cache, if requested?
1033  	if (status == B_OK && (openMode & O_NOCACHE) != 0
1034  		&& inode->FileCache() != NULL) {
1035  		status = file_cache_disable(inode->FileCache());
1036  	}
1037  
1038  	entry_cache_add(volume->ID(), directory->ID(), name, *_vnodeID);
1039  
1040  	if (status == B_OK)
1041  		status = transaction.Done();
1042  
1043  	if (status == B_OK) {
1044  		// register the cookie
1045  		*_cookie = cookie;
1046  
1047  		if (created) {
1048  			notify_entry_created(volume->ID(), directory->ID(), name,
1049  				*_vnodeID);
1050  		}
1051  	} else {
1052  		entry_cache_remove(volume->ID(), directory->ID(), name);
1053  		delete cookie;
1054  	}
1055  
1056  	return status;
1057  }
1058  
1059  
1060  static status_t
1061  bfs_create_symlink(fs_volume* _volume, fs_vnode* _directory, const char* name,
1062  	const char* path, int mode)
1063  {
1064  	FUNCTION_START(("name = \"%s\", path = \"%s\"\n", name, path));
1065  
1066  	Volume* volume = (Volume*)_volume->private_volume;
1067  	Inode* directory = (Inode*)_directory->private_node;
1068  
1069  	if (volume->IsReadOnly())
1070  		return B_READ_ONLY_DEVICE;
1071  
1072  	if (!directory->IsDirectory())
1073  		RETURN_ERROR(B_BAD_TYPE);
1074  
1075  	status_t status = directory->CheckPermissions(W_OK);
1076  	if (status < B_OK)
1077  		RETURN_ERROR(status);
1078  
1079  	Transaction transaction(volume, directory->BlockNumber());
1080  
1081  	Inode* link;
1082  	off_t id;
1083  	status = Inode::Create(transaction, directory, name, S_SYMLINK | 0777,
1084  		0, 0, NULL, &id, &link);
1085  	if (status < B_OK)
1086  		RETURN_ERROR(status);
1087  
1088  	size_t length = strlen(path);
1089  	if (length < SHORT_SYMLINK_NAME_LENGTH) {
1090  		strcpy(link->Node().short_symlink, path);
1091  	} else {
1092  		link->Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_LONG_SYMLINK
1093  			| INODE_LOGGED);
1094  
1095  		// links usually don't have a file cache attached - but we now need one
1096  		link->SetFileCache(file_cache_create(volume->ID(), link->ID(), 0));
1097  		link->SetMap(file_map_create(volume->ID(), link->ID(), 0));
1098  
1099  		// The following call will have to write the inode back, so
1100  		// we don't have to do that here...
1101  		status = link->WriteAt(transaction, 0, (const uint8*)path, &length);
1102  	}
1103  
1104  	if (status == B_OK)
1105  		status = link->WriteBack(transaction);
1106  
1107  	// Inode::Create() left the inode locked in memory, and also doesn't
1108  	// publish links
1109  	publish_vnode(volume->FSVolume(), id, link, &gBFSVnodeOps, link->Mode(), 0);
1110  	put_vnode(volume->FSVolume(), id);
1111  
1112  	if (status == B_OK) {
1113  		entry_cache_add(volume->ID(), directory->ID(), name, id);
1114  
1115  		status = transaction.Done();
1116  		if (status == B_OK)
1117  			notify_entry_created(volume->ID(), directory->ID(), name, id);
1118  		else
1119  			entry_cache_remove(volume->ID(), directory->ID(), name);
1120  	}
1121  
1122  	return status;
1123  }
1124  
1125  
1126  status_t
1127  bfs_link(fs_volume* _volume, fs_vnode* dir, const char* name, fs_vnode* node)
1128  {
1129  	FUNCTION_START(("name = \"%s\"\n", name));
1130  
1131  	// This one won't be implemented in a binary compatible BFS
1132  	return B_UNSUPPORTED;
1133  }
1134  
1135  
1136  status_t
1137  bfs_unlink(fs_volume* _volume, fs_vnode* _directory, const char* name)
1138  {
1139  	FUNCTION_START(("name = \"%s\"\n", name));
1140  
1141  	if (!strcmp(name, "..") || !strcmp(name, "."))
1142  		return B_NOT_ALLOWED;
1143  
1144  	Volume* volume = (Volume*)_volume->private_volume;
1145  	Inode* directory = (Inode*)_directory->private_node;
1146  
1147  	status_t status = directory->CheckPermissions(W_OK);
1148  	if (status < B_OK)
1149  		return status;
1150  
1151  	Transaction transaction(volume, directory->BlockNumber());
1152  
1153  	off_t id;
1154  	status = directory->Remove(transaction, name, &id);
1155  	if (status == B_OK) {
1156  		entry_cache_remove(volume->ID(), directory->ID(), name);
1157  
1158  		status = transaction.Done();
1159  		if (status == B_OK)
1160  			notify_entry_removed(volume->ID(), directory->ID(), name, id);
1161  		else
1162  			entry_cache_add(volume->ID(), directory->ID(), name, id);
1163  	}
1164  	return status;
1165  }
1166  
1167  
1168  status_t
1169  bfs_rename(fs_volume* _volume, fs_vnode* _oldDir, const char* oldName,
1170  	fs_vnode* _newDir, const char* newName)
1171  {
1172  	FUNCTION_START(("oldDir = %p, oldName = \"%s\", newDir = %p, newName = "
1173  		"\"%s\"\n", _oldDir, oldName, _newDir, newName));
1174  
1175  	Volume* volume = (Volume*)_volume->private_volume;
1176  	Inode* oldDirectory = (Inode*)_oldDir->private_node;
1177  	Inode* newDirectory = (Inode*)_newDir->private_node;
1178  
1179  	// are we already done?
1180  	if (oldDirectory == newDirectory && !strcmp(oldName, newName))
1181  		return B_OK;
1182  
1183  	Transaction transaction(volume, oldDirectory->BlockNumber());
1184  
1185  	oldDirectory->WriteLockInTransaction(transaction);
1186  	if (oldDirectory != newDirectory)
1187  		newDirectory->WriteLockInTransaction(transaction);
1188  
1189  	// are we allowed to do what we've been told?
1190  	status_t status = oldDirectory->CheckPermissions(W_OK);
1191  	if (status == B_OK)
1192  		status = newDirectory->CheckPermissions(W_OK);
1193  	if (status != B_OK)
1194  		return status;
1195  
1196  	// Get the directory's tree, and a pointer to the inode which should be
1197  	// changed
1198  	BPlusTree* tree = oldDirectory->Tree();
1199  	if (tree == NULL)
1200  		RETURN_ERROR(B_BAD_VALUE);
1201  
1202  	off_t id;
1203  	status = tree->Find((const uint8*)oldName, strlen(oldName), &id);
1204  	if (status != B_OK)
1205  		RETURN_ERROR(status);
1206  
1207  	Vnode vnode(volume, id);
1208  	Inode* inode;
1209  	if (vnode.Get(&inode) != B_OK)
1210  		return B_IO_ERROR;
1211  
1212  	// Don't move a directory into one of its children - we soar up
1213  	// from the newDirectory to either the root node or the old
1214  	// directory, whichever comes first.
1215  	// If we meet our inode on that way, we have to bail out.
1216  
1217  	if (oldDirectory != newDirectory) {
1218  		ino_t parent = newDirectory->ID();
1219  		ino_t root = volume->RootNode()->ID();
1220  
1221  		while (true) {
1222  			if (parent == id)
1223  				return B_BAD_VALUE;
1224  			else if (parent == root || parent == oldDirectory->ID())
1225  				break;
1226  
1227  			Vnode vnode(volume, parent);
1228  			Inode* parentNode;
1229  			if (vnode.Get(&parentNode) != B_OK)
1230  				return B_ERROR;
1231  
1232  			parent = volume->ToVnode(parentNode->Parent());
1233  		}
1234  	}
1235  
1236  	// Everything okay? Then lets get to work...
1237  
1238  	// First, try to make sure there is nothing that will stop us in
1239  	// the target directory - since this is the only non-critical
1240  	// failure, we will test this case first
1241  	BPlusTree* newTree = tree;
1242  	if (newDirectory != oldDirectory) {
1243  		newTree = newDirectory->Tree();
1244  		if (newTree == NULL)
1245  			RETURN_ERROR(B_BAD_VALUE);
1246  	}
1247  
1248  	status = newTree->Insert(transaction, (const uint8*)newName,
1249  		strlen(newName), id);
1250  	if (status == B_NAME_IN_USE) {
1251  		// If there is already a file with that name, we have to remove
1252  		// it, as long it's not a directory with files in it
1253  		off_t clobber;
1254  		if (newTree->Find((const uint8*)newName, strlen(newName), &clobber)
1255  				< B_OK)
1256  			return B_NAME_IN_USE;
1257  		if (clobber == id)
1258  			return B_BAD_VALUE;
1259  
1260  		Vnode vnode(volume, clobber);
1261  		Inode* other;
1262  		if (vnode.Get(&other) < B_OK)
1263  			return B_NAME_IN_USE;
1264  
1265  		// only allowed, if either both nodes are directories or neither is
1266  		if (inode->IsDirectory() != other->IsDirectory())
1267  			return other->IsDirectory() ? B_IS_A_DIRECTORY : B_NOT_A_DIRECTORY;
1268  
1269  		status = newDirectory->Remove(transaction, newName, NULL,
1270  			other->IsDirectory());
1271  		if (status < B_OK)
1272  			return status;
1273  
1274  		entry_cache_remove(volume->ID(), newDirectory->ID(), newName);
1275  
1276  		notify_entry_removed(volume->ID(), newDirectory->ID(), newName,
1277  			clobber);
1278  
1279  		status = newTree->Insert(transaction, (const uint8*)newName,
1280  			strlen(newName), id);
1281  	}
1282  	if (status != B_OK)
1283  		return status;
1284  
1285  	inode->WriteLockInTransaction(transaction);
1286  
1287  	volume->UpdateLiveQueriesRenameMove(inode, oldDirectory->ID(), oldName,
1288  		newDirectory->ID(), newName);
1289  
1290  	// update the name only when they differ
1291  	if (strcmp(oldName, newName)) {
1292  		status = inode->SetName(transaction, newName);
1293  		if (status == B_OK) {
1294  			Index index(volume);
1295  			index.UpdateName(transaction, oldName, newName, inode);
1296  		}
1297  	}
1298  
1299  	if (status == B_OK) {
1300  		status = tree->Remove(transaction, (const uint8*)oldName,
1301  			strlen(oldName), id);
1302  		if (status == B_OK) {
1303  			inode->Parent() = newDirectory->BlockRun();
1304  
1305  			// if it's a directory, update the parent directory pointer
1306  			// in its tree if necessary
1307  			BPlusTree* movedTree = inode->Tree();
1308  			if (oldDirectory != newDirectory
1309  				&& inode->IsDirectory()
1310  				&& movedTree != NULL) {
1311  				status = movedTree->Replace(transaction, (const uint8*)"..",
1312  					2, newDirectory->ID());
1313  
1314  				if (status == B_OK) {
1315  					// update/add the cache entry for the parent
1316  					entry_cache_add(volume->ID(), id, "..", newDirectory->ID());
1317  				}
1318  			}
1319  
1320  			if (status == B_OK && newDirectory != oldDirectory)
1321  				status = oldDirectory->ContainerContentsChanged(transaction);
1322  			if (status == B_OK)
1323  				status = newDirectory->ContainerContentsChanged(transaction);
1324  
1325  			if (status == B_OK)
1326  				status = inode->WriteBack(transaction);
1327  
1328  			if (status == B_OK) {
1329  				entry_cache_remove(volume->ID(), oldDirectory->ID(), oldName);
1330  				entry_cache_add(volume->ID(), newDirectory->ID(), newName, id);
1331  
1332  				status = transaction.Done();
1333  				if (status == B_OK) {
1334  					notify_entry_moved(volume->ID(), oldDirectory->ID(),
1335  						oldName, newDirectory->ID(), newName, id);
1336  					return B_OK;
1337  				}
1338  
1339  				entry_cache_remove(volume->ID(), newDirectory->ID(), newName);
1340  				entry_cache_add(volume->ID(), oldDirectory->ID(), oldName, id);
1341  			}
1342  		}
1343  	}
1344  
1345  	return status;
1346  }
1347  
1348  
1349  static status_t
1350  bfs_open(fs_volume* _volume, fs_vnode* _node, int openMode, void** _cookie)
1351  {
1352  	FUNCTION();
1353  
1354  	Volume* volume = (Volume*)_volume->private_volume;
1355  	Inode* inode = (Inode*)_node->private_node;
1356  
1357  	// Opening a directory read-only is allowed, although you can't read
1358  	// any data from it.
1359  	if (inode->IsDirectory() && (openMode & O_RWMASK) != O_RDONLY)
1360  		return B_IS_A_DIRECTORY;
1361  	if ((openMode & O_DIRECTORY) != 0 && !inode->IsDirectory())
1362  		return B_NOT_A_DIRECTORY;
1363  
1364  	status_t status = inode->CheckPermissions(open_mode_to_access(openMode)
1365  		| ((openMode & O_TRUNC) != 0 ? W_OK : 0));
1366  	if (status != B_OK)
1367  		RETURN_ERROR(status);
1368  
1369  	file_cookie* cookie = new(std::nothrow) file_cookie;
1370  	if (cookie == NULL)
1371  		RETURN_ERROR(B_NO_MEMORY);
1372  	ObjectDeleter<file_cookie> cookieDeleter(cookie);
1373  
1374  	// initialize the cookie
1375  	cookie->open_mode = openMode & BFS_OPEN_MODE_USER_MASK;
1376  	cookie->last_size = inode->Size();
1377  	cookie->last_notification = system_time();
1378  
1379  	// Disable the file cache, if requested?
1380  	CObjectDeleter<void, void, file_cache_enable> fileCacheEnabler;
1381  	if ((openMode & O_NOCACHE) != 0 && inode->FileCache() != NULL) {
1382  		status = file_cache_disable(inode->FileCache());
1383  		if (status != B_OK)
1384  			return status;
1385  		fileCacheEnabler.SetTo(inode->FileCache());
1386  	}
1387  
1388  	// Should we truncate the file?
1389  	if ((openMode & O_TRUNC) != 0) {
1390  		if ((openMode & O_RWMASK) == O_RDONLY)
1391  			return B_NOT_ALLOWED;
1392  
1393  		Transaction transaction(volume, inode->BlockNumber());
1394  		inode->WriteLockInTransaction(transaction);
1395  
1396  		status_t status = inode->SetFileSize(transaction, 0);
1397  		if (status == B_OK)
1398  			status = inode->WriteBack(transaction);
1399  		if (status == B_OK)
1400  			status = transaction.Done();
1401  		if (status != B_OK)
1402  			return status;
1403  	}
1404  
1405  	fileCacheEnabler.Detach();
1406  	cookieDeleter.Detach();
1407  	*_cookie = cookie;
1408  	return B_OK;
1409  }
1410  
1411  
1412  static status_t
1413  bfs_read(fs_volume* _volume, fs_vnode* _node, void* _cookie, off_t pos,
1414  	void* buffer, size_t* _length)
1415  {
1416  	//FUNCTION();
1417  	Inode* inode = (Inode*)_node->private_node;
1418  
1419  	if (!inode->HasUserAccessableStream()) {
1420  		*_length = 0;
1421  		return inode->IsDirectory() ? B_IS_A_DIRECTORY : B_BAD_VALUE;
1422  	}
1423  
1424  	return inode->ReadAt(pos, (uint8*)buffer, _length);
1425  }
1426  
1427  
1428  static status_t
1429  bfs_write(fs_volume* _volume, fs_vnode* _node, void* _cookie, off_t pos,
1430  	const void* buffer, size_t* _length)
1431  {
1432  	//FUNCTION();
1433  	Volume* volume = (Volume*)_volume->private_volume;
1434  	Inode* inode = (Inode*)_node->private_node;
1435  
1436  	if (volume->IsReadOnly())
1437  		return B_READ_ONLY_DEVICE;
1438  
1439  	if (!inode->HasUserAccessableStream()) {
1440  		*_length = 0;
1441  		return inode->IsDirectory() ? B_IS_A_DIRECTORY : B_BAD_VALUE;
1442  	}
1443  
1444  	file_cookie* cookie = (file_cookie*)_cookie;
1445  
1446  	if (cookie->open_mode & O_APPEND)
1447  		pos = inode->Size();
1448  
1449  	Transaction transaction;
1450  		// We are not starting the transaction here, since
1451  		// it might not be needed at all (the contents of
1452  		// regular files aren't logged)
1453  
1454  	status_t status = inode->WriteAt(transaction, pos, (const uint8*)buffer,
1455  		_length);
1456  	if (status == B_OK)
1457  		status = transaction.Done();
1458  	if (status == B_OK) {
1459  		InodeReadLocker locker(inode);
1460  
1461  		// periodically notify if the file size has changed
1462  		// TODO: should we better test for a change in the last_modified time only?
1463  		if (!inode->IsDeleted() && cookie->last_size != inode->Size()
1464  			&& system_time() > cookie->last_notification
1465  					+ INODE_NOTIFICATION_INTERVAL) {
1466  			notify_stat_changed(volume->ID(), inode->ParentID(), inode->ID(),
1467  				B_STAT_MODIFICATION_TIME | B_STAT_SIZE | B_STAT_INTERIM_UPDATE);
1468  			cookie->last_size = inode->Size();
1469  			cookie->last_notification = system_time();
1470  		}
1471  	}
1472  
1473  	return status;
1474  }
1475  
1476  
1477  static status_t
1478  bfs_close(fs_volume* _volume, fs_vnode* _node, void* _cookie)
1479  {
1480  	FUNCTION();
1481  	return B_OK;
1482  }
1483  
1484  
1485  static status_t
1486  bfs_free_cookie(fs_volume* _volume, fs_vnode* _node, void* _cookie)
1487  {
1488  	FUNCTION();
1489  
1490  	file_cookie* cookie = (file_cookie*)_cookie;
1491  	Volume* volume = (Volume*)_volume->private_volume;
1492  	Inode* inode = (Inode*)_node->private_node;
1493  
1494  	Transaction transaction;
1495  	bool needsTrimming = false;
1496  
1497  	if (!volume->IsReadOnly() && !volume->IsCheckingThread()) {
1498  		InodeReadLocker locker(inode);
1499  		needsTrimming = inode->NeedsTrimming();
1500  
1501  		if ((cookie->open_mode & O_RWMASK) != 0
1502  			&& !inode->IsDeleted()
1503  			&& (needsTrimming
1504  				|| inode->OldLastModified() != inode->LastModified()
1505  				|| (inode->InSizeIndex()
1506  					// TODO: this can prevent the size update notification
1507  					// for nodes not in the index!
1508  					&& inode->OldSize() != inode->Size()))) {
1509  			locker.Unlock();
1510  			transaction.Start(volume, inode->BlockNumber());
1511  		}
1512  	}
1513  
1514  	status_t status = transaction.IsStarted() ? B_OK : B_ERROR;
1515  
1516  	if (status == B_OK) {
1517  		inode->WriteLockInTransaction(transaction);
1518  
1519  		// trim the preallocated blocks and update the size,
1520  		// and last_modified indices if needed
1521  		bool changedSize = false, changedTime = false;
1522  		Index index(volume);
1523  
1524  		if (needsTrimming) {
1525  			status = inode->TrimPreallocation(transaction);
1526  			if (status < B_OK) {
1527  				FATAL(("Could not trim preallocated blocks: inode %" B_PRIdINO
1528  					", transaction %d: %s!\n", inode->ID(),
1529  					(int)transaction.ID(), strerror(status)));
1530  
1531  				// we still want this transaction to succeed
1532  				status = B_OK;
1533  			}
1534  		}
1535  		if (inode->OldSize() != inode->Size()) {
1536  			if (inode->InSizeIndex())
1537  				index.UpdateSize(transaction, inode);
1538  			changedSize = true;
1539  		}
1540  		if (inode->OldLastModified() != inode->LastModified()) {
1541  			if (inode->InLastModifiedIndex()) {
1542  				index.UpdateLastModified(transaction, inode,
1543  					inode->LastModified());
1544  			}
1545  			changedTime = true;
1546  
1547  			// updating the index doesn't write back the inode
1548  			inode->WriteBack(transaction);
1549  		}
1550  
1551  		if (changedSize || changedTime) {
1552  			notify_stat_changed(volume->ID(), inode->ParentID(), inode->ID(),
1553  				(changedTime ? B_STAT_MODIFICATION_TIME : 0)
1554  				| (changedSize ? B_STAT_SIZE : 0));
1555  		}
1556  	}
1557  	if (status == B_OK)
1558  		transaction.Done();
1559  
1560  	if ((cookie->open_mode & BFS_OPEN_MODE_CHECKING) != 0) {
1561  		// "chkbfs" exited abnormally, so we have to stop it here...
1562  		FATAL(("check process was aborted!\n"));
1563  		volume->CheckVisitor()->StopChecking();
1564  		volume->DeleteCheckVisitor();
1565  	}
1566  
1567  	if ((cookie->open_mode & O_NOCACHE) != 0 && inode->FileCache() != NULL)
1568  		file_cache_enable(inode->FileCache());
1569  
1570  	delete cookie;
1571  	return B_OK;
1572  }
1573  
1574  
1575  /*!	Checks access permissions, return B_NOT_ALLOWED if the action
1576  	is not allowed.
1577  */
1578  static status_t
1579  bfs_access(fs_volume* _volume, fs_vnode* _node, int accessMode)
1580  {
1581  	//FUNCTION();
1582  
1583  	Inode* inode = (Inode*)_node->private_node;
1584  	status_t status = inode->CheckPermissions(accessMode);
1585  	if (status < B_OK)
1586  		RETURN_ERROR(status);
1587  
1588  	return B_OK;
1589  }
1590  
1591  
1592  static status_t
1593  bfs_read_link(fs_volume* _volume, fs_vnode* _node, char* buffer,
1594  	size_t* _bufferSize)
1595  {
1596  	FUNCTION();
1597  
1598  	Inode* inode = (Inode*)_node->private_node;
1599  
1600  	if (!inode->IsSymLink())
1601  		RETURN_ERROR(B_BAD_VALUE);
1602  
1603  	if ((inode->Flags() & INODE_LONG_SYMLINK) != 0) {
1604  		status_t status = inode->ReadAt(0, (uint8*)buffer, _bufferSize);
1605  		if (status < B_OK)
1606  			RETURN_ERROR(status);
1607  
1608  		*_bufferSize = inode->Size();
1609  		return B_OK;
1610  	}
1611  
1612  	size_t linkLength = strlen(inode->Node().short_symlink);
1613  
1614  	size_t bytesToCopy = min_c(linkLength, *_bufferSize);
1615  
1616  	*_bufferSize = linkLength;
1617  
1618  	memcpy(buffer, inode->Node().short_symlink, bytesToCopy);
1619  	return B_OK;
1620  }
1621  
1622  
1623  //	#pragma mark - Directory functions
1624  
1625  
1626  static status_t
1627  bfs_create_dir(fs_volume* _volume, fs_vnode* _directory, const char* name,
1628  	int mode)
1629  {
1630  	FUNCTION_START(("name = \"%s\", perms = %d\n", name, mode));
1631  
1632  	Volume* volume = (Volume*)_volume->private_volume;
1633  	Inode* directory = (Inode*)_directory->private_node;
1634  
1635  	if (volume->IsReadOnly())
1636  		return B_READ_ONLY_DEVICE;
1637  
1638  	if (!directory->IsDirectory())
1639  		RETURN_ERROR(B_BAD_TYPE);
1640  
1641  	status_t status = directory->CheckPermissions(W_OK);
1642  	if (status < B_OK)
1643  		RETURN_ERROR(status);
1644  
1645  	Transaction transaction(volume, directory->BlockNumber());
1646  
1647  	// Inode::Create() locks the inode if we pass the "id" parameter, but we
1648  	// need it anyway
1649  	off_t id;
1650  	status = Inode::Create(transaction, directory, name,
1651  		S_DIRECTORY | (mode & S_IUMSK), 0, 0, NULL, &id);
1652  	if (status == B_OK) {
1653  		put_vnode(volume->FSVolume(), id);
1654  
1655  		entry_cache_add(volume->ID(), directory->ID(), name, id);
1656  
1657  		status = transaction.Done();
1658  		if (status == B_OK)
1659  			notify_entry_created(volume->ID(), directory->ID(), name, id);
1660  		else
1661  			entry_cache_remove(volume->ID(), directory->ID(), name);
1662  	}
1663  
1664  	return status;
1665  }
1666  
1667  
1668  static status_t
1669  bfs_remove_dir(fs_volume* _volume, fs_vnode* _directory, const char* name)
1670  {
1671  	FUNCTION_START(("name = \"%s\"\n", name));
1672  
1673  	Volume* volume = (Volume*)_volume->private_volume;
1674  	Inode* directory = (Inode*)_directory->private_node;
1675  
1676  	Transaction transaction(volume, directory->BlockNumber());
1677  
1678  	off_t id;
1679  	status_t status = directory->Remove(transaction, name, &id, true);
1680  	if (status == B_OK) {
1681  		// Remove the cache entry for the directory and potentially also
1682  		// the parent entry still belonging to the directory
1683  		entry_cache_remove(volume->ID(), directory->ID(), name);
1684  		entry_cache_remove(volume->ID(), id, "..");
1685  
1686  		status = transaction.Done();
1687  		if (status == B_OK)
1688  			notify_entry_removed(volume->ID(), directory->ID(), name, id);
1689  		else {
1690  			entry_cache_add(volume->ID(), directory->ID(), name, id);
1691  			entry_cache_add(volume->ID(), id, "..", id);
1692  		}
1693  	}
1694  
1695  	return status;
1696  }
1697  
1698  
1699  /*!	Opens a directory ready to be traversed.
1700  	bfs_open_dir() is also used by bfs_open_index_dir().
1701  */
1702  static status_t
1703  bfs_open_dir(fs_volume* _volume, fs_vnode* _node, void** _cookie)
1704  {
1705  	FUNCTION();
1706  
1707  	Inode* inode = (Inode*)_node->private_node;
1708  	status_t status = inode->CheckPermissions(R_OK);
1709  	if (status < B_OK)
1710  		RETURN_ERROR(status);
1711  
1712  	// we don't ask here for directories only, because the bfs_open_index_dir()
1713  	// function utilizes us (so we must be able to open indices as well)
1714  	if (!inode->IsContainer())
1715  		RETURN_ERROR(B_NOT_A_DIRECTORY);
1716  
1717  	BPlusTree* tree = inode->Tree();
1718  	if (tree == NULL)
1719  		RETURN_ERROR(B_BAD_VALUE);
1720  
1721  	TreeIterator* iterator = new(std::nothrow) TreeIterator(tree);
1722  	if (iterator == NULL)
1723  		RETURN_ERROR(B_NO_MEMORY);
1724  
1725  	*_cookie = iterator;
1726  	return B_OK;
1727  }
1728  
1729  
1730  static status_t
1731  bfs_read_dir(fs_volume* _volume, fs_vnode* _node, void* _cookie,
1732  	struct dirent* dirent, size_t bufferSize, uint32* _num)
1733  {
1734  	FUNCTION();
1735  
1736  	TreeIterator* iterator = (TreeIterator*)_cookie;
1737  	Volume* volume = (Volume*)_volume->private_volume;
1738  
1739  	uint32 maxCount = *_num;
1740  	uint32 count = 0;
1741  
1742  	while (count < maxCount && bufferSize > sizeof(struct dirent)) {
1743  		ino_t id;
1744  		uint16 length;
1745  		size_t nameBufferSize = bufferSize - offsetof(struct dirent, d_name);
1746  
1747  		status_t status = iterator->GetNextEntry(dirent->d_name, &length,
1748  			nameBufferSize, &id);
1749  
1750  		if (status == B_ENTRY_NOT_FOUND)
1751  			break;
1752  
1753  		if (status == B_BUFFER_OVERFLOW) {
1754  			// the remaining name buffer length was too small
1755  			if (count == 0)
1756  				RETURN_ERROR(B_BUFFER_OVERFLOW);
1757  			break;
1758  		}
1759  
1760  		if (status != B_OK)
1761  			RETURN_ERROR(status);
1762  
1763  		dirent->d_dev = volume->ID();
1764  		dirent->d_ino = id;
1765  
1766  		dirent = next_dirent(dirent, length, bufferSize);
1767  		count++;
1768  	}
1769  
1770  	*_num = count;
1771  	return B_OK;
1772  }
1773  
1774  
1775  /*!	Sets the TreeIterator back to the beginning of the directory. */
1776  static status_t
1777  bfs_rewind_dir(fs_volume* /*_volume*/, fs_vnode* /*node*/, void* _cookie)
1778  {
1779  	FUNCTION();
1780  	TreeIterator* iterator = (TreeIterator*)_cookie;
1781  
1782  	return iterator->Rewind();
1783  }
1784  
1785  
1786  static status_t
1787  bfs_close_dir(fs_volume* /*_volume*/, fs_vnode* /*node*/, void* /*_cookie*/)
1788  {
1789  	FUNCTION();
1790  	return B_OK;
1791  }
1792  
1793  
1794  static status_t
1795  bfs_free_dir_cookie(fs_volume* _volume, fs_vnode* node, void* _cookie)
1796  {
1797  	delete (TreeIterator*)_cookie;
1798  	return B_OK;
1799  }
1800  
1801  
1802  //	#pragma mark - Attribute functions
1803  
1804  
1805  static status_t
1806  bfs_open_attr_dir(fs_volume* _volume, fs_vnode* _node, void** _cookie)
1807  {
1808  	Inode* inode = (Inode*)_node->private_node;
1809  
1810  	FUNCTION();
1811  
1812  	AttributeIterator* iterator = new(std::nothrow) AttributeIterator(inode);
1813  	if (iterator == NULL)
1814  		RETURN_ERROR(B_NO_MEMORY);
1815  
1816  	*_cookie = iterator;
1817  	return B_OK;
1818  }
1819  
1820  
1821  static status_t
1822  bfs_close_attr_dir(fs_volume* _volume, fs_vnode* node, void* cookie)
1823  {
1824  	FUNCTION();
1825  	return B_OK;
1826  }
1827  
1828  
1829  static status_t
1830  bfs_free_attr_dir_cookie(fs_volume* _volume, fs_vnode* node, void* _cookie)
1831  {
1832  	FUNCTION();
1833  	AttributeIterator* iterator = (AttributeIterator*)_cookie;
1834  
1835  	delete iterator;
1836  	return B_OK;
1837  }
1838  
1839  
1840  static status_t
1841  bfs_rewind_attr_dir(fs_volume* _volume, fs_vnode* _node, void* _cookie)
1842  {
1843  	FUNCTION();
1844  
1845  	AttributeIterator* iterator = (AttributeIterator*)_cookie;
1846  	RETURN_ERROR(iterator->Rewind());
1847  }
1848  
1849  
1850  static status_t
1851  bfs_read_attr_dir(fs_volume* _volume, fs_vnode* node, void* _cookie,
1852  	struct dirent* dirent, size_t bufferSize, uint32* _num)
1853  {
1854  	FUNCTION();
1855  	AttributeIterator* iterator = (AttributeIterator*)_cookie;
1856  
1857  	uint32 type;
1858  	size_t length;
1859  	status_t status = iterator->GetNext(dirent->d_name, &length, &type,
1860  		&dirent->d_ino);
1861  	if (status == B_ENTRY_NOT_FOUND) {
1862  		*_num = 0;
1863  		return B_OK;
1864  	} else if (status != B_OK) {
1865  		RETURN_ERROR(status);
1866  	}
1867  
1868  	Volume* volume = (Volume*)_volume->private_volume;
1869  
1870  	dirent->d_dev = volume->ID();
1871  	dirent->d_reclen = offsetof(struct dirent, d_name) + length + 1;
1872  
1873  	*_num = 1;
1874  	return B_OK;
1875  }
1876  
1877  
1878  static status_t
1879  bfs_create_attr(fs_volume* _volume, fs_vnode* _node, const char* name,
1880  	uint32 type, int openMode, void** _cookie)
1881  {
1882  	FUNCTION();
1883  
1884  	Volume* volume = (Volume*)_volume->private_volume;
1885  	if (volume->IsReadOnly())
1886  		return B_READ_ONLY_DEVICE;
1887  
1888  	Inode* inode = (Inode*)_node->private_node;
1889  	Attribute attribute(inode);
1890  
1891  	return attribute.Create(name, type, openMode, (attr_cookie**)_cookie);
1892  }
1893  
1894  
1895  static status_t
1896  bfs_open_attr(fs_volume* _volume, fs_vnode* _node, const char* name,
1897  	int openMode, void** _cookie)
1898  {
1899  	FUNCTION();
1900  
1901  	Inode* inode = (Inode*)_node->private_node;
1902  	Attribute attribute(inode);
1903  
1904  	return attribute.Open(name, openMode, (attr_cookie**)_cookie);
1905  }
1906  
1907  
1908  static status_t
1909  bfs_close_attr(fs_volume* _volume, fs_vnode* _file, void* cookie)
1910  {
1911  	return B_OK;
1912  }
1913  
1914  
1915  static status_t
1916  bfs_free_attr_cookie(fs_volume* _volume, fs_vnode* _file, void* cookie)
1917  {
1918  	delete (attr_cookie*)cookie;
1919  	return B_OK;
1920  }
1921  
1922  
1923  static status_t
1924  bfs_read_attr(fs_volume* _volume, fs_vnode* _file, void* _cookie, off_t pos,
1925  	void* buffer, size_t* _length)
1926  {
1927  	FUNCTION();
1928  
1929  	attr_cookie* cookie = (attr_cookie*)_cookie;
1930  	Inode* inode = (Inode*)_file->private_node;
1931  
1932  	Attribute attribute(inode, cookie);
1933  
1934  	return attribute.Read(cookie, pos, (uint8*)buffer, _length);
1935  }
1936  
1937  
1938  static status_t
1939  bfs_write_attr(fs_volume* _volume, fs_vnode* _file, void* _cookie,
1940  	off_t pos, const void* buffer, size_t* _length)
1941  {
1942  	FUNCTION();
1943  
1944  	attr_cookie* cookie = (attr_cookie*)_cookie;
1945  	Volume* volume = (Volume*)_volume->private_volume;
1946  	Inode* inode = (Inode*)_file->private_node;
1947  
1948  	Transaction transaction(volume, inode->BlockNumber());
1949  	Attribute attribute(inode, cookie);
1950  
1951  	bool created;
1952  	status_t status = attribute.Write(transaction, cookie, pos,
1953  		(const uint8*)buffer, _length, &created);
1954  	if (status == B_OK) {
1955  		status = transaction.Done();
1956  		if (status == B_OK) {
1957  			notify_attribute_changed(volume->ID(), inode->ParentID(),
1958  				inode->ID(), cookie->name,
1959  				created ? B_ATTR_CREATED : B_ATTR_CHANGED);
1960  			notify_stat_changed(volume->ID(), inode->ParentID(), inode->ID(),
1961  				B_STAT_CHANGE_TIME);
1962  		}
1963  	}
1964  
1965  	return status;
1966  }
1967  
1968  
1969  static status_t
1970  bfs_read_attr_stat(fs_volume* _volume, fs_vnode* _file, void* _cookie,
1971  	struct stat* stat)
1972  {
1973  	FUNCTION();
1974  
1975  	attr_cookie* cookie = (attr_cookie*)_cookie;
1976  	Inode* inode = (Inode*)_file->private_node;
1977  
1978  	Attribute attribute(inode, cookie);
1979  
1980  	return attribute.Stat(*stat);
1981  }
1982  
1983  
1984  static status_t
1985  bfs_write_attr_stat(fs_volume* _volume, fs_vnode* file, void* cookie,
1986  	const struct stat* stat, int statMask)
1987  {
1988  	// TODO: Implement (at least setting the size)!
1989  	return EOPNOTSUPP;
1990  }
1991  
1992  
1993  static status_t
1994  bfs_rename_attr(fs_volume* _volume, fs_vnode* fromFile, const char* fromName,
1995  	fs_vnode* toFile, const char* toName)
1996  {
1997  	FUNCTION_START(("name = \"%s\", to = \"%s\"\n", fromName, toName));
1998  
1999  	// TODO: implement bfs_rename_attr()!
2000  	// There will probably be an API to move one attribute to another file,
2001  	// making that function much more complicated - oh joy ;-)
2002  
2003  	return EOPNOTSUPP;
2004  }
2005  
2006  
2007  static status_t
2008  bfs_remove_attr(fs_volume* _volume, fs_vnode* _node, const char* name)
2009  {
2010  	FUNCTION_START(("name = \"%s\"\n", name));
2011  
2012  	Volume* volume = (Volume*)_volume->private_volume;
2013  	Inode* inode = (Inode*)_node->private_node;
2014  
2015  	status_t status = inode->CheckPermissions(W_OK);
2016  	if (status != B_OK)
2017  		return status;
2018  
2019  	Transaction transaction(volume, inode->BlockNumber());
2020  
2021  	status = inode->RemoveAttribute(transaction, name);
2022  	if (status == B_OK)
2023  		status = transaction.Done();
2024  	if (status == B_OK) {
2025  		notify_attribute_changed(volume->ID(), inode->ParentID(), inode->ID(),
2026  			name, B_ATTR_REMOVED);
2027  	}
2028  
2029  	return status;
2030  }
2031  
2032  
2033  //	#pragma mark - Special Nodes
2034  
2035  
2036  status_t
2037  bfs_create_special_node(fs_volume* _volume, fs_vnode* _directory,
2038  	const char* name, fs_vnode* subVnode, mode_t mode, uint32 flags,
2039  	fs_vnode* _superVnode, ino_t* _nodeID)
2040  {
2041  	// no need to support entry-less nodes
2042  	if (name == NULL)
2043  		return B_UNSUPPORTED;
2044  
2045  	FUNCTION_START(("name = \"%s\", mode = %u, flags = 0x%" B_PRIx32
2046  		", subVnode: %p\n", name, (unsigned int)mode, flags, subVnode));
2047  
2048  	Volume* volume = (Volume*)_volume->private_volume;
2049  	Inode* directory = (Inode*)_directory->private_node;
2050  
2051  	if (volume->IsReadOnly())
2052  		return B_READ_ONLY_DEVICE;
2053  
2054  	if (!directory->IsDirectory())
2055  		RETURN_ERROR(B_BAD_TYPE);
2056  
2057  	status_t status = directory->CheckPermissions(W_OK);
2058  	if (status < B_OK)
2059  		RETURN_ERROR(status);
2060  
2061  	Transaction transaction(volume, directory->BlockNumber());
2062  
2063  	off_t id;
2064  	Inode* inode;
2065  	status = Inode::Create(transaction, directory, name, mode, O_EXCL, 0, NULL,
2066  		&id, &inode, subVnode ? subVnode->ops : NULL, flags);
2067  	if (status == B_OK) {
2068  		_superVnode->private_node = inode;
2069  		_superVnode->ops = &gBFSVnodeOps;
2070  		*_nodeID = id;
2071  
2072  		entry_cache_add(volume->ID(), directory->ID(), name, id);
2073  
2074  		status = transaction.Done();
2075  		if (status == B_OK)
2076  			notify_entry_created(volume->ID(), directory->ID(), name, id);
2077  		else
2078  			entry_cache_remove(volume->ID(), directory->ID(), name);
2079  	}
2080  
2081  	return status;
2082  }
2083  
2084  
2085  //	#pragma mark - Index functions
2086  
2087  
2088  static status_t
2089  bfs_open_index_dir(fs_volume* _volume, void** _cookie)
2090  {
2091  	FUNCTION();
2092  
2093  	Volume* volume = (Volume*)_volume->private_volume;
2094  
2095  	if (volume->IndicesNode() == NULL) {
2096  		// This volume does not have any indices
2097  		RETURN_ERROR(B_ENTRY_NOT_FOUND);
2098  	}
2099  
2100  	// Since the indices root node is just a directory, and we are storing
2101  	// a pointer to it in our Volume object, we can just use the directory
2102  	// traversal functions.
2103  	// In fact we're storing it in the Volume object for that reason.
2104  
2105  	fs_vnode indicesNode;
2106  	indicesNode.private_node = volume->IndicesNode();
2107  
2108  	RETURN_ERROR(bfs_open_dir(_volume, &indicesNode, _cookie));
2109  }
2110  
2111  
2112  static status_t
2113  bfs_close_index_dir(fs_volume* _volume, void* _cookie)
2114  {
2115  	FUNCTION();
2116  
2117  	Volume* volume = (Volume*)_volume->private_volume;
2118  
2119  	fs_vnode indicesNode;
2120  	indicesNode.private_node = volume->IndicesNode();
2121  
2122  	RETURN_ERROR(bfs_close_dir(_volume, &indicesNode, _cookie));
2123  }
2124  
2125  
2126  static status_t
2127  bfs_free_index_dir_cookie(fs_volume* _volume, void* _cookie)
2128  {
2129  	FUNCTION();
2130  
2131  	Volume* volume = (Volume*)_volume->private_volume;
2132  
2133  	fs_vnode indicesNode;
2134  	indicesNode.private_node = volume->IndicesNode();
2135  
2136  	RETURN_ERROR(bfs_free_dir_cookie(_volume, &indicesNode, _cookie));
2137  }
2138  
2139  
2140  static status_t
2141  bfs_rewind_index_dir(fs_volume* _volume, void* _cookie)
2142  {
2143  	FUNCTION();
2144  
2145  	Volume* volume = (Volume*)_volume->private_volume;
2146  
2147  	fs_vnode indicesNode;
2148  	indicesNode.private_node = volume->IndicesNode();
2149  
2150  	RETURN_ERROR(bfs_rewind_dir(_volume, &indicesNode, _cookie));
2151  }
2152  
2153  
2154  static status_t
2155  bfs_read_index_dir(fs_volume* _volume, void* _cookie, struct dirent* dirent,
2156  	size_t bufferSize, uint32* _num)
2157  {
2158  	FUNCTION();
2159  
2160  	Volume* volume = (Volume*)_volume->private_volume;
2161  
2162  	fs_vnode indicesNode;
2163  	indicesNode.private_node = volume->IndicesNode();
2164  
2165  	RETURN_ERROR(bfs_read_dir(_volume, &indicesNode, _cookie, dirent,
2166  		bufferSize, _num));
2167  }
2168  
2169  
2170  static status_t
2171  bfs_create_index(fs_volume* _volume, const char* name, uint32 type,
2172  	uint32 flags)
2173  {
2174  	FUNCTION_START(("name = \"%s\", type = %" B_PRIu32
2175  		", flags = %" B_PRIu32 "\n", name, type, flags));
2176  
2177  	Volume* volume = (Volume*)_volume->private_volume;
2178  
2179  	if (volume->IsReadOnly())
2180  		return B_READ_ONLY_DEVICE;
2181  
2182  	// only root users are allowed to create indices
2183  	if (geteuid() != 0)
2184  		return B_NOT_ALLOWED;
2185  
2186  	Transaction transaction(volume, volume->Indices());
2187  
2188  	Index index(volume);
2189  	status_t status = index.Create(transaction, name, type);
2190  
2191  	if (status == B_OK)
2192  		status = transaction.Done();
2193  
2194  	RETURN_ERROR(status);
2195  }
2196  
2197  
2198  static status_t
2199  bfs_remove_index(fs_volume* _volume, const char* name)
2200  {
2201  	FUNCTION();
2202  
2203  	Volume* volume = (Volume*)_volume->private_volume;
2204  
2205  	if (volume->IsReadOnly())
2206  		return B_READ_ONLY_DEVICE;
2207  
2208  	// only root users are allowed to remove indices
2209  	if (geteuid() != 0)
2210  		return B_NOT_ALLOWED;
2211  
2212  	Inode* indices = volume->IndicesNode();
2213  	if (indices == NULL)
2214  		return B_ENTRY_NOT_FOUND;
2215  
2216  	Transaction transaction(volume, volume->Indices());
2217  
2218  	status_t status = indices->Remove(transaction, name);
2219  	if (status == B_OK)
2220  		status = transaction.Done();
2221  
2222  	RETURN_ERROR(status);
2223  }
2224  
2225  
2226  static status_t
2227  bfs_stat_index(fs_volume* _volume, const char* name, struct stat* stat)
2228  {
2229  	FUNCTION_START(("name = %s\n", name));
2230  
2231  	Volume* volume = (Volume*)_volume->private_volume;
2232  
2233  	Index index(volume);
2234  	status_t status = index.SetTo(name);
2235  	if (status < B_OK)
2236  		RETURN_ERROR(status);
2237  
2238  	bfs_inode& node = index.Node()->Node();
2239  
2240  	stat->st_type = index.Type();
2241  	stat->st_mode = node.Mode();
2242  
2243  	stat->st_size = node.data.Size();
2244  	stat->st_blocks = index.Node()->AllocatedSize() / 512;
2245  
2246  	stat->st_nlink = 1;
2247  	stat->st_blksize = 65536;
2248  
2249  	stat->st_uid = node.UserID();
2250  	stat->st_gid = node.GroupID();
2251  
2252  	fill_stat_time(node, *stat);
2253  
2254  	return B_OK;
2255  }
2256  
2257  
2258  //	#pragma mark - Query functions
2259  
2260  
2261  static status_t
2262  bfs_open_query(fs_volume* _volume, const char* queryString, uint32 flags,
2263  	port_id port, uint32 token, void** _cookie)
2264  {
2265  	FUNCTION_START(("bfs_open_query(\"%s\", flags = %" B_PRIu32
2266  		", port_id = %" B_PRId32 ", token = %" B_PRIu32 ")\n",
2267  		queryString, flags, port, token));
2268  
2269  	Volume* volume = (Volume*)_volume->private_volume;
2270  
2271  	Query* query;
2272  	status_t error = Query::Create(volume, queryString, flags, port, token, query);
2273  	if (error != B_OK)
2274  		return error;
2275  
2276  	*_cookie = (void*)query;
2277  
2278  	return B_OK;
2279  }
2280  
2281  
2282  static status_t
2283  bfs_close_query(fs_volume* _volume, void* cookie)
2284  {
2285  	FUNCTION();
2286  	return B_OK;
2287  }
2288  
2289  
2290  static status_t
2291  bfs_free_query_cookie(fs_volume* _volume, void* cookie)
2292  {
2293  	FUNCTION();
2294  
2295  	Query* query = (Query*)cookie;
2296  	delete query;
2297  
2298  	return B_OK;
2299  }
2300  
2301  
2302  static status_t
2303  bfs_read_query(fs_volume* /*_volume*/, void* cookie, struct dirent* dirent,
2304  	size_t bufferSize, uint32* _num)
2305  {
2306  	FUNCTION();
2307  	Query* query = (Query*)cookie;
2308  	status_t status = query->GetNextEntry(dirent, bufferSize);
2309  	if (status == B_OK)
2310  		*_num = 1;
2311  	else if (status == B_ENTRY_NOT_FOUND)
2312  		*_num = 0;
2313  	else
2314  		return status;
2315  
2316  	return B_OK;
2317  }
2318  
2319  
2320  static status_t
2321  bfs_rewind_query(fs_volume* /*_volume*/, void* cookie)
2322  {
2323  	FUNCTION();
2324  
2325  	Query* query = (Query*)cookie;
2326  	return query->Rewind();
2327  }
2328  
2329  
2330  //	#pragma mark -
2331  
2332  
2333  static uint32
2334  bfs_get_supported_operations(partition_data* partition, uint32 mask)
2335  {
2336  	// TODO: We should at least check the partition size.
2337  	return B_DISK_SYSTEM_SUPPORTS_INITIALIZING
2338  		| B_DISK_SYSTEM_SUPPORTS_CONTENT_NAME
2339  		| B_DISK_SYSTEM_SUPPORTS_WRITING;
2340  }
2341  
2342  
2343  static status_t
2344  bfs_initialize(int fd, partition_id partitionID, const char* name,
2345  	const char* parameterString, off_t /*partitionSize*/, disk_job_id job)
2346  {
2347  	// check name
2348  	status_t status = check_volume_name(name);
2349  	if (status != B_OK)
2350  		return status;
2351  
2352  	// parse parameters
2353  	initialize_parameters parameters;
2354  	status = parse_initialize_parameters(parameterString, parameters);
2355  	if (status != B_OK)
2356  		return status;
2357  
2358  	update_disk_device_job_progress(job, 0);
2359  
2360  	// initialize the volume
2361  	Volume volume(NULL);
2362  	status = volume.Initialize(fd, name, parameters.blockSize,
2363  		parameters.flags);
2364  	if (status < B_OK) {
2365  		INFORM(("Initializing volume failed: %s\n", strerror(status)));
2366  		return status;
2367  	}
2368  
2369  	// rescan partition
2370  	status = scan_partition(partitionID);
2371  	if (status != B_OK)
2372  		return status;
2373  
2374  	update_disk_device_job_progress(job, 1);
2375  
2376  	// print some info, if desired
2377  	if (parameters.verbose) {
2378  		disk_super_block super = volume.SuperBlock();
2379  
2380  		INFORM(("Disk was initialized successfully.\n"));
2381  		INFORM(("\tname: \"%s\"\n", super.name));
2382  		INFORM(("\tnum blocks: %" B_PRIdOFF "\n", super.NumBlocks()));
2383  		INFORM(("\tused blocks: %" B_PRIdOFF "\n", super.UsedBlocks()));
2384  		INFORM(("\tblock size: %u bytes\n", (unsigned)super.BlockSize()));
2385  		INFORM(("\tnum allocation groups: %d\n",
2386  			(int)super.AllocationGroups()));
2387  		INFORM(("\tallocation group size: %ld blocks\n",
2388  			1L << super.AllocationGroupShift()));
2389  		INFORM(("\tlog size: %u blocks\n", super.log_blocks.Length()));
2390  	}
2391  
2392  	return B_OK;
2393  }
2394  
2395  
2396  static status_t
2397  bfs_uninitialize(int fd, partition_id partitionID, off_t partitionSize,
2398  	uint32 blockSize, disk_job_id job)
2399  {
2400  	if (blockSize == 0)
2401  		return B_BAD_VALUE;
2402  
2403  	update_disk_device_job_progress(job, 0.0);
2404  
2405  	// just overwrite the superblock
2406  	disk_super_block superBlock;
2407  	memset(&superBlock, 0, sizeof(superBlock));
2408  
2409  	if (write_pos(fd, 512, &superBlock, sizeof(superBlock)) < 0)
2410  		return errno;
2411  
2412  	update_disk_device_job_progress(job, 1.0);
2413  
2414  	return B_OK;
2415  }
2416  
2417  
2418  //	#pragma mark -
2419  
2420  
2421  static status_t
2422  bfs_std_ops(int32 op, ...)
2423  {
2424  	switch (op) {
2425  		case B_MODULE_INIT:
2426  #ifdef BFS_DEBUGGER_COMMANDS
2427  			add_debugger_commands();
2428  #endif
2429  			return B_OK;
2430  		case B_MODULE_UNINIT:
2431  #ifdef BFS_DEBUGGER_COMMANDS
2432  			remove_debugger_commands();
2433  #endif
2434  			return B_OK;
2435  
2436  		default:
2437  			return B_ERROR;
2438  	}
2439  }
2440  
2441  fs_volume_ops gBFSVolumeOps = {
2442  	&bfs_unmount,
2443  	&bfs_read_fs_stat,
2444  	&bfs_write_fs_stat,
2445  	&bfs_sync,
2446  	&bfs_get_vnode,
2447  
2448  	/* index directory & index operations */
2449  	&bfs_open_index_dir,
2450  	&bfs_close_index_dir,
2451  	&bfs_free_index_dir_cookie,
2452  	&bfs_read_index_dir,
2453  	&bfs_rewind_index_dir,
2454  
2455  	&bfs_create_index,
2456  	&bfs_remove_index,
2457  	&bfs_stat_index,
2458  
2459  	/* query operations */
2460  	&bfs_open_query,
2461  	&bfs_close_query,
2462  	&bfs_free_query_cookie,
2463  	&bfs_read_query,
2464  	&bfs_rewind_query,
2465  };
2466  
2467  fs_vnode_ops gBFSVnodeOps = {
2468  	/* vnode operations */
2469  	&bfs_lookup,
2470  	&bfs_get_vnode_name,
2471  	&bfs_put_vnode,
2472  	&bfs_remove_vnode,
2473  
2474  	/* VM file access */
2475  	&bfs_can_page,
2476  	&bfs_read_pages,
2477  	&bfs_write_pages,
2478  
2479  	&bfs_io,
2480  	NULL,	// cancel_io()
2481  
2482  	&bfs_get_file_map,
2483  
2484  	&bfs_ioctl,
2485  	&bfs_set_flags,
2486  	NULL,	// fs_select
2487  	NULL,	// fs_deselect
2488  	&bfs_fsync,
2489  
2490  	&bfs_read_link,
2491  	&bfs_create_symlink,
2492  
2493  	&bfs_link,
2494  	&bfs_unlink,
2495  	&bfs_rename,
2496  
2497  	&bfs_access,
2498  	&bfs_read_stat,
2499  	&bfs_write_stat,
2500  	NULL,	// fs_preallocate
2501  
2502  	/* file operations */
2503  	&bfs_create,
2504  	&bfs_open,
2505  	&bfs_close,
2506  	&bfs_free_cookie,
2507  	&bfs_read,
2508  	&bfs_write,
2509  
2510  	/* directory operations */
2511  	&bfs_create_dir,
2512  	&bfs_remove_dir,
2513  	&bfs_open_dir,
2514  	&bfs_close_dir,
2515  	&bfs_free_dir_cookie,
2516  	&bfs_read_dir,
2517  	&bfs_rewind_dir,
2518  
2519  	/* attribute directory operations */
2520  	&bfs_open_attr_dir,
2521  	&bfs_close_attr_dir,
2522  	&bfs_free_attr_dir_cookie,
2523  	&bfs_read_attr_dir,
2524  	&bfs_rewind_attr_dir,
2525  
2526  	/* attribute operations */
2527  	&bfs_create_attr,
2528  	&bfs_open_attr,
2529  	&bfs_close_attr,
2530  	&bfs_free_attr_cookie,
2531  	&bfs_read_attr,
2532  	&bfs_write_attr,
2533  
2534  	&bfs_read_attr_stat,
2535  	&bfs_write_attr_stat,
2536  	&bfs_rename_attr,
2537  	&bfs_remove_attr,
2538  
2539  	/* special nodes */
2540  	&bfs_create_special_node
2541  };
2542  
2543  static file_system_module_info sBeFileSystem = {
2544  	{
2545  		"file_systems/bfs" BFS_ENDIAN_SUFFIX B_CURRENT_FS_API_VERSION,
2546  		0,
2547  		bfs_std_ops,
2548  	},
2549  
2550  	"bfs" BFS_ENDIAN_SUFFIX,						// short_name
2551  	"Be File System" BFS_ENDIAN_PRETTY_SUFFIX,		// pretty_name
2552  
2553  	// DDM flags
2554  	0
2555  //	| B_DISK_SYSTEM_SUPPORTS_CHECKING
2556  //	| B_DISK_SYSTEM_SUPPORTS_REPAIRING
2557  //	| B_DISK_SYSTEM_SUPPORTS_RESIZING
2558  //	| B_DISK_SYSTEM_SUPPORTS_MOVING
2559  //	| B_DISK_SYSTEM_SUPPORTS_SETTING_CONTENT_NAME
2560  //	| B_DISK_SYSTEM_SUPPORTS_SETTING_CONTENT_PARAMETERS
2561  	| B_DISK_SYSTEM_SUPPORTS_INITIALIZING
2562  	| B_DISK_SYSTEM_SUPPORTS_CONTENT_NAME
2563  //	| B_DISK_SYSTEM_SUPPORTS_DEFRAGMENTING
2564  //	| B_DISK_SYSTEM_SUPPORTS_DEFRAGMENTING_WHILE_MOUNTED
2565  //	| B_DISK_SYSTEM_SUPPORTS_CHECKING_WHILE_MOUNTED
2566  //	| B_DISK_SYSTEM_SUPPORTS_REPAIRING_WHILE_MOUNTED
2567  //	| B_DISK_SYSTEM_SUPPORTS_RESIZING_WHILE_MOUNTED
2568  //	| B_DISK_SYSTEM_SUPPORTS_MOVING_WHILE_MOUNTED
2569  //	| B_DISK_SYSTEM_SUPPORTS_SETTING_CONTENT_NAME_WHILE_MOUNTED
2570  //	| B_DISK_SYSTEM_SUPPORTS_SETTING_CONTENT_PARAMETERS_WHILE_MOUNTED
2571  	| B_DISK_SYSTEM_SUPPORTS_WRITING
2572  	,
2573  
2574  	// scanning
2575  	bfs_identify_partition,
2576  	bfs_scan_partition,
2577  	bfs_free_identify_partition_cookie,
2578  	NULL,	// free_partition_content_cookie()
2579  
2580  	&bfs_mount,
2581  
2582  	/* capability querying operations */
2583  	&bfs_get_supported_operations,
2584  
2585  	NULL,	// validate_resize
2586  	NULL,	// validate_move
2587  	NULL,	// validate_set_content_name
2588  	NULL,	// validate_set_content_parameters
2589  	NULL,	// validate_initialize,
2590  
2591  	/* shadow partition modification */
2592  	NULL,	// shadow_changed
2593  
2594  	/* writing */
2595  	NULL,	// defragment
2596  	NULL,	// repair
2597  	NULL,	// resize
2598  	NULL,	// move
2599  	NULL,	// set_content_name
2600  	NULL,	// set_content_parameters
2601  	bfs_initialize,
2602  	bfs_uninitialize
2603  };
2604  
2605  module_info* modules[] = {
2606  	(module_info*)&sBeFileSystem,
2607  	NULL,
2608  };
2609