xref: /haiku/src/system/kernel/fs/rootfs.cpp (revision 52c4471a3024d2eb81fe88e2c3982b9f8daa5e56)
1 /*
2  * Copyright 2002-2017, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  *
5  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
6  * Distributed under the terms of the NewOS License.
7  */
8 
9 
10 #if FS_SHELL
11 #	include "fssh_api_wrapper.h"
12 
13 #	include "OpenHashTable.h"
14 #	include "list.h"
15 #else
16 #	include <stdio.h>
17 #	include <stdlib.h>
18 #	include <string.h>
19 #	include <sys/stat.h>
20 
21 #	include <fs_cache.h>
22 #	include <KernelExport.h>
23 #	include <NodeMonitor.h>
24 
25 #	include <debug.h>
26 #	include <lock.h>
27 #	include <OpenHashTable.h>
28 #	include <util/AutoLock.h>
29 #	include <vfs.h>
30 #	include <vm/vm.h>
31 #endif
32 
33 
34 
#if FS_SHELL
	using namespace FSShell;
	// In the FS shell build user_strlcpy() is mapped to a plain strlcpy()
	// whose result is discarded; the expression always evaluates to
	// FSSH_B_OK.
#	define user_strlcpy(to, from, len)	(strlcpy(to, from, len), FSSH_B_OK)
#endif
39 
40 
// Enable the following define to have TRACE() forward its arguments to
// dprintf().
//#define TRACE_ROOTFS
#ifdef TRACE_ROOTFS
	// TRACE() takes a parenthesized dprintf() argument list:
	// TRACE(("format %d\n", value));
#	define TRACE(x) dprintf x
#else
	// Tracing disabled: TRACE() expands to nothing.
#	define TRACE(x)
#endif
47 
48 
49 namespace {
50 
51 struct rootfs_stream {
52 	mode_t						type;
53 	struct stream_dir {
54 		struct rootfs_vnode*	dir_head;
55 		struct list				cookies;
56 		mutex					cookie_lock;
57 	} dir;
58 	struct stream_symlink {
59 		char*					path;
60 		size_t					length;
61 	} symlink;
62 };
63 
64 struct rootfs_vnode {
65 	struct rootfs_vnode*		all_next;
66 	ino_t						id;
67 	char*						name;
68 	timespec					modification_time;
69 	timespec					creation_time;
70 	uid_t						uid;
71 	gid_t						gid;
72 	struct rootfs_vnode*		parent;
73 	struct rootfs_vnode*		dir_next;
74 	struct rootfs_stream		stream;
75 };
76 
77 struct VnodeHash {
78 	typedef	ino_t			KeyType;
79 	typedef	rootfs_vnode	ValueType;
80 
81 	size_t HashKey(KeyType key) const
82 	{
83 		return key;
84 	}
85 
86 	size_t Hash(ValueType* vnode) const
87 	{
88 		return vnode->id;
89 	}
90 
91 	bool Compare(KeyType key, ValueType* vnode) const
92 	{
93 		return vnode->id == key;
94 	}
95 
96 	ValueType*& GetLink(ValueType* value) const
97 	{
98 		return value->all_next;
99 	}
100 };
101 
102 typedef BOpenHashTable<VnodeHash> VnodeTable;
103 
104 struct rootfs {
105 	fs_volume*					volume;
106 	dev_t						id;
107 	rw_lock						lock;
108 	ino_t						next_vnode_id;
109 	VnodeTable*					vnode_list_hash;
110 	struct rootfs_vnode*		root_vnode;
111 };
112 
113 // dircookie, dirs are only types of streams supported by rootfs
114 struct rootfs_dir_cookie {
115 	struct list_link			link;
116 	mutex						lock;
117 	struct rootfs_vnode*		current;
118 	int32						iteration_state;
119 };
120 
121 // directory iteration states
122 enum {
123 	ITERATION_STATE_DOT		= 0,
124 	ITERATION_STATE_DOT_DOT	= 1,
125 	ITERATION_STATE_OTHERS	= 2,
126 	ITERATION_STATE_BEGIN	= ITERATION_STATE_DOT,
127 };
128 
129 
130 // extern only to make forward declaration possible
131 extern fs_volume_ops sVolumeOps;
132 extern fs_vnode_ops sVnodeOps;
133 
134 } // namespace
135 
136 
// Size handed to VnodeTable::Init() when a volume is mounted.
#define ROOTFS_HASH_SIZE 16
138 
139 
140 inline static status_t
141 rootfs_check_permissions(struct rootfs_vnode* dir, int accessMode)
142 {
143 	return check_access_permissions(accessMode, dir->stream.type, (gid_t)dir->gid, (uid_t)dir->uid);
144 }
145 
146 
147 static timespec
148 current_timespec()
149 {
150 	bigtime_t time = real_time_clock_usecs();
151 
152 	timespec tv;
153 	tv.tv_sec = time / 1000000;
154 	tv.tv_nsec = (time % 1000000) * 1000;
155 	return tv;
156 }
157 
158 
159 static ino_t
160 get_parent_id(struct rootfs_vnode* vnode)
161 {
162 	if (vnode->parent != NULL)
163 		return vnode->parent->id;
164 	return -1;
165 }
166 
167 
168 static struct rootfs_vnode*
169 rootfs_create_vnode(struct rootfs* fs, struct rootfs_vnode* parent,
170 	const char* name, int type)
171 {
172 	struct rootfs_vnode* vnode;
173 
174 	vnode = (rootfs_vnode*)malloc(sizeof(struct rootfs_vnode));
175 	if (vnode == NULL)
176 		return NULL;
177 
178 	memset(vnode, 0, sizeof(struct rootfs_vnode));
179 
180 	if (name != NULL) {
181 		vnode->name = strdup(name);
182 		if (vnode->name == NULL) {
183 			free(vnode);
184 			return NULL;
185 		}
186 	}
187 
188 	vnode->id = fs->next_vnode_id++;
189 	vnode->stream.type = type;
190 	vnode->creation_time = vnode->modification_time = current_timespec();
191 	vnode->uid = geteuid();
192 	vnode->gid = parent ? parent->gid : getegid();
193 		// inherit group from parent if possible
194 
195 	if (S_ISDIR(type)) {
196 		list_init(&vnode->stream.dir.cookies);
197 		mutex_init(&vnode->stream.dir.cookie_lock, "rootfs dir cookies");
198 	}
199 
200 	return vnode;
201 }
202 
203 
204 static status_t
205 rootfs_delete_vnode(struct rootfs* fs, struct rootfs_vnode* v, bool force_delete)
206 {
207 	// cant delete it if it's in a directory or is a directory
208 	// and has children
209 	if (!force_delete && (v->stream.dir.dir_head != NULL || v->dir_next != NULL))
210 		return EPERM;
211 
212 	// remove it from the global hash table
213 	fs->vnode_list_hash->Remove(v);
214 
215 	if (S_ISDIR(v->stream.type))
216 		mutex_destroy(&v->stream.dir.cookie_lock);
217 
218 	free(v->name);
219 	free(v);
220 
221 	return 0;
222 }
223 
224 
225 /*! Makes sure none of the dircookies point to the vnode passed in. */
226 static void
227 update_dir_cookies(struct rootfs_vnode* dir, struct rootfs_vnode* vnode)
228 {
229 	struct rootfs_dir_cookie* cookie = NULL;
230 
231 	while ((cookie = (rootfs_dir_cookie*)list_get_next_item(
232 			&dir->stream.dir.cookies, cookie)) != NULL) {
233 		MutexLocker cookieLocker(cookie->lock);
234 		if (cookie->current == vnode)
235 			cookie->current = vnode->dir_next;
236 	}
237 }
238 
239 
240 static struct rootfs_vnode*
241 rootfs_find_in_dir(struct rootfs_vnode* dir, const char* path)
242 {
243 	struct rootfs_vnode* vnode;
244 
245 	if (!strcmp(path, "."))
246 		return dir;
247 	if (!strcmp(path, ".."))
248 		return dir->parent;
249 
250 	for (vnode = dir->stream.dir.dir_head; vnode; vnode = vnode->dir_next) {
251 		if (!strcmp(vnode->name, path))
252 			return vnode;
253 	}
254 	return NULL;
255 }
256 
257 
258 static status_t
259 rootfs_insert_in_dir(struct rootfs* fs, struct rootfs_vnode* dir,
260 	struct rootfs_vnode* vnode)
261 {
262 	// make sure the directory stays sorted alphabetically
263 
264 	struct rootfs_vnode* node = dir->stream.dir.dir_head;
265 	struct rootfs_vnode* last = NULL;
266 	while (node != NULL && strcmp(node->name, vnode->name) < 0) {
267 		last = node;
268 		node = node->dir_next;
269 	}
270 	if (last == NULL) {
271 		// the new vnode is the first entry in the list
272 		vnode->dir_next = dir->stream.dir.dir_head;
273 		dir->stream.dir.dir_head = vnode;
274 	} else {
275 		// insert after that node
276 		vnode->dir_next = last->dir_next;
277 		last->dir_next = vnode;
278 	}
279 
280 	vnode->parent = dir;
281 	dir->modification_time = current_timespec();
282 
283 	notify_stat_changed(fs->id, get_parent_id(dir), dir->id,
284 		B_STAT_MODIFICATION_TIME);
285 	return B_OK;
286 }
287 
288 
289 static status_t
290 rootfs_remove_from_dir(struct rootfs* fs, struct rootfs_vnode* dir,
291 	struct rootfs_vnode* removeVnode)
292 {
293 	struct rootfs_vnode* vnode;
294 	struct rootfs_vnode* lastVnode;
295 
296 	for (vnode = dir->stream.dir.dir_head, lastVnode = NULL; vnode != NULL;
297 			lastVnode = vnode, vnode = vnode->dir_next) {
298 		if (vnode == removeVnode) {
299 			// make sure all dircookies dont point to this vnode
300 			update_dir_cookies(dir, vnode);
301 
302 			if (lastVnode)
303 				lastVnode->dir_next = vnode->dir_next;
304 			else
305 				dir->stream.dir.dir_head = vnode->dir_next;
306 			vnode->dir_next = NULL;
307 
308 			dir->modification_time = current_timespec();
309 			notify_stat_changed(fs->id, get_parent_id(dir), dir->id,
310 				B_STAT_MODIFICATION_TIME);
311 			return B_OK;
312 		}
313 	}
314 	return B_ENTRY_NOT_FOUND;
315 }
316 
317 
318 static bool
319 rootfs_is_dir_empty(struct rootfs_vnode* dir)
320 {
321 	return !dir->stream.dir.dir_head;
322 }
323 
324 
325 /*! You must hold the FS write lock when calling this function */
326 static status_t
327 remove_node(struct rootfs* fs, struct rootfs_vnode* directory,
328 	struct rootfs_vnode* vnode)
329 {
330 	// schedule this vnode to be removed when it's ref goes to zero
331 
332 	bool gotNode = (get_vnode(fs->volume, vnode->id, NULL) == B_OK);
333 
334 	status_t status = B_OK;
335 	if (gotNode)
336 		status = remove_vnode(fs->volume, vnode->id);
337 
338 	if (status == B_OK) {
339 		rootfs_remove_from_dir(fs, directory, vnode);
340 		notify_entry_removed(fs->id, directory->id, vnode->name, vnode->id);
341 	}
342 
343 	if (gotNode)
344 		put_vnode(fs->volume, vnode->id);
345 
346 	return status;
347 }
348 
349 
350 static status_t
351 rootfs_remove(struct rootfs* fs, struct rootfs_vnode* dir, const char* name,
352 	bool isDirectory)
353 {
354 	struct rootfs_vnode* vnode;
355 	status_t status = B_OK;
356 
357 	WriteLocker locker(fs->lock);
358 
359 	vnode = rootfs_find_in_dir(dir, name);
360 	if (!vnode)
361 		status = B_ENTRY_NOT_FOUND;
362 	else if (isDirectory && !S_ISDIR(vnode->stream.type))
363 		status = B_NOT_A_DIRECTORY;
364 	else if (!isDirectory && S_ISDIR(vnode->stream.type))
365 		status = B_IS_A_DIRECTORY;
366 	else if (isDirectory && !rootfs_is_dir_empty(vnode))
367 		status = B_DIRECTORY_NOT_EMPTY;
368 
369 	if (status != B_OK)
370 		return status;
371 
372 	entry_cache_remove(fs->volume->id, dir->id, name);
373 
374 	return remove_node(fs, dir, vnode);
375 }
376 
377 
378 //	#pragma mark -
379 
380 
381 static status_t
382 rootfs_mount(fs_volume* volume, const char* device, uint32 flags,
383 	const char* args, ino_t* _rootID)
384 {
385 	struct rootfs* fs;
386 	struct rootfs_vnode* vnode;
387 	status_t err;
388 
389 	TRACE(("rootfs_mount: entry\n"));
390 
391 	fs = (rootfs*)malloc(sizeof(struct rootfs));
392 	if (fs == NULL)
393 		return B_NO_MEMORY;
394 
395 	volume->private_volume = fs;
396 	volume->ops = &sVolumeOps;
397 	fs->volume = volume;
398 	fs->id = volume->id;
399 	fs->next_vnode_id = 1;
400 
401 	rw_lock_init(&fs->lock, "rootfs");
402 
403 	fs->vnode_list_hash = new(std::nothrow) VnodeTable();
404 	if (fs->vnode_list_hash == NULL
405 			|| fs->vnode_list_hash->Init(ROOTFS_HASH_SIZE) != B_OK) {
406 		err = B_NO_MEMORY;
407 		goto err2;
408 	}
409 
410 	// create the root vnode
411 	vnode = rootfs_create_vnode(fs, NULL, ".", S_IFDIR | 0755);
412 	if (vnode == NULL) {
413 		err = B_NO_MEMORY;
414 		goto err3;
415 	}
416 	vnode->parent = vnode;
417 
418 	fs->root_vnode = vnode;
419 	fs->vnode_list_hash->Insert(vnode);
420 	publish_vnode(volume, vnode->id, vnode, &sVnodeOps, vnode->stream.type, 0);
421 
422 	*_rootID = vnode->id;
423 
424 	return B_OK;
425 
426 err3:
427 	delete fs->vnode_list_hash;
428 err2:
429 	rw_lock_destroy(&fs->lock);
430 	free(fs);
431 
432 	return err;
433 }
434 
435 
436 static status_t
437 rootfs_unmount(fs_volume* _volume)
438 {
439 	struct rootfs* fs = (struct rootfs*)_volume->private_volume;
440 
441 	TRACE(("rootfs_unmount: entry fs = %p\n", fs));
442 
443 	// release the reference to the root
444 	put_vnode(fs->volume, fs->root_vnode->id);
445 
446 	// delete all of the vnodes
447 	VnodeTable::Iterator i(fs->vnode_list_hash);
448 
449 	while (i.HasNext()) {
450 		struct rootfs_vnode* vnode = i.Next();
451 		rootfs_delete_vnode(fs, vnode, true);
452 	}
453 
454 	delete fs->vnode_list_hash;
455 	rw_lock_destroy(&fs->lock);
456 	free(fs);
457 
458 	return B_OK;
459 }
460 
461 
462 static status_t
463 rootfs_sync(fs_volume* _volume)
464 {
465 	TRACE(("rootfs_sync: entry\n"));
466 
467 	return B_OK;
468 }
469 
470 
471 static status_t
472 rootfs_lookup(fs_volume* _volume, fs_vnode* _dir, const char* name, ino_t* _id)
473 {
474 	struct rootfs* fs = (struct rootfs*)_volume->private_volume;
475 	struct rootfs_vnode* dir = (struct rootfs_vnode*)_dir->private_node;
476 	struct rootfs_vnode* vnode;
477 
478 	TRACE(("rootfs_lookup: entry dir %p, name '%s'\n", dir, name));
479 	if (!S_ISDIR(dir->stream.type))
480 		return B_NOT_A_DIRECTORY;
481 
482 	status_t status = rootfs_check_permissions(dir, X_OK);
483 	if (status != B_OK)
484 		return status;
485 
486 	ReadLocker locker(fs->lock);
487 
488 	// look it up
489 	vnode = rootfs_find_in_dir(dir, name);
490 	if (!vnode)
491 		return B_ENTRY_NOT_FOUND;
492 
493 	status = get_vnode(fs->volume, vnode->id, NULL);
494 	if (status != B_OK)
495 		return status;
496 
497 	entry_cache_add(fs->volume->id, dir->id, name, vnode->id);
498 
499 	*_id = vnode->id;
500 	return B_OK;
501 }
502 
503 
504 static status_t
505 rootfs_get_vnode_name(fs_volume* _volume, fs_vnode* _vnode, char* buffer,
506 	size_t bufferSize)
507 {
508 	struct rootfs_vnode* vnode = (struct rootfs_vnode*)_vnode->private_node;
509 
510 	TRACE(("rootfs_get_vnode_name: vnode = %p (name = %s)\n", vnode,
511 		vnode->name));
512 
513 	strlcpy(buffer, vnode->name, bufferSize);
514 	return B_OK;
515 }
516 
517 
518 static status_t
519 rootfs_get_vnode(fs_volume* _volume, ino_t id, fs_vnode* _vnode, int* _type,
520 	uint32* _flags, bool reenter)
521 {
522 	struct rootfs* fs = (struct rootfs*)_volume->private_volume;
523 	struct rootfs_vnode* vnode;
524 
525 	TRACE(("rootfs_getvnode: asking for vnode %Ld, r %d\n", id, reenter));
526 
527 	if (!reenter)
528 		rw_lock_read_lock(&fs->lock);
529 
530 	vnode = fs->vnode_list_hash->Lookup(id);
531 
532 	if (!reenter)
533 		rw_lock_read_unlock(&fs->lock);
534 
535 	TRACE(("rootfs_getnvnode: looked it up at %p\n", vnode));
536 
537 	if (vnode == NULL)
538 		return B_ENTRY_NOT_FOUND;
539 
540 	_vnode->private_node = vnode;
541 	_vnode->ops = &sVnodeOps;
542 	*_type = vnode->stream.type;
543 	*_flags = 0;
544 
545 	return B_OK;
546 }
547 
548 
549 static status_t
550 rootfs_put_vnode(fs_volume* _volume, fs_vnode* _vnode, bool reenter)
551 {
552 #ifdef TRACE_ROOTFS
553 	struct rootfs_vnode* vnode = (struct rootfs_vnode*)_vnode->private_node;
554 
555 	TRACE(("rootfs_putvnode: entry on vnode 0x%Lx, r %d\n", vnode->id, reenter));
556 #endif
557 	return B_OK; // whatever
558 }
559 
560 
561 static status_t
562 rootfs_remove_vnode(fs_volume* _volume, fs_vnode* _vnode, bool reenter)
563 {
564 	struct rootfs* fs = (struct rootfs*)_volume->private_volume;
565 	struct rootfs_vnode* vnode = (struct rootfs_vnode*)_vnode->private_node;
566 
567 	TRACE(("rootfs_remove_vnode: remove %p (0x%Lx), r %d\n", vnode, vnode->id,
568 		reenter));
569 
570 	if (!reenter)
571 		rw_lock_write_lock(&fs->lock);
572 
573 	if (vnode->dir_next) {
574 		// can't remove node if it's linked to the dir
575 		panic("rootfs_remove_vnode: vnode %p asked to be removed is present in "
576 			"dir\n", vnode);
577 	}
578 
579 	rootfs_delete_vnode(fs, vnode, false);
580 
581 	if (!reenter)
582 		rw_lock_write_unlock(&fs->lock);
583 
584 	return B_OK;
585 }
586 
587 
588 static status_t
589 rootfs_create(fs_volume* _volume, fs_vnode* _dir, const char* name, int omode,
590 	int perms, void** _cookie, ino_t* _newID)
591 {
592 	return B_BAD_VALUE;
593 }
594 
595 
596 static status_t
597 rootfs_open(fs_volume* _volume, fs_vnode* _v, int openMode, void** _cookie)
598 {
599 	struct rootfs_vnode* vnode = (rootfs_vnode*)_v->private_node;
600 
601 	if (S_ISDIR(vnode->stream.type) && (openMode & O_RWMASK) != O_RDONLY)
602 		return B_IS_A_DIRECTORY;
603 	if ((openMode & O_DIRECTORY) != 0 && !S_ISDIR(vnode->stream.type))
604 		return B_NOT_A_DIRECTORY;
605 
606 	// allow to open the file, but it can't be done anything with it
607 
608 	*_cookie = NULL;
609 	return B_OK;
610 }
611 
612 
613 static status_t
614 rootfs_close(fs_volume* _volume, fs_vnode* _vnode, void* _cookie)
615 {
616 	TRACE(("rootfs_close: entry vnode %p, cookie %p\n", _vnode->private_node,
617 		_cookie));
618 	return B_OK;
619 }
620 
621 
622 static status_t
623 rootfs_free_cookie(fs_volume* _volume, fs_vnode* _v, void* _cookie)
624 {
625 	return B_OK;
626 }
627 
628 
629 static status_t
630 rootfs_fsync(fs_volume* _volume, fs_vnode* _v)
631 {
632 	return B_OK;
633 }
634 
635 
636 static status_t
637 rootfs_read(fs_volume* _volume, fs_vnode* _vnode, void* _cookie,
638 	off_t pos, void* buffer, size_t* _length)
639 {
640 	return EINVAL;
641 }
642 
643 
644 static status_t
645 rootfs_write(fs_volume* _volume, fs_vnode* vnode, void* cookie,
646 	off_t pos, const void* buffer, size_t* _length)
647 {
648 	TRACE(("rootfs_write: vnode %p, cookie %p, pos 0x%Lx , len %#x\n",
649 		vnode, cookie, pos, (int)*_length));
650 
651 	return EPERM;
652 }
653 
654 
655 static status_t
656 rootfs_create_dir(fs_volume* _volume, fs_vnode* _dir, const char* name,
657 	int mode)
658 {
659 	struct rootfs* fs = (rootfs*)_volume->private_volume;
660 	struct rootfs_vnode* dir = (rootfs_vnode*)_dir->private_node;
661 	struct rootfs_vnode* vnode;
662 
663 	TRACE(("rootfs_create_dir: dir %p, name = '%s', perms = %d\n", dir, name,
664 		mode));
665 
666 	status_t status = rootfs_check_permissions(dir, W_OK);
667 	if (status != B_OK)
668 		return status;
669 
670 	WriteLocker locker(fs->lock);
671 
672 	vnode = rootfs_find_in_dir(dir, name);
673 	if (vnode != NULL)
674 		return B_FILE_EXISTS;
675 
676 	TRACE(("rootfs_create: creating new vnode\n"));
677 	vnode = rootfs_create_vnode(fs, dir, name, S_IFDIR | (mode & S_IUMSK));
678 	if (vnode == NULL)
679 		return B_NO_MEMORY;
680 
681 	rootfs_insert_in_dir(fs, dir, vnode);
682 	fs->vnode_list_hash->Insert(vnode);
683 
684 	entry_cache_add(fs->volume->id, dir->id, name, vnode->id);
685 	notify_entry_created(fs->id, dir->id, name, vnode->id);
686 
687 	return B_OK;
688 }
689 
690 
691 static status_t
692 rootfs_remove_dir(fs_volume* _volume, fs_vnode* _dir, const char* name)
693 {
694 	struct rootfs* fs = (rootfs*)_volume->private_volume;
695 	struct rootfs_vnode* dir = (rootfs_vnode*)_dir->private_node;
696 
697 	status_t status = rootfs_check_permissions(dir, W_OK);
698 	if (status != B_OK)
699 		return status;
700 
701 	TRACE(("rootfs_remove_dir: dir %p (0x%Lx), name '%s'\n", dir, dir->id,
702 		name));
703 
704 	return rootfs_remove(fs, dir, name, true);
705 }
706 
707 
708 static status_t
709 rootfs_open_dir(fs_volume* _volume, fs_vnode* _vnode, void** _cookie)
710 {
711 	struct rootfs* fs = (struct rootfs*)_volume->private_volume;
712 	struct rootfs_vnode* vnode = (struct rootfs_vnode*)_vnode->private_node;
713 	struct rootfs_dir_cookie* cookie;
714 
715 	status_t status = rootfs_check_permissions(vnode, R_OK);
716 	if (status < B_OK)
717 		return status;
718 
719 	TRACE(("rootfs_open: vnode %p\n", vnode));
720 
721 	if (!S_ISDIR(vnode->stream.type))
722 		return B_BAD_VALUE;
723 
724 	cookie = (rootfs_dir_cookie*)malloc(sizeof(struct rootfs_dir_cookie));
725 	if (cookie == NULL)
726 		return B_NO_MEMORY;
727 
728 	mutex_init(&cookie->lock, "rootfs dir cookie");
729 
730 	ReadLocker locker(fs->lock);
731 
732 	cookie->current = vnode->stream.dir.dir_head;
733 	cookie->iteration_state = ITERATION_STATE_BEGIN;
734 
735 	mutex_lock(&vnode->stream.dir.cookie_lock);
736 	list_add_item(&vnode->stream.dir.cookies, cookie);
737 	mutex_unlock(&vnode->stream.dir.cookie_lock);
738 
739 	*_cookie = cookie;
740 
741 	return B_OK;
742 }
743 
744 
745 static status_t
746 rootfs_free_dir_cookie(fs_volume* _volume, fs_vnode* _vnode, void* _cookie)
747 {
748 	struct rootfs_dir_cookie* cookie = (rootfs_dir_cookie*)_cookie;
749 	struct rootfs_vnode* vnode = (rootfs_vnode*)_vnode->private_node;
750 	struct rootfs* fs = (rootfs*)_volume->private_volume;
751 
752 	ReadLocker locker(fs->lock);
753 
754 	mutex_lock(&vnode->stream.dir.cookie_lock);
755 	list_remove_item(&vnode->stream.dir.cookies, cookie);
756 	mutex_unlock(&vnode->stream.dir.cookie_lock);
757 
758 	locker.Unlock();
759 
760 	mutex_destroy(&cookie->lock);
761 
762 	free(cookie);
763 	return B_OK;
764 }
765 
766 
767 static status_t
768 rootfs_read_dir(fs_volume* _volume, fs_vnode* _vnode, void* _cookie,
769 	struct dirent* dirent, size_t bufferSize, uint32* _num)
770 {
771 	struct rootfs_vnode* vnode = (struct rootfs_vnode*)_vnode->private_node;
772 	struct rootfs_dir_cookie* cookie = (rootfs_dir_cookie*)_cookie;
773 	struct rootfs* fs = (rootfs*)_volume->private_volume;
774 	struct rootfs_vnode* childNode = NULL;
775 	const char* name = NULL;
776 	struct rootfs_vnode* nextChildNode = NULL;
777 
778 	TRACE(("rootfs_read_dir: vnode %p, cookie %p, buffer = %p, bufferSize = %d, "
779 		"num = %p\n", _vnode, cookie, dirent, (int)bufferSize, _num));
780 
781 	ReadLocker locker(fs->lock);
782 
783 	MutexLocker cookieLocker(cookie->lock);
784 	int nextState = cookie->iteration_state;
785 
786 	switch (cookie->iteration_state) {
787 		case ITERATION_STATE_DOT:
788 			childNode = vnode;
789 			name = ".";
790 			nextChildNode = vnode->stream.dir.dir_head;
791 			nextState = cookie->iteration_state + 1;
792 			break;
793 		case ITERATION_STATE_DOT_DOT:
794 			childNode = vnode->parent;
795 			name = "..";
796 			nextChildNode = vnode->stream.dir.dir_head;
797 			nextState = cookie->iteration_state + 1;
798 			break;
799 		default:
800 			childNode = cookie->current;
801 			if (childNode) {
802 				name = childNode->name;
803 				nextChildNode = childNode->dir_next;
804 			}
805 			break;
806 	}
807 
808 	if (!childNode) {
809 		// we're at the end of the directory
810 		*_num = 0;
811 		return B_OK;
812 	}
813 
814 	dirent->d_dev = fs->id;
815 	dirent->d_ino = childNode->id;
816 	dirent->d_reclen = offsetof(struct dirent, d_name) + strlen(name) + 1;
817 
818 	if (dirent->d_reclen > bufferSize)
819 		return ENOBUFS;
820 
821 	int nameLength = user_strlcpy(dirent->d_name, name,
822 		bufferSize - offsetof(struct dirent, d_name));
823 	if (nameLength < B_OK)
824 		return nameLength;
825 
826 	cookie->current = nextChildNode;
827 	cookie->iteration_state = nextState;
828 	*_num = 1;
829 	return B_OK;
830 }
831 
832 
833 static status_t
834 rootfs_rewind_dir(fs_volume* _volume, fs_vnode* _vnode, void* _cookie)
835 {
836 	struct rootfs_dir_cookie* cookie = (rootfs_dir_cookie*)_cookie;
837 	struct rootfs_vnode* vnode = (rootfs_vnode*)_vnode->private_node;
838 	struct rootfs* fs = (rootfs*)_volume->private_volume;
839 
840 	ReadLocker locker(fs->lock);
841 	MutexLocker cookieLocker(cookie->lock);
842 
843 	cookie->current = vnode->stream.dir.dir_head;
844 	cookie->iteration_state = ITERATION_STATE_BEGIN;
845 
846 	return B_OK;
847 }
848 
849 
850 static status_t
851 rootfs_ioctl(fs_volume* _volume, fs_vnode* _v, void* _cookie, uint32 op,
852 	void* buffer, size_t length)
853 {
854 	TRACE(("rootfs_ioctl: vnode %p, cookie %p, op %d, buf %p, length %d\n",
855 		_volume, _cookie, (int)op, buffer, (int)length));
856 
857 	return B_BAD_VALUE;
858 }
859 
860 
861 static bool
862 rootfs_can_page(fs_volume* _volume, fs_vnode* _v, void* cookie)
863 {
864 	return false;
865 }
866 
867 
868 static status_t
869 rootfs_read_pages(fs_volume* _volume, fs_vnode* _v, void* cookie, off_t pos,
870 	const iovec* vecs, size_t count, size_t* _numBytes)
871 {
872 	return B_NOT_ALLOWED;
873 }
874 
875 
876 static status_t
877 rootfs_write_pages(fs_volume* _volume, fs_vnode* _v, void* cookie, off_t pos,
878 	const iovec* vecs, size_t count, size_t* _numBytes)
879 {
880 	return B_NOT_ALLOWED;
881 }
882 
883 
884 static status_t
885 rootfs_read_link(fs_volume* _volume, fs_vnode* _link, char* buffer,
886 	size_t* _bufferSize)
887 {
888 	struct rootfs_vnode* link = (rootfs_vnode*)_link->private_node;
889 
890 	if (!S_ISLNK(link->stream.type))
891 		return B_BAD_VALUE;
892 
893 	memcpy(buffer, link->stream.symlink.path, min_c(*_bufferSize,
894 		link->stream.symlink.length));
895 
896 	*_bufferSize = link->stream.symlink.length;
897 
898 	return B_OK;
899 }
900 
901 
902 static status_t
903 rootfs_symlink(fs_volume* _volume, fs_vnode* _dir, const char* name,
904 	const char* path, int mode)
905 {
906 	struct rootfs* fs = (rootfs*)_volume->private_volume;
907 	struct rootfs_vnode* dir = (rootfs_vnode*)_dir->private_node;
908 	struct rootfs_vnode* vnode;
909 
910 	TRACE(("rootfs_symlink: dir %p, name = '%s', path = %s\n", dir, name, path));
911 
912 	status_t status = rootfs_check_permissions(dir, W_OK);
913 	if (status != B_OK)
914 		return status;
915 
916 	WriteLocker locker(fs->lock);
917 
918 	vnode = rootfs_find_in_dir(dir, name);
919 	if (vnode != NULL)
920 		return B_FILE_EXISTS;
921 
922 	TRACE(("rootfs_create: creating new symlink\n"));
923 	vnode = rootfs_create_vnode(fs, dir, name, S_IFLNK | (mode & S_IUMSK));
924 	if (vnode == NULL)
925 		return B_NO_MEMORY;
926 
927 	rootfs_insert_in_dir(fs, dir, vnode);
928 	fs->vnode_list_hash->Insert(vnode);
929 
930 	vnode->stream.symlink.path = strdup(path);
931 	if (vnode->stream.symlink.path == NULL) {
932 		rootfs_delete_vnode(fs, vnode, false);
933 		return B_NO_MEMORY;
934 	}
935 	vnode->stream.symlink.length = strlen(path);
936 
937 	entry_cache_add(fs->volume->id, dir->id, name, vnode->id);
938 
939 	notify_entry_created(fs->id, dir->id, name, vnode->id);
940 
941 	return B_OK;
942 }
943 
944 
945 static status_t
946 rootfs_unlink(fs_volume* _volume, fs_vnode* _dir, const char* name)
947 {
948 	struct rootfs* fs = (rootfs*)_volume->private_volume;
949 	struct rootfs_vnode* dir = (rootfs_vnode*)_dir->private_node;
950 
951 	TRACE(("rootfs_unlink: dir %p (0x%Lx), name '%s'\n", dir, dir->id, name));
952 
953 	status_t status = rootfs_check_permissions(dir, W_OK);
954 	if (status != B_OK)
955 		return status;
956 
957 	return rootfs_remove(fs, dir, name, false);
958 }
959 
960 
961 static status_t
962 rootfs_rename(fs_volume* _volume, fs_vnode* _fromDir, const char* fromName,
963 	fs_vnode* _toDir, const char* toName)
964 {
965 	struct rootfs* fs = (rootfs*)_volume->private_volume;
966 	struct rootfs_vnode* fromDirectory = (rootfs_vnode*)_fromDir->private_node;
967 	struct rootfs_vnode* toDirectory = (rootfs_vnode*)_toDir->private_node;
968 
969 	TRACE(("rootfs_rename: from %p (0x%Lx, %s), fromName '%s', to %p "
970 		"(0x%Lx, %s), toName '%s'\n", fromDirectory, fromDirectory->id,
971 		fromDirectory->name != NULL ? fromDirectory->name : "NULL",
972 		fromName, toDirectory, toDirectory->id,
973 		toDirectory->name != NULL ? toDirectory->name : "NULL",
974 		toName));
975 
976 	// Prevent renaming /boot, since that will stop everything from working.
977 	// TODO: This should be solved differently. Either root should still be
978 	// able to do this or a mechanism should be introduced that does this
979 	// at the VFS level, for example by checking nodes for a specific
980 	// attribute.
981 	if (fromDirectory->id == 1 && strcmp(fromName, "boot") == 0)
982 		return EPERM;
983 
984 	status_t status = rootfs_check_permissions(fromDirectory, W_OK);
985 	if (status == B_OK)
986 		status = rootfs_check_permissions(toDirectory, W_OK);
987 	if (status != B_OK)
988 		return status;
989 
990 	WriteLocker locker(fs->lock);
991 
992 	struct rootfs_vnode* vnode = rootfs_find_in_dir(fromDirectory, fromName);
993 	if (vnode == NULL)
994 		return B_ENTRY_NOT_FOUND;
995 
996 	// make sure the target is not a subdirectory of us
997 	struct rootfs_vnode* parent = toDirectory->parent;
998 	while (parent != NULL && parent != parent->parent) {
999 		if (parent == vnode)
1000 			return B_BAD_VALUE;
1001 
1002 		parent = parent->parent;
1003 	}
1004 
1005 	struct rootfs_vnode* targetVnode = rootfs_find_in_dir(toDirectory, toName);
1006 	if (targetVnode != NULL) {
1007 		// target node exists, let's see if it is an empty directory
1008 		if (S_ISDIR(targetVnode->stream.type)
1009 			&& !rootfs_is_dir_empty(targetVnode))
1010 			return B_NAME_IN_USE;
1011 
1012 		// so we can cleanly remove it
1013 		entry_cache_remove(fs->volume->id, toDirectory->id, toName);
1014 		remove_node(fs, toDirectory, targetVnode);
1015 	}
1016 
1017 	// we try to reuse the existing name buffer if possible
1018 	if (strlen(fromName) < strlen(toName)) {
1019 		char* nameBuffer = strdup(toName);
1020 		if (nameBuffer == NULL)
1021 			return B_NO_MEMORY;
1022 
1023 		free(vnode->name);
1024 		vnode->name = nameBuffer;
1025 	} else {
1026 		// we can just copy it
1027 		strcpy(vnode->name, toName);
1028 	}
1029 
1030 	// remove it from the dir
1031 	entry_cache_remove(fs->volume->id, fromDirectory->id, fromName);
1032 	rootfs_remove_from_dir(fs, fromDirectory, vnode);
1033 
1034 	// Add it back to the dir with the new name.
1035 	// We need to do this even in the same directory,
1036 	// so that it keeps sorted correctly.
1037 	rootfs_insert_in_dir(fs, toDirectory, vnode);
1038 
1039 	entry_cache_add(fs->volume->id, toDirectory->id, toName, vnode->id);
1040 
1041 	notify_entry_moved(fs->id, fromDirectory->id, fromName, toDirectory->id,
1042 		toName, vnode->id);
1043 
1044 	return B_OK;
1045 }
1046 
1047 
1048 static status_t
1049 rootfs_read_stat(fs_volume* _volume, fs_vnode* _v, struct stat* stat)
1050 {
1051 	struct rootfs* fs = (rootfs*)_volume->private_volume;
1052 	struct rootfs_vnode* vnode = (rootfs_vnode*)_v->private_node;
1053 
1054 	TRACE(("rootfs_read_stat: vnode %p (0x%Lx), stat %p\n", vnode, vnode->id,
1055 		stat));
1056 
1057 	// stream exists, but we know to return size 0, since we can only hold
1058 	// directories
1059 	stat->st_dev = fs->id;
1060 	stat->st_ino = vnode->id;
1061 	if (S_ISLNK(vnode->stream.type))
1062 		stat->st_size = vnode->stream.symlink.length;
1063 	else
1064 		stat->st_size = 0;
1065 	stat->st_mode = vnode->stream.type;
1066 
1067 	stat->st_nlink = 1;
1068 	stat->st_blksize = 65536;
1069 	stat->st_blocks = 0;
1070 
1071 	stat->st_uid = vnode->uid;
1072 	stat->st_gid = vnode->gid;
1073 
1074 	stat->st_atim.tv_sec = real_time_clock();
1075 	stat->st_atim.tv_nsec = 0;
1076 	stat->st_mtim = stat->st_ctim = vnode->modification_time;
1077 	stat->st_crtim = vnode->creation_time;
1078 
1079 	return B_OK;
1080 }
1081 
1082 
1083 static status_t
1084 rootfs_write_stat(fs_volume* _volume, fs_vnode* _vnode, const struct stat* stat,
1085 	uint32 statMask)
1086 {
1087 	struct rootfs* fs = (rootfs*)_volume->private_volume;
1088 	struct rootfs_vnode* vnode = (rootfs_vnode*)_vnode->private_node;
1089 
1090 	const uid_t uid = geteuid();
1091 	const bool isOwnerOrRoot = uid == 0 || uid == (uid_t)vnode->uid;
1092 	const bool hasWriteAccess = rootfs_check_permissions(vnode, W_OK) == B_OK;
1093 
1094 	TRACE(("rootfs_write_stat: vnode %p (0x%Lx), stat %p\n", vnode, vnode->id,
1095 		stat));
1096 
1097 	// we cannot change the size of anything
1098 	if (statMask & B_STAT_SIZE)
1099 		return B_BAD_VALUE;
1100 
1101 	WriteLocker locker(fs->lock);
1102 
1103 	if ((statMask & B_STAT_MODE) != 0) {
1104 		// only the user or root can do that
1105 		if (!isOwnerOrRoot)
1106 			return B_NOT_ALLOWED;
1107 
1108 		vnode->stream.type = (vnode->stream.type & ~S_IUMSK)
1109 			| (stat->st_mode & S_IUMSK);
1110 	}
1111 
1112 	if ((statMask & B_STAT_UID) != 0) {
1113 		// only root should be allowed
1114 		if (uid != 0)
1115 			return B_NOT_ALLOWED;
1116 		vnode->uid = stat->st_uid;
1117 	}
1118 
1119 	if ((statMask & B_STAT_GID) != 0) {
1120 		// only user or root can do that
1121 		if (!isOwnerOrRoot)
1122 			return B_NOT_ALLOWED;
1123 		vnode->gid = stat->st_gid;
1124 	}
1125 
1126 	if ((statMask & B_STAT_MODIFICATION_TIME) != 0) {
1127 		if (!isOwnerOrRoot && !hasWriteAccess)
1128 			return B_NOT_ALLOWED;
1129 		vnode->modification_time = stat->st_mtim;
1130 	}
1131 
1132 	if ((statMask & B_STAT_CREATION_TIME) != 0) {
1133 		if (!isOwnerOrRoot && !hasWriteAccess)
1134 			return B_NOT_ALLOWED;
1135 		vnode->creation_time = stat->st_crtim;
1136 	}
1137 
1138 	locker.Unlock();
1139 
1140 	notify_stat_changed(fs->id, get_parent_id(vnode), vnode->id, statMask);
1141 	return B_OK;
1142 }
1143 
1144 
1145 static status_t
1146 rootfs_create_special_node(fs_volume* _volume, fs_vnode* _dir, const char* name,
1147 	fs_vnode* subVnode, mode_t mode, uint32 flags, fs_vnode* _superVnode,
1148 	ino_t* _nodeID)
1149 {
1150 	struct rootfs* fs = (rootfs*)_volume->private_volume;
1151 	struct rootfs_vnode* dir = (rootfs_vnode*)_dir->private_node;
1152 	struct rootfs_vnode* vnode;
1153 
1154 	WriteLocker locker(fs->lock);
1155 
1156 	if (name != NULL) {
1157 		vnode = rootfs_find_in_dir(dir, name);
1158 		if (vnode != NULL)
1159 			return B_FILE_EXISTS;
1160 	}
1161 
1162 	vnode = rootfs_create_vnode(fs, dir, name, mode);
1163 	if (vnode == NULL)
1164 		return B_NO_MEMORY;
1165 
1166 	if (name != NULL)
1167 		rootfs_insert_in_dir(fs, dir, vnode);
1168 	else
1169 		flags |= B_VNODE_PUBLISH_REMOVED;
1170 
1171 	fs->vnode_list_hash->Insert(vnode);
1172 
1173 	_superVnode->private_node = vnode;
1174 	_superVnode->ops = &sVnodeOps;
1175 	*_nodeID = vnode->id;
1176 
1177 	if (subVnode == NULL)
1178 		subVnode = _superVnode;
1179 
1180 	status_t status = publish_vnode(fs->volume, vnode->id,
1181 		subVnode->private_node, subVnode->ops, mode, flags);
1182 	if (status != B_OK) {
1183 		if (name != NULL)
1184 			rootfs_remove_from_dir(fs, dir, vnode);
1185 		rootfs_delete_vnode(fs, vnode, false);
1186 		return status;
1187 	}
1188 
1189 	if (name != NULL) {
1190 		entry_cache_add(fs->volume->id, dir->id, name, vnode->id);
1191 		notify_entry_created(fs->id, dir->id, name, vnode->id);
1192 	}
1193 
1194 	return B_OK;
1195 }
1196 
1197 
1198 static status_t
1199 rootfs_std_ops(int32 op, ...)
1200 {
1201 	switch (op) {
1202 		case B_MODULE_INIT:
1203 			return B_OK;
1204 
1205 		case B_MODULE_UNINIT:
1206 			return B_OK;
1207 
1208 		default:
1209 			return B_ERROR;
1210 	}
1211 }
1212 
1213 
1214 namespace {
1215 
1216 fs_volume_ops sVolumeOps = {
1217 	&rootfs_unmount,
1218 	NULL,
1219 	NULL,
1220 	&rootfs_sync,
1221 	&rootfs_get_vnode,
1222 
1223 	// the other operations are not supported (indices, queries)
1224 	NULL,
1225 };
1226 
1227 fs_vnode_ops sVnodeOps = {
1228 	&rootfs_lookup,
1229 	&rootfs_get_vnode_name,
1230 
1231 	&rootfs_put_vnode,
1232 	&rootfs_remove_vnode,
1233 
1234 	&rootfs_can_page,
1235 	&rootfs_read_pages,
1236 	&rootfs_write_pages,
1237 
1238 	NULL,	// io()
1239 	NULL,	// cancel_io()
1240 
1241 	NULL,	// get_file_map()
1242 
1243 	/* common */
1244 	&rootfs_ioctl,
1245 	NULL,	// fs_set_flags()
1246 	NULL,	// select
1247 	NULL,	// deselect
1248 	&rootfs_fsync,
1249 
1250 	&rootfs_read_link,
1251 	&rootfs_symlink,
1252 	NULL,	// fs_link()
1253 	&rootfs_unlink,
1254 	&rootfs_rename,
1255 
1256 	NULL,	// fs_access()
1257 	&rootfs_read_stat,
1258 	&rootfs_write_stat,
1259 	NULL,
1260 
1261 	/* file */
1262 	&rootfs_create,
1263 	&rootfs_open,
1264 	&rootfs_close,
1265 	&rootfs_free_cookie,
1266 	&rootfs_read,
1267 	&rootfs_write,
1268 
1269 	/* directory */
1270 	&rootfs_create_dir,
1271 	&rootfs_remove_dir,
1272 	&rootfs_open_dir,
1273 	&rootfs_close,			// same as for files - it does nothing, anyway
1274 	&rootfs_free_dir_cookie,
1275 	&rootfs_read_dir,
1276 	&rootfs_rewind_dir,
1277 
1278 	/* attribute directory operations */
1279 	NULL,	// open_attr_dir
1280 	NULL,	// close_attr_dir
1281 	NULL,	// free_attr_dir_cookie
1282 	NULL,	// read_attr_dir
1283 	NULL,	// rewind_attr_dir
1284 
1285 	/* attribute operations */
1286 	NULL,	// create_attr
1287 	NULL,	// open_attr
1288 	NULL,	// close_attr
1289 	NULL,	// free_attr_cookie
1290 	NULL,	// read_attr
1291 	NULL,	// write_attr
1292 
1293 	NULL,	// read_attr_stat
1294 	NULL,	// write_attr_stat
1295 	NULL,	// rename_attr
1296 	NULL,	// remove_attr
1297 
1298 	/* support for node and FS layers */
1299 	&rootfs_create_special_node,
1300 	NULL,	// get_super_vnode,
1301 };
1302 
1303 }	// namespace
1304 
1305 file_system_module_info gRootFileSystem = {
1306 	{
1307 		"file_systems/rootfs" B_CURRENT_FS_API_VERSION,
1308 		0,
1309 		rootfs_std_ops,
1310 	},
1311 
1312 	"rootfs",				// short_name
1313 	"Root File System",		// pretty_name
1314 	0,						// DDM flags
1315 
1316 	NULL,	// identify_partition()
1317 	NULL,	// scan_partition()
1318 	NULL,	// free_identify_partition_cookie()
1319 	NULL,	// free_partition_content_cookie()
1320 
1321 	&rootfs_mount,
1322 };
1323