xref: /haiku/src/system/kernel/fs/rootfs.cpp (revision e1c4049fed1047bdb957b0529e1921e97ef94770)
1 /*
2  * Copyright 2002-2017, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  *
5  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
6  * Distributed under the terms of the NewOS License.
7  */
8 
9 
10 #if FS_SHELL
11 #	include "fssh_api_wrapper.h"
12 
13 #	include "OpenHashTable.h"
14 #	include "list.h"
15 #else
16 #	include <stdio.h>
17 #	include <stdlib.h>
18 #	include <string.h>
19 #	include <sys/stat.h>
20 
21 #	include <fs_cache.h>
22 #	include <KernelExport.h>
23 #	include <NodeMonitor.h>
24 
25 #	include <debug.h>
26 #	include <lock.h>
27 #	include <OpenHashTable.h>
28 #	include <util/AutoLock.h>
29 #	include <vfs.h>
30 #	include <vm/vm.h>
31 #endif
32 
33 #include <fs_ops_support.h>
34 
35 
36 #if FS_SHELL
37 	using namespace FSShell;
38 #	define user_strlcpy(to, from, len)	(strlcpy(to, from, len), FSSH_B_OK)
39 #endif
40 
41 
42 //#define TRACE_ROOTFS
43 #ifdef TRACE_ROOTFS
44 #	define TRACE(x) dprintf x
45 #else
46 #	define TRACE(x)
47 #endif
48 
49 
// File-local data structures of the root file system.
50 namespace {
51 
// Type-dependent part of a node. Directory and symlink data are separate
// members (not a union), so both are always present in every node.
52 struct rootfs_stream {
	// file type and permission bits (e.g. S_IFDIR | 0755 for the root)
53 	mode_t						type;
54 	struct stream_dir {
		// first child; children are chained through rootfs_vnode::dir_next
55 		struct rootfs_vnode*	dir_head;
		// rootfs_dir_cookie objects currently open on this directory
56 		struct list				cookies;
		// held while adding to/removing from "cookies"
57 		mutex					cookie_lock;
58 	} dir;
59 	struct stream_symlink {
		// strdup()'d link target, set by rootfs_symlink()
60 		char*					path;
		// strlen() of "path"
61 		size_t					length;
62 	} symlink;
63 };
64 
// A single node (directory, symlink or special node) of the file system.
65 struct rootfs_vnode {
	// hash table link, see VnodeHash::GetLink()
66 	struct rootfs_vnode*		all_next;
	// taken from rootfs::next_vnode_id on creation
67 	ino_t						id;
	// strdup()'d entry name; NULL if the node was created without a name
68 	char*						name;
69 	timespec					modification_time;
70 	timespec					creation_time;
71 	uid_t						uid;
72 	gid_t						gid;
	// containing directory; the root vnode is its own parent
73 	struct rootfs_vnode*		parent;
	// next entry in the parent's child list
74 	struct rootfs_vnode*		dir_next;
75 	struct rootfs_stream		stream;
76 };
77 
// BOpenHashTable definition that keys rootfs_vnode objects by their ID and
// chains them through rootfs_vnode::all_next.
78 struct VnodeHash {
79 	typedef	ino_t			KeyType;
80 	typedef	rootfs_vnode	ValueType;
81 
82 	size_t HashKey(KeyType key) const
83 	{
84 		return key;
85 	}
86 
87 	size_t Hash(ValueType* vnode) const
88 	{
89 		return vnode->id;
90 	}
91 
92 	bool Compare(KeyType key, ValueType* vnode) const
93 	{
94 		return vnode->id == key;
95 	}
96 
97 	ValueType*& GetLink(ValueType* value) const
98 	{
99 		return value->all_next;
100 	}
101 };
102 
103 typedef BOpenHashTable<VnodeHash> VnodeTable;
104 
// Per-mount state, stored in fs_volume::private_volume by rootfs_mount().
105 struct rootfs {
106 	fs_volume*					volume;
107 	dev_t						id;
	// taken as reader for lookups/iteration and as writer for modifications
108 	rw_lock						lock;
	// ID handed to the next node that gets created
109 	ino_t						next_vnode_id;
	// all nodes of the volume, keyed by ID
110 	VnodeTable*					vnode_list_hash;
111 	struct rootfs_vnode*		root_vnode;
112 };
113 
// Cookie of an open directory (directories are the only kind of stream that
// gets a cookie in this file).
114 // dircookie, dirs are only types of streams supported by rootfs
115 struct rootfs_dir_cookie {
	// links the cookie into stream_dir::cookies of its directory
116 	struct list_link			link;
	// guards "current" and "iteration_state"
117 	mutex						lock;
	// child that the next read returns once "." and ".." have been reported
118 	struct rootfs_vnode*		current;
	// one of the ITERATION_STATE_* values below
119 	int32						iteration_state;
120 };
121 
122 // directory iteration states
// A read reports "." first, then "..", then the real children.
123 enum {
124 	ITERATION_STATE_DOT		= 0,
125 	ITERATION_STATE_DOT_DOT	= 1,
126 	ITERATION_STATE_OTHERS	= 2,
127 	ITERATION_STATE_BEGIN	= ITERATION_STATE_DOT,
128 };
129 
130 
131 // extern only to make forward declaration possible
132 extern fs_volume_ops sVolumeOps;
133 extern fs_vnode_ops sVnodeOps;
134 
135 } // namespace
136 
137 
138 #define ROOTFS_HASH_SIZE 16
139 
140 
141 inline static status_t
142 rootfs_check_permissions(struct rootfs_vnode* dir, int accessMode)
143 {
144 	return check_access_permissions(accessMode, dir->stream.type, (gid_t)dir->gid, (uid_t)dir->uid);
145 }
146 
147 
148 static timespec
149 current_timespec()
150 {
151 	bigtime_t time = real_time_clock_usecs();
152 
153 	timespec tv;
154 	tv.tv_sec = time / 1000000;
155 	tv.tv_nsec = (time % 1000000) * 1000;
156 	return tv;
157 }
158 
159 
160 static ino_t
161 get_parent_id(struct rootfs_vnode* vnode)
162 {
163 	if (vnode->parent != NULL)
164 		return vnode->parent->id;
165 	return -1;
166 }
167 
168 
169 static struct rootfs_vnode*
170 rootfs_create_vnode(struct rootfs* fs, struct rootfs_vnode* parent,
171 	const char* name, int type)
172 {
173 	struct rootfs_vnode* vnode;
174 
175 	vnode = (rootfs_vnode*)malloc(sizeof(struct rootfs_vnode));
176 	if (vnode == NULL)
177 		return NULL;
178 
179 	memset(vnode, 0, sizeof(struct rootfs_vnode));
180 
181 	if (name != NULL) {
182 		vnode->name = strdup(name);
183 		if (vnode->name == NULL) {
184 			free(vnode);
185 			return NULL;
186 		}
187 	}
188 
189 	vnode->id = fs->next_vnode_id++;
190 	vnode->stream.type = type;
191 	vnode->creation_time = vnode->modification_time = current_timespec();
192 	vnode->uid = geteuid();
193 	vnode->gid = parent ? parent->gid : getegid();
194 		// inherit group from parent if possible
195 
196 	if (S_ISDIR(type)) {
197 		list_init(&vnode->stream.dir.cookies);
198 		mutex_init(&vnode->stream.dir.cookie_lock, "rootfs dir cookies");
199 	}
200 
201 	return vnode;
202 }
203 
204 
205 static status_t
206 rootfs_delete_vnode(struct rootfs* fs, struct rootfs_vnode* v, bool force_delete)
207 {
208 	// cant delete it if it's in a directory or is a directory
209 	// and has children
210 	if (!force_delete && (v->stream.dir.dir_head != NULL || v->dir_next != NULL))
211 		return EPERM;
212 
213 	// remove it from the global hash table
214 	fs->vnode_list_hash->Remove(v);
215 
216 	if (S_ISDIR(v->stream.type))
217 		mutex_destroy(&v->stream.dir.cookie_lock);
218 
219 	free(v->name);
220 	free(v);
221 
222 	return 0;
223 }
224 
225 
226 /*! Makes sure none of the dircookies point to the vnode passed in. */
227 static void
228 update_dir_cookies(struct rootfs_vnode* dir, struct rootfs_vnode* vnode)
229 {
230 	struct rootfs_dir_cookie* cookie = NULL;
231 
232 	while ((cookie = (rootfs_dir_cookie*)list_get_next_item(
233 			&dir->stream.dir.cookies, cookie)) != NULL) {
234 		MutexLocker cookieLocker(cookie->lock);
235 		if (cookie->current == vnode)
236 			cookie->current = vnode->dir_next;
237 	}
238 }
239 
240 
241 static struct rootfs_vnode*
242 rootfs_find_in_dir(struct rootfs_vnode* dir, const char* path)
243 {
244 	struct rootfs_vnode* vnode;
245 
246 	if (!strcmp(path, "."))
247 		return dir;
248 	if (!strcmp(path, ".."))
249 		return dir->parent;
250 
251 	for (vnode = dir->stream.dir.dir_head; vnode; vnode = vnode->dir_next) {
252 		if (!strcmp(vnode->name, path))
253 			return vnode;
254 	}
255 	return NULL;
256 }
257 
258 
259 static status_t
260 rootfs_insert_in_dir(struct rootfs* fs, struct rootfs_vnode* dir,
261 	struct rootfs_vnode* vnode)
262 {
263 	// make sure the directory stays sorted alphabetically
264 
265 	struct rootfs_vnode* node = dir->stream.dir.dir_head;
266 	struct rootfs_vnode* last = NULL;
267 	while (node != NULL && strcmp(node->name, vnode->name) < 0) {
268 		last = node;
269 		node = node->dir_next;
270 	}
271 	if (last == NULL) {
272 		// the new vnode is the first entry in the list
273 		vnode->dir_next = dir->stream.dir.dir_head;
274 		dir->stream.dir.dir_head = vnode;
275 	} else {
276 		// insert after that node
277 		vnode->dir_next = last->dir_next;
278 		last->dir_next = vnode;
279 	}
280 
281 	vnode->parent = dir;
282 	dir->modification_time = current_timespec();
283 
284 	notify_stat_changed(fs->id, get_parent_id(dir), dir->id,
285 		B_STAT_MODIFICATION_TIME);
286 	return B_OK;
287 }
288 
289 
290 static status_t
291 rootfs_remove_from_dir(struct rootfs* fs, struct rootfs_vnode* dir,
292 	struct rootfs_vnode* removeVnode)
293 {
294 	struct rootfs_vnode* vnode;
295 	struct rootfs_vnode* lastVnode;
296 
297 	for (vnode = dir->stream.dir.dir_head, lastVnode = NULL; vnode != NULL;
298 			lastVnode = vnode, vnode = vnode->dir_next) {
299 		if (vnode == removeVnode) {
300 			// make sure all dircookies dont point to this vnode
301 			update_dir_cookies(dir, vnode);
302 
303 			if (lastVnode)
304 				lastVnode->dir_next = vnode->dir_next;
305 			else
306 				dir->stream.dir.dir_head = vnode->dir_next;
307 			vnode->dir_next = NULL;
308 
309 			dir->modification_time = current_timespec();
310 			notify_stat_changed(fs->id, get_parent_id(dir), dir->id,
311 				B_STAT_MODIFICATION_TIME);
312 			return B_OK;
313 		}
314 	}
315 	return B_ENTRY_NOT_FOUND;
316 }
317 
318 
319 static bool
320 rootfs_is_dir_empty(struct rootfs_vnode* dir)
321 {
322 	return !dir->stream.dir.dir_head;
323 }
324 
325 
326 /*! You must hold the FS write lock when calling this function */
327 static status_t
328 remove_node(struct rootfs* fs, struct rootfs_vnode* directory,
329 	struct rootfs_vnode* vnode)
330 {
331 	// schedule this vnode to be removed when it's ref goes to zero
332 
333 	bool gotNode = (get_vnode(fs->volume, vnode->id, NULL) == B_OK);
334 
335 	status_t status = B_OK;
336 	if (gotNode)
337 		status = remove_vnode(fs->volume, vnode->id);
338 
339 	if (status == B_OK) {
340 		rootfs_remove_from_dir(fs, directory, vnode);
341 		notify_entry_removed(fs->id, directory->id, vnode->name, vnode->id);
342 	}
343 
344 	if (gotNode)
345 		put_vnode(fs->volume, vnode->id);
346 
347 	return status;
348 }
349 
350 
351 static status_t
352 rootfs_remove(struct rootfs* fs, struct rootfs_vnode* dir, const char* name,
353 	bool isDirectory)
354 {
355 	struct rootfs_vnode* vnode;
356 	status_t status = B_OK;
357 
358 	WriteLocker locker(fs->lock);
359 
360 	vnode = rootfs_find_in_dir(dir, name);
361 	if (!vnode)
362 		status = B_ENTRY_NOT_FOUND;
363 	else if (isDirectory && !S_ISDIR(vnode->stream.type))
364 		status = B_NOT_A_DIRECTORY;
365 	else if (!isDirectory && S_ISDIR(vnode->stream.type))
366 		status = B_IS_A_DIRECTORY;
367 	else if (isDirectory && !rootfs_is_dir_empty(vnode))
368 		status = B_DIRECTORY_NOT_EMPTY;
369 
370 	if (status != B_OK)
371 		return status;
372 
373 	entry_cache_remove(fs->volume->id, dir->id, name);
374 
375 	return remove_node(fs, dir, vnode);
376 }
377 
378 
379 //	#pragma mark -
380 
381 
382 static status_t
383 rootfs_mount(fs_volume* volume, const char* device, uint32 flags,
384 	const char* args, ino_t* _rootID)
385 {
386 	struct rootfs* fs;
387 	struct rootfs_vnode* vnode;
388 	status_t err;
389 
390 	TRACE(("rootfs_mount: entry\n"));
391 
392 	fs = (rootfs*)malloc(sizeof(struct rootfs));
393 	if (fs == NULL)
394 		return B_NO_MEMORY;
395 
396 	volume->private_volume = fs;
397 	volume->ops = &sVolumeOps;
398 	fs->volume = volume;
399 	fs->id = volume->id;
400 	fs->next_vnode_id = 1;
401 
402 	rw_lock_init(&fs->lock, "rootfs");
403 
404 	fs->vnode_list_hash = new(std::nothrow) VnodeTable();
405 	if (fs->vnode_list_hash == NULL
406 			|| fs->vnode_list_hash->Init(ROOTFS_HASH_SIZE) != B_OK) {
407 		err = B_NO_MEMORY;
408 		goto err2;
409 	}
410 
411 	// create the root vnode
412 	vnode = rootfs_create_vnode(fs, NULL, ".", S_IFDIR | 0755);
413 	if (vnode == NULL) {
414 		err = B_NO_MEMORY;
415 		goto err3;
416 	}
417 	vnode->parent = vnode;
418 
419 	fs->root_vnode = vnode;
420 	fs->vnode_list_hash->Insert(vnode);
421 	publish_vnode(volume, vnode->id, vnode, &sVnodeOps, vnode->stream.type, 0);
422 
423 	*_rootID = vnode->id;
424 
425 	return B_OK;
426 
427 err3:
428 	delete fs->vnode_list_hash;
429 err2:
430 	rw_lock_destroy(&fs->lock);
431 	free(fs);
432 
433 	return err;
434 }
435 
436 
437 static status_t
438 rootfs_unmount(fs_volume* _volume)
439 {
440 	struct rootfs* fs = (struct rootfs*)_volume->private_volume;
441 
442 	TRACE(("rootfs_unmount: entry fs = %p\n", fs));
443 
444 	// release the reference to the root
445 	put_vnode(fs->volume, fs->root_vnode->id);
446 
447 	// delete all of the vnodes
448 	VnodeTable::Iterator i(fs->vnode_list_hash);
449 
450 	while (i.HasNext()) {
451 		struct rootfs_vnode* vnode = i.Next();
452 		rootfs_delete_vnode(fs, vnode, true);
453 	}
454 
455 	delete fs->vnode_list_hash;
456 	rw_lock_destroy(&fs->lock);
457 	free(fs);
458 
459 	return B_OK;
460 }
461 
462 
463 static status_t
464 rootfs_sync(fs_volume* _volume)
465 {
466 	TRACE(("rootfs_sync: entry\n"));
467 
468 	return B_OK;
469 }
470 
471 
472 static status_t
473 rootfs_lookup(fs_volume* _volume, fs_vnode* _dir, const char* name, ino_t* _id)
474 {
475 	struct rootfs* fs = (struct rootfs*)_volume->private_volume;
476 	struct rootfs_vnode* dir = (struct rootfs_vnode*)_dir->private_node;
477 	struct rootfs_vnode* vnode;
478 
479 	TRACE(("rootfs_lookup: entry dir %p, name '%s'\n", dir, name));
480 	if (!S_ISDIR(dir->stream.type))
481 		return B_NOT_A_DIRECTORY;
482 
483 	status_t status = rootfs_check_permissions(dir, X_OK);
484 	if (status != B_OK)
485 		return status;
486 
487 	ReadLocker locker(fs->lock);
488 
489 	// look it up
490 	vnode = rootfs_find_in_dir(dir, name);
491 	if (!vnode)
492 		return B_ENTRY_NOT_FOUND;
493 
494 	status = get_vnode(fs->volume, vnode->id, NULL);
495 	if (status != B_OK)
496 		return status;
497 
498 	entry_cache_add(fs->volume->id, dir->id, name, vnode->id);
499 
500 	*_id = vnode->id;
501 	return B_OK;
502 }
503 
504 
505 static status_t
506 rootfs_get_vnode_name(fs_volume* _volume, fs_vnode* _vnode, char* buffer,
507 	size_t bufferSize)
508 {
509 	struct rootfs_vnode* vnode = (struct rootfs_vnode*)_vnode->private_node;
510 
511 	TRACE(("rootfs_get_vnode_name: vnode = %p (name = %s)\n", vnode,
512 		vnode->name));
513 
514 	strlcpy(buffer, vnode->name, bufferSize);
515 	return B_OK;
516 }
517 
518 
519 static status_t
520 rootfs_get_vnode(fs_volume* _volume, ino_t id, fs_vnode* _vnode, int* _type,
521 	uint32* _flags, bool reenter)
522 {
523 	struct rootfs* fs = (struct rootfs*)_volume->private_volume;
524 	struct rootfs_vnode* vnode;
525 
526 	TRACE(("rootfs_getvnode: asking for vnode %lld, r %d\n", id, reenter));
527 
528 	if (!reenter)
529 		rw_lock_read_lock(&fs->lock);
530 
531 	vnode = fs->vnode_list_hash->Lookup(id);
532 
533 	if (!reenter)
534 		rw_lock_read_unlock(&fs->lock);
535 
536 	TRACE(("rootfs_getnvnode: looked it up at %p\n", vnode));
537 
538 	if (vnode == NULL)
539 		return B_ENTRY_NOT_FOUND;
540 
541 	_vnode->private_node = vnode;
542 	_vnode->ops = &sVnodeOps;
543 	*_type = vnode->stream.type;
544 	*_flags = 0;
545 
546 	return B_OK;
547 }
548 
549 
550 static status_t
551 rootfs_put_vnode(fs_volume* _volume, fs_vnode* _vnode, bool reenter)
552 {
553 #ifdef TRACE_ROOTFS
554 	struct rootfs_vnode* vnode = (struct rootfs_vnode*)_vnode->private_node;
555 
556 	TRACE(("rootfs_putvnode: entry on vnode 0x%Lx, r %d\n", vnode->id, reenter));
557 #endif
558 	return B_OK; // whatever
559 }
560 
561 
562 static status_t
563 rootfs_remove_vnode(fs_volume* _volume, fs_vnode* _vnode, bool reenter)
564 {
565 	struct rootfs* fs = (struct rootfs*)_volume->private_volume;
566 	struct rootfs_vnode* vnode = (struct rootfs_vnode*)_vnode->private_node;
567 
568 	TRACE(("rootfs_remove_vnode: remove %p (0x%Lx), r %d\n", vnode, vnode->id,
569 		reenter));
570 
571 	if (!reenter)
572 		rw_lock_write_lock(&fs->lock);
573 
574 	if (vnode->dir_next) {
575 		// can't remove node if it's linked to the dir
576 		panic("rootfs_remove_vnode: vnode %p asked to be removed is present in "
577 			"dir\n", vnode);
578 	}
579 
580 	rootfs_delete_vnode(fs, vnode, false);
581 
582 	if (!reenter)
583 		rw_lock_write_unlock(&fs->lock);
584 
585 	return B_OK;
586 }
587 
588 
589 static status_t
590 rootfs_create(fs_volume* _volume, fs_vnode* _dir, const char* name, int omode,
591 	int perms, void** _cookie, ino_t* _newID)
592 {
593 	return B_BAD_VALUE;
594 }
595 
596 
597 static status_t
598 rootfs_open(fs_volume* _volume, fs_vnode* _v, int openMode, void** _cookie)
599 {
600 	struct rootfs_vnode* vnode = (rootfs_vnode*)_v->private_node;
601 
602 	if (S_ISDIR(vnode->stream.type) && (openMode & O_RWMASK) != O_RDONLY)
603 		return B_IS_A_DIRECTORY;
604 	if ((openMode & O_DIRECTORY) != 0 && !S_ISDIR(vnode->stream.type))
605 		return B_NOT_A_DIRECTORY;
606 
607 	status_t status = rootfs_check_permissions(vnode, open_mode_to_access(openMode));
608 	if (status != B_OK)
609 		return status;
610 
611 	// allow to open the file, but nothing can be done with it
612 
613 	*_cookie = NULL;
614 	return B_OK;
615 }
616 
617 
618 static status_t
619 rootfs_close(fs_volume* _volume, fs_vnode* _vnode, void* _cookie)
620 {
621 	TRACE(("rootfs_close: entry vnode %p, cookie %p\n", _vnode->private_node,
622 		_cookie));
623 	return B_OK;
624 }
625 
626 
627 static status_t
628 rootfs_free_cookie(fs_volume* _volume, fs_vnode* _v, void* _cookie)
629 {
630 	return B_OK;
631 }
632 
633 
634 static status_t
635 rootfs_fsync(fs_volume* _volume, fs_vnode* _v)
636 {
637 	return B_OK;
638 }
639 
640 
641 static status_t
642 rootfs_read(fs_volume* _volume, fs_vnode* _vnode, void* _cookie,
643 	off_t pos, void* buffer, size_t* _length)
644 {
645 	return EINVAL;
646 }
647 
648 
649 static status_t
650 rootfs_write(fs_volume* _volume, fs_vnode* vnode, void* cookie,
651 	off_t pos, const void* buffer, size_t* _length)
652 {
653 	TRACE(("rootfs_write: vnode %p, cookie %p, pos 0x%Lx , len %#x\n",
654 		vnode, cookie, pos, (int)*_length));
655 
656 	return EPERM;
657 }
658 
659 
660 static status_t
661 rootfs_create_dir(fs_volume* _volume, fs_vnode* _dir, const char* name,
662 	int mode)
663 {
664 	struct rootfs* fs = (rootfs*)_volume->private_volume;
665 	struct rootfs_vnode* dir = (rootfs_vnode*)_dir->private_node;
666 	struct rootfs_vnode* vnode;
667 
668 	TRACE(("rootfs_create_dir: dir %p, name = '%s', perms = %d\n", dir, name,
669 		mode));
670 
671 	status_t status = rootfs_check_permissions(dir, W_OK);
672 	if (status != B_OK)
673 		return status;
674 
675 	WriteLocker locker(fs->lock);
676 
677 	vnode = rootfs_find_in_dir(dir, name);
678 	if (vnode != NULL)
679 		return B_FILE_EXISTS;
680 
681 	TRACE(("rootfs_create: creating new vnode\n"));
682 	vnode = rootfs_create_vnode(fs, dir, name, S_IFDIR | (mode & S_IUMSK));
683 	if (vnode == NULL)
684 		return B_NO_MEMORY;
685 
686 	rootfs_insert_in_dir(fs, dir, vnode);
687 	fs->vnode_list_hash->Insert(vnode);
688 
689 	entry_cache_add(fs->volume->id, dir->id, name, vnode->id);
690 	notify_entry_created(fs->id, dir->id, name, vnode->id);
691 
692 	return B_OK;
693 }
694 
695 
696 static status_t
697 rootfs_remove_dir(fs_volume* _volume, fs_vnode* _dir, const char* name)
698 {
699 	struct rootfs* fs = (rootfs*)_volume->private_volume;
700 	struct rootfs_vnode* dir = (rootfs_vnode*)_dir->private_node;
701 
702 	status_t status = rootfs_check_permissions(dir, W_OK);
703 	if (status != B_OK)
704 		return status;
705 
706 	TRACE(("rootfs_remove_dir: dir %p (0x%Lx), name '%s'\n", dir, dir->id,
707 		name));
708 
709 	return rootfs_remove(fs, dir, name, true);
710 }
711 
712 
713 static status_t
714 rootfs_open_dir(fs_volume* _volume, fs_vnode* _vnode, void** _cookie)
715 {
716 	struct rootfs* fs = (struct rootfs*)_volume->private_volume;
717 	struct rootfs_vnode* vnode = (struct rootfs_vnode*)_vnode->private_node;
718 	struct rootfs_dir_cookie* cookie;
719 
720 	status_t status = rootfs_check_permissions(vnode, R_OK);
721 	if (status < B_OK)
722 		return status;
723 
724 	TRACE(("rootfs_open: vnode %p\n", vnode));
725 
726 	if (!S_ISDIR(vnode->stream.type))
727 		return B_BAD_VALUE;
728 
729 	cookie = (rootfs_dir_cookie*)malloc(sizeof(struct rootfs_dir_cookie));
730 	if (cookie == NULL)
731 		return B_NO_MEMORY;
732 
733 	mutex_init(&cookie->lock, "rootfs dir cookie");
734 
735 	ReadLocker locker(fs->lock);
736 
737 	cookie->current = vnode->stream.dir.dir_head;
738 	cookie->iteration_state = ITERATION_STATE_BEGIN;
739 
740 	mutex_lock(&vnode->stream.dir.cookie_lock);
741 	list_add_item(&vnode->stream.dir.cookies, cookie);
742 	mutex_unlock(&vnode->stream.dir.cookie_lock);
743 
744 	*_cookie = cookie;
745 
746 	return B_OK;
747 }
748 
749 
750 static status_t
751 rootfs_free_dir_cookie(fs_volume* _volume, fs_vnode* _vnode, void* _cookie)
752 {
753 	struct rootfs_dir_cookie* cookie = (rootfs_dir_cookie*)_cookie;
754 	struct rootfs_vnode* vnode = (rootfs_vnode*)_vnode->private_node;
755 	struct rootfs* fs = (rootfs*)_volume->private_volume;
756 
757 	ReadLocker locker(fs->lock);
758 
759 	mutex_lock(&vnode->stream.dir.cookie_lock);
760 	list_remove_item(&vnode->stream.dir.cookies, cookie);
761 	mutex_unlock(&vnode->stream.dir.cookie_lock);
762 
763 	locker.Unlock();
764 
765 	mutex_destroy(&cookie->lock);
766 
767 	free(cookie);
768 	return B_OK;
769 }
770 
771 
772 static status_t
773 rootfs_read_dir(fs_volume* _volume, fs_vnode* _vnode, void* _cookie,
774 	struct dirent* dirent, size_t bufferSize, uint32* _num)
775 {
776 	struct rootfs_vnode* vnode = (struct rootfs_vnode*)_vnode->private_node;
777 	struct rootfs_dir_cookie* cookie = (rootfs_dir_cookie*)_cookie;
778 	struct rootfs* fs = (rootfs*)_volume->private_volume;
779 	struct rootfs_vnode* childNode = NULL;
780 	const char* name = NULL;
781 	struct rootfs_vnode* nextChildNode = NULL;
782 
783 	TRACE(("rootfs_read_dir: vnode %p, cookie %p, buffer = %p, bufferSize = %d, "
784 		"num = %p\n", _vnode, cookie, dirent, (int)bufferSize, _num));
785 
786 	ReadLocker locker(fs->lock);
787 
788 	MutexLocker cookieLocker(cookie->lock);
789 	int nextState = cookie->iteration_state;
790 
791 	switch (cookie->iteration_state) {
792 		case ITERATION_STATE_DOT:
793 			childNode = vnode;
794 			name = ".";
795 			nextChildNode = vnode->stream.dir.dir_head;
796 			nextState = cookie->iteration_state + 1;
797 			break;
798 		case ITERATION_STATE_DOT_DOT:
799 			childNode = vnode->parent;
800 			name = "..";
801 			nextChildNode = vnode->stream.dir.dir_head;
802 			nextState = cookie->iteration_state + 1;
803 			break;
804 		default:
805 			childNode = cookie->current;
806 			if (childNode) {
807 				name = childNode->name;
808 				nextChildNode = childNode->dir_next;
809 			}
810 			break;
811 	}
812 
813 	if (!childNode) {
814 		// we're at the end of the directory
815 		*_num = 0;
816 		return B_OK;
817 	}
818 
819 	dirent->d_dev = fs->id;
820 	dirent->d_ino = childNode->id;
821 	dirent->d_reclen = offsetof(struct dirent, d_name) + strlen(name) + 1;
822 
823 	if (dirent->d_reclen > bufferSize)
824 		return ENOBUFS;
825 
826 	int nameLength = user_strlcpy(dirent->d_name, name,
827 		bufferSize - offsetof(struct dirent, d_name));
828 	if (nameLength < B_OK)
829 		return nameLength;
830 
831 	cookie->current = nextChildNode;
832 	cookie->iteration_state = nextState;
833 	*_num = 1;
834 	return B_OK;
835 }
836 
837 
838 static status_t
839 rootfs_rewind_dir(fs_volume* _volume, fs_vnode* _vnode, void* _cookie)
840 {
841 	struct rootfs_dir_cookie* cookie = (rootfs_dir_cookie*)_cookie;
842 	struct rootfs_vnode* vnode = (rootfs_vnode*)_vnode->private_node;
843 	struct rootfs* fs = (rootfs*)_volume->private_volume;
844 
845 	ReadLocker locker(fs->lock);
846 	MutexLocker cookieLocker(cookie->lock);
847 
848 	cookie->current = vnode->stream.dir.dir_head;
849 	cookie->iteration_state = ITERATION_STATE_BEGIN;
850 
851 	return B_OK;
852 }
853 
854 
855 static status_t
856 rootfs_ioctl(fs_volume* _volume, fs_vnode* _v, void* _cookie, uint32 op,
857 	void* buffer, size_t length)
858 {
859 	TRACE(("rootfs_ioctl: vnode %p, cookie %p, op %d, buf %p, length %d\n",
860 		_volume, _cookie, (int)op, buffer, (int)length));
861 
862 	return B_BAD_VALUE;
863 }
864 
865 
866 static bool
867 rootfs_can_page(fs_volume* _volume, fs_vnode* _v, void* cookie)
868 {
869 	return false;
870 }
871 
872 
873 static status_t
874 rootfs_read_pages(fs_volume* _volume, fs_vnode* _v, void* cookie, off_t pos,
875 	const iovec* vecs, size_t count, size_t* _numBytes)
876 {
877 	return B_NOT_ALLOWED;
878 }
879 
880 
881 static status_t
882 rootfs_write_pages(fs_volume* _volume, fs_vnode* _v, void* cookie, off_t pos,
883 	const iovec* vecs, size_t count, size_t* _numBytes)
884 {
885 	return B_NOT_ALLOWED;
886 }
887 
888 
889 static status_t
890 rootfs_read_link(fs_volume* _volume, fs_vnode* _link, char* buffer,
891 	size_t* _bufferSize)
892 {
893 	struct rootfs_vnode* link = (rootfs_vnode*)_link->private_node;
894 
895 	if (!S_ISLNK(link->stream.type))
896 		return B_BAD_VALUE;
897 
898 	memcpy(buffer, link->stream.symlink.path, min_c(*_bufferSize,
899 		link->stream.symlink.length));
900 
901 	*_bufferSize = link->stream.symlink.length;
902 
903 	return B_OK;
904 }
905 
906 
907 static status_t
908 rootfs_symlink(fs_volume* _volume, fs_vnode* _dir, const char* name,
909 	const char* path, int mode)
910 {
911 	struct rootfs* fs = (rootfs*)_volume->private_volume;
912 	struct rootfs_vnode* dir = (rootfs_vnode*)_dir->private_node;
913 	struct rootfs_vnode* vnode;
914 
915 	TRACE(("rootfs_symlink: dir %p, name = '%s', path = %s\n", dir, name, path));
916 
917 	status_t status = rootfs_check_permissions(dir, W_OK);
918 	if (status != B_OK)
919 		return status;
920 
921 	WriteLocker locker(fs->lock);
922 
923 	vnode = rootfs_find_in_dir(dir, name);
924 	if (vnode != NULL)
925 		return B_FILE_EXISTS;
926 
927 	TRACE(("rootfs_create: creating new symlink\n"));
928 	vnode = rootfs_create_vnode(fs, dir, name, S_IFLNK | (mode & S_IUMSK));
929 	if (vnode == NULL)
930 		return B_NO_MEMORY;
931 
932 	rootfs_insert_in_dir(fs, dir, vnode);
933 	fs->vnode_list_hash->Insert(vnode);
934 
935 	vnode->stream.symlink.path = strdup(path);
936 	if (vnode->stream.symlink.path == NULL) {
937 		rootfs_delete_vnode(fs, vnode, false);
938 		return B_NO_MEMORY;
939 	}
940 	vnode->stream.symlink.length = strlen(path);
941 
942 	entry_cache_add(fs->volume->id, dir->id, name, vnode->id);
943 
944 	notify_entry_created(fs->id, dir->id, name, vnode->id);
945 
946 	return B_OK;
947 }
948 
949 
950 static status_t
951 rootfs_unlink(fs_volume* _volume, fs_vnode* _dir, const char* name)
952 {
953 	struct rootfs* fs = (rootfs*)_volume->private_volume;
954 	struct rootfs_vnode* dir = (rootfs_vnode*)_dir->private_node;
955 
956 	TRACE(("rootfs_unlink: dir %p (0x%Lx), name '%s'\n", dir, dir->id, name));
957 
958 	status_t status = rootfs_check_permissions(dir, W_OK);
959 	if (status != B_OK)
960 		return status;
961 
962 	return rootfs_remove(fs, dir, name, false);
963 }
964 
965 
966 static status_t
967 rootfs_rename(fs_volume* _volume, fs_vnode* _fromDir, const char* fromName,
968 	fs_vnode* _toDir, const char* toName)
969 {
970 	struct rootfs* fs = (rootfs*)_volume->private_volume;
971 	struct rootfs_vnode* fromDirectory = (rootfs_vnode*)_fromDir->private_node;
972 	struct rootfs_vnode* toDirectory = (rootfs_vnode*)_toDir->private_node;
973 
974 	TRACE(("rootfs_rename: from %p (0x%Lx, %s), fromName '%s', to %p "
975 		"(0x%Lx, %s), toName '%s'\n", fromDirectory, fromDirectory->id,
976 		fromDirectory->name != NULL ? fromDirectory->name : "NULL",
977 		fromName, toDirectory, toDirectory->id,
978 		toDirectory->name != NULL ? toDirectory->name : "NULL",
979 		toName));
980 
981 	// Prevent renaming /boot, since that will stop everything from working.
982 	// TODO: This should be solved differently. Either root should still be
983 	// able to do this or a mechanism should be introduced that does this
984 	// at the VFS level, for example by checking nodes for a specific
985 	// attribute.
986 	if (fromDirectory->id == 1 && strcmp(fromName, "boot") == 0)
987 		return EPERM;
988 
989 	status_t status = rootfs_check_permissions(fromDirectory, W_OK);
990 	if (status == B_OK)
991 		status = rootfs_check_permissions(toDirectory, W_OK);
992 	if (status != B_OK)
993 		return status;
994 
995 	WriteLocker locker(fs->lock);
996 
997 	struct rootfs_vnode* vnode = rootfs_find_in_dir(fromDirectory, fromName);
998 	if (vnode == NULL)
999 		return B_ENTRY_NOT_FOUND;
1000 
1001 	// make sure the target is not a subdirectory of us
1002 	struct rootfs_vnode* parent = toDirectory->parent;
1003 	while (parent != NULL && parent != parent->parent) {
1004 		if (parent == vnode)
1005 			return B_BAD_VALUE;
1006 
1007 		parent = parent->parent;
1008 	}
1009 
1010 	struct rootfs_vnode* targetVnode = rootfs_find_in_dir(toDirectory, toName);
1011 	if (targetVnode != NULL) {
1012 		// target node exists, let's see if it is an empty directory
1013 		if (S_ISDIR(targetVnode->stream.type)
1014 			&& !rootfs_is_dir_empty(targetVnode))
1015 			return B_NAME_IN_USE;
1016 
1017 		// so we can cleanly remove it
1018 		entry_cache_remove(fs->volume->id, toDirectory->id, toName);
1019 		remove_node(fs, toDirectory, targetVnode);
1020 	}
1021 
1022 	// we try to reuse the existing name buffer if possible
1023 	if (strlen(fromName) < strlen(toName)) {
1024 		char* nameBuffer = strdup(toName);
1025 		if (nameBuffer == NULL)
1026 			return B_NO_MEMORY;
1027 
1028 		free(vnode->name);
1029 		vnode->name = nameBuffer;
1030 	} else {
1031 		// we can just copy it
1032 		strcpy(vnode->name, toName);
1033 	}
1034 
1035 	// remove it from the dir
1036 	entry_cache_remove(fs->volume->id, fromDirectory->id, fromName);
1037 	rootfs_remove_from_dir(fs, fromDirectory, vnode);
1038 
1039 	// Add it back to the dir with the new name.
1040 	// We need to do this even in the same directory,
1041 	// so that it keeps sorted correctly.
1042 	rootfs_insert_in_dir(fs, toDirectory, vnode);
1043 
1044 	entry_cache_add(fs->volume->id, toDirectory->id, toName, vnode->id);
1045 
1046 	notify_entry_moved(fs->id, fromDirectory->id, fromName, toDirectory->id,
1047 		toName, vnode->id);
1048 
1049 	return B_OK;
1050 }
1051 
1052 
1053 static status_t
1054 rootfs_read_stat(fs_volume* _volume, fs_vnode* _v, struct stat* stat)
1055 {
1056 	struct rootfs* fs = (rootfs*)_volume->private_volume;
1057 	struct rootfs_vnode* vnode = (rootfs_vnode*)_v->private_node;
1058 
1059 	TRACE(("rootfs_read_stat: vnode %p (0x%Lx), stat %p\n", vnode, vnode->id,
1060 		stat));
1061 
1062 	// stream exists, but we know to return size 0, since we can only hold
1063 	// directories
1064 	stat->st_dev = fs->id;
1065 	stat->st_ino = vnode->id;
1066 	if (S_ISLNK(vnode->stream.type))
1067 		stat->st_size = vnode->stream.symlink.length;
1068 	else
1069 		stat->st_size = 0;
1070 	stat->st_mode = vnode->stream.type;
1071 
1072 	stat->st_nlink = 1;
1073 	stat->st_blksize = 65536;
1074 	stat->st_blocks = 0;
1075 
1076 	stat->st_uid = vnode->uid;
1077 	stat->st_gid = vnode->gid;
1078 
1079 	stat->st_atim.tv_sec = real_time_clock();
1080 	stat->st_atim.tv_nsec = 0;
1081 	stat->st_mtim = stat->st_ctim = vnode->modification_time;
1082 	stat->st_crtim = vnode->creation_time;
1083 
1084 	return B_OK;
1085 }
1086 
1087 
1088 static status_t
1089 rootfs_write_stat(fs_volume* _volume, fs_vnode* _vnode, const struct stat* stat,
1090 	uint32 statMask)
1091 {
1092 	struct rootfs* fs = (rootfs*)_volume->private_volume;
1093 	struct rootfs_vnode* vnode = (rootfs_vnode*)_vnode->private_node;
1094 
1095 	const uid_t uid = geteuid();
1096 	const bool isOwnerOrRoot = uid == 0 || uid == (uid_t)vnode->uid;
1097 	const bool hasWriteAccess = rootfs_check_permissions(vnode, W_OK) == B_OK;
1098 
1099 	TRACE(("rootfs_write_stat: vnode %p (0x%Lx), stat %p\n", vnode, vnode->id,
1100 		stat));
1101 
1102 	// we cannot change the size of anything
1103 	if (statMask & B_STAT_SIZE)
1104 		return B_BAD_VALUE;
1105 
1106 	WriteLocker locker(fs->lock);
1107 
1108 	if ((statMask & B_STAT_MODE) != 0) {
1109 		// only the user or root can do that
1110 		if (!isOwnerOrRoot)
1111 			return B_NOT_ALLOWED;
1112 
1113 		vnode->stream.type = (vnode->stream.type & ~S_IUMSK)
1114 			| (stat->st_mode & S_IUMSK);
1115 	}
1116 
1117 	if ((statMask & B_STAT_UID) != 0) {
1118 		// only root should be allowed
1119 		if (uid != 0)
1120 			return B_NOT_ALLOWED;
1121 		vnode->uid = stat->st_uid;
1122 	}
1123 
1124 	if ((statMask & B_STAT_GID) != 0) {
1125 		// only user or root can do that
1126 		if (!isOwnerOrRoot)
1127 			return B_NOT_ALLOWED;
1128 		vnode->gid = stat->st_gid;
1129 	}
1130 
1131 	if ((statMask & B_STAT_MODIFICATION_TIME) != 0) {
1132 		if (!isOwnerOrRoot && !hasWriteAccess)
1133 			return B_NOT_ALLOWED;
1134 		vnode->modification_time = stat->st_mtim;
1135 	}
1136 
1137 	if ((statMask & B_STAT_CREATION_TIME) != 0) {
1138 		if (!isOwnerOrRoot && !hasWriteAccess)
1139 			return B_NOT_ALLOWED;
1140 		vnode->creation_time = stat->st_crtim;
1141 	}
1142 
1143 	locker.Unlock();
1144 
1145 	notify_stat_changed(fs->id, get_parent_id(vnode), vnode->id, statMask);
1146 	return B_OK;
1147 }
1148 
1149 
1150 static status_t
1151 rootfs_create_special_node(fs_volume* _volume, fs_vnode* _dir, const char* name,
1152 	fs_vnode* subVnode, mode_t mode, uint32 flags, fs_vnode* _superVnode,
1153 	ino_t* _nodeID)
1154 {
1155 	struct rootfs* fs = (rootfs*)_volume->private_volume;
1156 	struct rootfs_vnode* dir = (rootfs_vnode*)_dir->private_node;
1157 	struct rootfs_vnode* vnode;
1158 
1159 	WriteLocker locker(fs->lock);
1160 
1161 	if (name != NULL) {
1162 		vnode = rootfs_find_in_dir(dir, name);
1163 		if (vnode != NULL)
1164 			return B_FILE_EXISTS;
1165 	}
1166 
1167 	vnode = rootfs_create_vnode(fs, dir, name, mode);
1168 	if (vnode == NULL)
1169 		return B_NO_MEMORY;
1170 
1171 	if (name != NULL)
1172 		rootfs_insert_in_dir(fs, dir, vnode);
1173 	else
1174 		flags |= B_VNODE_PUBLISH_REMOVED;
1175 
1176 	fs->vnode_list_hash->Insert(vnode);
1177 
1178 	_superVnode->private_node = vnode;
1179 	_superVnode->ops = &sVnodeOps;
1180 	*_nodeID = vnode->id;
1181 
1182 	if (subVnode == NULL)
1183 		subVnode = _superVnode;
1184 
1185 	status_t status = publish_vnode(fs->volume, vnode->id,
1186 		subVnode->private_node, subVnode->ops, mode, flags);
1187 	if (status != B_OK) {
1188 		if (name != NULL)
1189 			rootfs_remove_from_dir(fs, dir, vnode);
1190 		rootfs_delete_vnode(fs, vnode, false);
1191 		return status;
1192 	}
1193 
1194 	if (name != NULL) {
1195 		entry_cache_add(fs->volume->id, dir->id, name, vnode->id);
1196 		notify_entry_created(fs->id, dir->id, name, vnode->id);
1197 	}
1198 
1199 	return B_OK;
1200 }
1201 
1202 
1203 static status_t
1204 rootfs_std_ops(int32 op, ...)
1205 {
1206 	switch (op) {
1207 		case B_MODULE_INIT:
1208 			return B_OK;
1209 
1210 		case B_MODULE_UNINIT:
1211 			return B_OK;
1212 
1213 		default:
1214 			return B_ERROR;
1215 	}
1216 }
1217 
1218 
1219 namespace {
1220 
1221 fs_volume_ops sVolumeOps = {
1222 	&rootfs_unmount,
1223 	NULL,
1224 	NULL,
1225 	&rootfs_sync,
1226 	&rootfs_get_vnode,
1227 
1228 	// the other operations are not supported (indices, queries)
1229 	NULL,
1230 };
1231 
1232 fs_vnode_ops sVnodeOps = {
1233 	&rootfs_lookup,
1234 	&rootfs_get_vnode_name,
1235 
1236 	&rootfs_put_vnode,
1237 	&rootfs_remove_vnode,
1238 
1239 	&rootfs_can_page,
1240 	&rootfs_read_pages,
1241 	&rootfs_write_pages,
1242 
1243 	NULL,	// io()
1244 	NULL,	// cancel_io()
1245 
1246 	NULL,	// get_file_map()
1247 
1248 	/* common */
1249 	&rootfs_ioctl,
1250 	NULL,	// fs_set_flags()
1251 	NULL,	// select
1252 	NULL,	// deselect
1253 	&rootfs_fsync,
1254 
1255 	&rootfs_read_link,
1256 	&rootfs_symlink,
1257 	NULL,	// fs_link()
1258 	&rootfs_unlink,
1259 	&rootfs_rename,
1260 
1261 	NULL,	// fs_access()
1262 	&rootfs_read_stat,
1263 	&rootfs_write_stat,
1264 	NULL,
1265 
1266 	/* file */
1267 	&rootfs_create,
1268 	&rootfs_open,
1269 	&rootfs_close,
1270 	&rootfs_free_cookie,
1271 	&rootfs_read,
1272 	&rootfs_write,
1273 
1274 	/* directory */
1275 	&rootfs_create_dir,
1276 	&rootfs_remove_dir,
1277 	&rootfs_open_dir,
1278 	&rootfs_close,			// same as for files - it does nothing, anyway
1279 	&rootfs_free_dir_cookie,
1280 	&rootfs_read_dir,
1281 	&rootfs_rewind_dir,
1282 
1283 	/* attribute directory operations */
1284 	NULL,	// open_attr_dir
1285 	NULL,	// close_attr_dir
1286 	NULL,	// free_attr_dir_cookie
1287 	NULL,	// read_attr_dir
1288 	NULL,	// rewind_attr_dir
1289 
1290 	/* attribute operations */
1291 	NULL,	// create_attr
1292 	NULL,	// open_attr
1293 	NULL,	// close_attr
1294 	NULL,	// free_attr_cookie
1295 	NULL,	// read_attr
1296 	NULL,	// write_attr
1297 
1298 	NULL,	// read_attr_stat
1299 	NULL,	// write_attr_stat
1300 	NULL,	// rename_attr
1301 	NULL,	// remove_attr
1302 
1303 	/* support for node and FS layers */
1304 	&rootfs_create_special_node,
1305 	NULL,	// get_super_vnode,
1306 };
1307 
1308 }	// namespace
1309 
1310 file_system_module_info gRootFileSystem = {
1311 	{
1312 		"file_systems/rootfs" B_CURRENT_FS_API_VERSION,
1313 		0,
1314 		rootfs_std_ops,
1315 	},
1316 
1317 	"rootfs",				// short_name
1318 	"Root File System",		// pretty_name
1319 	0,						// DDM flags
1320 
1321 	NULL,	// identify_partition()
1322 	NULL,	// scan_partition()
1323 	NULL,	// free_identify_partition_cookie()
1324 	NULL,	// free_partition_content_cookie()
1325 
1326 	&rootfs_mount,
1327 };
1328