xref: /haiku/src/system/kernel/fs/rootfs.cpp (revision e81a954787e50e56a7f06f72705b7859b6ab06d1)
1 /*
2  * Copyright 2002-2017, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  *
5  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
6  * Distributed under the terms of the NewOS License.
7  */
8 
9 
10 #if FS_SHELL
11 #	include "fssh_api_wrapper.h"
12 
13 #	include "KOpenHashTable.h"
14 #	include "list.h"
15 #else
16 #	include <stdio.h>
17 #	include <stdlib.h>
18 #	include <string.h>
19 #	include <sys/stat.h>
20 
21 #	include <fs_cache.h>
22 #	include <KernelExport.h>
23 #	include <NodeMonitor.h>
24 
25 #	include <debug.h>
26 #	include <lock.h>
27 #	include <OpenHashTable.h>
28 #	include <util/AutoLock.h>
29 #	include <vfs.h>
30 #	include <vm/vm.h>
31 #endif
32 
33 
34 
35 #if FS_SHELL
36 	using namespace FSShell;
37 #	define user_strlcpy(to, from, len)	(strlcpy(to, from, len), FSSH_B_OK)
38 #endif
39 
40 
41 //#define TRACE_ROOTFS
42 #ifdef TRACE_ROOTFS
43 #	define TRACE(x) dprintf x
44 #else
45 #	define TRACE(x)
46 #endif
47 
48 
49 namespace {
50 
51 struct rootfs_stream {
52 	mode_t						type;
53 	struct stream_dir {
54 		struct rootfs_vnode*	dir_head;
55 		struct list				cookies;
56 		mutex					cookie_lock;
57 	} dir;
58 	struct stream_symlink {
59 		char*					path;
60 		size_t					length;
61 	} symlink;
62 };
63 
64 struct rootfs_vnode {
65 	struct rootfs_vnode*		all_next;
66 	ino_t						id;
67 	char*						name;
68 	timespec					modification_time;
69 	timespec					creation_time;
70 	uid_t						uid;
71 	gid_t						gid;
72 	struct rootfs_vnode*		parent;
73 	struct rootfs_vnode*		dir_next;
74 	struct rootfs_stream		stream;
75 };
76 
77 struct VnodeHash {
78 	typedef	ino_t			KeyType;
79 	typedef	rootfs_vnode	ValueType;
80 
81 	size_t HashKey(KeyType key) const
82 	{
83 		return key;
84 	}
85 
86 	size_t Hash(ValueType* vnode) const
87 	{
88 		return vnode->id;
89 	}
90 
91 	bool Compare(KeyType key, ValueType* vnode) const
92 	{
93 		return vnode->id == key;
94 	}
95 
96 	ValueType*& GetLink(ValueType* value) const
97 	{
98 		return value->all_next;
99 	}
100 };
101 
102 typedef BOpenHashTable<VnodeHash> VnodeTable;
103 
104 struct rootfs {
105 	fs_volume*					volume;
106 	dev_t						id;
107 	rw_lock						lock;
108 	ino_t						next_vnode_id;
109 	VnodeTable*					vnode_list_hash;
110 	struct rootfs_vnode*		root_vnode;
111 };
112 
113 // dircookie, dirs are only types of streams supported by rootfs
114 struct rootfs_dir_cookie {
115 	struct list_link			link;
116 	mutex						lock;
117 	struct rootfs_vnode*		current;
118 	int32						iteration_state;
119 };
120 
// Directory iteration states: a cookie first yields ".", then "..", and
// afterwards walks the real children.
enum {
	ITERATION_STATE_DOT		= 0,
		// next entry to report is "."
	ITERATION_STATE_DOT_DOT	= 1,
		// next entry to report is ".."
	ITERATION_STATE_OTHERS	= 2,
		// reporting the directory's children
	ITERATION_STATE_BEGIN	= ITERATION_STATE_DOT,
		// state of a freshly opened or rewound cookie
};
128 
129 
130 // extern only to make forward declaration possible
131 extern fs_volume_ops sVolumeOps;
132 extern fs_vnode_ops sVnodeOps;
133 
134 } // namespace
135 
136 
// size passed to VnodeTable::Init() when a volume is mounted
#define ROOTFS_HASH_SIZE 16
138 
139 
140 static timespec
141 current_timespec()
142 {
143 	bigtime_t time = real_time_clock_usecs();
144 
145 	timespec tv;
146 	tv.tv_sec = time / 1000000;
147 	tv.tv_nsec = (time % 1000000) * 1000;
148 	return tv;
149 }
150 
151 
152 static ino_t
153 get_parent_id(struct rootfs_vnode* vnode)
154 {
155 	if (vnode->parent != NULL)
156 		return vnode->parent->id;
157 	return -1;
158 }
159 
160 
161 static struct rootfs_vnode*
162 rootfs_create_vnode(struct rootfs* fs, struct rootfs_vnode* parent,
163 	const char* name, int type)
164 {
165 	struct rootfs_vnode* vnode;
166 
167 	vnode = (rootfs_vnode*)malloc(sizeof(struct rootfs_vnode));
168 	if (vnode == NULL)
169 		return NULL;
170 
171 	memset(vnode, 0, sizeof(struct rootfs_vnode));
172 
173 	if (name != NULL) {
174 		vnode->name = strdup(name);
175 		if (vnode->name == NULL) {
176 			free(vnode);
177 			return NULL;
178 		}
179 	}
180 
181 	vnode->id = fs->next_vnode_id++;
182 	vnode->stream.type = type;
183 	vnode->creation_time = vnode->modification_time = current_timespec();
184 	vnode->uid = geteuid();
185 	vnode->gid = parent ? parent->gid : getegid();
186 		// inherit group from parent if possible
187 
188 	if (S_ISDIR(type)) {
189 		list_init(&vnode->stream.dir.cookies);
190 		mutex_init(&vnode->stream.dir.cookie_lock, "rootfs dir cookies");
191 	}
192 
193 	return vnode;
194 }
195 
196 
197 static status_t
198 rootfs_delete_vnode(struct rootfs* fs, struct rootfs_vnode* v, bool force_delete)
199 {
200 	// cant delete it if it's in a directory or is a directory
201 	// and has children
202 	if (!force_delete && (v->stream.dir.dir_head != NULL || v->dir_next != NULL))
203 		return EPERM;
204 
205 	// remove it from the global hash table
206 	fs->vnode_list_hash->Remove(v);
207 
208 	if (S_ISDIR(v->stream.type))
209 		mutex_destroy(&v->stream.dir.cookie_lock);
210 
211 	free(v->name);
212 	free(v);
213 
214 	return 0;
215 }
216 
217 
218 /*! Makes sure none of the dircookies point to the vnode passed in. */
219 static void
220 update_dir_cookies(struct rootfs_vnode* dir, struct rootfs_vnode* vnode)
221 {
222 	struct rootfs_dir_cookie* cookie = NULL;
223 
224 	while ((cookie = (rootfs_dir_cookie*)list_get_next_item(
225 			&dir->stream.dir.cookies, cookie)) != NULL) {
226 		MutexLocker cookieLocker(cookie->lock);
227 		if (cookie->current == vnode)
228 			cookie->current = vnode->dir_next;
229 	}
230 }
231 
232 
233 static struct rootfs_vnode*
234 rootfs_find_in_dir(struct rootfs_vnode* dir, const char* path)
235 {
236 	struct rootfs_vnode* vnode;
237 
238 	if (!strcmp(path, "."))
239 		return dir;
240 	if (!strcmp(path, ".."))
241 		return dir->parent;
242 
243 	for (vnode = dir->stream.dir.dir_head; vnode; vnode = vnode->dir_next) {
244 		if (!strcmp(vnode->name, path))
245 			return vnode;
246 	}
247 	return NULL;
248 }
249 
250 
251 static status_t
252 rootfs_insert_in_dir(struct rootfs* fs, struct rootfs_vnode* dir,
253 	struct rootfs_vnode* vnode)
254 {
255 	// make sure the directory stays sorted alphabetically
256 
257 	struct rootfs_vnode* node = dir->stream.dir.dir_head;
258 	struct rootfs_vnode* last = NULL;
259 	while (node != NULL && strcmp(node->name, vnode->name) < 0) {
260 		last = node;
261 		node = node->dir_next;
262 	}
263 	if (last == NULL) {
264 		// the new vnode is the first entry in the list
265 		vnode->dir_next = dir->stream.dir.dir_head;
266 		dir->stream.dir.dir_head = vnode;
267 	} else {
268 		// insert after that node
269 		vnode->dir_next = last->dir_next;
270 		last->dir_next = vnode;
271 	}
272 
273 	vnode->parent = dir;
274 	dir->modification_time = current_timespec();
275 
276 	notify_stat_changed(fs->id, get_parent_id(dir), dir->id,
277 		B_STAT_MODIFICATION_TIME);
278 	return B_OK;
279 }
280 
281 
282 static status_t
283 rootfs_remove_from_dir(struct rootfs* fs, struct rootfs_vnode* dir,
284 	struct rootfs_vnode* removeVnode)
285 {
286 	struct rootfs_vnode* vnode;
287 	struct rootfs_vnode* lastVnode;
288 
289 	for (vnode = dir->stream.dir.dir_head, lastVnode = NULL; vnode != NULL;
290 			lastVnode = vnode, vnode = vnode->dir_next) {
291 		if (vnode == removeVnode) {
292 			// make sure all dircookies dont point to this vnode
293 			update_dir_cookies(dir, vnode);
294 
295 			if (lastVnode)
296 				lastVnode->dir_next = vnode->dir_next;
297 			else
298 				dir->stream.dir.dir_head = vnode->dir_next;
299 			vnode->dir_next = NULL;
300 
301 			dir->modification_time = current_timespec();
302 			notify_stat_changed(fs->id, get_parent_id(dir), dir->id,
303 				B_STAT_MODIFICATION_TIME);
304 			return B_OK;
305 		}
306 	}
307 	return B_ENTRY_NOT_FOUND;
308 }
309 
310 
311 static bool
312 rootfs_is_dir_empty(struct rootfs_vnode* dir)
313 {
314 	return !dir->stream.dir.dir_head;
315 }
316 
317 
318 /*! You must hold the FS write lock when calling this function */
319 static status_t
320 remove_node(struct rootfs* fs, struct rootfs_vnode* directory,
321 	struct rootfs_vnode* vnode)
322 {
323 	// schedule this vnode to be removed when it's ref goes to zero
324 
325 	bool gotNode = (get_vnode(fs->volume, vnode->id, NULL) == B_OK);
326 
327 	status_t status = B_OK;
328 	if (gotNode)
329 		status = remove_vnode(fs->volume, vnode->id);
330 
331 	if (status == B_OK) {
332 		rootfs_remove_from_dir(fs, directory, vnode);
333 		notify_entry_removed(fs->id, directory->id, vnode->name, vnode->id);
334 	}
335 
336 	if (gotNode)
337 		put_vnode(fs->volume, vnode->id);
338 
339 	return status;
340 }
341 
342 
343 static status_t
344 rootfs_remove(struct rootfs* fs, struct rootfs_vnode* dir, const char* name,
345 	bool isDirectory)
346 {
347 	struct rootfs_vnode* vnode;
348 	status_t status = B_OK;
349 
350 	WriteLocker locker(fs->lock);
351 
352 	vnode = rootfs_find_in_dir(dir, name);
353 	if (!vnode)
354 		status = B_ENTRY_NOT_FOUND;
355 	else if (isDirectory && !S_ISDIR(vnode->stream.type))
356 		status = B_NOT_A_DIRECTORY;
357 	else if (!isDirectory && S_ISDIR(vnode->stream.type))
358 		status = B_IS_A_DIRECTORY;
359 	else if (isDirectory && !rootfs_is_dir_empty(vnode))
360 		status = B_DIRECTORY_NOT_EMPTY;
361 
362 	if (status != B_OK)
363 		return status;
364 
365 	entry_cache_remove(fs->volume->id, dir->id, name);
366 
367 	return remove_node(fs, dir, vnode);
368 }
369 
370 
371 //	#pragma mark -
372 
373 
374 static status_t
375 rootfs_mount(fs_volume* volume, const char* device, uint32 flags,
376 	const char* args, ino_t* _rootID)
377 {
378 	struct rootfs* fs;
379 	struct rootfs_vnode* vnode;
380 	status_t err;
381 
382 	TRACE(("rootfs_mount: entry\n"));
383 
384 	fs = (rootfs*)malloc(sizeof(struct rootfs));
385 	if (fs == NULL)
386 		return B_NO_MEMORY;
387 
388 	volume->private_volume = fs;
389 	volume->ops = &sVolumeOps;
390 	fs->volume = volume;
391 	fs->id = volume->id;
392 	fs->next_vnode_id = 1;
393 
394 	rw_lock_init(&fs->lock, "rootfs");
395 
396 	fs->vnode_list_hash = new(std::nothrow) VnodeTable();
397 	if (fs->vnode_list_hash == NULL
398 			|| fs->vnode_list_hash->Init(ROOTFS_HASH_SIZE) != B_OK) {
399 		err = B_NO_MEMORY;
400 		goto err2;
401 	}
402 
403 	// create the root vnode
404 	vnode = rootfs_create_vnode(fs, NULL, ".", S_IFDIR | 0777);
405 	if (vnode == NULL) {
406 		err = B_NO_MEMORY;
407 		goto err3;
408 	}
409 	vnode->parent = vnode;
410 
411 	fs->root_vnode = vnode;
412 	fs->vnode_list_hash->Insert(vnode);
413 	publish_vnode(volume, vnode->id, vnode, &sVnodeOps, vnode->stream.type, 0);
414 
415 	*_rootID = vnode->id;
416 
417 	return B_OK;
418 
419 err3:
420 	delete fs->vnode_list_hash;
421 err2:
422 	rw_lock_destroy(&fs->lock);
423 	free(fs);
424 
425 	return err;
426 }
427 
428 
429 static status_t
430 rootfs_unmount(fs_volume* _volume)
431 {
432 	struct rootfs* fs = (struct rootfs*)_volume->private_volume;
433 
434 	TRACE(("rootfs_unmount: entry fs = %p\n", fs));
435 
436 	// release the reference to the root
437 	put_vnode(fs->volume, fs->root_vnode->id);
438 
439 	// delete all of the vnodes
440 	VnodeTable::Iterator i(fs->vnode_list_hash);
441 
442 	while (i.HasNext()) {
443 		struct rootfs_vnode* vnode = i.Next();
444 		rootfs_delete_vnode(fs, vnode, true);
445 	}
446 
447 	delete fs->vnode_list_hash;
448 	rw_lock_destroy(&fs->lock);
449 	free(fs);
450 
451 	return B_OK;
452 }
453 
454 
455 static status_t
456 rootfs_sync(fs_volume* _volume)
457 {
458 	TRACE(("rootfs_sync: entry\n"));
459 
460 	return B_OK;
461 }
462 
463 
464 static status_t
465 rootfs_lookup(fs_volume* _volume, fs_vnode* _dir, const char* name, ino_t* _id)
466 {
467 	struct rootfs* fs = (struct rootfs*)_volume->private_volume;
468 	struct rootfs_vnode* dir = (struct rootfs_vnode*)_dir->private_node;
469 	struct rootfs_vnode* vnode;
470 
471 	TRACE(("rootfs_lookup: entry dir %p, name '%s'\n", dir, name));
472 	if (!S_ISDIR(dir->stream.type))
473 		return B_NOT_A_DIRECTORY;
474 
475 	ReadLocker locker(fs->lock);
476 
477 	// look it up
478 	vnode = rootfs_find_in_dir(dir, name);
479 	if (!vnode)
480 		return B_ENTRY_NOT_FOUND;
481 
482 	status_t status = get_vnode(fs->volume, vnode->id, NULL);
483 	if (status != B_OK)
484 		return status;
485 
486 	entry_cache_add(fs->volume->id, dir->id, name, vnode->id);
487 
488 	*_id = vnode->id;
489 	return B_OK;
490 }
491 
492 
493 static status_t
494 rootfs_get_vnode_name(fs_volume* _volume, fs_vnode* _vnode, char* buffer,
495 	size_t bufferSize)
496 {
497 	struct rootfs_vnode* vnode = (struct rootfs_vnode*)_vnode->private_node;
498 
499 	TRACE(("rootfs_get_vnode_name: vnode = %p (name = %s)\n", vnode,
500 		vnode->name));
501 
502 	strlcpy(buffer, vnode->name, bufferSize);
503 	return B_OK;
504 }
505 
506 
507 static status_t
508 rootfs_get_vnode(fs_volume* _volume, ino_t id, fs_vnode* _vnode, int* _type,
509 	uint32* _flags, bool reenter)
510 {
511 	struct rootfs* fs = (struct rootfs*)_volume->private_volume;
512 	struct rootfs_vnode* vnode;
513 
514 	TRACE(("rootfs_getvnode: asking for vnode %Ld, r %d\n", id, reenter));
515 
516 	if (!reenter)
517 		rw_lock_read_lock(&fs->lock);
518 
519 	vnode = fs->vnode_list_hash->Lookup(id);
520 
521 	if (!reenter)
522 		rw_lock_read_unlock(&fs->lock);
523 
524 	TRACE(("rootfs_getnvnode: looked it up at %p\n", vnode));
525 
526 	if (vnode == NULL)
527 		return B_ENTRY_NOT_FOUND;
528 
529 	_vnode->private_node = vnode;
530 	_vnode->ops = &sVnodeOps;
531 	*_type = vnode->stream.type;
532 	*_flags = 0;
533 
534 	return B_OK;
535 }
536 
537 
538 static status_t
539 rootfs_put_vnode(fs_volume* _volume, fs_vnode* _vnode, bool reenter)
540 {
541 #ifdef TRACE_ROOTFS
542 	struct rootfs_vnode* vnode = (struct rootfs_vnode*)_vnode->private_node;
543 
544 	TRACE(("rootfs_putvnode: entry on vnode 0x%Lx, r %d\n", vnode->id, reenter));
545 #endif
546 	return B_OK; // whatever
547 }
548 
549 
550 static status_t
551 rootfs_remove_vnode(fs_volume* _volume, fs_vnode* _vnode, bool reenter)
552 {
553 	struct rootfs* fs = (struct rootfs*)_volume->private_volume;
554 	struct rootfs_vnode* vnode = (struct rootfs_vnode*)_vnode->private_node;
555 
556 	TRACE(("rootfs_remove_vnode: remove %p (0x%Lx), r %d\n", vnode, vnode->id,
557 		reenter));
558 
559 	if (!reenter)
560 		rw_lock_write_lock(&fs->lock);
561 
562 	if (vnode->dir_next) {
563 		// can't remove node if it's linked to the dir
564 		panic("rootfs_remove_vnode: vnode %p asked to be removed is present in "
565 			"dir\n", vnode);
566 	}
567 
568 	rootfs_delete_vnode(fs, vnode, false);
569 
570 	if (!reenter)
571 		rw_lock_write_unlock(&fs->lock);
572 
573 	return B_OK;
574 }
575 
576 
577 static status_t
578 rootfs_create(fs_volume* _volume, fs_vnode* _dir, const char* name, int omode,
579 	int perms, void** _cookie, ino_t* _newID)
580 {
581 	return B_BAD_VALUE;
582 }
583 
584 
585 static status_t
586 rootfs_open(fs_volume* _volume, fs_vnode* _v, int openMode, void** _cookie)
587 {
588 	struct rootfs_vnode* vnode = (rootfs_vnode*)_v->private_node;
589 
590 	if (S_ISDIR(vnode->stream.type) && (openMode & O_RWMASK) != O_RDONLY)
591 		return B_IS_A_DIRECTORY;
592 	if ((openMode & O_DIRECTORY) != 0 && !S_ISDIR(vnode->stream.type))
593 		return B_NOT_A_DIRECTORY;
594 
595 	// allow to open the file, but it can't be done anything with it
596 
597 	*_cookie = NULL;
598 	return B_OK;
599 }
600 
601 
602 static status_t
603 rootfs_close(fs_volume* _volume, fs_vnode* _vnode, void* _cookie)
604 {
605 	TRACE(("rootfs_close: entry vnode %p, cookie %p\n", _vnode->private_node,
606 		_cookie));
607 	return B_OK;
608 }
609 
610 
611 static status_t
612 rootfs_free_cookie(fs_volume* _volume, fs_vnode* _v, void* _cookie)
613 {
614 	return B_OK;
615 }
616 
617 
618 static status_t
619 rootfs_fsync(fs_volume* _volume, fs_vnode* _v)
620 {
621 	return B_OK;
622 }
623 
624 
625 static status_t
626 rootfs_read(fs_volume* _volume, fs_vnode* _vnode, void* _cookie,
627 	off_t pos, void* buffer, size_t* _length)
628 {
629 	return EINVAL;
630 }
631 
632 
633 static status_t
634 rootfs_write(fs_volume* _volume, fs_vnode* vnode, void* cookie,
635 	off_t pos, const void* buffer, size_t* _length)
636 {
637 	TRACE(("rootfs_write: vnode %p, cookie %p, pos 0x%Lx , len %#x\n",
638 		vnode, cookie, pos, (int)*_length));
639 
640 	return EPERM;
641 }
642 
643 
644 static status_t
645 rootfs_create_dir(fs_volume* _volume, fs_vnode* _dir, const char* name,
646 	int mode)
647 {
648 	struct rootfs* fs = (rootfs*)_volume->private_volume;
649 	struct rootfs_vnode* dir = (rootfs_vnode*)_dir->private_node;
650 	struct rootfs_vnode* vnode;
651 
652 	TRACE(("rootfs_create_dir: dir %p, name = '%s', perms = %d\n", dir, name,
653 		mode));
654 
655 	WriteLocker locker(fs->lock);
656 
657 	vnode = rootfs_find_in_dir(dir, name);
658 	if (vnode != NULL)
659 		return B_FILE_EXISTS;
660 
661 	TRACE(("rootfs_create: creating new vnode\n"));
662 	vnode = rootfs_create_vnode(fs, dir, name, S_IFDIR | (mode & S_IUMSK));
663 	if (vnode == NULL)
664 		return B_NO_MEMORY;
665 
666 	rootfs_insert_in_dir(fs, dir, vnode);
667 	fs->vnode_list_hash->Insert(vnode);
668 
669 	entry_cache_add(fs->volume->id, dir->id, name, vnode->id);
670 	notify_entry_created(fs->id, dir->id, name, vnode->id);
671 
672 	return B_OK;
673 }
674 
675 
676 static status_t
677 rootfs_remove_dir(fs_volume* _volume, fs_vnode* _dir, const char* name)
678 {
679 	struct rootfs* fs = (rootfs*)_volume->private_volume;
680 	struct rootfs_vnode* dir = (rootfs_vnode*)_dir->private_node;
681 
682 	TRACE(("rootfs_remove_dir: dir %p (0x%Lx), name '%s'\n", dir, dir->id,
683 		name));
684 
685 	return rootfs_remove(fs, dir, name, true);
686 }
687 
688 
689 static status_t
690 rootfs_open_dir(fs_volume* _volume, fs_vnode* _v, void** _cookie)
691 {
692 	struct rootfs* fs = (struct rootfs*)_volume->private_volume;
693 	struct rootfs_vnode* vnode = (struct rootfs_vnode*)_v->private_node;
694 	struct rootfs_dir_cookie* cookie;
695 
696 	TRACE(("rootfs_open: vnode %p\n", vnode));
697 
698 	if (!S_ISDIR(vnode->stream.type))
699 		return B_BAD_VALUE;
700 
701 	cookie = (rootfs_dir_cookie*)malloc(sizeof(struct rootfs_dir_cookie));
702 	if (cookie == NULL)
703 		return B_NO_MEMORY;
704 
705 	mutex_init(&cookie->lock, "rootfs dir cookie");
706 
707 	ReadLocker locker(fs->lock);
708 
709 	cookie->current = vnode->stream.dir.dir_head;
710 	cookie->iteration_state = ITERATION_STATE_BEGIN;
711 
712 	mutex_lock(&vnode->stream.dir.cookie_lock);
713 	list_add_item(&vnode->stream.dir.cookies, cookie);
714 	mutex_unlock(&vnode->stream.dir.cookie_lock);
715 
716 	*_cookie = cookie;
717 
718 	return B_OK;
719 }
720 
721 
722 static status_t
723 rootfs_free_dir_cookie(fs_volume* _volume, fs_vnode* _vnode, void* _cookie)
724 {
725 	struct rootfs_dir_cookie* cookie = (rootfs_dir_cookie*)_cookie;
726 	struct rootfs_vnode* vnode = (rootfs_vnode*)_vnode->private_node;
727 	struct rootfs* fs = (rootfs*)_volume->private_volume;
728 
729 	ReadLocker locker(fs->lock);
730 
731 	mutex_lock(&vnode->stream.dir.cookie_lock);
732 	list_remove_item(&vnode->stream.dir.cookies, cookie);
733 	mutex_unlock(&vnode->stream.dir.cookie_lock);
734 
735 	locker.Unlock();
736 
737 	mutex_destroy(&cookie->lock);
738 
739 	free(cookie);
740 	return B_OK;
741 }
742 
743 
744 static status_t
745 rootfs_read_dir(fs_volume* _volume, fs_vnode* _vnode, void* _cookie,
746 	struct dirent* dirent, size_t bufferSize, uint32* _num)
747 {
748 	struct rootfs_vnode* vnode = (struct rootfs_vnode*)_vnode->private_node;
749 	struct rootfs_dir_cookie* cookie = (rootfs_dir_cookie*)_cookie;
750 	struct rootfs* fs = (rootfs*)_volume->private_volume;
751 	struct rootfs_vnode* childNode = NULL;
752 	const char* name = NULL;
753 	struct rootfs_vnode* nextChildNode = NULL;
754 
755 	TRACE(("rootfs_read_dir: vnode %p, cookie %p, buffer = %p, bufferSize = %d, "
756 		"num = %p\n", _vnode, cookie, dirent, (int)bufferSize, _num));
757 
758 	ReadLocker locker(fs->lock);
759 
760 	MutexLocker cookieLocker(cookie->lock);
761 	int nextState = cookie->iteration_state;
762 
763 	switch (cookie->iteration_state) {
764 		case ITERATION_STATE_DOT:
765 			childNode = vnode;
766 			name = ".";
767 			nextChildNode = vnode->stream.dir.dir_head;
768 			nextState = cookie->iteration_state + 1;
769 			break;
770 		case ITERATION_STATE_DOT_DOT:
771 			childNode = vnode->parent;
772 			name = "..";
773 			nextChildNode = vnode->stream.dir.dir_head;
774 			nextState = cookie->iteration_state + 1;
775 			break;
776 		default:
777 			childNode = cookie->current;
778 			if (childNode) {
779 				name = childNode->name;
780 				nextChildNode = childNode->dir_next;
781 			}
782 			break;
783 	}
784 
785 	if (!childNode) {
786 		// we're at the end of the directory
787 		*_num = 0;
788 		return B_OK;
789 	}
790 
791 	dirent->d_dev = fs->id;
792 	dirent->d_ino = childNode->id;
793 	dirent->d_reclen = strlen(name) + sizeof(struct dirent);
794 
795 	if (dirent->d_reclen > bufferSize)
796 		return ENOBUFS;
797 
798 	int nameLength = user_strlcpy(dirent->d_name, name,
799 		bufferSize - sizeof(struct dirent));
800 	if (nameLength < B_OK)
801 		return nameLength;
802 
803 	cookie->current = nextChildNode;
804 	cookie->iteration_state = nextState;
805 	*_num = 1;
806 	return B_OK;
807 }
808 
809 
810 static status_t
811 rootfs_rewind_dir(fs_volume* _volume, fs_vnode* _vnode, void* _cookie)
812 {
813 	struct rootfs_dir_cookie* cookie = (rootfs_dir_cookie*)_cookie;
814 	struct rootfs_vnode* vnode = (rootfs_vnode*)_vnode->private_node;
815 	struct rootfs* fs = (rootfs*)_volume->private_volume;
816 
817 	ReadLocker locker(fs->lock);
818 	MutexLocker cookieLocker(cookie->lock);
819 
820 	cookie->current = vnode->stream.dir.dir_head;
821 	cookie->iteration_state = ITERATION_STATE_BEGIN;
822 
823 	return B_OK;
824 }
825 
826 
827 static status_t
828 rootfs_ioctl(fs_volume* _volume, fs_vnode* _v, void* _cookie, uint32 op,
829 	void* buffer, size_t length)
830 {
831 	TRACE(("rootfs_ioctl: vnode %p, cookie %p, op %d, buf %p, length %d\n",
832 		_volume, _cookie, (int)op, buffer, (int)length));
833 
834 	return B_BAD_VALUE;
835 }
836 
837 
838 static bool
839 rootfs_can_page(fs_volume* _volume, fs_vnode* _v, void* cookie)
840 {
841 	return false;
842 }
843 
844 
845 static status_t
846 rootfs_read_pages(fs_volume* _volume, fs_vnode* _v, void* cookie, off_t pos,
847 	const iovec* vecs, size_t count, size_t* _numBytes)
848 {
849 	return B_NOT_ALLOWED;
850 }
851 
852 
853 static status_t
854 rootfs_write_pages(fs_volume* _volume, fs_vnode* _v, void* cookie, off_t pos,
855 	const iovec* vecs, size_t count, size_t* _numBytes)
856 {
857 	return B_NOT_ALLOWED;
858 }
859 
860 
861 static status_t
862 rootfs_read_link(fs_volume* _volume, fs_vnode* _link, char* buffer,
863 	size_t* _bufferSize)
864 {
865 	struct rootfs_vnode* link = (rootfs_vnode*)_link->private_node;
866 
867 	if (!S_ISLNK(link->stream.type))
868 		return B_BAD_VALUE;
869 
870 	if (link->stream.symlink.length < *_bufferSize)
871 		*_bufferSize = link->stream.symlink.length;
872 
873 	memcpy(buffer, link->stream.symlink.path, *_bufferSize);
874 	return B_OK;
875 }
876 
877 
878 static status_t
879 rootfs_symlink(fs_volume* _volume, fs_vnode* _dir, const char* name,
880 	const char* path, int mode)
881 {
882 	struct rootfs* fs = (rootfs*)_volume->private_volume;
883 	struct rootfs_vnode* dir = (rootfs_vnode*)_dir->private_node;
884 	struct rootfs_vnode* vnode;
885 
886 	TRACE(("rootfs_symlink: dir %p, name = '%s', path = %s\n", dir, name, path));
887 
888 	WriteLocker locker(fs->lock);
889 
890 	vnode = rootfs_find_in_dir(dir, name);
891 	if (vnode != NULL)
892 		return B_FILE_EXISTS;
893 
894 	TRACE(("rootfs_create: creating new symlink\n"));
895 	vnode = rootfs_create_vnode(fs, dir, name, S_IFLNK | (mode & S_IUMSK));
896 	if (vnode == NULL)
897 		return B_NO_MEMORY;
898 
899 	rootfs_insert_in_dir(fs, dir, vnode);
900 	fs->vnode_list_hash->Insert(vnode);
901 
902 	vnode->stream.symlink.path = strdup(path);
903 	if (vnode->stream.symlink.path == NULL) {
904 		rootfs_delete_vnode(fs, vnode, false);
905 		return B_NO_MEMORY;
906 	}
907 	vnode->stream.symlink.length = strlen(path);
908 
909 	entry_cache_add(fs->volume->id, dir->id, name, vnode->id);
910 
911 	notify_entry_created(fs->id, dir->id, name, vnode->id);
912 
913 	return B_OK;
914 }
915 
916 
917 static status_t
918 rootfs_unlink(fs_volume* _volume, fs_vnode* _dir, const char* name)
919 {
920 	struct rootfs* fs = (rootfs*)_volume->private_volume;
921 	struct rootfs_vnode* dir = (rootfs_vnode*)_dir->private_node;
922 
923 	TRACE(("rootfs_unlink: dir %p (0x%Lx), name '%s'\n", dir, dir->id, name));
924 
925 	return rootfs_remove(fs, dir, name, false);
926 }
927 
928 
929 static status_t
930 rootfs_rename(fs_volume* _volume, fs_vnode* _fromDir, const char* fromName,
931 	fs_vnode* _toDir, const char* toName)
932 {
933 	struct rootfs* fs = (rootfs*)_volume->private_volume;
934 	struct rootfs_vnode* fromDirectory = (rootfs_vnode*)_fromDir->private_node;
935 	struct rootfs_vnode* toDirectory = (rootfs_vnode*)_toDir->private_node;
936 
937 	TRACE(("rootfs_rename: from %p (0x%Lx, %s), fromName '%s', to %p "
938 		"(0x%Lx, %s), toName '%s'\n", fromDirectory, fromDirectory->id,
939 		fromDirectory->name != NULL ? fromDirectory->name : "NULL",
940 		fromName, toDirectory, toDirectory->id,
941 		toDirectory->name != NULL ? toDirectory->name : "NULL",
942 		toName));
943 
944 	// Prevent renaming /boot, since that will stop everything from working.
945 	// TODO: This should be solved differently. Either root should still be
946 	// able to do this or a mechanism should be introduced that does this
947 	// at the VFS level, for example by checking nodes for a specific
948 	// attribute.
949 	if (fromDirectory->id == 1 && strcmp(fromName, "boot") == 0)
950 		return EPERM;
951 
952 	WriteLocker locker(fs->lock);
953 
954 	struct rootfs_vnode* vnode = rootfs_find_in_dir(fromDirectory, fromName);
955 	if (vnode == NULL)
956 		return B_ENTRY_NOT_FOUND;
957 
958 	// make sure the target is not a subdirectory of us
959 	struct rootfs_vnode* parent = toDirectory->parent;
960 	while (parent != NULL && parent != parent->parent) {
961 		if (parent == vnode)
962 			return B_BAD_VALUE;
963 
964 		parent = parent->parent;
965 	}
966 
967 	struct rootfs_vnode* targetVnode = rootfs_find_in_dir(toDirectory, toName);
968 	if (targetVnode != NULL) {
969 		// target node exists, let's see if it is an empty directory
970 		if (S_ISDIR(targetVnode->stream.type)
971 			&& !rootfs_is_dir_empty(targetVnode))
972 			return B_NAME_IN_USE;
973 
974 		// so we can cleanly remove it
975 		entry_cache_remove(fs->volume->id, toDirectory->id, toName);
976 		remove_node(fs, toDirectory, targetVnode);
977 	}
978 
979 	// we try to reuse the existing name buffer if possible
980 	if (strlen(fromName) < strlen(toName)) {
981 		char* nameBuffer = strdup(toName);
982 		if (nameBuffer == NULL)
983 			return B_NO_MEMORY;
984 
985 		free(vnode->name);
986 		vnode->name = nameBuffer;
987 	} else {
988 		// we can just copy it
989 		strcpy(vnode->name, toName);
990 	}
991 
992 	// remove it from the dir
993 	entry_cache_remove(fs->volume->id, fromDirectory->id, fromName);
994 	rootfs_remove_from_dir(fs, fromDirectory, vnode);
995 
996 	// Add it back to the dir with the new name.
997 	// We need to do this even in the same directory,
998 	// so that it keeps sorted correctly.
999 	rootfs_insert_in_dir(fs, toDirectory, vnode);
1000 
1001 	entry_cache_add(fs->volume->id, toDirectory->id, toName, vnode->id);
1002 
1003 	notify_entry_moved(fs->id, fromDirectory->id, fromName, toDirectory->id,
1004 		toName, vnode->id);
1005 
1006 	return B_OK;
1007 }
1008 
1009 
1010 static status_t
1011 rootfs_read_stat(fs_volume* _volume, fs_vnode* _v, struct stat* stat)
1012 {
1013 	struct rootfs* fs = (rootfs*)_volume->private_volume;
1014 	struct rootfs_vnode* vnode = (rootfs_vnode*)_v->private_node;
1015 
1016 	TRACE(("rootfs_read_stat: vnode %p (0x%Lx), stat %p\n", vnode, vnode->id,
1017 		stat));
1018 
1019 	// stream exists, but we know to return size 0, since we can only hold
1020 	// directories
1021 	stat->st_dev = fs->id;
1022 	stat->st_ino = vnode->id;
1023 	if (S_ISLNK(vnode->stream.type))
1024 		stat->st_size = vnode->stream.symlink.length;
1025 	else
1026 		stat->st_size = 0;
1027 	stat->st_mode = vnode->stream.type;
1028 
1029 	stat->st_nlink = 1;
1030 	stat->st_blksize = 65536;
1031 	stat->st_blocks = 0;
1032 
1033 	stat->st_uid = vnode->uid;
1034 	stat->st_gid = vnode->gid;
1035 
1036 	stat->st_atim.tv_sec = real_time_clock();
1037 	stat->st_atim.tv_nsec = 0;
1038 	stat->st_mtim = stat->st_ctim = vnode->modification_time;
1039 	stat->st_crtim = vnode->creation_time;
1040 
1041 	return B_OK;
1042 }
1043 
1044 
1045 static status_t
1046 rootfs_write_stat(fs_volume* _volume, fs_vnode* _vnode, const struct stat* stat,
1047 	uint32 statMask)
1048 {
1049 	struct rootfs* fs = (rootfs*)_volume->private_volume;
1050 	struct rootfs_vnode* vnode = (rootfs_vnode*)_vnode->private_node;
1051 
1052 	TRACE(("rootfs_write_stat: vnode %p (0x%Lx), stat %p\n", vnode, vnode->id,
1053 		stat));
1054 
1055 	// we cannot change the size of anything
1056 	if (statMask & B_STAT_SIZE)
1057 		return B_BAD_VALUE;
1058 
1059 	WriteLocker locker(fs->lock);
1060 
1061 	if ((statMask & B_STAT_MODE) != 0) {
1062 		vnode->stream.type = (vnode->stream.type & ~S_IUMSK)
1063 			| (stat->st_mode & S_IUMSK);
1064 	}
1065 
1066 	if ((statMask & B_STAT_UID) != 0)
1067 		vnode->uid = stat->st_uid;
1068 	if ((statMask & B_STAT_GID) != 0)
1069 		vnode->gid = stat->st_gid;
1070 
1071 	if ((statMask & B_STAT_MODIFICATION_TIME) != 0)
1072 		vnode->modification_time = stat->st_mtim;
1073 	if ((statMask & B_STAT_CREATION_TIME) != 0)
1074 		vnode->creation_time = stat->st_crtim;
1075 
1076 	locker.Unlock();
1077 
1078 	notify_stat_changed(fs->id, get_parent_id(vnode), vnode->id, statMask);
1079 	return B_OK;
1080 }
1081 
1082 
1083 static status_t
1084 rootfs_create_special_node(fs_volume* _volume, fs_vnode* _dir, const char* name,
1085 	fs_vnode* subVnode, mode_t mode, uint32 flags, fs_vnode* _superVnode,
1086 	ino_t* _nodeID)
1087 {
1088 	struct rootfs* fs = (rootfs*)_volume->private_volume;
1089 	struct rootfs_vnode* dir = (rootfs_vnode*)_dir->private_node;
1090 	struct rootfs_vnode* vnode;
1091 
1092 	WriteLocker locker(fs->lock);
1093 
1094 	if (name != NULL) {
1095 		vnode = rootfs_find_in_dir(dir, name);
1096 		if (vnode != NULL)
1097 			return B_FILE_EXISTS;
1098 	}
1099 
1100 	vnode = rootfs_create_vnode(fs, dir, name, mode);
1101 	if (vnode == NULL)
1102 		return B_NO_MEMORY;
1103 
1104 	if (name != NULL)
1105 		rootfs_insert_in_dir(fs, dir, vnode);
1106 	else
1107 		flags |= B_VNODE_PUBLISH_REMOVED;
1108 
1109 	fs->vnode_list_hash->Insert(vnode);
1110 
1111 	_superVnode->private_node = vnode;
1112 	_superVnode->ops = &sVnodeOps;
1113 	*_nodeID = vnode->id;
1114 
1115 	if (subVnode == NULL)
1116 		subVnode = _superVnode;
1117 
1118 	status_t status = publish_vnode(fs->volume, vnode->id,
1119 		subVnode->private_node, subVnode->ops, mode, flags);
1120 	if (status != B_OK) {
1121 		if (name != NULL)
1122 			rootfs_remove_from_dir(fs, dir, vnode);
1123 		rootfs_delete_vnode(fs, vnode, false);
1124 		return status;
1125 	}
1126 
1127 	if (name != NULL) {
1128 		entry_cache_add(fs->volume->id, dir->id, name, vnode->id);
1129 		notify_entry_created(fs->id, dir->id, name, vnode->id);
1130 	}
1131 
1132 	return B_OK;
1133 }
1134 
1135 
1136 static status_t
1137 rootfs_std_ops(int32 op, ...)
1138 {
1139 	switch (op) {
1140 		case B_MODULE_INIT:
1141 			return B_OK;
1142 
1143 		case B_MODULE_UNINIT:
1144 			return B_OK;
1145 
1146 		default:
1147 			return B_ERROR;
1148 	}
1149 }
1150 
1151 
1152 namespace {
1153 
1154 fs_volume_ops sVolumeOps = {
1155 	&rootfs_unmount,
1156 	NULL,
1157 	NULL,
1158 	&rootfs_sync,
1159 	&rootfs_get_vnode,
1160 
1161 	// the other operations are not supported (indices, queries)
1162 	NULL,
1163 };
1164 
1165 fs_vnode_ops sVnodeOps = {
1166 	&rootfs_lookup,
1167 	&rootfs_get_vnode_name,
1168 
1169 	&rootfs_put_vnode,
1170 	&rootfs_remove_vnode,
1171 
1172 	&rootfs_can_page,
1173 	&rootfs_read_pages,
1174 	&rootfs_write_pages,
1175 
1176 	NULL,	// io()
1177 	NULL,	// cancel_io()
1178 
1179 	NULL,	// get_file_map()
1180 
1181 	/* common */
1182 	&rootfs_ioctl,
1183 	NULL,	// fs_set_flags()
1184 	NULL,	// select
1185 	NULL,	// deselect
1186 	&rootfs_fsync,
1187 
1188 	&rootfs_read_link,
1189 	&rootfs_symlink,
1190 	NULL,	// fs_link()
1191 	&rootfs_unlink,
1192 	&rootfs_rename,
1193 
1194 	NULL,	// fs_access()
1195 	&rootfs_read_stat,
1196 	&rootfs_write_stat,
1197 	NULL,
1198 
1199 	/* file */
1200 	&rootfs_create,
1201 	&rootfs_open,
1202 	&rootfs_close,
1203 	&rootfs_free_cookie,
1204 	&rootfs_read,
1205 	&rootfs_write,
1206 
1207 	/* directory */
1208 	&rootfs_create_dir,
1209 	&rootfs_remove_dir,
1210 	&rootfs_open_dir,
1211 	&rootfs_close,			// same as for files - it does nothing, anyway
1212 	&rootfs_free_dir_cookie,
1213 	&rootfs_read_dir,
1214 	&rootfs_rewind_dir,
1215 
1216 	/* attribute directory operations */
1217 	NULL,	// open_attr_dir
1218 	NULL,	// close_attr_dir
1219 	NULL,	// free_attr_dir_cookie
1220 	NULL,	// read_attr_dir
1221 	NULL,	// rewind_attr_dir
1222 
1223 	/* attribute operations */
1224 	NULL,	// create_attr
1225 	NULL,	// open_attr
1226 	NULL,	// close_attr
1227 	NULL,	// free_attr_cookie
1228 	NULL,	// read_attr
1229 	NULL,	// write_attr
1230 
1231 	NULL,	// read_attr_stat
1232 	NULL,	// write_attr_stat
1233 	NULL,	// rename_attr
1234 	NULL,	// remove_attr
1235 
1236 	/* support for node and FS layers */
1237 	&rootfs_create_special_node,
1238 	NULL,	// get_super_vnode,
1239 };
1240 
1241 }	// namespace
1242 
1243 file_system_module_info gRootFileSystem = {
1244 	{
1245 		"file_systems/rootfs" B_CURRENT_FS_API_VERSION,
1246 		0,
1247 		rootfs_std_ops,
1248 	},
1249 
1250 	"rootfs",				// short_name
1251 	"Root File System",		// pretty_name
1252 	0,						// DDM flags
1253 
1254 	NULL,	// identify_partition()
1255 	NULL,	// scan_partition()
1256 	NULL,	// free_identify_partition_cookie()
1257 	NULL,	// free_partition_content_cookie()
1258 
1259 	&rootfs_mount,
1260 };
1261