xref: /haiku/src/system/kernel/fs/fd.cpp (revision e1c4049fed1047bdb957b0529e1921e97ef94770)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2018, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  */
6 
7 
8 //! Operations on file descriptors
9 
10 
11 #include <fd.h>
12 
13 #include <stdlib.h>
14 #include <string.h>
15 
16 #include <OS.h>
17 
18 #include <AutoDeleter.h>
19 #include <AutoDeleterDrivers.h>
20 #include <BytePointer.h>
21 
22 #include <syscalls.h>
23 #include <syscall_restart.h>
24 #include <slab/Slab.h>
25 #include <util/AutoLock.h>
26 #include <util/iovec_support.h>
27 #include <vfs.h>
28 #include <wait_for_objects.h>
29 
30 #include "vfs_tracing.h"
31 
32 
33 //#define TRACE_FD
34 #ifdef TRACE_FD
35 #	define TRACE(x) dprintf x
36 #else
37 #	define TRACE(x)
38 #endif
39 
40 
41 static const size_t kMaxReadDirBufferSize = 64 * 1024;
42 
43 extern object_cache* sFileDescriptorCache;
44 
45 
46 static struct file_descriptor* get_fd_locked(struct io_context* context,
47 	int fd);
48 static struct file_descriptor* remove_fd(struct io_context* context, int fd);
49 static void deselect_select_infos(file_descriptor* descriptor,
50 	select_info* infos, bool putSyncObjects);
51 
52 
53 struct FDGetterLocking {
54 	inline bool Lock(file_descriptor* /*lockable*/)
55 	{
56 		return false;
57 	}
58 
59 	inline void Unlock(file_descriptor* lockable)
60 	{
61 		put_fd(lockable);
62 	}
63 };
64 
65 class FDGetter : public AutoLocker<file_descriptor, FDGetterLocking> {
66 public:
67 	inline FDGetter()
68 		: AutoLocker<file_descriptor, FDGetterLocking>()
69 	{
70 	}
71 
72 	inline FDGetter(io_context* context, int fd, bool contextLocked = false)
73 		: AutoLocker<file_descriptor, FDGetterLocking>(
74 			contextLocked ? get_fd_locked(context, fd) : get_fd(context, fd))
75 	{
76 	}
77 
78 	inline file_descriptor* SetTo(io_context* context, int fd,
79 		bool contextLocked = false)
80 	{
81 		file_descriptor* descriptor
82 			= contextLocked ? get_fd_locked(context, fd) : get_fd(context, fd);
83 		AutoLocker<file_descriptor, FDGetterLocking>::SetTo(descriptor, true);
84 		return descriptor;
85 	}
86 
87 	inline file_descriptor* SetTo(int fd, bool kernel,
88 		bool contextLocked = false)
89 	{
90 		return SetTo(get_current_io_context(kernel), fd, contextLocked);
91 	}
92 
93 	inline file_descriptor* FD() const
94 	{
95 		return fLockable;
96 	}
97 };
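
/*	Usage sketch (for illustration only): FDGetter is the usual way to turn an
	fd number into a referenced file_descriptor and to have the reference
	dropped automatically. A typical caller looks roughly like this;
	"do_something_with" is a hypothetical placeholder:

		FDGetter fdGetter;
		struct file_descriptor* descriptor = fdGetter.SetTo(fd, kernel);
		if (descriptor == NULL)
			return B_FILE_ERROR;
		// put_fd() is called automatically when fdGetter goes out of scope
		return do_something_with(descriptor);
*/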
98 
99 
100 //	#pragma mark - General fd routines
101 
102 
103 #ifdef DEBUG
104 void dump_fd(int fd, struct file_descriptor* descriptor);
105 
106 void
107 dump_fd(int fd, struct file_descriptor* descriptor)
108 {
109 	dprintf("fd[%d] = %p: type = %" B_PRId32 ", ref_count = %" B_PRId32 ", ops "
110 		"= %p, u.vnode = %p, u.mount = %p, cookie = %p, open_mode = %" B_PRIx32
111 		", pos = %" B_PRId64 "\n",
112 		fd, descriptor, descriptor->type, descriptor->ref_count,
113 		descriptor->ops, descriptor->u.vnode, descriptor->u.mount,
114 		descriptor->cookie, descriptor->open_mode, descriptor->pos);
115 }
116 #endif
117 
118 
119 /*! Allocates and initializes a new file_descriptor.
120 */
121 struct file_descriptor*
122 alloc_fd(void)
123 {
124 	file_descriptor* descriptor
125 		= (file_descriptor*)object_cache_alloc(sFileDescriptorCache, 0);
126 	if (descriptor == NULL)
127 		return NULL;
128 
129 	descriptor->u.vnode = NULL;
130 	descriptor->cookie = NULL;
131 	descriptor->ref_count = 1;
132 	descriptor->open_count = 0;
133 	descriptor->open_mode = 0;
134 	descriptor->pos = 0;
135 
136 	return descriptor;
137 }
138 
139 
140 bool
141 fd_close_on_exec(struct io_context* context, int fd)
142 {
143 	return CHECK_BIT(context->fds_close_on_exec[fd / 8], fd & 7) ? true : false;
144 }
145 
146 
147 void
148 fd_set_close_on_exec(struct io_context* context, int fd, bool closeFD)
149 {
150 	if (closeFD)
151 		context->fds_close_on_exec[fd / 8] |= (1 << (fd & 7));
152 	else
153 		context->fds_close_on_exec[fd / 8] &= ~(1 << (fd & 7));
154 }
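
/*	The close-on-exec flags are kept in a bitmap: each byte of
	fds_close_on_exec covers eight descriptors, fd / 8 selects the byte and
	fd & 7 the bit within it. For example, fd 10 is byte 1, bit 2, i.e.
	mask 1 << 2 == 0x04.
*/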
155 
156 
157 /*!	Searches for a free slot in the FD table of the provided I/O context,
158 	and inserts the specified descriptor into it.
159 */
160 int
161 new_fd_etc(struct io_context* context, struct file_descriptor* descriptor,
162 	int firstIndex)
163 {
164 	int fd = -1;
165 	uint32 i;
166 
167 	if (firstIndex < 0 || (uint32)firstIndex >= context->table_size)
168 		return B_BAD_VALUE;
169 
170 	mutex_lock(&context->io_mutex);
171 
172 	for (i = firstIndex; i < context->table_size; i++) {
173 		if (!context->fds[i]) {
174 			fd = i;
175 			break;
176 		}
177 	}
178 	if (fd < 0) {
179 		fd = B_NO_MORE_FDS;
180 		goto err;
181 	}
182 
183 	TFD(NewFD(context, fd, descriptor));
184 
185 	context->fds[fd] = descriptor;
186 	context->num_used_fds++;
187 	atomic_add(&descriptor->open_count, 1);
188 
189 err:
190 	mutex_unlock(&context->io_mutex);
191 
192 	return fd;
193 }
194 
195 
196 int
197 new_fd(struct io_context* context, struct file_descriptor* descriptor)
198 {
199 	return new_fd_etc(context, descriptor, 0);
200 }
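
/*	Usage sketch (for illustration only): the typical pattern for publishing a
	freshly allocated descriptor. The ops table "sExampleOps" and the cookie
	are hypothetical; on failure the initial reference from alloc_fd() is
	dropped again with put_fd(), which frees the descriptor.

		file_descriptor* descriptor = alloc_fd();
		if (descriptor == NULL)
			return B_NO_MEMORY;

		descriptor->type = FDTYPE_FILE;
		descriptor->ops = &sExampleOps;
		descriptor->cookie = cookie;

		int fd = new_fd(get_current_io_context(kernel), descriptor);
		if (fd < 0)
			put_fd(descriptor);

		return fd;
*/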
201 
202 
203 /*!	Reduces the descriptor's reference counter, and frees all resources
204 	when it's no longer used.
205 */
206 void
207 put_fd(struct file_descriptor* descriptor)
208 {
209 	int32 previous = atomic_add(&descriptor->ref_count, -1);
210 
211 	TFD(PutFD(descriptor));
212 
213 	TRACE(("put_fd(descriptor = %p [ref = %" B_PRId32 ", cookie = %p])\n",
214 		descriptor, descriptor->ref_count, descriptor->cookie));
215 
216 	// free the descriptor if we don't need it anymore
217 	if (previous == 1) {
218 		// free the underlying object
219 		if (descriptor->ops != NULL && descriptor->ops->fd_free != NULL)
220 			descriptor->ops->fd_free(descriptor);
221 
222 		object_cache_free(sFileDescriptorCache, descriptor, 0);
223 	} else if ((descriptor->open_mode & O_DISCONNECTED) != 0
224 		&& previous - 1 == descriptor->open_count
225 		&& descriptor->ops != NULL) {
226 		// the descriptor has been disconnected - it cannot
227 		// be accessed anymore, let's close it (no one is
228 		// currently accessing this descriptor)
229 
230 		if (descriptor->ops->fd_close)
231 			descriptor->ops->fd_close(descriptor);
232 		if (descriptor->ops->fd_free)
233 			descriptor->ops->fd_free(descriptor);
234 
235 		// prevent this descriptor from being closed/freed again
236 		descriptor->ops = NULL;
237 		descriptor->u.vnode = NULL;
238 
239 		// the file descriptor is kept intact, so that it's not
240 		// reused until someone explicitly closes it
241 	}
242 }
243 
244 
245 /*!	Decrements the open counter of the file descriptor and invokes
246 	its close hook when appropriate.
247 */
248 void
249 close_fd(struct io_context* context, struct file_descriptor* descriptor)
250 {
251 	// POSIX advisory locks need to be released when any file descriptor closes
252 	if (descriptor->type == FDTYPE_FILE)
253 		vfs_release_posix_lock(context, descriptor);
254 
255 	if (atomic_add(&descriptor->open_count, -1) == 1) {
256 		vfs_unlock_vnode_if_locked(descriptor);
257 
258 		if (descriptor->ops != NULL && descriptor->ops->fd_close != NULL)
259 			descriptor->ops->fd_close(descriptor);
260 	}
261 }
262 
263 
264 status_t
265 close_fd_index(struct io_context* context, int fd)
266 {
267 	struct file_descriptor* descriptor = remove_fd(context, fd);
268 
269 	if (descriptor == NULL)
270 		return B_FILE_ERROR;
271 
272 	close_fd(context, descriptor);
273 	put_fd(descriptor);
274 		// the reference associated with the slot
275 
276 	return B_OK;
277 }
278 
279 
280 /*!	This descriptor's underlying object will be closed and freed as soon as
281 	possible (in one of the next calls to put_fd() - get_fd() will no longer
282 	succeed on this descriptor).
283 	This is useful if the underlying object is gone, for instance when a
284 	(mounted) volume was removed unexpectedly.
285 */
286 void
287 disconnect_fd(struct file_descriptor* descriptor)
288 {
289 	descriptor->open_mode |= O_DISCONNECTED;
290 }
291 
292 
293 void
294 inc_fd_ref_count(struct file_descriptor* descriptor)
295 {
296 	atomic_add(&descriptor->ref_count, 1);
297 }
298 
299 
300 static struct file_descriptor*
301 get_fd_locked(struct io_context* context, int fd)
302 {
303 	if (fd < 0 || (uint32)fd >= context->table_size)
304 		return NULL;
305 
306 	struct file_descriptor* descriptor = context->fds[fd];
307 
308 	if (descriptor != NULL) {
309 		// disconnected descriptors cannot be accessed anymore
310 		if (descriptor->open_mode & O_DISCONNECTED)
311 			return NULL;
312 
313 		TFD(GetFD(context, fd, descriptor));
314 		inc_fd_ref_count(descriptor);
315 	}
316 
317 	return descriptor;
318 }
319 
320 
321 struct file_descriptor*
322 get_fd(struct io_context* context, int fd)
323 {
324 	MutexLocker _(context->io_mutex);
325 
326 	return get_fd_locked(context, fd);
327 }
328 
329 
330 struct file_descriptor*
331 get_open_fd(struct io_context* context, int fd)
332 {
333 	MutexLocker _(context->io_mutex);
334 
335 	file_descriptor* descriptor = get_fd_locked(context, fd);
336 	if (descriptor == NULL)
337 		return NULL;
338 
339 	atomic_add(&descriptor->open_count, 1);
340 
341 	return descriptor;
342 }
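
/*	Note: unlike get_fd(), get_open_fd() also raises the descriptor's open
	count, so callers have to drop that open count again (as close_fd() does)
	in addition to releasing the reference with put_fd().
*/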
343 
344 
345 /*!	Removes the file descriptor from the specified slot.
346 */
347 static struct file_descriptor*
348 remove_fd(struct io_context* context, int fd)
349 {
350 	struct file_descriptor* descriptor = NULL;
351 
352 	if (fd < 0)
353 		return NULL;
354 
355 	mutex_lock(&context->io_mutex);
356 
357 	if ((uint32)fd < context->table_size)
358 		descriptor = context->fds[fd];
359 
360 	select_info* selectInfos = NULL;
361 	bool disconnected = false;
362 
363 	if (descriptor != NULL) {
364 		// fd is valid
365 		TFD(RemoveFD(context, fd, descriptor));
366 
367 		context->fds[fd] = NULL;
368 		fd_set_close_on_exec(context, fd, false);
369 		context->num_used_fds--;
370 
371 		selectInfos = context->select_infos[fd];
372 		context->select_infos[fd] = NULL;
373 
374 		disconnected = (descriptor->open_mode & O_DISCONNECTED);
375 	}
376 
377 	if (selectInfos != NULL)
378 		deselect_select_infos(descriptor, selectInfos, true);
379 
380 	mutex_unlock(&context->io_mutex);
381 
382 	return disconnected ? NULL : descriptor;
383 }
384 
385 
386 static int
387 dup_fd(int fd, bool kernel)
388 {
389 	struct io_context* context = get_current_io_context(kernel);
390 	struct file_descriptor* descriptor;
391 	int status;
392 
393 	TRACE(("dup_fd: fd = %d\n", fd));
394 
395 	// Try to get the fd structure
396 	descriptor = get_fd(context, fd);
397 	if (descriptor == NULL)
398 		return B_FILE_ERROR;
399 
400 	// now put the fd in place
401 	status = new_fd(context, descriptor);
402 	if (status < 0)
403 		put_fd(descriptor);
404 	else {
405 		mutex_lock(&context->io_mutex);
406 		fd_set_close_on_exec(context, status, false);
407 		mutex_unlock(&context->io_mutex);
408 	}
409 
410 	return status;
411 }
412 
413 
414 /*!	POSIX says this should be the same as:
415 		close(newfd);
416 		fcntl(oldfd, F_DUPFD, newfd);
417 
418 	We do dup2() directly to be thread-safe.
419 */
420 static int
421 dup2_fd(int oldfd, int newfd, bool kernel)
422 {
423 	struct file_descriptor* evicted = NULL;
424 	struct io_context* context;
425 
426 	TRACE(("dup2_fd: ofd = %d, nfd = %d\n", oldfd, newfd));
427 
428 	// quick check
429 	if (oldfd < 0 || newfd < 0)
430 		return B_FILE_ERROR;
431 
432 	// Get current I/O context and lock it
433 	context = get_current_io_context(kernel);
434 	mutex_lock(&context->io_mutex);
435 
436 	// Check if the fds are valid (mutex must be locked because
437 	// the table size could be changed)
438 	if ((uint32)oldfd >= context->table_size
439 		|| (uint32)newfd >= context->table_size
440 		|| context->fds[oldfd] == NULL
441 		|| (context->fds[oldfd]->open_mode & O_DISCONNECTED) != 0) {
442 		mutex_unlock(&context->io_mutex);
443 		return B_FILE_ERROR;
444 	}
445 
446 	// Check for identity; note that this check cannot be done earlier,
447 	// because we always want to return an error on invalid
448 	// handles.
449 	if (oldfd != newfd) {
450 		// Now do the work
451 		TFD(Dup2FD(context, oldfd, newfd));
452 
453 		evicted = context->fds[newfd];
454 		select_info* selectInfos = context->select_infos[newfd];
455 		context->select_infos[newfd] = NULL;
456 		atomic_add(&context->fds[oldfd]->ref_count, 1);
457 		atomic_add(&context->fds[oldfd]->open_count, 1);
458 		context->fds[newfd] = context->fds[oldfd];
459 
460 		if (evicted == NULL)
461 			context->num_used_fds++;
462 
463 		deselect_select_infos(evicted, selectInfos, true);
464 	}
465 
466 	fd_set_close_on_exec(context, newfd, false);
467 
468 	mutex_unlock(&context->io_mutex);
469 
470 	// Say bye bye to the evicted fd
471 	if (evicted) {
472 		close_fd(context, evicted);
473 		put_fd(evicted);
474 	}
475 
476 	return newfd;
477 }
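
/*	Illustrative example: the classic use of dup2() semantics (reached from
	userland via _user_dup2() below) is redirecting a well-known descriptor,
	e.g. making stdout refer to an already open log file; the path is just an
	example:

		int fd = open("/var/log/output.log", O_WRONLY | O_APPEND);
		dup2(fd, STDOUT_FILENO);
		close(fd);
*/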
478 
479 
480 /*!	Duplicates an FD from another team to this/the kernel team.
481 	\param fromTeam The team which owns the FD.
482 	\param fd The FD to duplicate.
483 	\param kernel If \c true, the new FD will be created in the kernel team,
484 			otherwise in the current userland team.
485 	\return The newly created FD or an error code, if something went wrong.
486 */
487 int
488 dup_foreign_fd(team_id fromTeam, int fd, bool kernel)
489 {
490 	// get the I/O context for the team in question
491 	Team* team = Team::Get(fromTeam);
492 	if (team == NULL)
493 		return B_BAD_TEAM_ID;
494 	BReference<Team> teamReference(team, true);
495 
496 	io_context* fromContext = team->io_context;
497 
498 	// get the file descriptor
499 	file_descriptor* descriptor = get_fd(fromContext, fd);
500 	if (descriptor == NULL)
501 		return B_FILE_ERROR;
502 	DescriptorPutter descriptorPutter(descriptor);
503 
504 	// create a new FD in the target I/O context
505 	int result = new_fd(get_current_io_context(kernel), descriptor);
506 	if (result >= 0) {
507 		// the descriptor reference belongs to the slot, now
508 		descriptorPutter.Detach();
509 	}
510 
511 	return result;
512 }
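
/*	Usage sketch (for illustration only): duplicating a descriptor owned by
	another team into the kernel's I/O context, e.g. when a kernel component
	was handed a (team, fd) pair. The resulting descriptor is closed again
	with _kern_close() when no longer needed.

		int localFD = dup_foreign_fd(teamID, foreignFD, true);
		if (localFD < 0)
			return localFD;
		// ... use localFD with the _kern_*() functions ...
		_kern_close(localFD);
*/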
513 
514 
515 static status_t
516 fd_ioctl(bool kernelFD, int fd, uint32 op, void* buffer, size_t length)
517 {
518 	struct file_descriptor* descriptor;
519 	int status;
520 
521 	descriptor = get_fd(get_current_io_context(kernelFD), fd);
522 	if (descriptor == NULL)
523 		return B_FILE_ERROR;
524 
525 	if (descriptor->ops->fd_ioctl)
526 		status = descriptor->ops->fd_ioctl(descriptor, op, buffer, length);
527 	else
528 		status = B_DEV_INVALID_IOCTL;
529 
530 	if (status == B_DEV_INVALID_IOCTL)
531 		status = ENOTTY;
532 
533 	put_fd(descriptor);
534 	return status;
535 }
536 
537 
538 static void
539 deselect_select_infos(file_descriptor* descriptor, select_info* infos,
540 	bool putSyncObjects)
541 {
542 	TRACE(("deselect_select_infos(%p, %p)\n", descriptor, infos));
543 
544 	select_info* info = infos;
545 	while (info != NULL) {
546 		select_sync* sync = info->sync;
547 
548 		// deselect the selected events
549 		uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
550 		if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
551 			for (uint16 event = 1; event < 16; event++) {
552 				if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
553 					descriptor->ops->fd_deselect(descriptor, event,
554 						(selectsync*)info);
555 				}
556 			}
557 		}
558 
559 		select_info* next = info->next;
560 		notify_select_events(info, B_EVENT_INVALID);
561 		info = next;
562 
563 		if (putSyncObjects)
564 			put_select_sync(sync);
565 	}
566 }
567 
568 
569 status_t
570 select_fd(int32 fd, struct select_info* info, bool kernel)
571 {
572 	TRACE(("select_fd(fd = %" B_PRId32 ", info = %p (%p), 0x%x)\n", fd, info,
573 		info->sync, info->selected_events));
574 
575 	FDGetter fdGetter;
576 		// define before the context locker, so it will be destroyed after it
577 
578 	io_context* context = get_current_io_context(kernel);
579 	MutexLocker locker(context->io_mutex);
580 
581 	struct file_descriptor* descriptor = fdGetter.SetTo(context, fd, true);
582 	if (descriptor == NULL)
583 		return B_FILE_ERROR;
584 
585 	uint16 eventsToSelect = info->selected_events & ~B_EVENT_INVALID;
586 
587 	if (descriptor->ops->fd_select == NULL) {
588 		// if the I/O subsystem doesn't support select(), we will
589 		// immediately notify the select call
590 		eventsToSelect &= ~SELECT_OUTPUT_ONLY_FLAGS;
591 		if (eventsToSelect != 0)
592 			return notify_select_events(info, eventsToSelect);
593 		else
594 			return B_OK;
595 	}
596 
597 	// We need the FD to stay open while we're doing this, so that
598 	// select()/deselect() won't be called on an already closed descriptor.
599 	atomic_add(&descriptor->open_count, 1);
600 
601 	locker.Unlock();
602 
603 	// select any events asked for
604 	uint32 selectedEvents = 0;
605 
606 	for (uint16 event = 1; event < 16; event++) {
607 		if ((eventsToSelect & SELECT_FLAG(event)) != 0
608 			&& descriptor->ops->fd_select(descriptor, event,
609 				(selectsync*)info) == B_OK) {
610 			selectedEvents |= SELECT_FLAG(event);
611 		}
612 	}
613 	info->selected_events = selectedEvents
614 		| (info->selected_events & B_EVENT_INVALID);
615 
616 	// Add the info to the I/O context, even if nothing has been selected --
617 	// we always support B_EVENT_INVALID.
618 	locker.Lock();
619 	if (context->fds[fd] != descriptor) {
620 		// Someone close()d the FD slot in the meantime. deselect() all
621 		// events.
622 		info->next = NULL;
623 		deselect_select_infos(descriptor, info, false);
624 
625 		// Release our open reference of the descriptor.
626 		close_fd(context, descriptor);
627 		return B_FILE_ERROR;
628 	}
629 
630 	// The FD index hasn't changed, so we add the select info to the table.
631 
632 	info->next = context->select_infos[fd];
633 	context->select_infos[fd] = info;
634 
635 	// As long as the info is in the list, we keep a reference to the sync
636 	// object.
637 	acquire_select_sync(info->sync);
638 
639 	// Finally release our open reference. It is safe just to decrement,
640 	// since as long as the descriptor is associated with the slot,
641 	// someone else still has it open.
642 	atomic_add(&descriptor->open_count, -1);
643 
644 	return B_OK;
645 }
646 
647 
648 status_t
649 deselect_fd(int32 fd, struct select_info* info, bool kernel)
650 {
651 	TRACE(("deselect_fd(fd = %" B_PRId32 ", info = %p (%p), 0x%x)\n", fd, info,
652 		info->sync, info->selected_events));
653 
654 	FDGetter fdGetter;
655 		// define before the context locker, so it will be destroyed after it
656 
657 	io_context* context = get_current_io_context(kernel);
658 	MutexLocker locker(context->io_mutex);
659 
660 	struct file_descriptor* descriptor = fdGetter.SetTo(context, fd, true);
661 	if (descriptor == NULL)
662 		return B_FILE_ERROR;
663 
664 	// remove the info from the IO context
665 
666 	select_info** infoLocation = &context->select_infos[fd];
667 	while (*infoLocation != NULL && *infoLocation != info)
668 		infoLocation = &(*infoLocation)->next;
669 
670 	// If not found, someone else beat us to it.
671 	if (*infoLocation != info)
672 		return B_OK;
673 
674 	*infoLocation = info->next;
675 
676 	locker.Unlock();
677 
678 	// deselect the selected events
679 	uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
680 	if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
681 		for (uint16 event = 1; event < 16; event++) {
682 			if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
683 				descriptor->ops->fd_deselect(descriptor, event,
684 					(selectsync*)info);
685 			}
686 		}
687 	}
688 
689 	put_select_sync(info->sync);
690 
691 	return B_OK;
692 }
693 
694 
695 /*!	This function checks if the specified fd is valid in the current
696 	context. It can be used for a quick check; the fd is not locked
697 	so it could become invalid immediately after this check.
698 */
699 bool
700 fd_is_valid(int fd, bool kernel)
701 {
702 	struct file_descriptor* descriptor
703 		= get_fd(get_current_io_context(kernel), fd);
704 	if (descriptor == NULL)
705 		return false;
706 
707 	put_fd(descriptor);
708 	return true;
709 }
710 
711 
712 struct vnode*
713 fd_vnode(struct file_descriptor* descriptor)
714 {
715 	switch (descriptor->type) {
716 		case FDTYPE_FILE:
717 		case FDTYPE_DIR:
718 		case FDTYPE_ATTR_DIR:
719 		case FDTYPE_ATTR:
720 			return descriptor->u.vnode;
721 	}
722 
723 	return NULL;
724 }
725 
726 
727 static status_t
728 common_close(int fd, bool kernel)
729 {
730 	return close_fd_index(get_current_io_context(kernel), fd);
731 }
732 
733 
734 static ssize_t
735 common_user_io(int fd, off_t pos, void* buffer, size_t length, bool write)
736 {
737 	if (pos < -1)
738 		return B_BAD_VALUE;
739 
740 	FDGetter fdGetter;
741 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, false);
742 	if (!descriptor)
743 		return B_FILE_ERROR;
744 
745 	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
746 			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
747 		return B_FILE_ERROR;
748 	}
749 
750 	bool movePosition = false;
751 	if (pos == -1 && descriptor->ops->fd_seek != NULL) {
752 		pos = descriptor->pos;
753 		movePosition = true;
754 	}
755 
756 	if (write ? descriptor->ops->fd_write == NULL
757 			: descriptor->ops->fd_read == NULL) {
758 		return B_BAD_VALUE;
759 	}
760 
761 	if (length == 0)
762 		return 0;
763 
764 	if (!is_user_address_range(buffer, length))
765 		return B_BAD_ADDRESS;
766 
767 	SyscallRestartWrapper<status_t> status;
768 
769 	if (write)
770 		status = descriptor->ops->fd_write(descriptor, pos, buffer, &length);
771 	else
772 		status = descriptor->ops->fd_read(descriptor, pos, buffer, &length);
773 
774 	if (status != B_OK)
775 		return status;
776 
777 	if (movePosition) {
778 		descriptor->pos = write && (descriptor->open_mode & O_APPEND) != 0
779 			? descriptor->ops->fd_seek(descriptor, 0, SEEK_END) : pos + length;
780 	}
781 
782 	return length <= SSIZE_MAX ? (ssize_t)length : SSIZE_MAX;
783 }
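
/*	A position of -1 means "use and advance the descriptor's current
	position": _user_read(fd, -1, ...) reads at descriptor->pos and moves it
	afterwards (this is how read() behaves), whereas a non-negative position
	reads at that offset and leaves descriptor->pos untouched (pread()
	behaviour), provided the descriptor supports seeking at all.
*/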
784 
785 
786 static ssize_t
787 common_user_vector_io(int fd, off_t pos, const iovec* userVecs, size_t count,
788 	bool write)
789 {
790 	if (pos < -1)
791 		return B_BAD_VALUE;
792 
793 	iovec* vecs;
794 	status_t error = get_iovecs_from_user(userVecs, count, vecs, true);
795 	if (error != B_OK)
796 		return error;
797 	MemoryDeleter _(vecs);
798 
799 	FDGetter fdGetter;
800 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, false);
801 	if (!descriptor)
802 		return B_FILE_ERROR;
803 
804 	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
805 			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
806 		return B_FILE_ERROR;
807 	}
808 
809 	bool movePosition = false;
810 	if (pos == -1 && descriptor->ops->fd_seek != NULL) {
811 		pos = descriptor->pos;
812 		movePosition = true;
813 	}
814 
815 	if (write ? descriptor->ops->fd_write == NULL
816 			: descriptor->ops->fd_read == NULL) {
817 		return B_BAD_VALUE;
818 	}
819 
820 	SyscallRestartWrapper<status_t> status;
821 
822 	ssize_t bytesTransferred = 0;
823 	for (size_t i = 0; i < count; i++) {
824 		if (vecs[i].iov_base == NULL)
825 			continue;
826 
827 		size_t length = vecs[i].iov_len;
828 		if (write) {
829 			status = descriptor->ops->fd_write(descriptor, pos,
830 				vecs[i].iov_base, &length);
831 		} else {
832 			status = descriptor->ops->fd_read(descriptor, pos, vecs[i].iov_base,
833 				&length);
834 		}
835 
836 		if (status != B_OK) {
837 			if (bytesTransferred == 0)
838 				return status;
839 			status = B_OK;
840 			break;
841 		}
842 
843 		if ((uint64)bytesTransferred + length > SSIZE_MAX)
844 			bytesTransferred = SSIZE_MAX;
845 		else
846 			bytesTransferred += (ssize_t)length;
847 
848 		pos += length;
849 
850 		if (length < vecs[i].iov_len)
851 			break;
852 	}
853 
854 	if (movePosition) {
855 		descriptor->pos = write && (descriptor->open_mode & O_APPEND) != 0
856 			? descriptor->ops->fd_seek(descriptor, 0, SEEK_END) : pos;
857 	}
858 
859 	return bytesTransferred;
860 }
861 
862 
863 status_t
864 user_fd_kernel_ioctl(int fd, uint32 op, void* buffer, size_t length)
865 {
866 	TRACE(("user_fd_kernel_ioctl: fd %d\n", fd));
867 
868 	return fd_ioctl(false, fd, op, buffer, length);
869 }
870 
871 
872 //	#pragma mark - User syscalls
873 
874 
875 ssize_t
876 _user_read(int fd, off_t pos, void* buffer, size_t length)
877 {
878 	return common_user_io(fd, pos, buffer, length, false);
879 }
880 
881 
882 ssize_t
883 _user_readv(int fd, off_t pos, const iovec* userVecs, size_t count)
884 {
885 	return common_user_vector_io(fd, pos, userVecs, count, false);
886 }
887 
888 
889 ssize_t
890 _user_write(int fd, off_t pos, const void* buffer, size_t length)
891 {
892 	return common_user_io(fd, pos, (void*)buffer, length, true);
893 }
894 
895 
896 ssize_t
897 _user_writev(int fd, off_t pos, const iovec* userVecs, size_t count)
898 {
899 	return common_user_vector_io(fd, pos, userVecs, count, true);
900 }
901 
902 
903 off_t
904 _user_seek(int fd, off_t pos, int seekType)
905 {
906 	syscall_64_bit_return_value();
907 
908 	struct file_descriptor* descriptor;
909 
910 	descriptor = get_fd(get_current_io_context(false), fd);
911 	if (!descriptor)
912 		return B_FILE_ERROR;
913 
914 	TRACE(("user_seek(descriptor = %p)\n", descriptor));
915 
916 	if (descriptor->ops->fd_seek)
917 		pos = descriptor->ops->fd_seek(descriptor, pos, seekType);
918 	else
919 		pos = ESPIPE;
920 
921 	put_fd(descriptor);
922 	return pos;
923 }
924 
925 
926 status_t
927 _user_ioctl(int fd, uint32 op, void* buffer, size_t length)
928 {
929 	TRACE(("user_ioctl: fd %d\n", fd));
930 
931 	// "buffer" is not always a pointer depending on "op", so we cannot
932 	// check that it is a userland buffer here. Instead we check that
933 	// it is at least not within the bounds of kernel memory; as in
934 	// the cases where it is a numeric constant it is usually a low one.
935 	if (IS_KERNEL_ADDRESS(buffer))
936 		return B_BAD_ADDRESS;
937 
938 	SyscallRestartWrapper<status_t> status;
939 
940 	return status = fd_ioctl(false, fd, op, buffer, length);
941 }
942 
943 
944 ssize_t
945 _user_read_dir(int fd, struct dirent* userBuffer, size_t bufferSize,
946 	uint32 maxCount)
947 {
948 	TRACE(("user_read_dir(fd = %d, userBuffer = %p, bufferSize = %ld, count = "
949 		"%" B_PRIu32 ")\n", fd, userBuffer, bufferSize, maxCount));
950 
951 	if (maxCount == 0)
952 		return 0;
953 
954 	if (userBuffer == NULL || !IS_USER_ADDRESS(userBuffer))
955 		return B_BAD_ADDRESS;
956 
957 	// get I/O context and FD
958 	io_context* ioContext = get_current_io_context(false);
959 	FDGetter fdGetter;
960 	struct file_descriptor* descriptor = fdGetter.SetTo(ioContext, fd, false);
961 	if (descriptor == NULL)
962 		return B_FILE_ERROR;
963 
964 	if (descriptor->ops->fd_read_dir == NULL)
965 		return B_UNSUPPORTED;
966 
967 	// restrict buffer size and allocate a heap buffer
968 	if (bufferSize > kMaxReadDirBufferSize)
969 		bufferSize = kMaxReadDirBufferSize;
970 	struct dirent* buffer = (struct dirent*)malloc(bufferSize);
971 	if (buffer == NULL)
972 		return B_NO_MEMORY;
973 	MemoryDeleter bufferDeleter(buffer);
974 
975 	// read the directory
976 	uint32 count = maxCount;
977 	status_t status = descriptor->ops->fd_read_dir(ioContext, descriptor,
978 		buffer, bufferSize, &count);
979 	if (status != B_OK)
980 		return status;
981 
982 	ASSERT(count <= maxCount);
983 
984 	// copy the buffer back -- determine the total buffer size first
985 	size_t sizeToCopy = 0;
986 	BytePointer<struct dirent> entry = buffer;
987 	for (uint32 i = 0; i < count; i++) {
988 		size_t length = entry->d_reclen;
989 		sizeToCopy += length;
990 		entry += length;
991 	}
992 
993 	ASSERT(sizeToCopy <= bufferSize);
994 
995 	if (user_memcpy(userBuffer, buffer, sizeToCopy) != B_OK)
996 		return B_BAD_ADDRESS;
997 
998 	return count;
999 }
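
/*	Illustrative sketch: the entries are packed back to back, each d_reclen
	bytes long, so a caller walks the returned buffer the same way the copy
	loop above does:

		char buffer[4096];
		ssize_t count = _kern_read_dir(fd, (struct dirent*)buffer,
			sizeof(buffer), 32);
		struct dirent* entry = (struct dirent*)buffer;
		for (ssize_t i = 0; i < count; i++) {
			// entry->d_name holds the entry's name
			entry = (struct dirent*)((addr_t)entry + entry->d_reclen);
		}
*/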
1000 
1001 
1002 status_t
1003 _user_rewind_dir(int fd)
1004 {
1005 	struct file_descriptor* descriptor;
1006 	status_t status;
1007 
1008 	TRACE(("user_rewind_dir(fd = %d)\n", fd));
1009 
1010 	descriptor = get_fd(get_current_io_context(false), fd);
1011 	if (descriptor == NULL)
1012 		return B_FILE_ERROR;
1013 
1014 	if (descriptor->ops->fd_rewind_dir)
1015 		status = descriptor->ops->fd_rewind_dir(descriptor);
1016 	else
1017 		status = B_UNSUPPORTED;
1018 
1019 	put_fd(descriptor);
1020 	return status;
1021 }
1022 
1023 
1024 status_t
1025 _user_close(int fd)
1026 {
1027 	return common_close(fd, false);
1028 }
1029 
1030 
1031 int
1032 _user_dup(int fd)
1033 {
1034 	return dup_fd(fd, false);
1035 }
1036 
1037 
1038 int
1039 _user_dup2(int ofd, int nfd)
1040 {
1041 	return dup2_fd(ofd, nfd, false);
1042 }
1043 
1044 
1045 //	#pragma mark - Kernel calls
1046 
1047 
1048 ssize_t
1049 _kern_read(int fd, off_t pos, void* buffer, size_t length)
1050 {
1051 	if (pos < -1)
1052 		return B_BAD_VALUE;
1053 
1054 	FDGetter fdGetter;
1055 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1056 
1057 	if (!descriptor)
1058 		return B_FILE_ERROR;
1059 	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
1060 		return B_FILE_ERROR;
1061 
1062 	bool movePosition = false;
1063 	if (pos == -1) {
1064 		pos = descriptor->pos;
1065 		movePosition = true;
1066 	}
1067 
1068 	SyscallFlagUnsetter _;
1069 
1070 	if (descriptor->ops->fd_read == NULL)
1071 		return B_BAD_VALUE;
1072 
1073 	ssize_t bytesRead = descriptor->ops->fd_read(descriptor, pos, buffer,
1074 		&length);
1075 	if (bytesRead >= B_OK) {
1076 		if (length > SSIZE_MAX)
1077 			bytesRead = SSIZE_MAX;
1078 		else
1079 			bytesRead = (ssize_t)length;
1080 
1081 		if (movePosition)
1082 			descriptor->pos = pos + length;
1083 	}
1084 
1085 	return bytesRead;
1086 }
1087 
1088 
1089 ssize_t
1090 _kern_readv(int fd, off_t pos, const iovec* vecs, size_t count)
1091 {
1092 	bool movePosition = false;
1093 	status_t status;
1094 
1095 	if (pos < -1)
1096 		return B_BAD_VALUE;
1097 
1098 	FDGetter fdGetter;
1099 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1100 
1101 	if (!descriptor)
1102 		return B_FILE_ERROR;
1103 	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
1104 		return B_FILE_ERROR;
1105 
1106 	if (pos == -1) {
1107 		pos = descriptor->pos;
1108 		movePosition = true;
1109 	}
1110 
1111 	if (descriptor->ops->fd_read == NULL)
1112 		return B_BAD_VALUE;
1113 
1114 	SyscallFlagUnsetter _;
1115 
1116 	ssize_t bytesRead = 0;
1117 
1118 	for (size_t i = 0; i < count; i++) {
1119 		size_t length = vecs[i].iov_len;
1120 		status = descriptor->ops->fd_read(descriptor, pos, vecs[i].iov_base,
1121 			&length);
1122 		if (status != B_OK) {
1123 			bytesRead = status;
1124 			break;
1125 		}
1126 
1127 		if ((uint64)bytesRead + length > SSIZE_MAX)
1128 			bytesRead = SSIZE_MAX;
1129 		else
1130 			bytesRead += (ssize_t)length;
1131 
1132 		pos += vecs[i].iov_len;
1133 	}
1134 
1135 	if (movePosition)
1136 		descriptor->pos = pos;
1137 
1138 	return bytesRead;
1139 }
1140 
1141 
1142 ssize_t
1143 _kern_write(int fd, off_t pos, const void* buffer, size_t length)
1144 {
1145 	if (pos < -1)
1146 		return B_BAD_VALUE;
1147 
1148 	FDGetter fdGetter;
1149 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1150 
1151 	if (descriptor == NULL)
1152 		return B_FILE_ERROR;
1153 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
1154 		return B_FILE_ERROR;
1155 
1156 	bool movePosition = false;
1157 	if (pos == -1) {
1158 		pos = descriptor->pos;
1159 		movePosition = true;
1160 	}
1161 
1162 	if (descriptor->ops->fd_write == NULL)
1163 		return B_BAD_VALUE;
1164 
1165 	SyscallFlagUnsetter _;
1166 
1167 	ssize_t bytesWritten = descriptor->ops->fd_write(descriptor, pos, buffer,
1168 		&length);
1169 	if (bytesWritten >= B_OK) {
1170 		if (length > SSIZE_MAX)
1171 			bytesWritten = SSIZE_MAX;
1172 		else
1173 			bytesWritten = (ssize_t)length;
1174 
1175 		if (movePosition)
1176 			descriptor->pos = pos + length;
1177 	}
1178 
1179 	return bytesWritten;
1180 }
1181 
1182 
1183 ssize_t
1184 _kern_writev(int fd, off_t pos, const iovec* vecs, size_t count)
1185 {
1186 	bool movePosition = false;
1187 	status_t status;
1188 
1189 	if (pos < -1)
1190 		return B_BAD_VALUE;
1191 
1192 	FDGetter fdGetter;
1193 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1194 
1195 	if (!descriptor)
1196 		return B_FILE_ERROR;
1197 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
1198 		return B_FILE_ERROR;
1199 
1200 	if (pos == -1) {
1201 		pos = descriptor->pos;
1202 		movePosition = true;
1203 	}
1204 
1205 	if (descriptor->ops->fd_write == NULL)
1206 		return B_BAD_VALUE;
1207 
1208 	SyscallFlagUnsetter _;
1209 
1210 	ssize_t bytesWritten = 0;
1211 
1212 	for (size_t i = 0; i < count; i++) {
1213 		size_t length = vecs[i].iov_len;
1214 		status = descriptor->ops->fd_write(descriptor, pos,
1215 			vecs[i].iov_base, &length);
1216 		if (status != B_OK) {
1217 			bytesWritten = status;
1218 			break;
1219 		}
1220 
1221 		if ((uint64)bytesWritten + length > SSIZE_MAX)
1222 			bytesWritten = SSIZE_MAX;
1223 		else
1224 			bytesWritten += (ssize_t)length;
1225 
1226 		pos += vecs[i].iov_len;
1227 	}
1228 
1229 	if (movePosition)
1230 		descriptor->pos = pos;
1231 
1232 	return bytesWritten;
1233 }
1234 
1235 
1236 off_t
1237 _kern_seek(int fd, off_t pos, int seekType)
1238 {
1239 	struct file_descriptor* descriptor;
1240 
1241 	descriptor = get_fd(get_current_io_context(true), fd);
1242 	if (!descriptor)
1243 		return B_FILE_ERROR;
1244 
1245 	if (descriptor->ops->fd_seek)
1246 		pos = descriptor->ops->fd_seek(descriptor, pos, seekType);
1247 	else
1248 		pos = ESPIPE;
1249 
1250 	put_fd(descriptor);
1251 	return pos;
1252 }
1253 
1254 
1255 status_t
1256 _kern_ioctl(int fd, uint32 op, void* buffer, size_t length)
1257 {
1258 	TRACE(("kern_ioctl: fd %d\n", fd));
1259 
1260 	SyscallFlagUnsetter _;
1261 
1262 	return fd_ioctl(true, fd, op, buffer, length);
1263 }
1264 
1265 
1266 ssize_t
1267 _kern_read_dir(int fd, struct dirent* buffer, size_t bufferSize,
1268 	uint32 maxCount)
1269 {
1270 	struct file_descriptor* descriptor;
1271 	ssize_t retval;
1272 
1273 	TRACE(("sys_read_dir(fd = %d, buffer = %p, bufferSize = %ld, count = "
1274 		"%" B_PRIu32 ")\n",fd, buffer, bufferSize, maxCount));
1275 
1276 	struct io_context* ioContext = get_current_io_context(true);
1277 	descriptor = get_fd(ioContext, fd);
1278 	if (descriptor == NULL)
1279 		return B_FILE_ERROR;
1280 
1281 	if (descriptor->ops->fd_read_dir) {
1282 		uint32 count = maxCount;
1283 		retval = descriptor->ops->fd_read_dir(ioContext, descriptor, buffer,
1284 			bufferSize, &count);
1285 		if (retval >= 0)
1286 			retval = count;
1287 	} else
1288 		retval = B_UNSUPPORTED;
1289 
1290 	put_fd(descriptor);
1291 	return retval;
1292 }
1293 
1294 
1295 status_t
1296 _kern_rewind_dir(int fd)
1297 {
1298 	struct file_descriptor* descriptor;
1299 	status_t status;
1300 
1301 	TRACE(("sys_rewind_dir(fd = %d)\n",fd));
1302 
1303 	descriptor = get_fd(get_current_io_context(true), fd);
1304 	if (descriptor == NULL)
1305 		return B_FILE_ERROR;
1306 
1307 	if (descriptor->ops->fd_rewind_dir)
1308 		status = descriptor->ops->fd_rewind_dir(descriptor);
1309 	else
1310 		status = B_UNSUPPORTED;
1311 
1312 	put_fd(descriptor);
1313 	return status;
1314 }
1315 
1316 
1317 status_t
1318 _kern_close(int fd)
1319 {
1320 	return common_close(fd, true);
1321 }
1322 
1323 
1324 int
1325 _kern_dup(int fd)
1326 {
1327 	return dup_fd(fd, true);
1328 }
1329 
1330 
1331 int
1332 _kern_dup2(int ofd, int nfd)
1333 {
1334 	return dup2_fd(ofd, nfd, true);
1335 }
1336 
1337