xref: /haiku/src/system/kernel/fs/fd.cpp (revision b671e9bbdbd10268a042b4f4cc4317ccd03d105e)
1 /*
2  * Copyright 2009, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  */
6 
7 
8 //! Operations on file descriptors
9 
10 
11 #include <fd.h>
12 
13 #include <stdlib.h>
14 #include <string.h>
15 
16 #include <OS.h>
17 
18 #include <AutoDeleter.h>
19 
20 #include <syscalls.h>
21 #include <syscall_restart.h>
22 #include <util/AutoLock.h>
23 #include <vfs.h>
24 #include <wait_for_objects.h>
25 
26 
27 //#define TRACE_FD
28 #ifdef TRACE_FD
29 #	define TRACE(x) dprintf x
30 #else
31 #	define TRACE(x)
32 #endif
33 
34 
35 static const size_t kMaxReadDirBufferSize = 64 * 1024;
36 
37 
38 static struct file_descriptor* get_fd_locked(struct io_context* context,
39 	int fd);
40 static struct file_descriptor* remove_fd(struct io_context* context, int fd);
41 static void deselect_select_infos(file_descriptor* descriptor,
42 	select_info* infos);
43 
44 
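// FDGetterLocking and FDGetter below form a small RAII helper: an FDGetter
// acquires a reference to a file_descriptor via get_fd()/get_fd_locked() and
// releases it again with put_fd() when it goes out of scope.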
45 struct FDGetterLocking {
46 	inline bool Lock(file_descriptor* /*lockable*/)
47 	{
48 		return false;
49 	}
50 
51 	inline void Unlock(file_descriptor* lockable)
52 	{
53 		put_fd(lockable);
54 	}
55 };
56 
57 class FDGetter : public AutoLocker<file_descriptor, FDGetterLocking> {
58 public:
59 	inline FDGetter()
60 		: AutoLocker<file_descriptor, FDGetterLocking>()
61 	{
62 	}
63 
64 	inline FDGetter(io_context* context, int fd, bool contextLocked = false)
65 		: AutoLocker<file_descriptor, FDGetterLocking>(
66 			contextLocked ? get_fd_locked(context, fd) : get_fd(context, fd))
67 	{
68 	}
69 
70 	inline file_descriptor* SetTo(io_context* context, int fd,
71 		bool contextLocked = false)
72 	{
73 		file_descriptor* descriptor
74 			= contextLocked ? get_fd_locked(context, fd) : get_fd(context, fd);
75 		AutoLocker<file_descriptor, FDGetterLocking>::SetTo(descriptor, true);
76 		return descriptor;
77 	}
78 
79 	inline file_descriptor* SetTo(int fd, bool kernel,
80 		bool contextLocked = false)
81 	{
82 		return SetTo(get_current_io_context(kernel), fd, contextLocked);
83 	}
84 
85 	inline file_descriptor* FD() const
86 	{
87 		return fLockable;
88 	}
89 };
90 
91 
92 //	#pragma mark - General fd routines
93 
94 
95 #ifdef DEBUG
96 void dump_fd(int fd, struct file_descriptor* descriptor);
97 
98 void
99 dump_fd(int fd, struct file_descriptor* descriptor)
100 {
101 	dprintf("fd[%d] = %p: type = %ld, ref_count = %ld, ops = %p, u.vnode = %p, "
102 		"u.mount = %p, cookie = %p, open_mode = %lx, pos = %Ld\n",
103 		fd, descriptor, descriptor->type, descriptor->ref_count,
104 		descriptor->ops, descriptor->u.vnode, descriptor->u.mount,
105 		descriptor->cookie, descriptor->open_mode, descriptor->pos);
106 }
107 #endif
108 
109 
110 /*! Allocates and initializes a new file_descriptor.
111 */
112 struct file_descriptor*
113 alloc_fd(void)
114 {
115 	file_descriptor* descriptor
116 		= (file_descriptor*)malloc(sizeof(struct file_descriptor));
117 	if (descriptor == NULL)
118 		return NULL;
119 
120 	descriptor->u.vnode = NULL;
121 	descriptor->cookie = NULL;
122 	descriptor->ref_count = 1;
123 	descriptor->open_count = 0;
124 	descriptor->open_mode = 0;
125 	descriptor->pos = 0;
126 
127 	return descriptor;
128 }
129 
130 
131 bool
132 fd_close_on_exec(struct io_context* context, int fd)
133 {
134 	return CHECK_BIT(context->fds_close_on_exec[fd / 8], fd & 7) ? true : false;
135 }
136 
137 
138 void
139 fd_set_close_on_exec(struct io_context* context, int fd, bool closeFD)
140 {
141 	if (closeFD)
142 		context->fds_close_on_exec[fd / 8] |= (1 << (fd & 7));
143 	else
144 		context->fds_close_on_exec[fd / 8] &= ~(1 << (fd & 7));
145 }
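// Worked example of the bitmap arithmetic above: for fd 10 the close-on-exec
// flag lives in fds_close_on_exec[10 / 8] == fds_close_on_exec[1], at bit
// (10 & 7) == 2, i.e. mask 1 << 2 == 0x04.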
146 
147 
148 /*!	Searches for a free slot in the FD table of the provided I/O context, and
149 	inserts the specified descriptor into it.
150 */
151 int
152 new_fd_etc(struct io_context* context, struct file_descriptor* descriptor,
153 	int firstIndex)
154 {
155 	int fd = -1;
156 	uint32 i;
157 
158 	mutex_lock(&context->io_mutex);
159 
160 	for (i = firstIndex; i < context->table_size; i++) {
161 		if (!context->fds[i]) {
162 			fd = i;
163 			break;
164 		}
165 	}
166 	if (fd < 0) {
167 		fd = B_NO_MORE_FDS;
168 		goto err;
169 	}
170 
171 	context->fds[fd] = descriptor;
172 	context->num_used_fds++;
173 	atomic_add(&descriptor->open_count, 1);
174 
175 err:
176 	mutex_unlock(&context->io_mutex);
177 
178 	return fd;
179 }
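// A hypothetical helper (not part of this file) sketching how the firstIndex
// parameter can be used, e.g. for F_DUPFD-style "lowest free slot not below
// minFD" allocation; it mirrors dup_fd() below, handing the reference
// acquired by get_fd() over to the new slot:
//
//	static int
//	dup_fd_at_least(struct io_context* context, int fd, int minFD)
//	{
//		struct file_descriptor* descriptor = get_fd(context, fd);
//		if (descriptor == NULL)
//			return B_FILE_ERROR;
//
//		int newFD = new_fd_etc(context, descriptor, minFD);
//		if (newFD < 0)
//			put_fd(descriptor);
//
//		return newFD;
//	}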
180 
181 
182 int
183 new_fd(struct io_context* context, struct file_descriptor* descriptor)
184 {
185 	return new_fd_etc(context, descriptor, 0);
186 }
187 
188 
189 /*!	Reduces the descriptor's reference counter, and frees all resources
190 	when it's no longer used.
191 */
192 void
193 put_fd(struct file_descriptor* descriptor)
194 {
195 	int32 previous = atomic_add(&descriptor->ref_count, -1);
196 
197 	TRACE(("put_fd(descriptor = %p [ref = %ld, cookie = %p])\n",
198 		descriptor, descriptor->ref_count, descriptor->cookie));
199 
200 	// free the descriptor if we don't need it anymore
201 	if (previous == 1) {
202 		// free the underlying object
203 		if (descriptor->ops != NULL && descriptor->ops->fd_free != NULL)
204 			descriptor->ops->fd_free(descriptor);
205 
206 		free(descriptor);
207 	} else if ((descriptor->open_mode & O_DISCONNECTED) != 0
208 		&& previous - 1 == descriptor->open_count
209 		&& descriptor->ops != NULL) {
210 		// the descriptor has been disconnected - it cannot
211 		// be accessed anymore, let's close it (no one is
212 		// currently accessing this descriptor)
213 
214 		if (descriptor->ops->fd_close)
215 			descriptor->ops->fd_close(descriptor);
216 		if (descriptor->ops->fd_free)
217 			descriptor->ops->fd_free(descriptor);
218 
219 		// prevent this descriptor from being closed/freed again
220 		descriptor->open_count = -1;
221 		descriptor->ref_count = -1;
222 		descriptor->ops = NULL;
223 		descriptor->u.vnode = NULL;
224 
225 		// the file descriptor is kept intact, so that it's not
226 		// reused until someone explicitly closes it
227 	}
228 }
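// A minimal usage sketch (hypothetical, for illustration only) of the
// borrow-and-release pattern used throughout this file: every reference
// obtained via get_fd() must eventually be returned with put_fd():
//
//	static status_t
//	example_get_open_mode(int fd, bool kernel, int* _openMode)
//	{
//		struct file_descriptor* descriptor
//			= get_fd(get_current_io_context(kernel), fd);
//		if (descriptor == NULL)
//			return B_FILE_ERROR;
//
//		*_openMode = descriptor->open_mode;
//
//		put_fd(descriptor);
//			// drops the reference acquired by get_fd()
//		return B_OK;
//	}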
229 
230 
231 /*!	Decrements the open counter of the file descriptor and invokes
232 	its close hook when appropriate.
233 */
234 void
235 close_fd(struct file_descriptor* descriptor)
236 {
237 	if (atomic_add(&descriptor->open_count, -1) == 1) {
238 		vfs_unlock_vnode_if_locked(descriptor);
239 
240 		if (descriptor->ops != NULL && descriptor->ops->fd_close != NULL)
241 			descriptor->ops->fd_close(descriptor);
242 	}
243 }
244 
245 
246 status_t
247 close_fd_index(struct io_context* context, int fd)
248 {
249 	struct file_descriptor* descriptor = remove_fd(context, fd);
250 
251 	if (descriptor == NULL)
252 		return B_FILE_ERROR;
253 
254 	close_fd(descriptor);
255 	put_fd(descriptor);
256 		// the reference associated with the slot
257 
258 	return B_OK;
259 }
260 
261 
262 /*!	This descriptor's underlying object will be closed and freed as soon as
263 	possible (in one of the next calls to put_fd() - get_fd() will no longer
264 	succeed on this descriptor).
265 	This is useful if the underlying object is gone, for instance when a
266 	(mounted) volume has been removed unexpectedly.
267 */
268 void
269 disconnect_fd(struct file_descriptor* descriptor)
270 {
271 	descriptor->open_mode |= O_DISCONNECTED;
272 }
273 
274 
275 void
276 inc_fd_ref_count(struct file_descriptor* descriptor)
277 {
278 	atomic_add(&descriptor->ref_count, 1);
279 }
280 
281 
282 static struct file_descriptor*
283 get_fd_locked(struct io_context* context, int fd)
284 {
285 	if (fd < 0 || (uint32)fd >= context->table_size)
286 		return NULL;
287 
288 	struct file_descriptor* descriptor = context->fds[fd];
289 
290 	if (descriptor != NULL) {
291 		// Disconnected descriptors cannot be accessed anymore
292 		if (descriptor->open_mode & O_DISCONNECTED)
293 			descriptor = NULL;
294 		else
295 			inc_fd_ref_count(descriptor);
296 	}
297 
298 	return descriptor;
299 }
300 
301 
302 struct file_descriptor*
303 get_fd(struct io_context* context, int fd)
304 {
305 	MutexLocker _(context->io_mutex);
306 
307 	return get_fd_locked(context, fd);
308 }
309 
310 
311 struct file_descriptor*
312 get_open_fd(struct io_context* context, int fd)
313 {
314 	MutexLocker _(context->io_mutex);
315 
316 	file_descriptor* descriptor = get_fd_locked(context, fd);
317 	if (descriptor == NULL)
318 		return NULL;
319 
320 	atomic_add(&descriptor->open_count, 1);
321 
322 	return descriptor;
323 }
324 
325 
326 /*!	Removes the file descriptor from the specified slot.
327 */
328 static struct file_descriptor*
329 remove_fd(struct io_context* context, int fd)
330 {
331 	struct file_descriptor* descriptor = NULL;
332 
333 	if (fd < 0)
334 		return NULL;
335 
336 	mutex_lock(&context->io_mutex);
337 
338 	if ((uint32)fd < context->table_size)
339 		descriptor = context->fds[fd];
340 
341 	select_info* selectInfos = NULL;
342 	bool disconnected = false;
343 
344 	if (descriptor) {
345 		// fd is valid
346 		context->fds[fd] = NULL;
347 		fd_set_close_on_exec(context, fd, false);
348 		context->num_used_fds--;
349 
350 		selectInfos = context->select_infos[fd];
351 		context->select_infos[fd] = NULL;
352 
353 		disconnected = (descriptor->open_mode & O_DISCONNECTED);
354 	}
355 
356 	mutex_unlock(&context->io_mutex);
357 
358 	if (selectInfos != NULL)
359 		deselect_select_infos(descriptor, selectInfos);
360 
361 	return disconnected ? NULL : descriptor;
362 }
363 
364 
365 static int
366 dup_fd(int fd, bool kernel)
367 {
368 	struct io_context* context = get_current_io_context(kernel);
369 	struct file_descriptor* descriptor;
370 	int status;
371 
372 	TRACE(("dup_fd: fd = %d\n", fd));
373 
374 	// Try to get the fd structure
375 	descriptor = get_fd(context, fd);
376 	if (descriptor == NULL)
377 		return B_FILE_ERROR;
378 
379 	// now put the fd in place
380 	status = new_fd(context, descriptor);
381 	if (status < 0)
382 		put_fd(descriptor);
383 	else {
384 		mutex_lock(&context->io_mutex);
385 		fd_set_close_on_exec(context, status, false);
386 		mutex_unlock(&context->io_mutex);
387 	}
388 
389 	return status;
390 }
391 
392 
393 /*!	POSIX describes dup2() as behaving like:
394 		close(newfd);
395 		fcntl(oldfd, F_DUPFD, newfd);
396 
397 	We implement dup2() directly so that the operation is atomic (thread-safe).
398 */
399 static int
400 dup2_fd(int oldfd, int newfd, bool kernel)
401 {
402 	struct file_descriptor* evicted = NULL;
403 	struct io_context* context;
404 
405 	TRACE(("dup2_fd: ofd = %d, nfd = %d\n", oldfd, newfd));
406 
407 	// quick check
408 	if (oldfd < 0 || newfd < 0)
409 		return B_FILE_ERROR;
410 
411 	// Get current I/O context and lock it
412 	context = get_current_io_context(kernel);
413 	mutex_lock(&context->io_mutex);
414 
415 	// Check if the fds are valid (mutex must be locked because
416 	// the table size could be changed)
417 	if ((uint32)oldfd >= context->table_size
418 		|| (uint32)newfd >= context->table_size
419 		|| context->fds[oldfd] == NULL) {
420 		mutex_unlock(&context->io_mutex);
421 		return B_FILE_ERROR;
422 	}
423 
424 	// Check for identity; note that this check cannot be done above,
425 	// because we always want to return an error on invalid
426 	// handles
427 	select_info* selectInfos = NULL;
428 	if (oldfd != newfd) {
429 		// Now do the work
430 		evicted = context->fds[newfd];
431 		selectInfos = context->select_infos[newfd];
432 		context->select_infos[newfd] = NULL;
433 		atomic_add(&context->fds[oldfd]->ref_count, 1);
434 		atomic_add(&context->fds[oldfd]->open_count, 1);
435 		context->fds[newfd] = context->fds[oldfd];
436 
437 		if (evicted == NULL)
438 			context->num_used_fds++;
439 	}
440 
441 	fd_set_close_on_exec(context, newfd, false);
442 
443 	mutex_unlock(&context->io_mutex);
444 
445 	// Say bye bye to the evicted fd
446 	if (evicted) {
447 		deselect_select_infos(evicted, selectInfos);
448 		close_fd(evicted);
449 		put_fd(evicted);
450 	}
451 
452 	return newfd;
453 }
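// For illustration, a hedged userland sketch (not part of the kernel) of the
// dup2() behavior implemented above: the target FD is atomically closed, if
// open, and made to refer to the same open file as the source FD.
//
//	#include <fcntl.h>
//	#include <unistd.h>
//
//	void
//	redirect_stdout_to(const char* path)
//	{
//		int fd = open(path, O_WRONLY | O_CREAT | O_APPEND, 0644);
//		if (fd < 0)
//			return;
//
//		if (fd != STDOUT_FILENO) {
//			dup2(fd, STDOUT_FILENO);
//				// STDOUT_FILENO now refers to the same open file as fd
//			close(fd);
//		}
//	}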
454 
455 
456 /*!	Duplicates an FD from another team into the current team or the kernel team.
457 	\param fromTeam The team which owns the FD.
458 	\param fd The FD to duplicate.
459 	\param kernel If \c true, the new FD will be created in the kernel team,
460 			the current userland team otherwise.
461 	\return The newly created FD or an error code, if something went wrong.
462 */
463 int
464 dup_foreign_fd(team_id fromTeam, int fd, bool kernel)
465 {
466 	// get the I/O context for the team in question
467 	InterruptsSpinLocker teamsLocker(gTeamSpinlock);
468 	struct team* team = team_get_team_struct_locked(fromTeam);
469 	if (team == NULL)
470 		return B_BAD_TEAM_ID;
471 
472 	io_context* fromContext = team->io_context;
473 	vfs_get_io_context(fromContext);
474 
475 	teamsLocker.Unlock();
476 
477 	CObjectDeleter<io_context> _(fromContext, vfs_put_io_context);
478 
479 	// get the file descriptor
480 	file_descriptor* descriptor = get_fd(fromContext, fd);
481 	if (descriptor == NULL)
482 		return B_FILE_ERROR;
483 	CObjectDeleter<file_descriptor> descriptorPutter(descriptor, put_fd);
484 
485 	// create a new FD in the target I/O context
486 	int result = new_fd(get_current_io_context(kernel), descriptor);
487 	if (result >= 0) {
488 		// the descriptor reference belongs to the slot, now
489 		descriptorPutter.Detach();
490 	}
491 
492 	return result;
493 }
494 
495 
496 static status_t
497 fd_ioctl(bool kernelFD, int fd, ulong op, void* buffer, size_t length)
498 {
499 	struct file_descriptor* descriptor;
500 	status_t status;
501 
502 	descriptor = get_fd(get_current_io_context(kernelFD), fd);
503 	if (descriptor == NULL)
504 		return B_FILE_ERROR;
505 
506 	if (descriptor->ops->fd_ioctl)
507 		status = descriptor->ops->fd_ioctl(descriptor, op, buffer, length);
508 	else
509 		status = EOPNOTSUPP;
510 
511 	put_fd(descriptor);
512 	return status;
513 }
514 
515 
516 static void
517 deselect_select_infos(file_descriptor* descriptor, select_info* infos)
518 {
519 	TRACE(("deselect_select_infos(%p, %p)\n", descriptor, infos));
520 
521 	select_info* info = infos;
522 	while (info != NULL) {
523 		select_sync* sync = info->sync;
524 
525 		// deselect the selected events
526 		uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
527 		if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
528 			for (uint16 event = 1; event < 16; event++) {
529 				if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
530 					descriptor->ops->fd_deselect(descriptor, event,
531 						(selectsync*)info);
532 				}
533 			}
534 		}
535 
536 		notify_select_events(info, B_EVENT_INVALID);
537 		info = info->next;
538 		put_select_sync(sync);
539 	}
540 }
541 
542 
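// Selects the requested events on the given FD: each event is passed to the
// descriptor's fd_select() hook, and the select_info is linked into the I/O
// context's select_infos table so that it can be deselected again when the
// FD is closed or the wait finishes.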
543 status_t
544 select_fd(int32 fd, struct select_info* info, bool kernel)
545 {
546 	TRACE(("select_fd(fd = %ld, info = %p (%p), 0x%x)\n", fd, info,
547 		info->sync, info->selected_events));
548 
549 	FDGetter fdGetter;
550 		// define before the context locker, so it will be destroyed after it
551 
552 	io_context* context = get_current_io_context(kernel);
553 	MutexLocker locker(context->io_mutex);
554 
555 	struct file_descriptor* descriptor = fdGetter.SetTo(context, fd, true);
556 	if (descriptor == NULL)
557 		return B_FILE_ERROR;
558 
559 	uint16 eventsToSelect = info->selected_events & ~B_EVENT_INVALID;
560 
561 	if (descriptor->ops->fd_select == NULL && eventsToSelect != 0) {
562 		// if the I/O subsystem doesn't support select(), we will
563 		// immediately notify the select call
564 		return notify_select_events(info, eventsToSelect);
565 	}
566 
567 	// We need the FD to stay open while we're doing this, so that
568 	// select()/deselect() won't be called on it after it has been closed.
569 	atomic_add(&descriptor->open_count, 1);
570 
571 	locker.Unlock();
572 
573 	// select any events asked for
574 	uint32 selectedEvents = 0;
575 
576 	for (uint16 event = 1; event < 16; event++) {
577 		if ((eventsToSelect & SELECT_FLAG(event)) != 0
578 			&& descriptor->ops->fd_select(descriptor, event,
579 				(selectsync*)info) == B_OK) {
580 			selectedEvents |= SELECT_FLAG(event);
581 		}
582 	}
583 	info->selected_events = selectedEvents
584 		| (info->selected_events & B_EVENT_INVALID);
585 
586 	// Add the info to the I/O context. Even if nothing has been selected, we
587 	// always support B_EVENT_INVALID.
588 	locker.Lock();
589 	if (context->fds[fd] != descriptor) {
590 		// Someone close()d the index in the meantime. deselect() all
591 		// events.
592 		info->next = NULL;
593 		deselect_select_infos(descriptor, info);
594 
595 		// Release our open reference of the descriptor.
596 		close_fd(descriptor);
597 		return B_FILE_ERROR;
598 	}
599 
600 	// The FD index hasn't changed, so we add the select info to the table.
601 
602 	info->next = context->select_infos[fd];
603 	context->select_infos[fd] = info;
604 
605 	// As long as the info is in the list, we keep a reference to the sync
606 	// object.
607 	atomic_add(&info->sync->ref_count, 1);
608 
609 	// Finally release our open reference. It is safe just to decrement,
610 	// since as long as the descriptor is associated with the slot,
611 	// someone else still has it open.
612 	atomic_add(&descriptor->open_count, -1);
613 
614 	return B_OK;
615 }
616 
617 
618 status_t
619 deselect_fd(int32 fd, struct select_info* info, bool kernel)
620 {
621 	TRACE(("deselect_fd(fd = %ld, info = %p (%p), 0x%x)\n", fd, info,
622 		info->sync, info->selected_events));
623 
624 	FDGetter fdGetter;
625 		// define before the context locker, so it will be destroyed after it
626 
627 	io_context* context = get_current_io_context(kernel);
628 	MutexLocker locker(context->io_mutex);
629 
630 	struct file_descriptor* descriptor = fdGetter.SetTo(context, fd, true);
631 	if (descriptor == NULL)
632 		return B_FILE_ERROR;
633 
634 	// remove the info from the IO context
635 
636 	select_info** infoLocation = &context->select_infos[fd];
637 	while (*infoLocation != NULL && *infoLocation != info)
638 		infoLocation = &(*infoLocation)->next;
639 
640 	// If not found, someone else beat us to it.
641 	if (*infoLocation != info)
642 		return B_OK;
643 
644 	*infoLocation = info->next;
645 
646 	locker.Unlock();
647 
648 	// deselect the selected events
649 	uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
650 	if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
651 		for (uint16 event = 1; event < 16; event++) {
652 			if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
653 				descriptor->ops->fd_deselect(descriptor, event,
654 					(selectsync*)info);
655 			}
656 		}
657 	}
658 
659 	put_select_sync(info->sync);
660 
661 	return B_OK;
662 }
663 
664 
665 /*!	This function checks if the specified fd is valid in the current
666 	context. It can be used for a quick check; the fd is not locked
667 	so it could become invalid immediately after this check.
668 */
669 bool
670 fd_is_valid(int fd, bool kernel)
671 {
672 	struct file_descriptor* descriptor
673 		= get_fd(get_current_io_context(kernel), fd);
674 	if (descriptor == NULL)
675 		return false;
676 
677 	put_fd(descriptor);
678 	return true;
679 }
680 
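// Returns the vnode backing the given descriptor for vnode-based descriptor
// types (files, directories, attributes, and attribute directories), or NULL
// for all other descriptor types.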
681 
682 struct vnode*
683 fd_vnode(struct file_descriptor* descriptor)
684 {
685 	switch (descriptor->type) {
686 		case FDTYPE_FILE:
687 		case FDTYPE_DIR:
688 		case FDTYPE_ATTR_DIR:
689 		case FDTYPE_ATTR:
690 			return descriptor->u.vnode;
691 	}
692 
693 	return NULL;
694 }
695 
696 
697 static status_t
698 common_close(int fd, bool kernel)
699 {
700 	return close_fd_index(get_current_io_context(kernel), fd);
701 }
702 
703 
704 static ssize_t
705 common_user_io(int fd, off_t pos, void* buffer, size_t length, bool write)
706 {
707 	if (!IS_USER_ADDRESS(buffer))
708 		return B_BAD_ADDRESS;
709 
710 	if (pos < -1)
711 		return B_BAD_VALUE;
712 
713 	FDGetter fdGetter;
714 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, false);
715 	if (!descriptor)
716 		return B_FILE_ERROR;
717 
718 	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
719 			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
720 		return B_FILE_ERROR;
721 	}
722 
723 	bool movePosition = false;
724 	if (pos == -1) {
725 		pos = descriptor->pos;
726 		movePosition = true;
727 	}
728 
729 	if (write ? descriptor->ops->fd_write == NULL
730 			: descriptor->ops->fd_read == NULL) {
731 		return B_BAD_VALUE;
732 	}
733 
734 	SyscallRestartWrapper<status_t> status;
735 
736 	if (write)
737 		status = descriptor->ops->fd_write(descriptor, pos, buffer, &length);
738 	else
739 		status = descriptor->ops->fd_read(descriptor, pos, buffer, &length);
740 
741 	if (status != B_OK)
742 		return status;
743 
744 	if (movePosition)
745 		descriptor->pos = pos + length;
746 
747 	return length <= SSIZE_MAX ? (ssize_t)length : SSIZE_MAX;
748 }
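// Note on the pos parameter of the I/O functions above and below (an
// assumption about the userland wrappers, not verified here): read()/write()
// are expected to pass pos == -1, so the descriptor's current position is
// used and advanced, while pread()/pwrite() pass an explicit offset and
// leave the position untouched.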
749 
750 
751 static ssize_t
752 common_user_vector_io(int fd, off_t pos, const iovec* userVecs, size_t count,
753 	bool write)
754 {
755 	if (!IS_USER_ADDRESS(userVecs))
756 		return B_BAD_ADDRESS;
757 
758 	if (pos < -1)
759 		return B_BAD_VALUE;
760 
761 	// prevent an integer overflow exploit in the malloc() size computation
762 	if (count > IOV_MAX)
763 		return B_BAD_VALUE;
764 
765 	FDGetter fdGetter;
766 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, false);
767 	if (!descriptor)
768 		return B_FILE_ERROR;
769 
770 	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
771 			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
772 		return B_FILE_ERROR;
773 	}
774 
775 	iovec* vecs = (iovec*)malloc(sizeof(iovec) * count);
776 	if (vecs == NULL)
777 		return B_NO_MEMORY;
778 	MemoryDeleter _(vecs);
779 
780 	if (user_memcpy(vecs, userVecs, sizeof(iovec) * count) != B_OK)
781 		return B_BAD_ADDRESS;
782 
783 	bool movePosition = false;
784 	if (pos == -1) {
785 		pos = descriptor->pos;
786 		movePosition = true;
787 	}
788 
789 	if (write ? descriptor->ops->fd_write == NULL
790 			: descriptor->ops->fd_read == NULL) {
791 		return B_BAD_VALUE;
792 	}
793 
794 	SyscallRestartWrapper<status_t> status;
795 
796 	ssize_t bytesTransferred = 0;
797 	for (uint32 i = 0; i < count; i++) {
798 		size_t length = vecs[i].iov_len;
799 		if (write) {
800 			status = descriptor->ops->fd_write(descriptor, pos,
801 				vecs[i].iov_base, &length);
802 		} else {
803 			status = descriptor->ops->fd_read(descriptor, pos, vecs[i].iov_base,
804 				&length);
805 		}
806 
807 		if (status != B_OK) {
808 			if (bytesTransferred == 0)
809 				return status;
810 			status = B_OK;
811 			break;
812 		}
813 
814 		if ((uint64)bytesTransferred + length > SSIZE_MAX)
815 			bytesTransferred = SSIZE_MAX;
816 		else
817 			bytesTransferred += (ssize_t)length;
818 
819 		pos += length;
820 
821 		if (length < vecs[i].iov_len)
822 			break;
823 	}
824 
825 	if (movePosition)
826 		descriptor->pos = pos;
827 
828 	return bytesTransferred;
829 }
830 
831 
832 status_t
833 user_fd_kernel_ioctl(int fd, ulong op, void* buffer, size_t length)
834 {
835 	TRACE(("user_fd_kernel_ioctl: fd %d\n", fd));
836 
837 	return fd_ioctl(false, fd, op, buffer, length);
838 }
839 
840 
841 //	#pragma mark - User syscalls
842 
843 
844 ssize_t
845 _user_read(int fd, off_t pos, void* buffer, size_t length)
846 {
847 	return common_user_io(fd, pos, buffer, length, false);
848 }
849 
850 
851 ssize_t
852 _user_readv(int fd, off_t pos, const iovec* userVecs, size_t count)
853 {
854 	return common_user_vector_io(fd, pos, userVecs, count, false);
855 }
856 
857 
858 ssize_t
859 _user_write(int fd, off_t pos, const void* buffer, size_t length)
860 {
861 	return common_user_io(fd, pos, (void*)buffer, length, true);
862 }
863 
864 
865 ssize_t
866 _user_writev(int fd, off_t pos, const iovec* userVecs, size_t count)
867 {
868 	return common_user_vector_io(fd, pos, userVecs, count, true);
869 }
870 
871 
872 off_t
873 _user_seek(int fd, off_t pos, int seekType)
874 {
875 	syscall_64_bit_return_value();
876 
877 	struct file_descriptor* descriptor;
878 
879 	descriptor = get_fd(get_current_io_context(false), fd);
880 	if (!descriptor)
881 		return B_FILE_ERROR;
882 
883 	TRACE(("user_seek(descriptor = %p)\n", descriptor));
884 
885 	if (descriptor->ops->fd_seek)
886 		pos = descriptor->ops->fd_seek(descriptor, pos, seekType);
887 	else
888 		pos = ESPIPE;
889 
890 	put_fd(descriptor);
891 	return pos;
892 }
893 
894 
895 status_t
896 _user_ioctl(int fd, ulong op, void* buffer, size_t length)
897 {
898 	if (!IS_USER_ADDRESS(buffer))
899 		return B_BAD_ADDRESS;
900 
901 	TRACE(("user_ioctl: fd %d\n", fd));
902 
903 	SyscallRestartWrapper<status_t> status;
904 
905 	return status = fd_ioctl(false, fd, op, buffer, length);
906 }
907 
908 
909 ssize_t
910 _user_read_dir(int fd, struct dirent* userBuffer, size_t bufferSize,
911 	uint32 maxCount)
912 {
913 	TRACE(("user_read_dir(fd = %d, userBuffer = %p, bufferSize = %ld, count = "
914 		"%lu)\n", fd, userBuffer, bufferSize, maxCount));
915 
916 	if (maxCount == 0)
917 		return 0;
918 
919 	if (userBuffer == NULL || !IS_USER_ADDRESS(userBuffer))
920 		return B_BAD_ADDRESS;
921 
922 	// get I/O context and FD
923 	io_context* ioContext = get_current_io_context(false);
924 	FDGetter fdGetter;
925 	struct file_descriptor* descriptor = fdGetter.SetTo(ioContext, fd, false);
926 	if (descriptor == NULL)
927 		return B_FILE_ERROR;
928 
929 	if (descriptor->ops->fd_read_dir == NULL)
930 		return B_UNSUPPORTED;
931 
932 	// restrict buffer size and allocate a heap buffer
933 	if (bufferSize > kMaxReadDirBufferSize)
934 		bufferSize = kMaxReadDirBufferSize;
935 	struct dirent* buffer = (struct dirent*)malloc(bufferSize);
936 	if (buffer == NULL)
937 		return B_NO_MEMORY;
938 	MemoryDeleter bufferDeleter(buffer);
939 
940 	// read the directory
941 	uint32 count = maxCount;
942 	status_t status = descriptor->ops->fd_read_dir(ioContext, descriptor,
943 		buffer, bufferSize, &count);
944 	if (status != B_OK)
945 		return status;
946 
947 	// copy the buffer back -- determine the total buffer size first
948 	size_t sizeToCopy = 0;
949 	struct dirent* entry = buffer;
950 	for (uint32 i = 0; i < count; i++) {
951 		size_t length = entry->d_reclen;
952 		sizeToCopy += length;
953 		entry = (struct dirent*)((uint8*)entry + length);
954 	}
955 
956 	if (user_memcpy(userBuffer, buffer, sizeToCopy) != B_OK)
957 		return B_BAD_ADDRESS;
958 
959 	return count;
960 }
961 
962 
963 status_t
964 _user_rewind_dir(int fd)
965 {
966 	struct file_descriptor* descriptor;
967 	status_t status;
968 
969 	TRACE(("user_rewind_dir(fd = %d)\n", fd));
970 
971 	descriptor = get_fd(get_current_io_context(false), fd);
972 	if (descriptor == NULL)
973 		return B_FILE_ERROR;
974 
975 	if (descriptor->ops->fd_rewind_dir)
976 		status = descriptor->ops->fd_rewind_dir(descriptor);
977 	else
978 		status = EOPNOTSUPP;
979 
980 	put_fd(descriptor);
981 	return status;
982 }
983 
984 
985 status_t
986 _user_close(int fd)
987 {
988 	return common_close(fd, false);
989 }
990 
991 
992 int
993 _user_dup(int fd)
994 {
995 	return dup_fd(fd, false);
996 }
997 
998 
999 int
1000 _user_dup2(int ofd, int nfd)
1001 {
1002 	return dup2_fd(ofd, nfd, false);
1003 }
1004 
1005 
1006 //	#pragma mark - Kernel calls
1007 
1008 
1009 ssize_t
1010 _kern_read(int fd, off_t pos, void* buffer, size_t length)
1011 {
1012 	if (pos < -1)
1013 		return B_BAD_VALUE;
1014 
1015 	FDGetter fdGetter;
1016 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1017 
1018 	if (!descriptor)
1019 		return B_FILE_ERROR;
1020 	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
1021 		return B_FILE_ERROR;
1022 
1023 	bool movePosition = false;
1024 	if (pos == -1) {
1025 		pos = descriptor->pos;
1026 		movePosition = true;
1027 	}
1028 
1029 	SyscallFlagUnsetter _;
1030 
1031 	if (descriptor->ops->fd_read == NULL)
1032 		return B_BAD_VALUE;
1033 
1034 	ssize_t bytesRead = descriptor->ops->fd_read(descriptor, pos, buffer,
1035 		&length);
1036 	if (bytesRead >= B_OK) {
1037 		if (length > SSIZE_MAX)
1038 			bytesRead = SSIZE_MAX;
1039 		else
1040 			bytesRead = (ssize_t)length;
1041 
1042 		if (movePosition)
1043 			descriptor->pos = pos + length;
1044 	}
1045 
1046 	return bytesRead;
1047 }
1048 
1049 
1050 ssize_t
1051 _kern_readv(int fd, off_t pos, const iovec* vecs, size_t count)
1052 {
1053 	bool movePosition = false;
1054 	status_t status;
1055 	uint32 i;
1056 
1057 	if (pos < -1)
1058 		return B_BAD_VALUE;
1059 
1060 	FDGetter fdGetter;
1061 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1062 
1063 	if (!descriptor)
1064 		return B_FILE_ERROR;
1065 	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
1066 		return B_FILE_ERROR;
1067 
1068 	if (pos == -1) {
1069 		pos = descriptor->pos;
1070 		movePosition = true;
1071 	}
1072 
1073 	if (descriptor->ops->fd_read == NULL)
1074 		return B_BAD_VALUE;
1075 
1076 	SyscallFlagUnsetter _;
1077 
1078 	ssize_t bytesRead = 0;
1079 
1080 	for (i = 0; i < count; i++) {
1081 		size_t length = vecs[i].iov_len;
1082 		status = descriptor->ops->fd_read(descriptor, pos, vecs[i].iov_base,
1083 			&length);
1084 		if (status != B_OK) {
1085 			bytesRead = status;
1086 			break;
1087 		}
1088 
1089 		if ((uint64)bytesRead + length > SSIZE_MAX)
1090 			bytesRead = SSIZE_MAX;
1091 		else
1092 			bytesRead += (ssize_t)length;
1093 
1094 		pos += vecs[i].iov_len;
1095 	}
1096 
1097 	if (movePosition)
1098 		descriptor->pos = pos;
1099 
1100 	return bytesRead;
1101 }
1102 
1103 
1104 ssize_t
1105 _kern_write(int fd, off_t pos, const void* buffer, size_t length)
1106 {
1107 	if (pos < -1)
1108 		return B_BAD_VALUE;
1109 
1110 	FDGetter fdGetter;
1111 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1112 
1113 	if (descriptor == NULL)
1114 		return B_FILE_ERROR;
1115 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
1116 		return B_FILE_ERROR;
1117 
1118 	bool movePosition = false;
1119 	if (pos == -1) {
1120 		pos = descriptor->pos;
1121 		movePosition = true;
1122 	}
1123 
1124 	if (descriptor->ops->fd_write == NULL)
1125 		return B_BAD_VALUE;
1126 
1127 	SyscallFlagUnsetter _;
1128 
1129 	ssize_t bytesWritten = descriptor->ops->fd_write(descriptor, pos, buffer,
1130 		&length);
1131 	if (bytesWritten >= B_OK) {
1132 		if (length > SSIZE_MAX)
1133 			bytesWritten = SSIZE_MAX;
1134 		else
1135 			bytesWritten = (ssize_t)length;
1136 
1137 		if (movePosition)
1138 			descriptor->pos = pos + length;
1139 	}
1140 
1141 	return bytesWritten;
1142 }
1143 
1144 
1145 ssize_t
1146 _kern_writev(int fd, off_t pos, const iovec* vecs, size_t count)
1147 {
1148 	bool movePosition = false;
1149 	status_t status;
1150 	uint32 i;
1151 
1152 	if (pos < -1)
1153 		return B_BAD_VALUE;
1154 
1155 	FDGetter fdGetter;
1156 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1157 
1158 	if (!descriptor)
1159 		return B_FILE_ERROR;
1160 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
1161 		return B_FILE_ERROR;
1162 
1163 	if (pos == -1) {
1164 		pos = descriptor->pos;
1165 		movePosition = true;
1166 	}
1167 
1168 	if (descriptor->ops->fd_write == NULL)
1169 		return B_BAD_VALUE;
1170 
1171 	SyscallFlagUnsetter _;
1172 
1173 	ssize_t bytesWritten = 0;
1174 
1175 	for (i = 0; i < count; i++) {
1176 		size_t length = vecs[i].iov_len;
1177 		status = descriptor->ops->fd_write(descriptor, pos,
1178 			vecs[i].iov_base, &length);
1179 		if (status != B_OK) {
1180 			bytesWritten = status;
1181 			break;
1182 		}
1183 
1184 		if ((uint64)bytesWritten + length > SSIZE_MAX)
1185 			bytesWritten = SSIZE_MAX;
1186 		else
1187 			bytesWritten += (ssize_t)length;
1188 
1189 		pos += vecs[i].iov_len;
1190 	}
1191 
1192 	if (movePosition)
1193 		descriptor->pos = pos;
1194 
1195 	return bytesWritten;
1196 }
1197 
1198 
1199 off_t
1200 _kern_seek(int fd, off_t pos, int seekType)
1201 {
1202 	struct file_descriptor* descriptor;
1203 
1204 	descriptor = get_fd(get_current_io_context(true), fd);
1205 	if (!descriptor)
1206 		return B_FILE_ERROR;
1207 
1208 	if (descriptor->ops->fd_seek)
1209 		pos = descriptor->ops->fd_seek(descriptor, pos, seekType);
1210 	else
1211 		pos = ESPIPE;
1212 
1213 	put_fd(descriptor);
1214 	return pos;
1215 }
1216 
1217 
1218 status_t
1219 _kern_ioctl(int fd, ulong op, void* buffer, size_t length)
1220 {
1221 	TRACE(("kern_ioctl: fd %d\n", fd));
1222 
1223 	SyscallFlagUnsetter _;
1224 
1225 	return fd_ioctl(true, fd, op, buffer, length);
1226 }
1227 
1228 
1229 ssize_t
1230 _kern_read_dir(int fd, struct dirent* buffer, size_t bufferSize,
1231 	uint32 maxCount)
1232 {
1233 	struct file_descriptor* descriptor;
1234 	ssize_t retval;
1235 
1236 	TRACE(("sys_read_dir(fd = %d, buffer = %p, bufferSize = %ld, count = "
1237 		"%lu)\n", fd, buffer, bufferSize, maxCount));
1238 
1239 	struct io_context* ioContext = get_current_io_context(true);
1240 	descriptor = get_fd(ioContext, fd);
1241 	if (descriptor == NULL)
1242 		return B_FILE_ERROR;
1243 
1244 	if (descriptor->ops->fd_read_dir) {
1245 		uint32 count = maxCount;
1246 		retval = descriptor->ops->fd_read_dir(ioContext, descriptor, buffer,
1247 			bufferSize, &count);
1248 		if (retval >= 0)
1249 			retval = count;
1250 	} else
1251 		retval = EOPNOTSUPP;
1252 
1253 	put_fd(descriptor);
1254 	return retval;
1255 }
1256 
1257 
1258 status_t
1259 _kern_rewind_dir(int fd)
1260 {
1261 	struct file_descriptor* descriptor;
1262 	status_t status;
1263 
1264 	TRACE(("sys_rewind_dir(fd = %d)\n", fd));
1265 
1266 	descriptor = get_fd(get_current_io_context(true), fd);
1267 	if (descriptor == NULL)
1268 		return B_FILE_ERROR;
1269 
1270 	if (descriptor->ops->fd_rewind_dir)
1271 		status = descriptor->ops->fd_rewind_dir(descriptor);
1272 	else
1273 		status = EOPNOTSUPP;
1274 
1275 	put_fd(descriptor);
1276 	return status;
1277 }
1278 
1279 
1280 status_t
1281 _kern_close(int fd)
1282 {
1283 	return common_close(fd, true);
1284 }
1285 
1286 
1287 int
1288 _kern_dup(int fd)
1289 {
1290 	return dup_fd(fd, true);
1291 }
1292 
1293 
1294 int
1295 _kern_dup2(int ofd, int nfd)
1296 {
1297 	return dup2_fd(ofd, nfd, true);
1298 }
1299 
1300