xref: /haiku/src/system/kernel/fs/fd.cpp (revision cda5b8808fd0262f0fac472f6cfa809f846a83cf)
/*
 * Copyright 2002-2008, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 */

//! Operations on file descriptors

#include <fd.h>

#include <stdlib.h>
#include <string.h>

#include <OS.h>

#include <AutoDeleter.h>

#include <syscalls.h>
#include <syscall_restart.h>
#include <util/AutoLock.h>
#include <vfs.h>
#include <wait_for_objects.h>


//#define TRACE_FD
#ifdef TRACE_FD
#	define TRACE(x) dprintf x
#else
#	define TRACE(x)
#endif


static struct file_descriptor* get_fd_locked(struct io_context* context,
	int fd);
static struct file_descriptor *remove_fd(struct io_context *context, int fd);
static void deselect_select_infos(file_descriptor* descriptor,
	select_info* infos);


struct FDGetterLocking {
	inline bool Lock(file_descriptor* /*lockable*/)
	{
		return false;
	}

	inline void Unlock(file_descriptor* lockable)
	{
		put_fd(lockable);
	}
};

class FDGetter : public AutoLocker<file_descriptor, FDGetterLocking> {
public:
	inline FDGetter()
		: AutoLocker<file_descriptor, FDGetterLocking>()
	{
	}

	inline FDGetter(io_context* context, int fd, bool contextLocked = false)
		: AutoLocker<file_descriptor, FDGetterLocking>(
			contextLocked ? get_fd_locked(context, fd) : get_fd(context, fd))
	{
	}

	inline file_descriptor* SetTo(io_context* context, int fd,
		bool contextLocked = false)
	{
		file_descriptor* descriptor
			= contextLocked ? get_fd_locked(context, fd) : get_fd(context, fd);
		AutoLocker<file_descriptor, FDGetterLocking>::SetTo(descriptor, true);
		return descriptor;
	}

	inline file_descriptor* SetTo(int fd, bool kernel,
		bool contextLocked = false)
	{
		return SetTo(get_current_io_context(kernel), fd, contextLocked);
	}

	inline file_descriptor* FD() const
	{
		return fLockable;
	}
};


/*** General fd routines ***/


#ifdef DEBUG
void dump_fd(int fd, struct file_descriptor *descriptor);

void
dump_fd(int fd, struct file_descriptor *descriptor)
{
	dprintf("fd[%d] = %p: type = %ld, ref_count = %ld, ops = %p, u.vnode = %p, u.mount = %p, cookie = %p, open_mode = %lx, pos = %Ld\n",
		fd, descriptor, descriptor->type, descriptor->ref_count, descriptor->ops,
		descriptor->u.vnode, descriptor->u.mount, descriptor->cookie, descriptor->open_mode, descriptor->pos);
}
#endif


/** Allocates and initializes a new file_descriptor */

struct file_descriptor *
alloc_fd(void)
{
	file_descriptor *descriptor
		= (file_descriptor*)malloc(sizeof(struct file_descriptor));
	if (descriptor == NULL)
		return NULL;

	descriptor->u.vnode = NULL;
	descriptor->cookie = NULL;
	descriptor->ref_count = 1;
	descriptor->open_count = 0;
	descriptor->open_mode = 0;
	descriptor->pos = 0;

	return descriptor;
}

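/**	Returns whether the close-on-exec flag is set for the given FD in the
 *	context's close-on-exec bitmap.
 */
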
bool
fd_close_on_exec(struct io_context *context, int fd)
{
	return CHECK_BIT(context->fds_close_on_exec[fd / 8], fd & 7) ? true : false;
}

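/**	Sets or clears the close-on-exec flag for the given FD in the context's
 *	close-on-exec bitmap.
 */
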
void
fd_set_close_on_exec(struct io_context *context, int fd, bool closeFD)
{
	if (closeFD)
		context->fds_close_on_exec[fd / 8] |= (1 << (fd & 7));
	else
		context->fds_close_on_exec[fd / 8] &= ~(1 << (fd & 7));
}

/**	Searches for a free slot in the FD table of the given I/O context, and
 *	inserts the specified descriptor into it.
 */

int
new_fd_etc(struct io_context *context, struct file_descriptor *descriptor, int firstIndex)
{
	int fd = -1;
	uint32 i;

	mutex_lock(&context->io_mutex);

	for (i = firstIndex; i < context->table_size; i++) {
		if (!context->fds[i]) {
			fd = i;
			break;
		}
	}
	if (fd < 0) {
		fd = B_NO_MORE_FDS;
		goto err;
	}

	context->fds[fd] = descriptor;
	context->num_used_fds++;
	atomic_add(&descriptor->open_count, 1);

err:
	mutex_unlock(&context->io_mutex);

	return fd;
}

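/**	Inserts the descriptor into the lowest free slot of the context's FD
 *	table.
 */
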
int
new_fd(struct io_context *context, struct file_descriptor *descriptor)
{
	return new_fd_etc(context, descriptor, 0);
}


/**	Reduces the descriptor's reference counter, and frees all resources
 *	when it's no longer used.
 */

void
put_fd(struct file_descriptor *descriptor)
{
	int32 previous = atomic_add(&descriptor->ref_count, -1);

	TRACE(("put_fd(descriptor = %p [ref = %ld, cookie = %p])\n",
		descriptor, descriptor->ref_count, descriptor->cookie));

	// free the descriptor if we don't need it anymore
	if (previous == 1) {
		// free the underlying object
		if (descriptor->ops != NULL && descriptor->ops->fd_free != NULL)
			descriptor->ops->fd_free(descriptor);

		free(descriptor);
	} else if ((descriptor->open_mode & O_DISCONNECTED) != 0
		&& previous - 1 == descriptor->open_count
		&& descriptor->ops != NULL) {
		// the descriptor has been disconnected - it cannot
		// be accessed anymore, let's close it (no one is
		// currently accessing this descriptor)

		if (descriptor->ops->fd_close)
			descriptor->ops->fd_close(descriptor);
		if (descriptor->ops->fd_free)
			descriptor->ops->fd_free(descriptor);

		// prevent this descriptor from being closed/freed again
		descriptor->open_count = -1;
		descriptor->ref_count = -1;
		descriptor->ops = NULL;
		descriptor->u.vnode = NULL;

		// the file descriptor is kept intact, so that it's not
		// reused until someone explicitly closes it
	}
}


/**	Decrements the open counter of the file descriptor and invokes
 *	its close hook when appropriate.
 */

void
close_fd(struct file_descriptor *descriptor)
{
	if (atomic_add(&descriptor->open_count, -1) == 1) {
		vfs_unlock_vnode_if_locked(descriptor);

		if (descriptor->ops != NULL && descriptor->ops->fd_close != NULL)
			descriptor->ops->fd_close(descriptor);
	}
}


status_t
close_fd_index(struct io_context *context, int fd)
{
	struct file_descriptor *descriptor = remove_fd(context, fd);

	if (descriptor == NULL)
		return B_FILE_ERROR;

	close_fd(descriptor);
	put_fd(descriptor);
		// the reference associated with the slot

	return B_OK;
}

/**	This descriptor's underlying object will be closed and freed as soon as
 *	possible (in one of the next calls to put_fd() - get_fd() will no longer
 *	succeed on this descriptor).
 *	This is useful if the underlying object is gone, for instance when a
 *	(mounted) volume has been removed unexpectedly.
 */

void
disconnect_fd(struct file_descriptor *descriptor)
{
	descriptor->open_mode |= O_DISCONNECTED;
}


void
inc_fd_ref_count(struct file_descriptor *descriptor)
{
	atomic_add(&descriptor->ref_count, 1);
}

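/**	Returns the descriptor for the given FD and acquires a reference to it,
 *	or returns NULL if the FD is out of range, unused, or disconnected.
 *	The context's io_mutex must be held by the caller.
 */
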
static struct file_descriptor *
get_fd_locked(struct io_context *context, int fd)
{
	if (fd < 0 || (uint32)fd >= context->table_size)
		return NULL;

	struct file_descriptor *descriptor = context->fds[fd];

	if (descriptor != NULL) {
		// Disconnected descriptors cannot be accessed anymore
		if (descriptor->open_mode & O_DISCONNECTED)
			descriptor = NULL;
		else
			inc_fd_ref_count(descriptor);
	}

	return descriptor;
}

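/**	Locks the I/O context and looks up the descriptor for the given FD,
 *	acquiring a reference to it. The caller is responsible for calling
 *	put_fd() when done with the descriptor.
 */
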
struct file_descriptor *
get_fd(struct io_context *context, int fd)
{
	MutexLocker locker(context->io_mutex);

	return get_fd_locked(context, fd);
}


/**	Removes the file descriptor from the specified slot.
 */

static struct file_descriptor *
remove_fd(struct io_context *context, int fd)
{
	struct file_descriptor *descriptor = NULL;

	if (fd < 0)
		return NULL;

	mutex_lock(&context->io_mutex);

	if ((uint32)fd < context->table_size)
		descriptor = context->fds[fd];

	select_info* selectInfos = NULL;
	bool disconnected = false;

	if (descriptor) {
		// fd is valid
		context->fds[fd] = NULL;
		fd_set_close_on_exec(context, fd, false);
		context->num_used_fds--;

		selectInfos = context->select_infos[fd];
		context->select_infos[fd] = NULL;

		disconnected = (descriptor->open_mode & O_DISCONNECTED) != 0;
	}

	mutex_unlock(&context->io_mutex);

	if (selectInfos != NULL)
		deselect_select_infos(descriptor, selectInfos);

	return disconnected ? NULL : descriptor;
}

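/**	Duplicates the given FD into the lowest free slot of the current I/O
 *	context. The new FD's close-on-exec flag is cleared, as POSIX requires
 *	for dup(). Returns the new FD or an error code.
 */
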
static int
dup_fd(int fd, bool kernel)
{
	struct io_context *context = get_current_io_context(kernel);
	struct file_descriptor *descriptor;
	int status;

	TRACE(("dup_fd: fd = %d\n", fd));

	// Try to get the fd structure
	descriptor = get_fd(context, fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	// now put the fd in place
	status = new_fd(context, descriptor);
	if (status < 0)
		put_fd(descriptor);
	else {
		mutex_lock(&context->io_mutex);
		fd_set_close_on_exec(context, status, false);
		mutex_unlock(&context->io_mutex);
	}

	return status;
}


/*!	POSIX says this should be the same as:
		close(newfd);
		fcntl(oldfd, F_DUPFD, newfd);

	We do dup2() directly to be thread-safe.
*/
static int
dup2_fd(int oldfd, int newfd, bool kernel)
{
	struct file_descriptor *evicted = NULL;
	struct io_context *context;

	TRACE(("dup2_fd: ofd = %d, nfd = %d\n", oldfd, newfd));

	// quick check
	if (oldfd < 0 || newfd < 0)
		return B_FILE_ERROR;

	// Get current I/O context and lock it
	context = get_current_io_context(kernel);
	mutex_lock(&context->io_mutex);

	// Check if the fds are valid (mutex must be locked because
	// the table size could be changed)
	if ((uint32)oldfd >= context->table_size
		|| (uint32)newfd >= context->table_size
		|| context->fds[oldfd] == NULL) {
		mutex_unlock(&context->io_mutex);
		return B_FILE_ERROR;
	}

	// Check for identity; this check can't be done before the validity
	// checks above, because we always want to return an error on invalid
	// handles
	select_info* selectInfos = NULL;
	if (oldfd != newfd) {
		// Now do the work
		evicted = context->fds[newfd];
		selectInfos = context->select_infos[newfd];
		context->select_infos[newfd] = NULL;
		atomic_add(&context->fds[oldfd]->ref_count, 1);
		atomic_add(&context->fds[oldfd]->open_count, 1);
		context->fds[newfd] = context->fds[oldfd];

		if (evicted == NULL)
			context->num_used_fds++;
	}

	fd_set_close_on_exec(context, newfd, false);

	mutex_unlock(&context->io_mutex);

	// Say bye bye to the evicted fd
	if (evicted) {
		deselect_select_infos(evicted, selectInfos);
		close_fd(evicted);
		put_fd(evicted);
	}

	return newfd;
}

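/**	Common back end for the ioctl() calls: resolves the FD in the current
 *	I/O context and dispatches the operation to the descriptor's fd_ioctl
 *	hook, if it provides one.
 */
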
static status_t
fd_ioctl(bool kernelFD, int fd, ulong op, void *buffer, size_t length)
{
	struct file_descriptor *descriptor;
	status_t status;

	descriptor = get_fd(get_current_io_context(kernelFD), fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	if (descriptor->ops->fd_ioctl)
		status = descriptor->ops->fd_ioctl(descriptor, op, buffer, length);
	else
		status = EOPNOTSUPP;

	put_fd(descriptor);
	return status;
}

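/**	Deselects all events of the given select infos on the descriptor,
 *	notifies the waiters with B_EVENT_INVALID, and releases the reference
 *	to each info's sync object.
 */
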
static void
deselect_select_infos(file_descriptor* descriptor, select_info* infos)
{
	TRACE(("deselect_select_infos(%p, %p)\n", descriptor, infos));

	select_info* info = infos;
	while (info != NULL) {
		select_sync* sync = info->sync;

		// deselect the selected events
		if (descriptor->ops->fd_deselect && info->selected_events) {
			for (uint16 event = 1; event < 16; event++) {
				if (info->selected_events & SELECT_FLAG(event)) {
					descriptor->ops->fd_deselect(descriptor, event,
						(selectsync*)info);
				}
			}
		}

		notify_select_events(info, B_EVENT_INVALID);
		info = info->next;
		put_select_sync(sync);
	}
}

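/**	Selects the events requested in the given select info on the FD. The
 *	info is queued in the I/O context, so it can be deselected when the FD
 *	is closed; while queued, it keeps a reference to its sync object.
 */
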
status_t
select_fd(int32 fd, struct select_info* info, bool kernel)
{
	TRACE(("select_fd(fd = %d, info = %p (%p), 0x%x)\n", fd, info,
		info->sync, info->selected_events));

	FDGetter fdGetter;
		// define before the context locker, so it will be destroyed after it

	io_context* context = get_current_io_context(kernel);
	MutexLocker locker(context->io_mutex);

	struct file_descriptor* descriptor = fdGetter.SetTo(context, fd, true);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	if (info->selected_events == 0)
		return B_OK;

	if (!descriptor->ops->fd_select) {
		// if the I/O subsystem doesn't support select(), we will
		// immediately notify the select call
		return notify_select_events(info, info->selected_events);
	}

	// add the info to the IO context
	info->next = context->select_infos[fd];
	context->select_infos[fd] = info;

	// as long as the info is in the list, we keep a reference to the sync
	// object
	atomic_add(&info->sync->ref_count, 1);

	locker.Unlock();

	// select any events asked for
	uint32 selectedEvents = 0;

	for (uint16 event = 1; event < 16; event++) {
		if (info->selected_events & SELECT_FLAG(event)
			&& descriptor->ops->fd_select(descriptor, event,
				(selectsync*)info) == B_OK) {
			selectedEvents |= SELECT_FLAG(event);
		}
	}
	info->selected_events = selectedEvents;

	// if nothing has been selected, we deselect immediately
	if (selectedEvents == 0)
		deselect_fd(fd, info, kernel);

	return B_OK;
}

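/**	Reverses select_fd(): removes the info from the I/O context's queue,
 *	deselects its events on the descriptor, and releases the reference to
 *	the info's sync object.
 */
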
status_t
deselect_fd(int32 fd, struct select_info* info, bool kernel)
{
	TRACE(("deselect_fd(fd = %d, info = %p (%p), 0x%x)\n", fd, info,
		info->sync, info->selected_events));

	if (info->selected_events == 0)
		return B_OK;

	FDGetter fdGetter;
		// define before the context locker, so it will be destroyed after it

	io_context* context = get_current_io_context(kernel);
	MutexLocker locker(context->io_mutex);

	struct file_descriptor* descriptor = fdGetter.SetTo(context, fd, true);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	// remove the info from the IO context

	select_info** infoLocation = &context->select_infos[fd];
	while (*infoLocation != NULL && *infoLocation != info)
		infoLocation = &(*infoLocation)->next;

	// If not found, someone else beat us to it.
	if (*infoLocation != info)
		return B_OK;

	*infoLocation = info->next;

	locker.Unlock();

	// deselect the selected events
	if (descriptor->ops->fd_deselect && info->selected_events) {
		for (uint16 event = 1; event < 16; event++) {
			if (info->selected_events & SELECT_FLAG(event)) {
				descriptor->ops->fd_deselect(descriptor, event,
					(selectsync*)info);
			}
		}
	}

	put_select_sync(info->sync);

	return B_OK;
}


/** This function checks if the specified fd is valid in the current
 *	context. It can be used for a quick check; the fd is not locked
 *	so it could become invalid immediately after this check.
 */

bool
fd_is_valid(int fd, bool kernel)
{
	struct file_descriptor *descriptor = get_fd(get_current_io_context(kernel), fd);
	if (descriptor == NULL)
		return false;

	put_fd(descriptor);
	return true;
}

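/**	Returns the vnode the given descriptor refers to, or NULL if the
 *	descriptor's type has no vnode associated with it.
 */
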
struct vnode *
fd_vnode(struct file_descriptor *descriptor)
{
	switch (descriptor->type) {
		case FDTYPE_FILE:
		case FDTYPE_DIR:
		case FDTYPE_ATTR_DIR:
		case FDTYPE_ATTR:
			return descriptor->u.vnode;
	}

	return NULL;
}


static status_t
common_close(int fd, bool kernel)
{
	return close_fd_index(get_current_io_context(kernel), fd);
}

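/**	Common back end for _user_read() and _user_write(). The buffer must be
 *	a userland address; a position of -1 means to use and advance the
 *	descriptor's current position.
 */
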
static ssize_t
common_user_io(int fd, off_t pos, void *buffer, size_t length, bool write)
{
	if (!IS_USER_ADDRESS(buffer))
		return B_BAD_ADDRESS;

	if (pos < -1)
		return B_BAD_VALUE;

	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(fd, false);
	if (!descriptor)
		return B_FILE_ERROR;

	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
		return B_FILE_ERROR;
	}

	bool movePosition = false;
	if (pos == -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (write ? descriptor->ops->fd_write == NULL
			: descriptor->ops->fd_read == NULL) {
		return B_BAD_VALUE;
	}

	SyscallRestartWrapper<status_t> status;

	if (write)
		status = descriptor->ops->fd_write(descriptor, pos, buffer, &length);
	else
		status = descriptor->ops->fd_read(descriptor, pos, buffer, &length);

	if (status < B_OK)
		return status;

	if (movePosition)
		descriptor->pos = pos + length;

	return length <= SSIZE_MAX ? (ssize_t)length : SSIZE_MAX;
}

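/**	Vectored counterpart of common_user_io(), backing _user_readv() and
 *	_user_writev(). The vector array is first copied in from userland; once
 *	any bytes have been transferred, an error aborts the loop and the
 *	partial count is returned instead of the error.
 */
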
static ssize_t
common_user_vector_io(int fd, off_t pos, const iovec *userVecs, size_t count,
	bool write)
{
	if (!IS_USER_ADDRESS(userVecs))
		return B_BAD_ADDRESS;

	if (pos < -1)
		return B_BAD_VALUE;

	// prevent integer overflow exploit in malloc()
	if (count > IOV_MAX)
		return B_BAD_VALUE;

	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(fd, false);
	if (!descriptor)
		return B_FILE_ERROR;

	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
		return B_FILE_ERROR;
	}

	iovec* vecs = (iovec*)malloc(sizeof(iovec) * count);
	if (vecs == NULL)
		return B_NO_MEMORY;
	MemoryDeleter _(vecs);

	if (user_memcpy(vecs, userVecs, sizeof(iovec) * count) < B_OK)
		return B_BAD_ADDRESS;

	bool movePosition = false;
	if (pos == -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (write ? descriptor->ops->fd_write == NULL
			: descriptor->ops->fd_read == NULL) {
		return B_BAD_VALUE;
	}

	SyscallRestartWrapper<status_t> status;

	ssize_t bytesTransferred = 0;
	for (uint32 i = 0; i < count; i++) {
		size_t length = vecs[i].iov_len;
		if (write) {
			status = descriptor->ops->fd_write(descriptor, pos,
				vecs[i].iov_base, &length);
		} else {
			status = descriptor->ops->fd_read(descriptor, pos, vecs[i].iov_base,
				&length);
		}

		if (status < B_OK) {
			if (bytesTransferred == 0)
				return status;
			status = B_OK;
			break;
		}

		if ((uint64)bytesTransferred + length > SSIZE_MAX)
			bytesTransferred = SSIZE_MAX;
		else
			bytesTransferred += (ssize_t)length;

		pos += length;

		if (length < vecs[i].iov_len)
			break;
	}

	if (movePosition)
		descriptor->pos = pos;

	return bytesTransferred;
}


status_t
user_fd_kernel_ioctl(int fd, ulong op, void *buffer, size_t length)
{
	TRACE(("user_fd_kernel_ioctl: fd %d\n", fd));

	return fd_ioctl(false, fd, op, buffer, length);
}


//	#pragma mark - User syscalls


ssize_t
_user_read(int fd, off_t pos, void *buffer, size_t length)
{
	return common_user_io(fd, pos, buffer, length, false);
}


ssize_t
_user_readv(int fd, off_t pos, const iovec *userVecs, size_t count)
{
	return common_user_vector_io(fd, pos, userVecs, count, false);
}


ssize_t
_user_write(int fd, off_t pos, const void *buffer, size_t length)
{
	return common_user_io(fd, pos, (void*)buffer, length, true);
}


ssize_t
_user_writev(int fd, off_t pos, const iovec *userVecs, size_t count)
{
	return common_user_vector_io(fd, pos, userVecs, count, true);
}


off_t
_user_seek(int fd, off_t pos, int seekType)
{
	syscall_64_bit_return_value();

	struct file_descriptor *descriptor;

	descriptor = get_fd(get_current_io_context(false), fd);
	if (!descriptor)
		return B_FILE_ERROR;

	TRACE(("user_seek(descriptor = %p)\n", descriptor));

	if (descriptor->ops->fd_seek)
		pos = descriptor->ops->fd_seek(descriptor, pos, seekType);
	else
		pos = ESPIPE;

	put_fd(descriptor);
	return pos;
}


status_t
_user_ioctl(int fd, ulong op, void *buffer, size_t length)
{
	struct file_descriptor *descriptor;

	if (!IS_USER_ADDRESS(buffer))
		return B_BAD_ADDRESS;

	TRACE(("user_ioctl: fd %d\n", fd));

	SyscallRestartWrapper<status_t> status;

	return status = fd_ioctl(false, fd, op, buffer, length);
}


ssize_t
_user_read_dir(int fd, struct dirent *buffer, size_t bufferSize, uint32 maxCount)
{
	struct file_descriptor *descriptor;
	ssize_t retval;

	if (!IS_USER_ADDRESS(buffer))
		return B_BAD_ADDRESS;

	TRACE(("user_read_dir(fd = %d, buffer = %p, bufferSize = %ld, count = %lu)\n", fd, buffer, bufferSize, maxCount));

	struct io_context* ioContext = get_current_io_context(false);
	descriptor = get_fd(ioContext, fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	if (descriptor->ops->fd_read_dir) {
		uint32 count = maxCount;
		retval = descriptor->ops->fd_read_dir(ioContext, descriptor, buffer,
			bufferSize, &count);
		if (retval >= 0)
			retval = count;
	} else
		retval = EOPNOTSUPP;

	put_fd(descriptor);
	return retval;
}


status_t
_user_rewind_dir(int fd)
{
	struct file_descriptor *descriptor;
	status_t status;

	TRACE(("user_rewind_dir(fd = %d)\n", fd));

	descriptor = get_fd(get_current_io_context(false), fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	if (descriptor->ops->fd_rewind_dir)
		status = descriptor->ops->fd_rewind_dir(descriptor);
	else
		status = EOPNOTSUPP;

	put_fd(descriptor);
	return status;
}


status_t
_user_close(int fd)
{
	return common_close(fd, false);
}


int
_user_dup(int fd)
{
	return dup_fd(fd, false);
}


int
_user_dup2(int ofd, int nfd)
{
	return dup2_fd(ofd, nfd, false);
}


//	#pragma mark - Kernel calls


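/**	Kernel counterpart of _user_read(): the buffer is trusted to be a valid
 *	kernel address, and the thread's syscall flag is temporarily unset
 *	(SyscallFlagUnsetter), since the caller is the kernel, not userland.
 */
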
ssize_t
_kern_read(int fd, off_t pos, void *buffer, size_t length)
{
	if (pos < -1)
		return B_BAD_VALUE;

	FDGetter fdGetter;
	struct file_descriptor *descriptor = fdGetter.SetTo(fd, true);

	if (!descriptor)
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
		return B_FILE_ERROR;

	bool movePosition = false;
	if (pos == -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	SyscallFlagUnsetter _;

	if (descriptor->ops->fd_read == NULL)
		return B_BAD_VALUE;

	ssize_t bytesRead = descriptor->ops->fd_read(descriptor, pos, buffer,
		&length);
	if (bytesRead >= B_OK) {
		if (length > SSIZE_MAX)
			bytesRead = SSIZE_MAX;
		else
			bytesRead = (ssize_t)length;

		if (movePosition)
			descriptor->pos = pos + length;
	}

	return bytesRead;
}


ssize_t
_kern_readv(int fd, off_t pos, const iovec *vecs, size_t count)
{
	bool movePosition = false;
	status_t status;
	uint32 i;

	if (pos < -1)
		return B_BAD_VALUE;

	FDGetter fdGetter;
	struct file_descriptor *descriptor = fdGetter.SetTo(fd, true);

	if (!descriptor)
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
		return B_FILE_ERROR;

	if (pos == -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (descriptor->ops->fd_read == NULL)
		return B_BAD_VALUE;

	SyscallFlagUnsetter _;

	ssize_t bytesRead = 0;

	for (i = 0; i < count; i++) {
		size_t length = vecs[i].iov_len;
		status = descriptor->ops->fd_read(descriptor, pos, vecs[i].iov_base,
			&length);
		if (status < B_OK) {
			bytesRead = status;
			break;
		}

		if ((uint64)bytesRead + length > SSIZE_MAX)
			bytesRead = SSIZE_MAX;
		else
			bytesRead += (ssize_t)length;

		pos += vecs[i].iov_len;
	}

	if (movePosition)
		descriptor->pos = pos;

	return bytesRead;
}


ssize_t
_kern_write(int fd, off_t pos, const void *buffer, size_t length)
{
	if (pos < -1)
		return B_BAD_VALUE;

	FDGetter fdGetter;
	struct file_descriptor *descriptor = fdGetter.SetTo(fd, true);

	if (descriptor == NULL)
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
		return B_FILE_ERROR;

	bool movePosition = false;
	if (pos == -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (descriptor->ops->fd_write == NULL)
		return B_BAD_VALUE;

	SyscallFlagUnsetter _;

	ssize_t bytesWritten = descriptor->ops->fd_write(descriptor, pos, buffer,
		&length);
	if (bytesWritten >= B_OK) {
		if (length > SSIZE_MAX)
			bytesWritten = SSIZE_MAX;
		else
			bytesWritten = (ssize_t)length;

		if (movePosition)
			descriptor->pos = pos + length;
	}

	return bytesWritten;
}


ssize_t
_kern_writev(int fd, off_t pos, const iovec *vecs, size_t count)
{
	bool movePosition = false;
	status_t status;
	uint32 i;

	if (pos < -1)
		return B_BAD_VALUE;

	FDGetter fdGetter;
	struct file_descriptor *descriptor = fdGetter.SetTo(fd, true);

	if (!descriptor)
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
		return B_FILE_ERROR;

	if (pos == -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (descriptor->ops->fd_write == NULL)
		return B_BAD_VALUE;

	SyscallFlagUnsetter _;

	ssize_t bytesWritten = 0;

	for (i = 0; i < count; i++) {
		size_t length = vecs[i].iov_len;
		status = descriptor->ops->fd_write(descriptor, pos,
			vecs[i].iov_base, &length);
		if (status < B_OK) {
			bytesWritten = status;
			break;
		}

		if ((uint64)bytesWritten + length > SSIZE_MAX)
			bytesWritten = SSIZE_MAX;
		else
			bytesWritten += (ssize_t)length;

		pos += vecs[i].iov_len;
	}

	if (movePosition)
		descriptor->pos = pos;

	return bytesWritten;
}


off_t
_kern_seek(int fd, off_t pos, int seekType)
{
	struct file_descriptor *descriptor;

	descriptor = get_fd(get_current_io_context(true), fd);
	if (!descriptor)
		return B_FILE_ERROR;

	if (descriptor->ops->fd_seek)
		pos = descriptor->ops->fd_seek(descriptor, pos, seekType);
	else
		pos = ESPIPE;

	put_fd(descriptor);
	return pos;
}


status_t
_kern_ioctl(int fd, ulong op, void *buffer, size_t length)
{
	TRACE(("kern_ioctl: fd %d\n", fd));

	SyscallFlagUnsetter _;

	return fd_ioctl(true, fd, op, buffer, length);
}


ssize_t
_kern_read_dir(int fd, struct dirent *buffer, size_t bufferSize, uint32 maxCount)
{
	struct file_descriptor *descriptor;
	ssize_t retval;

	TRACE(("sys_read_dir(fd = %d, buffer = %p, bufferSize = %ld, count = %lu)\n", fd, buffer, bufferSize, maxCount));

	struct io_context* ioContext = get_current_io_context(true);
	descriptor = get_fd(ioContext, fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	if (descriptor->ops->fd_read_dir) {
		uint32 count = maxCount;
		retval = descriptor->ops->fd_read_dir(ioContext, descriptor, buffer,
			bufferSize, &count);
		if (retval >= 0)
			retval = count;
	} else
		retval = EOPNOTSUPP;

	put_fd(descriptor);
	return retval;
}


status_t
_kern_rewind_dir(int fd)
{
	struct file_descriptor *descriptor;
	status_t status;

	TRACE(("sys_rewind_dir(fd = %d)\n", fd));

	descriptor = get_fd(get_current_io_context(true), fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	if (descriptor->ops->fd_rewind_dir)
		status = descriptor->ops->fd_rewind_dir(descriptor);
	else
		status = EOPNOTSUPP;

	put_fd(descriptor);
	return status;
}


status_t
_kern_close(int fd)
{
	return common_close(fd, true);
}


int
_kern_dup(int fd)
{
	return dup_fd(fd, true);
}


int
_kern_dup2(int ofd, int nfd)
{
	return dup2_fd(ofd, nfd, true);
}