/*
 * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2015, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 */


//! Operations on file descriptors


#include <fd.h>

#include <stdlib.h>
#include <string.h>

#include <OS.h>

#include <AutoDeleter.h>

#include <syscalls.h>
#include <syscall_restart.h>
#include <util/AutoLock.h>
#include <vfs.h>
#include <wait_for_objects.h>

#include "vfs_tracing.h"


//#define TRACE_FD
#ifdef TRACE_FD
#	define TRACE(x) dprintf x
#else
#	define TRACE(x)
#endif


static const size_t kMaxReadDirBufferSize = 64 * 1024;


static struct file_descriptor* get_fd_locked(struct io_context* context,
	int fd);
static struct file_descriptor* remove_fd(struct io_context* context, int fd);
static void deselect_select_infos(file_descriptor* descriptor,
	select_info* infos, bool putSyncObjects);


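// FDGetterLocking and FDGetter form a small RAII helper: "locking" a
// file_descriptor here means holding a reference to it, and "unlocking"
// releases that reference via put_fd(). Lock() only reports failure, since
// the reference is always acquired through get_fd()/get_fd_locked() and
// never by the AutoLocker itself.
//
// Typical use (sketch only):
//
//	FDGetter fdGetter;
//	struct file_descriptor* descriptor = fdGetter.SetTo(fd, kernel);
//	if (descriptor == NULL)
//		return B_FILE_ERROR;
//	// ... use the descriptor; the reference is put automatically when
//	// fdGetter goes out of scope.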
struct FDGetterLocking {
	inline bool Lock(file_descriptor* /*lockable*/)
	{
		return false;
	}

	inline void Unlock(file_descriptor* lockable)
	{
		put_fd(lockable);
	}
};

class FDGetter : public AutoLocker<file_descriptor, FDGetterLocking> {
public:
	inline FDGetter()
		: AutoLocker<file_descriptor, FDGetterLocking>()
	{
	}

	inline FDGetter(io_context* context, int fd, bool contextLocked = false)
		: AutoLocker<file_descriptor, FDGetterLocking>(
			contextLocked ? get_fd_locked(context, fd) : get_fd(context, fd))
	{
	}

	inline file_descriptor* SetTo(io_context* context, int fd,
		bool contextLocked = false)
	{
		file_descriptor* descriptor
			= contextLocked ? get_fd_locked(context, fd) : get_fd(context, fd);
		AutoLocker<file_descriptor, FDGetterLocking>::SetTo(descriptor, true);
		return descriptor;
	}

	inline file_descriptor* SetTo(int fd, bool kernel,
		bool contextLocked = false)
	{
		return SetTo(get_current_io_context(kernel), fd, contextLocked);
	}

	inline file_descriptor* FD() const
	{
		return fLockable;
	}
};


//	#pragma mark - General fd routines


#ifdef DEBUG
void dump_fd(int fd, struct file_descriptor* descriptor);

void
dump_fd(int fd, struct file_descriptor* descriptor)
{
	dprintf("fd[%d] = %p: type = %" B_PRId32 ", ref_count = %" B_PRId32 ", ops "
		"= %p, u.vnode = %p, u.mount = %p, cookie = %p, open_mode = %" B_PRIx32
		", pos = %" B_PRId64 "\n",
		fd, descriptor, descriptor->type, descriptor->ref_count,
		descriptor->ops, descriptor->u.vnode, descriptor->u.mount,
		descriptor->cookie, descriptor->open_mode, descriptor->pos);
}
#endif


/*!	Allocates and initializes a new file_descriptor.
*/
struct file_descriptor*
alloc_fd(void)
{
	file_descriptor* descriptor
		= (file_descriptor*)malloc(sizeof(struct file_descriptor));
	if (descriptor == NULL)
		return NULL;

	descriptor->u.vnode = NULL;
	descriptor->cookie = NULL;
	descriptor->ref_count = 1;
	descriptor->open_count = 0;
	descriptor->open_mode = 0;
	descriptor->pos = 0;

	return descriptor;
}


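/*!	Returns whether the close-on-exec flag is set for the given FD.
	The flags are kept in a bitmap in the I/O context (fds_close_on_exec),
	one bit per descriptor: byte \c fd / 8, bit \c fd & 7.
*/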
bool
fd_close_on_exec(struct io_context* context, int fd)
{
	return CHECK_BIT(context->fds_close_on_exec[fd / 8], fd & 7) ? true : false;
}


void
fd_set_close_on_exec(struct io_context* context, int fd, bool closeFD)
{
	if (closeFD)
		context->fds_close_on_exec[fd / 8] |= (1 << (fd & 7));
	else
		context->fds_close_on_exec[fd / 8] &= ~(1 << (fd & 7));
}


/*!	Searches for a free slot in the FD table of the provided I/O context,
	and inserts the specified descriptor into it.
*/
int
new_fd_etc(struct io_context* context, struct file_descriptor* descriptor,
	int firstIndex)
{
	int fd = -1;
	uint32 i;

	mutex_lock(&context->io_mutex);

	for (i = firstIndex; i < context->table_size; i++) {
		if (!context->fds[i]) {
			fd = i;
			break;
		}
	}
	if (fd < 0) {
		fd = B_NO_MORE_FDS;
		goto err;
	}

	TFD(NewFD(context, fd, descriptor));

	context->fds[fd] = descriptor;
	context->num_used_fds++;
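	// a descriptor installed in a table slot counts as one open reference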
	atomic_add(&descriptor->open_count, 1);

err:
	mutex_unlock(&context->io_mutex);

	return fd;
}


int
new_fd(struct io_context* context, struct file_descriptor* descriptor)
{
	return new_fd_etc(context, descriptor, 0);
}


/*!	Decrements the descriptor's reference count, and frees all of its
	resources once the last reference has been released.
*/
void
put_fd(struct file_descriptor* descriptor)
{
	int32 previous = atomic_add(&descriptor->ref_count, -1);

	TFD(PutFD(descriptor));

	TRACE(("put_fd(descriptor = %p [ref = %ld, cookie = %p])\n",
		descriptor, descriptor->ref_count, descriptor->cookie));

	// free the descriptor if we don't need it anymore
	if (previous == 1) {
		// free the underlying object
		if (descriptor->ops != NULL && descriptor->ops->fd_free != NULL)
			descriptor->ops->fd_free(descriptor);

		free(descriptor);
	} else if ((descriptor->open_mode & O_DISCONNECTED) != 0
		&& previous - 1 == descriptor->open_count
		&& descriptor->ops != NULL) {
		// the descriptor has been disconnected - it cannot
		// be accessed anymore, let's close it (no one is
		// currently accessing this descriptor)

		if (descriptor->ops->fd_close)
			descriptor->ops->fd_close(descriptor);
		if (descriptor->ops->fd_free)
			descriptor->ops->fd_free(descriptor);

		// prevent this descriptor from being closed/freed again
		descriptor->ops = NULL;
		descriptor->u.vnode = NULL;

		// the file descriptor is kept intact, so that it's not
		// reused until someone explicitly closes it
	}
}


/*!	Decrements the open counter of the file descriptor and invokes
	its close hook when appropriate.
*/
void
close_fd(struct file_descriptor* descriptor)
{
	if (atomic_add(&descriptor->open_count, -1) == 1) {
		vfs_unlock_vnode_if_locked(descriptor);

		if (descriptor->ops != NULL && descriptor->ops->fd_close != NULL)
			descriptor->ops->fd_close(descriptor);
	}
}


status_t
close_fd_index(struct io_context* context, int fd)
{
	struct file_descriptor* descriptor = remove_fd(context, fd);

	if (descriptor == NULL)
		return B_FILE_ERROR;

	close_fd(descriptor);
	put_fd(descriptor);
		// the reference associated with the slot

	return B_OK;
}


/*!	This descriptor's underlying object will be closed and freed as soon as
	possible (in one of the next calls to put_fd() - get_fd() will no longer
	succeed on this descriptor).
	This is useful when the underlying object is gone, for instance because a
	(mounted) volume has been removed unexpectedly.
*/
void
disconnect_fd(struct file_descriptor* descriptor)
{
	descriptor->open_mode |= O_DISCONNECTED;
}


void
inc_fd_ref_count(struct file_descriptor* descriptor)
{
	atomic_add(&descriptor->ref_count, 1);
}


static struct file_descriptor*
get_fd_locked(struct io_context* context, int fd)
{
	if (fd < 0 || (uint32)fd >= context->table_size)
		return NULL;

	struct file_descriptor* descriptor = context->fds[fd];

	if (descriptor != NULL) {
		TFD(GetFD(context, fd, descriptor));
		inc_fd_ref_count(descriptor);
	}

	return descriptor;
}


struct file_descriptor*
get_fd(struct io_context* context, int fd)
{
	MutexLocker _(context->io_mutex);

	return get_fd_locked(context, fd);
}


struct file_descriptor*
get_open_fd(struct io_context* context, int fd)
{
	MutexLocker _(context->io_mutex);

	file_descriptor* descriptor = get_fd_locked(context, fd);
	if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
		return NULL;

	atomic_add(&descriptor->open_count, 1);

	return descriptor;
}


/*!	Removes the file descriptor from the specified slot.
*/
static struct file_descriptor*
remove_fd(struct io_context* context, int fd)
{
	struct file_descriptor* descriptor = NULL;

	if (fd < 0)
		return NULL;

	mutex_lock(&context->io_mutex);

	if ((uint32)fd < context->table_size)
		descriptor = context->fds[fd];

	select_info* selectInfos = NULL;
	bool disconnected = false;

	if (descriptor != NULL) {
		// fd is valid
		TFD(RemoveFD(context, fd, descriptor));

		context->fds[fd] = NULL;
		fd_set_close_on_exec(context, fd, false);
		context->num_used_fds--;

		selectInfos = context->select_infos[fd];
		context->select_infos[fd] = NULL;

		disconnected = (descriptor->open_mode & O_DISCONNECTED);
	}

	mutex_unlock(&context->io_mutex);

	if (selectInfos != NULL)
		deselect_select_infos(descriptor, selectInfos, true);

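	// a disconnected descriptor is not handed back to the caller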
	return disconnected ? NULL : descriptor;
}


static int
dup_fd(int fd, bool kernel)
{
	struct io_context* context = get_current_io_context(kernel);
	struct file_descriptor* descriptor;
	int status;

	TRACE(("dup_fd: fd = %d\n", fd));

	// Try to get the fd structure
	descriptor = get_fd(context, fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	// now put the fd in place
	status = new_fd(context, descriptor);
	if (status < 0)
		put_fd(descriptor);
	else {
		mutex_lock(&context->io_mutex);
		fd_set_close_on_exec(context, status, false);
		mutex_unlock(&context->io_mutex);
	}

	return status;
}


/*!	POSIX says this should be the same as:
		close(newfd);
		fcntl(oldfd, F_DUPFD, newfd);

	We implement dup2() directly instead, to make it thread-safe.
*/
static int
dup2_fd(int oldfd, int newfd, bool kernel)
{
	struct file_descriptor* evicted = NULL;
	struct io_context* context;

	TRACE(("dup2_fd: ofd = %d, nfd = %d\n", oldfd, newfd));

	// quick check
	if (oldfd < 0 || newfd < 0)
		return B_FILE_ERROR;

	// Get current I/O context and lock it
	context = get_current_io_context(kernel);
	mutex_lock(&context->io_mutex);

	// Check if the fds are valid (the mutex must be held, because the
	// table size could change)
	if ((uint32)oldfd >= context->table_size
		|| (uint32)newfd >= context->table_size
		|| context->fds[oldfd] == NULL
		|| (context->fds[oldfd]->open_mode & O_DISCONNECTED) != 0) {
		mutex_unlock(&context->io_mutex);
		return B_FILE_ERROR;
	}

	// Check for identity; note that this check cannot be done above,
	// because we always want to return an error for invalid handles
	select_info* selectInfos = NULL;
	if (oldfd != newfd) {
		// Now do the work
		TFD(Dup2FD(context, oldfd, newfd));

		evicted = context->fds[newfd];
		selectInfos = context->select_infos[newfd];
		context->select_infos[newfd] = NULL;
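		// the new slot gets its own reference and open count on the
		// descriptor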
		atomic_add(&context->fds[oldfd]->ref_count, 1);
		atomic_add(&context->fds[oldfd]->open_count, 1);
		context->fds[newfd] = context->fds[oldfd];

		if (evicted == NULL)
			context->num_used_fds++;
	}

	fd_set_close_on_exec(context, newfd, false);

	mutex_unlock(&context->io_mutex);

	// Say bye bye to the evicted fd
	if (evicted) {
		deselect_select_infos(evicted, selectInfos, true);
		close_fd(evicted);
		put_fd(evicted);
	}

	return newfd;
}


/*!	Duplicates an FD from another team to this/the kernel team.
	\param fromTeam The team which owns the FD.
	\param fd The FD to duplicate.
	\param kernel If \c true, the new FD will be created in the kernel team,
			the current userland team otherwise.
	\return The newly created FD or an error code, if something went wrong.
*/
int
dup_foreign_fd(team_id fromTeam, int fd, bool kernel)
{
	// get the I/O context for the team in question
	Team* team = Team::Get(fromTeam);
	if (team == NULL)
		return B_BAD_TEAM_ID;
	BReference<Team> teamReference(team, true);

	io_context* fromContext = team->io_context;

	// get the file descriptor
	file_descriptor* descriptor = get_fd(fromContext, fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;
	CObjectDeleter<file_descriptor> descriptorPutter(descriptor, put_fd);

	// create a new FD in the target I/O context
	int result = new_fd(get_current_io_context(kernel), descriptor);
	if (result >= 0) {
		// the descriptor reference belongs to the slot, now
		descriptorPutter.Detach();
	}

	return result;
}


static status_t
fd_ioctl(bool kernelFD, int fd, uint32 op, void* buffer, size_t length)
{
	struct file_descriptor* descriptor;
	status_t status;

	descriptor = get_fd(get_current_io_context(kernelFD), fd);
	if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
		return B_FILE_ERROR;

	if (descriptor->ops->fd_ioctl != NULL)
		status = descriptor->ops->fd_ioctl(descriptor, op, buffer, length);
	else
		status = B_DEV_INVALID_IOCTL;

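	// translate B_DEV_INVALID_IOCTL into the errno POSIX callers expect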
	if (status == B_DEV_INVALID_IOCTL)
		status = ENOTTY;

	put_fd(descriptor);
	return status;
}


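/*!	Deselects all events that have been selected via the given select infos,
	notifies the corresponding select() calls with \c B_EVENT_INVALID, and
	optionally releases the references to their sync objects.
*/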
static void
deselect_select_infos(file_descriptor* descriptor, select_info* infos,
	bool putSyncObjects)
{
	TRACE(("deselect_select_infos(%p, %p)\n", descriptor, infos));

	select_info* info = infos;
	while (info != NULL) {
		select_sync* sync = info->sync;

		// deselect the selected events
		uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
		if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
			for (uint16 event = 1; event < 16; event++) {
				if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
					descriptor->ops->fd_deselect(descriptor, event,
						(selectsync*)info);
				}
			}
		}

		notify_select_events(info, B_EVENT_INVALID);
		info = info->next;

		if (putSyncObjects)
			put_select_sync(sync);
	}
}


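/*!	Selects the events requested in \a info on the given FD and registers
	\a info with the I/O context, so it can be deselected when the FD is
	closed. If the descriptor does not support select(), all requested
	events (except the output-only ones) are reported as ready immediately.
*/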
status_t
select_fd(int32 fd, struct select_info* info, bool kernel)
{
	TRACE(("select_fd(fd = %ld, info = %p (%p), 0x%x)\n", fd, info,
		info->sync, info->selected_events));

	FDGetter fdGetter;
		// define before the context locker, so it will be destroyed after it

	io_context* context = get_current_io_context(kernel);
	MutexLocker locker(context->io_mutex);

	struct file_descriptor* descriptor = fdGetter.SetTo(context, fd, true);
	if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
		return B_FILE_ERROR;

	uint16 eventsToSelect = info->selected_events & ~B_EVENT_INVALID;

	if (descriptor->ops->fd_select == NULL) {
		// if the I/O subsystem doesn't support select(), we will
		// immediately notify the select call
		eventsToSelect &= ~SELECT_OUTPUT_ONLY_FLAGS;
		if (eventsToSelect != 0)
			return notify_select_events(info, eventsToSelect);
		else
			return B_OK;
	}

	// We need the FD to stay open while we're doing this, so that select()/
	// deselect() won't be called on it after it has been closed.
	atomic_add(&descriptor->open_count, 1);

	locker.Unlock();

	// select any events asked for
	uint32 selectedEvents = 0;

	for (uint16 event = 1; event < 16; event++) {
		if ((eventsToSelect & SELECT_FLAG(event)) != 0
			&& descriptor->ops->fd_select(descriptor, event,
				(selectsync*)info) == B_OK) {
			selectedEvents |= SELECT_FLAG(event);
		}
	}
	info->selected_events = selectedEvents
		| (info->selected_events & B_EVENT_INVALID);

	// Add the info to the I/O context, even if nothing has been selected,
	// since we always support B_EVENT_INVALID.
	locker.Lock();
	if (context->fds[fd] != descriptor) {
		// Someone close()d the index in the meantime. deselect() all
		// events.
		info->next = NULL;
		deselect_select_infos(descriptor, info, false);

		// Release our open reference of the descriptor.
		close_fd(descriptor);
		return B_FILE_ERROR;
	}

	// The FD index hasn't changed, so we add the select info to the table.

	info->next = context->select_infos[fd];
	context->select_infos[fd] = info;

	// As long as the info is in the list, we keep a reference to the sync
	// object.
	atomic_add(&info->sync->ref_count, 1);

	// Finally release our open reference. It is safe just to decrement,
	// since as long as the descriptor is associated with the slot,
	// someone else still has it open.
	atomic_add(&descriptor->open_count, -1);

	return B_OK;
}


status_t
deselect_fd(int32 fd, struct select_info* info, bool kernel)
{
	TRACE(("deselect_fd(fd = %ld, info = %p (%p), 0x%x)\n", fd, info,
		info->sync, info->selected_events));

	FDGetter fdGetter;
		// define before the context locker, so it will be destroyed after it

	io_context* context = get_current_io_context(kernel);
	MutexLocker locker(context->io_mutex);

	struct file_descriptor* descriptor = fdGetter.SetTo(context, fd, true);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	// remove the info from the I/O context

	select_info** infoLocation = &context->select_infos[fd];
	while (*infoLocation != NULL && *infoLocation != info)
		infoLocation = &(*infoLocation)->next;

	// If not found, someone else beat us to it.
	if (*infoLocation != info)
		return B_OK;

	*infoLocation = info->next;

	locker.Unlock();

	// deselect the selected events
	uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
	if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
		for (uint16 event = 1; event < 16; event++) {
			if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
				descriptor->ops->fd_deselect(descriptor, event,
					(selectsync*)info);
			}
		}
	}

	put_select_sync(info->sync);

	return B_OK;
}


/*!	This function checks if the specified fd is valid in the current
	context. It can be used for a quick check; the fd is not locked
	so it could become invalid immediately after this check.
*/
bool
fd_is_valid(int fd, bool kernel)
{
	struct file_descriptor* descriptor
		= get_fd(get_current_io_context(kernel), fd);
	if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
		return false;

	put_fd(descriptor);
	return true;
}


struct vnode*
fd_vnode(struct file_descriptor* descriptor)
{
	switch (descriptor->type) {
		case FDTYPE_FILE:
		case FDTYPE_DIR:
		case FDTYPE_ATTR_DIR:
		case FDTYPE_ATTR:
			return descriptor->u.vnode;
	}

	return NULL;
}


static status_t
common_close(int fd, bool kernel)
{
	return close_fd_index(get_current_io_context(kernel), fd);
}


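/*!	Common backend for _user_read() and _user_write(). A position of -1
	means that the descriptor's current position is used and advanced by
	the number of bytes transferred; otherwise it is left untouched.
*/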
static ssize_t
common_user_io(int fd, off_t pos, void* buffer, size_t length, bool write)
{
	if (!IS_USER_ADDRESS(buffer))
		return B_BAD_ADDRESS;

	if (pos < -1)
		return B_BAD_VALUE;

	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(fd, false);
	if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
		return B_FILE_ERROR;

	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
		return B_FILE_ERROR;
	}

	bool movePosition = false;
	if (pos == -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (write ? descriptor->ops->fd_write == NULL
			: descriptor->ops->fd_read == NULL) {
		return B_BAD_VALUE;
	}

	SyscallRestartWrapper<status_t> status;

	if (write)
		status = descriptor->ops->fd_write(descriptor, pos, buffer, &length);
	else
		status = descriptor->ops->fd_read(descriptor, pos, buffer, &length);

	if (status != B_OK)
		return status;

	if (movePosition)
		descriptor->pos = pos + length;

	return length <= SSIZE_MAX ? (ssize_t)length : SSIZE_MAX;
}


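/*!	Common backend for _user_readv() and _user_writev(). As in
	common_user_io(), a position of -1 uses and advances the descriptor's
	current position. The vectors are processed in order; a short transfer
	ends the loop and the number of bytes transferred so far is returned.
*/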
static ssize_t
common_user_vector_io(int fd, off_t pos, const iovec* userVecs, size_t count,
	bool write)
{
	if (!IS_USER_ADDRESS(userVecs))
		return B_BAD_ADDRESS;

	if (pos < -1)
		return B_BAD_VALUE;

	// prevent integer overflow exploit in malloc()
	if (count > IOV_MAX)
		return B_BAD_VALUE;

	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(fd, false);
	if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
		return B_FILE_ERROR;

	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
		return B_FILE_ERROR;
	}

	iovec* vecs = (iovec*)malloc(sizeof(iovec) * count);
	if (vecs == NULL)
		return B_NO_MEMORY;
	MemoryDeleter _(vecs);

	if (user_memcpy(vecs, userVecs, sizeof(iovec) * count) != B_OK)
		return B_BAD_ADDRESS;

	bool movePosition = false;
	if (pos == -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (write ? descriptor->ops->fd_write == NULL
			: descriptor->ops->fd_read == NULL) {
		return B_BAD_VALUE;
	}

	SyscallRestartWrapper<status_t> status;

	ssize_t bytesTransferred = 0;
	for (uint32 i = 0; i < count; i++) {
		size_t length = vecs[i].iov_len;
		if (write) {
			status = descriptor->ops->fd_write(descriptor, pos,
				vecs[i].iov_base, &length);
		} else {
			status = descriptor->ops->fd_read(descriptor, pos, vecs[i].iov_base,
				&length);
		}

		if (status != B_OK) {
			if (bytesTransferred == 0)
				return status;
			status = B_OK;
			break;
		}

		if ((uint64)bytesTransferred + length > SSIZE_MAX)
			bytesTransferred = SSIZE_MAX;
		else
			bytesTransferred += (ssize_t)length;

		pos += length;

		if (length < vecs[i].iov_len)
			break;
	}

	if (movePosition)
		descriptor->pos = pos;

	return bytesTransferred;
}


status_t
user_fd_kernel_ioctl(int fd, uint32 op, void* buffer, size_t length)
{
	TRACE(("user_fd_kernel_ioctl: fd %d\n", fd));

	return fd_ioctl(false, fd, op, buffer, length);
}


//	#pragma mark - User syscalls


ssize_t
_user_read(int fd, off_t pos, void* buffer, size_t length)
{
	return common_user_io(fd, pos, buffer, length, false);
}


ssize_t
_user_readv(int fd, off_t pos, const iovec* userVecs, size_t count)
{
	return common_user_vector_io(fd, pos, userVecs, count, false);
}


ssize_t
_user_write(int fd, off_t pos, const void* buffer, size_t length)
{
	return common_user_io(fd, pos, (void*)buffer, length, true);
}


ssize_t
_user_writev(int fd, off_t pos, const iovec* userVecs, size_t count)
{
	return common_user_vector_io(fd, pos, userVecs, count, true);
}


off_t
_user_seek(int fd, off_t pos, int seekType)
{
	syscall_64_bit_return_value();
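		// the syscall mechanism needs to know that we return a 64 bit value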

	struct file_descriptor* descriptor;

	descriptor = get_fd(get_current_io_context(false), fd);
	if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
		return B_FILE_ERROR;

	TRACE(("user_seek(descriptor = %p)\n", descriptor));

	if (descriptor->ops->fd_seek != NULL)
		pos = descriptor->ops->fd_seek(descriptor, pos, seekType);
	else
		pos = ESPIPE;

	put_fd(descriptor);
	return pos;
}


status_t
_user_ioctl(int fd, uint32 op, void* buffer, size_t length)
{
	if (!IS_USER_ADDRESS(buffer))
		return B_BAD_ADDRESS;

	TRACE(("user_ioctl: fd %d\n", fd));

	SyscallRestartWrapper<status_t> status;

	return status = fd_ioctl(false, fd, op, buffer, length);
}


ssize_t
_user_read_dir(int fd, struct dirent* userBuffer, size_t bufferSize,
	uint32 maxCount)
{
	TRACE(("user_read_dir(fd = %d, userBuffer = %p, bufferSize = %ld, count = "
		"%lu)\n", fd, userBuffer, bufferSize, maxCount));

	if (maxCount == 0)
		return 0;

	if (userBuffer == NULL || !IS_USER_ADDRESS(userBuffer))
		return B_BAD_ADDRESS;

	// get I/O context and FD
	io_context* ioContext = get_current_io_context(false);
	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(ioContext, fd, false);
	if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
		return B_FILE_ERROR;

	if (descriptor->ops->fd_read_dir == NULL)
		return B_UNSUPPORTED;

	// restrict buffer size and allocate a heap buffer
	if (bufferSize > kMaxReadDirBufferSize)
		bufferSize = kMaxReadDirBufferSize;
	struct dirent* buffer = (struct dirent*)malloc(bufferSize);
	if (buffer == NULL)
		return B_NO_MEMORY;
	MemoryDeleter bufferDeleter(buffer);

	// read the directory
	uint32 count = maxCount;
	status_t status = descriptor->ops->fd_read_dir(ioContext, descriptor,
		buffer, bufferSize, &count);
	if (status != B_OK)
		return status;

	// copy the buffer back -- determine the total buffer size first
	size_t sizeToCopy = 0;
	struct dirent* entry = buffer;
	for (uint32 i = 0; i < count; i++) {
		size_t length = entry->d_reclen;
		sizeToCopy += length;
		entry = (struct dirent*)((uint8*)entry + length);
	}

	if (user_memcpy(userBuffer, buffer, sizeToCopy) != B_OK)
		return B_BAD_ADDRESS;

	return count;
}


status_t
_user_rewind_dir(int fd)
{
	struct file_descriptor* descriptor;
	status_t status;

	TRACE(("user_rewind_dir(fd = %d)\n", fd));

	descriptor = get_fd(get_current_io_context(false), fd);
	if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
		return B_FILE_ERROR;

	if (descriptor->ops->fd_rewind_dir != NULL)
		status = descriptor->ops->fd_rewind_dir(descriptor);
	else
		status = B_UNSUPPORTED;

	put_fd(descriptor);
	return status;
}


status_t
_user_close(int fd)
{
	return common_close(fd, false);
}


int
_user_dup(int fd)
{
	return dup_fd(fd, false);
}


int
_user_dup2(int ofd, int nfd)
{
	return dup2_fd(ofd, nfd, false);
}


//	#pragma mark - Kernel calls

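// The following functions mirror the user syscalls above, but they operate
// on the kernel team's I/O context and take kernel buffers, so no user
// address checks or user_memcpy() round trips are needed.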

ssize_t
_kern_read(int fd, off_t pos, void* buffer, size_t length)
{
	if (pos < -1)
		return B_BAD_VALUE;

	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);

	if (!descriptor)
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
		return B_FILE_ERROR;

	bool movePosition = false;
	if (pos == -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	SyscallFlagUnsetter _;

	if (descriptor->ops->fd_read == NULL)
		return B_BAD_VALUE;

	ssize_t bytesRead = descriptor->ops->fd_read(descriptor, pos, buffer,
		&length);
	if (bytesRead >= B_OK) {
		if (length > SSIZE_MAX)
			bytesRead = SSIZE_MAX;
		else
			bytesRead = (ssize_t)length;

		if (movePosition)
			descriptor->pos = pos + length;
	}

	return bytesRead;
}


ssize_t
_kern_readv(int fd, off_t pos, const iovec* vecs, size_t count)
{
	bool movePosition = false;
	status_t status;
	uint32 i;

	if (pos < -1)
		return B_BAD_VALUE;

	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);

	if (!descriptor)
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
		return B_FILE_ERROR;

	if (pos == -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (descriptor->ops->fd_read == NULL)
		return B_BAD_VALUE;

	SyscallFlagUnsetter _;

	ssize_t bytesRead = 0;

	for (i = 0; i < count; i++) {
		size_t length = vecs[i].iov_len;
		status = descriptor->ops->fd_read(descriptor, pos, vecs[i].iov_base,
			&length);
		if (status != B_OK) {
			bytesRead = status;
			break;
		}

		if ((uint64)bytesRead + length > SSIZE_MAX)
			bytesRead = SSIZE_MAX;
		else
			bytesRead += (ssize_t)length;

		pos += vecs[i].iov_len;
	}

	if (movePosition)
		descriptor->pos = pos;

	return bytesRead;
}


ssize_t
_kern_write(int fd, off_t pos, const void* buffer, size_t length)
{
	if (pos < -1)
		return B_BAD_VALUE;

	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);

	if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
		return B_FILE_ERROR;

	bool movePosition = false;
	if (pos == -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (descriptor->ops->fd_write == NULL)
		return B_BAD_VALUE;

	SyscallFlagUnsetter _;

	ssize_t bytesWritten = descriptor->ops->fd_write(descriptor, pos, buffer,
		&length);
	if (bytesWritten >= B_OK) {
		if (length > SSIZE_MAX)
			bytesWritten = SSIZE_MAX;
		else
			bytesWritten = (ssize_t)length;

		if (movePosition)
			descriptor->pos = pos + length;
	}

	return bytesWritten;
}


ssize_t
_kern_writev(int fd, off_t pos, const iovec* vecs, size_t count)
{
	bool movePosition = false;
	status_t status;
	uint32 i;

	if (pos < -1)
		return B_BAD_VALUE;

	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);

	if (!descriptor)
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
		return B_FILE_ERROR;

	if (pos == -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (descriptor->ops->fd_write == NULL)
		return B_BAD_VALUE;

	SyscallFlagUnsetter _;

	ssize_t bytesWritten = 0;

	for (i = 0; i < count; i++) {
		size_t length = vecs[i].iov_len;
		status = descriptor->ops->fd_write(descriptor, pos,
			vecs[i].iov_base, &length);
		if (status != B_OK) {
			bytesWritten = status;
			break;
		}

		if ((uint64)bytesWritten + length > SSIZE_MAX)
			bytesWritten = SSIZE_MAX;
		else
			bytesWritten += (ssize_t)length;

		pos += vecs[i].iov_len;
	}

	if (movePosition)
		descriptor->pos = pos;

	return bytesWritten;
}


off_t
_kern_seek(int fd, off_t pos, int seekType)
{
	struct file_descriptor* descriptor;

	descriptor = get_fd(get_current_io_context(true), fd);
	if (!descriptor)
		return B_FILE_ERROR;

	if (descriptor->ops->fd_seek)
		pos = descriptor->ops->fd_seek(descriptor, pos, seekType);
	else
		pos = ESPIPE;

	put_fd(descriptor);
	return pos;
}


status_t
_kern_ioctl(int fd, uint32 op, void* buffer, size_t length)
{
	TRACE(("kern_ioctl: fd %d\n", fd));

	SyscallFlagUnsetter _;

	return fd_ioctl(true, fd, op, buffer, length);
}


ssize_t
_kern_read_dir(int fd, struct dirent* buffer, size_t bufferSize,
	uint32 maxCount)
{
	struct file_descriptor* descriptor;
	ssize_t retval;

	TRACE(("sys_read_dir(fd = %d, buffer = %p, bufferSize = %ld, count = "
		"%lu)\n", fd, buffer, bufferSize, maxCount));

	struct io_context* ioContext = get_current_io_context(true);
	descriptor = get_fd(ioContext, fd);
	if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
		return B_FILE_ERROR;

	if (descriptor->ops->fd_read_dir) {
		uint32 count = maxCount;
		retval = descriptor->ops->fd_read_dir(ioContext, descriptor, buffer,
			bufferSize, &count);
		if (retval >= 0)
			retval = count;
	} else
		retval = B_UNSUPPORTED;

	put_fd(descriptor);
	return retval;
}


status_t
_kern_rewind_dir(int fd)
{
	struct file_descriptor* descriptor;
	status_t status;

	TRACE(("sys_rewind_dir(fd = %d)\n", fd));

	descriptor = get_fd(get_current_io_context(true), fd);
	if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
		return B_FILE_ERROR;

	if (descriptor->ops->fd_rewind_dir)
		status = descriptor->ops->fd_rewind_dir(descriptor);
	else
		status = B_UNSUPPORTED;

	put_fd(descriptor);
	return status;
}


status_t
_kern_close(int fd)
{
	return common_close(fd, true);
}


int
_kern_dup(int fd)
{
	return dup_fd(fd, true);
}


int
_kern_dup2(int ofd, int nfd)
{
	return dup2_fd(ofd, nfd, true);
}
