xref: /haiku/src/system/kernel/fs/fd.cpp (revision 1f52c921e27aa442370e1bd4adc021acf2b78b64)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2018, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  */
6 
7 
8 //! Operations on file descriptors
9 
10 
11 #include <fd.h>
12 
13 #include <stdlib.h>
14 #include <string.h>
15 
16 #include <OS.h>
17 
18 #include <AutoDeleter.h>
19 #include <BytePointer.h>
20 
21 #include <syscalls.h>
22 #include <syscall_restart.h>
23 #include <slab/Slab.h>
24 #include <util/AutoLock.h>
25 #include <vfs.h>
26 #include <wait_for_objects.h>
27 
28 #include "vfs_tracing.h"
29 
30 
31 //#define TRACE_FD
32 #ifdef TRACE_FD
33 #	define TRACE(x) dprintf x
34 #else
35 #	define TRACE(x)
36 #endif
37 
38 
39 static const size_t kMaxReadDirBufferSize = 64 * 1024;
40 
41 extern object_cache* sFileDescriptorCache;
42 
43 
44 static struct file_descriptor* get_fd_locked(struct io_context* context,
45 	int fd);
46 static struct file_descriptor* remove_fd(struct io_context* context, int fd);
47 static void deselect_select_infos(file_descriptor* descriptor,
48 	select_info* infos, bool putSyncObjects);
49 
50 
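// FDGetterLocking/FDGetter: an AutoLocker-based helper that looks up a file
// descriptor via get_fd()/get_fd_locked() and automatically puts the acquired
// reference again when it goes out of scope.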
51 struct FDGetterLocking {
52 	inline bool Lock(file_descriptor* /*lockable*/)
53 	{
54 		return false;
55 	}
56 
57 	inline void Unlock(file_descriptor* lockable)
58 	{
59 		put_fd(lockable);
60 	}
61 };
62 
63 class FDGetter : public AutoLocker<file_descriptor, FDGetterLocking> {
64 public:
65 	inline FDGetter()
66 		: AutoLocker<file_descriptor, FDGetterLocking>()
67 	{
68 	}
69 
70 	inline FDGetter(io_context* context, int fd, bool contextLocked = false)
71 		: AutoLocker<file_descriptor, FDGetterLocking>(
72 			contextLocked ? get_fd_locked(context, fd) : get_fd(context, fd))
73 	{
74 	}
75 
76 	inline file_descriptor* SetTo(io_context* context, int fd,
77 		bool contextLocked = false)
78 	{
79 		file_descriptor* descriptor
80 			= contextLocked ? get_fd_locked(context, fd) : get_fd(context, fd);
81 		AutoLocker<file_descriptor, FDGetterLocking>::SetTo(descriptor, true);
82 		return descriptor;
83 	}
84 
85 	inline file_descriptor* SetTo(int fd, bool kernel,
86 		bool contextLocked = false)
87 	{
88 		return SetTo(get_current_io_context(kernel), fd, contextLocked);
89 	}
90 
91 	inline file_descriptor* FD() const
92 	{
93 		return fLockable;
94 	}
95 };
96 
97 
98 //	#pragma mark - General fd routines
99 
100 
101 #ifdef DEBUG
102 void dump_fd(int fd, struct file_descriptor* descriptor);
103 
104 void
105 dump_fd(int fd, struct file_descriptor* descriptor)
106 {
107 	dprintf("fd[%d] = %p: type = %" B_PRId32 ", ref_count = %" B_PRId32 ", ops "
108 		"= %p, u.vnode = %p, u.mount = %p, cookie = %p, open_mode = %" B_PRIx32
109 		", pos = %" B_PRId64 "\n",
110 		fd, descriptor, descriptor->type, descriptor->ref_count,
111 		descriptor->ops, descriptor->u.vnode, descriptor->u.mount,
112 		descriptor->cookie, descriptor->open_mode, descriptor->pos);
113 }
114 #endif
115 
116 
117 /*! Allocates and initializes a new file_descriptor.
118 */
119 struct file_descriptor*
120 alloc_fd(void)
121 {
122 	file_descriptor* descriptor
123 		= (file_descriptor*)object_cache_alloc(sFileDescriptorCache, 0);
124 	if (descriptor == NULL)
125 		return NULL;
126 
127 	descriptor->u.vnode = NULL;
128 	descriptor->cookie = NULL;
129 	descriptor->ref_count = 1;
130 	descriptor->open_count = 0;
131 	descriptor->open_mode = 0;
132 	descriptor->pos = 0;
133 
134 	return descriptor;
135 }
136 
137 
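/*!	Returns whether the close-on-exec flag is set for the given FD in the
	supplied I/O context.
*/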
138 bool
139 fd_close_on_exec(struct io_context* context, int fd)
140 {
141 	return CHECK_BIT(context->fds_close_on_exec[fd / 8], fd & 7) ? true : false;
142 }
143 
144 
145 void
146 fd_set_close_on_exec(struct io_context* context, int fd, bool closeFD)
147 {
148 	if (closeFD)
149 		context->fds_close_on_exec[fd / 8] |= (1 << (fd & 7));
150 	else
151 		context->fds_close_on_exec[fd / 8] &= ~(1 << (fd & 7));
152 }
153 
154 
155 /*!	Searches for a free slot in the FD table of the provided I/O context,
156 	and inserts the specified descriptor into it.
157 */
158 int
159 new_fd_etc(struct io_context* context, struct file_descriptor* descriptor,
160 	int firstIndex)
161 {
162 	int fd = -1;
163 	uint32 i;
164 
165 	if (firstIndex < 0 || (uint32)firstIndex >= context->table_size)
166 		return B_BAD_VALUE;
167 
168 	mutex_lock(&context->io_mutex);
169 
170 	for (i = firstIndex; i < context->table_size; i++) {
171 		if (!context->fds[i]) {
172 			fd = i;
173 			break;
174 		}
175 	}
176 	if (fd < 0) {
177 		fd = B_NO_MORE_FDS;
178 		goto err;
179 	}
180 
181 	TFD(NewFD(context, fd, descriptor));
182 
183 	context->fds[fd] = descriptor;
184 	context->num_used_fds++;
185 	atomic_add(&descriptor->open_count, 1);
186 
187 err:
188 	mutex_unlock(&context->io_mutex);
189 
190 	return fd;
191 }
192 
193 
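/*!	Inserts the descriptor into the first free slot of the context's FD
	table; shorthand for new_fd_etc() with a first index of 0.
*/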
194 int
195 new_fd(struct io_context* context, struct file_descriptor* descriptor)
196 {
197 	return new_fd_etc(context, descriptor, 0);
198 }
199 
200 
201 /*!	Decrements the descriptor's reference count and frees all of its
202 	resources once it is no longer used.
203 */
204 void
205 put_fd(struct file_descriptor* descriptor)
206 {
207 	int32 previous = atomic_add(&descriptor->ref_count, -1);
208 
209 	TFD(PutFD(descriptor));
210 
211 	TRACE(("put_fd(descriptor = %p [ref = %ld, cookie = %p])\n",
212 		descriptor, descriptor->ref_count, descriptor->cookie));
213 
214 	// free the descriptor if we don't need it anymore
215 	if (previous == 1) {
216 		// free the underlying object
217 		if (descriptor->ops != NULL && descriptor->ops->fd_free != NULL)
218 			descriptor->ops->fd_free(descriptor);
219 
220 		object_cache_free(sFileDescriptorCache, descriptor, 0);
221 	} else if ((descriptor->open_mode & O_DISCONNECTED) != 0
222 		&& previous - 1 == descriptor->open_count
223 		&& descriptor->ops != NULL) {
224 		// the descriptor has been disconnected - it cannot
225 		// be accessed anymore, let's close it (no one is
226 		// currently accessing this descriptor)
227 
228 		if (descriptor->ops->fd_close)
229 			descriptor->ops->fd_close(descriptor);
230 		if (descriptor->ops->fd_free)
231 			descriptor->ops->fd_free(descriptor);
232 
233 		// prevent this descriptor from being closed/freed again
234 		descriptor->ops = NULL;
235 		descriptor->u.vnode = NULL;
236 
237 		// the file descriptor is kept intact, so that it's not
238 		// reused until someone explicitly closes it
239 	}
240 }
241 
242 
243 /*!	Decrements the open counter of the file descriptor and invokes
244 	its close hook when appropriate.
245 */
246 void
247 close_fd(struct io_context* context, struct file_descriptor* descriptor)
248 {
249 	// POSIX advisory locks need to be released when any file descriptor closes
250 	if (descriptor->type == FDTYPE_FILE)
251 		vfs_release_posix_lock(context, descriptor);
252 
253 	if (atomic_add(&descriptor->open_count, -1) == 1) {
254 		vfs_unlock_vnode_if_locked(descriptor);
255 
256 		if (descriptor->ops != NULL && descriptor->ops->fd_close != NULL)
257 			descriptor->ops->fd_close(descriptor);
258 	}
259 }
260 
261 
262 status_t
263 close_fd_index(struct io_context* context, int fd)
264 {
265 	struct file_descriptor* descriptor = remove_fd(context, fd);
266 
267 	if (descriptor == NULL)
268 		return B_FILE_ERROR;
269 
270 	close_fd(context, descriptor);
271 	put_fd(descriptor);
272 		// the reference associated with the slot
273 
274 	return B_OK;
275 }
276 
277 
278 /*!	Marks the descriptor as disconnected: its underlying object will be
279 	closed and freed as soon as possible (in one of the next calls to
280 	put_fd()), and get_fd() will no longer succeed on it.
281 	This is useful if the underlying object is gone, for instance when a
282 	(mounted) volume was removed unexpectedly.
283 */
284 void
285 disconnect_fd(struct file_descriptor* descriptor)
286 {
287 	descriptor->open_mode |= O_DISCONNECTED;
288 }
289 
290 
291 void
292 inc_fd_ref_count(struct file_descriptor* descriptor)
293 {
294 	atomic_add(&descriptor->ref_count, 1);
295 }
296 
297 
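/*!	Returns the descriptor for the given FD and acquires a reference to it,
	or NULL if the FD is out of range, unused, or disconnected. The caller
	must hold the context's io_mutex.
*/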
298 static struct file_descriptor*
299 get_fd_locked(struct io_context* context, int fd)
300 {
301 	if (fd < 0 || (uint32)fd >= context->table_size)
302 		return NULL;
303 
304 	struct file_descriptor* descriptor = context->fds[fd];
305 
306 	if (descriptor != NULL) {
307 		// disconnected descriptors cannot be accessed anymore
308 		if (descriptor->open_mode & O_DISCONNECTED)
309 			return NULL;
310 
311 		TFD(GetFD(context, fd, descriptor));
312 		inc_fd_ref_count(descriptor);
313 	}
314 
315 	return descriptor;
316 }
317 
318 
319 struct file_descriptor*
320 get_fd(struct io_context* context, int fd)
321 {
322 	MutexLocker _(context->io_mutex);
323 
324 	return get_fd_locked(context, fd);
325 }
326 
327 
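/*!	Like get_fd(), but additionally acquires an "open" reference, so that the
	descriptor's close hook won't be invoked while the caller is using it.
*/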
328 struct file_descriptor*
329 get_open_fd(struct io_context* context, int fd)
330 {
331 	MutexLocker _(context->io_mutex);
332 
333 	file_descriptor* descriptor = get_fd_locked(context, fd);
334 	if (descriptor == NULL)
335 		return NULL;
336 
337 	atomic_add(&descriptor->open_count, 1);
338 
339 	return descriptor;
340 }
341 
342 
343 /*!	Removes the file descriptor from the specified slot.
344 */
345 static struct file_descriptor*
346 remove_fd(struct io_context* context, int fd)
347 {
348 	struct file_descriptor* descriptor = NULL;
349 
350 	if (fd < 0)
351 		return NULL;
352 
353 	mutex_lock(&context->io_mutex);
354 
355 	if ((uint32)fd < context->table_size)
356 		descriptor = context->fds[fd];
357 
358 	select_info* selectInfos = NULL;
359 	bool disconnected = false;
360 
361 	if (descriptor != NULL) {
362 		// fd is valid
363 		TFD(RemoveFD(context, fd, descriptor));
364 
365 		context->fds[fd] = NULL;
366 		fd_set_close_on_exec(context, fd, false);
367 		context->num_used_fds--;
368 
369 		selectInfos = context->select_infos[fd];
370 		context->select_infos[fd] = NULL;
371 
372 		disconnected = (descriptor->open_mode & O_DISCONNECTED);
373 	}
374 
375 	mutex_unlock(&context->io_mutex);
376 
377 	if (selectInfos != NULL)
378 		deselect_select_infos(descriptor, selectInfos, true);
379 
380 	return disconnected ? NULL : descriptor;
381 }
382 
383 
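/*!	Duplicates the given FD into the lowest free slot of the current I/O
	context and clears the new slot's close-on-exec flag. Returns the new FD
	or an error code.
*/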
384 static int
385 dup_fd(int fd, bool kernel)
386 {
387 	struct io_context* context = get_current_io_context(kernel);
388 	struct file_descriptor* descriptor;
389 	int status;
390 
391 	TRACE(("dup_fd: fd = %d\n", fd));
392 
393 	// Try to get the fd structure
394 	descriptor = get_fd(context, fd);
395 	if (descriptor == NULL)
396 		return B_FILE_ERROR;
397 
398 	// now put the fd in place
399 	status = new_fd(context, descriptor);
400 	if (status < 0)
401 		put_fd(descriptor);
402 	else {
403 		mutex_lock(&context->io_mutex);
404 		fd_set_close_on_exec(context, status, false);
405 		mutex_unlock(&context->io_mutex);
406 	}
407 
408 	return status;
409 }
410 
411 
412 /*!	POSIX says this should be the same as:
413 		close(newfd);
414 		fcntl(oldfd, F_DUPFD, newfd);
415 
416 	We implement dup2() directly instead, so that the operation is thread-safe.
417 */
418 static int
419 dup2_fd(int oldfd, int newfd, bool kernel)
420 {
421 	struct file_descriptor* evicted = NULL;
422 	struct io_context* context;
423 
424 	TRACE(("dup2_fd: ofd = %d, nfd = %d\n", oldfd, newfd));
425 
426 	// quick check
427 	if (oldfd < 0 || newfd < 0)
428 		return B_FILE_ERROR;
429 
430 	// Get current I/O context and lock it
431 	context = get_current_io_context(kernel);
432 	mutex_lock(&context->io_mutex);
433 
434 	// Check if the fds are valid (mutex must be locked because
435 	// the table size could be changed)
436 	if ((uint32)oldfd >= context->table_size
437 		|| (uint32)newfd >= context->table_size
438 		|| context->fds[oldfd] == NULL
439 		|| (context->fds[oldfd]->open_mode & O_DISCONNECTED) != 0) {
440 		mutex_unlock(&context->io_mutex);
441 		return B_FILE_ERROR;
442 	}
443 
444 	// Check for identity; note that this check cannot be done
445 	// earlier, because we always want to return an error for
446 	// invalid handles
447 	select_info* selectInfos = NULL;
448 	if (oldfd != newfd) {
449 		// Now do the work
450 		TFD(Dup2FD(context, oldfd, newfd));
451 
452 		evicted = context->fds[newfd];
453 		selectInfos = context->select_infos[newfd];
454 		context->select_infos[newfd] = NULL;
455 		atomic_add(&context->fds[oldfd]->ref_count, 1);
456 		atomic_add(&context->fds[oldfd]->open_count, 1);
457 		context->fds[newfd] = context->fds[oldfd];
458 
459 		if (evicted == NULL)
460 			context->num_used_fds++;
461 	}
462 
463 	fd_set_close_on_exec(context, newfd, false);
464 
465 	mutex_unlock(&context->io_mutex);
466 
467 	// Say bye bye to the evicted fd
468 	if (evicted) {
469 		deselect_select_infos(evicted, selectInfos, true);
470 		close_fd(context, evicted);
471 		put_fd(evicted);
472 	}
473 
474 	return newfd;
475 }
476 
477 
478 /*!	Duplicates an FD from another team to this/the kernel team.
479 	\param fromTeam The team which owns the FD.
480 	\param fd The FD to duplicate.
481 	\param kernel If \c true, the new FD will be created in the kernel team,
482 			the current userland team otherwise.
483 	\return The newly created FD or an error code, if something went wrong.
484 */
485 int
486 dup_foreign_fd(team_id fromTeam, int fd, bool kernel)
487 {
488 	// get the I/O context for the team in question
489 	Team* team = Team::Get(fromTeam);
490 	if (team == NULL)
491 		return B_BAD_TEAM_ID;
492 	BReference<Team> teamReference(team, true);
493 
494 	io_context* fromContext = team->io_context;
495 
496 	// get the file descriptor
497 	file_descriptor* descriptor = get_fd(fromContext, fd);
498 	if (descriptor == NULL)
499 		return B_FILE_ERROR;
500 	CObjectDeleter<file_descriptor> descriptorPutter(descriptor, put_fd);
501 
502 	// create a new FD in the target I/O context
503 	int result = new_fd(get_current_io_context(kernel), descriptor);
504 	if (result >= 0) {
505 		// the descriptor reference now belongs to the slot
506 		descriptorPutter.Detach();
507 	}
508 
509 	return result;
510 }
511 
512 
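/*!	Common backend for the ioctl() calls: resolves the FD in the current
	I/O context and forwards the request to the descriptor's fd_ioctl hook,
	mapping B_DEV_INVALID_IOCTL to ENOTTY.
*/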
513 static status_t
514 fd_ioctl(bool kernelFD, int fd, uint32 op, void* buffer, size_t length)
515 {
516 	struct file_descriptor* descriptor;
517 	int status;
518 
519 	descriptor = get_fd(get_current_io_context(kernelFD), fd);
520 	if (descriptor == NULL)
521 		return B_FILE_ERROR;
522 
523 	if (descriptor->ops->fd_ioctl)
524 		status = descriptor->ops->fd_ioctl(descriptor, op, buffer, length);
525 	else
526 		status = B_DEV_INVALID_IOCTL;
527 
528 	if (status == B_DEV_INVALID_IOCTL)
529 		status = ENOTTY;
530 
531 	put_fd(descriptor);
532 	return status;
533 }
534 
535 
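/*!	Deselects all events of the given select infos on the descriptor and
	notifies the infos with B_EVENT_INVALID. If \a putSyncObjects is \c true,
	the references to the infos' sync objects are released as well.
*/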
536 static void
537 deselect_select_infos(file_descriptor* descriptor, select_info* infos,
538 	bool putSyncObjects)
539 {
540 	TRACE(("deselect_select_infos(%p, %p)\n", descriptor, infos));
541 
542 	select_info* info = infos;
543 	while (info != NULL) {
544 		select_sync* sync = info->sync;
545 
546 		// deselect the selected events
547 		uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
548 		if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
549 			for (uint16 event = 1; event < 16; event++) {
550 				if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
551 					descriptor->ops->fd_deselect(descriptor, event,
552 						(selectsync*)info);
553 				}
554 			}
555 		}
556 
557 		notify_select_events(info, B_EVENT_INVALID);
558 		info = info->next;
559 
560 		if (putSyncObjects)
561 			put_select_sync(sync);
562 	}
563 }
564 
565 
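/*!	Selects the requested events in \a info on the given FD: the descriptor's
	fd_select hook is invoked for each event, and the info is linked into the
	I/O context's select_infos table so it can be notified and deselected
	later. If the descriptor does not support select(), the events are
	notified immediately.
*/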
566 status_t
567 select_fd(int32 fd, struct select_info* info, bool kernel)
568 {
569 	TRACE(("select_fd(fd = %ld, info = %p (%p), 0x%x)\n", fd, info,
570 		info->sync, info->selected_events));
571 
572 	FDGetter fdGetter;
573 		// define before the context locker, so it will be destroyed after it
574 
575 	io_context* context = get_current_io_context(kernel);
576 	MutexLocker locker(context->io_mutex);
577 
578 	struct file_descriptor* descriptor = fdGetter.SetTo(context, fd, true);
579 	if (descriptor == NULL)
580 		return B_FILE_ERROR;
581 
582 	uint16 eventsToSelect = info->selected_events & ~B_EVENT_INVALID;
583 
584 	if (descriptor->ops->fd_select == NULL) {
585 		// if the I/O subsystem doesn't support select(), we will
586 		// immediately notify the select call
587 		eventsToSelect &= ~SELECT_OUTPUT_ONLY_FLAGS;
588 		if (eventsToSelect != 0)
589 			return notify_select_events(info, eventsToSelect);
590 		else
591 			return B_OK;
592 	}
593 
594 	// We need the FD to stay open while we're doing this, so that select()/
595 	// deselect() cannot be called on it after it has been closed.
596 	atomic_add(&descriptor->open_count, 1);
597 
598 	locker.Unlock();
599 
600 	// select any events asked for
601 	uint32 selectedEvents = 0;
602 
603 	for (uint16 event = 1; event < 16; event++) {
604 		if ((eventsToSelect & SELECT_FLAG(event)) != 0
605 			&& descriptor->ops->fd_select(descriptor, event,
606 				(selectsync*)info) == B_OK) {
607 			selectedEvents |= SELECT_FLAG(event);
608 		}
609 	}
610 	info->selected_events = selectedEvents
611 		| (info->selected_events & B_EVENT_INVALID);
612 
613 	// Add the info to the I/O context even if nothing has been selected --
614 	// we always support B_EVENT_INVALID.
615 	locker.Lock();
616 	if (context->fds[fd] != descriptor) {
617 		// Someone close()d the index in the meantime. deselect() all
618 		// events.
619 		info->next = NULL;
620 		deselect_select_infos(descriptor, info, false);
621 
622 		// Release our open reference of the descriptor.
623 		close_fd(context, descriptor);
624 		return B_FILE_ERROR;
625 	}
626 
627 	// The FD index hasn't changed, so we add the select info to the table.
628 
629 	info->next = context->select_infos[fd];
630 	context->select_infos[fd] = info;
631 
632 	// As long as the info is in the list, we keep a reference to the sync
633 	// object.
634 	atomic_add(&info->sync->ref_count, 1);
635 
636 	// Finally release our open reference. It is safe just to decrement,
637 	// since as long as the descriptor is associated with the slot,
638 	// someone else still has it open.
639 	atomic_add(&descriptor->open_count, -1);
640 
641 	return B_OK;
642 }
643 
644 
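/*!	Reverses select_fd(): unlinks \a info from the I/O context's select_infos
	table, deselects its events on the descriptor, and releases the sync
	object reference that select_fd() acquired.
*/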
645 status_t
646 deselect_fd(int32 fd, struct select_info* info, bool kernel)
647 {
648 	TRACE(("deselect_fd(fd = %ld, info = %p (%p), 0x%x)\n", fd, info,
649 		info->sync, info->selected_events));
650 
651 	FDGetter fdGetter;
652 		// define before the context locker, so it will be destroyed after it
653 
654 	io_context* context = get_current_io_context(kernel);
655 	MutexLocker locker(context->io_mutex);
656 
657 	struct file_descriptor* descriptor = fdGetter.SetTo(context, fd, true);
658 	if (descriptor == NULL)
659 		return B_FILE_ERROR;
660 
661 	// remove the info from the IO context
662 
663 	select_info** infoLocation = &context->select_infos[fd];
664 	while (*infoLocation != NULL && *infoLocation != info)
665 		infoLocation = &(*infoLocation)->next;
666 
667 	// If not found, someone else beat us to it.
668 	if (*infoLocation != info)
669 		return B_OK;
670 
671 	*infoLocation = info->next;
672 
673 	locker.Unlock();
674 
675 	// deselect the selected events
676 	uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
677 	if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
678 		for (uint16 event = 1; event < 16; event++) {
679 			if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
680 				descriptor->ops->fd_deselect(descriptor, event,
681 					(selectsync*)info);
682 			}
683 		}
684 	}
685 
686 	put_select_sync(info->sync);
687 
688 	return B_OK;
689 }
690 
691 
692 /*!	This function checks if the specified fd is valid in the current
693 	context. It can be used for a quick check; the fd is not locked
694 	so it could become invalid immediately after this check.
695 */
696 bool
697 fd_is_valid(int fd, bool kernel)
698 {
699 	struct file_descriptor* descriptor
700 		= get_fd(get_current_io_context(kernel), fd);
701 	if (descriptor == NULL)
702 		return false;
703 
704 	put_fd(descriptor);
705 	return true;
706 }
707 
708 
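/*!	Returns the vnode the descriptor refers to, if its type has one (files,
	directories, attributes, and attribute directories), or NULL otherwise.
*/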
709 struct vnode*
710 fd_vnode(struct file_descriptor* descriptor)
711 {
712 	switch (descriptor->type) {
713 		case FDTYPE_FILE:
714 		case FDTYPE_DIR:
715 		case FDTYPE_ATTR_DIR:
716 		case FDTYPE_ATTR:
717 			return descriptor->u.vnode;
718 	}
719 
720 	return NULL;
721 }
722 
723 
724 static status_t
725 common_close(int fd, bool kernel)
726 {
727 	return close_fd_index(get_current_io_context(kernel), fd);
728 }
729 
730 
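/*!	Backend for _user_read() and _user_write(). A position of -1 means
	"use and advance the descriptor's current position"; the buffer must be
	a userland address.
*/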
731 static ssize_t
732 common_user_io(int fd, off_t pos, void* buffer, size_t length, bool write)
733 {
734 	if (!IS_USER_ADDRESS(buffer))
735 		return B_BAD_ADDRESS;
736 
737 	if (pos < -1)
738 		return B_BAD_VALUE;
739 
740 	FDGetter fdGetter;
741 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, false);
742 	if (!descriptor)
743 		return B_FILE_ERROR;
744 
745 	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
746 			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
747 		return B_FILE_ERROR;
748 	}
749 
750 	bool movePosition = false;
751 	if (pos == -1) {
752 		pos = descriptor->pos;
753 		movePosition = true;
754 	}
755 
756 	if (write ? descriptor->ops->fd_write == NULL
757 			: descriptor->ops->fd_read == NULL) {
758 		return B_BAD_VALUE;
759 	}
760 
761 	SyscallRestartWrapper<status_t> status;
762 
763 	if (write)
764 		status = descriptor->ops->fd_write(descriptor, pos, buffer, &length);
765 	else
766 		status = descriptor->ops->fd_read(descriptor, pos, buffer, &length);
767 
768 	if (status != B_OK)
769 		return status;
770 
771 	if (movePosition)
772 		descriptor->pos = pos + length;
773 
774 	return length <= SSIZE_MAX ? (ssize_t)length : SSIZE_MAX;
775 }
776 
777 
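/*!	Backend for _user_readv() and _user_writev(): copies the iovec array from
	userland and transfers the vectors one at a time, stopping early on a
	short transfer, or on an error once some data has already been moved.
*/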
778 static ssize_t
779 common_user_vector_io(int fd, off_t pos, const iovec* userVecs, size_t count,
780 	bool write)
781 {
782 	if (!IS_USER_ADDRESS(userVecs))
783 		return B_BAD_ADDRESS;
784 
785 	if (pos < -1)
786 		return B_BAD_VALUE;
787 
788 	// prevent integer overflow exploit in malloc()
789 	if (count > IOV_MAX)
790 		return B_BAD_VALUE;
791 
792 	FDGetter fdGetter;
793 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, false);
794 	if (!descriptor)
795 		return B_FILE_ERROR;
796 
797 	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
798 			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
799 		return B_FILE_ERROR;
800 	}
801 
802 	iovec* vecs = (iovec*)malloc(sizeof(iovec) * count);
803 	if (vecs == NULL)
804 		return B_NO_MEMORY;
805 	MemoryDeleter _(vecs);
806 
807 	if (user_memcpy(vecs, userVecs, sizeof(iovec) * count) != B_OK)
808 		return B_BAD_ADDRESS;
809 
810 	bool movePosition = false;
811 	if (pos == -1) {
812 		pos = descriptor->pos;
813 		movePosition = true;
814 	}
815 
816 	if (write ? descriptor->ops->fd_write == NULL
817 			: descriptor->ops->fd_read == NULL) {
818 		return B_BAD_VALUE;
819 	}
820 
821 	SyscallRestartWrapper<status_t> status;
822 
823 	ssize_t bytesTransferred = 0;
824 	for (uint32 i = 0; i < count; i++) {
825 		size_t length = vecs[i].iov_len;
826 		if (write) {
827 			status = descriptor->ops->fd_write(descriptor, pos,
828 				vecs[i].iov_base, &length);
829 		} else {
830 			status = descriptor->ops->fd_read(descriptor, pos, vecs[i].iov_base,
831 				&length);
832 		}
833 
834 		if (status != B_OK) {
835 			if (bytesTransferred == 0)
836 				return status;
837 			status = B_OK;
838 			break;
839 		}
840 
841 		if ((uint64)bytesTransferred + length > SSIZE_MAX)
842 			bytesTransferred = SSIZE_MAX;
843 		else
844 			bytesTransferred += (ssize_t)length;
845 
846 		pos += length;
847 
848 		if (length < vecs[i].iov_len)
849 			break;
850 	}
851 
852 	if (movePosition)
853 		descriptor->pos = pos;
854 
855 	return bytesTransferred;
856 }
857 
858 
859 status_t
860 user_fd_kernel_ioctl(int fd, uint32 op, void* buffer, size_t length)
861 {
862 	TRACE(("user_fd_kernel_ioctl: fd %d\n", fd));
863 
864 	return fd_ioctl(false, fd, op, buffer, length);
865 }
866 
867 
868 //	#pragma mark - User syscalls
869 
870 
871 ssize_t
872 _user_read(int fd, off_t pos, void* buffer, size_t length)
873 {
874 	return common_user_io(fd, pos, buffer, length, false);
875 }
876 
877 
878 ssize_t
879 _user_readv(int fd, off_t pos, const iovec* userVecs, size_t count)
880 {
881 	return common_user_vector_io(fd, pos, userVecs, count, false);
882 }
883 
884 
885 ssize_t
886 _user_write(int fd, off_t pos, const void* buffer, size_t length)
887 {
888 	return common_user_io(fd, pos, (void*)buffer, length, true);
889 }
890 
891 
892 ssize_t
893 _user_writev(int fd, off_t pos, const iovec* userVecs, size_t count)
894 {
895 	return common_user_vector_io(fd, pos, userVecs, count, true);
896 }
897 
898 
899 off_t
900 _user_seek(int fd, off_t pos, int seekType)
901 {
902 	syscall_64_bit_return_value();
903 
904 	struct file_descriptor* descriptor;
905 
906 	descriptor = get_fd(get_current_io_context(false), fd);
907 	if (!descriptor)
908 		return B_FILE_ERROR;
909 
910 	TRACE(("user_seek(descriptor = %p)\n", descriptor));
911 
912 	if (descriptor->ops->fd_seek)
913 		pos = descriptor->ops->fd_seek(descriptor, pos, seekType);
914 	else
915 		pos = ESPIPE;
916 
917 	put_fd(descriptor);
918 	return pos;
919 }
920 
921 
922 status_t
923 _user_ioctl(int fd, uint32 op, void* buffer, size_t length)
924 {
925 	TRACE(("user_ioctl: fd %d\n", fd));
926 
927 	// "buffer" is not always a pointer depending on "op", so we cannot
928 	// check that it is a userland buffer here. Instead we check that
929 	// it is at least not within the bounds of kernel memory; as in
930 	// the cases where it is a numeric constant it is usually a low one.
931 	if (IS_KERNEL_ADDRESS(buffer))
932 		return B_BAD_ADDRESS;
933 
934 	SyscallRestartWrapper<status_t> status;
935 
936 	return status = fd_ioctl(false, fd, op, buffer, length);
937 }
938 
939 
940 ssize_t
941 _user_read_dir(int fd, struct dirent* userBuffer, size_t bufferSize,
942 	uint32 maxCount)
943 {
944 	TRACE(("user_read_dir(fd = %d, userBuffer = %p, bufferSize = %ld, count = "
945 		"%lu)\n", fd, userBuffer, bufferSize, maxCount));
946 
947 	if (maxCount == 0)
948 		return 0;
949 
950 	if (userBuffer == NULL || !IS_USER_ADDRESS(userBuffer))
951 		return B_BAD_ADDRESS;
952 
953 	// get I/O context and FD
954 	io_context* ioContext = get_current_io_context(false);
955 	FDGetter fdGetter;
956 	struct file_descriptor* descriptor = fdGetter.SetTo(ioContext, fd, false);
957 	if (descriptor == NULL)
958 		return B_FILE_ERROR;
959 
960 	if (descriptor->ops->fd_read_dir == NULL)
961 		return B_UNSUPPORTED;
962 
963 	// restrict buffer size and allocate a heap buffer
964 	if (bufferSize > kMaxReadDirBufferSize)
965 		bufferSize = kMaxReadDirBufferSize;
966 	struct dirent* buffer = (struct dirent*)malloc(bufferSize);
967 	if (buffer == NULL)
968 		return B_NO_MEMORY;
969 	MemoryDeleter bufferDeleter(buffer);
970 
971 	// read the directory
972 	uint32 count = maxCount;
973 	status_t status = descriptor->ops->fd_read_dir(ioContext, descriptor,
974 		buffer, bufferSize, &count);
975 	if (status != B_OK)
976 		return status;
977 
978 	// copy the buffer back -- determine the total buffer size first
979 	size_t sizeToCopy = 0;
980 	BytePointer<struct dirent> entry = buffer;
981 	for (uint32 i = 0; i < count; i++) {
982 		size_t length = entry->d_reclen;
983 		sizeToCopy += length;
984 		entry += length;
985 	}
986 
987 	if (user_memcpy(userBuffer, buffer, sizeToCopy) != B_OK)
988 		return B_BAD_ADDRESS;
989 
990 	return count;
991 }
992 
993 
994 status_t
995 _user_rewind_dir(int fd)
996 {
997 	struct file_descriptor* descriptor;
998 	status_t status;
999 
1000 	TRACE(("user_rewind_dir(fd = %d)\n", fd));
1001 
1002 	descriptor = get_fd(get_current_io_context(false), fd);
1003 	if (descriptor == NULL)
1004 		return B_FILE_ERROR;
1005 
1006 	if (descriptor->ops->fd_rewind_dir)
1007 		status = descriptor->ops->fd_rewind_dir(descriptor);
1008 	else
1009 		status = B_UNSUPPORTED;
1010 
1011 	put_fd(descriptor);
1012 	return status;
1013 }
1014 
1015 
1016 status_t
1017 _user_close(int fd)
1018 {
1019 	return common_close(fd, false);
1020 }
1021 
1022 
1023 int
1024 _user_dup(int fd)
1025 {
1026 	return dup_fd(fd, false);
1027 }
1028 
1029 
1030 int
1031 _user_dup2(int ofd, int nfd)
1032 {
1033 	return dup2_fd(ofd, nfd, false);
1034 }
1035 
1036 
1037 //	#pragma mark - Kernel calls
1038 
1039 
1040 ssize_t
1041 _kern_read(int fd, off_t pos, void* buffer, size_t length)
1042 {
1043 	if (pos < -1)
1044 		return B_BAD_VALUE;
1045 
1046 	FDGetter fdGetter;
1047 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1048 
1049 	if (!descriptor)
1050 		return B_FILE_ERROR;
1051 	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
1052 		return B_FILE_ERROR;
1053 
1054 	bool movePosition = false;
1055 	if (pos == -1) {
1056 		pos = descriptor->pos;
1057 		movePosition = true;
1058 	}
1059 
1060 	SyscallFlagUnsetter _;
1061 
1062 	if (descriptor->ops->fd_read == NULL)
1063 		return B_BAD_VALUE;
1064 
1065 	ssize_t bytesRead = descriptor->ops->fd_read(descriptor, pos, buffer,
1066 		&length);
1067 	if (bytesRead >= B_OK) {
1068 		if (length > SSIZE_MAX)
1069 			bytesRead = SSIZE_MAX;
1070 		else
1071 			bytesRead = (ssize_t)length;
1072 
1073 		if (movePosition)
1074 			descriptor->pos = pos + length;
1075 	}
1076 
1077 	return bytesRead;
1078 }
1079 
1080 
1081 ssize_t
1082 _kern_readv(int fd, off_t pos, const iovec* vecs, size_t count)
1083 {
1084 	bool movePosition = false;
1085 	status_t status;
1086 	uint32 i;
1087 
1088 	if (pos < -1)
1089 		return B_BAD_VALUE;
1090 
1091 	FDGetter fdGetter;
1092 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1093 
1094 	if (!descriptor)
1095 		return B_FILE_ERROR;
1096 	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
1097 		return B_FILE_ERROR;
1098 
1099 	if (pos == -1) {
1100 		pos = descriptor->pos;
1101 		movePosition = true;
1102 	}
1103 
1104 	if (descriptor->ops->fd_read == NULL)
1105 		return B_BAD_VALUE;
1106 
1107 	SyscallFlagUnsetter _;
1108 
1109 	ssize_t bytesRead = 0;
1110 
1111 	for (i = 0; i < count; i++) {
1112 		size_t length = vecs[i].iov_len;
1113 		status = descriptor->ops->fd_read(descriptor, pos, vecs[i].iov_base,
1114 			&length);
1115 		if (status != B_OK) {
1116 			bytesRead = status;
1117 			break;
1118 		}
1119 
1120 		if ((uint64)bytesRead + length > SSIZE_MAX)
1121 			bytesRead = SSIZE_MAX;
1122 		else
1123 			bytesRead += (ssize_t)length;
1124 
1125 		pos += vecs[i].iov_len;
1126 	}
1127 
1128 	if (movePosition)
1129 		descriptor->pos = pos;
1130 
1131 	return bytesRead;
1132 }
1133 
1134 
1135 ssize_t
1136 _kern_write(int fd, off_t pos, const void* buffer, size_t length)
1137 {
1138 	if (pos < -1)
1139 		return B_BAD_VALUE;
1140 
1141 	FDGetter fdGetter;
1142 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1143 
1144 	if (descriptor == NULL)
1145 		return B_FILE_ERROR;
1146 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
1147 		return B_FILE_ERROR;
1148 
1149 	bool movePosition = false;
1150 	if (pos == -1) {
1151 		pos = descriptor->pos;
1152 		movePosition = true;
1153 	}
1154 
1155 	if (descriptor->ops->fd_write == NULL)
1156 		return B_BAD_VALUE;
1157 
1158 	SyscallFlagUnsetter _;
1159 
1160 	ssize_t bytesWritten = descriptor->ops->fd_write(descriptor, pos, buffer,
1161 		&length);
1162 	if (bytesWritten >= B_OK) {
1163 		if (length > SSIZE_MAX)
1164 			bytesWritten = SSIZE_MAX;
1165 		else
1166 			bytesWritten = (ssize_t)length;
1167 
1168 		if (movePosition)
1169 			descriptor->pos = pos + length;
1170 	}
1171 
1172 	return bytesWritten;
1173 }
1174 
1175 
1176 ssize_t
1177 _kern_writev(int fd, off_t pos, const iovec* vecs, size_t count)
1178 {
1179 	bool movePosition = false;
1180 	status_t status;
1181 	uint32 i;
1182 
1183 	if (pos < -1)
1184 		return B_BAD_VALUE;
1185 
1186 	FDGetter fdGetter;
1187 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1188 
1189 	if (!descriptor)
1190 		return B_FILE_ERROR;
1191 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
1192 		return B_FILE_ERROR;
1193 
1194 	if (pos == -1) {
1195 		pos = descriptor->pos;
1196 		movePosition = true;
1197 	}
1198 
1199 	if (descriptor->ops->fd_write == NULL)
1200 		return B_BAD_VALUE;
1201 
1202 	SyscallFlagUnsetter _;
1203 
1204 	ssize_t bytesWritten = 0;
1205 
1206 	for (i = 0; i < count; i++) {
1207 		size_t length = vecs[i].iov_len;
1208 		status = descriptor->ops->fd_write(descriptor, pos,
1209 			vecs[i].iov_base, &length);
1210 		if (status != B_OK) {
1211 			bytesWritten = status;
1212 			break;
1213 		}
1214 
1215 		if ((uint64)bytesWritten + length > SSIZE_MAX)
1216 			bytesWritten = SSIZE_MAX;
1217 		else
1218 			bytesWritten += (ssize_t)length;
1219 
1220 		pos += vecs[i].iov_len;
1221 	}
1222 
1223 	if (movePosition)
1224 		descriptor->pos = pos;
1225 
1226 	return bytesWritten;
1227 }
1228 
1229 
1230 off_t
1231 _kern_seek(int fd, off_t pos, int seekType)
1232 {
1233 	struct file_descriptor* descriptor;
1234 
1235 	descriptor = get_fd(get_current_io_context(true), fd);
1236 	if (!descriptor)
1237 		return B_FILE_ERROR;
1238 
1239 	if (descriptor->ops->fd_seek)
1240 		pos = descriptor->ops->fd_seek(descriptor, pos, seekType);
1241 	else
1242 		pos = ESPIPE;
1243 
1244 	put_fd(descriptor);
1245 	return pos;
1246 }
1247 
1248 
1249 status_t
1250 _kern_ioctl(int fd, uint32 op, void* buffer, size_t length)
1251 {
1252 	TRACE(("kern_ioctl: fd %d\n", fd));
1253 
1254 	SyscallFlagUnsetter _;
1255 
1256 	return fd_ioctl(true, fd, op, buffer, length);
1257 }
1258 
1259 
1260 ssize_t
1261 _kern_read_dir(int fd, struct dirent* buffer, size_t bufferSize,
1262 	uint32 maxCount)
1263 {
1264 	struct file_descriptor* descriptor;
1265 	ssize_t retval;
1266 
1267 	TRACE(("sys_read_dir(fd = %d, buffer = %p, bufferSize = %ld, count = "
1268 		"%lu)\n",fd, buffer, bufferSize, maxCount));
1269 
1270 	struct io_context* ioContext = get_current_io_context(true);
1271 	descriptor = get_fd(ioContext, fd);
1272 	if (descriptor == NULL)
1273 		return B_FILE_ERROR;
1274 
1275 	if (descriptor->ops->fd_read_dir) {
1276 		uint32 count = maxCount;
1277 		retval = descriptor->ops->fd_read_dir(ioContext, descriptor, buffer,
1278 			bufferSize, &count);
1279 		if (retval >= 0)
1280 			retval = count;
1281 	} else
1282 		retval = B_UNSUPPORTED;
1283 
1284 	put_fd(descriptor);
1285 	return retval;
1286 }
1287 
1288 
1289 status_t
1290 _kern_rewind_dir(int fd)
1291 {
1292 	struct file_descriptor* descriptor;
1293 	status_t status;
1294 
1295 	TRACE(("sys_rewind_dir(fd = %d)\n",fd));
1296 
1297 	descriptor = get_fd(get_current_io_context(true), fd);
1298 	if (descriptor == NULL)
1299 		return B_FILE_ERROR;
1300 
1301 	if (descriptor->ops->fd_rewind_dir)
1302 		status = descriptor->ops->fd_rewind_dir(descriptor);
1303 	else
1304 		status = B_UNSUPPORTED;
1305 
1306 	put_fd(descriptor);
1307 	return status;
1308 }
1309 
1310 
1311 status_t
1312 _kern_close(int fd)
1313 {
1314 	return common_close(fd, true);
1315 }
1316 
1317 
1318 int
1319 _kern_dup(int fd)
1320 {
1321 	return dup_fd(fd, true);
1322 }
1323 
1324 
1325 int
1326 _kern_dup2(int ofd, int nfd)
1327 {
1328 	return dup2_fd(ofd, nfd, true);
1329 }
1330 
1331