xref: /haiku/src/system/kernel/fs/fd.cpp (revision d64b771b96050fca8ff1859daa5ec44ff3493af7)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  */
6 
7 
8 //! Operations on file descriptors
9 
10 
11 #include <fd.h>
12 
13 #include <stdlib.h>
14 #include <string.h>
15 
16 #include <OS.h>
17 
18 #include <AutoDeleter.h>
19 
20 #include <syscalls.h>
21 #include <syscall_restart.h>
22 #include <util/AutoLock.h>
23 #include <vfs.h>
24 #include <wait_for_objects.h>
25 
26 #include "vfs_tracing.h"
27 
28 
29 //#define TRACE_FD
30 #ifdef TRACE_FD
31 #	define TRACE(x) dprintf x
32 #else
33 #	define TRACE(x)
34 #endif
35 
36 
37 static const size_t kMaxReadDirBufferSize = 64 * 1024;
38 
39 
40 static struct file_descriptor* get_fd_locked(struct io_context* context,
41 	int fd);
42 static struct file_descriptor* remove_fd(struct io_context* context, int fd);
43 static void deselect_select_infos(file_descriptor* descriptor,
44 	select_info* infos);
45 
46 
47 struct FDGetterLocking {
48 	inline bool Lock(file_descriptor* /*lockable*/)
49 	{
50 		return false;
51 	}
52 
53 	inline void Unlock(file_descriptor* lockable)
54 	{
55 		put_fd(lockable);
56 	}
57 };
58 
59 class FDGetter : public AutoLocker<file_descriptor, FDGetterLocking> {
60 public:
61 	inline FDGetter()
62 		: AutoLocker<file_descriptor, FDGetterLocking>()
63 	{
64 	}
65 
66 	inline FDGetter(io_context* context, int fd, bool contextLocked = false)
67 		: AutoLocker<file_descriptor, FDGetterLocking>(
68 			contextLocked ? get_fd_locked(context, fd) : get_fd(context, fd))
69 	{
70 	}
71 
72 	inline file_descriptor* SetTo(io_context* context, int fd,
73 		bool contextLocked = false)
74 	{
75 		file_descriptor* descriptor
76 			= contextLocked ? get_fd_locked(context, fd) : get_fd(context, fd);
77 		AutoLocker<file_descriptor, FDGetterLocking>::SetTo(descriptor, true);
78 		return descriptor;
79 	}
80 
81 	inline file_descriptor* SetTo(int fd, bool kernel,
82 		bool contextLocked = false)
83 	{
84 		return SetTo(get_current_io_context(kernel), fd, contextLocked);
85 	}
86 
87 	inline file_descriptor* FD() const
88 	{
89 		return fLockable;
90 	}
91 };
92 
93 
94 //	#pragma mark - General fd routines
95 
96 
97 #ifdef DEBUG
98 void dump_fd(int fd, struct file_descriptor* descriptor);
99 
100 void
101 dump_fd(int fd, struct file_descriptor* descriptor)
102 {
103 	dprintf("fd[%d] = %p: type = %" B_PRId32 ", ref_count = %" B_PRId32 ", ops "
104 		"= %p, u.vnode = %p, u.mount = %p, cookie = %p, open_mode = %" B_PRIx32
105 		", pos = %" B_PRId64 "\n",
106 		fd, descriptor, descriptor->type, descriptor->ref_count,
107 		descriptor->ops, descriptor->u.vnode, descriptor->u.mount,
108 		descriptor->cookie, descriptor->open_mode, descriptor->pos);
109 }
110 #endif
111 
112 
113 /*! Allocates and initializes a new file_descriptor.
114 */
115 struct file_descriptor*
116 alloc_fd(void)
117 {
118 	file_descriptor* descriptor
119 		= (file_descriptor*)malloc(sizeof(struct file_descriptor));
120 	if (descriptor == NULL)
121 		return NULL;
122 
123 	descriptor->u.vnode = NULL;
124 	descriptor->cookie = NULL;
125 	descriptor->ref_count = 1;
126 	descriptor->open_count = 0;
127 	descriptor->open_mode = 0;
128 	descriptor->pos = 0;
129 
130 	return descriptor;
131 }
132 
133 
134 bool
135 fd_close_on_exec(struct io_context* context, int fd)
136 {
137 	return CHECK_BIT(context->fds_close_on_exec[fd / 8], fd & 7) ? true : false;
138 }
139 
140 
141 void
142 fd_set_close_on_exec(struct io_context* context, int fd, bool closeFD)
143 {
144 	if (closeFD)
145 		context->fds_close_on_exec[fd / 8] |= (1 << (fd & 7));
146 	else
147 		context->fds_close_on_exec[fd / 8] &= ~(1 << (fd & 7));
148 }
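

// Worked example (illustrative): the close-on-exec flags are kept as a
// bitmap with one bit per FD. For fd == 10, fd / 8 == 1 selects the second
// byte of fds_close_on_exec and fd & 7 == 2 selects bit 2 within it, so
//
//	fd_set_close_on_exec(context, 10, true);
//
// sets bit 2 of byte 1, and fd_close_on_exec(context, 10) then returns true.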
149 
150 
151 /*!	Searches for a free slot in the FD table of the provided I/O context,
152 	and inserts the specified descriptor into it.
153 */
154 int
155 new_fd_etc(struct io_context* context, struct file_descriptor* descriptor,
156 	int firstIndex)
157 {
158 	int fd = -1;
159 	uint32 i;
160 
161 	mutex_lock(&context->io_mutex);
162 
163 	for (i = firstIndex; i < context->table_size; i++) {
164 		if (!context->fds[i]) {
165 			fd = i;
166 			break;
167 		}
168 	}
169 	if (fd < 0) {
170 		fd = B_NO_MORE_FDS;
171 		goto err;
172 	}
173 
174 	TFD(NewFD(context, fd, descriptor));
175 
176 	context->fds[fd] = descriptor;
177 	context->num_used_fds++;
178 	atomic_add(&descriptor->open_count, 1);
179 
180 err:
181 	mutex_unlock(&context->io_mutex);
182 
183 	return fd;
184 }
185 
186 
187 int
188 new_fd(struct io_context* context, struct file_descriptor* descriptor)
189 {
190 	return new_fd_etc(context, descriptor, 0);
191 }
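

// A minimal usage sketch (illustrative, not part of the original file): how
// an open path might publish a freshly allocated descriptor in the current
// I/O context. `myOps' and `myCookie' are assumptions for the example; the
// real callers live in vfs.cpp.
#if 0
static int
example_open(struct fd_ops* myOps, void* myCookie, bool kernel)
{
	file_descriptor* descriptor = alloc_fd();
	if (descriptor == NULL)
		return B_NO_MEMORY;

	descriptor->type = FDTYPE_FILE;
	descriptor->ops = myOps;
	descriptor->cookie = myCookie;
	descriptor->open_mode = O_RDWR;

	int fd = new_fd(get_current_io_context(kernel), descriptor);
	if (fd < 0) {
		// drops the initial reference and frees the descriptor again
		put_fd(descriptor);
	}

	return fd;
}
#endif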
192 
193 
194 /*!	Reduces the descriptor's reference counter, and frees all resources
195 	when it's no longer used.
196 */
197 void
198 put_fd(struct file_descriptor* descriptor)
199 {
200 	int32 previous = atomic_add(&descriptor->ref_count, -1);
201 
202 	TFD(PutFD(descriptor));
203 
204 	TRACE(("put_fd(descriptor = %p [ref = %ld, cookie = %p])\n",
205 		descriptor, descriptor->ref_count, descriptor->cookie));
206 
207 	// free the descriptor if we don't need it anymore
208 	if (previous == 1) {
209 		// free the underlying object
210 		if (descriptor->ops != NULL && descriptor->ops->fd_free != NULL)
211 			descriptor->ops->fd_free(descriptor);
212 
213 		free(descriptor);
214 	} else if ((descriptor->open_mode & O_DISCONNECTED) != 0
215 		&& previous - 1 == descriptor->open_count
216 		&& descriptor->ops != NULL) {
217 		// the descriptor has been disconnected - it cannot
218 		// be accessed anymore, let's close it (no one is
219 		// currently accessing this descriptor)
220 
221 		if (descriptor->ops->fd_close)
222 			descriptor->ops->fd_close(descriptor);
223 		if (descriptor->ops->fd_free)
224 			descriptor->ops->fd_free(descriptor);
225 
226 		// prevent this descriptor from being closed/freed again
227 		descriptor->open_count = -1;
228 		descriptor->ref_count = -1;
229 		descriptor->ops = NULL;
230 		descriptor->u.vnode = NULL;
231 
232 		// the file descriptor is kept intact, so that it's not
233 		// reused until someone explicitly closes it
234 	}
235 }
236 
237 
238 /*!	Decrements the open counter of the file descriptor and invokes
239 	its close hook when appropriate.
240 */
241 void
242 close_fd(struct file_descriptor* descriptor)
243 {
244 	if (atomic_add(&descriptor->open_count, -1) == 1) {
245 		vfs_unlock_vnode_if_locked(descriptor);
246 
247 		if (descriptor->ops != NULL && descriptor->ops->fd_close != NULL)
248 			descriptor->ops->fd_close(descriptor);
249 	}
250 }
251 
252 
253 status_t
254 close_fd_index(struct io_context* context, int fd)
255 {
256 	struct file_descriptor* descriptor = remove_fd(context, fd);
257 
258 	if (descriptor == NULL)
259 		return B_FILE_ERROR;
260 
261 	close_fd(descriptor);
262 	put_fd(descriptor);
263 		// the reference associated with the slot
264 
265 	return B_OK;
266 }
267 
268 
269 /*!	Marks the descriptor as disconnected: its underlying object will be
270 	closed and freed as soon as possible (in one of the next calls to
271 	put_fd()), and get_fd() will no longer succeed on it.
272 	This is useful if the underlying object is gone, for instance when a
273 	(mounted) volume was removed unexpectedly.
274 */
275 void
276 disconnect_fd(struct file_descriptor* descriptor)
277 {
278 	descriptor->open_mode |= O_DISCONNECTED;
279 }
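

// Illustrative sketch (assumptions: `context', `fd' and `descriptor' refer
// to a descriptor obtained earlier via get_fd()): once disconnected, new
// get_fd() calls fail for this FD, and the put_fd() that brings the
// reference count down to the open count closes and frees the underlying
// object.
#if 0
disconnect_fd(descriptor);
ASSERT(get_fd(context, fd) == NULL);
put_fd(descriptor);
	// may invoke fd_close()/fd_free() now that no one can access it anymore
#endif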
280 
281 
282 void
283 inc_fd_ref_count(struct file_descriptor* descriptor)
284 {
285 	atomic_add(&descriptor->ref_count, 1);
286 }
287 
288 
289 static struct file_descriptor*
290 get_fd_locked(struct io_context* context, int fd)
291 {
292 	if (fd < 0 || (uint32)fd >= context->table_size)
293 		return NULL;
294 
295 	struct file_descriptor* descriptor = context->fds[fd];
296 
297 	if (descriptor != NULL) {
298 		// Disconnected descriptors cannot be accessed anymore
299 		if (descriptor->open_mode & O_DISCONNECTED)
300 			descriptor = NULL;
301 		else {
302 			TFD(GetFD(context, fd, descriptor));
303 			inc_fd_ref_count(descriptor);
304 		}
305 	}
306 
307 	return descriptor;
308 }
309 
310 
311 struct file_descriptor*
312 get_fd(struct io_context* context, int fd)
313 {
314 	MutexLocker _(context->io_mutex);
315 
316 	return get_fd_locked(context, fd);
317 }
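

// The standard lookup pattern used throughout this file (illustrative
// sketch): get_fd() returns a referenced descriptor or NULL, and every
// successful lookup must be balanced by a put_fd().
#if 0
struct file_descriptor* descriptor
	= get_fd(get_current_io_context(false), fd);
if (descriptor == NULL)
	return B_FILE_ERROR;

// ... use the descriptor, e.g. call its descriptor->ops hooks ...

put_fd(descriptor);
#endif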
318 
319 
320 struct file_descriptor*
321 get_open_fd(struct io_context* context, int fd)
322 {
323 	MutexLocker _(context->io_mutex);
324 
325 	file_descriptor* descriptor = get_fd_locked(context, fd);
326 	if (descriptor == NULL)
327 		return NULL;
328 
329 	atomic_add(&descriptor->open_count, 1);
330 
331 	return descriptor;
332 }
333 
334 
335 /*!	Removes the file descriptor from the specified slot.
336 */
337 static struct file_descriptor*
338 remove_fd(struct io_context* context, int fd)
339 {
340 	struct file_descriptor* descriptor = NULL;
341 
342 	if (fd < 0)
343 		return NULL;
344 
345 	mutex_lock(&context->io_mutex);
346 
347 	if ((uint32)fd < context->table_size)
348 		descriptor = context->fds[fd];
349 
350 	select_info* selectInfos = NULL;
351 	bool disconnected = false;
352 
353 	if (descriptor != NULL)	{
354 		// fd is valid
355 		TFD(RemoveFD(context, fd, descriptor));
356 
357 		context->fds[fd] = NULL;
358 		fd_set_close_on_exec(context, fd, false);
359 		context->num_used_fds--;
360 
361 		selectInfos = context->select_infos[fd];
362 		context->select_infos[fd] = NULL;
363 
364 		disconnected = (descriptor->open_mode & O_DISCONNECTED);
365 	}
366 
367 	mutex_unlock(&context->io_mutex);
368 
369 	if (selectInfos != NULL)
370 		deselect_select_infos(descriptor, selectInfos);
371 
372 	return disconnected ? NULL : descriptor;
373 }
374 
375 
376 static int
377 dup_fd(int fd, bool kernel)
378 {
379 	struct io_context* context = get_current_io_context(kernel);
380 	struct file_descriptor* descriptor;
381 	int status;
382 
383 	TRACE(("dup_fd: fd = %d\n", fd));
384 
385 	// Try to get the fd structure
386 	descriptor = get_fd(context, fd);
387 	if (descriptor == NULL)
388 		return B_FILE_ERROR;
389 
390 	// now put the fd in place
391 	status = new_fd(context, descriptor);
392 	if (status < 0)
393 		put_fd(descriptor);
394 	else {
395 		mutex_lock(&context->io_mutex);
396 		fd_set_close_on_exec(context, status, false);
397 		mutex_unlock(&context->io_mutex);
398 	}
399 
400 	return status;
401 }
402 
403 
404 /*!	POSIX says this should be the same as:
405 		close(newfd);
406 		fcntl(oldfd, F_DUPFD, newfd);
407 
408 	We do dup2() directly to be thread-safe.
409 */
410 static int
411 dup2_fd(int oldfd, int newfd, bool kernel)
412 {
413 	struct file_descriptor* evicted = NULL;
414 	struct io_context* context;
415 
416 	TRACE(("dup2_fd: ofd = %d, nfd = %d\n", oldfd, newfd));
417 
418 	// quick check
419 	if (oldfd < 0 || newfd < 0)
420 		return B_FILE_ERROR;
421 
422 	// Get current I/O context and lock it
423 	context = get_current_io_context(kernel);
424 	mutex_lock(&context->io_mutex);
425 
426 	// Check if the fds are valid (mutex must be locked because
427 	// the table size could be changed)
428 	if ((uint32)oldfd >= context->table_size
429 		|| (uint32)newfd >= context->table_size
430 		|| context->fds[oldfd] == NULL) {
431 		mutex_unlock(&context->io_mutex);
432 		return B_FILE_ERROR;
433 	}
434 
435 	// Check for identity; note that this check cannot be done earlier,
436 	// because we always want to return an error on invalid
437 	// handles
438 	select_info* selectInfos = NULL;
439 	if (oldfd != newfd) {
440 		// Now do the work
441 		TFD(Dup2FD(context, oldfd, newfd));
442 
443 		evicted = context->fds[newfd];
444 		selectInfos = context->select_infos[newfd];
445 		context->select_infos[newfd] = NULL;
446 		atomic_add(&context->fds[oldfd]->ref_count, 1);
447 		atomic_add(&context->fds[oldfd]->open_count, 1);
448 		context->fds[newfd] = context->fds[oldfd];
449 
450 		if (evicted == NULL)
451 			context->num_used_fds++;
452 	}
453 
454 	fd_set_close_on_exec(context, newfd, false);
455 
456 	mutex_unlock(&context->io_mutex);
457 
458 	// Say bye bye to the evicted fd
459 	if (evicted) {
460 		deselect_select_infos(evicted, selectInfos);
461 		close_fd(evicted);
462 		put_fd(evicted);
463 	}
464 
465 	return newfd;
466 }
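

// Illustrative sketch: shell-style redirection of FD 1 to an already open
// descriptor `logFD' (an assumption for the example). The equivalent POSIX
// sequence close(1) + fcntl(logFD, F_DUPFD, ...) is not atomic; another
// thread could claim slot 1 in between, which is why dup2_fd() evicts and
// installs the descriptor while holding the I/O context lock.
#if 0
int result = dup2_fd(logFD, 1, false);
if (result < 0)
	dprintf("dup2 failed: %d\n", result);
#endif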
467 
468 
469 /*!	Duplicates an FD from another team to this/the kernel team.
470 	\param fromTeam The team which owns the FD.
471 	\param fd The FD to duplicate.
472 	\param kernel If \c true, the new FD will be created in the kernel team,
473 			the current userland team otherwise.
474 	\return The newly created FD or an error code, if something went wrong.
475 */
476 int
477 dup_foreign_fd(team_id fromTeam, int fd, bool kernel)
478 {
479 	// get the I/O context for the team in question
480 	Team* team = Team::Get(fromTeam);
481 	if (team == NULL)
482 		return B_BAD_TEAM_ID;
483 	BReference<Team> teamReference(team, true);
484 
485 	io_context* fromContext = team->io_context;
486 
487 	// get the file descriptor
488 	file_descriptor* descriptor = get_fd(fromContext, fd);
489 	if (descriptor == NULL)
490 		return B_FILE_ERROR;
491 	CObjectDeleter<file_descriptor> descriptorPutter(descriptor, put_fd);
492 
493 	// create a new FD in the target I/O context
494 	int result = new_fd(get_current_io_context(kernel), descriptor);
495 	if (result >= 0) {
496 		// the descriptor reference belongs to the slot, now
497 		// the descriptor reference now belongs to the slot
498 	}
499 
500 	return result;
501 }
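

// Illustrative sketch: the kernel borrows an FD that a userland team has
// open by duplicating it into the kernel I/O context; the returned FD can
// then be used with the _kern_*() calls below. `teamID' and `userFD' are
// assumptions for the example.
#if 0
int kernelFD = dup_foreign_fd(teamID, userFD, true);
if (kernelFD >= 0) {
	// ... read from or write to kernelFD ...
	_kern_close(kernelFD);
}
#endif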
502 
503 
504 static status_t
505 fd_ioctl(bool kernelFD, int fd, uint32 op, void* buffer, size_t length)
506 {
507 	struct file_descriptor* descriptor;
508 	int status;
509 
510 	descriptor = get_fd(get_current_io_context(kernelFD), fd);
511 	if (descriptor == NULL)
512 		return B_FILE_ERROR;
513 
514 	if (descriptor->ops->fd_ioctl)
515 		status = descriptor->ops->fd_ioctl(descriptor, op, buffer, length);
516 	else
517 		status = B_DEV_INVALID_IOCTL;
518 
519 	if (status == B_DEV_INVALID_IOCTL)
520 		status = ENOTTY;
521 
522 	put_fd(descriptor);
523 	return status;
524 }
525 
526 
527 static void
528 deselect_select_infos(file_descriptor* descriptor, select_info* infos)
529 {
530 	TRACE(("deselect_select_infos(%p, %p)\n", descriptor, infos));
531 
532 	select_info* info = infos;
533 	while (info != NULL) {
534 		select_sync* sync = info->sync;
535 
536 		// deselect the selected events
537 		uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
538 		if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
539 			for (uint16 event = 1; event < 16; event++) {
540 				if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
541 					descriptor->ops->fd_deselect(descriptor, event,
542 						(selectsync*)info);
543 				}
544 			}
545 		}
546 
547 		notify_select_events(info, B_EVENT_INVALID);
548 		info = info->next;
549 		put_select_sync(sync);
550 	}
551 }
552 
553 
554 status_t
555 select_fd(int32 fd, struct select_info* info, bool kernel)
556 {
557 	TRACE(("select_fd(fd = %ld, info = %p (%p), 0x%x)\n", fd, info,
558 		info->sync, info->selected_events));
559 
560 	FDGetter fdGetter;
561 		// define before the context locker, so it will be destroyed after it
562 
563 	io_context* context = get_current_io_context(kernel);
564 	MutexLocker locker(context->io_mutex);
565 
566 	struct file_descriptor* descriptor = fdGetter.SetTo(context, fd, true);
567 	if (descriptor == NULL)
568 		return B_FILE_ERROR;
569 
570 	uint16 eventsToSelect = info->selected_events & ~B_EVENT_INVALID;
571 
572 	if (descriptor->ops->fd_select == NULL && eventsToSelect != 0) {
573 		// if the I/O subsystem doesn't support select(), we will
574 		// immediately notify the select call
575 		return notify_select_events(info, eventsToSelect);
576 	}
577 
578 	// We need the FD to stay open while we're doing this, so that no select()
579 	// or deselect() hook is called on it after it has been closed.
580 	atomic_add(&descriptor->open_count, 1);
581 
582 	locker.Unlock();
583 
584 	// select any events asked for
585 	uint32 selectedEvents = 0;
586 
587 	for (uint16 event = 1; event < 16; event++) {
588 		if ((eventsToSelect & SELECT_FLAG(event)) != 0
589 			&& descriptor->ops->fd_select(descriptor, event,
590 				(selectsync*)info) == B_OK) {
591 			selectedEvents |= SELECT_FLAG(event);
592 		}
593 	}
594 	info->selected_events = selectedEvents
595 		| (info->selected_events & B_EVENT_INVALID);
596 
597 	// Add the info to the IO context. Even if nothing has been selected, we
598 	// always support B_EVENT_INVALID.
599 	locker.Lock();
600 	if (context->fds[fd] != descriptor) {
601 		// Someone close()d the index in the meantime. deselect() all
602 		// events.
603 		info->next = NULL;
604 		deselect_select_infos(descriptor, info);
605 
606 		// Release our open reference of the descriptor.
607 		close_fd(descriptor);
608 		return B_FILE_ERROR;
609 	}
610 
611 	// The FD index hasn't changed, so we add the select info to the table.
612 
613 	info->next = context->select_infos[fd];
614 	context->select_infos[fd] = info;
615 
616 	// As long as the info is in the list, we keep a reference to the sync
617 	// object.
618 	atomic_add(&info->sync->ref_count, 1);
619 
620 	// Finally release our open reference. It is safe just to decrement,
621 	// since as long as the descriptor is associated with the slot,
622 	// someone else still has it open.
623 	atomic_add(&descriptor->open_count, -1);
624 
625 	return B_OK;
626 }
627 
628 
629 status_t
630 deselect_fd(int32 fd, struct select_info* info, bool kernel)
631 {
632 	TRACE(("deselect_fd(fd = %ld, info = %p (%p), 0x%x)\n", fd, info,
633 		info->sync, info->selected_events));
634 
635 	FDGetter fdGetter;
636 		// define before the context locker, so it will be destroyed after it
637 
638 	io_context* context = get_current_io_context(kernel);
639 	MutexLocker locker(context->io_mutex);
640 
641 	struct file_descriptor* descriptor = fdGetter.SetTo(context, fd, true);
642 	if (descriptor == NULL)
643 		return B_FILE_ERROR;
644 
645 	// remove the info from the IO context
646 
647 	select_info** infoLocation = &context->select_infos[fd];
648 	while (*infoLocation != NULL && *infoLocation != info)
649 		infoLocation = &(*infoLocation)->next;
650 
651 	// If not found, someone else beat us to it.
652 	if (*infoLocation != info)
653 		return B_OK;
654 
655 	*infoLocation = info->next;
656 
657 	locker.Unlock();
658 
659 	// deselect the selected events
660 	uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
661 	if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
662 		for (uint16 event = 1; event < 16; event++) {
663 			if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
664 				descriptor->ops->fd_deselect(descriptor, event,
665 					(selectsync*)info);
666 			}
667 		}
668 	}
669 
670 	put_select_sync(info->sync);
671 
672 	return B_OK;
673 }
674 
675 
676 /*!	This function checks if the specified fd is valid in the current
677 	context. It can be used for a quick check; the fd is not locked
678 	so it could become invalid immediately after this check.
679 */
680 bool
681 fd_is_valid(int fd, bool kernel)
682 {
683 	struct file_descriptor* descriptor
684 		= get_fd(get_current_io_context(kernel), fd);
685 	if (descriptor == NULL)
686 		return false;
687 
688 	put_fd(descriptor);
689 	return true;
690 }
691 
692 
693 struct vnode*
694 fd_vnode(struct file_descriptor* descriptor)
695 {
696 	switch (descriptor->type) {
697 		case FDTYPE_FILE:
698 		case FDTYPE_DIR:
699 		case FDTYPE_ATTR_DIR:
700 		case FDTYPE_ATTR:
701 			return descriptor->u.vnode;
702 	}
703 
704 	return NULL;
705 }
706 
707 
708 static status_t
709 common_close(int fd, bool kernel)
710 {
711 	return close_fd_index(get_current_io_context(kernel), fd);
712 }
713 
714 
715 static ssize_t
716 common_user_io(int fd, off_t pos, void* buffer, size_t length, bool write)
717 {
718 	if (!IS_USER_ADDRESS(buffer))
719 		return B_BAD_ADDRESS;
720 
721 	if (pos < -1)
722 		return B_BAD_VALUE;
723 
724 	FDGetter fdGetter;
725 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, false);
726 	if (!descriptor)
727 		return B_FILE_ERROR;
728 
729 	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
730 			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
731 		return B_FILE_ERROR;
732 	}
733 
734 	bool movePosition = false;
735 	if (pos == -1) {
736 		pos = descriptor->pos;
737 		movePosition = true;
738 	}
739 
740 	if (write ? descriptor->ops->fd_write == NULL
741 			: descriptor->ops->fd_read == NULL) {
742 		return B_BAD_VALUE;
743 	}
744 
745 	SyscallRestartWrapper<status_t> status;
746 
747 	if (write)
748 		status = descriptor->ops->fd_write(descriptor, pos, buffer, &length);
749 	else
750 		status = descriptor->ops->fd_read(descriptor, pos, buffer, &length);
751 
752 	if (status != B_OK)
753 		return status;
754 
755 	if (movePosition)
756 		descriptor->pos = pos + length;
757 
758 	return length <= SSIZE_MAX ? (ssize_t)length : SSIZE_MAX;
759 }
760 
761 
762 static ssize_t
763 common_user_vector_io(int fd, off_t pos, const iovec* userVecs, size_t count,
764 	bool write)
765 {
766 	if (!IS_USER_ADDRESS(userVecs))
767 		return B_BAD_ADDRESS;
768 
769 	if (pos < -1)
770 		return B_BAD_VALUE;
771 
772 	// prevent integer overflow exploit in malloc()
773 	if (count > IOV_MAX)
774 		return B_BAD_VALUE;
775 
776 	FDGetter fdGetter;
777 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, false);
778 	if (!descriptor)
779 		return B_FILE_ERROR;
780 
781 	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
782 			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
783 		return B_FILE_ERROR;
784 	}
785 
786 	iovec* vecs = (iovec*)malloc(sizeof(iovec) * count);
787 	if (vecs == NULL)
788 		return B_NO_MEMORY;
789 	MemoryDeleter _(vecs);
790 
791 	if (user_memcpy(vecs, userVecs, sizeof(iovec) * count) != B_OK)
792 		return B_BAD_ADDRESS;
793 
794 	bool movePosition = false;
795 	if (pos == -1) {
796 		pos = descriptor->pos;
797 		movePosition = true;
798 	}
799 
800 	if (write ? descriptor->ops->fd_write == NULL
801 			: descriptor->ops->fd_read == NULL) {
802 		return B_BAD_VALUE;
803 	}
804 
805 	SyscallRestartWrapper<status_t> status;
806 
807 	ssize_t bytesTransferred = 0;
808 	for (uint32 i = 0; i < count; i++) {
809 		size_t length = vecs[i].iov_len;
810 		if (write) {
811 			status = descriptor->ops->fd_write(descriptor, pos,
812 				vecs[i].iov_base, &length);
813 		} else {
814 			status = descriptor->ops->fd_read(descriptor, pos, vecs[i].iov_base,
815 				&length);
816 		}
817 
818 		if (status != B_OK) {
819 			if (bytesTransferred == 0)
820 				return status;
821 			status = B_OK;
822 			break;
823 		}
824 
825 		if ((uint64)bytesTransferred + length > SSIZE_MAX)
826 			bytesTransferred = SSIZE_MAX;
827 		else
828 			bytesTransferred += (ssize_t)length;
829 
830 		pos += length;
831 
832 		if (length < vecs[i].iov_len)
833 			break;
834 	}
835 
836 	if (movePosition)
837 		descriptor->pos = pos;
838 
839 	return bytesTransferred;
840 }
841 
842 
843 status_t
844 user_fd_kernel_ioctl(int fd, uint32 op, void* buffer, size_t length)
845 {
846 	TRACE(("user_fd_kernel_ioctl: fd %d\n", fd));
847 
848 	return fd_ioctl(false, fd, op, buffer, length);
849 }
850 
851 
852 //	#pragma mark - User syscalls
853 
854 
855 ssize_t
856 _user_read(int fd, off_t pos, void* buffer, size_t length)
857 {
858 	return common_user_io(fd, pos, buffer, length, false);
859 }
860 
861 
862 ssize_t
863 _user_readv(int fd, off_t pos, const iovec* userVecs, size_t count)
864 {
865 	return common_user_vector_io(fd, pos, userVecs, count, false);
866 }
867 
868 
869 ssize_t
870 _user_write(int fd, off_t pos, const void* buffer, size_t length)
871 {
872 	return common_user_io(fd, pos, (void*)buffer, length, true);
873 }
874 
875 
876 ssize_t
877 _user_writev(int fd, off_t pos, const iovec* userVecs, size_t count)
878 {
879 	return common_user_vector_io(fd, pos, userVecs, count, true);
880 }
881 
882 
883 off_t
884 _user_seek(int fd, off_t pos, int seekType)
885 {
886 	syscall_64_bit_return_value();
887 
888 	struct file_descriptor* descriptor;
889 
890 	descriptor = get_fd(get_current_io_context(false), fd);
891 	if (!descriptor)
892 		return B_FILE_ERROR;
893 
894 	TRACE(("user_seek(descriptor = %p)\n", descriptor));
895 
896 	if (descriptor->ops->fd_seek)
897 		pos = descriptor->ops->fd_seek(descriptor, pos, seekType);
898 	else
899 		pos = ESPIPE;
900 
901 	put_fd(descriptor);
902 	return pos;
903 }
904 
905 
906 status_t
907 _user_ioctl(int fd, uint32 op, void* buffer, size_t length)
908 {
909 	if (!IS_USER_ADDRESS(buffer))
910 		return B_BAD_ADDRESS;
911 
912 	TRACE(("user_ioctl: fd %d\n", fd));
913 
914 	SyscallRestartWrapper<status_t> status;
915 
916 	return status = fd_ioctl(false, fd, op, buffer, length);
917 }
918 
919 
920 ssize_t
921 _user_read_dir(int fd, struct dirent* userBuffer, size_t bufferSize,
922 	uint32 maxCount)
923 {
924 	TRACE(("user_read_dir(fd = %d, userBuffer = %p, bufferSize = %ld, count = "
925 		"%lu)\n", fd, userBuffer, bufferSize, maxCount));
926 
927 	if (maxCount == 0)
928 		return 0;
929 
930 	if (userBuffer == NULL || !IS_USER_ADDRESS(userBuffer))
931 		return B_BAD_ADDRESS;
932 
933 	// get I/O context and FD
934 	io_context* ioContext = get_current_io_context(false);
935 	FDGetter fdGetter;
936 	struct file_descriptor* descriptor = fdGetter.SetTo(ioContext, fd, false);
937 	if (descriptor == NULL)
938 		return B_FILE_ERROR;
939 
940 	if (descriptor->ops->fd_read_dir == NULL)
941 		return B_UNSUPPORTED;
942 
943 	// restrict buffer size and allocate a heap buffer
944 	if (bufferSize > kMaxReadDirBufferSize)
945 		bufferSize = kMaxReadDirBufferSize;
946 	struct dirent* buffer = (struct dirent*)malloc(bufferSize);
947 	if (buffer == NULL)
948 		return B_NO_MEMORY;
949 	MemoryDeleter bufferDeleter(buffer);
950 
951 	// read the directory
952 	uint32 count = maxCount;
953 	status_t status = descriptor->ops->fd_read_dir(ioContext, descriptor,
954 		buffer, bufferSize, &count);
955 	if (status != B_OK)
956 		return status;
957 
958 	// copy the buffer back -- determine the total buffer size first
959 	size_t sizeToCopy = 0;
960 	struct dirent* entry = buffer;
961 	for (uint32 i = 0; i < count; i++) {
962 		size_t length = entry->d_reclen;
963 		sizeToCopy += length;
964 		entry = (struct dirent*)((uint8*)entry + length);
965 	}
966 
967 	if (user_memcpy(userBuffer, buffer, sizeToCopy) != B_OK)
968 		return B_BAD_ADDRESS;
969 
970 	return count;
971 }
972 
973 
974 status_t
975 _user_rewind_dir(int fd)
976 {
977 	struct file_descriptor* descriptor;
978 	status_t status;
979 
980 	TRACE(("user_rewind_dir(fd = %d)\n", fd));
981 
982 	descriptor = get_fd(get_current_io_context(false), fd);
983 	if (descriptor == NULL)
984 		return B_FILE_ERROR;
985 
986 	if (descriptor->ops->fd_rewind_dir)
987 		status = descriptor->ops->fd_rewind_dir(descriptor);
988 	else
989 		status = B_UNSUPPORTED;
990 
991 	put_fd(descriptor);
992 	return status;
993 }
994 
995 
996 status_t
997 _user_close(int fd)
998 {
999 	return common_close(fd, false);
1000 }
1001 
1002 
1003 int
1004 _user_dup(int fd)
1005 {
1006 	return dup_fd(fd, false);
1007 }
1008 
1009 
1010 int
1011 _user_dup2(int ofd, int nfd)
1012 {
1013 	return dup2_fd(ofd, nfd, false);
1014 }
1015 
1016 
1017 //	#pragma mark - Kernel calls
1018 
1019 
1020 ssize_t
1021 _kern_read(int fd, off_t pos, void* buffer, size_t length)
1022 {
1023 	if (pos < -1)
1024 		return B_BAD_VALUE;
1025 
1026 	FDGetter fdGetter;
1027 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1028 
1029 	if (!descriptor)
1030 		return B_FILE_ERROR;
1031 	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
1032 		return B_FILE_ERROR;
1033 
1034 	bool movePosition = false;
1035 	if (pos == -1) {
1036 		pos = descriptor->pos;
1037 		movePosition = true;
1038 	}
1039 
1040 	SyscallFlagUnsetter _;
1041 
1042 	if (descriptor->ops->fd_read == NULL)
1043 		return B_BAD_VALUE;
1044 
1045 	ssize_t bytesRead = descriptor->ops->fd_read(descriptor, pos, buffer,
1046 		&length);
1047 	if (bytesRead >= B_OK) {
1048 		if (length > SSIZE_MAX)
1049 			bytesRead = SSIZE_MAX;
1050 		else
1051 			bytesRead = (ssize_t)length;
1052 
1053 		if (movePosition)
1054 			descriptor->pos = pos + length;
1055 	}
1056 
1057 	return bytesRead;
1058 }
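

// Illustrative sketch: sequential kernel-side reads. A position of -1 means
// "use and advance the descriptor's current position", just like a plain
// read(). `fd' is assumed to have been opened elsewhere (e.g. via
// _kern_open() in vfs.cpp).
#if 0
char buffer[256];
ssize_t bytesRead = _kern_read(fd, -1, buffer, sizeof(buffer));
if (bytesRead < 0)
	dprintf("read failed: %ld\n", (long)bytesRead);
#endif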
1059 
1060 
1061 ssize_t
1062 _kern_readv(int fd, off_t pos, const iovec* vecs, size_t count)
1063 {
1064 	bool movePosition = false;
1065 	status_t status;
1066 	uint32 i;
1067 
1068 	if (pos < -1)
1069 		return B_BAD_VALUE;
1070 
1071 	FDGetter fdGetter;
1072 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1073 
1074 	if (!descriptor)
1075 		return B_FILE_ERROR;
1076 	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
1077 		return B_FILE_ERROR;
1078 
1079 	if (pos == -1) {
1080 		pos = descriptor->pos;
1081 		movePosition = true;
1082 	}
1083 
1084 	if (descriptor->ops->fd_read == NULL)
1085 		return B_BAD_VALUE;
1086 
1087 	SyscallFlagUnsetter _;
1088 
1089 	ssize_t bytesRead = 0;
1090 
1091 	for (i = 0; i < count; i++) {
1092 		size_t length = vecs[i].iov_len;
1093 		status = descriptor->ops->fd_read(descriptor, pos, vecs[i].iov_base,
1094 			&length);
1095 		if (status != B_OK) {
1096 			bytesRead = status;
1097 			break;
1098 		}
1099 
1100 		if ((uint64)bytesRead + length > SSIZE_MAX)
1101 			bytesRead = SSIZE_MAX;
1102 		else
1103 			bytesRead += (ssize_t)length;
1104 
1105 		pos += vecs[i].iov_len;
1106 	}
1107 
1108 	if (movePosition)
1109 		descriptor->pos = pos;
1110 
1111 	return bytesRead;
1112 }
1113 
1114 
1115 ssize_t
1116 _kern_write(int fd, off_t pos, const void* buffer, size_t length)
1117 {
1118 	if (pos < -1)
1119 		return B_BAD_VALUE;
1120 
1121 	FDGetter fdGetter;
1122 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1123 
1124 	if (descriptor == NULL)
1125 		return B_FILE_ERROR;
1126 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
1127 		return B_FILE_ERROR;
1128 
1129 	bool movePosition = false;
1130 	if (pos == -1) {
1131 		pos = descriptor->pos;
1132 		movePosition = true;
1133 	}
1134 
1135 	if (descriptor->ops->fd_write == NULL)
1136 		return B_BAD_VALUE;
1137 
1138 	SyscallFlagUnsetter _;
1139 
1140 	ssize_t bytesWritten = descriptor->ops->fd_write(descriptor, pos, buffer,
1141 		&length);
1142 	if (bytesWritten >= B_OK) {
1143 		if (length > SSIZE_MAX)
1144 			bytesWritten = SSIZE_MAX;
1145 		else
1146 			bytesWritten = (ssize_t)length;
1147 
1148 		if (movePosition)
1149 			descriptor->pos = pos + length;
1150 	}
1151 
1152 	return bytesWritten;
1153 }
1154 
1155 
1156 ssize_t
1157 _kern_writev(int fd, off_t pos, const iovec* vecs, size_t count)
1158 {
1159 	bool movePosition = false;
1160 	status_t status;
1161 	uint32 i;
1162 
1163 	if (pos < -1)
1164 		return B_BAD_VALUE;
1165 
1166 	FDGetter fdGetter;
1167 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1168 
1169 	if (!descriptor)
1170 		return B_FILE_ERROR;
1171 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
1172 		return B_FILE_ERROR;
1173 
1174 	if (pos == -1) {
1175 		pos = descriptor->pos;
1176 		movePosition = true;
1177 	}
1178 
1179 	if (descriptor->ops->fd_write == NULL)
1180 		return B_BAD_VALUE;
1181 
1182 	SyscallFlagUnsetter _;
1183 
1184 	ssize_t bytesWritten = 0;
1185 
1186 	for (i = 0; i < count; i++) {
1187 		size_t length = vecs[i].iov_len;
1188 		status = descriptor->ops->fd_write(descriptor, pos,
1189 			vecs[i].iov_base, &length);
1190 		if (status != B_OK) {
1191 			bytesWritten = status;
1192 			break;
1193 		}
1194 
1195 		if ((uint64)bytesWritten + length > SSIZE_MAX)
1196 			bytesWritten = SSIZE_MAX;
1197 		else
1198 			bytesWritten += (ssize_t)length;
1199 
1200 		pos += vecs[i].iov_len;
1201 	}
1202 
1203 	if (movePosition)
1204 		descriptor->pos = pos;
1205 
1206 	return bytesWritten;
1207 }
1208 
1209 
1210 off_t
1211 _kern_seek(int fd, off_t pos, int seekType)
1212 {
1213 	struct file_descriptor* descriptor;
1214 
1215 	descriptor = get_fd(get_current_io_context(true), fd);
1216 	if (!descriptor)
1217 		return B_FILE_ERROR;
1218 
1219 	if (descriptor->ops->fd_seek)
1220 		pos = descriptor->ops->fd_seek(descriptor, pos, seekType);
1221 	else
1222 		pos = ESPIPE;
1223 
1224 	put_fd(descriptor);
1225 	return pos;
1226 }
1227 
1228 
1229 status_t
1230 _kern_ioctl(int fd, uint32 op, void* buffer, size_t length)
1231 {
1232 	TRACE(("kern_ioctl: fd %d\n", fd));
1233 
1234 	SyscallFlagUnsetter _;
1235 
1236 	return fd_ioctl(true, fd, op, buffer, length);
1237 }
1238 
1239 
1240 ssize_t
1241 _kern_read_dir(int fd, struct dirent* buffer, size_t bufferSize,
1242 	uint32 maxCount)
1243 {
1244 	struct file_descriptor* descriptor;
1245 	ssize_t retval;
1246 
1247 	TRACE(("sys_read_dir(fd = %d, buffer = %p, bufferSize = %ld, count = "
1248 		"%lu)\n", fd, buffer, bufferSize, maxCount));
1249 
1250 	struct io_context* ioContext = get_current_io_context(true);
1251 	descriptor = get_fd(ioContext, fd);
1252 	if (descriptor == NULL)
1253 		return B_FILE_ERROR;
1254 
1255 	if (descriptor->ops->fd_read_dir) {
1256 		uint32 count = maxCount;
1257 		retval = descriptor->ops->fd_read_dir(ioContext, descriptor, buffer,
1258 			bufferSize, &count);
1259 		if (retval >= 0)
1260 			retval = count;
1261 	} else
1262 		retval = B_UNSUPPORTED;
1263 
1264 	put_fd(descriptor);
1265 	return retval;
1266 }
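

// Illustrative sketch: iterating a directory from kernel code, one entry at
// a time. `dirFD' is assumed to be an open directory FD; _kern_read_dir()
// returns the number of entries read (0 at the end of the directory).
#if 0
char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
struct dirent* entry = (struct dirent*)buffer;
while (_kern_read_dir(dirFD, entry, sizeof(buffer), 1) == 1)
	dprintf("entry: %s\n", entry->d_name);
#endif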
1267 
1268 
1269 status_t
1270 _kern_rewind_dir(int fd)
1271 {
1272 	struct file_descriptor* descriptor;
1273 	status_t status;
1274 
1275 	TRACE(("sys_rewind_dir(fd = %d)\n", fd));
1276 
1277 	descriptor = get_fd(get_current_io_context(true), fd);
1278 	if (descriptor == NULL)
1279 		return B_FILE_ERROR;
1280 
1281 	if (descriptor->ops->fd_rewind_dir)
1282 		status = descriptor->ops->fd_rewind_dir(descriptor);
1283 	else
1284 		status = B_UNSUPPORTED;
1285 
1286 	put_fd(descriptor);
1287 	return status;
1288 }
1289 
1290 
1291 status_t
1292 _kern_close(int fd)
1293 {
1294 	return common_close(fd, true);
1295 }
1296 
1297 
1298 int
1299 _kern_dup(int fd)
1300 {
1301 	return dup_fd(fd, true);
1302 }
1303 
1304 
1305 int
1306 _kern_dup2(int ofd, int nfd)
1307 {
1308 	return dup2_fd(ofd, nfd, true);
1309 }
1310 
1311