xref: /haiku/src/system/kernel/fs/fd.cpp (revision 3216a856947f9746d8c4c1e720ccf3dc5c0ac786)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2018, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  */
6 
7 
8 //! Operations on file descriptors
9 
10 
11 #include <fd.h>
12 
13 #include <stdlib.h>
14 #include <string.h>
15 
16 #include <OS.h>
17 
18 #include <AutoDeleter.h>
19 #include <BytePointer.h>
20 
21 #include <syscalls.h>
22 #include <syscall_restart.h>
23 #include <slab/Slab.h>
24 #include <util/AutoLock.h>
25 #include <vfs.h>
26 #include <wait_for_objects.h>
27 
28 #include "vfs_tracing.h"
29 
30 
31 //#define TRACE_FD
32 #ifdef TRACE_FD
33 #	define TRACE(x) dprintf x
34 #else
35 #	define TRACE(x)
36 #endif
37 
38 
39 static const size_t kMaxReadDirBufferSize = 64 * 1024;
40 
41 extern object_cache* sFileDescriptorCache;
42 
43 
44 static struct file_descriptor* get_fd_locked(struct io_context* context,
45 	int fd);
46 static struct file_descriptor* remove_fd(struct io_context* context, int fd);
47 static void deselect_select_infos(file_descriptor* descriptor,
48 	select_info* infos, bool putSyncObjects);
49 
50 
51 struct FDGetterLocking {
52 	inline bool Lock(file_descriptor* /*lockable*/)
53 	{
54 		return false;
55 	}
56 
57 	inline void Unlock(file_descriptor* lockable)
58 	{
59 		put_fd(lockable);
60 	}
61 };
62 
63 class FDGetter : public AutoLocker<file_descriptor, FDGetterLocking> {
64 public:
65 	inline FDGetter()
66 		: AutoLocker<file_descriptor, FDGetterLocking>()
67 	{
68 	}
69 
70 	inline FDGetter(io_context* context, int fd, bool contextLocked = false)
71 		: AutoLocker<file_descriptor, FDGetterLocking>(
72 			contextLocked ? get_fd_locked(context, fd) : get_fd(context, fd))
73 	{
74 	}
75 
76 	inline file_descriptor* SetTo(io_context* context, int fd,
77 		bool contextLocked = false)
78 	{
79 		file_descriptor* descriptor
80 			= contextLocked ? get_fd_locked(context, fd) : get_fd(context, fd);
81 		AutoLocker<file_descriptor, FDGetterLocking>::SetTo(descriptor, true);
82 		return descriptor;
83 	}
84 
85 	inline file_descriptor* SetTo(int fd, bool kernel,
86 		bool contextLocked = false)
87 	{
88 		return SetTo(get_current_io_context(kernel), fd, contextLocked);
89 	}
90 
91 	inline file_descriptor* FD() const
92 	{
93 		return fLockable;
94 	}
95 };
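// Illustrative usage sketch (editor's addition, not original source): FDGetter
// pairs get_fd()/get_fd_locked() with an automatic put_fd() when it goes out
// of scope, which is how the I/O functions below use it:
//
//	FDGetter fdGetter;
//	struct file_descriptor* descriptor = fdGetter.SetTo(fd, kernel);
//	if (descriptor == NULL)
//		return B_FILE_ERROR;
//	// ... use descriptor; put_fd() runs when fdGetter is destroyed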
96 
97 
98 //	#pragma mark - General fd routines
99 
100 
101 #ifdef DEBUG
102 void dump_fd(int fd, struct file_descriptor* descriptor);
103 
104 void
105 dump_fd(int fd, struct file_descriptor* descriptor)
106 {
107 	dprintf("fd[%d] = %p: type = %" B_PRId32 ", ref_count = %" B_PRId32 ", ops "
108 		"= %p, u.vnode = %p, u.mount = %p, cookie = %p, open_mode = %" B_PRIx32
109 		", pos = %" B_PRId64 "\n",
110 		fd, descriptor, descriptor->type, descriptor->ref_count,
111 		descriptor->ops, descriptor->u.vnode, descriptor->u.mount,
112 		descriptor->cookie, descriptor->open_mode, descriptor->pos);
113 }
114 #endif
115 
116 
117 /*! Allocates and initializes a new file_descriptor.
118 */
119 struct file_descriptor*
120 alloc_fd(void)
121 {
122 	file_descriptor* descriptor
123 		= (file_descriptor*)object_cache_alloc(sFileDescriptorCache, 0);
124 	if (descriptor == NULL)
125 		return NULL;
126 
127 	descriptor->u.vnode = NULL;
128 	descriptor->cookie = NULL;
129 	descriptor->ref_count = 1;
130 	descriptor->open_count = 0;
131 	descriptor->open_mode = 0;
132 	descriptor->pos = 0;
133 
134 	return descriptor;
135 }
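// Typical call sequence (illustrative sketch; "someOps" and "someCookie" are
// hypothetical): the caller fills in the remaining fields and publishes the
// descriptor with new_fd(), whereupon the initial reference from alloc_fd()
// becomes the one associated with the table slot (cf. close_fd_index()).
//
//	file_descriptor* descriptor = alloc_fd();
//	if (descriptor == NULL)
//		return B_NO_MEMORY;
//	descriptor->type = FDTYPE_FILE;
//	descriptor->ops = &someOps;
//	descriptor->cookie = someCookie;
//	int fd = new_fd(context, descriptor);
//	if (fd < 0)
//		put_fd(descriptor);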
136 
137 
138 bool
139 fd_close_on_exec(struct io_context* context, int fd)
140 {
141 	return CHECK_BIT(context->fds_close_on_exec[fd / 8], fd & 7) ? true : false;
142 }
143 
144 
145 void
146 fd_set_close_on_exec(struct io_context* context, int fd, bool closeFD)
147 {
148 	if (closeFD)
149 		context->fds_close_on_exec[fd / 8] |= (1 << (fd & 7));
150 	else
151 		context->fds_close_on_exec[fd / 8] &= ~(1 << (fd & 7));
152 }
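// The close-on-exec flags form a simple bitmap: descriptor fd is tracked in
// byte fd / 8, bit fd & 7. For example, fd 10 maps to fds_close_on_exec[1],
// bit 2 (mask 0x04):
//
//	fd_set_close_on_exec(context, 10, true);    // sets bit 0x04 in byte 1
//	bool isSet = fd_close_on_exec(context, 10); // now returns true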
153 
154 
155 /*!	Searches for a free slot in the FD table of the provided I/O context and
156 	inserts the specified descriptor into it.
157 */
158 int
159 new_fd_etc(struct io_context* context, struct file_descriptor* descriptor,
160 	int firstIndex)
161 {
162 	int fd = -1;
163 	uint32 i;
164 
165 	if (firstIndex < 0 || (uint32)firstIndex >= context->table_size)
166 		return B_BAD_VALUE;
167 
168 	mutex_lock(&context->io_mutex);
169 
170 	for (i = firstIndex; i < context->table_size; i++) {
171 		if (!context->fds[i]) {
172 			fd = i;
173 			break;
174 		}
175 	}
176 	if (fd < 0) {
177 		fd = B_NO_MORE_FDS;
178 		goto err;
179 	}
180 
181 	TFD(NewFD(context, fd, descriptor));
182 
183 	context->fds[fd] = descriptor;
184 	context->num_used_fds++;
185 	atomic_add(&descriptor->open_count, 1);
186 
187 err:
188 	mutex_unlock(&context->io_mutex);
189 
190 	return fd;
191 }
192 
193 
194 int
195 new_fd(struct io_context* context, struct file_descriptor* descriptor)
196 {
197 	return new_fd_etc(context, descriptor, 0);
198 }
199 
200 
201 /*!	Decrements the descriptor's reference count and frees all resources
202 	once it is no longer used.
203 */
204 void
205 put_fd(struct file_descriptor* descriptor)
206 {
207 	int32 previous = atomic_add(&descriptor->ref_count, -1);
208 
209 	TFD(PutFD(descriptor));
210 
211 	TRACE(("put_fd(descriptor = %p [ref = %ld, cookie = %p])\n",
212 		descriptor, descriptor->ref_count, descriptor->cookie));
213 
214 	// free the descriptor if we don't need it anymore
215 	if (previous == 1) {
216 		// free the underlying object
217 		if (descriptor->ops != NULL && descriptor->ops->fd_free != NULL)
218 			descriptor->ops->fd_free(descriptor);
219 
220 		object_cache_free(sFileDescriptorCache, descriptor, 0);
221 	} else if ((descriptor->open_mode & O_DISCONNECTED) != 0
222 		&& previous - 1 == descriptor->open_count
223 		&& descriptor->ops != NULL) {
224 		// The descriptor has been disconnected: it can no longer be
225 		// accessed. Since no one is currently using it, close it
226 		// now.
227 
228 		if (descriptor->ops->fd_close)
229 			descriptor->ops->fd_close(descriptor);
230 		if (descriptor->ops->fd_free)
231 			descriptor->ops->fd_free(descriptor);
232 
233 		// prevent this descriptor from being closed/freed again
234 		descriptor->ops = NULL;
235 		descriptor->u.vnode = NULL;
236 
237 		// the file descriptor is kept intact, so that it's not
238 		// reused until someone explicitly closes it
239 	}
240 }
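// Editor's summary of the two counters used above: ref_count counts
// references to the descriptor itself (the table slot, transient get_fd()
// users, additional slots created by dup()/dup2()); put_fd() frees the
// descriptor once it drops to zero. open_count counts the holders that keep
// the underlying object open; close_fd() invokes fd_close when it reaches
// zero. A disconnected descriptor is closed early, as soon as only its open
// references remain.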
241 
242 
243 /*!	Decrements the open counter of the file descriptor and invokes
244 	its close hook when appropriate.
245 */
246 void
247 close_fd(struct io_context* context, struct file_descriptor* descriptor)
248 {
249 	// POSIX advisory locks need to be released when any file descriptor closes
250 	if (descriptor->type == FDTYPE_FILE)
251 		vfs_release_posix_lock(context, descriptor);
252 
253 	if (atomic_add(&descriptor->open_count, -1) == 1) {
254 		vfs_unlock_vnode_if_locked(descriptor);
255 
256 		if (descriptor->ops != NULL && descriptor->ops->fd_close != NULL)
257 			descriptor->ops->fd_close(descriptor);
258 	}
259 }
260 
261 
262 status_t
263 close_fd_index(struct io_context* context, int fd)
264 {
265 	struct file_descriptor* descriptor = remove_fd(context, fd);
266 
267 	if (descriptor == NULL)
268 		return B_FILE_ERROR;
269 
270 	close_fd(context, descriptor);
271 	put_fd(descriptor);
272 		// the reference associated with the slot
273 
274 	return B_OK;
275 }
276 
277 
278 /*!	Marks the descriptor as disconnected: its underlying object will be
279 	closed and freed as soon as possible (in one of the next calls to
280 	put_fd()), and get_fd() will no longer succeed on it.
281 	This is useful if the underlying object is gone, for instance when a
282 	(mounted) volume has been removed unexpectedly.
283 */
284 void
285 disconnect_fd(struct file_descriptor* descriptor)
286 {
287 	descriptor->open_mode |= O_DISCONNECTED;
288 }
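// Once disconnected, get_fd()/get_fd_locked() refuse to hand out the
// descriptor, dup2_fd() rejects it as a source, and put_fd() closes and
// frees the underlying object as soon as only its open references remain
// (see above).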
289 
290 
291 void
292 inc_fd_ref_count(struct file_descriptor* descriptor)
293 {
294 	atomic_add(&descriptor->ref_count, 1);
295 }
296 
297 
298 static struct file_descriptor*
299 get_fd_locked(struct io_context* context, int fd)
300 {
301 	if (fd < 0 || (uint32)fd >= context->table_size)
302 		return NULL;
303 
304 	struct file_descriptor* descriptor = context->fds[fd];
305 
306 	if (descriptor != NULL) {
307 		// disconnected descriptors cannot be accessed anymore
308 		if (descriptor->open_mode & O_DISCONNECTED)
309 			return NULL;
310 
311 		TFD(GetFD(context, fd, descriptor));
312 		inc_fd_ref_count(descriptor);
313 	}
314 
315 	return descriptor;
316 }
317 
318 
319 struct file_descriptor*
320 get_fd(struct io_context* context, int fd)
321 {
322 	MutexLocker _(context->io_mutex);
323 
324 	return get_fd_locked(context, fd);
325 }
326 
327 
328 struct file_descriptor*
329 get_open_fd(struct io_context* context, int fd)
330 {
331 	MutexLocker _(context->io_mutex);
332 
333 	file_descriptor* descriptor = get_fd_locked(context, fd);
334 	if (descriptor == NULL)
335 		return NULL;
336 
337 	atomic_add(&descriptor->open_count, 1);
338 
339 	return descriptor;
340 }
341 
342 
343 /*!	Removes the file descriptor from the specified slot.
344 */
345 static struct file_descriptor*
346 remove_fd(struct io_context* context, int fd)
347 {
348 	struct file_descriptor* descriptor = NULL;
349 
350 	if (fd < 0)
351 		return NULL;
352 
353 	mutex_lock(&context->io_mutex);
354 
355 	if ((uint32)fd < context->table_size)
356 		descriptor = context->fds[fd];
357 
358 	select_info* selectInfos = NULL;
359 	bool disconnected = false;
360 
361 	if (descriptor != NULL)	{
362 		// fd is valid
363 		TFD(RemoveFD(context, fd, descriptor));
364 
365 		context->fds[fd] = NULL;
366 		fd_set_close_on_exec(context, fd, false);
367 		context->num_used_fds--;
368 
369 		selectInfos = context->select_infos[fd];
370 		context->select_infos[fd] = NULL;
371 
372 		disconnected = (descriptor->open_mode & O_DISCONNECTED);
373 	}
374 
375 	mutex_unlock(&context->io_mutex);
376 
377 	if (selectInfos != NULL)
378 		deselect_select_infos(descriptor, selectInfos, true);
379 
380 	return disconnected ? NULL : descriptor;
381 }
382 
383 
384 static int
385 dup_fd(int fd, bool kernel)
386 {
387 	struct io_context* context = get_current_io_context(kernel);
388 	struct file_descriptor* descriptor;
389 	int status;
390 
391 	TRACE(("dup_fd: fd = %d\n", fd));
392 
393 	// Try to get the fd structure
394 	descriptor = get_fd(context, fd);
395 	if (descriptor == NULL)
396 		return B_FILE_ERROR;
397 
398 	// now put the fd in place
399 	status = new_fd(context, descriptor);
400 	if (status < 0)
401 		put_fd(descriptor);
402 	else {
403 		mutex_lock(&context->io_mutex);
404 		fd_set_close_on_exec(context, status, false);
405 		mutex_unlock(&context->io_mutex);
406 	}
407 
408 	return status;
409 }
410 
411 
412 /*!	POSIX says this should be the same as:
413 		close(newfd);
414 		fcntl(oldfd, F_DUPFD, newfd);
415 
416 	We do dup2() directly to be thread-safe.
417 */
418 static int
419 dup2_fd(int oldfd, int newfd, bool kernel)
420 {
421 	struct file_descriptor* evicted = NULL;
422 	struct io_context* context;
423 
424 	TRACE(("dup2_fd: ofd = %d, nfd = %d\n", oldfd, newfd));
425 
426 	// quick check
427 	if (oldfd < 0 || newfd < 0)
428 		return B_FILE_ERROR;
429 
430 	// Get current I/O context and lock it
431 	context = get_current_io_context(kernel);
432 	mutex_lock(&context->io_mutex);
433 
434 	// Check if the fds are valid (mutex must be locked because
435 	// the table size could be changed)
436 	if ((uint32)oldfd >= context->table_size
437 		|| (uint32)newfd >= context->table_size
438 		|| context->fds[oldfd] == NULL
439 		|| (context->fds[oldfd]->open_mode & O_DISCONNECTED) != 0) {
440 		mutex_unlock(&context->io_mutex);
441 		return B_FILE_ERROR;
442 	}
443 
444 	// Check for identity; note that this check cannot be done earlier,
445 	// because we always want to return an error on invalid
446 	// handles
447 	select_info* selectInfos = NULL;
448 	if (oldfd != newfd) {
449 		// Now do the work
450 		TFD(Dup2FD(context, oldfd, newfd));
451 
452 		evicted = context->fds[newfd];
453 		selectInfos = context->select_infos[newfd];
454 		context->select_infos[newfd] = NULL;
455 		atomic_add(&context->fds[oldfd]->ref_count, 1);
456 		atomic_add(&context->fds[oldfd]->open_count, 1);
457 		context->fds[newfd] = context->fds[oldfd];
458 
459 		if (evicted == NULL)
460 			context->num_used_fds++;
461 	}
462 
463 	fd_set_close_on_exec(context, newfd, false);
464 
465 	mutex_unlock(&context->io_mutex);
466 
467 	// Say bye bye to the evicted fd
468 	if (evicted) {
469 		deselect_select_infos(evicted, selectInfos, true);
470 		close_fd(context, evicted);
471 		put_fd(evicted);
472 	}
473 
474 	return newfd;
475 }
476 
477 
478 /*!	Duplicates an FD from another team to this/the kernel team.
479 	\param fromTeam The team which owns the FD.
480 	\param fd The FD to duplicate.
481 	\param kernel If \c true, the new FD will be created in the kernel team,
482 			the current userland team otherwise.
483 	\return The newly created FD or an error code, if something went wrong.
484 */
485 int
486 dup_foreign_fd(team_id fromTeam, int fd, bool kernel)
487 {
488 	// get the I/O context for the team in question
489 	Team* team = Team::Get(fromTeam);
490 	if (team == NULL)
491 		return B_BAD_TEAM_ID;
492 	BReference<Team> teamReference(team, true);
493 
494 	io_context* fromContext = team->io_context;
495 
496 	// get the file descriptor
497 	file_descriptor* descriptor = get_fd(fromContext, fd);
498 	if (descriptor == NULL)
499 		return B_FILE_ERROR;
500 	CObjectDeleter<file_descriptor> descriptorPutter(descriptor, put_fd);
501 
502 	// create a new FD in the target I/O context
503 	int result = new_fd(get_current_io_context(kernel), descriptor);
504 	if (result >= 0) {
505 		// the descriptor reference belongs to the slot, now
506 		descriptorPutter.Detach();
507 	}
508 
509 	return result;
510 }
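// Illustrative call (editor's sketch; "sourceTeam" is a hypothetical team ID):
// duplicate FD 3 of another team into the kernel team's I/O context.
//
//	int localFD = dup_foreign_fd(sourceTeam, 3, true);
//	if (localFD < 0)
//		return localFD;
//	// ... use localFD; eventually close it via _kern_close(localFD)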
511 
512 
513 static status_t
514 fd_ioctl(bool kernelFD, int fd, uint32 op, void* buffer, size_t length)
515 {
516 	struct file_descriptor* descriptor;
517 	int status;
518 
519 	descriptor = get_fd(get_current_io_context(kernelFD), fd);
520 	if (descriptor == NULL)
521 		return B_FILE_ERROR;
522 
523 	if (descriptor->ops->fd_ioctl)
524 		status = descriptor->ops->fd_ioctl(descriptor, op, buffer, length);
525 	else
526 		status = B_DEV_INVALID_IOCTL;
527 
528 	if (status == B_DEV_INVALID_IOCTL)
529 		status = ENOTTY;
530 
531 	put_fd(descriptor);
532 	return status;
533 }
534 
535 
536 static void
537 deselect_select_infos(file_descriptor* descriptor, select_info* infos,
538 	bool putSyncObjects)
539 {
540 	TRACE(("deselect_select_infos(%p, %p)\n", descriptor, infos));
541 
542 	select_info* info = infos;
543 	while (info != NULL) {
544 		select_sync* sync = info->sync;
545 
546 		// deselect the selected events
547 		uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
548 		if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
549 			for (uint16 event = 1; event < 16; event++) {
550 				if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
551 					descriptor->ops->fd_deselect(descriptor, event,
552 						(selectsync*)info);
553 				}
554 			}
555 		}
556 
557 		notify_select_events(info, B_EVENT_INVALID);
558 		info = info->next;
559 
560 		if (putSyncObjects)
561 			put_select_sync(sync);
562 	}
563 }
564 
565 
566 status_t
567 select_fd(int32 fd, struct select_info* info, bool kernel)
568 {
569 	TRACE(("select_fd(fd = %ld, info = %p (%p), 0x%x)\n", fd, info,
570 		info->sync, info->selected_events));
571 
572 	FDGetter fdGetter;
573 		// define before the context locker, so it will be destroyed after it
574 
575 	io_context* context = get_current_io_context(kernel);
576 	MutexLocker locker(context->io_mutex);
577 
578 	struct file_descriptor* descriptor = fdGetter.SetTo(context, fd, true);
579 	if (descriptor == NULL)
580 		return B_FILE_ERROR;
581 
582 	uint16 eventsToSelect = info->selected_events & ~B_EVENT_INVALID;
583 
584 	if (descriptor->ops->fd_select == NULL) {
585 		// if the I/O subsystem doesn't support select(), we will
586 		// immediately notify the select call
587 		eventsToSelect &= ~SELECT_OUTPUT_ONLY_FLAGS;
588 		if (eventsToSelect != 0)
589 			return notify_select_events(info, eventsToSelect);
590 		else
591 			return B_OK;
592 	}
593 
594 	// We need the FD to stay open while we're doing this, so that select()/
595 	// deselect() won't be called on it after it has been closed.
596 	atomic_add(&descriptor->open_count, 1);
597 
598 	locker.Unlock();
599 
600 	// select any events asked for
601 	uint32 selectedEvents = 0;
602 
603 	for (uint16 event = 1; event < 16; event++) {
604 		if ((eventsToSelect & SELECT_FLAG(event)) != 0
605 			&& descriptor->ops->fd_select(descriptor, event,
606 				(selectsync*)info) == B_OK) {
607 			selectedEvents |= SELECT_FLAG(event);
608 		}
609 	}
610 	info->selected_events = selectedEvents
611 		| (info->selected_events & B_EVENT_INVALID);
612 
613 	// Add the info to the I/O context, even if nothing has been selected:
614 	// we always support B_EVENT_INVALID.
615 	locker.Lock();
616 	if (context->fds[fd] != descriptor) {
617 		// Someone close()d the index in the meantime. deselect() all
618 		// events.
619 		info->next = NULL;
620 		deselect_select_infos(descriptor, info, false);
621 
622 		// Release our open reference of the descriptor.
623 		close_fd(context, descriptor);
624 		return B_FILE_ERROR;
625 	}
626 
627 	// The FD index hasn't changed, so we add the select info to the table.
628 
629 	info->next = context->select_infos[fd];
630 	context->select_infos[fd] = info;
631 
632 	// As long as the info is in the list, we keep a reference to the sync
633 	// object.
634 	atomic_add(&info->sync->ref_count, 1);
635 
636 	// Finally release our open reference. It is safe just to decrement,
637 	// since as long as the descriptor is associated with the slot,
638 	// someone else still has it open.
639 	atomic_add(&descriptor->open_count, -1);
640 
641 	return B_OK;
642 }
643 
644 
645 status_t
646 deselect_fd(int32 fd, struct select_info* info, bool kernel)
647 {
648 	TRACE(("deselect_fd(fd = %ld, info = %p (%p), 0x%x)\n", fd, info,
649 		info->sync, info->selected_events));
650 
651 	FDGetter fdGetter;
652 		// define before the context locker, so it will be destroyed after it
653 
654 	io_context* context = get_current_io_context(kernel);
655 	MutexLocker locker(context->io_mutex);
656 
657 	struct file_descriptor* descriptor = fdGetter.SetTo(context, fd, true);
658 	if (descriptor == NULL)
659 		return B_FILE_ERROR;
660 
661 	// remove the info from the IO context
662 
663 	select_info** infoLocation = &context->select_infos[fd];
664 	while (*infoLocation != NULL && *infoLocation != info)
665 		infoLocation = &(*infoLocation)->next;
666 
667 	// If not found, someone else beat us to it.
668 	if (*infoLocation != info)
669 		return B_OK;
670 
671 	*infoLocation = info->next;
672 
673 	locker.Unlock();
674 
675 	// deselect the selected events
676 	uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
677 	if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
678 		for (uint16 event = 1; event < 16; event++) {
679 			if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
680 				descriptor->ops->fd_deselect(descriptor, event,
681 					(selectsync*)info);
682 			}
683 		}
684 	}
685 
686 	put_select_sync(info->sync);
687 
688 	return B_OK;
689 }
690 
691 
692 /*!	This function checks if the specified fd is valid in the current
693 	context. It can be used for a quick check; the fd is not locked
694 	so it could become invalid immediately after this check.
695 */
696 bool
697 fd_is_valid(int fd, bool kernel)
698 {
699 	struct file_descriptor* descriptor
700 		= get_fd(get_current_io_context(kernel), fd);
701 	if (descriptor == NULL)
702 		return false;
703 
704 	put_fd(descriptor);
705 	return true;
706 }
707 
708 
709 struct vnode*
710 fd_vnode(struct file_descriptor* descriptor)
711 {
712 	switch (descriptor->type) {
713 		case FDTYPE_FILE:
714 		case FDTYPE_DIR:
715 		case FDTYPE_ATTR_DIR:
716 		case FDTYPE_ATTR:
717 			return descriptor->u.vnode;
718 	}
719 
720 	return NULL;
721 }
722 
723 
724 static status_t
725 common_close(int fd, bool kernel)
726 {
727 	return close_fd_index(get_current_io_context(kernel), fd);
728 }
729 
730 
731 static ssize_t
732 common_user_io(int fd, off_t pos, void* buffer, size_t length, bool write)
733 {
734 	if (pos < -1)
735 		return B_BAD_VALUE;
736 
737 	FDGetter fdGetter;
738 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, false);
739 	if (!descriptor)
740 		return B_FILE_ERROR;
741 
742 	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
743 			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
744 		return B_FILE_ERROR;
745 	}
746 
747 	bool movePosition = false;
748 	if (pos == -1) {
749 		pos = descriptor->pos;
750 		movePosition = true;
751 	}
752 
753 	if (write ? descriptor->ops->fd_write == NULL
754 			: descriptor->ops->fd_read == NULL) {
755 		return B_BAD_VALUE;
756 	}
757 
758 	if (length == 0)
759 		return 0;
760 
761 	if (!IS_USER_ADDRESS(buffer))
762 		return B_BAD_ADDRESS;
763 
764 	SyscallRestartWrapper<status_t> status;
765 
766 	if (write)
767 		status = descriptor->ops->fd_write(descriptor, pos, buffer, &length);
768 	else
769 		status = descriptor->ops->fd_read(descriptor, pos, buffer, &length);
770 
771 	if (status != B_OK)
772 		return status;
773 
774 	if (movePosition) {
775 		descriptor->pos = write && (descriptor->open_mode & O_APPEND) != 0
776 			? descriptor->ops->fd_seek(descriptor, 0, SEEK_END) : pos + length;
777 	}
778 
779 	return length <= SSIZE_MAX ? (ssize_t)length : SSIZE_MAX;
780 }
781 
782 
783 static ssize_t
784 common_user_vector_io(int fd, off_t pos, const iovec* userVecs, size_t count,
785 	bool write)
786 {
787 	if (!IS_USER_ADDRESS(userVecs))
788 		return B_BAD_ADDRESS;
789 
790 	if (pos < -1)
791 		return B_BAD_VALUE;
792 
793 	// prevent integer overflow exploit in malloc()
794 	if (count > IOV_MAX)
795 		return B_BAD_VALUE;
796 
797 	FDGetter fdGetter;
798 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, false);
799 	if (!descriptor)
800 		return B_FILE_ERROR;
801 
802 	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
803 			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
804 		return B_FILE_ERROR;
805 	}
806 
807 	iovec* vecs = (iovec*)malloc(sizeof(iovec) * count);
808 	if (vecs == NULL)
809 		return B_NO_MEMORY;
810 	MemoryDeleter _(vecs);
811 
812 	if (user_memcpy(vecs, userVecs, sizeof(iovec) * count) != B_OK)
813 		return B_BAD_ADDRESS;
814 
815 	bool movePosition = false;
816 	if (pos == -1) {
817 		pos = descriptor->pos;
818 		movePosition = true;
819 	}
820 
821 	if (write ? descriptor->ops->fd_write == NULL
822 			: descriptor->ops->fd_read == NULL) {
823 		return B_BAD_VALUE;
824 	}
825 
826 	SyscallRestartWrapper<status_t> status;
827 
828 	ssize_t bytesTransferred = 0;
829 	for (uint32 i = 0; i < count; i++) {
830 		if (vecs[i].iov_base == NULL)
831 			continue;
832 		if (!IS_USER_ADDRESS(vecs[i].iov_base)) {
833 			status = B_BAD_ADDRESS;
834 			if (bytesTransferred == 0)
835 				return status;
836 			break;
837 		}
838 
839 		size_t length = vecs[i].iov_len;
840 		if (write) {
841 			status = descriptor->ops->fd_write(descriptor, pos,
842 				vecs[i].iov_base, &length);
843 		} else {
844 			status = descriptor->ops->fd_read(descriptor, pos, vecs[i].iov_base,
845 				&length);
846 		}
847 
848 		if (status != B_OK) {
849 			if (bytesTransferred == 0)
850 				return status;
851 			status = B_OK;
852 			break;
853 		}
854 
855 		if ((uint64)bytesTransferred + length > SSIZE_MAX)
856 			bytesTransferred = SSIZE_MAX;
857 		else
858 			bytesTransferred += (ssize_t)length;
859 
860 		pos += length;
861 
862 		if (length < vecs[i].iov_len)
863 			break;
864 	}
865 
866 	if (movePosition) {
867 		descriptor->pos = write && (descriptor->open_mode & O_APPEND) != 0
868 			? descriptor->ops->fd_seek(descriptor, 0, SEEK_END) : pos;
869 	}
870 
871 	return bytesTransferred;
872 }
873 
874 
875 status_t
876 user_fd_kernel_ioctl(int fd, uint32 op, void* buffer, size_t length)
877 {
878 	TRACE(("user_fd_kernel_ioctl: fd %d\n", fd));
879 
880 	return fd_ioctl(false, fd, op, buffer, length);
881 }
882 
883 
884 //	#pragma mark - User syscalls
885 
886 
887 ssize_t
888 _user_read(int fd, off_t pos, void* buffer, size_t length)
889 {
890 	return common_user_io(fd, pos, buffer, length, false);
891 }
892 
893 
894 ssize_t
895 _user_readv(int fd, off_t pos, const iovec* userVecs, size_t count)
896 {
897 	return common_user_vector_io(fd, pos, userVecs, count, false);
898 }
899 
900 
901 ssize_t
902 _user_write(int fd, off_t pos, const void* buffer, size_t length)
903 {
904 	return common_user_io(fd, pos, (void*)buffer, length, true);
905 }
906 
907 
908 ssize_t
909 _user_writev(int fd, off_t pos, const iovec* userVecs, size_t count)
910 {
911 	return common_user_vector_io(fd, pos, userVecs, count, true);
912 }
913 
914 
915 off_t
916 _user_seek(int fd, off_t pos, int seekType)
917 {
918 	syscall_64_bit_return_value();
919 
920 	struct file_descriptor* descriptor;
921 
922 	descriptor = get_fd(get_current_io_context(false), fd);
923 	if (!descriptor)
924 		return B_FILE_ERROR;
925 
926 	TRACE(("user_seek(descriptor = %p)\n", descriptor));
927 
928 	if (descriptor->ops->fd_seek)
929 		pos = descriptor->ops->fd_seek(descriptor, pos, seekType);
930 	else
931 		pos = ESPIPE;
932 
933 	put_fd(descriptor);
934 	return pos;
935 }
936 
937 
938 status_t
939 _user_ioctl(int fd, uint32 op, void* buffer, size_t length)
940 {
941 	TRACE(("user_ioctl: fd %d\n", fd));
942 
943 	// "buffer" is not always a pointer depending on "op", so we cannot
944 	// check that it is a userland buffer here. Instead we check that
945 	// it is at least not within the bounds of kernel memory; as in
946 	// the cases where it is a numeric constant it is usually a low one.
947 	if (IS_KERNEL_ADDRESS(buffer))
948 		return B_BAD_ADDRESS;
949 
950 	SyscallRestartWrapper<status_t> status;
951 
952 	return status = fd_ioctl(false, fd, op, buffer, length);
953 }
954 
955 
956 ssize_t
957 _user_read_dir(int fd, struct dirent* userBuffer, size_t bufferSize,
958 	uint32 maxCount)
959 {
960 	TRACE(("user_read_dir(fd = %d, userBuffer = %p, bufferSize = %ld, count = "
961 		"%lu)\n", fd, userBuffer, bufferSize, maxCount));
962 
963 	if (maxCount == 0)
964 		return 0;
965 
966 	if (userBuffer == NULL || !IS_USER_ADDRESS(userBuffer))
967 		return B_BAD_ADDRESS;
968 
969 	// get I/O context and FD
970 	io_context* ioContext = get_current_io_context(false);
971 	FDGetter fdGetter;
972 	struct file_descriptor* descriptor = fdGetter.SetTo(ioContext, fd, false);
973 	if (descriptor == NULL)
974 		return B_FILE_ERROR;
975 
976 	if (descriptor->ops->fd_read_dir == NULL)
977 		return B_UNSUPPORTED;
978 
979 	// restrict buffer size and allocate a heap buffer
980 	if (bufferSize > kMaxReadDirBufferSize)
981 		bufferSize = kMaxReadDirBufferSize;
982 	struct dirent* buffer = (struct dirent*)malloc(bufferSize);
983 	if (buffer == NULL)
984 		return B_NO_MEMORY;
985 	MemoryDeleter bufferDeleter(buffer);
986 
987 	// read the directory
988 	uint32 count = maxCount;
989 	status_t status = descriptor->ops->fd_read_dir(ioContext, descriptor,
990 		buffer, bufferSize, &count);
991 	if (status != B_OK)
992 		return status;
993 
994 	ASSERT(count <= maxCount);
995 
996 	// copy the buffer back -- determine the total buffer size first
997 	size_t sizeToCopy = 0;
998 	BytePointer<struct dirent> entry = buffer;
999 	for (uint32 i = 0; i < count; i++) {
1000 		size_t length = entry->d_reclen;
1001 		sizeToCopy += length;
1002 		entry += length;
1003 	}
1004 
1005 	ASSERT(sizeToCopy <= bufferSize);
1006 
1007 	if (user_memcpy(userBuffer, buffer, sizeToCopy) != B_OK)
1008 		return B_BAD_ADDRESS;
1009 
1010 	return count;
1011 }
1012 
1013 
1014 status_t
1015 _user_rewind_dir(int fd)
1016 {
1017 	struct file_descriptor* descriptor;
1018 	status_t status;
1019 
1020 	TRACE(("user_rewind_dir(fd = %d)\n", fd));
1021 
1022 	descriptor = get_fd(get_current_io_context(false), fd);
1023 	if (descriptor == NULL)
1024 		return B_FILE_ERROR;
1025 
1026 	if (descriptor->ops->fd_rewind_dir)
1027 		status = descriptor->ops->fd_rewind_dir(descriptor);
1028 	else
1029 		status = B_UNSUPPORTED;
1030 
1031 	put_fd(descriptor);
1032 	return status;
1033 }
1034 
1035 
1036 status_t
1037 _user_close(int fd)
1038 {
1039 	return common_close(fd, false);
1040 }
1041 
1042 
1043 int
1044 _user_dup(int fd)
1045 {
1046 	return dup_fd(fd, false);
1047 }
1048 
1049 
1050 int
1051 _user_dup2(int ofd, int nfd)
1052 {
1053 	return dup2_fd(ofd, nfd, false);
1054 }
1055 
1056 
1057 //	#pragma mark - Kernel calls
1058 
1059 
1060 ssize_t
1061 _kern_read(int fd, off_t pos, void* buffer, size_t length)
1062 {
1063 	if (pos < -1)
1064 		return B_BAD_VALUE;
1065 
1066 	FDGetter fdGetter;
1067 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1068 
1069 	if (!descriptor)
1070 		return B_FILE_ERROR;
1071 	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
1072 		return B_FILE_ERROR;
1073 
1074 	bool movePosition = false;
1075 	if (pos == -1) {
1076 		pos = descriptor->pos;
1077 		movePosition = true;
1078 	}
1079 
1080 	SyscallFlagUnsetter _;
1081 
1082 	if (descriptor->ops->fd_read == NULL)
1083 		return B_BAD_VALUE;
1084 
1085 	ssize_t bytesRead = descriptor->ops->fd_read(descriptor, pos, buffer,
1086 		&length);
1087 	if (bytesRead >= B_OK) {
1088 		if (length > SSIZE_MAX)
1089 			bytesRead = SSIZE_MAX;
1090 		else
1091 			bytesRead = (ssize_t)length;
1092 
1093 		if (movePosition)
1094 			descriptor->pos = pos + length;
1095 	}
1096 
1097 	return bytesRead;
1098 }
1099 
1100 
1101 ssize_t
1102 _kern_readv(int fd, off_t pos, const iovec* vecs, size_t count)
1103 {
1104 	bool movePosition = false;
1105 	status_t status;
1106 	uint32 i;
1107 
1108 	if (pos < -1)
1109 		return B_BAD_VALUE;
1110 
1111 	FDGetter fdGetter;
1112 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1113 
1114 	if (!descriptor)
1115 		return B_FILE_ERROR;
1116 	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
1117 		return B_FILE_ERROR;
1118 
1119 	if (pos == -1) {
1120 		pos = descriptor->pos;
1121 		movePosition = true;
1122 	}
1123 
1124 	if (descriptor->ops->fd_read == NULL)
1125 		return B_BAD_VALUE;
1126 
1127 	SyscallFlagUnsetter _;
1128 
1129 	ssize_t bytesRead = 0;
1130 
1131 	for (i = 0; i < count; i++) {
1132 		size_t length = vecs[i].iov_len;
1133 		status = descriptor->ops->fd_read(descriptor, pos, vecs[i].iov_base,
1134 			&length);
1135 		if (status != B_OK) {
1136 			bytesRead = status;
1137 			break;
1138 		}
1139 
1140 		if ((uint64)bytesRead + length > SSIZE_MAX)
1141 			bytesRead = SSIZE_MAX;
1142 		else
1143 			bytesRead += (ssize_t)length;
1144 
1145 		pos += vecs[i].iov_len;
1146 	}
1147 
1148 	if (movePosition)
1149 		descriptor->pos = pos;
1150 
1151 	return bytesRead;
1152 }
1153 
1154 
1155 ssize_t
1156 _kern_write(int fd, off_t pos, const void* buffer, size_t length)
1157 {
1158 	if (pos < -1)
1159 		return B_BAD_VALUE;
1160 
1161 	FDGetter fdGetter;
1162 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1163 
1164 	if (descriptor == NULL)
1165 		return B_FILE_ERROR;
1166 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
1167 		return B_FILE_ERROR;
1168 
1169 	bool movePosition = false;
1170 	if (pos == -1) {
1171 		pos = descriptor->pos;
1172 		movePosition = true;
1173 	}
1174 
1175 	if (descriptor->ops->fd_write == NULL)
1176 		return B_BAD_VALUE;
1177 
1178 	SyscallFlagUnsetter _;
1179 
1180 	ssize_t bytesWritten = descriptor->ops->fd_write(descriptor, pos, buffer,
1181 		&length);
1182 	if (bytesWritten >= B_OK) {
1183 		if (length > SSIZE_MAX)
1184 			bytesWritten = SSIZE_MAX;
1185 		else
1186 			bytesWritten = (ssize_t)length;
1187 
1188 		if (movePosition)
1189 			descriptor->pos = pos + length;
1190 	}
1191 
1192 	return bytesWritten;
1193 }
1194 
1195 
1196 ssize_t
1197 _kern_writev(int fd, off_t pos, const iovec* vecs, size_t count)
1198 {
1199 	bool movePosition = false;
1200 	status_t status;
1201 	uint32 i;
1202 
1203 	if (pos < -1)
1204 		return B_BAD_VALUE;
1205 
1206 	FDGetter fdGetter;
1207 	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);
1208 
1209 	if (!descriptor)
1210 		return B_FILE_ERROR;
1211 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
1212 		return B_FILE_ERROR;
1213 
1214 	if (pos == -1) {
1215 		pos = descriptor->pos;
1216 		movePosition = true;
1217 	}
1218 
1219 	if (descriptor->ops->fd_write == NULL)
1220 		return B_BAD_VALUE;
1221 
1222 	SyscallFlagUnsetter _;
1223 
1224 	ssize_t bytesWritten = 0;
1225 
1226 	for (i = 0; i < count; i++) {
1227 		size_t length = vecs[i].iov_len;
1228 		status = descriptor->ops->fd_write(descriptor, pos,
1229 			vecs[i].iov_base, &length);
1230 		if (status != B_OK) {
1231 			bytesWritten = status;
1232 			break;
1233 		}
1234 
1235 		if ((uint64)bytesWritten + length > SSIZE_MAX)
1236 			bytesWritten = SSIZE_MAX;
1237 		else
1238 			bytesWritten += (ssize_t)length;
1239 
1240 		pos += vecs[i].iov_len;
1241 	}
1242 
1243 	if (movePosition)
1244 		descriptor->pos = pos;
1245 
1246 	return bytesWritten;
1247 }
1248 
1249 
1250 off_t
1251 _kern_seek(int fd, off_t pos, int seekType)
1252 {
1253 	struct file_descriptor* descriptor;
1254 
1255 	descriptor = get_fd(get_current_io_context(true), fd);
1256 	if (!descriptor)
1257 		return B_FILE_ERROR;
1258 
1259 	if (descriptor->ops->fd_seek)
1260 		pos = descriptor->ops->fd_seek(descriptor, pos, seekType);
1261 	else
1262 		pos = ESPIPE;
1263 
1264 	put_fd(descriptor);
1265 	return pos;
1266 }
1267 
1268 
1269 status_t
1270 _kern_ioctl(int fd, uint32 op, void* buffer, size_t length)
1271 {
1272 	TRACE(("kern_ioctl: fd %d\n", fd));
1273 
1274 	SyscallFlagUnsetter _;
1275 
1276 	return fd_ioctl(true, fd, op, buffer, length);
1277 }
1278 
1279 
1280 ssize_t
1281 _kern_read_dir(int fd, struct dirent* buffer, size_t bufferSize,
1282 	uint32 maxCount)
1283 {
1284 	struct file_descriptor* descriptor;
1285 	ssize_t retval;
1286 
1287 	TRACE(("sys_read_dir(fd = %d, buffer = %p, bufferSize = %ld, count = "
1288 		"%lu)\n",fd, buffer, bufferSize, maxCount));
1289 
1290 	struct io_context* ioContext = get_current_io_context(true);
1291 	descriptor = get_fd(ioContext, fd);
1292 	if (descriptor == NULL)
1293 		return B_FILE_ERROR;
1294 
1295 	if (descriptor->ops->fd_read_dir) {
1296 		uint32 count = maxCount;
1297 		retval = descriptor->ops->fd_read_dir(ioContext, descriptor, buffer,
1298 			bufferSize, &count);
1299 		if (retval >= 0)
1300 			retval = count;
1301 	} else
1302 		retval = B_UNSUPPORTED;
1303 
1304 	put_fd(descriptor);
1305 	return retval;
1306 }
1307 
1308 
1309 status_t
1310 _kern_rewind_dir(int fd)
1311 {
1312 	struct file_descriptor* descriptor;
1313 	status_t status;
1314 
1315 	TRACE(("sys_rewind_dir(fd = %d)\n",fd));
1316 
1317 	descriptor = get_fd(get_current_io_context(true), fd);
1318 	if (descriptor == NULL)
1319 		return B_FILE_ERROR;
1320 
1321 	if (descriptor->ops->fd_rewind_dir)
1322 		status = descriptor->ops->fd_rewind_dir(descriptor);
1323 	else
1324 		status = B_UNSUPPORTED;
1325 
1326 	put_fd(descriptor);
1327 	return status;
1328 }
1329 
1330 
1331 status_t
1332 _kern_close(int fd)
1333 {
1334 	return common_close(fd, true);
1335 }
1336 
1337 
1338 int
1339 _kern_dup(int fd)
1340 {
1341 	return dup_fd(fd, true);
1342 }
1343 
1344 
1345 int
1346 _kern_dup2(int ofd, int nfd)
1347 {
1348 	return dup2_fd(ofd, nfd, true);
1349 }
1350 
1351