xref: /haiku/src/system/kernel/fs/fd.cpp (revision f817735275abdcfea39273c05f3e8bab7a9818be)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2018, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  */
6 
7 
8 //! Operations on file descriptors
9 
10 
11 #include <fd.h>
12 
13 #include <stdlib.h>
14 #include <string.h>
15 #include <sys/ioctl.h>
16 
17 #include <OS.h>
18 
19 #include <AutoDeleter.h>
20 #include <AutoDeleterDrivers.h>
21 #include <BytePointer.h>
22 #include <StackOrHeapArray.h>
23 
24 #include <syscalls.h>
25 #include <syscall_restart.h>
26 #include <slab/Slab.h>
27 #include <util/AutoLock.h>
28 #include <util/iovec_support.h>
29 #include <vfs.h>
30 #include <wait_for_objects.h>
31 
32 #include "vfs_tracing.h"
33 
34 
35 //#define TRACE_FD
36 #ifdef TRACE_FD
37 #	define TRACE(x) dprintf x
38 #else
39 #	define TRACE(x)
40 #endif
41 
42 
43 static const size_t kMaxReadDirBufferSize = B_PAGE_SIZE * 2;
44 
45 extern object_cache* sFileDescriptorCache;
46 
47 
48 static struct file_descriptor* get_fd_locked(const struct io_context* context,
49 	int fd);
50 static struct file_descriptor* remove_fd(struct io_context* context, int fd);
51 static void deselect_select_infos(file_descriptor* descriptor,
52 	select_info* infos, bool putSyncObjects);
53 
54 
55 //	#pragma mark - General fd routines
56 
57 
58 #ifdef DEBUG
59 void dump_fd(int fd, struct file_descriptor* descriptor);
60 
61 void
62 dump_fd(int fd, struct file_descriptor* descriptor)
63 {
64 	dprintf("fd[%d] = %p: ref_count = %" B_PRId32 ", ops "
65 		"= %p, u.vnode = %p, u.mount = %p, cookie = %p, open_mode = %" B_PRIx32
66 		", pos = %" B_PRId64 "\n",
67 		fd, descriptor, descriptor->ref_count,
68 		descriptor->ops, descriptor->u.vnode, descriptor->u.mount,
69 		descriptor->cookie, descriptor->open_mode, descriptor->pos);
70 }
71 #endif
72 
73 
74 /*! Allocates and initializes a new file_descriptor.
75 */
76 struct file_descriptor*
77 alloc_fd(void)
78 {
79 	file_descriptor* descriptor
80 		= (file_descriptor*)object_cache_alloc(sFileDescriptorCache, 0);
81 	if (descriptor == NULL)
82 		return NULL;
83 
84 	descriptor->u.vnode = NULL;
85 	descriptor->cookie = NULL;
86 	descriptor->ref_count = 1;
87 	descriptor->open_count = 0;
88 	descriptor->open_mode = 0;
89 	descriptor->pos = -1;
90 
91 	return descriptor;
92 }
93 
94 
95 bool
96 fd_close_on_exec(const struct io_context* context, int fd)
97 {
98 	return CHECK_BIT(context->fds_close_on_exec[fd / 8], fd & 7) != 0;
99 }
100 
101 
102 void
103 fd_set_close_on_exec(struct io_context* context, int fd, bool closeFD)
104 {
105 	if (closeFD)
106 		context->fds_close_on_exec[fd / 8] |= (1 << (fd & 7));
107 	else
108 		context->fds_close_on_exec[fd / 8] &= ~(1 << (fd & 7));
109 }
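
// Worked example (illustrative): the close-on-exec flags form a bitmap with
// one bit per FD. For fd == 10, the byte index is 10 / 8 == 1 and the bit
// index is 10 & 7 == 2, so fd_set_close_on_exec(context, 10, true) performs
// context->fds_close_on_exec[1] |= (1 << 2).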
110 
111 
112 /*!	Searches for a free slot in the FD table of the provided I/O context
113 	and inserts the specified descriptor into it.
114 */
115 int
116 new_fd_etc(struct io_context* context, struct file_descriptor* descriptor,
117 	int firstIndex)
118 {
119 	int fd = -1;
120 	uint32 i;
121 
122 	if (firstIndex < 0 || (uint32)firstIndex >= context->table_size)
123 		return B_BAD_VALUE;
124 
125 	WriteLocker locker(context->lock);
126 
127 	for (i = firstIndex; i < context->table_size; i++) {
128 		if (!context->fds[i]) {
129 			fd = i;
130 			break;
131 		}
132 	}
133 	if (fd < 0)
134 		return B_NO_MORE_FDS;
135 
136 	TFD(NewFD(context, fd, descriptor));
137 
138 	context->fds[fd] = descriptor;
139 	context->num_used_fds++;
140 	atomic_add(&descriptor->open_count, 1);
141 
142 	return fd;
143 }
144 
145 
146 int
147 new_fd(struct io_context* context, struct file_descriptor* descriptor)
148 {
149 	return new_fd_etc(context, descriptor, 0);
150 }
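
// A minimal usage sketch (illustrative only; "myOps" and "myCookie" are
// hypothetical, and real callers typically go through the VFS):
//
//	file_descriptor* descriptor = alloc_fd();
//	if (descriptor == NULL)
//		return B_NO_MEMORY;
//	descriptor->ops = &myOps;			// hypothetical fd_ops table
//	descriptor->cookie = myCookie;		// hypothetical per-FD state
//	int fd = new_fd(get_current_io_context(false), descriptor);
//	if (fd < 0)
//		put_fd(descriptor);				// drop the initial reference
//	return fd;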
151 
152 
153 /*!	Decrements the descriptor's reference count and frees all resources
154 	when it is no longer in use.
155 */
156 void
157 put_fd(struct file_descriptor* descriptor)
158 {
159 	int32 previous = atomic_add(&descriptor->ref_count, -1);
160 
161 	TFD(PutFD(descriptor));
162 
163 	TRACE(("put_fd(descriptor = %p [ref = %" B_PRId32 ", cookie = %p])\n",
164 		descriptor, descriptor->ref_count, descriptor->cookie));
165 
166 	// free the descriptor if we don't need it anymore
167 	if (previous == 1) {
168 		// free the underlying object
169 		if (descriptor->ops != NULL && descriptor->ops->fd_free != NULL)
170 			descriptor->ops->fd_free(descriptor);
171 
172 		object_cache_free(sFileDescriptorCache, descriptor, 0);
173 	} else if ((descriptor->open_mode & O_DISCONNECTED) != 0
174 		&& previous - 1 == descriptor->open_count
175 		&& descriptor->ops != NULL) {
176 		// the descriptor has been disconnected: it cannot be
177 		// accessed anymore, so close it now (no one is currently
178 		// using it)
179 
180 		if (descriptor->ops->fd_close)
181 			descriptor->ops->fd_close(descriptor);
182 		if (descriptor->ops->fd_free)
183 			descriptor->ops->fd_free(descriptor);
184 
185 		// prevent this descriptor from being closed/freed again
186 		descriptor->ops = NULL;
187 		descriptor->u.vnode = NULL;
188 
189 		// the file descriptor is kept intact, so that it's not
190 		// reused until someone explicitly closes it
191 	}
192 }
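
// Typical reference pairing (illustrative): every successful get_fd() must
// be balanced by exactly one put_fd() once the caller is done:
//
//	file_descriptor* descriptor = get_fd(context, fd);
//	if (descriptor == NULL)
//		return B_FILE_ERROR;
//	// ... use the descriptor ...
//	put_fd(descriptor);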
193 
194 
195 /*!	Decrements the open counter of the file descriptor and invokes
196 	its close hook when appropriate.
197 */
198 void
199 close_fd(struct io_context* context, struct file_descriptor* descriptor)
200 {
201 	// POSIX advisory locks need to be released when any file descriptor closes
202 	if (fd_is_file(descriptor))
203 		vfs_release_posix_lock(context, descriptor);
204 
205 	if (atomic_add(&descriptor->open_count, -1) == 1) {
206 		vfs_unlock_vnode_if_locked(descriptor);
207 
208 		if (descriptor->ops != NULL && descriptor->ops->fd_close != NULL)
209 			descriptor->ops->fd_close(descriptor);
210 	}
211 }
212 
213 
214 status_t
215 close_fd_index(struct io_context* context, int fd)
216 {
217 	struct file_descriptor* descriptor = remove_fd(context, fd);
218 
219 	if (descriptor == NULL)
220 		return B_FILE_ERROR;
221 
222 	close_fd(context, descriptor);
223 	put_fd(descriptor);
224 		// the reference associated with the slot
225 
226 	return B_OK;
227 }
228 
229 
230 /*!	Marks the descriptor as disconnected: its underlying object will be
231 	closed and freed as soon as possible (in one of the next calls to
232 	put_fd()), and get_fd() will no longer succeed on it.
233 	This is useful if the underlying object is gone, for instance when a
234 	(mounted) volume has been removed unexpectedly.
235 */
236 void
237 disconnect_fd(struct file_descriptor* descriptor)
238 {
239 	descriptor->open_mode |= O_DISCONNECTED;
240 }
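
// Illustrative consequence of disconnecting: subsequent lookups fail, and
// the close/free hooks run once the last in-flight user drops its
// reference, while the descriptor itself stays allocated until the slot is
// explicitly closed:
//
//	disconnect_fd(descriptor);
//	ASSERT(get_fd(context, fd) == NULL);	// new accesses now fail
//	put_fd(descriptor);	// the last put runs fd_close()/fd_free()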
241 
242 
243 void
244 inc_fd_ref_count(struct file_descriptor* descriptor)
245 {
246 	atomic_add(&descriptor->ref_count, 1);
247 }
248 
249 
250 static struct file_descriptor*
251 get_fd_locked(const struct io_context* context, int fd)
252 {
253 	if (fd < 0 || (uint32)fd >= context->table_size)
254 		return NULL;
255 
256 	struct file_descriptor* descriptor = context->fds[fd];
257 
258 	if (descriptor != NULL) {
259 		// disconnected descriptors cannot be accessed anymore
260 		if (descriptor->open_mode & O_DISCONNECTED)
261 			return NULL;
262 
263 		TFD(GetFD(context, fd, descriptor));
264 		inc_fd_ref_count(descriptor);
265 	}
266 
267 	return descriptor;
268 }
269 
270 
271 struct file_descriptor*
272 get_fd(const struct io_context* context, int fd)
273 {
274 	ReadLocker locker(context->lock);
275 	return get_fd_locked(context, fd);
276 }
277 
278 
279 struct file_descriptor*
280 get_open_fd(const struct io_context* context, int fd)
281 {
282 	ReadLocker locker(context->lock);
283 
284 	file_descriptor* descriptor = get_fd_locked(context, fd);
285 	if (descriptor == NULL)
286 		return NULL;
287 
288 	atomic_add(&descriptor->open_count, 1);
289 
290 	return descriptor;
291 }
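
// Illustrative pairing: get_open_fd() acquires both a reference and an open
// count, so callers have to undo both:
//
//	file_descriptor* descriptor = get_open_fd(context, fd);
//	if (descriptor == NULL)
//		return B_FILE_ERROR;
//	// ... I/O that must not race with the final close ...
//	close_fd(context, descriptor);	// releases the open count
//	put_fd(descriptor);				// releases the reference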
292 
293 
294 /*!	Removes the file descriptor from the specified slot.
295 */
296 static struct file_descriptor*
297 remove_fd(struct io_context* context, int fd)
298 {
299 	struct file_descriptor* descriptor = NULL;
300 
301 	if (fd < 0)
302 		return NULL;
303 
304 	WriteLocker locker(context->lock);
305 
306 	if ((uint32)fd < context->table_size)
307 		descriptor = context->fds[fd];
308 
309 	select_info* selectInfos = NULL;
310 	bool disconnected = false;
311 
312 	if (descriptor != NULL) {
313 		// fd is valid
314 		TFD(RemoveFD(context, fd, descriptor));
315 
316 		context->fds[fd] = NULL;
317 		fd_set_close_on_exec(context, fd, false);
318 		context->num_used_fds--;
319 
320 		selectInfos = context->select_infos[fd];
321 		context->select_infos[fd] = NULL;
322 
323 		disconnected = (descriptor->open_mode & O_DISCONNECTED) != 0;
324 	}
325 
326 	if (selectInfos != NULL)
327 		deselect_select_infos(descriptor, selectInfos, true);
328 
329 	return disconnected ? NULL : descriptor;
330 }
331 
332 
333 static int
334 dup_fd(int fd, bool kernel)
335 {
336 	struct io_context* context = get_current_io_context(kernel);
337 	struct file_descriptor* descriptor;
338 	int status;
339 
340 	TRACE(("dup_fd: fd = %d\n", fd));
341 
342 	// Try to get the fd structure
343 	descriptor = get_fd(context, fd);
344 	if (descriptor == NULL)
345 		return B_FILE_ERROR;
346 
347 	// now put the fd in place
348 	status = new_fd(context, descriptor);
349 	if (status < 0) {
350 		put_fd(descriptor);
351 	} else {
352 		WriteLocker locker(context->lock);
353 		fd_set_close_on_exec(context, status, false);
354 	}
355 
356 	return status;
357 }
358 
359 
360 /*!	POSIX says this should be the same as:
361 		close(newfd);
362 		fcntl(oldfd, F_DUPFD, newfd);
363 
364 	We implement dup2() directly so that it is atomic, and thus thread-safe.
365 */
366 static int
367 dup2_fd(int oldfd, int newfd, int flags, bool kernel)
368 {
369 	struct file_descriptor* evicted = NULL;
370 	struct io_context* context;
371 
372 	TRACE(("dup2_fd: ofd = %d, nfd = %d\n", oldfd, newfd));
373 
374 	// quick check
375 	if (oldfd < 0 || newfd < 0)
376 		return B_FILE_ERROR;
377 	if ((flags & ~O_CLOEXEC) != 0)
378 		return B_BAD_VALUE;
379 
380 	// Get current I/O context and lock it
381 	context = get_current_io_context(kernel);
382 	WriteLocker locker(context->lock);
383 
384 	// Check if the fds are valid (the context lock must be held, because
385 	// the table size could change)
386 	if ((uint32)oldfd >= context->table_size
387 		|| (uint32)newfd >= context->table_size
388 		|| context->fds[oldfd] == NULL
389 		|| (context->fds[oldfd]->open_mode & O_DISCONNECTED) != 0) {
390 		return B_FILE_ERROR;
391 	}
392 
393 	// Check for identity. This check cannot be done earlier, because
394 	// we always want to return an error on invalid handles, even when
395 	// oldfd == newfd
396 	if (oldfd != newfd) {
397 		// Now do the work
398 		TFD(Dup2FD(context, oldfd, newfd));
399 
400 		evicted = context->fds[newfd];
401 		select_info* selectInfos = context->select_infos[newfd];
402 		context->select_infos[newfd] = NULL;
403 		atomic_add(&context->fds[oldfd]->ref_count, 1);
404 		atomic_add(&context->fds[oldfd]->open_count, 1);
405 		context->fds[newfd] = context->fds[oldfd];
406 
407 		if (evicted == NULL)
408 			context->num_used_fds++;
409 
410 		deselect_select_infos(evicted, selectInfos, true);
411 	}
412 
413 	fd_set_close_on_exec(context, newfd, (flags & O_CLOEXEC) != 0);
414 
415 	locker.Unlock();
416 
417 	// Say bye bye to the evicted fd
418 	if (evicted) {
419 		close_fd(context, evicted);
420 		put_fd(evicted);
421 	}
422 
423 	return newfd;
424 }
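
// Behavioral sketch (illustrative), assuming oldfd refers to a valid,
// connected descriptor:
//
//	dup2_fd(oldfd, newfd, 0, false);
//		// newfd now shares oldfd's descriptor; whatever was at newfd
//		// before has been closed
//	dup2_fd(oldfd, oldfd, 0, false);
//		// returns oldfd; only clears the close-on-exec flag
//
// Unlike the close()/fcntl() sequence, the replacement is atomic with
// respect to other threads of the same team.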
425 
426 
427 /*!	Duplicates an FD from another team to this/the kernel team.
428 	\param fromTeam The team which owns the FD.
429 	\param fd The FD to duplicate.
430 	\param kernel If \c true, the new FD will be created in the kernel team,
431 			the current userland team otherwise.
432 	\return The newly created FD or an error code, if something went wrong.
433 */
434 int
435 dup_foreign_fd(team_id fromTeam, int fd, bool kernel)
436 {
437 	// get the I/O context for the team in question
438 	Team* team = Team::Get(fromTeam);
439 	if (team == NULL)
440 		return B_BAD_TEAM_ID;
441 	BReference<Team> teamReference(team, true);
442 
443 	io_context* fromContext = team->io_context;
444 
445 	// get the file descriptor
446 	file_descriptor* descriptor = get_fd(fromContext, fd);
447 	if (descriptor == NULL)
448 		return B_FILE_ERROR;
449 	FileDescriptorPutter descriptorPutter(descriptor);
450 
451 	// create a new FD in the target I/O context
452 	int result = new_fd(get_current_io_context(kernel), descriptor);
453 	if (result >= 0) {
454 		// the descriptor reference now belongs to the slot
455 		descriptorPutter.Detach();
456 	}
457 
458 	return result;
459 }
460 
461 
462 static status_t
463 fd_ioctl(bool kernelFD, int fd, uint32 op, void* buffer, size_t length)
464 {
465 	FileDescriptorPutter descriptor(get_fd(get_current_io_context(kernelFD), fd));
466 	if (!descriptor.IsSet())
467 		return B_FILE_ERROR;
468 
469 	// Special case: translate FIONBIO into fcntl(F_SETFL).
470 	if (op == FIONBIO) {
471 		if (buffer == NULL)
472 			return B_BAD_VALUE;
473 
474 		int value;
475 		if (is_called_via_syscall()) {
476 			if (!IS_USER_ADDRESS(buffer)
477 				|| user_memcpy(&value, buffer, sizeof(int)) != B_OK) {
478 				return B_BAD_ADDRESS;
479 			}
480 		} else
481 			value = *(int*)buffer;
482 
483 		size_t argument = descriptor->open_mode & ~O_NONBLOCK;
484 		argument |= (value ? O_NONBLOCK : 0);
485 
486 		return (kernelFD ? _kern_fcntl : _user_fcntl)(fd, F_SETFL, argument);
487 	}
488 
489 	status_t status;
490 	if (descriptor->ops->fd_ioctl)
491 		status = descriptor->ops->fd_ioctl(descriptor.Get(), op, buffer, length);
492 	else
493 		status = B_DEV_INVALID_IOCTL;
494 
495 	if (status == B_DEV_INVALID_IOCTL)
496 		status = ENOTTY;
497 
498 	return status;
499 }
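
// Equivalence example (illustrative): for a userland caller, the FIONBIO
// special case above makes
//
//	int value = 1;
//	ioctl(fd, FIONBIO, &value);
//
// behave like setting O_NONBLOCK via fcntl():
//
//	fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_NONBLOCK);
//
// and value == 0 clears O_NONBLOCK again.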
500 
501 
502 static void
503 deselect_select_infos(file_descriptor* descriptor, select_info* infos,
504 	bool putSyncObjects)
505 {
506 	TRACE(("deselect_select_infos(%p, %p)\n", descriptor, infos));
507 
508 	select_info* info = infos;
509 	while (info != NULL) {
510 		select_sync* sync = info->sync;
511 
512 		// deselect the selected events
513 		uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
514 		if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
515 			for (uint16 event = 1; event < 16; event++) {
516 				if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
517 					descriptor->ops->fd_deselect(descriptor, event,
518 						(selectsync*)info);
519 				}
520 			}
521 		}
522 
523 		select_info* next = info->next;
524 		notify_select_events(info, B_EVENT_INVALID);
525 		info = next;
526 
527 		if (putSyncObjects)
528 			put_select_sync(sync);
529 	}
530 }
531 
532 
533 status_t
534 select_fd(int32 fd, struct select_info* info, bool kernel)
535 {
536 	TRACE(("select_fd(fd = %" B_PRId32 ", info = %p (%p), 0x%x)\n", fd, info,
537 		info->sync, info->selected_events));
538 
539 	FileDescriptorPutter descriptor;
540 		// define before the context locker, so it will be destroyed after it
541 
542 	io_context* context = get_current_io_context(kernel);
543 	ReadLocker readLocker(context->lock);
544 
545 	descriptor.SetTo(get_fd_locked(context, fd));
546 	if (!descriptor.IsSet())
547 		return B_FILE_ERROR;
548 
549 	uint16 eventsToSelect = info->selected_events & ~B_EVENT_INVALID;
550 
551 	if (descriptor->ops->fd_select == NULL) {
552 		// if the I/O subsystem doesn't support select(), we will
553 		// immediately notify the select call
554 		eventsToSelect &= ~SELECT_OUTPUT_ONLY_FLAGS;
555 		if (eventsToSelect != 0)
556 			notify_select_events(info, eventsToSelect);
557 
558 		info->selected_events = 0;
559 		return B_UNSUPPORTED;
560 	}
561 
562 	// We need the FD to stay open while we're doing this, so that
563 	// select()/deselect() cannot be called on it after it has been closed.
564 	atomic_add(&descriptor->open_count, 1);
565 
566 	readLocker.Unlock();
567 
568 	// select any events asked for
569 	uint32 selectedEvents = 0;
570 
571 	for (uint16 event = 1; event < 16; event++) {
572 		if ((eventsToSelect & SELECT_FLAG(event)) != 0
573 			&& descriptor->ops->fd_select(descriptor.Get(), event,
574 				(selectsync*)info) == B_OK) {
575 			selectedEvents |= SELECT_FLAG(event);
576 		}
577 	}
578 	info->selected_events = selectedEvents
579 		| (info->selected_events & B_EVENT_INVALID);
580 
581 	// Add the info to the I/O context even if nothing has been selected;
582 	// we always support B_EVENT_INVALID.
583 	WriteLocker writeLocker(context->lock);
584 	if (context->fds[fd] != descriptor.Get()) {
585 		// Someone closed the FD slot in the meantime; deselect() all
586 		// events.
587 		info->next = NULL;
588 		deselect_select_infos(descriptor.Get(), info, false);
589 
590 		// Release our open reference of the descriptor.
591 		close_fd(context, descriptor.Get());
592 		return B_FILE_ERROR;
593 	}
594 
595 	// The FD index hasn't changed, so we add the select info to the table.
596 
597 	info->next = context->select_infos[fd];
598 	context->select_infos[fd] = info;
599 
600 	// As long as the info is in the list, we keep a reference to the sync
601 	// object.
602 	acquire_select_sync(info->sync);
603 
604 	// Finally release our open reference. It is safe just to decrement,
605 	// since as long as the descriptor is associated with the slot,
606 	// someone else still has it open.
607 	atomic_add(&descriptor->open_count, -1);
608 
609 	return B_OK;
610 }
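
// Lifecycle sketch (illustrative; "sync" is a caller-owned select_sync):
// a waiter selects the FD, blocks on its sync object, and always deselects
// afterwards, no matter how the wait ended:
//
//	select_info info = {};
//	info.sync = sync;
//	info.selected_events = B_EVENT_READ | B_EVENT_INVALID;
//	if (select_fd(fd, &info, false) == B_OK) {
//		// ... wait on the sync object ...
//		deselect_fd(fd, &info, false);
//	}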
611 
612 
613 status_t
614 deselect_fd(int32 fd, struct select_info* info, bool kernel)
615 {
616 	TRACE(("deselect_fd(fd = %" B_PRId32 ", info = %p (%p), 0x%x)\n", fd, info,
617 		info->sync, info->selected_events));
618 
619 	FileDescriptorPutter descriptor;
620 		// define before the context locker, so it will be destroyed after it
621 
622 	io_context* context = get_current_io_context(kernel);
623 	WriteLocker locker(context->lock);
624 
625 	descriptor.SetTo(get_fd_locked(context, fd));
626 	if (!descriptor.IsSet())
627 		return B_FILE_ERROR;
628 
629 	// remove the info from the IO context
630 
631 	select_info** infoLocation = &context->select_infos[fd];
632 	while (*infoLocation != NULL && *infoLocation != info)
633 		infoLocation = &(*infoLocation)->next;
634 
635 	// If not found, someone else beat us to it.
636 	if (*infoLocation != info)
637 		return B_OK;
638 
639 	*infoLocation = info->next;
640 
641 	locker.Unlock();
642 
643 	// deselect the selected events
644 	uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
645 	if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
646 		for (uint16 event = 1; event < 16; event++) {
647 			if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
648 				descriptor->ops->fd_deselect(descriptor.Get(), event,
649 					(selectsync*)info);
650 			}
651 		}
652 	}
653 
654 	put_select_sync(info->sync);
655 
656 	return B_OK;
657 }
658 
659 
660 /*!	This function checks if the specified fd is valid in the current
661 	context. It can be used for a quick check; the fd is not locked
662 	so it could become invalid immediately after this check.
663 */
664 bool
665 fd_is_valid(int fd, bool kernel)
666 {
667 	struct file_descriptor* descriptor
668 		= get_fd(get_current_io_context(kernel), fd);
669 	if (descriptor == NULL)
670 		return false;
671 
672 	put_fd(descriptor);
673 	return true;
674 }
675 
676 
677 static ssize_t
678 common_vector_io(int fd, off_t pos, const iovec* vecs, size_t count, bool write, bool kernel)
679 {
680 	if (pos < -1)
681 		return B_BAD_VALUE;
682 
683 	FileDescriptorPutter descriptor(get_fd(get_current_io_context(kernel), fd));
684 	if (!descriptor.IsSet())
685 		return B_FILE_ERROR;
686 
687 	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
688 			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
689 		return B_FILE_ERROR;
690 	}
691 
692 	bool movePosition = false;
693 	if (pos == -1 && descriptor->pos != -1) {
694 		pos = descriptor->pos;
695 		movePosition = true;
696 	}
697 
698 	if (write ? descriptor->ops->fd_write == NULL
699 			: descriptor->ops->fd_read == NULL) {
700 		return B_BAD_VALUE;
701 	}
702 
703 	if (!movePosition && count > 1 && (write ? descriptor->ops->fd_writev != NULL
704 			: descriptor->ops->fd_readv != NULL)) {
705 		ssize_t result;
706 		if (write) {
707 			result = descriptor->ops->fd_writev(descriptor.Get(), pos,
708 				vecs, count);
709 		} else {
710 			result = descriptor->ops->fd_readv(descriptor.Get(), pos,
711 				vecs, count);
712 		}
713 		if (result != B_UNSUPPORTED)
714 			return result;
715 		// If not supported, just fall back to the loop.
716 	}
717 
718 	status_t status = B_OK;
719 	ssize_t bytesTransferred = 0;
720 	for (size_t i = 0; i < count; i++) {
721 		if (vecs[i].iov_base == NULL)
722 			continue;
723 
724 		size_t length = vecs[i].iov_len;
725 		if (write) {
726 			status = descriptor->ops->fd_write(descriptor.Get(), pos,
727 				vecs[i].iov_base, &length);
728 		} else {
729 			status = descriptor->ops->fd_read(descriptor.Get(), pos,
730 				vecs[i].iov_base, &length);
731 		}
732 
733 		if (status != B_OK) {
734 			if (bytesTransferred == 0)
735 				return status;
736 			break;
737 		}
738 
739 		if ((uint64)bytesTransferred + length > SSIZE_MAX)
740 			bytesTransferred = SSIZE_MAX;
741 		else
742 			bytesTransferred += (ssize_t)length;
743 
744 		if (pos != -1)
745 			pos += length;
746 
747 		if (length < vecs[i].iov_len)
748 			break;
749 	}
750 
751 	if (movePosition) {
752 		descriptor->pos = write && (descriptor->open_mode & O_APPEND) != 0
753 			? descriptor->ops->fd_seek(descriptor.Get(), 0, SEEK_END) : pos;
754 	}
755 
756 	return bytesTransferred;
757 }
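
// Partial-transfer semantics, by example (illustrative): given two 100-byte
// vectors, if the first fd_read() transfers 100 bytes and the second one
// fails, the call still returns 100; an error is only returned when nothing
// was transferred at all. A short transfer (length < iov_len) likewise ends
// the loop early.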
758 
759 
760 static ssize_t
761 common_user_io(int fd, off_t pos, void* buffer, size_t length, bool write)
762 {
763 	if (pos < -1)
764 		return B_BAD_VALUE;
765 
766 	FileDescriptorPutter descriptor(get_fd(get_current_io_context(false), fd));
767 	if (!descriptor.IsSet())
768 		return B_FILE_ERROR;
769 
770 	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
771 			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
772 		return B_FILE_ERROR;
773 	}
774 
775 	bool movePosition = false;
776 	if (pos == -1 && descriptor->pos != -1) {
777 		pos = descriptor->pos;
778 		movePosition = true;
779 	}
780 
781 	if (write ? descriptor->ops->fd_write == NULL
782 			: descriptor->ops->fd_read == NULL) {
783 		return B_BAD_VALUE;
784 	}
785 
786 	if (length == 0)
787 		return 0;
788 
789 	if (!is_user_address_range(buffer, length))
790 		return B_BAD_ADDRESS;
791 
792 	SyscallRestartWrapper<status_t> status;
793 
794 	if (write)
795 		status = descriptor->ops->fd_write(descriptor.Get(), pos, buffer, &length);
796 	else
797 		status = descriptor->ops->fd_read(descriptor.Get(), pos, buffer, &length);
798 
799 	if (status != B_OK)
800 		return status;
801 
802 	if (movePosition) {
803 		descriptor->pos = write && (descriptor->open_mode & O_APPEND) != 0
804 			? descriptor->ops->fd_seek(descriptor.Get(), 0, SEEK_END) : pos + length;
805 	}
806 
807 	return length <= SSIZE_MAX ? (ssize_t)length : SSIZE_MAX;
808 }
809 
810 
811 static ssize_t
812 common_user_vector_io(int fd, off_t pos, const iovec* userVecs, size_t count,
813 	bool write)
814 {
815 	if (count > IOV_MAX)
816 		return B_BAD_VALUE;
817 
818 	BStackOrHeapArray<iovec, 16> vecs(count);
819 	if (!vecs.IsValid())
820 		return B_NO_MEMORY;
821 
822 	status_t error = get_iovecs_from_user(userVecs, count, vecs, true);
823 	if (error != B_OK)
824 		return error;
825 
826 	SyscallRestartWrapper<ssize_t> result;
827 	result = common_vector_io(fd, pos, vecs, count, write, false);
828 
829 	return result;
830 }
831 
832 
833 static status_t
834 common_close(int fd, bool kernel)
835 {
836 	return close_fd_index(get_current_io_context(kernel), fd);
837 }
838 
839 
840 status_t
841 user_fd_kernel_ioctl(int fd, uint32 op, void* buffer, size_t length)
842 {
843 	TRACE(("user_fd_kernel_ioctl: fd %d\n", fd));
844 
845 	return fd_ioctl(false, fd, op, buffer, length);
846 }
847 
848 
849 //	#pragma mark - User syscalls
850 
851 
852 ssize_t
853 _user_read(int fd, off_t pos, void* buffer, size_t length)
854 {
855 	return common_user_io(fd, pos, buffer, length, false);
856 }
857 
858 
859 ssize_t
860 _user_readv(int fd, off_t pos, const iovec* userVecs, size_t count)
861 {
862 	return common_user_vector_io(fd, pos, userVecs, count, false);
863 }
864 
865 
866 ssize_t
867 _user_write(int fd, off_t pos, const void* buffer, size_t length)
868 {
869 	return common_user_io(fd, pos, (void*)buffer, length, true);
870 }
871 
872 
873 ssize_t
874 _user_writev(int fd, off_t pos, const iovec* userVecs, size_t count)
875 {
876 	return common_user_vector_io(fd, pos, userVecs, count, true);
877 }
878 
879 
880 off_t
881 _user_seek(int fd, off_t pos, int seekType)
882 {
883 	syscall_64_bit_return_value();
884 
885 	FileDescriptorPutter descriptor(get_fd(get_current_io_context(false), fd));
886 	if (!descriptor.IsSet())
887 		return B_FILE_ERROR;
888 
889 	TRACE(("user_seek(descriptor = %p)\n", descriptor));
890 
891 	if (descriptor->ops->fd_seek)
892 		pos = descriptor->ops->fd_seek(descriptor.Get(), pos, seekType);
893 	else
894 		pos = ESPIPE;
895 
896 	return pos;
897 }
898 
899 
900 status_t
901 _user_ioctl(int fd, uint32 op, void* buffer, size_t length)
902 {
903 	TRACE(("user_ioctl: fd %d\n", fd));
904 
905 	// "buffer" is not always a pointer depending on "op", so we cannot
906 	// check that it is a userland buffer here. Instead we check that
907 	// it is at least not within the bounds of kernel memory; as in
908 	// the cases where it is a numeric constant it is usually a low one.
909 	if (IS_KERNEL_ADDRESS(buffer))
910 		return B_BAD_ADDRESS;
911 
912 	SyscallRestartWrapper<status_t> status;
913 
914 	return status = fd_ioctl(false, fd, op, buffer, length);
915 }
916 
917 
918 ssize_t
919 _user_read_dir(int fd, struct dirent* userBuffer, size_t bufferSize,
920 	uint32 maxCount)
921 {
922 	TRACE(("user_read_dir(fd = %d, userBuffer = %p, bufferSize = %ld, count = "
923 		"%" B_PRIu32 ")\n", fd, userBuffer, bufferSize, maxCount));
924 
925 	if (maxCount == 0)
926 		return 0;
927 
928 	if (userBuffer == NULL || !IS_USER_ADDRESS(userBuffer))
929 		return B_BAD_ADDRESS;
930 
931 	// get I/O context and FD
932 	io_context* ioContext = get_current_io_context(false);
933 	FileDescriptorPutter descriptor(get_fd(ioContext, fd));
934 	if (!descriptor.IsSet())
935 		return B_FILE_ERROR;
936 
937 	if (descriptor->ops->fd_read_dir == NULL)
938 		return B_UNSUPPORTED;
939 
940 	// restrict buffer size and allocate a heap buffer
941 	if (bufferSize > kMaxReadDirBufferSize)
942 		bufferSize = kMaxReadDirBufferSize;
943 	struct dirent* buffer = (struct dirent*)malloc(bufferSize);
944 	if (buffer == NULL)
945 		return B_NO_MEMORY;
946 	MemoryDeleter bufferDeleter(buffer);
947 
948 	// read the directory
949 	uint32 count = maxCount;
950 	status_t status = descriptor->ops->fd_read_dir(ioContext, descriptor.Get(),
951 		buffer, bufferSize, &count);
952 	if (status != B_OK)
953 		return status;
954 
955 	ASSERT(count <= maxCount);
956 
957 	// copy the buffer back -- determine the total buffer size first
958 	size_t sizeToCopy = 0;
959 	BytePointer<struct dirent> entry = buffer;
960 	for (uint32 i = 0; i < count; i++) {
961 		size_t length = entry->d_reclen;
962 		sizeToCopy += length;
963 		entry += length;
964 	}
965 
966 	ASSERT(sizeToCopy <= bufferSize);
967 
968 	if (user_memcpy(userBuffer, buffer, sizeToCopy) != B_OK)
969 		return B_BAD_ADDRESS;
970 
971 	return count;
972 }
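
// Illustrative kernel-side consumer of such a packed buffer (entries are
// variable-length and chained via d_reclen; error handling omitted):
//
//	char buffer[4096];
//	ssize_t count = _kern_read_dir(fd, (struct dirent*)buffer,
//		sizeof(buffer), 32);
//	struct dirent* entry = (struct dirent*)buffer;
//	for (ssize_t i = 0; i < count; i++) {
//		dprintf("%s\n", entry->d_name);
//		entry = (struct dirent*)((addr_t)entry + entry->d_reclen);
//	}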
973 
974 
975 status_t
976 _user_rewind_dir(int fd)
977 {
978 	TRACE(("user_rewind_dir(fd = %d)\n", fd));
979 
980 	FileDescriptorPutter descriptor(get_fd(get_current_io_context(false), fd));
981 	if (!descriptor.IsSet())
982 		return B_FILE_ERROR;
983 
984 	status_t status;
985 	if (descriptor->ops->fd_rewind_dir)
986 		status = descriptor->ops->fd_rewind_dir(descriptor.Get());
987 	else
988 		status = B_UNSUPPORTED;
989 
990 	return status;
991 }
992 
993 
994 status_t
995 _user_close(int fd)
996 {
997 	return common_close(fd, false);
998 }
999 
1000 
1001 int
1002 _user_dup(int fd)
1003 {
1004 	return dup_fd(fd, false);
1005 }
1006 
1007 
1008 int
1009 _user_dup2(int ofd, int nfd, int flags)
1010 {
1011 	return dup2_fd(ofd, nfd, flags, false);
1012 }
1013 
1014 
1015 //	#pragma mark - Kernel calls
1016 
1017 
1018 ssize_t
1019 _kern_read(int fd, off_t pos, void* buffer, size_t length)
1020 {
1021 	if (pos < -1)
1022 		return B_BAD_VALUE;
1023 
1024 	FileDescriptorPutter descriptor(get_fd(get_current_io_context(true), fd));
1025 
1026 	if (!descriptor.IsSet())
1027 		return B_FILE_ERROR;
1028 	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
1029 		return B_FILE_ERROR;
1030 
1031 	bool movePosition = false;
1032 	if (pos == -1 && descriptor->pos != -1) {
1033 		pos = descriptor->pos;
1034 		movePosition = true;
1035 	}
1036 
1037 	SyscallFlagUnsetter _;
1038 
1039 	if (descriptor->ops->fd_read == NULL)
1040 		return B_BAD_VALUE;
1041 
1042 	ssize_t bytesRead = descriptor->ops->fd_read(descriptor.Get(), pos, buffer,
1043 		&length);
1044 	if (bytesRead >= B_OK) {
1045 		if (length > SSIZE_MAX)
1046 			bytesRead = SSIZE_MAX;
1047 		else
1048 			bytesRead = (ssize_t)length;
1049 
1050 		if (movePosition)
1051 			descriptor->pos = pos + length;
1052 	}
1053 
1054 	return bytesRead;
1055 }
1056 
1057 
1058 ssize_t
1059 _kern_write(int fd, off_t pos, const void* buffer, size_t length)
1060 {
1061 	if (pos < -1)
1062 		return B_BAD_VALUE;
1063 
1064 	FileDescriptorPutter descriptor(get_fd(get_current_io_context(true), fd));
1065 
1066 	if (!descriptor.IsSet())
1067 		return B_FILE_ERROR;
1068 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
1069 		return B_FILE_ERROR;
1070 
1071 	bool movePosition = false;
1072 	if (pos == -1 && descriptor->pos != -1) {
1073 		pos = descriptor->pos;
1074 		movePosition = true;
1075 	}
1076 
1077 	if (descriptor->ops->fd_write == NULL)
1078 		return B_BAD_VALUE;
1079 
1080 	SyscallFlagUnsetter _;
1081 
1082 	ssize_t bytesWritten = descriptor->ops->fd_write(descriptor.Get(), pos,
1083 		buffer, &length);
1084 	if (bytesWritten >= B_OK) {
1085 		if (length > SSIZE_MAX)
1086 			bytesWritten = SSIZE_MAX;
1087 		else
1088 			bytesWritten = (ssize_t)length;
1089 
1090 		if (movePosition)
1091 			descriptor->pos = pos + length;
1092 	}
1093 
1094 	return bytesWritten;
1095 }
1096 
1097 
1098 ssize_t
1099 _kern_readv(int fd, off_t pos, const iovec* vecs, size_t count)
1100 {
1101 	SyscallFlagUnsetter _;
1102 	return common_vector_io(fd, pos, vecs, count, false, true);
1103 }
1104 
1105 
1106 ssize_t
1107 _kern_writev(int fd, off_t pos, const iovec* vecs, size_t count)
1108 {
1109 	SyscallFlagUnsetter _;
1110 	return common_vector_io(fd, pos, vecs, count, true, true);
1111 }
1112 
1113 
1114 off_t
1115 _kern_seek(int fd, off_t pos, int seekType)
1116 {
1117 	FileDescriptorPutter descriptor(get_fd(get_current_io_context(true), fd));
1118 	if (!descriptor.IsSet())
1119 		return B_FILE_ERROR;
1120 
1121 	if (descriptor->ops->fd_seek)
1122 		pos = descriptor->ops->fd_seek(descriptor.Get(), pos, seekType);
1123 	else
1124 		pos = ESPIPE;
1125 
1126 	return pos;
1127 }
1128 
1129 
1130 status_t
1131 _kern_ioctl(int fd, uint32 op, void* buffer, size_t length)
1132 {
1133 	TRACE(("kern_ioctl: fd %d\n", fd));
1134 
1135 	SyscallFlagUnsetter _;
1136 
1137 	return fd_ioctl(true, fd, op, buffer, length);
1138 }
1139 
1140 
1141 ssize_t
1142 _kern_read_dir(int fd, struct dirent* buffer, size_t bufferSize,
1143 	uint32 maxCount)
1144 {
1145 	TRACE(("sys_read_dir(fd = %d, buffer = %p, bufferSize = %ld, count = "
1146 		"%" B_PRIu32 ")\n",fd, buffer, bufferSize, maxCount));
1147 
1148 	struct io_context* ioContext = get_current_io_context(true);
1149 	FileDescriptorPutter descriptor(get_fd(ioContext, fd));
1150 	if (!descriptor.IsSet())
1151 		return B_FILE_ERROR;
1152 
1153 	ssize_t retval;
1154 	if (descriptor->ops->fd_read_dir) {
1155 		uint32 count = maxCount;
1156 		retval = descriptor->ops->fd_read_dir(ioContext, descriptor.Get(), buffer,
1157 			bufferSize, &count);
1158 		if (retval >= 0)
1159 			retval = count;
1160 	} else
1161 		retval = B_UNSUPPORTED;
1162 
1163 	return retval;
1164 }
1165 
1166 
1167 status_t
1168 _kern_rewind_dir(int fd)
1169 {
1170 	TRACE(("sys_rewind_dir(fd = %d)\n",fd));
1171 
1172 	FileDescriptorPutter descriptor(get_fd(get_current_io_context(true), fd));
1173 	if (!descriptor.IsSet())
1174 		return B_FILE_ERROR;
1175 
1176 	status_t status;
1177 	if (descriptor->ops->fd_rewind_dir)
1178 		status = descriptor->ops->fd_rewind_dir(descriptor.Get());
1179 	else
1180 		status = B_UNSUPPORTED;
1181 
1182 	return status;
1183 }
1184 
1185 
1186 status_t
1187 _kern_close(int fd)
1188 {
1189 	return common_close(fd, true);
1190 }
1191 
1192 
1193 int
1194 _kern_dup(int fd)
1195 {
1196 	return dup_fd(fd, true);
1197 }
1198 
1199 
1200 int
1201 _kern_dup2(int ofd, int nfd, int flags)
1202 {
1203 	return dup2_fd(ofd, nfd, flags, true);
1204 }
1205 
1206