xref: /haiku/src/system/kernel/fs/fd.cpp (revision 342a1b221b5bb385410f758df2c625b70cafdd03)
/*
 * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2018, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 */


//! Operations on file descriptors


#include <fd.h>

#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>

#include <OS.h>

#include <AutoDeleter.h>
#include <AutoDeleterDrivers.h>
#include <BytePointer.h>
#include <StackOrHeapArray.h>

#include <syscalls.h>
#include <syscall_restart.h>
#include <slab/Slab.h>
#include <util/AutoLock.h>
#include <util/iovec_support.h>
#include <vfs.h>
#include <wait_for_objects.h>

#include "Vnode.h"
#include "vfs_tracing.h"


//#define TRACE_FD
#ifdef TRACE_FD
#	define TRACE(x) dprintf x
#else
#	define TRACE(x)
#endif


static const size_t kMaxReadDirBufferSize = 64 * 1024;

extern object_cache* sFileDescriptorCache;


static struct file_descriptor* get_fd_locked(struct io_context* context,
	int fd);
static struct file_descriptor* remove_fd(struct io_context* context, int fd);
static void deselect_select_infos(file_descriptor* descriptor,
	select_info* infos, bool putSyncObjects);


//	#pragma mark - General fd routines


#ifdef DEBUG
void dump_fd(int fd, struct file_descriptor* descriptor);

void
dump_fd(int fd, struct file_descriptor* descriptor)
{
	dprintf("fd[%d] = %p: type = %" B_PRId32 ", ref_count = %" B_PRId32 ", ops "
		"= %p, u.vnode = %p, u.mount = %p, cookie = %p, open_mode = %" B_PRIx32
		", pos = %" B_PRId64 "\n",
		fd, descriptor, descriptor->type, descriptor->ref_count,
		descriptor->ops, descriptor->u.vnode, descriptor->u.mount,
		descriptor->cookie, descriptor->open_mode, descriptor->pos);
}
#endif


/*!	Allocates and initializes a new file_descriptor.
*/
struct file_descriptor*
alloc_fd(void)
{
	file_descriptor* descriptor
		= (file_descriptor*)object_cache_alloc(sFileDescriptorCache, 0);
	if (descriptor == NULL)
		return NULL;

	descriptor->u.vnode = NULL;
	descriptor->cookie = NULL;
	descriptor->ref_count = 1;
	descriptor->open_count = 0;
	descriptor->open_mode = 0;
	descriptor->pos = -1;

	return descriptor;
}
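
/* Illustrative sketch only (not part of the original source): the typical
   create-and-publish sequence for a new descriptor, assuming the caller
   provides its own ops table (here the hypothetical "sMyOps"):

	file_descriptor* descriptor = alloc_fd();
	if (descriptor == NULL)
		return B_NO_MEMORY;
	descriptor->type = FDTYPE_FILE;
	descriptor->ops = &sMyOps;
	int fd = new_fd(get_current_io_context(kernel), descriptor);
	if (fd < 0)
		put_fd(descriptor);
			// drops the initial reference from alloc_fd()
*/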


bool
fd_close_on_exec(struct io_context* context, int fd)
{
	return CHECK_BIT(context->fds_close_on_exec[fd / 8], fd & 7) ? true : false;
}


void
fd_set_close_on_exec(struct io_context* context, int fd, bool closeFD)
{
	if (closeFD)
		context->fds_close_on_exec[fd / 8] |= (1 << (fd & 7));
	else
		context->fds_close_on_exec[fd / 8] &= ~(1 << (fd & 7));
}
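
/* The close-on-exec flags form a simple bitmap: fd / 8 selects the byte in
   fds_close_on_exec, fd & 7 the bit within it. For example, fd 10 is byte 1,
   bit 2, so setting it ORs fds_close_on_exec[1] with 0x04. */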


/*!	Searches for a free slot in the FD table of the provided I/O context, and
	inserts the given descriptor into it.
*/
int
new_fd_etc(struct io_context* context, struct file_descriptor* descriptor,
	int firstIndex)
{
	int fd = -1;
	uint32 i;

	if (firstIndex < 0 || (uint32)firstIndex >= context->table_size)
		return B_BAD_VALUE;

	mutex_lock(&context->io_mutex);

	for (i = firstIndex; i < context->table_size; i++) {
		if (!context->fds[i]) {
			fd = i;
			break;
		}
	}
	if (fd < 0) {
		fd = B_NO_MORE_FDS;
		goto err;
	}

	TFD(NewFD(context, fd, descriptor));

	context->fds[fd] = descriptor;
	context->num_used_fds++;
	atomic_add(&descriptor->open_count, 1);

err:
	mutex_unlock(&context->io_mutex);

	return fd;
}
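
/* Illustrative only: a positive firstIndex reserves the low slots, e.g.
   new_fd_etc(context, descriptor, 3) will never hand out fds 0-2. */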


int
new_fd(struct io_context* context, struct file_descriptor* descriptor)
{
	return new_fd_etc(context, descriptor, 0);
}


/*!	Decrements the descriptor's reference count, and frees all its resources
	once it is no longer referenced.
*/
void
put_fd(struct file_descriptor* descriptor)
{
	int32 previous = atomic_add(&descriptor->ref_count, -1);

	TFD(PutFD(descriptor));

	TRACE(("put_fd(descriptor = %p [ref = %" B_PRId32 ", cookie = %p])\n",
		descriptor, descriptor->ref_count, descriptor->cookie));

	// free the descriptor if we don't need it anymore
	if (previous == 1) {
		// free the underlying object
		if (descriptor->ops != NULL && descriptor->ops->fd_free != NULL)
			descriptor->ops->fd_free(descriptor);

		object_cache_free(sFileDescriptorCache, descriptor, 0);
	} else if ((descriptor->open_mode & O_DISCONNECTED) != 0
		&& previous - 1 == descriptor->open_count
		&& descriptor->ops != NULL) {
		// The descriptor has been disconnected - it cannot be accessed
		// anymore, so let's close it (no one is currently accessing it).

		if (descriptor->ops->fd_close)
			descriptor->ops->fd_close(descriptor);
		if (descriptor->ops->fd_free)
			descriptor->ops->fd_free(descriptor);

		// prevent this descriptor from being closed/freed again
		descriptor->ops = NULL;
		descriptor->u.vnode = NULL;

		// The file descriptor itself is kept intact, so that its slot is not
		// reused until someone explicitly closes it.
	}
}
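
/* Note the two counters: ref_count counts everyone holding a pointer to the
   descriptor (the table slot plus each get_fd() caller), while open_count
   counts the slots and temporary "keep open" holders. For a disconnected
   descriptor, the close/free hooks thus run as soon as only slot references
   remain. */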


/*!	Decrements the open counter of the file descriptor and invokes
	its close hook when appropriate.
*/
void
close_fd(struct io_context* context, struct file_descriptor* descriptor)
{
	// POSIX advisory locks need to be released when any file descriptor closes
	if (descriptor->type == FDTYPE_FILE)
		vfs_release_posix_lock(context, descriptor);

	if (atomic_add(&descriptor->open_count, -1) == 1) {
		vfs_unlock_vnode_if_locked(descriptor);

		if (descriptor->ops != NULL && descriptor->ops->fd_close != NULL)
			descriptor->ops->fd_close(descriptor);
	}
}


status_t
close_fd_index(struct io_context* context, int fd)
{
	struct file_descriptor* descriptor = remove_fd(context, fd);

	if (descriptor == NULL)
		return B_FILE_ERROR;

	close_fd(context, descriptor);
	put_fd(descriptor);
		// the reference associated with the slot

	return B_OK;
}


/*!	This descriptor's underlying object will be closed and freed as soon as
	possible (in one of the next calls to put_fd() - get_fd() will no longer
	succeed on this descriptor).
	This is useful if the underlying object is gone, for instance when a
	(mounted) volume has been removed unexpectedly.
*/
void
disconnect_fd(struct file_descriptor* descriptor)
{
	descriptor->open_mode |= O_DISCONNECTED;
}


void
inc_fd_ref_count(struct file_descriptor* descriptor)
{
	atomic_add(&descriptor->ref_count, 1);
}


static struct file_descriptor*
get_fd_locked(struct io_context* context, int fd)
{
	if (fd < 0 || (uint32)fd >= context->table_size)
		return NULL;

	struct file_descriptor* descriptor = context->fds[fd];

	if (descriptor != NULL) {
		// disconnected descriptors cannot be accessed anymore
		if (descriptor->open_mode & O_DISCONNECTED)
			return NULL;

		TFD(GetFD(context, fd, descriptor));
		inc_fd_ref_count(descriptor);
	}

	return descriptor;
}


struct file_descriptor*
get_fd(struct io_context* context, int fd)
{
	MutexLocker _(context->io_mutex);

	return get_fd_locked(context, fd);
}


struct file_descriptor*
get_open_fd(struct io_context* context, int fd)
{
	MutexLocker _(context->io_mutex);

	file_descriptor* descriptor = get_fd_locked(context, fd);
	if (descriptor == NULL)
		return NULL;

	atomic_add(&descriptor->open_count, 1);

	return descriptor;
}
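
/* A sketch of the intended pairing for get_open_fd() callers (assumed usage,
   mirroring what select_fd() below does by hand):

	file_descriptor* descriptor = get_open_fd(context, fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;
	// ... blocking work; fd_close() cannot run concurrently ...
	close_fd(context, descriptor);
		// releases the open reference
	put_fd(descriptor);
		// releases the ref_count reference
*/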


/*!	Removes the file descriptor from the specified slot.
*/
static struct file_descriptor*
remove_fd(struct io_context* context, int fd)
{
	struct file_descriptor* descriptor = NULL;

	if (fd < 0)
		return NULL;

	mutex_lock(&context->io_mutex);

	if ((uint32)fd < context->table_size)
		descriptor = context->fds[fd];

	select_info* selectInfos = NULL;
	bool disconnected = false;

	if (descriptor != NULL) {
		// fd is valid
		TFD(RemoveFD(context, fd, descriptor));

		context->fds[fd] = NULL;
		fd_set_close_on_exec(context, fd, false);
		context->num_used_fds--;

		selectInfos = context->select_infos[fd];
		context->select_infos[fd] = NULL;

		disconnected = (descriptor->open_mode & O_DISCONNECTED);
	}

	if (selectInfos != NULL)
		deselect_select_infos(descriptor, selectInfos, true);

	mutex_unlock(&context->io_mutex);

	return disconnected ? NULL : descriptor;
}


static int
dup_fd(int fd, bool kernel)
{
	struct io_context* context = get_current_io_context(kernel);
	struct file_descriptor* descriptor;
	int status;

	TRACE(("dup_fd: fd = %d\n", fd));

	// Try to get the fd structure
	descriptor = get_fd(context, fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	// now put the fd in place
	status = new_fd(context, descriptor);
	if (status < 0)
		put_fd(descriptor);
	else {
		mutex_lock(&context->io_mutex);
		fd_set_close_on_exec(context, status, false);
		mutex_unlock(&context->io_mutex);
	}

	return status;
}


/*!	POSIX says this should be the same as:
		close(newfd);
		fcntl(oldfd, F_DUPFD, newfd);

	We implement dup2() directly instead, to make it thread-safe.
*/
static int
dup2_fd(int oldfd, int newfd, bool kernel)
{
	struct file_descriptor* evicted = NULL;
	struct io_context* context;

	TRACE(("dup2_fd: ofd = %d, nfd = %d\n", oldfd, newfd));

	// quick check
	if (oldfd < 0 || newfd < 0)
		return B_FILE_ERROR;

	// Get the current I/O context and lock it
	context = get_current_io_context(kernel);
	mutex_lock(&context->io_mutex);

	// Check if the fds are valid (the mutex must be held because
	// the table size could be changed)
	if ((uint32)oldfd >= context->table_size
		|| (uint32)newfd >= context->table_size
		|| context->fds[oldfd] == NULL
		|| (context->fds[oldfd]->open_mode & O_DISCONNECTED) != 0) {
		mutex_unlock(&context->io_mutex);
		return B_FILE_ERROR;
	}

	// Check for identity; note that this check cannot be done earlier,
	// because we always want to return an error on invalid handles
	if (oldfd != newfd) {
		// Now do the work
		TFD(Dup2FD(context, oldfd, newfd));

		evicted = context->fds[newfd];
		select_info* selectInfos = context->select_infos[newfd];
		context->select_infos[newfd] = NULL;
		atomic_add(&context->fds[oldfd]->ref_count, 1);
		atomic_add(&context->fds[oldfd]->open_count, 1);
		context->fds[newfd] = context->fds[oldfd];

		if (evicted == NULL)
			context->num_used_fds++;

		deselect_select_infos(evicted, selectInfos, true);
	}

	fd_set_close_on_exec(context, newfd, false);

	mutex_unlock(&context->io_mutex);

	// Say bye bye to the evicted fd
	if (evicted) {
		close_fd(context, evicted);
		put_fd(evicted);
	}

	return newfd;
}
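
/* Illustrative only: after a successful dup2_fd(oldfd, newfd, ...), both
   slots share the same file_descriptor, and therefore also the file position
   and open mode; only the close-on-exec flag (cleared for newfd above) is
   kept per slot. */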


/*!	Duplicates an FD from another team to this/the kernel team.
	\param fromTeam The team which owns the FD.
	\param fd The FD to duplicate.
	\param kernel If \c true, the new FD will be created in the kernel team,
			in the current userland team otherwise.
	\return The newly created FD or an error code, if something went wrong.
*/
int
dup_foreign_fd(team_id fromTeam, int fd, bool kernel)
{
	// get the I/O context for the team in question
	Team* team = Team::Get(fromTeam);
	if (team == NULL)
		return B_BAD_TEAM_ID;
	BReference<Team> teamReference(team, true);

	io_context* fromContext = team->io_context;

	// get the file descriptor
	file_descriptor* descriptor = get_fd(fromContext, fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;
	FileDescriptorPutter descriptorPutter(descriptor);

	// create a new FD in the target I/O context
	int result = new_fd(get_current_io_context(kernel), descriptor);
	if (result >= 0) {
		// the descriptor reference belongs to the slot, now
		descriptorPutter.Detach();
	}

	return result;
}


static status_t
fd_ioctl(bool kernelFD, int fd, uint32 op, void* buffer, size_t length)
{
	FileDescriptorPutter descriptor(get_fd(get_current_io_context(kernelFD), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	// Special case: translate FIONBIO into fcntl(F_SETFL).
	if (op == FIONBIO) {
		if (buffer == NULL)
			return B_BAD_VALUE;

		int value;
		if (is_called_via_syscall()) {
			if (!IS_USER_ADDRESS(buffer)
				|| user_memcpy(&value, buffer, sizeof(int)) != B_OK) {
				return B_BAD_ADDRESS;
			}
		} else
			value = *(int*)buffer;

		size_t argument = descriptor->open_mode & ~O_NONBLOCK;
		argument |= (value ? O_NONBLOCK : 0);

		return (kernelFD ? _kern_fcntl : _user_fcntl)(fd, F_SETFL, argument);
	}

	status_t status;
	if (descriptor->ops->fd_ioctl)
		status = descriptor->ops->fd_ioctl(descriptor.Get(), op, buffer, length);
	else
		status = B_DEV_INVALID_IOCTL;

	if (status == B_DEV_INVALID_IOCTL)
		status = ENOTTY;

	return status;
}
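
/* Illustrative only: with the FIONBIO translation above, an
   ioctl(fd, FIONBIO, &value) with a non-zero value on a descriptor opened
   O_RDWR becomes fcntl(fd, F_SETFL, O_RDWR | O_NONBLOCK); a zero value
   clears O_NONBLOCK again. */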


static void
deselect_select_infos(file_descriptor* descriptor, select_info* infos,
	bool putSyncObjects)
{
	TRACE(("deselect_select_infos(%p, %p)\n", descriptor, infos));

	select_info* info = infos;
	while (info != NULL) {
		select_sync* sync = info->sync;

		// deselect the selected events
		uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
		if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
			for (uint16 event = 1; event < 16; event++) {
				if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
					descriptor->ops->fd_deselect(descriptor, event,
						(selectsync*)info);
				}
			}
		}

		select_info* next = info->next;
		notify_select_events(info, B_EVENT_INVALID);
		info = next;

		if (putSyncObjects)
			put_select_sync(sync);
	}
}


status_t
select_fd(int32 fd, struct select_info* info, bool kernel)
{
	TRACE(("select_fd(fd = %" B_PRId32 ", info = %p (%p), 0x%x)\n", fd, info,
		info->sync, info->selected_events));

	FileDescriptorPutter descriptor;
		// define before the context locker, so it will be destroyed after it

	io_context* context = get_current_io_context(kernel);
	MutexLocker locker(context->io_mutex);

	descriptor.SetTo(get_fd_locked(context, fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	uint16 eventsToSelect = info->selected_events & ~B_EVENT_INVALID;

	if (descriptor->ops->fd_select == NULL) {
		// if the I/O subsystem doesn't support select(), we will
		// immediately notify the select call
		eventsToSelect &= ~SELECT_OUTPUT_ONLY_FLAGS;
		if (eventsToSelect != 0)
			return notify_select_events(info, eventsToSelect);
		else
			return B_OK;
	}

	// We need the FD to stay open while we're doing this, so that no select()/
	// deselect() will be called on it after it is closed.
	atomic_add(&descriptor->open_count, 1);

	locker.Unlock();

	// select any events asked for
	uint32 selectedEvents = 0;

	for (uint16 event = 1; event < 16; event++) {
		if ((eventsToSelect & SELECT_FLAG(event)) != 0
			&& descriptor->ops->fd_select(descriptor.Get(), event,
				(selectsync*)info) == B_OK) {
			selectedEvents |= SELECT_FLAG(event);
		}
	}
	info->selected_events = selectedEvents
		| (info->selected_events & B_EVENT_INVALID);

	// Add the info to the IO context. Even if nothing has been selected -- we
	// always support B_EVENT_INVALID.
	locker.Lock();
	if (context->fds[fd] != descriptor.Get()) {
		// Someone close()d the index in the meantime. deselect() all
		// events.
		info->next = NULL;
		deselect_select_infos(descriptor.Get(), info, false);

		// Release our open reference of the descriptor.
		close_fd(context, descriptor.Get());
		return B_FILE_ERROR;
	}

	// The FD index hasn't changed, so we add the select info to the table.

	info->next = context->select_infos[fd];
	context->select_infos[fd] = info;

	// As long as the info is in the list, we keep a reference to the sync
	// object.
	acquire_select_sync(info->sync);

	// Finally release our open reference. It is safe to just decrement,
	// since as long as the descriptor is associated with the slot,
	// someone else still has it open.
	atomic_add(&descriptor->open_count, -1);

	return B_OK;
}
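
/* The transient open_count reference taken in select_fd() closes a race:
   without it, a concurrent close of the fd could invoke fd_close() while
   fd_select() is still running on the descriptor. */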


status_t
deselect_fd(int32 fd, struct select_info* info, bool kernel)
{
	TRACE(("deselect_fd(fd = %" B_PRId32 ", info = %p (%p), 0x%x)\n", fd, info,
		info->sync, info->selected_events));

	FileDescriptorPutter descriptor;
		// define before the context locker, so it will be destroyed after it

	io_context* context = get_current_io_context(kernel);
	MutexLocker locker(context->io_mutex);

	descriptor.SetTo(get_fd_locked(context, fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	// remove the info from the IO context

	select_info** infoLocation = &context->select_infos[fd];
	while (*infoLocation != NULL && *infoLocation != info)
		infoLocation = &(*infoLocation)->next;

	// If not found, someone else beat us to it.
	if (*infoLocation != info)
		return B_OK;

	*infoLocation = info->next;

	locker.Unlock();

	// deselect the selected events
	uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
	if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
		for (uint16 event = 1; event < 16; event++) {
			if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
				descriptor->ops->fd_deselect(descriptor.Get(), event,
					(selectsync*)info);
			}
		}
	}

	put_select_sync(info->sync);

	return B_OK;
}


/*!	This function checks whether the specified fd is valid in the current
	context. It can be used for a quick check; the fd is not locked,
	so it could become invalid immediately after this check.
*/
bool
fd_is_valid(int fd, bool kernel)
{
	struct file_descriptor* descriptor
		= get_fd(get_current_io_context(kernel), fd);
	if (descriptor == NULL)
		return false;

	put_fd(descriptor);
	return true;
}


struct vnode*
fd_vnode(struct file_descriptor* descriptor)
{
	switch (descriptor->type) {
		case FDTYPE_FILE:
		case FDTYPE_DIR:
		case FDTYPE_ATTR_DIR:
		case FDTYPE_ATTR:
			return descriptor->u.vnode;
	}

	return NULL;
}
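
/* Only the vnode-backed descriptor types expose their vnode here; all other
   types keep different state in the union and yield NULL. */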


static ssize_t
common_vector_io(int fd, off_t pos, const iovec* vecs, size_t count, bool write, bool kernel)
{
	if (pos < -1)
		return B_BAD_VALUE;

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(kernel), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
		return B_FILE_ERROR;
	}

	bool movePosition = false;
	if (pos == -1 && descriptor->pos != -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (write ? descriptor->ops->fd_write == NULL
			: descriptor->ops->fd_read == NULL) {
		return B_BAD_VALUE;
	}

	// See if we can bypass the loop and perform the I/O directly. We can only
	// do this for vnodes that have no cache, as the I/O hook bypasses the
	// cache entirely.
	struct vnode* vnode = descriptor->u.vnode;
	status_t status = B_OK;
	if (!movePosition && pos != -1 && count > 1 && descriptor->type == FDTYPE_FILE
			&& vnode != NULL && vnode->cache == NULL && vnode->ops->io != NULL) {
		BStackOrHeapArray<generic_io_vec, 8> iovecs(count);
		if (!iovecs.IsValid())
			return B_NO_MEMORY;

		generic_size_t length = 0;
		for (size_t i = 0; i < count; i++) {
			iovecs[i].base = (generic_addr_t)vecs[i].iov_base;
			iovecs[i].length = vecs[i].iov_len;
			length += vecs[i].iov_len;
		}

		status = (write ? vfs_write_pages : vfs_read_pages)(vnode,
			descriptor->cookie, pos, iovecs, count, 0, &length);
		if (length > 0)
			return length;
		return status;
	}

	ssize_t bytesTransferred = 0;
	for (size_t i = 0; i < count; i++) {
		if (vecs[i].iov_base == NULL)
			continue;

		size_t length = vecs[i].iov_len;
		if (write) {
			status = descriptor->ops->fd_write(descriptor.Get(), pos,
				vecs[i].iov_base, &length);
		} else {
			status = descriptor->ops->fd_read(descriptor.Get(), pos, vecs[i].iov_base,
				&length);
		}

		if (status != B_OK) {
			if (bytesTransferred == 0)
				return status;
			break;
		}

		if ((uint64)bytesTransferred + length > SSIZE_MAX)
			bytesTransferred = SSIZE_MAX;
		else
			bytesTransferred += (ssize_t)length;

		if (pos != -1)
			pos += length;

		if (length < vecs[i].iov_len)
			break;
	}

	if (movePosition) {
		descriptor->pos = write && (descriptor->open_mode & O_APPEND) != 0
			? descriptor->ops->fd_seek(descriptor.Get(), 0, SEEK_END) : pos;
	}

	return bytesTransferred;
}
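
/* Partial-transfer semantics, as implemented above: an error before any byte
   was transferred is returned as-is, while an error after a partial transfer
   is swallowed and the byte count is returned instead - the usual
   readv()/writev() contract. */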


static ssize_t
common_user_io(int fd, off_t pos, void* buffer, size_t length, bool write)
{
	if (pos < -1)
		return B_BAD_VALUE;

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(false), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
		return B_FILE_ERROR;
	}

	bool movePosition = false;
	if (pos == -1 && descriptor->pos != -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (write ? descriptor->ops->fd_write == NULL
			: descriptor->ops->fd_read == NULL) {
		return B_BAD_VALUE;
	}

	if (length == 0)
		return 0;

	if (!is_user_address_range(buffer, length))
		return B_BAD_ADDRESS;

	SyscallRestartWrapper<status_t> status;

	if (write)
		status = descriptor->ops->fd_write(descriptor.Get(), pos, buffer, &length);
	else
		status = descriptor->ops->fd_read(descriptor.Get(), pos, buffer, &length);

	if (status != B_OK)
		return status;

	if (movePosition) {
		descriptor->pos = write && (descriptor->open_mode & O_APPEND) != 0
			? descriptor->ops->fd_seek(descriptor.Get(), 0, SEEK_END) : pos + length;
	}

	return length <= SSIZE_MAX ? (ssize_t)length : SSIZE_MAX;
}


static ssize_t
common_user_vector_io(int fd, off_t pos, const iovec* userVecs, size_t count,
	bool write)
{
	if (count > IOV_MAX)
		return B_BAD_VALUE;

	BStackOrHeapArray<iovec, 16> vecs(count);
	if (!vecs.IsValid())
		return B_NO_MEMORY;

	status_t error = get_iovecs_from_user(userVecs, count, vecs, true);
	if (error != B_OK)
		return error;

	SyscallRestartWrapper<ssize_t> result = common_vector_io(fd, pos,
		vecs, count, write, false);

	return result;
}


static status_t
common_close(int fd, bool kernel)
{
	return close_fd_index(get_current_io_context(kernel), fd);
}


status_t
user_fd_kernel_ioctl(int fd, uint32 op, void* buffer, size_t length)
{
	TRACE(("user_fd_kernel_ioctl: fd %d\n", fd));

	return fd_ioctl(false, fd, op, buffer, length);
}


//	#pragma mark - User syscalls


ssize_t
_user_read(int fd, off_t pos, void* buffer, size_t length)
{
	return common_user_io(fd, pos, buffer, length, false);
}


ssize_t
_user_readv(int fd, off_t pos, const iovec* userVecs, size_t count)
{
	return common_user_vector_io(fd, pos, userVecs, count, false);
}


ssize_t
_user_write(int fd, off_t pos, const void* buffer, size_t length)
{
	return common_user_io(fd, pos, (void*)buffer, length, true);
}


ssize_t
_user_writev(int fd, off_t pos, const iovec* userVecs, size_t count)
{
	return common_user_vector_io(fd, pos, userVecs, count, true);
}


off_t
_user_seek(int fd, off_t pos, int seekType)
{
	syscall_64_bit_return_value();

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(false), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	TRACE(("user_seek(descriptor = %p)\n", descriptor.Get()));

	if (descriptor->ops->fd_seek)
		pos = descriptor->ops->fd_seek(descriptor.Get(), pos, seekType);
	else
		pos = ESPIPE;

	return pos;
}


status_t
_user_ioctl(int fd, uint32 op, void* buffer, size_t length)
{
	TRACE(("user_ioctl: fd %d\n", fd));

	// "buffer" is not always a pointer depending on "op", so we cannot
	// check that it is a userland buffer here. Instead we check that
	// it is at least not within the bounds of kernel memory; in the
	// cases where it is a numeric constant, it is usually a low one.
	if (IS_KERNEL_ADDRESS(buffer))
		return B_BAD_ADDRESS;

	SyscallRestartWrapper<status_t> status;

	return status = fd_ioctl(false, fd, op, buffer, length);
}


ssize_t
_user_read_dir(int fd, struct dirent* userBuffer, size_t bufferSize,
	uint32 maxCount)
{
	TRACE(("user_read_dir(fd = %d, userBuffer = %p, bufferSize = %ld, count = "
		"%" B_PRIu32 ")\n", fd, userBuffer, bufferSize, maxCount));

	if (maxCount == 0)
		return 0;

	if (userBuffer == NULL || !IS_USER_ADDRESS(userBuffer))
		return B_BAD_ADDRESS;

	// get the I/O context and the FD
	io_context* ioContext = get_current_io_context(false);
	FileDescriptorPutter descriptor(get_fd(ioContext, fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	if (descriptor->ops->fd_read_dir == NULL)
		return B_UNSUPPORTED;

	// restrict the buffer size and allocate a heap buffer
	if (bufferSize > kMaxReadDirBufferSize)
		bufferSize = kMaxReadDirBufferSize;
	struct dirent* buffer = (struct dirent*)malloc(bufferSize);
	if (buffer == NULL)
		return B_NO_MEMORY;
	MemoryDeleter bufferDeleter(buffer);

	// read the directory
	uint32 count = maxCount;
	status_t status = descriptor->ops->fd_read_dir(ioContext, descriptor.Get(),
		buffer, bufferSize, &count);
	if (status != B_OK)
		return status;

	ASSERT(count <= maxCount);

	// copy the buffer back -- determine the total buffer size first
	size_t sizeToCopy = 0;
	BytePointer<struct dirent> entry = buffer;
	for (uint32 i = 0; i < count; i++) {
		size_t length = entry->d_reclen;
		sizeToCopy += length;
		entry += length;
	}

	ASSERT(sizeToCopy <= bufferSize);

	if (user_memcpy(userBuffer, buffer, sizeToCopy) != B_OK)
		return B_BAD_ADDRESS;

	return count;
}
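
/* Illustrative only: the region copied back to userland is a sequence of
   variable-length records, each advanced by d_reclen, e.g.:

	struct dirent* entry = userBuffer;
	for (uint32 i = 0; i < count; i++) {
		// consume entry->d_name ...
		entry = (struct dirent*)((uint8*)entry + entry->d_reclen);
	}
*/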


status_t
_user_rewind_dir(int fd)
{
	TRACE(("user_rewind_dir(fd = %d)\n", fd));

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(false), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	status_t status;
	if (descriptor->ops->fd_rewind_dir)
		status = descriptor->ops->fd_rewind_dir(descriptor.Get());
	else
		status = B_UNSUPPORTED;

	return status;
}


status_t
_user_close(int fd)
{
	return common_close(fd, false);
}


int
_user_dup(int fd)
{
	return dup_fd(fd, false);
}


int
_user_dup2(int ofd, int nfd)
{
	return dup2_fd(ofd, nfd, false);
}


//	#pragma mark - Kernel calls


ssize_t
_kern_read(int fd, off_t pos, void* buffer, size_t length)
{
	if (pos < -1)
		return B_BAD_VALUE;

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(true), fd));

	if (!descriptor.IsSet())
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
		return B_FILE_ERROR;

	bool movePosition = false;
	if (pos == -1 && descriptor->pos != -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	SyscallFlagUnsetter _;

	if (descriptor->ops->fd_read == NULL)
		return B_BAD_VALUE;

	ssize_t bytesRead = descriptor->ops->fd_read(descriptor.Get(), pos, buffer,
		&length);
	if (bytesRead >= B_OK) {
		if (length > SSIZE_MAX)
			bytesRead = SSIZE_MAX;
		else
			bytesRead = (ssize_t)length;

		if (movePosition)
			descriptor->pos = pos + length;
	}

	return bytesRead;
}


ssize_t
_kern_write(int fd, off_t pos, const void* buffer, size_t length)
{
	if (pos < -1)
		return B_BAD_VALUE;

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(true), fd));

	if (!descriptor.IsSet())
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
		return B_FILE_ERROR;

	bool movePosition = false;
	if (pos == -1 && descriptor->pos != -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (descriptor->ops->fd_write == NULL)
		return B_BAD_VALUE;

	SyscallFlagUnsetter _;

	ssize_t bytesWritten = descriptor->ops->fd_write(descriptor.Get(), pos,
		buffer, &length);
	if (bytesWritten >= B_OK) {
		if (length > SSIZE_MAX)
			bytesWritten = SSIZE_MAX;
		else
			bytesWritten = (ssize_t)length;

		if (movePosition)
			descriptor->pos = pos + length;
	}

	return bytesWritten;
}


ssize_t
_kern_readv(int fd, off_t pos, const iovec* vecs, size_t count)
{
	SyscallFlagUnsetter _;
	return common_vector_io(fd, pos, vecs, count, false, true);
}


ssize_t
_kern_writev(int fd, off_t pos, const iovec* vecs, size_t count)
{
	SyscallFlagUnsetter _;
	return common_vector_io(fd, pos, vecs, count, true, true);
}


off_t
_kern_seek(int fd, off_t pos, int seekType)
{
	FileDescriptorPutter descriptor(get_fd(get_current_io_context(true), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	if (descriptor->ops->fd_seek)
		pos = descriptor->ops->fd_seek(descriptor.Get(), pos, seekType);
	else
		pos = ESPIPE;

	return pos;
}


status_t
_kern_ioctl(int fd, uint32 op, void* buffer, size_t length)
{
	TRACE(("kern_ioctl: fd %d\n", fd));

	SyscallFlagUnsetter _;

	return fd_ioctl(true, fd, op, buffer, length);
}


ssize_t
_kern_read_dir(int fd, struct dirent* buffer, size_t bufferSize,
	uint32 maxCount)
{
	TRACE(("sys_read_dir(fd = %d, buffer = %p, bufferSize = %ld, count = "
		"%" B_PRIu32 ")\n", fd, buffer, bufferSize, maxCount));

	struct io_context* ioContext = get_current_io_context(true);
	FileDescriptorPutter descriptor(get_fd(ioContext, fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	ssize_t retval;
	if (descriptor->ops->fd_read_dir) {
		uint32 count = maxCount;
		retval = descriptor->ops->fd_read_dir(ioContext, descriptor.Get(), buffer,
			bufferSize, &count);
		if (retval >= 0)
			retval = count;
	} else
		retval = B_UNSUPPORTED;

	return retval;
}


status_t
_kern_rewind_dir(int fd)
{
	TRACE(("sys_rewind_dir(fd = %d)\n", fd));

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(true), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	status_t status;
	if (descriptor->ops->fd_rewind_dir)
		status = descriptor->ops->fd_rewind_dir(descriptor.Get());
	else
		status = B_UNSUPPORTED;

	return status;
}


status_t
_kern_close(int fd)
{
	return common_close(fd, true);
}


int
_kern_dup(int fd)
{
	return dup_fd(fd, true);
}


int
_kern_dup2(int ofd, int nfd)
{
	return dup2_fd(ofd, nfd, true);
}
1237