/*
 * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2018, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 */


//! Operations on file descriptors


#include <fd.h>

#include <stdlib.h>
#include <string.h>

#include <OS.h>

#include <AutoDeleter.h>
#include <AutoDeleterDrivers.h>
#include <BytePointer.h>

#include <syscalls.h>
#include <syscall_restart.h>
#include <slab/Slab.h>
#include <util/AutoLock.h>
#include <util/iovec_support.h>
#include <vfs.h>
#include <wait_for_objects.h>

#include "vfs_tracing.h"


//#define TRACE_FD
#ifdef TRACE_FD
#	define TRACE(x) dprintf x
#else
#	define TRACE(x)
#endif


static const size_t kMaxReadDirBufferSize = 64 * 1024;

extern object_cache* sFileDescriptorCache;


static struct file_descriptor* get_fd_locked(struct io_context* context,
	int fd);
static struct file_descriptor* remove_fd(struct io_context* context, int fd);
static void deselect_select_infos(file_descriptor* descriptor,
	select_info* infos, bool putSyncObjects);


//	#pragma mark - General fd routines


#ifdef DEBUG
void dump_fd(int fd, struct file_descriptor* descriptor);

void
dump_fd(int fd, struct file_descriptor* descriptor)
{
	dprintf("fd[%d] = %p: type = %" B_PRId32 ", ref_count = %" B_PRId32 ", ops "
		"= %p, u.vnode = %p, u.mount = %p, cookie = %p, open_mode = %" B_PRIx32
		", pos = %" B_PRId64 "\n",
		fd, descriptor, descriptor->type, descriptor->ref_count,
		descriptor->ops, descriptor->u.vnode, descriptor->u.mount,
		descriptor->cookie, descriptor->open_mode, descriptor->pos);
}
#endif


/*! Allocates and initializes a new file_descriptor.
*/
struct file_descriptor*
alloc_fd(void)
{
	file_descriptor* descriptor
		= (file_descriptor*)object_cache_alloc(sFileDescriptorCache, 0);
	if (descriptor == NULL)
		return NULL;

	descriptor->u.vnode = NULL;
	descriptor->cookie = NULL;
	descriptor->ref_count = 1;
	descriptor->open_count = 0;
	descriptor->open_mode = 0;
	descriptor->pos = -1;

	return descriptor;
}


bool
fd_close_on_exec(struct io_context* context, int fd)
{
	return CHECK_BIT(context->fds_close_on_exec[fd / 8], fd & 7) ? true : false;
}


void
fd_set_close_on_exec(struct io_context* context, int fd, bool closeFD)
{
	if (closeFD)
		context->fds_close_on_exec[fd / 8] |= (1 << (fd & 7));
	else
		context->fds_close_on_exec[fd / 8] &= ~(1 << (fd & 7));
}

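// Illustrative note (a sketch, not part of the original source): the
// close-on-exec flags form a plain bitmap with one bit per FD. For fd 10,
// the flag lives in byte 10 / 8 = 1 at bit 10 & 7 = 2, so setting it is
// equivalent to:
//
//     context->fds_close_on_exec[1] |= (1 << 2);
//
// fd_close_on_exec(context, 10) then tests exactly that bit.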

/*!	Searches for a free slot in the FD table of the provided I/O context,
	and inserts the specified descriptor into it.
*/
int
new_fd_etc(struct io_context* context, struct file_descriptor* descriptor,
	int firstIndex)
{
	int fd = -1;
	uint32 i;

	if (firstIndex < 0 || (uint32)firstIndex >= context->table_size)
		return B_BAD_VALUE;

	mutex_lock(&context->io_mutex);

	for (i = firstIndex; i < context->table_size; i++) {
		if (!context->fds[i]) {
			fd = i;
			break;
		}
	}
	if (fd < 0) {
		fd = B_NO_MORE_FDS;
		goto err;
	}

	TFD(NewFD(context, fd, descriptor));

	context->fds[fd] = descriptor;
	context->num_used_fds++;
	atomic_add(&descriptor->open_count, 1);

err:
	mutex_unlock(&context->io_mutex);

	return fd;
}


int
new_fd(struct io_context* context, struct file_descriptor* descriptor)
{
	return new_fd_etc(context, descriptor, 0);
}

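// Usage sketch (assumes a fully initialized descriptor; names as in this
// file): on success the slot takes over the reference created by alloc_fd(),
// so only the failure path has to drop it:
//
//     struct file_descriptor* descriptor = alloc_fd();
//     if (descriptor == NULL)
//         return B_NO_MEMORY;
//     // ...set descriptor->type, descriptor->ops, descriptor->cookie...
//     int fd = new_fd(get_current_io_context(false), descriptor);
//     if (fd < 0)
//         put_fd(descriptor);   // releases the reference from alloc_fd()
//     return fd;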

/*!	Decrements the descriptor's reference counter, and frees all of its
	resources once the counter drops to zero.
*/
void
put_fd(struct file_descriptor* descriptor)
{
	int32 previous = atomic_add(&descriptor->ref_count, -1);

	TFD(PutFD(descriptor));

	TRACE(("put_fd(descriptor = %p [ref = %" B_PRId32 ", cookie = %p])\n",
		descriptor, descriptor->ref_count, descriptor->cookie));

	// free the descriptor if we don't need it anymore
	if (previous == 1) {
		// free the underlying object
		if (descriptor->ops != NULL && descriptor->ops->fd_free != NULL)
			descriptor->ops->fd_free(descriptor);

		object_cache_free(sFileDescriptorCache, descriptor, 0);
	} else if ((descriptor->open_mode & O_DISCONNECTED) != 0
		&& previous - 1 == descriptor->open_count
		&& descriptor->ops != NULL) {
		// The descriptor has been disconnected - it cannot be accessed
		// anymore, so close it now; no one is currently using it.

		if (descriptor->ops->fd_close)
			descriptor->ops->fd_close(descriptor);
		if (descriptor->ops->fd_free)
			descriptor->ops->fd_free(descriptor);

		// prevent this descriptor from being closed/freed again
		descriptor->ops = NULL;
		descriptor->u.vnode = NULL;

		// the file descriptor is kept intact, so that it's not
		// reused until someone explicitly closes it
	}
}

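// Pairing sketch: every successful get_fd() acquires one reference that the
// caller must return via put_fd(); FileDescriptorPutter does this
// automatically in the functions further below:
//
//     struct file_descriptor* descriptor = get_fd(context, fd);
//     if (descriptor == NULL)
//         return B_FILE_ERROR;
//     // ...use descriptor...
//     put_fd(descriptor);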

/*!	Decrements the open counter of the file descriptor and invokes
	its close hook when appropriate.
*/
void
close_fd(struct io_context* context, struct file_descriptor* descriptor)
{
	// POSIX advisory locks need to be released when any file descriptor closes
	if (descriptor->type == FDTYPE_FILE)
		vfs_release_posix_lock(context, descriptor);

	if (atomic_add(&descriptor->open_count, -1) == 1) {
		vfs_unlock_vnode_if_locked(descriptor);

		if (descriptor->ops != NULL && descriptor->ops->fd_close != NULL)
			descriptor->ops->fd_close(descriptor);
	}
}


status_t
close_fd_index(struct io_context* context, int fd)
{
	struct file_descriptor* descriptor = remove_fd(context, fd);

	if (descriptor == NULL)
		return B_FILE_ERROR;

	close_fd(context, descriptor);
	put_fd(descriptor);
		// the reference associated with the slot

	return B_OK;
}


/*!	Marks the descriptor as disconnected: its underlying object will be
	closed and freed as soon as possible (in one of the next calls to
	put_fd()), and get_fd() will no longer succeed on it.
	This is useful if the underlying object is gone, for instance when a
	(mounted) volume has been removed unexpectedly.
*/
void
disconnect_fd(struct file_descriptor* descriptor)
{
	descriptor->open_mode |= O_DISCONNECTED;
}

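// Sketch of the intended use (hedged; the actual caller lives in the VFS
// unmount path): once a descriptor's backing object vanishes, mark it dead
// so that new get_fd() lookups fail:
//
//     disconnect_fd(descriptor);
//     // the final put_fd() will then close and free the underlying object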

void
inc_fd_ref_count(struct file_descriptor* descriptor)
{
	atomic_add(&descriptor->ref_count, 1);
}


static struct file_descriptor*
get_fd_locked(struct io_context* context, int fd)
{
	if (fd < 0 || (uint32)fd >= context->table_size)
		return NULL;

	struct file_descriptor* descriptor = context->fds[fd];

	if (descriptor != NULL) {
		// disconnected descriptors cannot be accessed anymore
		if (descriptor->open_mode & O_DISCONNECTED)
			return NULL;

		TFD(GetFD(context, fd, descriptor));
		inc_fd_ref_count(descriptor);
	}

	return descriptor;
}


struct file_descriptor*
get_fd(struct io_context* context, int fd)
{
	MutexLocker _(context->io_mutex);

	return get_fd_locked(context, fd);
}


struct file_descriptor*
get_open_fd(struct io_context* context, int fd)
{
	MutexLocker _(context->io_mutex);

	file_descriptor* descriptor = get_fd_locked(context, fd);
	if (descriptor == NULL)
		return NULL;

	atomic_add(&descriptor->open_count, 1);

	return descriptor;
}

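// Pairing sketch (an assumption about caller conventions, mirroring the
// functions above): get_open_fd() bumps both the open count and the
// reference count, so both must be dropped when done:
//
//     file_descriptor* descriptor = get_open_fd(context, fd);
//     if (descriptor != NULL) {
//         // ...perform I/O...
//         close_fd(context, descriptor);   // undoes the open_count increment
//         put_fd(descriptor);              // undoes the ref_count increment
//     }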

/*!	Removes the file descriptor from the specified slot.
*/
static struct file_descriptor*
remove_fd(struct io_context* context, int fd)
{
	struct file_descriptor* descriptor = NULL;

	if (fd < 0)
		return NULL;

	mutex_lock(&context->io_mutex);

	if ((uint32)fd < context->table_size)
		descriptor = context->fds[fd];

	select_info* selectInfos = NULL;
	bool disconnected = false;

	if (descriptor != NULL) {
		// fd is valid
		TFD(RemoveFD(context, fd, descriptor));

		context->fds[fd] = NULL;
		fd_set_close_on_exec(context, fd, false);
		context->num_used_fds--;

		selectInfos = context->select_infos[fd];
		context->select_infos[fd] = NULL;

		disconnected = (descriptor->open_mode & O_DISCONNECTED);
	}

	if (selectInfos != NULL)
		deselect_select_infos(descriptor, selectInfos, true);

	mutex_unlock(&context->io_mutex);

	return disconnected ? NULL : descriptor;
}


static int
dup_fd(int fd, bool kernel)
{
	struct io_context* context = get_current_io_context(kernel);
	struct file_descriptor* descriptor;
	int status;

	TRACE(("dup_fd: fd = %d\n", fd));

	// Try to get the fd structure
	descriptor = get_fd(context, fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	// now put the fd in place
	status = new_fd(context, descriptor);
	if (status < 0)
		put_fd(descriptor);
	else {
		mutex_lock(&context->io_mutex);
		fd_set_close_on_exec(context, status, false);
		mutex_unlock(&context->io_mutex);
	}

	return status;
}


/*!	POSIX says this should be the same as:
		close(newfd);
		fcntl(oldfd, F_DUPFD, newfd);

	We implement dup2() directly to be thread-safe.
*/
static int
dup2_fd(int oldfd, int newfd, bool kernel)
{
	struct file_descriptor* evicted = NULL;
	struct io_context* context;

	TRACE(("dup2_fd: ofd = %d, nfd = %d\n", oldfd, newfd));

	// quick check
	if (oldfd < 0 || newfd < 0)
		return B_FILE_ERROR;

	// Get current I/O context and lock it
	context = get_current_io_context(kernel);
	mutex_lock(&context->io_mutex);

	// Check if the fds are valid (the mutex must be locked, because
	// the table size could change)
	if ((uint32)oldfd >= context->table_size
		|| (uint32)newfd >= context->table_size
		|| context->fds[oldfd] == NULL
		|| (context->fds[oldfd]->open_mode & O_DISCONNECTED) != 0) {
		mutex_unlock(&context->io_mutex);
		return B_FILE_ERROR;
	}

	// Check for identity; note that this check cannot be done earlier,
	// because we always want to return an error for invalid handles
	if (oldfd != newfd) {
		// Now do the work
		TFD(Dup2FD(context, oldfd, newfd));

		evicted = context->fds[newfd];
		select_info* selectInfos = context->select_infos[newfd];
		context->select_infos[newfd] = NULL;
		atomic_add(&context->fds[oldfd]->ref_count, 1);
		atomic_add(&context->fds[oldfd]->open_count, 1);
		context->fds[newfd] = context->fds[oldfd];

		if (evicted == NULL)
			context->num_used_fds++;

		deselect_select_infos(evicted, selectInfos, true);
	}

	fd_set_close_on_exec(context, newfd, false);

	mutex_unlock(&context->io_mutex);

	// Say bye bye to the evicted fd
	if (evicted) {
		close_fd(context, evicted);
		put_fd(evicted);
	}

	return newfd;
}

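// Behavioral sketch: after a successful dup2_fd(oldfd, newfd, ...), both
// indices share one file_descriptor (its reference and open counts were
// bumped), and whatever newfd referred to before has been closed:
//
//     int result = dup2_fd(oldfd, newfd, false);
//     // result == newfd on success, and
//     // context->fds[newfd] == context->fds[oldfd]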

/*!	Duplicates an FD from another team into the current team (or the kernel
	team).
	\param fromTeam The team which owns the FD.
	\param fd The FD to duplicate.
	\param kernel If \c true, the new FD will be created in the kernel team,
			in the current userland team otherwise.
	\return The newly created FD or an error code, if something went wrong.
*/
int
dup_foreign_fd(team_id fromTeam, int fd, bool kernel)
{
	// get the I/O context for the team in question
	Team* team = Team::Get(fromTeam);
	if (team == NULL)
		return B_BAD_TEAM_ID;
	BReference<Team> teamReference(team, true);

	io_context* fromContext = team->io_context;

	// get the file descriptor
	file_descriptor* descriptor = get_fd(fromContext, fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;
	FileDescriptorPutter descriptorPutter(descriptor);

	// create a new FD in the target I/O context
	int result = new_fd(get_current_io_context(kernel), descriptor);
	if (result >= 0) {
		// the descriptor reference belongs to the slot, now
		descriptorPutter.Detach();
	}

	return result;
}

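// Illustrative call (hypothetical values): pull fd 3 out of `sourceTeam` and
// make it available in the kernel's own I/O context:
//
//     int localFD = dup_foreign_fd(sourceTeam, 3, true);
//     if (localFD >= 0) {
//         // ...use localFD with the _kern_*() calls below...
//         _kern_close(localFD);
//     }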

static status_t
fd_ioctl(bool kernelFD, int fd, uint32 op, void* buffer, size_t length)
{
	FileDescriptorPutter descriptor(get_fd(get_current_io_context(kernelFD), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	status_t status;
	if (descriptor->ops->fd_ioctl)
		status = descriptor->ops->fd_ioctl(descriptor.Get(), op, buffer, length);
	else
		status = B_DEV_INVALID_IOCTL;

	if (status == B_DEV_INVALID_IOCTL)
		status = ENOTTY;

	return status;
}


static void
deselect_select_infos(file_descriptor* descriptor, select_info* infos,
	bool putSyncObjects)
{
	TRACE(("deselect_select_infos(%p, %p)\n", descriptor, infos));

	select_info* info = infos;
	while (info != NULL) {
		select_sync* sync = info->sync;

		// deselect the selected events
		uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
		if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
			for (uint16 event = 1; event < 16; event++) {
				if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
					descriptor->ops->fd_deselect(descriptor, event,
						(selectsync*)info);
				}
			}
		}

		select_info* next = info->next;
		notify_select_events(info, B_EVENT_INVALID);
		info = next;

		if (putSyncObjects)
			put_select_sync(sync);
	}
}


status_t
select_fd(int32 fd, struct select_info* info, bool kernel)
{
	TRACE(("select_fd(fd = %" B_PRId32 ", info = %p (%p), 0x%x)\n", fd, info,
		info->sync, info->selected_events));

	FileDescriptorPutter descriptor;
		// define before the context locker, so it will be destroyed after it

	io_context* context = get_current_io_context(kernel);
	MutexLocker locker(context->io_mutex);

	descriptor.SetTo(get_fd_locked(context, fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	uint16 eventsToSelect = info->selected_events & ~B_EVENT_INVALID;

	if (descriptor->ops->fd_select == NULL) {
		// if the I/O subsystem doesn't support select(), we will
		// immediately notify the select call
		eventsToSelect &= ~SELECT_OUTPUT_ONLY_FLAGS;
		if (eventsToSelect != 0)
			return notify_select_events(info, eventsToSelect);
		else
			return B_OK;
	}

	// We need the FD to stay open while we're doing this, so that no
	// select()/deselect() will be called on it after it is closed.
	atomic_add(&descriptor->open_count, 1);

	locker.Unlock();

	// select any events asked for
	uint32 selectedEvents = 0;

	for (uint16 event = 1; event < 16; event++) {
		if ((eventsToSelect & SELECT_FLAG(event)) != 0
			&& descriptor->ops->fd_select(descriptor.Get(), event,
				(selectsync*)info) == B_OK) {
			selectedEvents |= SELECT_FLAG(event);
		}
	}
	info->selected_events = selectedEvents
		| (info->selected_events & B_EVENT_INVALID);

	// Add the info to the IO context. Even if nothing has been selected -- we
	// always support B_EVENT_INVALID.
	locker.Lock();
	if (context->fds[fd] != descriptor.Get()) {
		// Someone close()d the index in the meantime. deselect() all
		// events.
		info->next = NULL;
		deselect_select_infos(descriptor.Get(), info, false);

		// Release our open reference of the descriptor.
		close_fd(context, descriptor.Get());
		return B_FILE_ERROR;
	}

	// The FD index hasn't changed, so we add the select info to the table.

	info->next = context->select_infos[fd];
	context->select_infos[fd] = info;

	// As long as the info is in the list, we keep a reference to the sync
	// object.
	acquire_select_sync(info->sync);

	// Finally release our open reference. It is safe just to decrement,
	// since as long as the descriptor is associated with the slot,
	// someone else still has it open.
	atomic_add(&descriptor->open_count, -1);

	return B_OK;
}


status_t
deselect_fd(int32 fd, struct select_info* info, bool kernel)
{
	TRACE(("deselect_fd(fd = %" B_PRId32 ", info = %p (%p), 0x%x)\n", fd, info,
		info->sync, info->selected_events));

	FileDescriptorPutter descriptor;
		// define before the context locker, so it will be destroyed after it

	io_context* context = get_current_io_context(kernel);
	MutexLocker locker(context->io_mutex);

	descriptor.SetTo(get_fd_locked(context, fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	// remove the info from the IO context

	select_info** infoLocation = &context->select_infos[fd];
	while (*infoLocation != NULL && *infoLocation != info)
		infoLocation = &(*infoLocation)->next;

	// If not found, someone else beat us to it.
	if (*infoLocation != info)
		return B_OK;

	*infoLocation = info->next;

	locker.Unlock();

	// deselect the selected events
	uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
	if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
		for (uint16 event = 1; event < 16; event++) {
			if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
				descriptor->ops->fd_deselect(descriptor.Get(), event,
					(selectsync*)info);
			}
		}
	}

	put_select_sync(info->sync);

	return B_OK;
}


/*!	This function checks if the specified fd is valid in the current
	context. It can be used for a quick check; the fd is not locked,
	so it could become invalid immediately after this check.
*/
bool
fd_is_valid(int fd, bool kernel)
{
	struct file_descriptor* descriptor
		= get_fd(get_current_io_context(kernel), fd);
	if (descriptor == NULL)
		return false;

	put_fd(descriptor);
	return true;
}

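// Caveat sketch: the answer is only a snapshot; another thread may close the
// fd right after the check, so callers still have to handle a later failure:
//
//     if (fd_is_valid(fd, false)) {
//         // the fd *was* valid here, but a subsequent get_fd() can
//         // nevertheless return NULL
//     }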

struct vnode*
fd_vnode(struct file_descriptor* descriptor)
{
	switch (descriptor->type) {
		case FDTYPE_FILE:
		case FDTYPE_DIR:
		case FDTYPE_ATTR_DIR:
		case FDTYPE_ATTR:
			return descriptor->u.vnode;
	}

	return NULL;
}


static status_t
common_close(int fd, bool kernel)
{
	return close_fd_index(get_current_io_context(kernel), fd);
}


static ssize_t
common_user_io(int fd, off_t pos, void* buffer, size_t length, bool write)
{
	if (pos < -1)
		return B_BAD_VALUE;

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(false), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
		return B_FILE_ERROR;
	}

	bool movePosition = false;
	if (pos == -1 && descriptor->pos != -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (write ? descriptor->ops->fd_write == NULL
			: descriptor->ops->fd_read == NULL) {
		return B_BAD_VALUE;
	}

	if (length == 0)
		return 0;

	if (!is_user_address_range(buffer, length))
		return B_BAD_ADDRESS;

	SyscallRestartWrapper<status_t> status;

	if (write)
		status = descriptor->ops->fd_write(descriptor.Get(), pos, buffer, &length);
	else
		status = descriptor->ops->fd_read(descriptor.Get(), pos, buffer, &length);

	if (status != B_OK)
		return status;

	if (movePosition) {
		descriptor->pos = write && (descriptor->open_mode & O_APPEND) != 0
			? descriptor->ops->fd_seek(descriptor.Get(), 0, SEEK_END) : pos + length;
	}

	return length <= SSIZE_MAX ? (ssize_t)length : SSIZE_MAX;
}


static ssize_t
common_user_vector_io(int fd, off_t pos, const iovec* userVecs, size_t count,
	bool write)
{
	if (pos < -1)
		return B_BAD_VALUE;

	iovec* vecs;
	status_t error = get_iovecs_from_user(userVecs, count, vecs, true);
	if (error != B_OK)
		return error;
	MemoryDeleter _(vecs);

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(false), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
		return B_FILE_ERROR;
	}

	bool movePosition = false;
	if (pos == -1 && descriptor->pos != -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (write ? descriptor->ops->fd_write == NULL
			: descriptor->ops->fd_read == NULL) {
		return B_BAD_VALUE;
	}

	SyscallRestartWrapper<status_t> status;

	ssize_t bytesTransferred = 0;
	for (size_t i = 0; i < count; i++) {
		if (vecs[i].iov_base == NULL)
			continue;

		size_t length = vecs[i].iov_len;
		if (write) {
			status = descriptor->ops->fd_write(descriptor.Get(), pos,
				vecs[i].iov_base, &length);
		} else {
			status = descriptor->ops->fd_read(descriptor.Get(), pos, vecs[i].iov_base,
				&length);
		}

		if (status != B_OK) {
			if (bytesTransferred == 0)
				return status;
			status = B_OK;
			break;
		}

		if ((uint64)bytesTransferred + length > SSIZE_MAX)
			bytesTransferred = SSIZE_MAX;
		else
			bytesTransferred += (ssize_t)length;

		pos += length;

		if (length < vecs[i].iov_len)
			break;
	}

	if (movePosition) {
		descriptor->pos = write && (descriptor->open_mode & O_APPEND) != 0
			? descriptor->ops->fd_seek(descriptor.Get(), 0, SEEK_END) : pos;
	}

	return bytesTransferred;
}


status_t
user_fd_kernel_ioctl(int fd, uint32 op, void* buffer, size_t length)
{
	TRACE(("user_fd_kernel_ioctl: fd %d\n", fd));

	return fd_ioctl(false, fd, op, buffer, length);
}


//	#pragma mark - User syscalls


ssize_t
_user_read(int fd, off_t pos, void* buffer, size_t length)
{
	return common_user_io(fd, pos, buffer, length, false);
}


ssize_t
_user_readv(int fd, off_t pos, const iovec* userVecs, size_t count)
{
	return common_user_vector_io(fd, pos, userVecs, count, false);
}


ssize_t
_user_write(int fd, off_t pos, const void* buffer, size_t length)
{
	return common_user_io(fd, pos, (void*)buffer, length, true);
}


ssize_t
_user_writev(int fd, off_t pos, const iovec* userVecs, size_t count)
{
	return common_user_vector_io(fd, pos, userVecs, count, true);
}


off_t
_user_seek(int fd, off_t pos, int seekType)
{
	syscall_64_bit_return_value();

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(false), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	TRACE(("user_seek(descriptor = %p)\n", descriptor));

	if (descriptor->ops->fd_seek)
		pos = descriptor->ops->fd_seek(descriptor.Get(), pos, seekType);
	else
		pos = ESPIPE;

	return pos;
}


status_t
_user_ioctl(int fd, uint32 op, void* buffer, size_t length)
{
	TRACE(("user_ioctl: fd %d\n", fd));

	// "buffer" is not always a pointer depending on "op", so we cannot
	// check that it is a userland buffer here. Instead we check that
	// it is at least not within the bounds of kernel memory; as in
	// the cases where it is a numeric constant it is usually a low one.
	if (IS_KERNEL_ADDRESS(buffer))
		return B_BAD_ADDRESS;

	SyscallRestartWrapper<status_t> status;

	return status = fd_ioctl(false, fd, op, buffer, length);
}


ssize_t
_user_read_dir(int fd, struct dirent* userBuffer, size_t bufferSize,
	uint32 maxCount)
{
	TRACE(("user_read_dir(fd = %d, userBuffer = %p, bufferSize = %ld, count = "
		"%" B_PRIu32 ")\n", fd, userBuffer, bufferSize, maxCount));

	if (maxCount == 0)
		return 0;

	if (userBuffer == NULL || !IS_USER_ADDRESS(userBuffer))
		return B_BAD_ADDRESS;

	// get I/O context and FD
	io_context* ioContext = get_current_io_context(false);
	FileDescriptorPutter descriptor(get_fd(ioContext, fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	if (descriptor->ops->fd_read_dir == NULL)
		return B_UNSUPPORTED;

	// restrict buffer size and allocate a heap buffer
	if (bufferSize > kMaxReadDirBufferSize)
		bufferSize = kMaxReadDirBufferSize;
	struct dirent* buffer = (struct dirent*)malloc(bufferSize);
	if (buffer == NULL)
		return B_NO_MEMORY;
	MemoryDeleter bufferDeleter(buffer);

	// read the directory
	uint32 count = maxCount;
	status_t status = descriptor->ops->fd_read_dir(ioContext, descriptor.Get(),
		buffer, bufferSize, &count);
	if (status != B_OK)
		return status;

	ASSERT(count <= maxCount);

	// copy the buffer back -- determine the total buffer size first
	size_t sizeToCopy = 0;
	BytePointer<struct dirent> entry = buffer;
	for (uint32 i = 0; i < count; i++) {
		size_t length = entry->d_reclen;
		sizeToCopy += length;
		entry += length;
	}

	ASSERT(sizeToCopy <= bufferSize);

	if (user_memcpy(userBuffer, buffer, sizeToCopy) != B_OK)
		return B_BAD_ADDRESS;

	return count;
}


status_t
_user_rewind_dir(int fd)
{
	TRACE(("user_rewind_dir(fd = %d)\n", fd));

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(false), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	status_t status;
	if (descriptor->ops->fd_rewind_dir)
		status = descriptor->ops->fd_rewind_dir(descriptor.Get());
	else
		status = B_UNSUPPORTED;

	return status;
}


status_t
_user_close(int fd)
{
	return common_close(fd, false);
}


int
_user_dup(int fd)
{
	return dup_fd(fd, false);
}


int
_user_dup2(int ofd, int nfd)
{
	return dup2_fd(ofd, nfd, false);
}


//	#pragma mark - Kernel calls


ssize_t
_kern_read(int fd, off_t pos, void* buffer, size_t length)
{
	if (pos < -1)
		return B_BAD_VALUE;

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(true), fd));

	if (!descriptor.IsSet())
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
		return B_FILE_ERROR;

	bool movePosition = false;
	if (pos == -1 && descriptor->pos != -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	SyscallFlagUnsetter _;

	if (descriptor->ops->fd_read == NULL)
		return B_BAD_VALUE;

	ssize_t bytesRead = descriptor->ops->fd_read(descriptor.Get(), pos, buffer,
		&length);
	if (bytesRead >= B_OK) {
		if (length > SSIZE_MAX)
			bytesRead = SSIZE_MAX;
		else
			bytesRead = (ssize_t)length;

		if (movePosition)
			descriptor->pos = pos + length;
	}

	return bytesRead;
}


ssize_t
_kern_readv(int fd, off_t pos, const iovec* vecs, size_t count)
{
	status_t status;

	if (pos < -1)
		return B_BAD_VALUE;

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(true), fd));

	if (!descriptor.IsSet())
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
		return B_FILE_ERROR;

	bool movePosition = false;
	if (pos == -1 && descriptor->pos != -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (descriptor->ops->fd_read == NULL)
		return B_BAD_VALUE;

	SyscallFlagUnsetter _;

	ssize_t bytesRead = 0;

	for (size_t i = 0; i < count; i++) {
		size_t length = vecs[i].iov_len;
		status = descriptor->ops->fd_read(descriptor.Get(), pos,
			vecs[i].iov_base, &length);
		if (status != B_OK) {
			bytesRead = status;
			break;
		}

		if ((uint64)bytesRead + length > SSIZE_MAX)
			bytesRead = SSIZE_MAX;
		else
			bytesRead += (ssize_t)length;

		pos += vecs[i].iov_len;
	}

	if (movePosition)
		descriptor->pos = pos;

	return bytesRead;
}


ssize_t
_kern_write(int fd, off_t pos, const void* buffer, size_t length)
{
	if (pos < -1)
		return B_BAD_VALUE;

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(true), fd));

	if (!descriptor.IsSet())
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
		return B_FILE_ERROR;

	bool movePosition = false;
	if (pos == -1 && descriptor->pos != -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (descriptor->ops->fd_write == NULL)
		return B_BAD_VALUE;

	SyscallFlagUnsetter _;

	ssize_t bytesWritten = descriptor->ops->fd_write(descriptor.Get(), pos,
		buffer, &length);
	if (bytesWritten >= B_OK) {
		if (length > SSIZE_MAX)
			bytesWritten = SSIZE_MAX;
		else
			bytesWritten = (ssize_t)length;

		if (movePosition)
			descriptor->pos = pos + length;
	}

	return bytesWritten;
}


ssize_t
_kern_writev(int fd, off_t pos, const iovec* vecs, size_t count)
{
	status_t status;

	if (pos < -1)
		return B_BAD_VALUE;

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(true), fd));

	if (!descriptor.IsSet())
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
		return B_FILE_ERROR;

	bool movePosition = false;
	if (pos == -1 && descriptor->pos != -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (descriptor->ops->fd_write == NULL)
		return B_BAD_VALUE;

	SyscallFlagUnsetter _;

	ssize_t bytesWritten = 0;

	for (size_t i = 0; i < count; i++) {
		size_t length = vecs[i].iov_len;
		status = descriptor->ops->fd_write(descriptor.Get(), pos,
			vecs[i].iov_base, &length);
		if (status != B_OK) {
			bytesWritten = status;
			break;
		}

		if ((uint64)bytesWritten + length > SSIZE_MAX)
			bytesWritten = SSIZE_MAX;
		else
			bytesWritten += (ssize_t)length;

		pos += vecs[i].iov_len;
	}

	if (movePosition)
		descriptor->pos = pos;

	return bytesWritten;
}


off_t
_kern_seek(int fd, off_t pos, int seekType)
{
	FileDescriptorPutter descriptor(get_fd(get_current_io_context(true), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	if (descriptor->ops->fd_seek)
		pos = descriptor->ops->fd_seek(descriptor.Get(), pos, seekType);
	else
		pos = ESPIPE;

	return pos;
}


status_t
_kern_ioctl(int fd, uint32 op, void* buffer, size_t length)
{
	TRACE(("kern_ioctl: fd %d\n", fd));

	SyscallFlagUnsetter _;

	return fd_ioctl(true, fd, op, buffer, length);
}


ssize_t
_kern_read_dir(int fd, struct dirent* buffer, size_t bufferSize,
	uint32 maxCount)
{
	TRACE(("sys_read_dir(fd = %d, buffer = %p, bufferSize = %ld, count = "
		"%" B_PRIu32 ")\n", fd, buffer, bufferSize, maxCount));

	struct io_context* ioContext = get_current_io_context(true);
	FileDescriptorPutter descriptor(get_fd(ioContext, fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	ssize_t retval;
	if (descriptor->ops->fd_read_dir) {
		uint32 count = maxCount;
		retval = descriptor->ops->fd_read_dir(ioContext, descriptor.Get(), buffer,
			bufferSize, &count);
		if (retval >= 0)
			retval = count;
	} else
		retval = B_UNSUPPORTED;

	return retval;
}

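// Consumer sketch (kernel-side; buffer size and count are hypothetical):
// entries come back densely packed, so advance by d_reclen rather than by
// sizeof(struct dirent):
//
//     char buffer[4096];
//     ssize_t count = _kern_read_dir(fd, (struct dirent*)buffer,
//         sizeof(buffer), 32);
//     struct dirent* entry = (struct dirent*)buffer;
//     for (ssize_t i = 0; i < count; i++) {
//         // ...inspect entry->d_name...
//         entry = (struct dirent*)((addr_t)entry + entry->d_reclen);
//     }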

status_t
_kern_rewind_dir(int fd)
{
	TRACE(("sys_rewind_dir(fd = %d)\n", fd));

	FileDescriptorPutter descriptor(get_fd(get_current_io_context(true), fd));
	if (!descriptor.IsSet())
		return B_FILE_ERROR;

	status_t status;
	if (descriptor->ops->fd_rewind_dir)
		status = descriptor->ops->fd_rewind_dir(descriptor.Get());
	else
		status = B_UNSUPPORTED;

	return status;
}


status_t
_kern_close(int fd)
{
	return common_close(fd, true);
}


int
_kern_dup(int fd)
{
	return dup_fd(fd, true);
}


int
_kern_dup2(int ofd, int nfd)
{
	return dup2_fd(ofd, nfd, true);
}