xref: /haiku/src/system/kernel/events/wait_for_objects.cpp (revision caed67a8cba83913b9c21ac2b06ebc6bd1cb3111)
1 /*
2  * Copyright 2007-2010, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2008, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  */
6 
7 #include <fs/select_sync_pool.h>
8 #include <wait_for_objects.h>
9 
10 #include <new>
11 
12 #include <poll.h>
13 #include <signal.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <sys/select.h>
17 
18 #include <OS.h>
19 #include <Select.h>
20 
21 #include <AutoDeleter.h>
22 #include <StackOrHeapArray.h>
23 
24 #include <event_queue.h>
25 #include <fs/fd.h>
26 #include <port.h>
27 #include <sem.h>
28 #include <syscalls.h>
29 #include <syscall_restart.h>
30 #include <thread.h>
31 #include <tracing.h>
32 #include <util/AutoLock.h>
33 #include <util/DoublyLinkedList.h>
34 #include <vfs.h>
35 
36 #include "select_ops.h"
37 #include "select_sync.h"
38 
39 
40 //#define TRACE_WAIT_FOR_OBJECTS
41 #ifdef TRACE_WAIT_FOR_OBJECTS
42 #	define PRINT(x) dprintf x
43 #	define FUNCTION(x) dprintf x
44 #else
45 #	define PRINT(x) ;
46 #	define FUNCTION(x) ;
47 #endif
48 
49 
50 using std::nothrow;
51 
52 
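/*!	A select_sync_pool keeps track of all selectsync objects (and the events
	they are interested in) that have been registered with a single object.
	It allows drivers and file systems to serve several concurrent select()
	requests on the same object via add_select_sync_pool_entry(),
	remove_select_sync_pool_entry(), and notify_select_event_pool() below.
*/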
53 struct select_sync_pool_entry
54 	: DoublyLinkedListLinkImpl<select_sync_pool_entry> {
55 	selectsync			*sync;
56 	uint16				events;
57 };
58 
59 typedef DoublyLinkedList<select_sync_pool_entry> SelectSyncPoolEntryList;
60 
61 struct select_sync_pool {
62 	SelectSyncPoolEntryList	entries;
63 };
64 
65 
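/*!	The select_sync flavor backing select(), poll(), and wait_for_objects():
	\c set holds one select_info per selected object (\c count entries), and
	the waiting thread blocks on \c sem until one of them is notified.
*/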
66 struct wait_for_objects_sync : public select_sync {
67 	sem_id				sem;
68 	uint32				count;
69 	struct select_info*	set;
70 
71 	virtual ~wait_for_objects_sync();
72 	virtual status_t Notify(select_info* info, uint16 events);
73 };
74 
75 
76 select_sync::~select_sync()
77 {
78 }
79 
80 
81 #if WAIT_FOR_OBJECTS_TRACING
82 
83 
84 namespace WaitForObjectsTracing {
85 
86 
87 class SelectTraceEntry : public AbstractTraceEntry {
88 	protected:
89 		SelectTraceEntry(int count, fd_set* readSet, fd_set* writeSet,
90 			fd_set* errorSet)
91 			:
92 			fReadSet(NULL),
93 			fWriteSet(NULL),
94 			fErrorSet(NULL),
95 			fCount(count)
96 		{
97 			int sets = (readSet != NULL ? 1 : 0) + (writeSet != NULL ? 1 : 0)
98 				+ (errorSet != NULL ? 1 : 0);
99 			if (sets > 0 && count > 0) {
100 				uint32 bytes = _howmany(count, NFDBITS) * sizeof(fd_mask);
101 				uint8* allocated = (uint8*)alloc_tracing_buffer(bytes * sets);
102 				if (allocated != NULL) {
103 					if (readSet != NULL) {
104 						fReadSet = (fd_set*)allocated;
105 						memcpy(fReadSet, readSet, bytes);
106 						allocated += bytes;
107 					}
108 					if (writeSet != NULL) {
109 						fWriteSet = (fd_set*)allocated;
110 						memcpy(fWriteSet, writeSet, bytes);
111 						allocated += bytes;
112 					}
113 					if (errorSet != NULL) {
114 						fErrorSet = (fd_set*)allocated;
115 						memcpy(fErrorSet, errorSet, bytes);
116 					}
117 				}
118 			}
119 		}
120 
121 		void AddDump(TraceOutput& out, const char* name)
122 		{
123 			out.Print(name);
124 
125 			_PrintSet(out, "read", fReadSet);
126 			_PrintSet(out, ", write", fWriteSet);
127 			_PrintSet(out, ", error", fErrorSet);
128 		}
129 
130 	private:
131 		void _PrintSet(TraceOutput& out, const char* name, fd_set* set)
132 		{
133 
134 			out.Print("%s: <", name);
135 
136 			if (set != NULL) {
137 				bool first = true;
138 				for (int i = 0; i < fCount; i++) {
139 					if (!FD_ISSET(i, set))
140 						continue;
141 
142 					if (first) {
143 						out.Print("%d", i);
144 						first = false;
145 					} else
146 						out.Print(", %d", i);
147 				}
148 			}
149 
150 			out.Print(">");
151 		}
152 
153 	protected:
154 		fd_set*	fReadSet;
155 		fd_set*	fWriteSet;
156 		fd_set*	fErrorSet;
157 		int		fCount;
158 };
159 
160 
161 class SelectBegin : public SelectTraceEntry {
162 	public:
163 		SelectBegin(int count, fd_set* readSet, fd_set* writeSet,
164 			fd_set* errorSet, bigtime_t timeout)
165 			:
166 			SelectTraceEntry(count, readSet, writeSet, errorSet),
167 			fTimeout(timeout)
168 		{
169 			Initialized();
170 		}
171 
172 		virtual void AddDump(TraceOutput& out)
173 		{
174 			SelectTraceEntry::AddDump(out, "select begin: ");
175 			out.Print(", timeout: %" B_PRIdBIGTIME, fTimeout);
176 		}
177 
178 	private:
179 		bigtime_t	fTimeout;
180 };
181 
182 
183 class SelectDone : public SelectTraceEntry {
184 	public:
185 		SelectDone(int count, fd_set* readSet, fd_set* writeSet,
186 			fd_set* errorSet, status_t status)
187 			:
188 			SelectTraceEntry(status == B_OK ? count : 0, readSet, writeSet,
189 				errorSet),
190 			fStatus(status)
191 		{
192 			Initialized();
193 		}
194 
195 		virtual void AddDump(TraceOutput& out)
196 		{
197 			if (fStatus == B_OK)
198 				SelectTraceEntry::AddDump(out, "select done:  ");
199 			else
200 				out.Print("select done:  error: %#" B_PRIx32, fStatus);
201 		}
202 
203 	private:
204 		status_t	fStatus;
205 };
206 
207 
208 class PollTraceEntry : public AbstractTraceEntry {
209 	protected:
210 		PollTraceEntry(pollfd* fds, int count, bool resultEvents)
211 			:
212 			fEntries(NULL),
213 			fCount(0)
214 		{
215 			if (fds != NULL && count > 0) {
216 				for (int i = 0; i < count; i++) {
217 					if (resultEvents ? fds[i].revents : fds[i].events)
218 						fCount++;
219 				}
220 			}
221 
222 			if (fCount == 0)
223 				return;
224 
225 			fEntries = (FDEntry*)alloc_tracing_buffer(fCount * sizeof(FDEntry));
226 			if (fEntries != NULL) {
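				// Note that i is only advanced when an entry is actually
				// stored; fds is advanced on every iteration.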
227 				for (int i = 0; i < fCount; fds++) {
228 					uint16 events = resultEvents ? fds->revents : fds->events;
229 					if (events != 0) {
230 						fEntries[i].fd = fds->fd;
231 						fEntries[i].events = events;
232 						i++;
233 					}
234 				}
235 			}
236 		}
237 
238 		void AddDump(TraceOutput& out)
239 		{
240 			if (fEntries == NULL)
241 				return;
242 
243 			static const struct {
244 				const char*	name;
245 				uint16		event;
246 			} kEventNames[] = {
247 				{ "r", POLLIN },
248 				{ "w", POLLOUT },
249 				{ "rb", POLLRDBAND },
250 				{ "wb", POLLWRBAND },
251 				{ "rp", POLLPRI },
252 				{ "err", POLLERR },
253 				{ "hup", POLLHUP },
254 				{ "inv", POLLNVAL },
255 				{ NULL, 0 }
256 			};
257 
258 			bool firstFD = true;
259 			for (int i = 0; i < fCount; i++) {
260 				if (firstFD) {
261 					out.Print("<%u: ", fEntries[i].fd);
262 					firstFD = false;
263 				} else
264 					out.Print(", <%u: ", fEntries[i].fd);
265 
266 				bool firstEvent = true;
267 				for (int k = 0; kEventNames[k].name != NULL; k++) {
268 					if ((fEntries[i].events & kEventNames[k].event) != 0) {
269 						if (firstEvent) {
270 							out.Print("%s", kEventNames[k].name);
271 							firstEvent = false;
272 						} else
273 							out.Print(", %s", kEventNames[k].name);
274 					}
275 				}
276 
277 				out.Print(">");
278 			}
279 		}
280 
281 	protected:
282 		struct FDEntry {
283 			uint16	fd;
284 			uint16	events;
285 		};
286 
287 		FDEntry*	fEntries;
288 		int			fCount;
289 };
290 
291 
292 class PollBegin : public PollTraceEntry {
293 	public:
294 		PollBegin(pollfd* fds, int count, bigtime_t timeout)
295 			:
296 			PollTraceEntry(fds, count, false),
297 			fTimeout(timeout)
298 		{
299 			Initialized();
300 		}
301 
302 		virtual void AddDump(TraceOutput& out)
303 		{
304 			out.Print("poll begin: ");
305 			PollTraceEntry::AddDump(out);
306 			out.Print(", timeout: %" B_PRIdBIGTIME, fTimeout);
307 		}
308 
309 	private:
310 		bigtime_t	fTimeout;
311 };
312 
313 
314 class PollDone : public PollTraceEntry {
315 	public:
316 		PollDone(pollfd* fds, int count, int result)
317 			:
318 			PollTraceEntry(fds, result >= 0 ? count : 0, true),
319 			fResult(result)
320 		{
321 			Initialized();
322 		}
323 
324 		virtual void AddDump(TraceOutput& out)
325 		{
326 			if (fResult >= 0) {
327 				out.Print("poll done:  count: %d: ", fResult);
328 				PollTraceEntry::AddDump(out);
329 			} else
330 				out.Print("poll done:  error: %#x", fResult);
331 		}
332 
333 	private:
334 		int		fResult;
335 };
336 
337 }	// namespace WaitForObjectsTracing
338 
339 #	define T(x)	new(std::nothrow) WaitForObjectsTracing::x
340 
341 #else
342 #	define T(x)
343 #endif	// WAIT_FOR_OBJECTS_TRACING
344 
345 
346 // #pragma mark -
347 
348 
349 /*!
350 	Clears all bits in the fd_set - since we are using variable sized
351 	arrays in the kernel, we can't use the FD_ZERO() macro provided by
352 	sys/select.h for this task.
353 	All other FD_xxx() macros are safe to use, though.
354 */
355 static inline void
356 fd_zero(fd_set *set, int numFDs)
357 {
358 	if (set != NULL)
359 		memset(set, 0, _howmany(numFDs, NFDBITS) * sizeof(fd_mask));
360 }
361 
362 
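/*!	Allocates a wait_for_objects_sync for \a numFDs objects: the select_info
	array and the semaphore the caller will block on. Each select_info is
	linked back to the sync object. On success the caller owns the initial
	reference and must eventually call put_select_sync().
*/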
363 static status_t
364 create_select_sync(int numFDs, wait_for_objects_sync*& _sync)
365 {
366 	// create sync structure
367 	wait_for_objects_sync* sync = new(nothrow) wait_for_objects_sync;
368 	if (sync == NULL)
369 		return B_NO_MEMORY;
370 	ObjectDeleter<wait_for_objects_sync> syncDeleter(sync);
371 
372 	// create info set
373 	sync->set = new(nothrow) select_info[numFDs];
374 	if (sync->set == NULL)
375 		return B_NO_MEMORY;
376 	ArrayDeleter<select_info> setDeleter(sync->set);
377 
378 	// create select event semaphore
379 	sync->sem = create_sem(0, "select");
380 	if (sync->sem < 0)
381 		return sync->sem;
382 
383 	sync->count = numFDs;
384 
385 	for (int i = 0; i < numFDs; i++) {
386 		sync->set[i].next = NULL;
387 		sync->set[i].sync = sync;
388 	}
389 
390 	setDeleter.Detach();
391 	syncDeleter.Detach();
392 	_sync = sync;
393 
394 	return B_OK;
395 }
396 
397 
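/*!	Acquires another reference to \a sync; to be balanced by a later
	put_select_sync() call.
*/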
398 void
399 acquire_select_sync(select_sync* sync)
400 {
401 	FUNCTION(("acquire_select_sync(%p)\n", sync));
402 	sync->AcquireReference();
403 }
404 
405 
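/*!	Releases one reference to \a sync; when the last reference is gone, the
	object is deleted (which, for wait_for_objects_sync, also deletes the
	semaphore and the select_info array).
*/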
406 void
407 put_select_sync(select_sync* sync)
408 {
409 	FUNCTION(("put_select_sync(%p): -> %" B_PRId32 "\n", sync, sync->CountReferences() - 1));
410 	sync->ReleaseReference();
411 }
412 
413 
414 wait_for_objects_sync::~wait_for_objects_sync()
415 {
416 	delete_sem(sem);
417 	delete[] set;
418 }
419 
420 
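/*!	Invoked via notify_select_events() when one of the selected objects
	reports \a events: the events are recorded in \a info, and the waiting
	thread is woken up through the semaphore, but only if at least one of
	the reported events was actually selected.
*/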
421 status_t
422 wait_for_objects_sync::Notify(select_info* info, uint16 events)
423 {
424 	if (sem < B_OK)
425 		return B_BAD_VALUE;
426 
427 	atomic_or(&info->events, events);
428 
429 	// only wake up the waiting select()/poll() call if the events
430 	// match one of the selected ones
431 	if (info->selected_events & events)
432 		return release_sem_etc(sem, 1, B_DO_NOT_RESCHEDULE);
433 
434 	return B_OK;
435 }
436 
437 
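/*!	Common backend for the select() family, used by both the kernel and the
	userland syscall paths.
	Every FD set in \a readSet, \a writeSet, or \a errorSet is registered for
	the corresponding events via select_fd(); the calling thread then blocks
	on the sync semaphore until an event is notified, the absolute \a timeout
	(negative means "wait forever") expires, or a signal interrupts the wait
	(if \a sigMask is given, it is installed for the duration of the wait).
	Afterwards all FDs are deselected, the sets are rewritten to contain only
	the events that actually occurred, and the number of set bits is
	returned. On B_INTERRUPTED the sets are left untouched so that the call
	can be restarted.
*/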
438 static int
439 common_select(int numFDs, fd_set *readSet, fd_set *writeSet, fd_set *errorSet,
440 	bigtime_t timeout, const sigset_t *sigMask, bool kernel)
441 {
442 	status_t status = B_OK;
443 	int fd;
444 
445 	FUNCTION(("[%" B_PRId32 "] common_select(%d, %p, %p, %p, %" B_PRIdBIGTIME
446 		", %p, %d)\n", find_thread(NULL), numFDs, readSet, writeSet, errorSet,
447 		timeout, sigMask, kernel));
448 
449 	// check if fds are valid before doing anything
450 
451 	for (fd = 0; fd < numFDs; fd++) {
452 		if (((readSet && FD_ISSET(fd, readSet))
453 			|| (writeSet && FD_ISSET(fd, writeSet))
454 			|| (errorSet && FD_ISSET(fd, errorSet)))
455 			&& !fd_is_valid(fd, kernel))
456 			return B_FILE_ERROR;
457 	}
458 
459 	// allocate sync object
460 	wait_for_objects_sync* sync;
461 	status = create_select_sync(numFDs, sync);
462 	if (status != B_OK)
463 		return status;
464 
465 	T(SelectBegin(numFDs, readSet, writeSet, errorSet, timeout));
466 
467 	// start selecting file descriptors
468 
469 	for (fd = 0; fd < numFDs; fd++) {
470 		sync->set[fd].selected_events = 0;
471 		sync->set[fd].events = 0;
472 
473 		if (readSet && FD_ISSET(fd, readSet)) {
474 			sync->set[fd].selected_events = SELECT_FLAG(B_SELECT_READ)
475 				| SELECT_FLAG(B_SELECT_DISCONNECTED) | SELECT_FLAG(B_SELECT_ERROR);
476 		}
477 		if (writeSet && FD_ISSET(fd, writeSet)) {
478 			sync->set[fd].selected_events |= SELECT_FLAG(B_SELECT_WRITE)
479 				| SELECT_FLAG(B_SELECT_ERROR);
480 		}
481 		if (errorSet && FD_ISSET(fd, errorSet))
482 			sync->set[fd].selected_events |= SELECT_FLAG(B_SELECT_ERROR);
483 
484 		if (sync->set[fd].selected_events != 0) {
485 			select_fd(fd, sync->set + fd, kernel);
486 				// array position is the same as the fd for select()
487 		}
488 	}
489 
490 	// set new signal mask
491 	sigset_t oldSigMask;
492 	if (sigMask != NULL) {
493 		sigprocmask(SIG_SETMASK, sigMask, &oldSigMask);
494 		if (!kernel) {
495 			Thread *thread = thread_get_current_thread();
496 			thread->old_sig_block_mask = oldSigMask;
497 			thread->flags |= THREAD_FLAGS_OLD_SIGMASK;
498 		}
499 	}
500 
501 	// wait for something to happen
502 	status = acquire_sem_etc(sync->sem, 1,
503 		B_CAN_INTERRUPT | (timeout >= 0 ? B_ABSOLUTE_TIMEOUT : 0), timeout);
504 
505 	// restore the old signal mask
506 	if (sigMask != NULL && kernel)
507 		sigprocmask(SIG_SETMASK, &oldSigMask, NULL);
508 
509 	PRINT(("common_select(): acquire_sem_etc() returned: %#" B_PRIx32 "\n", status));
510 
511 	// deselect file descriptors
512 
513 	for (fd = 0; fd < numFDs; fd++)
514 		deselect_fd(fd, sync->set + fd, kernel);
515 
516 	PRINT(("common_select(): events deselected\n"));
517 
518 	// collect the events that have happened in the meantime
519 
520 	int count = 0;
521 
522 	if (status == B_INTERRUPTED) {
523 		// We must not clear the sets in this case, as applications may
524 		// rely on their contents.
525 		put_select_sync(sync);
526 		T(SelectDone(numFDs, readSet, writeSet, errorSet, status));
527 		return B_INTERRUPTED;
528 	}
529 
530 	// Clear sets to store the received events
531 	// (we can't use FD_ZERO(), because we use variable sized arrays;
532 	// the other FD_xxx() macros are safe, though).
533 	fd_zero(readSet, numFDs);
534 	fd_zero(writeSet, numFDs);
535 	fd_zero(errorSet, numFDs);
536 
537 	if (status == B_OK) {
538 		for (count = 0, fd = 0; fd < numFDs; fd++) {
539 			if (readSet && sync->set[fd].events & (SELECT_FLAG(B_SELECT_READ)
540 					| SELECT_FLAG(B_SELECT_DISCONNECTED) | SELECT_FLAG(B_SELECT_ERROR))) {
541 				FD_SET(fd, readSet);
542 				count++;
543 			}
544 			if (writeSet
545 				&& sync->set[fd].events & (SELECT_FLAG(B_SELECT_WRITE)
546 					| SELECT_FLAG(B_SELECT_ERROR))) {
547 				FD_SET(fd, writeSet);
548 				count++;
549 			}
550 			if (errorSet
551 				&& sync->set[fd].events & SELECT_FLAG(B_SELECT_ERROR)) {
552 				FD_SET(fd, errorSet);
553 				count++;
554 			}
555 		}
556 	}
557 
558 	// B_TIMED_OUT and B_WOULD_BLOCK are supposed to return 0
559 
560 	put_select_sync(sync);
561 
562 	T(SelectDone(numFDs, readSet, writeSet, errorSet, status));
563 
564 	return count;
565 }
566 
567 
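/*!	Common backend for poll(), used by both the kernel and the userland
	syscall paths.
	Each pollfd is registered for its requested events (POLLERR, POLLHUP, and
	POLLNVAL are always added), then the thread blocks on the sync semaphore
	until an event is notified or the absolute \a timeout expires; if
	\a sigMask is given, it is installed for the duration of the wait.
	Negative FDs are ignored; FDs that cannot be selected are flagged
	POLLNVAL, and if any such FD is encountered the function does not block
	at all. Returns the number of pollfds with a non-empty revents mask,
	B_INTERRUPTED, or an error code.
*/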
568 static int
569 common_poll(struct pollfd *fds, nfds_t numFDs, bigtime_t timeout,
570 	const sigset_t *sigMask, bool kernel)
571 {
572 	// allocate sync object
573 	wait_for_objects_sync* sync;
574 	status_t status = create_select_sync(numFDs, sync);
575 	if (status != B_OK)
576 		return status;
577 
578 	T(PollBegin(fds, numFDs, timeout));
579 
580 	// start polling file descriptors (by selecting them)
581 
582 	bool invalid = false;
583 	for (uint32 i = 0; i < numFDs; i++) {
584 		int fd = fds[i].fd;
585 
586 		// initialize events masks
587 		fds[i].events |= POLLNVAL | POLLERR | POLLHUP;
588 		sync->set[i].selected_events = fds[i].events;
589 		sync->set[i].events = 0;
590 		fds[i].revents = 0;
591 
592 		if (fd >= 0 && select_fd(fd, sync->set + i, kernel) != B_OK) {
593 			// If the FD returned events as well as an error, ignore the error.
594 			if (sync->set[i].events != 0)
595 				continue;
596 
597 			sync->set[i].events = POLLNVAL;
598 			fds[i].revents = POLLNVAL;
599 				// indicates that the FD doesn't need to be deselected
600 			invalid = true;
601 		}
602 	}
603 
604 	// set new signal mask
605 	sigset_t oldSigMask;
606 	if (sigMask != NULL) {
607 		sigprocmask(SIG_SETMASK, sigMask, &oldSigMask);
608 		if (!kernel) {
609 			Thread *thread = thread_get_current_thread();
610 			thread->old_sig_block_mask = oldSigMask;
611 			thread->flags |= THREAD_FLAGS_OLD_SIGMASK;
612 		}
613 	}
614 
615 	if (!invalid) {
616 		status = acquire_sem_etc(sync->sem, 1,
617 			B_CAN_INTERRUPT | (timeout >= 0 ? B_ABSOLUTE_TIMEOUT : 0), timeout);
618 	}
619 
620 	// restore the old signal mask
621 	if (sigMask != NULL && kernel)
622 		sigprocmask(SIG_SETMASK, &oldSigMask, NULL);
623 
624 	// deselect file descriptors
625 
626 	for (uint32 i = 0; i < numFDs; i++) {
627 		if (fds[i].fd >= 0 && (sync->set[i].events & POLLNVAL) == 0)
628 			deselect_fd(fds[i].fd, sync->set + i, kernel);
629 	}
630 
631 	// collect the events that have happened in the meantime
632 
633 	int count = 0;
634 	switch (status) {
635 		case B_OK:
636 			for (uint32 i = 0; i < numFDs; i++) {
637 				if (fds[i].fd < 0)
638 					continue;
639 
640 				// POLLxxx flags and B_SELECT_xxx flags are compatible
641 				fds[i].revents = sync->set[i].events & fds[i].events;
642 				if (fds[i].revents != 0)
643 					count++;
644 			}
645 			break;
646 		case B_INTERRUPTED:
647 			count = B_INTERRUPTED;
648 			break;
649 		default:
650 			// B_TIMED_OUT, and B_WOULD_BLOCK
651 			break;
652 	}
653 
654 	put_select_sync(sync);
655 
656 	T(PollDone(fds, numFDs, count));
657 
658 	return count;
659 }
660 
661 
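/*!	Common backend for wait_for_objects(): like poll(), but for a mixed
	array of FDs, semaphores, ports, and threads. Each info is registered
	for its requested events (B_EVENT_INVALID, B_EVENT_ERROR, and
	B_EVENT_DISCONNECTED are always added); invalid objects are flagged
	B_EVENT_INVALID and suppress the wait. \a flags and \a timeout are passed
	on to acquire_sem_etc(). On success the events masks are reduced to the
	events that actually occurred and the number of infos with events is
	returned; otherwise all events masks are cleared and the error is
	returned.

	A rough userland usage sketch (through the public wait_for_objects_etc()
	declared in OS.h, which ends up here via _user_wait_for_objects()):

		object_wait_info infos[2];
		infos[0].object = fd;				// a file descriptor
		infos[0].type = B_OBJECT_TYPE_FD;
		infos[0].events = B_EVENT_READ;
		infos[1].object = sem;				// a semaphore
		infos[1].type = B_OBJECT_TYPE_SEMAPHORE;
		infos[1].events = B_EVENT_ACQUIRE_SEMAPHORE;

		ssize_t count = wait_for_objects_etc(infos, 2, B_RELATIVE_TIMEOUT,
			1000000);
		// infos[i].events now contains the events that actually occurred
*/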
662 static ssize_t
663 common_wait_for_objects(object_wait_info* infos, int numInfos, uint32 flags,
664 	bigtime_t timeout, bool kernel)
665 {
666 	status_t status = B_OK;
667 
668 	// allocate sync object
669 	wait_for_objects_sync* sync;
670 	status = create_select_sync(numInfos, sync);
671 	if (status != B_OK)
672 		return status;
673 
674 	// start selecting objects
675 
676 	bool invalid = false;
677 	for (int i = 0; i < numInfos; i++) {
678 		uint16 type = infos[i].type;
679 		int32 object = infos[i].object;
680 
681 		// initialize events masks
682 		infos[i].events |= B_EVENT_INVALID | B_EVENT_ERROR | B_EVENT_DISCONNECTED;
683 		sync->set[i].selected_events = infos[i].events;
684 		sync->set[i].events = 0;
685 
686 		if (select_object(type, object, sync->set + i, kernel) != B_OK) {
687 			// If the object returned events as well as an error, ignore the error.
688 			if (sync->set[i].events != 0)
689 				continue;
690 
691 			sync->set[i].events = B_EVENT_INVALID;
692 			infos[i].events = B_EVENT_INVALID;
693 				// indicates that the object doesn't need to be deselected
694 			invalid = true;
695 		}
696 	}
697 
698 	if (!invalid) {
699 		status = acquire_sem_etc(sync->sem, 1, B_CAN_INTERRUPT | flags,
700 			timeout);
701 	}
702 
703 	// deselect objects
704 
705 	for (int i = 0; i < numInfos; i++) {
706 		uint16 type = infos[i].type;
707 		if ((sync->set[i].events & B_EVENT_INVALID) == 0)
708 			deselect_object(type, infos[i].object, sync->set + i, kernel);
709 	}
710 
711 	// collect the events that have happened in the meantime
712 
713 	ssize_t count = 0;
714 	if (status == B_OK) {
715 		for (int i = 0; i < numInfos; i++) {
716 			infos[i].events &= sync->set[i].events;
717 			if (infos[i].events != 0)
718 				count++;
719 		}
720 	} else {
721 		// B_INTERRUPTED, B_TIMED_OUT, and B_WOULD_BLOCK
722 		count = status;
723 		for (int i = 0; i < numInfos; i++)
724 			infos[i].events = 0;
725 	}
726 
727 	put_select_sync(sync);
728 
729 	return count;
730 }
731 
732 
733 // #pragma mark - kernel private
734 
735 
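/*!	Notifies the waiter behind \a info that the given \a events have
	occurred, by forwarding to the associated sync object's Notify()
	implementation. Called by the objects (FDs, semaphores, ports, ...) a
	select_info has been registered with.
*/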
736 status_t
737 notify_select_events(select_info* info, uint16 events)
738 {
739 	if (info == NULL || info->sync == NULL)
740 		return B_BAD_VALUE;
741 
742 	FUNCTION(("notify_select_events(%p (%p), 0x%x)\n", info, info->sync,
743 		events));
744 
745 	return info->sync->Notify(info, events);
746 }
747 
748 
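/*!	Calls notify_select_events() for every select_info in the singly linked
	\a list. The next pointer is read before the notification, so that the
	iteration stays safe even if the info is unlinked in the meantime.
*/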
749 void
750 notify_select_events_list(select_info* list, uint16 events)
751 {
752 	struct select_info* info = list;
753 	while (info != NULL) {
754 		select_info* next = info->next;
755 		notify_select_events(info, events);
756 		info = next;
757 	}
758 }
759 
760 
761 //	#pragma mark - public kernel API
762 
763 
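/*!	Public kernel API: notifies the selectsync \a sync (as passed to a
	driver's or file system's select() hook) that \a event, one of the
	B_SELECT_* constants, has occurred.
*/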
764 status_t
765 notify_select_event(struct selectsync *sync, uint8 event)
766 {
767 	return notify_select_events((select_info*)sync, SELECT_FLAG(event));
768 }
769 
770 
771 //	#pragma mark - private kernel exported API
772 
773 
774 static select_sync_pool_entry *
775 find_select_sync_pool_entry(select_sync_pool *pool, selectsync *sync)
776 {
777 	for (SelectSyncPoolEntryList::Iterator it = pool->entries.GetIterator();
778 		 it.HasNext();) {
779 		select_sync_pool_entry *entry = it.Next();
780 		if (entry->sync == sync)
781 			return entry;
782 	}
783 
784 	return NULL;
785 }
786 
787 
788 static status_t
789 add_select_sync_pool_entry(select_sync_pool *pool, selectsync *sync,
790 	uint8 event)
791 {
792 	// check whether the entry already exists
793 	select_sync_pool_entry *entry = find_select_sync_pool_entry(pool, sync);
794 	if (!entry) {
795 		entry = new (std::nothrow) select_sync_pool_entry;
796 		if (!entry)
797 			return B_NO_MEMORY;
798 
799 		entry->sync = sync;
800 		entry->events = 0;
801 
802 		pool->entries.Add(entry);
803 	}
804 
805 	entry->events |= SELECT_FLAG(event);
806 
807 	return B_OK;
808 }
809 
810 
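/*!	Adds \a sync to the pool pointed to by \a _pool (creating the pool, if
	it doesn't exist yet) and records that it is interested in \a event.
	Typically called from a driver's or file system's select() hook; the
	matching remove_select_sync_pool_entry() belongs in the deselect() hook,
	and notify_select_event_pool() is called when the event actually occurs.
	A sketch (\c device->selectPool is a hypothetical per-device member;
	locking against concurrent hook invocations is up to the caller):

		// select() hook
		add_select_sync_pool_entry(&device->selectPool, sync, event);
		// deselect() hook
		remove_select_sync_pool_entry(&device->selectPool, sync, event);
		// e.g. when data has arrived
		notify_select_event_pool(device->selectPool, B_SELECT_READ);
*/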
811 status_t
812 add_select_sync_pool_entry(select_sync_pool **_pool, selectsync *sync,
813 	uint8 event)
814 {
815 	// create the pool, if necessary
816 	select_sync_pool *pool = *_pool;
817 	if (!pool) {
818 		pool = new (std::nothrow) select_sync_pool;
819 		if (!pool)
820 			return B_NO_MEMORY;
821 
822 		*_pool = pool;
823 	}
824 
825 	// add the entry
826 	status_t error = add_select_sync_pool_entry(pool, sync, event);
827 
828 	// cleanup
829 	if (pool->entries.IsEmpty()) {
830 		delete pool;
831 		*_pool = NULL;
832 	}
833 
834 	return error;
835 }
836 
837 
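/*!	Reverts add_select_sync_pool_entry(): clears \a event for all entries
	referring to \a sync, removes entries that are no longer interested in
	any event, and deletes the pool (resetting \a *_pool to NULL) when it
	becomes empty. Returns B_ENTRY_NOT_FOUND if \a sync wasn't in the pool.
*/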
838 status_t
839 remove_select_sync_pool_entry(select_sync_pool **_pool, selectsync *sync,
840 	uint8 event)
841 {
842 	select_sync_pool *pool = *_pool;
843 	if (!pool)
844 		return B_ENTRY_NOT_FOUND;
845 
846 	// clear the event flag of the entries referring to this sync
847 	bool found = false;
848 	for (SelectSyncPoolEntryList::Iterator it = pool->entries.GetIterator();
849 		 it.HasNext();) {
850 		select_sync_pool_entry *entry = it.Next();
851 		if (entry->sync == sync) {
852 			found = true;
853 			entry->events &= ~SELECT_FLAG(event);
854 
855 			// remove the entry, if no longer needed
856 			if (entry->events == 0) {
857 				it.Remove();
858 				delete entry;
859 			}
860 		}
861 	}
862 
863 	if (!found)
864 		return B_ENTRY_NOT_FOUND;
865 
866 	// delete the pool, if no longer needed
867 	if (pool->entries.IsEmpty()) {
868 		delete pool;
869 		*_pool = NULL;
870 	}
871 
872 	return B_OK;
873 }
874 
875 
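/*!	Deletes \a pool together with all of its entries. The selectsync objects
	referenced by the entries are left untouched.
*/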
876 void
877 delete_select_sync_pool(select_sync_pool *pool)
878 {
879 	if (!pool)
880 		return;
881 
882 	while (select_sync_pool_entry *entry = pool->entries.Head()) {
883 		pool->entries.Remove(entry);
884 		delete entry;
885 	}
886 
887 	delete pool;
888 }
889 
890 
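/*!	Calls notify_select_event() for every entry in \a pool that has
	registered interest in \a event.
*/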
891 void
892 notify_select_event_pool(select_sync_pool *pool, uint8 event)
893 {
894 	if (!pool)
895 		return;
896 
897 	FUNCTION(("notify_select_event_pool(%p, %u)\n", pool, event));
898 
899 	for (SelectSyncPoolEntryList::Iterator it = pool->entries.GetIterator();
900 		 it.HasNext();) {
901 		select_sync_pool_entry *entry = it.Next();
902 		if (entry->events & SELECT_FLAG(event))
903 			notify_select_event(entry->sync, event);
904 	}
905 }
906 
907 
908 //	#pragma mark - Kernel POSIX layer
909 
910 
911 ssize_t
912 _kern_select(int numFDs, fd_set *readSet, fd_set *writeSet, fd_set *errorSet,
913 	bigtime_t timeout, const sigset_t *sigMask)
914 {
915 	if (timeout >= 0)
916 		timeout += system_time();
917 
918 	return common_select(numFDs, readSet, writeSet, errorSet, timeout,
919 		sigMask, true);
920 }
921 
922 
923 ssize_t
924 _kern_poll(struct pollfd *fds, int numFDs, bigtime_t timeout,
925 	const sigset_t *sigMask)
926 {
927 	if (timeout >= 0)
928 		timeout += system_time();
929 
930 	return common_poll(fds, numFDs, timeout, sigMask, true);
931 }
932 
933 
934 ssize_t
935 _kern_wait_for_objects(object_wait_info* infos, int numInfos, uint32 flags,
936 	bigtime_t timeout)
937 {
938 	return common_wait_for_objects(infos, numInfos, flags, timeout, true);
939 }
940 
941 
942 //	#pragma mark - User syscalls
943 
944 
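/*!	Sanity check for the user syscalls: makes sure \a numFDs doesn't exceed
	the size of the caller's FD table. Non-positive counts always pass.
*/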
945 static bool
946 check_max_fds(int numFDs)
947 {
948 	if (numFDs <= 0)
949 		return true;
950 
951 	struct io_context *context = get_current_io_context(false);
952 	MutexLocker locker(&context->io_mutex);
953 	return (size_t)numFDs <= context->table_size;
954 }
955 
956 
957 ssize_t
958 _user_select(int numFDs, fd_set *userReadSet, fd_set *userWriteSet,
959 	fd_set *userErrorSet, bigtime_t timeout, const sigset_t *userSigMask)
960 {
961 	uint32 bytes = _howmany(numFDs, NFDBITS) * sizeof(fd_mask);
962 	int result;
963 
964 	if (timeout >= 0) {
965 		timeout += system_time();
966 		// deal with overflow
967 		if (timeout < 0)
968 			timeout = B_INFINITE_TIMEOUT;
969 	}
970 
971 	if (numFDs < 0 || !check_max_fds(numFDs))
972 		return B_BAD_VALUE;
973 
974 	if ((userReadSet != NULL && !IS_USER_ADDRESS(userReadSet))
975 		|| (userWriteSet != NULL && !IS_USER_ADDRESS(userWriteSet))
976 		|| (userErrorSet != NULL && !IS_USER_ADDRESS(userErrorSet))
977 		|| (userSigMask != NULL && !IS_USER_ADDRESS(userSigMask)))
978 		return B_BAD_ADDRESS;
979 
980 	// copy parameters
981 
982 	BStackOrHeapArray<char, 128> sets(bytes * (
983 		((userReadSet != NULL) ? 1 : 0) +
984 		((userWriteSet != NULL) ? 1 : 0) +
985 		((userErrorSet != NULL) ? 1 : 0)));
986 	if (!sets.IsValid())
987 		return B_NO_MEMORY;
988 
989 	char *nextSet = &sets[0];
990 	fd_set *readSet = NULL, *writeSet = NULL, *errorSet = NULL;
991 
992 	if (userReadSet != NULL) {
993 		readSet = (fd_set *)nextSet;
994 		nextSet += bytes;
995 
996 		if (user_memcpy(readSet, userReadSet, bytes) != B_OK)
997 			return B_BAD_ADDRESS;
998 	}
999 
1000 	if (userWriteSet != NULL) {
1001 		writeSet = (fd_set *)nextSet;
1002 		nextSet += bytes;
1003 
1004 		if (user_memcpy(writeSet, userWriteSet, bytes) != B_OK)
1005 			return B_BAD_ADDRESS;
1006 	}
1007 
1008 	if (userErrorSet != NULL) {
1009 		errorSet = (fd_set *)nextSet;
1010 
1011 		if (user_memcpy(errorSet, userErrorSet, bytes) != B_OK)
1012 			return B_BAD_ADDRESS;
1013 	}
1014 
1015 	sigset_t sigMask;
1016 	if (userSigMask != NULL
1017 			&& user_memcpy(&sigMask, userSigMask, sizeof(sigMask)) != B_OK) {
1018 		return B_BAD_ADDRESS;
1019 	}
1020 
1021 	result = common_select(numFDs, readSet, writeSet, errorSet, timeout,
1022 		userSigMask ? &sigMask : NULL, false);
1023 
1024 	// copy back results
1025 
1026 	if (result >= B_OK
1027 		&& ((readSet != NULL
1028 				&& user_memcpy(userReadSet, readSet, bytes) < B_OK)
1029 			|| (writeSet != NULL
1030 				&& user_memcpy(userWriteSet, writeSet, bytes) < B_OK)
1031 			|| (errorSet != NULL
1032 				&& user_memcpy(userErrorSet, errorSet, bytes) < B_OK))) {
1033 		result = B_BAD_ADDRESS;
1034 	}
1035 
1036 	return result;
1037 }
1038 
1039 
1040 ssize_t
1041 _user_poll(struct pollfd *userfds, int numFDs, bigtime_t timeout,
1042 	const sigset_t *userSigMask)
1043 {
1044 	if (timeout >= 0) {
1045 		timeout += system_time();
1046 		// deal with overflow
1047 		if (timeout < 0)
1048 			timeout = B_INFINITE_TIMEOUT;
1049 	}
1050 
1051 	if (numFDs < 0 || !check_max_fds(numFDs))
1052 		return B_BAD_VALUE;
1053 
1054 	BStackOrHeapArray<struct pollfd, 16> fds(numFDs);
1055 	if (!fds.IsValid())
1056 		return B_NO_MEMORY;
1057 
1058 	const size_t bytes = numFDs * sizeof(struct pollfd);
1059 	if (numFDs != 0) {
1060 		if (userfds == NULL || !IS_USER_ADDRESS(userfds))
1061 			return B_BAD_ADDRESS;
1062 
1063 		if (user_memcpy(fds, userfds, bytes) < B_OK)
1064 			return B_BAD_ADDRESS;
1065 	}
1066 
1067 	sigset_t sigMask;
1068 	if (userSigMask != NULL
1069 		&& (!IS_USER_ADDRESS(userSigMask)
1070 			|| user_memcpy(&sigMask, userSigMask, sizeof(sigMask)) < B_OK)) {
1071 		return B_BAD_ADDRESS;
1072 	}
1073 
1074 	status_t result = common_poll(fds, numFDs, timeout,
1075 		userSigMask != NULL ? &sigMask : NULL, false);
1076 
1077 	// copy back results
1078 	if (numFDs > 0 && user_memcpy(userfds, fds, bytes) != 0) {
1079 		if (result >= 0)
1080 			result = B_BAD_ADDRESS;
1081 	}
1082 
1083 	return result;
1084 }
1085 
1086 
1087 ssize_t
1088 _user_wait_for_objects(object_wait_info* userInfos, int numInfos, uint32 flags,
1089 	bigtime_t timeout)
1090 {
1091 	syscall_restart_handle_timeout_pre(flags, timeout);
1092 
1093 	if (numInfos < 0 || !check_max_fds(numInfos - sem_max_sems()
1094 			- port_max_ports() - thread_max_threads())) {
1095 		return B_BAD_VALUE;
1096 	}
1097 
1098 	if (numInfos == 0) {
1099 		// special case: no infos
1100 		ssize_t result = common_wait_for_objects(NULL, 0, flags, timeout,
1101 			false);
1102 		return result < 0
1103 			? syscall_restart_handle_timeout_post(result, timeout) : result;
1104 	}
1105 
1106 	if (userInfos == NULL || !IS_USER_ADDRESS(userInfos))
1107 		return B_BAD_ADDRESS;
1108 
1109 	BStackOrHeapArray<object_wait_info, 16> infos(numInfos);
1110 	if (!infos.IsValid())
1111 		return B_NO_MEMORY;
1112 	const int bytes = sizeof(object_wait_info) * numInfos;
1113 
1114 	if (user_memcpy(infos, userInfos, bytes) != B_OK)
1115 		return B_BAD_ADDRESS;
1116 
1117 	ssize_t result = common_wait_for_objects(infos, numInfos, flags, timeout, false);
1118 
1119 	if (result >= 0 && user_memcpy(userInfos, infos, bytes) != B_OK) {
1120 		result = B_BAD_ADDRESS;
1121 	} else {
1122 		syscall_restart_handle_timeout_post(result, timeout);
1123 	}
1124 
1125 	return result;
1126 }
1127