xref: /haiku/src/system/kernel/events/wait_for_objects.cpp (revision e1c4049fed1047bdb957b0529e1921e97ef94770)
1 /*
2  * Copyright 2007-2010, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2008, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  */
6 
7 #include <fs/select_sync_pool.h>
8 #include <wait_for_objects.h>
9 
10 #include <new>
11 
12 #include <poll.h>
13 #include <signal.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <sys/select.h>
17 
18 #include <OS.h>
19 #include <Select.h>
20 
21 #include <AutoDeleter.h>
22 #include <StackOrHeapArray.h>
23 
24 #include <event_queue.h>
25 #include <fs/fd.h>
26 #include <port.h>
27 #include <sem.h>
28 #include <syscalls.h>
29 #include <syscall_restart.h>
30 #include <thread.h>
31 #include <tracing.h>
32 #include <util/AutoLock.h>
33 #include <util/DoublyLinkedList.h>
34 #include <vfs.h>
35 
36 #include "select_ops.h"
37 #include "select_sync.h"
38 
39 
40 //#define TRACE_WAIT_FOR_OBJECTS
41 #ifdef TRACE_WAIT_FOR_OBJECTS
42 #	define PRINT(x) dprintf x
43 #	define FUNCTION(x) dprintf x
44 #else
45 #	define PRINT(x) ;
46 #	define FUNCTION(x) ;
47 #endif
48 
49 
50 using std::nothrow;
51 
52 
53 struct select_sync_pool_entry
54 	: DoublyLinkedListLinkImpl<select_sync_pool_entry> {
55 	selectsync			*sync;
56 	uint16				events;
57 };
58 
59 typedef DoublyLinkedList<select_sync_pool_entry> SelectSyncPoolEntryList;
60 
61 struct select_sync_pool {
62 	SelectSyncPoolEntryList	entries;
63 };
64 
65 
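// Sync object shared by all select_info entries of a single select(),
// poll(), or wait_for_objects() call: "sem" is released whenever a selected
// event arrives, "set" holds one select_info per watched object, and "count"
// is the number of entries in "set".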
66 struct wait_for_objects_sync : public select_sync {
67 	sem_id				sem;
68 	uint32				count;
69 	struct select_info*	set;
70 
71 	virtual ~wait_for_objects_sync();
72 	virtual status_t Notify(select_info* info, uint16 events);
73 };
74 
75 
76 select_sync::~select_sync()
77 {
78 }
79 
80 
81 #if WAIT_FOR_OBJECTS_TRACING
82 
83 
84 namespace WaitForObjectsTracing {
85 
86 
87 class SelectTraceEntry : public AbstractTraceEntry {
88 	protected:
89 		SelectTraceEntry(int count, fd_set* readSet, fd_set* writeSet,
90 			fd_set* errorSet)
91 			:
92 			fReadSet(NULL),
93 			fWriteSet(NULL),
94 			fErrorSet(NULL),
95 			fCount(count)
96 		{
97 			int sets = (readSet != NULL ? 1 : 0) + (writeSet != NULL ? 1 : 0)
98 				+ (errorSet != NULL ? 1 : 0);
99 			if (sets > 0 && count > 0) {
100 				uint32 bytes = _howmany(count, NFDBITS) * sizeof(fd_mask);
101 				uint8* allocated = (uint8*)alloc_tracing_buffer(bytes * sets);
102 				if (allocated != NULL) {
103 					if (readSet != NULL) {
104 						fReadSet = (fd_set*)allocated;
105 						memcpy(fReadSet, readSet, bytes);
106 						allocated += bytes;
107 					}
108 					if (writeSet != NULL) {
109 						fWriteSet = (fd_set*)allocated;
110 						memcpy(fWriteSet, writeSet, bytes);
111 						allocated += bytes;
112 					}
113 					if (errorSet != NULL) {
114 						fErrorSet = (fd_set*)allocated;
115 						memcpy(fErrorSet, errorSet, bytes);
116 					}
117 				}
118 			}
119 		}
120 
121 		void AddDump(TraceOutput& out, const char* name)
122 		{
123 			out.Print(name);
124 
125 			_PrintSet(out, "read", fReadSet);
126 			_PrintSet(out, ", write", fWriteSet);
127 			_PrintSet(out, ", error", fErrorSet);
128 		}
129 
130 	private:
131 		void _PrintSet(TraceOutput& out, const char* name, fd_set* set)
132 		{
133 
134 			out.Print("%s: <", name);
135 
136 			if (set != NULL) {
137 				bool first = true;
138 				for (int i = 0; i < fCount; i++) {
139 					if (!FD_ISSET(i, set))
140 						continue;
141 
142 					if (first) {
143 						out.Print("%d", i);
144 						first = false;
145 					} else
146 						out.Print(", %d", i);
147 				}
148 			}
149 
150 			out.Print(">");
151 		}
152 
153 	protected:
154 		fd_set*	fReadSet;
155 		fd_set*	fWriteSet;
156 		fd_set*	fErrorSet;
157 		int		fCount;
158 };
159 
160 
161 class SelectBegin : public SelectTraceEntry {
162 	public:
163 		SelectBegin(int count, fd_set* readSet, fd_set* writeSet,
164 			fd_set* errorSet, bigtime_t timeout)
165 			:
166 			SelectTraceEntry(count, readSet, writeSet, errorSet),
167 			fTimeout(timeout)
168 		{
169 			Initialized();
170 		}
171 
172 		virtual void AddDump(TraceOutput& out)
173 		{
174 			SelectTraceEntry::AddDump(out, "select begin: ");
175 			out.Print(", timeout: %" B_PRIdBIGTIME, fTimeout);
176 		}
177 
178 	private:
179 		bigtime_t	fTimeout;
180 };
181 
182 
183 class SelectDone : public SelectTraceEntry {
184 	public:
185 		SelectDone(int count, fd_set* readSet, fd_set* writeSet,
186 			fd_set* errorSet, status_t status)
187 			:
188 			SelectTraceEntry(status == B_OK ? count : 0, readSet, writeSet,
189 				errorSet),
190 			fStatus(status)
191 		{
192 			Initialized();
193 		}
194 
195 		virtual void AddDump(TraceOutput& out)
196 		{
197 			if (fStatus == B_OK)
198 				SelectTraceEntry::AddDump(out, "select done:  ");
199 			else
200 				out.Print("select done:  error: %#" B_PRIx32, fStatus);
201 		}
202 
203 	private:
204 		status_t	fStatus;
205 };
206 
207 
208 class PollTraceEntry : public AbstractTraceEntry {
209 	protected:
210 		PollTraceEntry(pollfd* fds, int count, bool resultEvents)
211 			:
212 			fEntries(NULL),
213 			fCount(0)
214 		{
215 			if (fds != NULL && count > 0) {
216 				for (int i = 0; i < count; i++) {
217 					if (resultEvents ? fds[i].revents : fds[i].events)
218 						fCount++;
219 				}
220 			}
221 
222 			if (fCount == 0)
223 				return;
224 
225 			fEntries = (FDEntry*)alloc_tracing_buffer(fCount * sizeof(FDEntry));
226 			if (fEntries != NULL) {
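				// Note: "fds" advances every iteration, while "i" is only
				// advanced (in the body) when an entry with events is stored,
				// so exactly the fCount interesting entries are copied.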
227 				for (int i = 0; i < fCount; fds++) {
228 					uint16 events = resultEvents ? fds->revents : fds->events;
229 					if (events != 0) {
230 						fEntries[i].fd = fds->fd;
231 						fEntries[i].events = events;
232 						i++;
233 					}
234 				}
235 			}
236 		}
237 
238 		void AddDump(TraceOutput& out)
239 		{
240 			if (fEntries == NULL)
241 				return;
242 
243 			static const struct {
244 				const char*	name;
245 				uint16		event;
246 			} kEventNames[] = {
247 				{ "r", POLLIN },
248 				{ "w", POLLOUT },
249 				{ "rb", POLLRDBAND },
250 				{ "wb", POLLWRBAND },
251 				{ "rp", POLLPRI },
252 				{ "err", POLLERR },
253 				{ "hup", POLLHUP },
254 				{ "inv", POLLNVAL },
255 				{ NULL, 0 }
256 			};
257 
258 			bool firstFD = true;
259 			for (int i = 0; i < fCount; i++) {
260 				if (firstFD) {
261 					out.Print("<%u: ", fEntries[i].fd);
262 					firstFD = false;
263 				} else
264 					out.Print(", <%u: ", fEntries[i].fd);
265 
266 				bool firstEvent = true;
267 				for (int k = 0; kEventNames[k].name != NULL; k++) {
268 					if ((fEntries[i].events & kEventNames[k].event) != 0) {
269 						if (firstEvent) {
270 							out.Print("%s", kEventNames[k].name);
271 							firstEvent = false;
272 						} else
273 							out.Print(", %s", kEventNames[k].name);
274 					}
275 				}
276 
277 				out.Print(">");
278 			}
279 		}
280 
281 	protected:
282 		struct FDEntry {
283 			uint16	fd;
284 			uint16	events;
285 		};
286 
287 		FDEntry*	fEntries;
288 		int			fCount;
289 };
290 
291 
292 class PollBegin : public PollTraceEntry {
293 	public:
294 		PollBegin(pollfd* fds, int count, bigtime_t timeout)
295 			:
296 			PollTraceEntry(fds, count, false),
297 			fTimeout(timeout)
298 		{
299 			Initialized();
300 		}
301 
302 		virtual void AddDump(TraceOutput& out)
303 		{
304 			out.Print("poll begin: ");
305 			PollTraceEntry::AddDump(out);
306 			out.Print(", timeout: %" B_PRIdBIGTIME, fTimeout);
307 		}
308 
309 	private:
310 		bigtime_t	fTimeout;
311 };
312 
313 
314 class PollDone : public PollTraceEntry {
315 	public:
316 		PollDone(pollfd* fds, int count, int result)
317 			:
318 			PollTraceEntry(fds, result >= 0 ? count : 0, true),
319 			fResult(result)
320 		{
321 			Initialized();
322 		}
323 
324 		virtual void AddDump(TraceOutput& out)
325 		{
326 			if (fResult >= 0) {
327 				out.Print("poll done:  count: %d: ", fResult);
328 				PollTraceEntry::AddDump(out);
329 			} else
330 				out.Print("poll done:  error: %#x", fResult);
331 		}
332 
333 	private:
334 		int		fResult;
335 };
336 
337 }	// namespace WaitForObjectsTracing
338 
339 #	define T(x)	new(std::nothrow) WaitForObjectsTracing::x
340 
341 #else
342 #	define T(x)
343 #endif	// WAIT_FOR_OBJECTS_TRACING
344 
345 
346 // #pragma mark -
347 
348 
349 /*!
350 	Clears all bits in the fd_set - since we are using variable-sized
351 	arrays in the kernel, we can't use the FD_ZERO() macro provided by
352 	sys/select.h for this task.
353 	All other FD_xxx() macros are safe to use, though.
354 */
355 static inline void
356 fd_zero(fd_set *set, int numFDs)
357 {
358 	if (set != NULL)
359 		memset(set, 0, _howmany(numFDs, NFDBITS) * sizeof(fd_mask));
360 }
361 
362 
363 static status_t
364 create_select_sync(int numFDs, wait_for_objects_sync*& _sync)
365 {
366 	// create sync structure
367 	wait_for_objects_sync* sync = new(nothrow) wait_for_objects_sync;
368 	if (sync == NULL)
369 		return B_NO_MEMORY;
370 	ObjectDeleter<wait_for_objects_sync> syncDeleter(sync);
371 
372 	// create info set
373 	sync->set = new(nothrow) select_info[numFDs];
374 	if (sync->set == NULL)
375 		return B_NO_MEMORY;
376 	ArrayDeleter<select_info> setDeleter(sync->set);
377 
378 	// create select event semaphore
379 	sync->sem = create_sem(0, "select");
380 	if (sync->sem < 0)
381 		return sync->sem;
382 
383 	sync->count = numFDs;
384 
385 	for (int i = 0; i < numFDs; i++) {
386 		sync->set[i].next = NULL;
387 		sync->set[i].sync = sync;
388 	}
389 
390 	setDeleter.Detach();
391 	syncDeleter.Detach();
392 	_sync = sync;
393 
394 	return B_OK;
395 }
396 
397 
398 void
399 acquire_select_sync(select_sync* sync)
400 {
401 	FUNCTION(("acquire_select_sync(%p)\n", sync));
402 	sync->AcquireReference();
403 }
404 
405 
406 void
407 put_select_sync(select_sync* sync)
408 {
409 	FUNCTION(("put_select_sync(%p): -> %ld\n", sync, sync->CountReferences() - 1));
410 	sync->ReleaseReference();
411 }
412 
413 
414 wait_for_objects_sync::~wait_for_objects_sync()
415 {
416 	delete_sem(sem);
417 	delete[] set;
418 }
419 
420 
421 status_t
422 wait_for_objects_sync::Notify(select_info* info, uint16 events)
423 {
424 	if (sem < B_OK)
425 		return B_BAD_VALUE;
426 
427 	atomic_or(&info->events, events);
428 
429 	// only wake up the waiting select()/poll() call if the events
430 	// match one of the selected ones
431 	if (info->selected_events & events)
432 		return release_sem_etc(sem, 1, B_DO_NOT_RESCHEDULE);
433 
434 	return B_OK;
435 }
436 
437 
438 static int
439 common_select(int numFDs, fd_set *readSet, fd_set *writeSet, fd_set *errorSet,
440 	bigtime_t timeout, const sigset_t *sigMask, bool kernel)
441 {
442 	status_t status = B_OK;
443 	int fd;
444 
445 	FUNCTION(("[%ld] common_select(%d, %p, %p, %p, %lld, %p, %d)\n",
446 		find_thread(NULL), numFDs, readSet, writeSet, errorSet, timeout,
447 		sigMask, kernel));
448 
449 	// check if fds are valid before doing anything
450 
451 	for (fd = 0; fd < numFDs; fd++) {
452 		if (((readSet && FD_ISSET(fd, readSet))
453 			|| (writeSet && FD_ISSET(fd, writeSet))
454 			|| (errorSet && FD_ISSET(fd, errorSet)))
455 			&& !fd_is_valid(fd, kernel))
456 			return B_FILE_ERROR;
457 	}
458 
459 	// allocate sync object
460 	wait_for_objects_sync* sync;
461 	status = create_select_sync(numFDs, sync);
462 	if (status != B_OK)
463 		return status;
464 
465 	T(SelectBegin(numFDs, readSet, writeSet, errorSet, timeout));
466 
467 	// start selecting file descriptors
468 
469 	for (fd = 0; fd < numFDs; fd++) {
470 		sync->set[fd].selected_events = 0;
471 		sync->set[fd].events = 0;
472 
473 		if (readSet && FD_ISSET(fd, readSet)) {
474 			sync->set[fd].selected_events = SELECT_FLAG(B_SELECT_READ)
475 				| SELECT_FLAG(B_SELECT_DISCONNECTED) | SELECT_FLAG(B_SELECT_ERROR);
476 		}
477 		if (writeSet && FD_ISSET(fd, writeSet)) {
478 			sync->set[fd].selected_events |= SELECT_FLAG(B_SELECT_WRITE)
479 				| SELECT_FLAG(B_SELECT_ERROR);
480 		}
481 		if (errorSet && FD_ISSET(fd, errorSet))
482 			sync->set[fd].selected_events |= SELECT_FLAG(B_SELECT_ERROR);
483 
484 		if (sync->set[fd].selected_events != 0) {
485 			select_fd(fd, sync->set + fd, kernel);
486 				// array position is the same as the fd for select()
487 		}
488 	}
489 
490 	// set new signal mask
491 	sigset_t oldSigMask;
492 	if (sigMask != NULL) {
493 		sigprocmask(SIG_SETMASK, sigMask, &oldSigMask);
494 		if (!kernel) {
495 			Thread *thread = thread_get_current_thread();
496 			thread->old_sig_block_mask = oldSigMask;
497 			thread->flags |= THREAD_FLAGS_OLD_SIGMASK;
498 		}
499 	}
500 
501 	// wait for something to happen
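	// (callers have already converted a relative timeout into an absolute
	// deadline, hence B_ABSOLUTE_TIMEOUT)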
502 	status = acquire_sem_etc(sync->sem, 1,
503 		B_CAN_INTERRUPT | (timeout >= 0 ? B_ABSOLUTE_TIMEOUT : 0), timeout);
504 
505 	// restore the old signal mask
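	// (for userland calls the mask is not restored here; the
	// THREAD_FLAGS_OLD_SIGMASK flag set above lets the syscall return code
	// restore the original mask)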
506 	if (sigMask != NULL && kernel)
507 		sigprocmask(SIG_SETMASK, &oldSigMask, NULL);
508 
509 	PRINT(("common_select(): acquire_sem_etc() returned: %lx\n", status));
510 
511 	// deselect file descriptors
512 
513 	for (fd = 0; fd < numFDs; fd++)
514 		deselect_fd(fd, sync->set + fd, kernel);
515 
516 	PRINT(("common_select(): events deselected\n"));
517 
518 	// collect the events that have happened in the meantime
519 
520 	int count = 0;
521 
522 	if (status == B_INTERRUPTED) {
523 		// We must not clear the sets in this case, as applications may
524 		// rely on their contents.
525 		put_select_sync(sync);
526 		T(SelectDone(numFDs, readSet, writeSet, errorSet, status));
527 		return B_INTERRUPTED;
528 	}
529 
530 	// Clear the sets so they can be used to store the received events
531 	// (we can't use FD_ZERO() here, because we have variable-sized arrays;
532 	// the other FD_xxx() macros are safe, though).
533 	fd_zero(readSet, numFDs);
534 	fd_zero(writeSet, numFDs);
535 	fd_zero(errorSet, numFDs);
536 
537 	if (status == B_OK) {
538 		for (count = 0, fd = 0; fd < numFDs; fd++) {
539 			if (readSet && sync->set[fd].events & (SELECT_FLAG(B_SELECT_READ)
540 					| SELECT_FLAG(B_SELECT_DISCONNECTED) | SELECT_FLAG(B_SELECT_ERROR))) {
541 				FD_SET(fd, readSet);
542 				count++;
543 			}
544 			if (writeSet
545 				&& sync->set[fd].events & (SELECT_FLAG(B_SELECT_WRITE)
546 					| SELECT_FLAG(B_SELECT_ERROR))) {
547 				FD_SET(fd, writeSet);
548 				count++;
549 			}
550 			if (errorSet
551 				&& sync->set[fd].events & SELECT_FLAG(B_SELECT_ERROR)) {
552 				FD_SET(fd, errorSet);
553 				count++;
554 			}
555 		}
556 	}
557 
558 	// B_TIMED_OUT and B_WOULD_BLOCK are supposed to return 0
559 
560 	put_select_sync(sync);
561 
562 	T(SelectDone(numFDs, readSet, writeSet, errorSet, status));
563 
564 	return count;
565 }
566 
567 
568 static int
569 common_poll(struct pollfd *fds, nfds_t numFDs, bigtime_t timeout,
570 	const sigset_t *sigMask, bool kernel)
571 {
572 	// allocate sync object
573 	wait_for_objects_sync* sync;
574 	status_t status = create_select_sync(numFDs, sync);
575 	if (status != B_OK)
576 		return status;
577 
578 	T(PollBegin(fds, numFDs, timeout));
579 
580 	// start polling file descriptors (by selecting them)
581 
582 	bool invalid = false;
583 	for (uint32 i = 0; i < numFDs; i++) {
584 		int fd = fds[i].fd;
585 
586 		// initialize event masks
587 		sync->set[i].selected_events = fds[i].events
588 			| POLLNVAL | POLLERR | POLLHUP;
589 		sync->set[i].events = 0;
590 		fds[i].revents = 0;
591 
592 		if (fd >= 0 && select_fd(fd, sync->set + i, kernel) != B_OK) {
593 			sync->set[i].events = POLLNVAL;
594 			fds[i].revents = POLLNVAL;
595 				// indicates that the FD doesn't need to be deselected
596 			invalid = true;
597 		}
598 	}
599 
600 	// set new signal mask
601 	sigset_t oldSigMask;
602 	if (sigMask != NULL) {
603 		sigprocmask(SIG_SETMASK, sigMask, &oldSigMask);
604 		if (!kernel) {
605 			Thread *thread = thread_get_current_thread();
606 			thread->old_sig_block_mask = oldSigMask;
607 			thread->flags |= THREAD_FLAGS_OLD_SIGMASK;
608 		}
609 	}
610 
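	// wait for something to happen, unless an invalid FD was encountered, in
	// which case we return immediately and report the POLLNVAL results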
611 	if (!invalid) {
612 		status = acquire_sem_etc(sync->sem, 1,
613 			B_CAN_INTERRUPT | (timeout >= 0 ? B_ABSOLUTE_TIMEOUT : 0), timeout);
614 	}
615 
616 	// restore the old signal mask
617 	if (sigMask != NULL && kernel)
618 		sigprocmask(SIG_SETMASK, &oldSigMask, NULL);
619 
620 	// deselect file descriptors
621 
622 	for (uint32 i = 0; i < numFDs; i++) {
623 		if (fds[i].fd >= 0 && (fds[i].revents & POLLNVAL) == 0)
624 			deselect_fd(fds[i].fd, sync->set + i, kernel);
625 	}
626 
627 	// collect the events that have happened in the meantime
628 
629 	int count = 0;
630 	switch (status) {
631 		case B_OK:
632 			for (uint32 i = 0; i < numFDs; i++) {
633 				if (fds[i].fd < 0)
634 					continue;
635 
636 				// POLLxxx flags and B_SELECT_xxx flags are compatible
637 				fds[i].revents = sync->set[i].events
638 					& sync->set[i].selected_events;
639 				if (fds[i].revents != 0)
640 					count++;
641 			}
642 			break;
643 		case B_INTERRUPTED:
644 			count = B_INTERRUPTED;
645 			break;
646 		default:
647 			// B_TIMED_OUT and B_WOULD_BLOCK: leave count at 0
648 			break;
649 	}
650 
651 	put_select_sync(sync);
652 
653 	T(PollDone(fds, numFDs, count));
654 
655 	return count;
656 }
657 
658 
659 static ssize_t
660 common_wait_for_objects(object_wait_info* infos, int numInfos, uint32 flags,
661 	bigtime_t timeout, bool kernel)
662 {
663 	status_t status = B_OK;
664 
665 	// allocate sync object
666 	wait_for_objects_sync* sync;
667 	status = create_select_sync(numInfos, sync);
668 	if (status != B_OK)
669 		return status;
670 
671 	// start selecting objects
672 
673 	bool invalid = false;
674 	for (int i = 0; i < numInfos; i++) {
675 		uint16 type = infos[i].type;
676 		int32 object = infos[i].object;
677 
678 		// initialize event masks
679 		sync->set[i].selected_events = infos[i].events
680 			| B_EVENT_INVALID | B_EVENT_ERROR | B_EVENT_DISCONNECTED;
681 		sync->set[i].events = 0;
682 		infos[i].events = 0;
683 
684 		if (select_object(type, object, sync->set + i, kernel) != B_OK) {
685 			sync->set[i].events = B_EVENT_INVALID;
686 			infos[i].events = B_EVENT_INVALID;
687 				// indicates that the object doesn't need to be deselected
688 			invalid = true;
689 		}
690 	}
691 
692 	if (!invalid) {
693 		status = acquire_sem_etc(sync->sem, 1, B_CAN_INTERRUPT | flags,
694 			timeout);
695 	}
696 
697 	// deselect objects
698 
699 	for (int i = 0; i < numInfos; i++) {
700 		uint16 type = infos[i].type;
701 
702 		if ((infos[i].events & B_EVENT_INVALID) == 0)
703 			deselect_object(type, infos[i].object, sync->set + i, kernel);
704 	}
705 
706 	// collect the events that have happened in the meantime
707 
708 	ssize_t count = 0;
709 	if (status == B_OK) {
710 		for (int i = 0; i < numInfos; i++) {
711 			infos[i].events = sync->set[i].events
712 				& sync->set[i].selected_events;
713 			if (infos[i].events != 0)
714 				count++;
715 		}
716 	} else {
717 		// B_INTERRUPTED, B_TIMED_OUT, and B_WOULD_BLOCK
718 		count = status;
719 	}
720 
721 	put_select_sync(sync);
722 
723 	return count;
724 }
725 
726 
727 // #pragma mark - kernel private
728 
729 
730 status_t
731 notify_select_events(select_info* info, uint16 events)
732 {
733 	if (info == NULL || info->sync == NULL)
734 		return B_BAD_VALUE;
735 
736 	FUNCTION(("notify_select_events(%p (%p), 0x%x)\n", info, info->sync,
737 		events));
738 
739 	return info->sync->Notify(info, events);
740 }
741 
742 
743 void
744 notify_select_events_list(select_info* list, uint16 events)
745 {
746 	struct select_info* info = list;
747 	while (info != NULL) {
748 		select_info* next = info->next;
749 		notify_select_events(info, events);
750 		info = next;
751 	}
752 }
753 
754 
755 //	#pragma mark - public kernel API
756 
757 
758 status_t
759 notify_select_event(struct selectsync *sync, uint8 event)
760 {
761 	return notify_select_events((select_info*)sync, SELECT_FLAG(event));
762 }
763 
764 
765 //	#pragma mark - private kernel exported API
766 
767 
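// A select_sync_pool collects selectsync objects together with the events
// each of them has selected. The helpers below add and remove entries and
// notify all entries that are interested in a given event.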
768 static select_sync_pool_entry *
769 find_select_sync_pool_entry(select_sync_pool *pool, selectsync *sync)
770 {
771 	for (SelectSyncPoolEntryList::Iterator it = pool->entries.GetIterator();
772 		 it.HasNext();) {
773 		select_sync_pool_entry *entry = it.Next();
774 		if (entry->sync == sync)
775 			return entry;
776 	}
777 
778 	return NULL;
779 }
780 
781 
782 static status_t
783 add_select_sync_pool_entry(select_sync_pool *pool, selectsync *sync,
784 	uint8 event)
785 {
786 	// check whether the entry already exists
787 	select_sync_pool_entry *entry = find_select_sync_pool_entry(pool, sync);
788 	if (!entry) {
789 		entry = new (std::nothrow) select_sync_pool_entry;
790 		if (!entry)
791 			return B_NO_MEMORY;
792 
793 		entry->sync = sync;
794 		entry->events = 0;
795 
796 		pool->entries.Add(entry);
797 	}
798 
799 	entry->events |= SELECT_FLAG(event);
800 
801 	return B_OK;
802 }
803 
804 
805 status_t
806 add_select_sync_pool_entry(select_sync_pool **_pool, selectsync *sync,
807 	uint8 event)
808 {
809 	// create the pool, if necessary
810 	select_sync_pool *pool = *_pool;
811 	if (!pool) {
812 		pool = new (std::nothrow) select_sync_pool;
813 		if (!pool)
814 			return B_NO_MEMORY;
815 
816 		*_pool = pool;
817 	}
818 
819 	// add the entry
820 	status_t error = add_select_sync_pool_entry(pool, sync, event);
821 
822 	// cleanup: if adding the entry failed and the pool is still empty, discard it again
823 	if (pool->entries.IsEmpty()) {
824 		delete pool;
825 		*_pool = NULL;
826 	}
827 
828 	return error;
829 }
830 
831 
832 status_t
833 remove_select_sync_pool_entry(select_sync_pool **_pool, selectsync *sync,
834 	uint8 event)
835 {
836 	select_sync_pool *pool = *_pool;
837 	if (!pool)
838 		return B_ENTRY_NOT_FOUND;
839 
840 	// clear the event flag of the matching entries
841 	bool found = false;
842 	for (SelectSyncPoolEntryList::Iterator it = pool->entries.GetIterator();
843 		 it.HasNext();) {
844 		select_sync_pool_entry *entry = it.Next();
845 		if (entry->sync == sync) {
846 			found = true;
847 			entry->events &= ~SELECT_FLAG(event);
848 
849 			// remove the entry, if no longer needed
850 			if (entry->events == 0) {
851 				it.Remove();
852 				delete entry;
853 			}
854 		}
855 	}
856 
857 	if (!found)
858 		return B_ENTRY_NOT_FOUND;
859 
860 	// delete the pool, if no longer needed
861 	if (pool->entries.IsEmpty()) {
862 		delete pool;
863 		*_pool = NULL;
864 	}
865 
866 	return B_OK;
867 }
868 
869 
870 void
871 delete_select_sync_pool(select_sync_pool *pool)
872 {
873 	if (!pool)
874 		return;
875 
876 	while (select_sync_pool_entry *entry = pool->entries.Head()) {
877 		pool->entries.Remove(entry);
878 		delete entry;
879 	}
880 
881 	delete pool;
882 }
883 
884 
885 void
886 notify_select_event_pool(select_sync_pool *pool, uint8 event)
887 {
888 	if (!pool)
889 		return;
890 
891 	FUNCTION(("notify_select_event_pool(%p, %u)\n", pool, event));
892 
893 	for (SelectSyncPoolEntryList::Iterator it = pool->entries.GetIterator();
894 		 it.HasNext();) {
895 		select_sync_pool_entry *entry = it.Next();
896 		if (entry->events & SELECT_FLAG(event))
897 			notify_select_event(entry->sync, event);
898 	}
899 }
900 
901 
902 //	#pragma mark - Kernel POSIX layer
903 
904 
905 ssize_t
906 _kern_select(int numFDs, fd_set *readSet, fd_set *writeSet, fd_set *errorSet,
907 	bigtime_t timeout, const sigset_t *sigMask)
908 {
909 	if (timeout >= 0)
910 		timeout += system_time();
911 
912 	return common_select(numFDs, readSet, writeSet, errorSet, timeout,
913 		sigMask, true);
914 }
915 
916 
917 ssize_t
918 _kern_poll(struct pollfd *fds, int numFDs, bigtime_t timeout,
919 	const sigset_t *sigMask)
920 {
921 	if (timeout >= 0)
922 		timeout += system_time();
923 
924 	return common_poll(fds, numFDs, timeout, sigMask, true);
925 }
926 
927 
928 ssize_t
929 _kern_wait_for_objects(object_wait_info* infos, int numInfos, uint32 flags,
930 	bigtime_t timeout)
931 {
932 	return common_wait_for_objects(infos, numInfos, flags, timeout, true);
933 }
934 
935 
936 //	#pragma mark - User syscalls
937 
938 
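/*!	Checks whether the given FD count is sane, i.e. does not exceed the size
	of the calling team's FD table, which also bounds the allocations the
	syscalls below perform on behalf of userland.
*/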
939 static bool
940 check_max_fds(int numFDs)
941 {
942 	if (numFDs <= 0)
943 		return true;
944 
945 	struct io_context *context = get_current_io_context(false);
946 	MutexLocker locker(&context->io_mutex);
947 	return (size_t)numFDs <= context->table_size;
948 }
949 
950 
951 ssize_t
952 _user_select(int numFDs, fd_set *userReadSet, fd_set *userWriteSet,
953 	fd_set *userErrorSet, bigtime_t timeout, const sigset_t *userSigMask)
954 {
955 	uint32 bytes = _howmany(numFDs, NFDBITS) * sizeof(fd_mask);
956 	int result;
957 
958 	if (timeout >= 0) {
959 		timeout += system_time();
960 		// deal with overflow
961 		if (timeout < 0)
962 			timeout = B_INFINITE_TIMEOUT;
963 	}
964 
965 	if (numFDs < 0 || !check_max_fds(numFDs))
966 		return B_BAD_VALUE;
967 
968 	if ((userReadSet != NULL && !IS_USER_ADDRESS(userReadSet))
969 		|| (userWriteSet != NULL && !IS_USER_ADDRESS(userWriteSet))
970 		|| (userErrorSet != NULL && !IS_USER_ADDRESS(userErrorSet))
971 		|| (userSigMask != NULL && !IS_USER_ADDRESS(userSigMask)))
972 		return B_BAD_ADDRESS;
973 
974 	// copy parameters
975 
976 	BStackOrHeapArray<char, 128> sets(bytes * (
977 		((userReadSet != NULL) ? 1 : 0) +
978 		((userWriteSet != NULL) ? 1 : 0) +
979 		((userErrorSet != NULL) ? 1 : 0)));
980 	if (!sets.IsValid())
981 		return B_NO_MEMORY;
982 
983 	char *nextSet = &sets[0];
984 	fd_set *readSet = NULL, *writeSet = NULL, *errorSet = NULL;
985 
986 	if (userReadSet != NULL) {
987 		readSet = (fd_set *)nextSet;
988 		nextSet += bytes;
989 
990 		if (user_memcpy(readSet, userReadSet, bytes) != B_OK)
991 			return B_BAD_ADDRESS;
992 	}
993 
994 	if (userWriteSet != NULL) {
995 		writeSet = (fd_set *)nextSet;
996 		nextSet += bytes;
997 
998 		if (user_memcpy(writeSet, userWriteSet, bytes) != B_OK)
999 			return B_BAD_ADDRESS;
1000 	}
1001 
1002 	if (userErrorSet != NULL) {
1003 		errorSet = (fd_set *)nextSet;
1004 
1005 		if (user_memcpy(errorSet, userErrorSet, bytes) != B_OK)
1006 			return B_BAD_ADDRESS;
1007 	}
1008 
1009 	sigset_t sigMask;
1010 	if (userSigMask != NULL
1011 			&& user_memcpy(&sigMask, userSigMask, sizeof(sigMask)) != B_OK) {
1012 		return B_BAD_ADDRESS;
1013 	}
1014 
1015 	result = common_select(numFDs, readSet, writeSet, errorSet, timeout,
1016 		userSigMask ? &sigMask : NULL, false);
1017 
1018 	// copy back results
1019 
1020 	if (result >= B_OK
1021 		&& ((readSet != NULL
1022 				&& user_memcpy(userReadSet, readSet, bytes) < B_OK)
1023 			|| (writeSet != NULL
1024 				&& user_memcpy(userWriteSet, writeSet, bytes) < B_OK)
1025 			|| (errorSet != NULL
1026 				&& user_memcpy(userErrorSet, errorSet, bytes) < B_OK))) {
1027 		result = B_BAD_ADDRESS;
1028 	}
1029 
1030 	return result;
1031 }
1032 
1033 
1034 ssize_t
1035 _user_poll(struct pollfd *userfds, int numFDs, bigtime_t timeout,
1036 	const sigset_t *userSigMask)
1037 {
1038 	if (timeout >= 0) {
1039 		timeout += system_time();
1040 		// deal with overflow
1041 		if (timeout < 0)
1042 			timeout = B_INFINITE_TIMEOUT;
1043 	}
1044 
1045 	if (numFDs < 0 || !check_max_fds(numFDs))
1046 		return B_BAD_VALUE;
1047 
1048 	BStackOrHeapArray<struct pollfd, 16> fds(numFDs);
1049 	if (!fds.IsValid())
1050 		return B_NO_MEMORY;
1051 
1052 	const size_t bytes = numFDs * sizeof(struct pollfd);
1053 	if (numFDs != 0) {
1054 		if (userfds == NULL || !IS_USER_ADDRESS(userfds))
1055 			return B_BAD_ADDRESS;
1056 
1057 		if (user_memcpy(fds, userfds, bytes) < B_OK)
1058 			return B_BAD_ADDRESS;
1059 	}
1060 
1061 	sigset_t sigMask;
1062 	if (userSigMask != NULL
1063 		&& (!IS_USER_ADDRESS(userSigMask)
1064 			|| user_memcpy(&sigMask, userSigMask, sizeof(sigMask)) < B_OK)) {
1065 		return B_BAD_ADDRESS;
1066 	}
1067 
1068 	status_t result = common_poll(fds, numFDs, timeout,
1069 		userSigMask != NULL ? &sigMask : NULL, false);
1070 
1071 	// copy back results
1072 	if (numFDs > 0 && user_memcpy(userfds, fds, bytes) != B_OK) {
1073 		if (result >= 0)
1074 			result = B_BAD_ADDRESS;
1075 	}
1076 
1077 	return result;
1078 }
1079 
1080 
1081 ssize_t
1082 _user_wait_for_objects(object_wait_info* userInfos, int numInfos, uint32 flags,
1083 	bigtime_t timeout)
1084 {
1085 	syscall_restart_handle_timeout_pre(flags, timeout);
1086 
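	// Only file descriptors are bounded by the I/O context's table size, so
	// subtract the maximum possible number of semaphores, ports, and threads
	// before applying that limit to numInfos.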
1087 	if (numInfos < 0 || !check_max_fds(numInfos - sem_max_sems()
1088 			- port_max_ports() - thread_max_threads())) {
1089 		return B_BAD_VALUE;
1090 	}
1091 
1092 	if (numInfos == 0) {
1093 		// special case: no infos
1094 		ssize_t result = common_wait_for_objects(NULL, 0, flags, timeout,
1095 			false);
1096 		return result < 0
1097 			? syscall_restart_handle_timeout_post(result, timeout) : result;
1098 	}
1099 
1100 	if (userInfos == NULL || !IS_USER_ADDRESS(userInfos))
1101 		return B_BAD_ADDRESS;
1102 
1103 	BStackOrHeapArray<object_wait_info, 16> infos(numInfos);
1104 	if (!infos.IsValid())
1105 		return B_NO_MEMORY;
1106 	const int bytes = sizeof(object_wait_info) * numInfos;
1107 
1108 	if (user_memcpy(infos, userInfos, bytes) != B_OK)
1109 		return B_BAD_ADDRESS;
1110 
1111 	ssize_t result = common_wait_for_objects(infos, numInfos, flags, timeout, false);
1112 
1113 	if (result >= 0 && user_memcpy(userInfos, infos, bytes) != B_OK) {
1114 		result = B_BAD_ADDRESS;
1115 	} else {
1116 		syscall_restart_handle_timeout_post(result, timeout);
1117 	}
1118 
1119 	return result;
1120 }
1121