xref: /haiku/src/libs/bsd/kqueue.cpp (revision e1c4049fed1047bdb957b0529e1921e97ef94770)
1 /*
2  * Copyright 2023, Haiku, Inc. All rights reserved.
3  * Distributed under the terms of the MIT License.
4  */
5 #include <sys/event.h>
6 
7 #include <StackOrHeapArray.h>
8 
9 #include <libroot/errno_private.h>
10 #include <syscalls.h>
11 #include <event_queue_defs.h>
12 
13 
14 extern "C" int
15 kqueue()
16 {
17 	int fd = _kern_event_queue_create(0);
18 	if (fd < 0) {
19 		__set_errno(fd);
20 		return -1;
21 	}
22 	return fd;
23 }
24 
25 
26 static short
27 filter_from_info(const event_wait_info& info)
28 {
29 	switch (info.type) {
30 		case B_OBJECT_TYPE_FD:
31 			if (info.events > 0 && (info.events & B_EVENT_WRITE) != 0)
32 				return EVFILT_WRITE;
33 			return EVFILT_READ;
34 
35 		case B_OBJECT_TYPE_THREAD:
36 			return EVFILT_PROC;
37 	}
38 
39 	return 0;
40 }
41 
42 
43 extern "C" int
44 kevent(int kq,
45 	const struct kevent *changelist, int nchanges,
46 	struct kevent *eventlist, int nevents,
47 	const struct timespec *tspec)
48 {
49 	BStackOrHeapArray<event_wait_info, 16> waitInfos(max_c(nchanges, nevents));
50 
51 	event_wait_info* waitInfo = waitInfos;
52 	int changedInfos = 0;
53 
54 	for (int i = 0; i < nchanges; i++) {
55 		waitInfo->object = changelist[i].ident;
56 		waitInfo->events = 0;
57 		waitInfo->user_data = changelist[i].udata;
58 
59 		int32 events = 0, behavior = 0;
60 		switch (changelist[i].filter) {
61 			case EVFILT_READ:
62 				waitInfo->type = B_OBJECT_TYPE_FD;
63 				events = B_EVENT_READ;
64 				break;
65 
66 			case EVFILT_WRITE:
67 				waitInfo->type = B_OBJECT_TYPE_FD;
68 				events = B_EVENT_WRITE;
69 				break;
70 
71 			case EVFILT_PROC:
72 				waitInfo->type = B_OBJECT_TYPE_THREAD;
73 				if ((changelist[i].fflags & NOTE_EXIT) != 0)
74 					events |= B_EVENT_INVALID;
75 				break;
76 
77 			default:
78 				return EINVAL;
79 		}
80 
81 		if ((changelist[i].flags & EV_ONESHOT) != 0)
82 			behavior |= B_EVENT_ONE_SHOT;
83 		if ((changelist[i].flags & EV_CLEAR) == 0)
84 			behavior |= B_EVENT_LEVEL_TRIGGERED;
85 
86 		if (changelist[i].filter == EVFILT_READ || changelist[i].filter == EVFILT_WRITE) {
87 			// kqueue treats the same file descriptor with both READ and WRITE filters
88 			// as two separate listeners. Haiku, however, treats it as one.
89 			// We rectify this here by carefully combining the two.
90 
91 			// We can't support ONESHOT for descriptors due to the separation.
92 			if ((changelist[i].flags & EV_ONESHOT) != 0) {
93 				__set_errno(EOPNOTSUPP);
94 				return -1;
95 			}
96 
97 			const short otherFilter = (changelist[i].filter == EVFILT_READ)
98 				? EVFILT_WRITE : EVFILT_READ;
99 			const int32 otherEvents = (otherFilter == EVFILT_READ)
100 				? B_EVENT_READ : B_EVENT_WRITE;
101 
102 			// First, check if the other filter is specified in this changelist.
103 			int j;
104 			for (j = 0; j < nchanges; j++) {
105 				if (changelist[j].ident != changelist[i].ident)
106 					continue;
107 				if (changelist[j].filter != otherFilter)
108 					continue;
109 
110 				// We've found it.
111 				break;
112 			}
113 			if (j < nchanges) {
114 				// It is in the list.
115 				if (j < i) {
116 					// And it's already been taken care of.
117 					continue;
118 				}
119 
120 				// Fold it into this one.
121 				if ((changelist[j].flags & EV_ADD) != 0) {
122 					waitInfo->events |= otherEvents;
123 				} else if ((changelist[j].flags & EV_DELETE) != 0) {
124 					waitInfo->events &= ~otherEvents;
125 				}
126 			} else {
127 				// It is not in the list. See if it's already set.
128 				event_wait_info info;
129 				info.type = B_OBJECT_TYPE_FD;
130 				info.object = waitInfo->object;
131 				info.events = -1;
132 
133 				status_t status = _kern_event_queue_select(kq, &info, 1);
134 				if (status == B_OK)
135 					waitInfo->events |= (info.events & otherEvents);
136 			}
137 		}
138 
139 		if ((changelist[i].flags & EV_ADD) != 0) {
140 			waitInfo->events |= events;
141 		} else if ((changelist[i].flags & EV_DELETE) != 0) {
142 			waitInfo->events &= ~events;
143 		}
144 
145 		if (waitInfo->events != 0)
146 			waitInfo->events |= behavior;
147 
148 		changedInfos++;
149 		waitInfo++;
150 	}
151 	if (changedInfos != 0) {
152 		status_t status = _kern_event_queue_select(kq, waitInfos, changedInfos);
153 		if (status != B_OK) {
154 			if (nchanges == 1 && nevents == 0) {
155 				// Special case: return the lone error directly.
156 				__set_errno(waitInfos[0].events);
157 				return -1;
158 			}
159 
160 			// Report problems as error events.
161 			int errors = 0;
162 			for (int i = 0; i < changedInfos; i++) {
163 				if (waitInfos[i].events > 0)
164 					continue;
165 				if (nevents == 0)
166 					break;
167 
168 				short filter = filter_from_info(waitInfos[i]);
169 				int64_t data = waitInfos[i].events;
170 				EV_SET(eventlist, waitInfos[i].object,
171 					filter, EV_ERROR, 0, data, waitInfos[i].user_data);
172 				eventlist++;
173 				nevents--;
174 				errors++;
175 			}
176 			if (nevents == 0 || errors == 0) {
177 				__set_errno(status);
178 				return -1;
179 			}
180 		}
181 	}
182 
183 	if (nevents != 0) {
184 		bigtime_t timeout = 0;
185 		uint32 waitFlags = 0;
186 		if (tspec != NULL) {
187 			timeout = (tspec->tv_sec * 1000000LL) + (tspec->tv_nsec / 1000LL);
188 			waitFlags |= B_RELATIVE_TIMEOUT;
189 		}
190 
191 		ssize_t events = _kern_event_queue_wait(kq, waitInfos,
192 			max_c(1, nevents / 2), waitFlags, timeout);
193 		if (events > 0) {
194 			int returnedEvents = 0;
195 			for (ssize_t i = 0; i < events; i++) {
196 				unsigned short flags = 0;
197 				unsigned int fflags = 0;
198 				int64_t data = 0;
199 
200 				if (waitInfos[i].events < 0) {
201 					flags |= EV_ERROR;
202 					data = waitInfos[i].events;
203 				} else if ((waitInfos[i].events & B_EVENT_DISCONNECTED) != 0) {
204 					flags |= EV_EOF;
205 				} else if ((waitInfos[i].events & B_EVENT_INVALID) != 0) {
206 					switch (waitInfos[i].type) {
207 						case B_OBJECT_TYPE_FD:
208 							flags |= EV_EOF;
209 							break;
210 
211 						case B_OBJECT_TYPE_THREAD: {
212 							fflags |= NOTE_EXIT;
213 
214 							status_t returnValue = -1;
215 							status_t status = wait_for_thread(waitInfos[i].object, &returnValue);
216 							if (status == B_OK)
217 								data = returnValue;
218 							else
219 								data = -1;
220 							break;
221 						}
222 					}
223 				} else if ((waitInfos[i].events & B_EVENT_ERROR) != 0) {
224 					flags |= EV_ERROR;
225 					data = EINVAL;
226 				}
227 
228 				short filter = filter_from_info(waitInfos[i]);
229 				if (waitInfos[i].type == B_OBJECT_TYPE_FD && (flags & (EV_ERROR | EV_EOF)) == 0) {
230 					// Do we have both a read and a write event?
231 					if ((waitInfos[i].events & (B_EVENT_READ | B_EVENT_WRITE))
232 							== (B_EVENT_READ | B_EVENT_WRITE)) {
233 						// We do. Report both, if we can.
234 						if (nevents > 1) {
235 							EV_SET(eventlist, waitInfos[i].object,
236 								EVFILT_WRITE, flags, fflags, data, waitInfos[i].user_data);
237 							eventlist++;
238 							returnedEvents++;
239 							nevents--;
240 						}
241 						filter = EVFILT_READ;
242 					}
243 				}
244 
245 				EV_SET(eventlist, waitInfos[i].object,
246 					filter, flags, fflags, data, waitInfos[i].user_data);
247 				eventlist++;
248 				returnedEvents++;
249 				nevents--;
250 			}
251 			return returnedEvents;
252 		} else if (events < 0) {
253 			if (events == B_WOULD_BLOCK || events == B_TIMED_OUT)
254 				return 0;
255 
256 			__set_errno(events);
257 			return -1;
258 		}
259 		return 0;
260 	}
261 
262 	return 0;
263 }
264