xref: /haiku/src/libs/bsd/kqueue.cpp (revision fc7456e9b1ec38c941134ed6d01c438cf289381e)
1 /*
2  * Copyright 2023, Haiku, Inc. All rights reserved.
3  * Distributed under the terms of the MIT License.
4  */
5 #include <sys/event.h>
6 
7 #include <StackOrHeapArray.h>
8 
9 #include <libroot/errno_private.h>
10 #include <libroot/time_private.h>
11 #include <syscalls.h>
12 #include <event_queue_defs.h>
13 
14 
15 extern "C" int
16 kqueue()
17 {
18 	int fd = _kern_event_queue_create(0);
19 	if (fd < 0) {
20 		__set_errno(fd);
21 		return -1;
22 	}
23 	return fd;
24 }
25 
26 
27 static short
28 filter_from_info(const event_wait_info& info)
29 {
30 	switch (info.type) {
31 		case B_OBJECT_TYPE_FD:
32 			if (info.events > 0 && (info.events & B_EVENT_WRITE) != 0)
33 				return EVFILT_WRITE;
34 			return EVFILT_READ;
35 
36 		case B_OBJECT_TYPE_THREAD:
37 			return EVFILT_PROC;
38 	}
39 
40 	return 0;
41 }
42 
43 
44 extern "C" int
45 kevent(int kq,
46 	const struct kevent *changelist, int nchanges,
47 	struct kevent *eventlist, int nevents,
48 	const struct timespec *tspec)
49 {
50 	BStackOrHeapArray<event_wait_info, 16> waitInfos(max_c(nchanges, nevents));
51 
52 	event_wait_info* waitInfo = waitInfos;
53 	int changedInfos = 0;
54 
55 	for (int i = 0; i < nchanges; i++) {
56 		waitInfo->object = changelist[i].ident;
57 		waitInfo->events = 0;
58 		waitInfo->user_data = changelist[i].udata;
59 
60 		int32 events = 0, behavior = 0;
61 		switch (changelist[i].filter) {
62 			case EVFILT_READ:
63 				waitInfo->type = B_OBJECT_TYPE_FD;
64 				events = B_EVENT_READ;
65 				break;
66 
67 			case EVFILT_WRITE:
68 				waitInfo->type = B_OBJECT_TYPE_FD;
69 				events = B_EVENT_WRITE;
70 				break;
71 
72 			case EVFILT_PROC:
73 				waitInfo->type = B_OBJECT_TYPE_THREAD;
74 				if ((changelist[i].fflags & NOTE_EXIT) != 0)
75 					events |= B_EVENT_INVALID;
76 				break;
77 
78 			default:
79 				return EINVAL;
80 		}
81 
82 		if ((changelist[i].flags & EV_ONESHOT) != 0)
83 			behavior |= B_EVENT_ONE_SHOT;
84 		if ((changelist[i].flags & EV_CLEAR) == 0)
85 			behavior |= B_EVENT_LEVEL_TRIGGERED;
86 
87 		if (changelist[i].filter == EVFILT_READ || changelist[i].filter == EVFILT_WRITE) {
88 			// kqueue treats the same file descriptor with both READ and WRITE filters
89 			// as two separate listeners. Haiku, however, treats it as one.
90 			// We rectify this here by carefully combining the two.
91 
92 			// We can't support ONESHOT for descriptors due to the separation.
93 			if ((changelist[i].flags & EV_ONESHOT) != 0) {
94 				__set_errno(EOPNOTSUPP);
95 				return -1;
96 			}
97 
98 			const short otherFilter = (changelist[i].filter == EVFILT_READ)
99 				? EVFILT_WRITE : EVFILT_READ;
100 			const int32 otherEvents = (otherFilter == EVFILT_READ)
101 				? B_EVENT_READ : B_EVENT_WRITE;
102 
103 			// First, check if the other filter is specified in this changelist.
104 			int j;
105 			for (j = 0; j < nchanges; j++) {
106 				if (changelist[j].ident != changelist[i].ident)
107 					continue;
108 				if (changelist[j].filter != otherFilter)
109 					continue;
110 
111 				// We've found it.
112 				break;
113 			}
114 			if (j < nchanges) {
115 				// It is in the list.
116 				if (j < i) {
117 					// And it's already been taken care of.
118 					continue;
119 				}
120 
121 				// Fold it into this one.
122 				if ((changelist[j].flags & EV_ADD) != 0) {
123 					waitInfo->events |= otherEvents;
124 				} else if ((changelist[j].flags & EV_DELETE) != 0) {
125 					waitInfo->events &= ~otherEvents;
126 				}
127 			} else {
128 				// It is not in the list. See if it's already set.
129 				event_wait_info info;
130 				info.type = B_OBJECT_TYPE_FD;
131 				info.object = waitInfo->object;
132 				info.events = -1;
133 
134 				status_t status = _kern_event_queue_select(kq, &info, 1);
135 				if (status == B_OK)
136 					waitInfo->events |= (info.events & otherEvents);
137 			}
138 		}
139 
140 		if ((changelist[i].flags & EV_ADD) != 0) {
141 			waitInfo->events |= events;
142 		} else if ((changelist[i].flags & EV_DELETE) != 0) {
143 			waitInfo->events &= ~events;
144 		}
145 
146 		if (waitInfo->events != 0)
147 			waitInfo->events |= behavior;
148 
149 		changedInfos++;
150 		waitInfo++;
151 	}
152 	if (changedInfos != 0) {
153 		status_t status = _kern_event_queue_select(kq, waitInfos, changedInfos);
154 		if (status != B_OK) {
155 			if (nchanges == 1 && nevents == 0) {
156 				// Special case: return the lone error directly.
157 				__set_errno(waitInfos[0].events);
158 				return -1;
159 			}
160 
161 			// Report problems as error events.
162 			int errors = 0;
163 			for (int i = 0; i < changedInfos; i++) {
164 				if (waitInfos[i].events > 0)
165 					continue;
166 				if (nevents == 0) {
167 					errors = -1;
168 					break;
169 				}
170 
171 				short filter = filter_from_info(waitInfos[i]);
172 				int64_t data = waitInfos[i].events;
173 				EV_SET(eventlist, waitInfos[i].object,
174 					filter, EV_ERROR, 0, data, waitInfos[i].user_data);
175 				eventlist++;
176 				nevents--;
177 				errors++;
178 			}
179 
180 			if (errors > 0)
181 				return errors;
182 			__set_errno(status);
183 			return -1;
184 		}
185 	}
186 
187 	if (nevents != 0) {
188 		bigtime_t timeout = 0;
189 		uint32 waitFlags = 0;
190 		if (tspec != NULL) {
191 			if (!timespec_to_bigtime(*tspec, timeout)) {
192 				__set_errno(EINVAL);
193 				return -1;
194 			}
195 			waitFlags |= B_RELATIVE_TIMEOUT;
196 		}
197 
198 		ssize_t events = _kern_event_queue_wait(kq, waitInfos,
199 			max_c(1, nevents / 2), waitFlags, timeout);
200 		if (events > 0) {
201 			int returnedEvents = 0;
202 			for (ssize_t i = 0; i < events; i++) {
203 				unsigned short flags = 0;
204 				unsigned int fflags = 0;
205 				int64_t data = 0;
206 
207 				if (waitInfos[i].events < 0) {
208 					flags |= EV_ERROR;
209 					data = waitInfos[i].events;
210 				} else if ((waitInfos[i].events & B_EVENT_DISCONNECTED) != 0) {
211 					flags |= EV_EOF;
212 				} else if ((waitInfos[i].events & B_EVENT_INVALID) != 0) {
213 					switch (waitInfos[i].type) {
214 						case B_OBJECT_TYPE_FD:
215 							flags |= EV_EOF;
216 							break;
217 
218 						case B_OBJECT_TYPE_THREAD: {
219 							fflags |= NOTE_EXIT;
220 
221 							status_t returnValue = -1;
222 							status_t status = wait_for_thread(waitInfos[i].object, &returnValue);
223 							if (status == B_OK)
224 								data = returnValue;
225 							else
226 								data = -1;
227 							break;
228 						}
229 					}
230 				} else if ((waitInfos[i].events & B_EVENT_ERROR) != 0) {
231 					flags |= EV_ERROR;
232 					data = EINVAL;
233 				}
234 
235 				short filter = filter_from_info(waitInfos[i]);
236 				if (waitInfos[i].type == B_OBJECT_TYPE_FD && (flags & (EV_ERROR | EV_EOF)) == 0) {
237 					// Do we have both a read and a write event?
238 					if ((waitInfos[i].events & (B_EVENT_READ | B_EVENT_WRITE))
239 							== (B_EVENT_READ | B_EVENT_WRITE)) {
240 						// We do. Report both, if we can.
241 						if (nevents > 1) {
242 							EV_SET(eventlist, waitInfos[i].object,
243 								EVFILT_WRITE, flags, fflags, data, waitInfos[i].user_data);
244 							eventlist++;
245 							returnedEvents++;
246 							nevents--;
247 						}
248 						filter = EVFILT_READ;
249 					}
250 				}
251 
252 				EV_SET(eventlist, waitInfos[i].object,
253 					filter, flags, fflags, data, waitInfos[i].user_data);
254 				eventlist++;
255 				returnedEvents++;
256 				nevents--;
257 			}
258 			return returnedEvents;
259 		} else if (events < 0) {
260 			if (events == B_WOULD_BLOCK || events == B_TIMED_OUT)
261 				return 0;
262 
263 			__set_errno(events);
264 			return -1;
265 		}
266 		return 0;
267 	}
268 
269 	return 0;
270 }
271