xref: /haiku/src/libs/bsd/kqueue.cpp (revision 2141d2fe3a5df2f55f3590f67660573b50d1d1d3)
1 /*
2  * Copyright 2023, Haiku, Inc. All rights reserved.
3  * Distributed under the terms of the MIT License.
4  */
5 #include <sys/event.h>
6 
7 #include <StackOrHeapArray.h>
8 
9 #include <libroot/errno_private.h>
10 #include <syscalls.h>
11 #include <event_queue_defs.h>
12 
13 
14 extern "C" int
15 kqueue()
16 {
17 	int fd = _kern_event_queue_create(0);
18 	if (fd < 0) {
19 		__set_errno(fd);
20 		return -1;
21 	}
22 	return fd;
23 }
24 
25 
26 static short
27 filter_from_info(const event_wait_info& info)
28 {
29 	switch (info.type) {
30 		case B_OBJECT_TYPE_FD:
31 			if (info.events > 0 && (info.events & B_EVENT_WRITE) != 0)
32 				return EVFILT_WRITE;
33 			return EVFILT_READ;
34 
35 		case B_OBJECT_TYPE_THREAD:
36 			return EVFILT_PROC;
37 	}
38 
39 	return 0;
40 }
41 
42 
43 extern "C" int
44 kevent(int kq,
45 	const struct kevent *changelist, int nchanges,
46 	struct kevent *eventlist, int nevents,
47 	const struct timespec *tspec)
48 {
49 	BStackOrHeapArray<event_wait_info, 16> waitInfos(max_c(nchanges, nevents));
50 
51 	event_wait_info* waitInfo = waitInfos;
52 	int changedInfos = 0;
53 
54 	for (int i = 0; i < nchanges; i++) {
55 		waitInfo->object = changelist[i].ident;
56 		waitInfo->events = 0;
57 		waitInfo->user_data = changelist[i].udata;
58 
59 		int32 events = 0, behavior = 0;
60 		switch (changelist[i].filter) {
61 			case EVFILT_READ:
62 				waitInfo->type = B_OBJECT_TYPE_FD;
63 				events = B_EVENT_READ;
64 				break;
65 
66 			case EVFILT_WRITE:
67 				waitInfo->type = B_OBJECT_TYPE_FD;
68 				events = B_EVENT_WRITE;
69 				break;
70 
71 			case EVFILT_PROC:
72 				waitInfo->type = B_OBJECT_TYPE_THREAD;
73 				if ((changelist[i].fflags & NOTE_EXIT) != 0)
74 					events |= B_EVENT_INVALID;
75 				break;
76 
77 			default:
78 				return EINVAL;
79 		}
80 
81 		if ((changelist[i].flags & EV_ONESHOT) != 0)
82 			behavior |= B_EVENT_ONE_SHOT;
83 		if ((changelist[i].flags & EV_CLEAR) == 0)
84 			behavior |= B_EVENT_LEVEL_TRIGGERED;
85 
86 		if (changelist[i].filter == EVFILT_READ || changelist[i].filter == EVFILT_WRITE) {
87 			// kqueue treats the same file descriptor with both READ and WRITE filters
88 			// as two separate listeners. Haiku, however, treats it as one.
89 			// We rectify this here by carefully combining the two.
90 
91 			// We can't support ONESHOT for descriptors due to the separation.
92 			if ((changelist[i].flags & EV_ONESHOT) != 0) {
93 				__set_errno(EOPNOTSUPP);
94 				return -1;
95 			}
96 
97 			const short otherFilter = (changelist[i].filter == EVFILT_READ)
98 				? EVFILT_WRITE : EVFILT_READ;
99 			const int32 otherEvents = (otherFilter == EVFILT_READ)
100 				? B_EVENT_READ : B_EVENT_WRITE;
101 
102 			// First, check if the other filter is specified in this changelist.
103 			int j;
104 			for (j = 0; j < nchanges; j++) {
105 				if (changelist[j].ident != changelist[i].ident)
106 					continue;
107 				if (changelist[j].filter != otherFilter)
108 					continue;
109 
110 				// We've found it.
111 				break;
112 			}
113 			if (j < nchanges) {
114 				// It is in the list.
115 				if (j < i) {
116 					// And it's already been taken care of.
117 					continue;
118 				}
119 
120 				// Fold it into this one.
121 				if ((changelist[j].flags & EV_ADD) != 0) {
122 					waitInfo->events |= otherEvents;
123 				} else if ((changelist[j].flags & EV_DELETE) != 0) {
124 					waitInfo->events &= ~otherEvents;
125 				}
126 			} else {
127 				// It is not in the list. See if it's already set.
128 				event_wait_info info;
129 				info.type = B_OBJECT_TYPE_FD;
130 				info.object = waitInfo->object;
131 				info.events = -1;
132 
133 				status_t status = _kern_event_queue_select(kq, &info, 1);
134 				if (status == B_OK)
135 					waitInfo->events |= (info.events & otherEvents);
136 			}
137 		}
138 
139 		if ((changelist[i].flags & EV_ADD) != 0) {
140 			waitInfo->events |= events;
141 		} else if ((changelist[i].flags & EV_DELETE) != 0) {
142 			waitInfo->events &= ~events;
143 		}
144 
145 		if (waitInfo->events != 0)
146 			waitInfo->events |= behavior;
147 
148 		changedInfos++;
149 		waitInfo++;
150 	}
151 	if (changedInfos != 0) {
152 		status_t status = _kern_event_queue_select(kq, waitInfos, changedInfos);
153 		if (status != B_OK) {
154 			if (nchanges == 1 && nevents == 0) {
155 				// Special case: return the lone error directly.
156 				__set_errno(waitInfos[0].events);
157 				return -1;
158 			}
159 
160 			// Report problems as error events.
161 			int errors = 0;
162 			for (int i = 0; i < changedInfos; i++) {
163 				if (waitInfos[i].events > 0)
164 					continue;
165 				if (nevents == 0) {
166 					errors = -1;
167 					break;
168 				}
169 
170 				short filter = filter_from_info(waitInfos[i]);
171 				int64_t data = waitInfos[i].events;
172 				EV_SET(eventlist, waitInfos[i].object,
173 					filter, EV_ERROR, 0, data, waitInfos[i].user_data);
174 				eventlist++;
175 				nevents--;
176 				errors++;
177 			}
178 
179 			if (errors > 0)
180 				return errors;
181 			__set_errno(status);
182 			return -1;
183 		}
184 	}
185 
186 	if (nevents != 0) {
187 		bigtime_t timeout = 0;
188 		uint32 waitFlags = 0;
189 		if (tspec != NULL) {
190 			timeout = (tspec->tv_sec * 1000000LL) + (tspec->tv_nsec / 1000LL);
191 			waitFlags |= B_RELATIVE_TIMEOUT;
192 		}
193 
194 		ssize_t events = _kern_event_queue_wait(kq, waitInfos,
195 			max_c(1, nevents / 2), waitFlags, timeout);
196 		if (events > 0) {
197 			int returnedEvents = 0;
198 			for (ssize_t i = 0; i < events; i++) {
199 				unsigned short flags = 0;
200 				unsigned int fflags = 0;
201 				int64_t data = 0;
202 
203 				if (waitInfos[i].events < 0) {
204 					flags |= EV_ERROR;
205 					data = waitInfos[i].events;
206 				} else if ((waitInfos[i].events & B_EVENT_DISCONNECTED) != 0) {
207 					flags |= EV_EOF;
208 				} else if ((waitInfos[i].events & B_EVENT_INVALID) != 0) {
209 					switch (waitInfos[i].type) {
210 						case B_OBJECT_TYPE_FD:
211 							flags |= EV_EOF;
212 							break;
213 
214 						case B_OBJECT_TYPE_THREAD: {
215 							fflags |= NOTE_EXIT;
216 
217 							status_t returnValue = -1;
218 							status_t status = wait_for_thread(waitInfos[i].object, &returnValue);
219 							if (status == B_OK)
220 								data = returnValue;
221 							else
222 								data = -1;
223 							break;
224 						}
225 					}
226 				} else if ((waitInfos[i].events & B_EVENT_ERROR) != 0) {
227 					flags |= EV_ERROR;
228 					data = EINVAL;
229 				}
230 
231 				short filter = filter_from_info(waitInfos[i]);
232 				if (waitInfos[i].type == B_OBJECT_TYPE_FD && (flags & (EV_ERROR | EV_EOF)) == 0) {
233 					// Do we have both a read and a write event?
234 					if ((waitInfos[i].events & (B_EVENT_READ | B_EVENT_WRITE))
235 							== (B_EVENT_READ | B_EVENT_WRITE)) {
236 						// We do. Report both, if we can.
237 						if (nevents > 1) {
238 							EV_SET(eventlist, waitInfos[i].object,
239 								EVFILT_WRITE, flags, fflags, data, waitInfos[i].user_data);
240 							eventlist++;
241 							returnedEvents++;
242 							nevents--;
243 						}
244 						filter = EVFILT_READ;
245 					}
246 				}
247 
248 				EV_SET(eventlist, waitInfos[i].object,
249 					filter, flags, fflags, data, waitInfos[i].user_data);
250 				eventlist++;
251 				returnedEvents++;
252 				nevents--;
253 			}
254 			return returnedEvents;
255 		} else if (events < 0) {
256 			if (events == B_WOULD_BLOCK || events == B_TIMED_OUT)
257 				return 0;
258 
259 			__set_errno(events);
260 			return -1;
261 		}
262 		return 0;
263 	}
264 
265 	return 0;
266 }
267