xref: /haiku/src/system/kernel/posix/xsi_semaphore.cpp (revision e1c4049fed1047bdb957b0529e1921e97ef94770)
1 /*
2  * Copyright 2008-2023, Haiku, Inc. All rights reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Salvatore Benedetto <salvatore.benedetto@gmail.com>
7  */
8 
9 #include <posix/xsi_semaphore.h>
10 
11 #include <new>
12 
13 #include <sys/ipc.h>
14 #include <sys/types.h>
15 
16 #include <OS.h>
17 
18 #include <kernel.h>
19 #include <syscall_restart.h>
20 
21 #include <util/atomic.h>
22 #include <util/AutoLock.h>
23 #include <util/DoublyLinkedList.h>
24 #include <util/OpenHashTable.h>
25 #include <AutoDeleter.h>
26 #include <StackOrHeapArray.h>
27 
28 
29 //#define TRACE_XSI_SEM
30 #ifdef TRACE_XSI_SEM
31 #	define TRACE(x)			dprintf x
32 #	define TRACE_ERROR(x)	dprintf x
33 #else
34 #	define TRACE(x)			/* nothing */
35 #	define TRACE_ERROR(x)	dprintf x
36 #endif
37 
38 
39 namespace {
40 
41 class XsiSemaphoreSet;
42 
43 struct sem_undo : DoublyLinkedListLinkImpl<sem_undo> {
44 	sem_undo(XsiSemaphoreSet *semaphoreSet, Team *team, int16 *undoValues)
45 		:
46 		semaphore_set(semaphoreSet),
47 		team(team),
48 		undo_values(undoValues)
49 	{
50 	}
51 
52 	DoublyLinkedListLink<sem_undo>		team_link;
53 	XsiSemaphoreSet						*semaphore_set;
54 	Team								*team;
55 	int16								*undo_values;
56 };
57 
58 typedef DoublyLinkedList<sem_undo> UndoList;
59 typedef DoublyLinkedList<sem_undo,
60 	DoublyLinkedListMemberGetLink<sem_undo, &sem_undo::team_link> > TeamList;
61 
62 } // namespace
63 
64 
65 // Forward declared in global namespace.
66 struct xsi_sem_context {
67 	xsi_sem_context()
68 	{
69 		mutex_init(&lock, "Private team undo_list lock");
70 	}
71 
72 	~xsi_sem_context()
73 	{
74 		mutex_destroy(&lock);
75 	}
76 
77 	TeamList	undo_list;
78 	mutex		lock;
79 };
80 
81 
82 namespace {
83 
84 // Xsi semaphore definition
85 class XsiSemaphore {
86 public:
87 	XsiSemaphore()
88 		:
89 		fLastPidOperation(0),
90 		fValue(0)
91 	{
92 		fWaitingToIncrease.Init(this, "XsiSemaphore");
93 		fWaitingToBeZero.Init(this, "XsiSemaphore");
94 	}
95 
96 	~XsiSemaphore()
97 	{
98 		// For some reason the semaphore is getting destroyed.
99 		// Wake up any remaing awaiting threads
100 		fWaitingToIncrease.NotifyAll(EIDRM);
101 		fWaitingToBeZero.NotifyAll(EIDRM);
102 
103 		// No need to remove any sem_undo request still
104 		// hanging. When the process exit and doesn't found
105 		// the semaphore set, it'll just ignore the sem_undo
106 		// request. That's better than iterating trough the
107 		// whole sUndoList. Beside we don't know our semaphore
108 		// number nor our semaphore set id.
109 	}
110 
111 	// We return true in case the operation causes the
112 	// caller to wait, so it can undo all the operations
113 	// previously done
114 	bool Add(short value)
115 	{
116 		if ((int)(fValue + value) < 0) {
117 			TRACE(("XsiSemaphore::Add: potentially going to sleep\n"));
118 			return true;
119 		} else {
120 			fValue += value;
121 			if (fValue == 0)
122 				WakeUpThreads(true);
123 			else if (fValue > 0)
124 				WakeUpThreads(false);
125 			return false;
126 		}
127 	}
128 
129 	static void Dequeue(ConditionVariableEntry *queueEntry)
130 	{
131 		queueEntry->Wait(B_RELATIVE_TIMEOUT, 0);
132 	}
133 
134 	void Enqueue(ConditionVariableEntry *queueEntry, bool waitForZero)
135 	{
136 		if (waitForZero) {
137 			fWaitingToBeZero.Add(queueEntry);
138 		} else {
139 			fWaitingToIncrease.Add(queueEntry);
140 		}
141 	}
142 
143 	pid_t LastPid() const
144 	{
145 		return fLastPidOperation;
146 	}
147 
148 	void Revert(short value)
149 	{
150 		fValue -= value;
151 		if (fValue == 0)
152 			WakeUpThreads(true);
153 		else if (fValue > 0)
154 			WakeUpThreads(false);
155 	}
156 
157 	void SetPid(pid_t pid)
158 	{
159 		fLastPidOperation = pid;
160 	}
161 
162 	void SetValue(ushort value)
163 	{
164 		fValue = value;
165 	}
166 
167 	ushort ThreadsWaitingToIncrease()
168 	{
169 		return fWaitingToIncrease.EntriesCount();
170 	}
171 
172 	ushort ThreadsWaitingToBeZero()
173 	{
174 		return fWaitingToBeZero.EntriesCount();
175 	}
176 
177 	ushort Value() const
178 	{
179 		return fValue;
180 	}
181 
182 	void WakeUpThreads(bool waitingForZero)
183 	{
184 		if (waitingForZero) {
185 			fWaitingToBeZero.NotifyAll();
186 		} else {
187 			fWaitingToIncrease.NotifyAll();
188 		}
189 	}
190 
191 private:
192 	pid_t				fLastPidOperation;				// sempid
193 	ushort				fValue;							// semval
194 
195 	ConditionVariable	fWaitingToIncrease;
196 	ConditionVariable	fWaitingToBeZero;
197 };
198 
199 #define MAX_XSI_SEMS_PER_TEAM	128
200 
201 // Xsi semaphore set definition (semid_ds)
202 class XsiSemaphoreSet {
203 public:
204 	XsiSemaphoreSet(int numberOfSemaphores, int flags)
205 		: fInitOK(false),
206 		fLastSemctlTime((time_t)real_time_clock()),
207 		fLastSemopTime(0),
208 		fNumberOfSemaphores(numberOfSemaphores),
209 		fSemaphores(0)
210 	{
211 		mutex_init(&fLock, "XsiSemaphoreSet private mutex");
212 		SetIpcKey((key_t)-1);
213 		SetPermissions(flags);
214 		fSemaphores = new(std::nothrow) XsiSemaphore[numberOfSemaphores];
215 		if (fSemaphores == NULL) {
216 			TRACE_ERROR(("XsiSemaphoreSet::XsiSemaphore(): failed to allocate "
217 				"XsiSemaphore object\n"));
218 		} else
219 			fInitOK = true;
220 	}
221 
222 	~XsiSemaphoreSet()
223 	{
224 		TRACE(("XsiSemaphoreSet::~XsiSemaphoreSet(): removing semaphore "
225 			"set %d\n", fID));
226 		mutex_destroy(&fLock);
227 		delete[] fSemaphores;
228 	}
229 
230 	void ClearUndo(ushort semaphoreNumber)
231 	{
232 		Team *team = thread_get_current_thread()->team;
233 		UndoList::Iterator iterator = fUndoList.GetIterator();
234 		while (iterator.HasNext()) {
235 			struct sem_undo *current = iterator.Next();
236 			if (current->team == team) {
237 				TRACE(("XsiSemaphoreSet::ClearUndo: teamID = %d, "
238 					"semaphoreSetID = %d, semaphoreNumber = %d\n",
239 					fID, semaphoreNumber, (int)team->id));
240 				MutexLocker _(team->xsi_sem_context->lock);
241 				current->undo_values[semaphoreNumber] = 0;
242 				return;
243 			}
244 		}
245 	}
246 
247 	void ClearUndos()
248 	{
249 		// Clear all undo_values (POSIX semadj equivalent)
250 		// of the calling team. This happens only on semctl SETALL.
251 		Team *team = thread_get_current_thread()->team;
252 		DoublyLinkedList<sem_undo>::Iterator iterator = fUndoList.GetIterator();
253 		while (iterator.HasNext()) {
254 			struct sem_undo *current = iterator.Next();
255 			if (current->team == team) {
256 				TRACE(("XsiSemaphoreSet::ClearUndos: teamID = %d, "
257 					"semaphoreSetID = %d\n", (int)team->id, fID));
258 				MutexLocker _(team->xsi_sem_context->lock);
259 				memset(current->undo_values, 0,
260 					sizeof(int16) * fNumberOfSemaphores);
261 				return;
262 			}
263 		}
264 	}
265 
266 	void DoIpcSet(struct semid_ds *result)
267 	{
268 		fPermissions.uid = result->sem_perm.uid;
269 		fPermissions.gid = result->sem_perm.gid;
270 		fPermissions.mode = (fPermissions.mode & ~0x01ff)
271 			| (result->sem_perm.mode & 0x01ff);
272 	}
273 
274 	bool HasPermission() const
275 	{
276 		if ((fPermissions.mode & S_IWOTH) != 0)
277 			return true;
278 
279 		uid_t uid = geteuid();
280 		if (uid == 0 || (uid == fPermissions.uid
281 			&& (fPermissions.mode & S_IWUSR) != 0))
282 			return true;
283 
284 		gid_t gid = getegid();
285 		if (gid == fPermissions.gid && (fPermissions.mode & S_IWGRP) != 0)
286 			return true;
287 
288 		return false;
289 	}
290 
291 	bool HasReadPermission() const
292 	{
293 		// TODO: fix this
294 		return HasPermission();
295 	}
296 
297 	int ID() const
298 	{
299 		return fID;
300 	}
301 
302 	bool InitOK()
303 	{
304 		return fInitOK;
305 	}
306 
307 	key_t IpcKey() const
308 	{
309 		return fPermissions.key;
310 	}
311 
312 	struct ipc_perm IpcPermission() const
313 	{
314 		return fPermissions;
315 	}
316 
317 	time_t LastSemctlTime() const
318 	{
319 		return fLastSemctlTime;
320 	}
321 
322 	time_t LastSemopTime() const
323 	{
324 		return fLastSemopTime;
325 	}
326 
327 	mutex &Lock()
328 	{
329 		return fLock;
330 	}
331 
332 	ushort NumberOfSemaphores() const
333 	{
334 		return fNumberOfSemaphores;
335 	}
336 
337 	// Record the sem_undo operation into our private fUndoList and
338 	// the team undo_list. The only limit here is the memory needed
339 	// for creating a new sem_undo structure.
340 	int RecordUndo(ushort semaphoreNumber, short value)
341 	{
342 		// Look if there is already a record from the team caller
343 		// for the same semaphore set
344 		bool notFound = true;
345 		Team *team = thread_get_current_thread()->team;
346 		DoublyLinkedList<sem_undo>::Iterator iterator = fUndoList.GetIterator();
347 		while (iterator.HasNext()) {
348 			struct sem_undo *current = iterator.Next();
349 			if (current->team == team) {
350 				// Update its undo value
351 				MutexLocker _(team->xsi_sem_context->lock);
352 				int newValue = current->undo_values[semaphoreNumber] + value;
353 				if (newValue > USHRT_MAX || newValue < -USHRT_MAX) {
354 					TRACE_ERROR(("XsiSemaphoreSet::RecordUndo: newValue %d "
355 						"out of range\n", newValue));
356 					return ERANGE;
357 				}
358 				current->undo_values[semaphoreNumber] = newValue;
359 				notFound = false;
360 				TRACE(("XsiSemaphoreSet::RecordUndo: found record. Team = %d, "
361 					"semaphoreSetID = %d, semaphoreNumber = %d, value = %d\n",
362 					(int)team->id, fID, semaphoreNumber,
363 					current->undo_values[semaphoreNumber]));
364 				break;
365 			}
366 		}
367 
368 		if (notFound) {
369 			// First sem_undo request from this team for this
370 			// semaphore set
371 			int16 *undoValues
372 				= (int16 *)malloc(sizeof(int16) * fNumberOfSemaphores);
373 			if (undoValues == NULL)
374 				return B_NO_MEMORY;
375 			struct sem_undo *request
376 				= new(std::nothrow) sem_undo(this, team, undoValues);
377 			if (request == NULL) {
378 				free(undoValues);
379 				return B_NO_MEMORY;
380 			}
381 			memset(request->undo_values, 0, sizeof(int16) * fNumberOfSemaphores);
382 			request->undo_values[semaphoreNumber] = value;
383 
384 			// Check if it's the very first sem_undo request for this team
385 			xsi_sem_context *context = atomic_pointer_get(&team->xsi_sem_context);
386 			if (context == NULL) {
387 				// Create the context
388 				context = new(std::nothrow) xsi_sem_context;
389 				if (context == NULL) {
390 					free(request->undo_values);
391 					delete request;
392 					return B_NO_MEMORY;
393 				}
394 				// Since we don't hold any global lock, someone
395 				// else could have been quicker than us, so we have
396 				// to delete the one we just created and use the one
397 				// in place.
398 				if (atomic_pointer_test_and_set(&team->xsi_sem_context, context,
399 					(xsi_sem_context *)NULL) != NULL)
400 					delete context;
401 			}
402 
403 			// Add the request to both XsiSemaphoreSet and team list
404 			fUndoList.Add(request);
405 			MutexLocker _(team->xsi_sem_context->lock);
406 			team->xsi_sem_context->undo_list.Add(request);
407 			TRACE(("XsiSemaphoreSet::RecordUndo: new record added. Team = %d, "
408 				"semaphoreSetID = %d, semaphoreNumber = %d, value = %d\n",
409 				(int)team->id, fID, semaphoreNumber, value));
410 		}
411 		return B_OK;
412 	}
413 
414 	void RevertUndo(ushort semaphoreNumber, short value)
415 	{
416 		// This can be called only when RecordUndo fails.
417 		Team *team = thread_get_current_thread()->team;
418 		DoublyLinkedList<sem_undo>::Iterator iterator = fUndoList.GetIterator();
419 		while (iterator.HasNext()) {
420 			struct sem_undo *current = iterator.Next();
421 			if (current->team == team) {
422 				MutexLocker _(team->xsi_sem_context->lock);
423 				fSemaphores[semaphoreNumber].Revert(value);
424 				break;
425 			}
426 		}
427 	}
428 
429 	XsiSemaphore* Semaphore(int nth) const
430 	{
431 		return &fSemaphores[nth];
432 	}
433 
434 	uint32 SequenceNumber() const
435 	{
436 		return fSequenceNumber;
437 	}
438 
439 	// Implemented after sGlobalSequenceNumber is declared
440 	void SetID();
441 
442 	void SetIpcKey(key_t key)
443 	{
444 		fPermissions.key = key;
445 	}
446 
447 	void SetLastSemctlTime()
448 	{
449 		fLastSemctlTime = real_time_clock();
450 	}
451 
452 	void SetLastSemopTime()
453 	{
454 		fLastSemopTime = real_time_clock();
455 	}
456 
457 	void SetPermissions(int flags)
458 	{
459 		fPermissions.uid = fPermissions.cuid = geteuid();
460 		fPermissions.gid = fPermissions.cgid = getegid();
461 		fPermissions.mode = (flags & 0x01ff);
462 	}
463 
464 	UndoList &GetUndoList()
465 	{
466 		return fUndoList;
467 	}
468 
469 	XsiSemaphoreSet*& Link()
470 	{
471 		return fLink;
472 	}
473 
474 private:
475 	int							fID;					// semaphore set id
476 	bool						fInitOK;
477 	time_t						fLastSemctlTime;		// sem_ctime
478 	time_t						fLastSemopTime;			// sem_otime
479 	mutex 						fLock;					// private lock
480 	ushort						fNumberOfSemaphores;	// sem_nsems
481 	struct ipc_perm				fPermissions;			// sem_perm
482 	XsiSemaphore				*fSemaphores;			// array of semaphores
483 	uint32						fSequenceNumber;		// used as a second id
484 	UndoList					fUndoList;				// undo list requests
485 
486 	XsiSemaphoreSet*			fLink;
487 };
488 
489 // Xsi semaphore set hash table
490 struct SemaphoreHashTableDefinition {
491 	typedef int					KeyType;
492 	typedef XsiSemaphoreSet		ValueType;
493 
494 	size_t HashKey (const int key) const
495 	{
496 		return (size_t)key;
497 	}
498 
499 	size_t Hash(XsiSemaphoreSet *variable) const
500 	{
501 		return (size_t)variable->ID();
502 	}
503 
504 	bool Compare(const int key, XsiSemaphoreSet *variable) const
505 	{
506 		return (int)key == (int)variable->ID();
507 	}
508 
509 	XsiSemaphoreSet*& GetLink(XsiSemaphoreSet *variable) const
510 	{
511 		return variable->Link();
512 	}
513 };
514 
515 
516 // IPC class
517 class Ipc {
518 public:
519 	Ipc(key_t key)
520 		: fKey(key),
521 		fSemaphoreSetId(-1)
522 	{
523 	}
524 
525 	key_t Key() const
526 	{
527 		return fKey;
528 	}
529 
530 	int SemaphoreSetID() const
531 	{
532 		return fSemaphoreSetId;
533 	}
534 
535 	void SetSemaphoreSetID(XsiSemaphoreSet *semaphoreSet)
536 	{
537 		fSemaphoreSetId = semaphoreSet->ID();
538 	}
539 
540 	Ipc*& Link()
541 	{
542 		return fLink;
543 	}
544 
545 private:
546 	key_t				fKey;
547 	int					fSemaphoreSetId;
548 	Ipc*				fLink;
549 };
550 
551 
552 struct IpcHashTableDefinition {
553 	typedef key_t	KeyType;
554 	typedef Ipc		ValueType;
555 
556 	size_t HashKey (const key_t key) const
557 	{
558 		return (size_t)(key);
559 	}
560 
561 	size_t Hash(Ipc *variable) const
562 	{
563 		return (size_t)HashKey(variable->Key());
564 	}
565 
566 	bool Compare(const key_t key, Ipc *variable) const
567 	{
568 		return (key_t)key == (key_t)variable->Key();
569 	}
570 
571 	Ipc*& GetLink(Ipc *variable) const
572 	{
573 		return variable->Link();
574 	}
575 };
576 
577 } // namespace
578 
579 
580 // Arbitrary limit
581 #define MAX_XSI_SEMAPHORE		4096
582 #define MAX_XSI_SEMAPHORE_SET	2048
583 static BOpenHashTable<IpcHashTableDefinition> sIpcHashTable;
584 static BOpenHashTable<SemaphoreHashTableDefinition> sSemaphoreHashTable;
585 
586 static mutex sIpcLock;
587 static mutex sXsiSemaphoreSetLock;
588 
589 static uint32 sGlobalSequenceNumber = 1;
590 static int32 sXsiSemaphoreCount = 0;
591 static int32 sXsiSemaphoreSetCount = 0;
592 
593 
594 //	#pragma mark -
595 
596 
597 void
598 XsiSemaphoreSet::SetID()
599 {
600 	fID = real_time_clock();
601 	// The lock is held before calling us
602 	while (true) {
603 		if (sSemaphoreHashTable.Lookup(fID) == NULL)
604 			break;
605 		fID = (fID + 1) % INT_MAX;
606 	}
607 	sGlobalSequenceNumber = (sGlobalSequenceNumber + 1) % UINT_MAX;
608 	fSequenceNumber = sGlobalSequenceNumber;
609 }
610 
611 
612 //	#pragma mark - Kernel exported API
613 
614 
615 void
616 xsi_sem_init()
617 {
618 	// Initialize hash tables
619 	status_t status = sIpcHashTable.Init();
620 	if (status != B_OK)
621 		panic("xsi_sem_init() failed to initialize ipc hash table\n");
622 	status =  sSemaphoreHashTable.Init();
623 	if (status != B_OK)
624 		panic("xsi_sem_init() failed to initialize semaphore hash table\n");
625 
626 	mutex_init(&sIpcLock, "global POSIX semaphore IPC table");
627 	mutex_init(&sXsiSemaphoreSetLock, "global POSIX xsi sem table");
628 }
629 
630 
631 /*!	Function called on team exit to process any sem_undo requests */
632 void
633 xsi_sem_undo(Team *team)
634 {
635 	if (team->xsi_sem_context == NULL)
636 		return;
637 
638 	// By acquiring first the semaphore hash table lock
639 	// we make sure the semaphore set in our sem_undo
640 	// list won't get removed by IPC_RMID call
641 	MutexLocker _(sXsiSemaphoreSetLock);
642 
643 	// Process all sem_undo request in the team sem undo list
644 	// if any
645 	TeamList::Iterator iterator
646 		= team->xsi_sem_context->undo_list.GetIterator();
647 	while (iterator.HasNext()) {
648 		struct sem_undo *current = iterator.Next();
649 		XsiSemaphoreSet *semaphoreSet = current->semaphore_set;
650 		// Acquire the set lock in order to prevent race
651 		// condition with RecordUndo
652 		MutexLocker setLocker(semaphoreSet->Lock());
653 		MutexLocker _(team->xsi_sem_context->lock);
654 		// Revert the changes done by this process
655 		for (int i = 0; i < semaphoreSet->NumberOfSemaphores(); i++)
656 			if (current->undo_values[i] != 0) {
657 				TRACE(("xsi_sem_undo: TeamID = %d, SemaphoreSetID = %d, "
658 					"SemaphoreNumber = %d, undo value = %d\n", (int)team->id,
659 					semaphoreSet->ID(), i, (int)current->undo_values[i]));
660 				semaphoreSet->Semaphore(i)->Revert(current->undo_values[i]);
661 			}
662 
663 		// Remove and free the sem_undo structure from both lists
664 		iterator.Remove();
665 		semaphoreSet->GetUndoList().Remove(current);
666 		delete current;
667 	}
668 	delete team->xsi_sem_context;
669 	team->xsi_sem_context = NULL;
670 }
671 
672 
673 //	#pragma mark - Syscalls
674 
675 
676 int
677 _user_xsi_semget(key_t key, int numberOfSemaphores, int flags)
678 {
679 	TRACE(("xsi_semget: key = %d, numberOfSemaphores = %d, flags = %d\n",
680 		(int)key, numberOfSemaphores, flags));
681 	XsiSemaphoreSet *semaphoreSet = NULL;
682 	Ipc *ipcKey = NULL;
683 
684 	// Default assumption
685 	bool isPrivate = true;
686 
687 	MutexLocker ipcLocker(sIpcLock);
688 	if (key != IPC_PRIVATE) {
689 		isPrivate = false;
690 		// Check if key already exist, if it does it already has a semaphore
691 		// set associated with it
692 		ipcKey = sIpcHashTable.Lookup(key);
693 		if (ipcKey != NULL) {
694 			// The IPC key exist and it already has a semaphore
695 			if ((flags & IPC_CREAT) && (flags & IPC_EXCL)) {
696 				TRACE(("xsi_semget: key %d already exist\n", (int)key));
697 				return EEXIST;
698 			}
699 			int semaphoreSetID = ipcKey->SemaphoreSetID();
700 
701 			MutexLocker semaphoreSetLocker(sXsiSemaphoreSetLock);
702 			semaphoreSet = sSemaphoreHashTable.Lookup(semaphoreSetID);
703 			if (semaphoreSet == NULL) {
704 				TRACE(("xsi_semget: calling process has no semaphore, "
705 					"key %d\n", (int)key));
706 				return EINVAL;
707 			}
708 			if (!semaphoreSet->HasPermission()) {
709 				TRACE(("xsi_semget: calling process has no permission "
710 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
711 					(int)key));
712 				return EACCES;
713 			}
714 			if (numberOfSemaphores > semaphoreSet->NumberOfSemaphores()
715 					&& numberOfSemaphores != 0) {
716 				TRACE(("xsi_semget: numberOfSemaphores greater than the "
717 					"one associated with semaphore %d, key %d\n",
718 					semaphoreSet->ID(), (int)key));
719 				return EINVAL;
720 			}
721 
722 			return semaphoreSet->ID();
723 		}
724 
725 		// The ipc key does not exist. Create it and add it to the system
726 		if (!(flags & IPC_CREAT)) {
727 			TRACE(("xsi_semget: key %d does not exist, but the "
728 				"caller did not ask for creation\n",(int)key));
729 			return ENOENT;
730 		}
731 		ipcKey = new(std::nothrow) Ipc(key);
732 		if (ipcKey == NULL) {
733 			TRACE_ERROR(("xsi_semget: failed to create new Ipc object "
734 				"for key %d\n",	(int)key));
735 			return ENOMEM;
736 		}
737 	}
738 
739 	// Create a new semaphore set for this key
740 	if (numberOfSemaphores <= 0
741 			|| numberOfSemaphores >= MAX_XSI_SEMS_PER_TEAM) {
742 		TRACE_ERROR(("xsi_semget: numberOfSemaphores out of range\n"));
743 		delete ipcKey;
744 		return EINVAL;
745 	}
746 	if (sXsiSemaphoreCount >= MAX_XSI_SEMAPHORE
747 			|| sXsiSemaphoreSetCount >= MAX_XSI_SEMAPHORE_SET) {
748 		TRACE_ERROR(("xsi_semget: reached limit of maximum number of "
749 			"semaphores allowed\n"));
750 		delete ipcKey;
751 		return ENOSPC;
752 	}
753 
754 	semaphoreSet = new(std::nothrow) XsiSemaphoreSet(numberOfSemaphores, flags);
755 	if (semaphoreSet == NULL || !semaphoreSet->InitOK()) {
756 		TRACE_ERROR(("xsi_semget: failed to allocate a new xsi "
757 			"semaphore set\n"));
758 		delete semaphoreSet;
759 		delete ipcKey;
760 		return ENOMEM;
761 	}
762 
763 	atomic_add(&sXsiSemaphoreCount, numberOfSemaphores);
764 	atomic_add(&sXsiSemaphoreSetCount, 1);
765 
766 	MutexLocker semaphoreSetLocker(sXsiSemaphoreSetLock);
767 	semaphoreSet->SetID();
768 	if (isPrivate) {
769 		semaphoreSet->SetIpcKey((key_t)-1);
770 	} else {
771 		sIpcHashTable.Insert(ipcKey);
772 		semaphoreSet->SetIpcKey(key);
773 		ipcKey->SetSemaphoreSetID(semaphoreSet);
774 	}
775 	sSemaphoreHashTable.Insert(semaphoreSet);
776 	TRACE(("semget: new set = %d created, sequence = %ld\n",
777 		semaphoreSet->ID(), semaphoreSet->SequenceNumber()));
778 
779 	return semaphoreSet->ID();
780 }
781 
782 
783 int
784 _user_xsi_semctl(int semaphoreID, int semaphoreNumber, int command,
785 	union semun *_args)
786 {
787 	TRACE(("xsi_semctl: semaphoreID = %d, semaphoreNumber = %d, command = %d\n",
788 		semaphoreID, semaphoreNumber, command));
789 
790 	union semun args = {0};
791 	if (_args != NULL) {
792 		if (!IS_USER_ADDRESS(_args)
793 				|| user_memcpy(&args, _args, sizeof(union semun)) != B_OK)
794 			return B_BAD_ADDRESS;
795 	}
796 
797 	MutexLocker ipcHashLocker(sIpcLock);
798 	MutexLocker setHashLocker(sXsiSemaphoreSetLock);
799 	XsiSemaphoreSet *semaphoreSet = sSemaphoreHashTable.Lookup(semaphoreID);
800 	if (semaphoreSet == NULL) {
801 		TRACE(("xsi_semctl: semaphore set id %d not valid\n",
802 			semaphoreID));
803 		return EINVAL;
804 	}
805 	if (semaphoreNumber < 0
806 		|| semaphoreNumber > semaphoreSet->NumberOfSemaphores()) {
807 		TRACE(("xsi_semctl: semaphore number %d not valid for "
808 			"semaphore %d\n", semaphoreNumber, semaphoreID));
809 		return EINVAL;
810 	}
811 
812 	// Lock the semaphore set itself and release both the semaphore
813 	// set hash table lock and the ipc hash table lock _only_ if
814 	// the command it's not IPC_RMID, this prevents undesidered
815 	// situation from happening while (hopefully) improving the
816 	// concurrency.
817 	MutexLocker setLocker(semaphoreSet->Lock());
818 	if (command != IPC_RMID) {
819 		setHashLocker.Unlock();
820 		ipcHashLocker.Unlock();
821 	}
822 
823 	int result = 0;
824 	XsiSemaphore *semaphore = semaphoreSet->Semaphore(semaphoreNumber);
825 	switch (command) {
826 		case GETVAL: {
827 			if (!semaphoreSet->HasReadPermission()) {
828 				TRACE(("xsi_semctl: calling process has not permission "
829 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
830 					(int)semaphoreSet->IpcKey()));
831 				result = EACCES;
832 			} else
833 				result = semaphore->Value();
834 			break;
835 		}
836 
837 		case SETVAL: {
838 			if (!semaphoreSet->HasPermission()) {
839 				TRACE(("xsi_semctl: calling process has not permission "
840 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
841 					(int)semaphoreSet->IpcKey()));
842 				result = EACCES;
843 			} else {
844 				if (args.val > USHRT_MAX) {
845 					TRACE(("xsi_semctl: value %d out of range\n", args.val));
846 					result = ERANGE;
847 				} else {
848 					semaphore->SetValue(args.val);
849 					semaphoreSet->ClearUndo(semaphoreNumber);
850 				}
851 			}
852 			break;
853 		}
854 
855 		case GETPID: {
856 			if (!semaphoreSet->HasReadPermission()) {
857 				TRACE(("xsi_semctl: calling process has not permission "
858 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
859 					(int)semaphoreSet->IpcKey()));
860 				result = EACCES;
861 			} else
862 				result = semaphore->LastPid();
863 			break;
864 		}
865 
866 		case GETNCNT: {
867 			if (!semaphoreSet->HasReadPermission()) {
868 				TRACE(("xsi_semctl: calling process has not permission "
869 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
870 					(int)semaphoreSet->IpcKey()));
871 				result = EACCES;
872 			} else
873 				result = semaphore->ThreadsWaitingToIncrease();
874 			break;
875 		}
876 
877 		case GETZCNT: {
878 			if (!semaphoreSet->HasReadPermission()) {
879 				TRACE(("xsi_semctl: calling process has not permission "
880 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
881 					(int)semaphoreSet->IpcKey()));
882 				result = EACCES;
883 			} else
884 				result = semaphore->ThreadsWaitingToBeZero();
885 			break;
886 		}
887 
888 		case GETALL: {
889 			if (!semaphoreSet->HasReadPermission()) {
890 				TRACE(("xsi_semctl: calling process has not read "
891 					"permission on semaphore %d, key %d\n", semaphoreSet->ID(),
892 					(int)semaphoreSet->IpcKey()));
893 				result = EACCES;
894 			} else
895 				for (int i = 0; i < semaphoreSet->NumberOfSemaphores(); i++) {
896 					semaphore = semaphoreSet->Semaphore(i);
897 					unsigned short value = semaphore->Value();
898 					if (user_memcpy(args.array + i, &value,
899 							sizeof(unsigned short)) != B_OK) {
900 						TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
901 						result = B_BAD_ADDRESS;
902 						break;
903 					}
904 				}
905 			break;
906 		}
907 
908 		case SETALL: {
909 			if (!semaphoreSet->HasPermission()) {
910 				TRACE(("xsi_semctl: calling process has not permission "
911 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
912 					(int)semaphoreSet->IpcKey()));
913 				result = EACCES;
914 			} else {
915 				bool doClear = true;
916 				for (int i = 0; i < semaphoreSet->NumberOfSemaphores(); i++) {
917 					semaphore = semaphoreSet->Semaphore(i);
918 					unsigned short value;
919 					if (user_memcpy(&value, args.array + i,
920 							sizeof(unsigned short)) != B_OK) {
921 						TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
922 						result = B_BAD_ADDRESS;
923 						doClear = false;
924 						break;
925 					} else
926 						semaphore->SetValue(value);
927 				}
928 				if (doClear)
929 					semaphoreSet->ClearUndos();
930 			}
931 			break;
932 		}
933 
934 		case IPC_STAT: {
935 			if (!semaphoreSet->HasReadPermission()) {
936 				TRACE(("xsi_semctl: calling process has not read "
937 					"permission on semaphore %d, key %d\n", semaphoreSet->ID(),
938 					(int)semaphoreSet->IpcKey()));
939 				result = EACCES;
940 			} else {
941 				struct semid_ds sem;
942 				sem.sem_perm = semaphoreSet->IpcPermission();
943 				sem.sem_nsems = semaphoreSet->NumberOfSemaphores();
944 				sem.sem_otime = semaphoreSet->LastSemopTime();
945 				sem.sem_ctime = semaphoreSet->LastSemctlTime();
946 				if (user_memcpy(args.buf, &sem, sizeof(struct semid_ds))
947 						< B_OK) {
948 					TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
949 					result = B_BAD_ADDRESS;
950 				}
951 			}
952 			break;
953 		}
954 
955 		case IPC_SET: {
956 			if (!semaphoreSet->HasPermission()) {
957 				TRACE(("xsi_semctl: calling process has not "
958 					"permission on semaphore %d, key %d\n",
959 					semaphoreSet->ID(), (int)semaphoreSet->IpcKey()));
960 				result = EACCES;
961 			} else {
962 				struct semid_ds sem;
963 				if (user_memcpy(&sem, args.buf, sizeof(struct semid_ds))
964 						!= B_OK) {
965 					TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
966 					result = B_BAD_ADDRESS;
967 				} else
968 					semaphoreSet->DoIpcSet(&sem);
969 			}
970 			break;
971 		}
972 
973 		case IPC_RMID: {
974 			// If this was the command, we are still holding
975 			// the semaphore set hash table lock along with the
976 			// ipc hash table lock and the semaphore set lock
977 			// itself, this way we are sure there is not
978 			// one waiting in the queue of the mutex.
979 			if (!semaphoreSet->HasPermission()) {
980 				TRACE(("xsi_semctl: calling process has not "
981 					"permission on semaphore %d, key %d\n",
982 					semaphoreSet->ID(), (int)semaphoreSet->IpcKey()));
983 				return EACCES;
984 			}
985 			key_t key = semaphoreSet->IpcKey();
986 			Ipc *ipcKey = NULL;
987 			if (key != -1) {
988 				ipcKey = sIpcHashTable.Lookup(key);
989 				sIpcHashTable.Remove(ipcKey);
990 			}
991 			sSemaphoreHashTable.Remove(semaphoreSet);
992 			// Wake up of threads waiting on this set
993 			// happens in the destructor
994 			if (key != -1)
995 				delete ipcKey;
996 			atomic_add(&sXsiSemaphoreCount, -semaphoreSet->NumberOfSemaphores());
997 			atomic_add(&sXsiSemaphoreSetCount, -1);
998 			// Remove any sem_undo request
999 			while (struct sem_undo *entry
1000 					= semaphoreSet->GetUndoList().RemoveHead()) {
1001 				MutexLocker _(entry->team->xsi_sem_context->lock);
1002 				entry->team->xsi_sem_context->undo_list.Remove(entry);
1003 				delete entry;
1004 			}
1005 
1006 			setLocker.Detach();
1007 			delete semaphoreSet;
1008 			return 0;
1009 		}
1010 
1011 		default:
1012 			TRACE_ERROR(("xsi_semctl: command %d not valid\n", command));
1013 			result = EINVAL;
1014 	}
1015 
1016 	return result;
1017 }
1018 
1019 
1020 status_t
1021 _user_xsi_semop(int semaphoreID, struct sembuf *ops, size_t numOps)
1022 {
1023 	TRACE(("xsi_semop: semaphoreID = %d, ops = %p, numOps = %ld\n",
1024 		semaphoreID, ops, numOps));
1025 
1026 	if (!IS_USER_ADDRESS(ops)) {
1027 		TRACE(("xsi_semop: sembuf address is not valid\n"));
1028 		return B_BAD_ADDRESS;
1029 	}
1030 	if (numOps < 0 || numOps >= MAX_XSI_SEMS_PER_TEAM) {
1031 		TRACE(("xsi_semop: numOps out of range\n"));
1032 		return EINVAL;
1033 	}
1034 
1035 	MutexLocker setHashLocker(sXsiSemaphoreSetLock);
1036 	XsiSemaphoreSet *semaphoreSet = sSemaphoreHashTable.Lookup(semaphoreID);
1037 	if (semaphoreSet == NULL) {
1038 		TRACE(("xsi_semop: semaphore set id %d not valid\n",
1039 			semaphoreID));
1040 		return EINVAL;
1041 	}
1042 	MutexLocker setLocker(semaphoreSet->Lock());
1043 	setHashLocker.Unlock();
1044 
1045 	BStackOrHeapArray<struct sembuf, 16> operations(numOps);
1046 	if (!operations.IsValid()) {
1047 		TRACE_ERROR(("xsi_semop: failed to allocate sembuf struct\n"));
1048 		return B_NO_MEMORY;
1049 	}
1050 
1051 	if (user_memcpy(operations, ops,
1052 			(sizeof(struct sembuf) * numOps)) != B_OK) {
1053 		TRACE_ERROR(("xsi_semop: user_memcpy failed\n"));
1054 		return B_BAD_ADDRESS;
1055 	}
1056 
1057 	// We won't do partial request, that is operations
1058 	// only on some sempahores belonging to the set and then
1059 	// going to sleep. If we must wait on a semaphore, we undo
1060 	// all the operations already done and go to sleep, otherwise
1061 	// we may caused some unwanted deadlock among threads
1062 	// fighting for the same set.
1063 	bool notDone = true;
1064 	status_t result = 0;
1065 	while (notDone) {
1066 		XsiSemaphore *semaphore = NULL;
1067 		const ushort numberOfSemaphores = semaphoreSet->NumberOfSemaphores();
1068 		bool goToSleep = false;
1069 
1070 		uint32 i = 0;
1071 		for (; i < numOps; i++) {
1072 			ushort semaphoreNumber = operations[i].sem_num;
1073 			if (semaphoreNumber >= numberOfSemaphores) {
1074 				TRACE(("xsi_semop: %" B_PRIu32 " invalid semaphore number"
1075 					"\n", i));
1076 				result = EINVAL;
1077 				break;
1078 			}
1079 			semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1080 			unsigned short value = semaphore->Value();
1081 			short operation = operations[i].sem_op;
1082 			TRACE(("xsi_semop: semaphoreNumber = %d, value = %d\n",
1083 				semaphoreNumber, value));
1084 			if (operation < 0) {
1085 				if (semaphore->Add(operation)) {
1086 					if (operations[i].sem_flg & IPC_NOWAIT)
1087 						result = EAGAIN;
1088 					else
1089 						goToSleep = true;
1090 					break;
1091 				}
1092 			} else if (operation == 0) {
1093 				if (value == 0)
1094 					continue;
1095 				else if (operations[i].sem_flg & IPC_NOWAIT) {
1096 					result = EAGAIN;
1097 					break;
1098 				} else {
1099 					goToSleep = true;
1100 					break;
1101 				}
1102 			} else {
1103 				// Operation must be greater than zero,
1104 				// just add the value and continue
1105 				semaphore->Add(operation);
1106 			}
1107 		}
1108 
1109 		// Either we have to wait or an error occured
1110 		if (goToSleep || result != 0) {
1111 			// Undo all previously done operations
1112 			for (uint32 j = 0; j < i; j++) {
1113 				ushort semaphoreNumber = operations[j].sem_num;
1114 				semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1115 				short operation = operations[j].sem_op;
1116 				if (operation != 0)
1117 					semaphore->Revert(operation);
1118 			}
1119 			if (result != 0)
1120 				return result;
1121 
1122 			// We have to wait: first enqueue the thread
1123 			// in the appropriate set waiting list, then
1124 			// unlock the set itself and block the thread.
1125 			bool waitOnZero = true;
1126 			if (operations[i].sem_op != 0)
1127 				waitOnZero = false;
1128 
1129 			ConditionVariableEntry queueEntry;
1130 			semaphore->Enqueue(&queueEntry, waitOnZero);
1131 
1132 			const uint32 sequenceNumber = semaphoreSet->SequenceNumber();
1133 
1134 			TRACE(("xsi_semop: thread %d going to sleep\n", (int)thread->id));
1135 			setLocker.Unlock();
1136 			semaphoreSet = NULL;
1137 			semaphore = NULL;
1138 			result = queueEntry.Wait(B_CAN_INTERRUPT);
1139 			TRACE(("xsi_semop: thread %d back to life\n", (int)thread->id));
1140 
1141 			// We are back to life. Find out why!
1142 			// Make sure the set hasn't been deleted or worst yet replaced.
1143 			setHashLocker.Lock();
1144 			semaphoreSet = sSemaphoreHashTable.Lookup(semaphoreID);
1145 			if (result == EIDRM || semaphoreSet == NULL || (semaphoreSet != NULL
1146 					&& sequenceNumber != semaphoreSet->SequenceNumber())) {
1147 				TRACE(("xsi_semop: semaphore set id %d (sequence = "
1148 					"%" B_PRIu32 ") got destroyed\n", semaphoreID,
1149 					sequenceNumber));
1150 				notDone = false;
1151 				result = EIDRM;
1152 			} else if (result == B_INTERRUPTED) {
1153 				TRACE(("xsi_semop: thread %d got interrupted while "
1154 					"waiting on semaphore set id %d\n", (int)thread_get_current_thread_id(),
1155 					semaphoreID));
1156 				XsiSemaphore::Dequeue(&queueEntry);
1157 				result = EINTR;
1158 				notDone = false;
1159 			} else {
1160 				setLocker.Lock();
1161 				setHashLocker.Unlock();
1162 			}
1163 		} else {
1164 			// everything worked like a charm (so far)
1165 			notDone = false;
1166 			TRACE(("xsi_semop: semaphore acquired succesfully\n"));
1167 			// We acquired the semaphore, now records the sem_undo
1168 			// requests
1169 			for (uint32 i = 0; i < numOps; i++) {
1170 				if ((operations[i].sem_flg & SEM_UNDO) == 0)
1171 					continue;
1172 
1173 				ushort semaphoreNumber = operations[i].sem_num;
1174 				XsiSemaphore *semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1175 				short operation = operations[i].sem_op;
1176 
1177 				if (semaphoreSet->RecordUndo(semaphoreNumber, operation) != B_OK) {
1178 					// Unlikely scenario, but we might get here.
1179 					// Undo everything!
1180 					// Start with semaphore operations
1181 					for (uint32 j = 0; j < numOps; j++) {
1182 						ushort semaphoreNumber = operations[j].sem_num;
1183 						semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1184 						short operation = operations[j].sem_op;
1185 						if (operation != 0)
1186 							semaphore->Revert(operation);
1187 					}
1188 					// Remove all previously registered sem_undo request
1189 					for (uint32 j = 0; j < i; j++) {
1190 						if (operations[j].sem_flg & SEM_UNDO) {
1191 							semaphoreSet->RevertUndo(operations[j].sem_num,
1192 								operations[j].sem_op);
1193 						}
1194 					}
1195 					result = ENOSPC;
1196 				}
1197 			}
1198 		}
1199 	}
1200 
1201 	// We did it. Set the pid of all semaphores used
1202 	if (result == 0) {
1203 		for (uint32 i = 0; i < numOps; i++) {
1204 			ushort semaphoreNumber = operations[i].sem_num;
1205 			XsiSemaphore *semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1206 			semaphore->SetPid(getpid());
1207 		}
1208 		semaphoreSet->SetLastSemopTime();
1209 	}
1210 	return result;
1211 }
1212