xref: /haiku/src/system/kernel/posix/xsi_semaphore.cpp (revision 445d4fd926c569e7b9ae28017da86280aaecbae2)
1 /*
2  * Copyright 2008-2023, Haiku, Inc. All rights reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Salvatore Benedetto <salvatore.benedetto@gmail.com>
7  */
8 
9 #include <posix/xsi_semaphore.h>
10 
11 #include <new>
12 
13 #include <sys/ipc.h>
14 #include <sys/types.h>
15 
16 #include <OS.h>
17 
18 #include <kernel.h>
19 #include <syscall_restart.h>
20 
21 #include <util/atomic.h>
22 #include <util/AutoLock.h>
23 #include <util/DoublyLinkedList.h>
24 #include <util/OpenHashTable.h>
25 #include <AutoDeleter.h>
26 
27 
28 //#define TRACE_XSI_SEM
29 #ifdef TRACE_XSI_SEM
30 #	define TRACE(x)			dprintf x
31 #	define TRACE_ERROR(x)	dprintf x
32 #else
33 #	define TRACE(x)			/* nothing */
34 #	define TRACE_ERROR(x)	dprintf x
35 #endif
36 
37 
38 namespace {
39 
40 class XsiSemaphoreSet;
41 
42 struct sem_undo : DoublyLinkedListLinkImpl<sem_undo> {
43 	sem_undo(XsiSemaphoreSet *semaphoreSet, Team *team, int16 *undoValues)
44 		:
45 		semaphore_set(semaphoreSet),
46 		team(team),
47 		undo_values(undoValues)
48 	{
49 	}
50 
51 	DoublyLinkedListLink<sem_undo>		team_link;
52 	XsiSemaphoreSet						*semaphore_set;
53 	Team								*team;
54 	int16								*undo_values;
55 };
56 
57 typedef DoublyLinkedList<sem_undo> UndoList;
58 typedef DoublyLinkedList<sem_undo,
59 	DoublyLinkedListMemberGetLink<sem_undo, &sem_undo::team_link> > TeamList;
60 
61 } // namespace
62 
63 
64 // Forward declared in global namespace.
65 struct xsi_sem_context {
66 	xsi_sem_context()
67 	{
68 		mutex_init(&lock, "Private team undo_list lock");
69 	}
70 
71 	~xsi_sem_context()
72 	{
73 		mutex_destroy(&lock);
74 	}
75 
76 	TeamList	undo_list;
77 	mutex		lock;
78 };
79 
80 
81 namespace {
82 
83 // Xsi semaphore definition
84 class XsiSemaphore {
85 public:
86 	XsiSemaphore()
87 		:
88 		fLastPidOperation(0),
89 		fValue(0)
90 	{
91 		fWaitingToIncrease.Init(this, "XsiSemaphore");
92 		fWaitingToBeZero.Init(this, "XsiSemaphore");
93 	}
94 
95 	~XsiSemaphore()
96 	{
97 		// For some reason the semaphore is getting destroyed.
98 		// Wake up any remaing awaiting threads
99 		fWaitingToIncrease.NotifyAll(EIDRM);
100 		fWaitingToBeZero.NotifyAll(EIDRM);
101 
102 		// No need to remove any sem_undo request still
103 		// hanging. When the process exit and doesn't found
104 		// the semaphore set, it'll just ignore the sem_undo
105 		// request. That's better than iterating trough the
106 		// whole sUndoList. Beside we don't know our semaphore
107 		// number nor our semaphore set id.
108 	}
109 
110 	// We return true in case the operation causes the
111 	// caller to wait, so it can undo all the operations
112 	// previously done
113 	bool Add(short value)
114 	{
115 		if ((int)(fValue + value) < 0) {
116 			TRACE(("XsiSemaphore::Add: potentially going to sleep\n"));
117 			return true;
118 		} else {
119 			fValue += value;
120 			if (fValue == 0)
121 				WakeUpThreads(true);
122 			else if (fValue > 0)
123 				WakeUpThreads(false);
124 			return false;
125 		}
126 	}
127 
128 	status_t BlockAndUnlock(ConditionVariableEntry *queueEntry, MutexLocker *setLocker)
129 	{
130 		// Unlock the set before blocking
131 		setLocker->Unlock();
132 		return queueEntry->Wait(B_CAN_INTERRUPT);
133 	}
134 
135 	void Dequeue(ConditionVariableEntry *queueEntry, bool waitForZero)
136 	{
137 		queueEntry->Wait(B_RELATIVE_TIMEOUT, 0);
138 	}
139 
140 	void Enqueue(ConditionVariableEntry *queueEntry, bool waitForZero)
141 	{
142 		if (waitForZero) {
143 			fWaitingToBeZero.Add(queueEntry);
144 		} else {
145 			fWaitingToIncrease.Add(queueEntry);
146 		}
147 	}
148 
149 	pid_t LastPid() const
150 	{
151 		return fLastPidOperation;
152 	}
153 
154 	void Revert(short value)
155 	{
156 		fValue -= value;
157 		if (fValue == 0)
158 			WakeUpThreads(true);
159 		else if (fValue > 0)
160 			WakeUpThreads(false);
161 	}
162 
163 	void SetPid(pid_t pid)
164 	{
165 		fLastPidOperation = pid;
166 	}
167 
168 	void SetValue(ushort value)
169 	{
170 		fValue = value;
171 	}
172 
173 	ushort ThreadsWaitingToIncrease()
174 	{
175 		return fWaitingToIncrease.EntriesCount();
176 	}
177 
178 	ushort ThreadsWaitingToBeZero()
179 	{
180 		return fWaitingToBeZero.EntriesCount();
181 	}
182 
183 	ushort Value() const
184 	{
185 		return fValue;
186 	}
187 
188 	void WakeUpThreads(bool waitingForZero)
189 	{
190 		if (waitingForZero) {
191 			fWaitingToBeZero.NotifyAll();
192 		} else {
193 			fWaitingToIncrease.NotifyAll();
194 		}
195 	}
196 
197 private:
198 	pid_t				fLastPidOperation;				// sempid
199 	ushort				fValue;							// semval
200 
201 	ConditionVariable	fWaitingToIncrease;
202 	ConditionVariable	fWaitingToBeZero;
203 };
204 
205 #define MAX_XSI_SEMS_PER_TEAM	128
206 
207 // Xsi semaphore set definition (semid_ds)
208 class XsiSemaphoreSet {
209 public:
210 	XsiSemaphoreSet(int numberOfSemaphores, int flags)
211 		: fInitOK(false),
212 		fLastSemctlTime((time_t)real_time_clock()),
213 		fLastSemopTime(0),
214 		fNumberOfSemaphores(numberOfSemaphores),
215 		fSemaphores(0)
216 	{
217 		mutex_init(&fLock, "XsiSemaphoreSet private mutex");
218 		SetIpcKey((key_t)-1);
219 		SetPermissions(flags);
220 		fSemaphores = new(std::nothrow) XsiSemaphore[numberOfSemaphores];
221 		if (fSemaphores == NULL) {
222 			TRACE_ERROR(("XsiSemaphoreSet::XsiSemaphore(): failed to allocate "
223 				"XsiSemaphore object\n"));
224 		} else
225 			fInitOK = true;
226 	}
227 
228 	~XsiSemaphoreSet()
229 	{
230 		TRACE(("XsiSemaphoreSet::~XsiSemaphoreSet(): removing semaphore "
231 			"set %d\n", fID));
232 		mutex_destroy(&fLock);
233 		delete[] fSemaphores;
234 	}
235 
236 	void ClearUndo(unsigned short semaphoreNumber)
237 	{
238 		Team *team = thread_get_current_thread()->team;
239 		UndoList::Iterator iterator = fUndoList.GetIterator();
240 		while (iterator.HasNext()) {
241 			struct sem_undo *current = iterator.Next();
242 			if (current->team == team) {
243 				TRACE(("XsiSemaphoreSet::ClearUndo: teamID = %d, "
244 					"semaphoreSetID = %d, semaphoreNumber = %d\n",
245 					fID, semaphoreNumber, (int)team->id));
246 				MutexLocker _(team->xsi_sem_context->lock);
247 				current->undo_values[semaphoreNumber] = 0;
248 				return;
249 			}
250 		}
251 	}
252 
253 	void ClearUndos()
254 	{
255 		// Clear all undo_values (POSIX semadj equivalent)
256 		// of the calling team. This happens only on semctl SETALL.
257 		Team *team = thread_get_current_thread()->team;
258 		DoublyLinkedList<sem_undo>::Iterator iterator = fUndoList.GetIterator();
259 		while (iterator.HasNext()) {
260 			struct sem_undo *current = iterator.Next();
261 			if (current->team == team) {
262 				TRACE(("XsiSemaphoreSet::ClearUndos: teamID = %d, "
263 					"semaphoreSetID = %d\n", (int)team->id, fID));
264 				MutexLocker _(team->xsi_sem_context->lock);
265 				memset(current->undo_values, 0,
266 					sizeof(int16) * fNumberOfSemaphores);
267 				return;
268 			}
269 		}
270 	}
271 
272 	void DoIpcSet(struct semid_ds *result)
273 	{
274 		fPermissions.uid = result->sem_perm.uid;
275 		fPermissions.gid = result->sem_perm.gid;
276 		fPermissions.mode = (fPermissions.mode & ~0x01ff)
277 			| (result->sem_perm.mode & 0x01ff);
278 	}
279 
280 	bool HasPermission() const
281 	{
282 		if ((fPermissions.mode & S_IWOTH) != 0)
283 			return true;
284 
285 		uid_t uid = geteuid();
286 		if (uid == 0 || (uid == fPermissions.uid
287 			&& (fPermissions.mode & S_IWUSR) != 0))
288 			return true;
289 
290 		gid_t gid = getegid();
291 		if (gid == fPermissions.gid && (fPermissions.mode & S_IWGRP) != 0)
292 			return true;
293 
294 		return false;
295 	}
296 
297 	bool HasReadPermission() const
298 	{
299 		// TODO: fix this
300 		return HasPermission();
301 	}
302 
303 	int ID() const
304 	{
305 		return fID;
306 	}
307 
308 	bool InitOK()
309 	{
310 		return fInitOK;
311 	}
312 
313 	key_t IpcKey() const
314 	{
315 		return fPermissions.key;
316 	}
317 
318 	struct ipc_perm IpcPermission() const
319 	{
320 		return fPermissions;
321 	}
322 
323 	time_t LastSemctlTime() const
324 	{
325 		return fLastSemctlTime;
326 	}
327 
328 	time_t LastSemopTime() const
329 	{
330 		return fLastSemopTime;
331 	}
332 
333 	mutex &Lock()
334 	{
335 		return fLock;
336 	}
337 
338 	ushort NumberOfSemaphores() const
339 	{
340 		return fNumberOfSemaphores;
341 	}
342 
343 	// Record the sem_undo operation into our private fUndoList and
344 	// the team undo_list. The only limit here is the memory needed
345 	// for creating a new sem_undo structure.
346 	int RecordUndo(short semaphoreNumber, short value)
347 	{
348 		// Look if there is already a record from the team caller
349 		// for the same semaphore set
350 		bool notFound = true;
351 		Team *team = thread_get_current_thread()->team;
352 		DoublyLinkedList<sem_undo>::Iterator iterator = fUndoList.GetIterator();
353 		while (iterator.HasNext()) {
354 			struct sem_undo *current = iterator.Next();
355 			if (current->team == team) {
356 				// Update its undo value
357 				MutexLocker _(team->xsi_sem_context->lock);
358 				int newValue = current->undo_values[semaphoreNumber] + value;
359 				if (newValue > USHRT_MAX || newValue < -USHRT_MAX) {
360 					TRACE_ERROR(("XsiSemaphoreSet::RecordUndo: newValue %d "
361 						"out of range\n", newValue));
362 					return ERANGE;
363 				}
364 				current->undo_values[semaphoreNumber] = newValue;
365 				notFound = false;
366 				TRACE(("XsiSemaphoreSet::RecordUndo: found record. Team = %d, "
367 					"semaphoreSetID = %d, semaphoreNumber = %d, value = %d\n",
368 					(int)team->id, fID, semaphoreNumber,
369 					current->undo_values[semaphoreNumber]));
370 				break;
371 			}
372 		}
373 
374 		if (notFound) {
375 			// First sem_undo request from this team for this
376 			// semaphore set
377 			int16 *undoValues
378 				= (int16 *)malloc(sizeof(int16) * fNumberOfSemaphores);
379 			if (undoValues == NULL)
380 				return B_NO_MEMORY;
381 			struct sem_undo *request
382 				= new(std::nothrow) sem_undo(this, team, undoValues);
383 			if (request == NULL) {
384 				free(undoValues);
385 				return B_NO_MEMORY;
386 			}
387 			memset(request->undo_values, 0, sizeof(int16) * fNumberOfSemaphores);
388 			request->undo_values[semaphoreNumber] = value;
389 
390 			// Check if it's the very first sem_undo request for this team
391 			xsi_sem_context *context = atomic_pointer_get(&team->xsi_sem_context);
392 			if (context == NULL) {
393 				// Create the context
394 				context = new(std::nothrow) xsi_sem_context;
395 				if (context == NULL) {
396 					free(request->undo_values);
397 					delete request;
398 					return B_NO_MEMORY;
399 				}
400 				// Since we don't hold any global lock, someone
401 				// else could have been quicker than us, so we have
402 				// to delete the one we just created and use the one
403 				// in place.
404 				if (atomic_pointer_test_and_set(&team->xsi_sem_context, context,
405 					(xsi_sem_context *)NULL) != NULL)
406 					delete context;
407 			}
408 
409 			// Add the request to both XsiSemaphoreSet and team list
410 			fUndoList.Add(request);
411 			MutexLocker _(team->xsi_sem_context->lock);
412 			team->xsi_sem_context->undo_list.Add(request);
413 			TRACE(("XsiSemaphoreSet::RecordUndo: new record added. Team = %d, "
414 				"semaphoreSetID = %d, semaphoreNumber = %d, value = %d\n",
415 				(int)team->id, fID, semaphoreNumber, value));
416 		}
417 		return B_OK;
418 	}
419 
420 	void RevertUndo(short semaphoreNumber, short value)
421 	{
422 		// This can be called only when RecordUndo fails.
423 		Team *team = thread_get_current_thread()->team;
424 		DoublyLinkedList<sem_undo>::Iterator iterator = fUndoList.GetIterator();
425 		while (iterator.HasNext()) {
426 			struct sem_undo *current = iterator.Next();
427 			if (current->team == team) {
428 				MutexLocker _(team->xsi_sem_context->lock);
429 				fSemaphores[semaphoreNumber].Revert(value);
430 				break;
431 			}
432 		}
433 	}
434 
435 	XsiSemaphore* Semaphore(int nth) const
436 	{
437 		return &fSemaphores[nth];
438 	}
439 
440 	uint32 SequenceNumber() const
441 	{
442 		return fSequenceNumber;
443 	}
444 
445 	// Implemented after sGlobalSequenceNumber is declared
446 	void SetID();
447 
448 	void SetIpcKey(key_t key)
449 	{
450 		fPermissions.key = key;
451 	}
452 
453 	void SetLastSemctlTime()
454 	{
455 		fLastSemctlTime = real_time_clock();
456 	}
457 
458 	void SetLastSemopTime()
459 	{
460 		fLastSemopTime = real_time_clock();
461 	}
462 
463 	void SetPermissions(int flags)
464 	{
465 		fPermissions.uid = fPermissions.cuid = geteuid();
466 		fPermissions.gid = fPermissions.cgid = getegid();
467 		fPermissions.mode = (flags & 0x01ff);
468 	}
469 
470 	UndoList &GetUndoList()
471 	{
472 		return fUndoList;
473 	}
474 
475 	XsiSemaphoreSet*& Link()
476 	{
477 		return fLink;
478 	}
479 
480 private:
481 	int							fID;					// semaphore set id
482 	bool						fInitOK;
483 	time_t						fLastSemctlTime;		// sem_ctime
484 	time_t						fLastSemopTime;			// sem_otime
485 	mutex 						fLock;					// private lock
486 	ushort						fNumberOfSemaphores;	// sem_nsems
487 	struct ipc_perm				fPermissions;			// sem_perm
488 	XsiSemaphore				*fSemaphores;			// array of semaphores
489 	uint32						fSequenceNumber;		// used as a second id
490 	UndoList					fUndoList;				// undo list requests
491 
492 	XsiSemaphoreSet*			fLink;
493 };
494 
495 // Xsi semaphore set hash table
496 struct SemaphoreHashTableDefinition {
497 	typedef int					KeyType;
498 	typedef XsiSemaphoreSet		ValueType;
499 
500 	size_t HashKey (const int key) const
501 	{
502 		return (size_t)key;
503 	}
504 
505 	size_t Hash(XsiSemaphoreSet *variable) const
506 	{
507 		return (size_t)variable->ID();
508 	}
509 
510 	bool Compare(const int key, XsiSemaphoreSet *variable) const
511 	{
512 		return (int)key == (int)variable->ID();
513 	}
514 
515 	XsiSemaphoreSet*& GetLink(XsiSemaphoreSet *variable) const
516 	{
517 		return variable->Link();
518 	}
519 };
520 
521 
522 // IPC class
523 class Ipc {
524 public:
525 	Ipc(key_t key)
526 		: fKey(key),
527 		fSemaphoreSetId(-1)
528 	{
529 	}
530 
531 	key_t Key() const
532 	{
533 		return fKey;
534 	}
535 
536 	int SemaphoreSetID() const
537 	{
538 		return fSemaphoreSetId;
539 	}
540 
541 	void SetSemaphoreSetID(XsiSemaphoreSet *semaphoreSet)
542 	{
543 		fSemaphoreSetId = semaphoreSet->ID();
544 	}
545 
546 	Ipc*& Link()
547 	{
548 		return fLink;
549 	}
550 
551 private:
552 	key_t				fKey;
553 	int					fSemaphoreSetId;
554 	Ipc*				fLink;
555 };
556 
557 
558 struct IpcHashTableDefinition {
559 	typedef key_t	KeyType;
560 	typedef Ipc		ValueType;
561 
562 	size_t HashKey (const key_t key) const
563 	{
564 		return (size_t)(key);
565 	}
566 
567 	size_t Hash(Ipc *variable) const
568 	{
569 		return (size_t)HashKey(variable->Key());
570 	}
571 
572 	bool Compare(const key_t key, Ipc *variable) const
573 	{
574 		return (key_t)key == (key_t)variable->Key();
575 	}
576 
577 	Ipc*& GetLink(Ipc *variable) const
578 	{
579 		return variable->Link();
580 	}
581 };
582 
583 } // namespace
584 
585 
586 // Arbitrary limit
587 #define MAX_XSI_SEMAPHORE		4096
588 #define MAX_XSI_SEMAPHORE_SET	2048
589 static BOpenHashTable<IpcHashTableDefinition> sIpcHashTable;
590 static BOpenHashTable<SemaphoreHashTableDefinition> sSemaphoreHashTable;
591 
592 static mutex sIpcLock;
593 static mutex sXsiSemaphoreSetLock;
594 
595 static uint32 sGlobalSequenceNumber = 1;
596 static int32 sXsiSemaphoreCount = 0;
597 static int32 sXsiSemaphoreSetCount = 0;
598 
599 
600 //	#pragma mark -
601 
602 
603 void
604 XsiSemaphoreSet::SetID()
605 {
606 	fID = real_time_clock();
607 	// The lock is held before calling us
608 	while (true) {
609 		if (sSemaphoreHashTable.Lookup(fID) == NULL)
610 			break;
611 		fID = (fID + 1) % INT_MAX;
612 	}
613 	sGlobalSequenceNumber = (sGlobalSequenceNumber + 1) % UINT_MAX;
614 	fSequenceNumber = sGlobalSequenceNumber;
615 }
616 
617 
618 //	#pragma mark - Kernel exported API
619 
620 
621 void
622 xsi_sem_init()
623 {
624 	// Initialize hash tables
625 	status_t status = sIpcHashTable.Init();
626 	if (status != B_OK)
627 		panic("xsi_sem_init() failed to initialize ipc hash table\n");
628 	status =  sSemaphoreHashTable.Init();
629 	if (status != B_OK)
630 		panic("xsi_sem_init() failed to initialize semaphore hash table\n");
631 
632 	mutex_init(&sIpcLock, "global POSIX semaphore IPC table");
633 	mutex_init(&sXsiSemaphoreSetLock, "global POSIX xsi sem table");
634 }
635 
636 
637 /*!	Function called on team exit to process any sem_undo requests */
638 void
639 xsi_sem_undo(Team *team)
640 {
641 	if (team->xsi_sem_context == NULL)
642 		return;
643 
644 	// By acquiring first the semaphore hash table lock
645 	// we make sure the semaphore set in our sem_undo
646 	// list won't get removed by IPC_RMID call
647 	MutexLocker _(sXsiSemaphoreSetLock);
648 
649 	// Process all sem_undo request in the team sem undo list
650 	// if any
651 	TeamList::Iterator iterator
652 		= team->xsi_sem_context->undo_list.GetIterator();
653 	while (iterator.HasNext()) {
654 		struct sem_undo *current = iterator.Next();
655 		XsiSemaphoreSet *semaphoreSet = current->semaphore_set;
656 		// Acquire the set lock in order to prevent race
657 		// condition with RecordUndo
658 		MutexLocker setLocker(semaphoreSet->Lock());
659 		MutexLocker _(team->xsi_sem_context->lock);
660 		// Revert the changes done by this process
661 		for (int i = 0; i < semaphoreSet->NumberOfSemaphores(); i++)
662 			if (current->undo_values[i] != 0) {
663 				TRACE(("xsi_sem_undo: TeamID = %d, SemaphoreSetID = %d, "
664 					"SemaphoreNumber = %d, undo value = %d\n", (int)team->id,
665 					semaphoreSet->ID(), i, (int)current->undo_values[i]));
666 				semaphoreSet->Semaphore(i)->Revert(current->undo_values[i]);
667 			}
668 
669 		// Remove and free the sem_undo structure from both lists
670 		iterator.Remove();
671 		semaphoreSet->GetUndoList().Remove(current);
672 		delete current;
673 	}
674 	delete team->xsi_sem_context;
675 	team->xsi_sem_context = NULL;
676 }
677 
678 
679 //	#pragma mark - Syscalls
680 
681 
682 int
683 _user_xsi_semget(key_t key, int numberOfSemaphores, int flags)
684 {
685 	TRACE(("xsi_semget: key = %d, numberOfSemaphores = %d, flags = %d\n",
686 		(int)key, numberOfSemaphores, flags));
687 	XsiSemaphoreSet *semaphoreSet = NULL;
688 	Ipc *ipcKey = NULL;
689 
690 	// Default assumption
691 	bool isPrivate = true;
692 
693 	MutexLocker ipcLocker(sIpcLock);
694 	if (key != IPC_PRIVATE) {
695 		isPrivate = false;
696 		// Check if key already exist, if it does it already has a semaphore
697 		// set associated with it
698 		ipcKey = sIpcHashTable.Lookup(key);
699 		if (ipcKey == NULL) {
700 			// The ipc key does not exist. Create it and add it to the system
701 			if (!(flags & IPC_CREAT)) {
702 				TRACE(("xsi_semget: key %d does not exist, but the "
703 					"caller did not ask for creation\n",(int)key));
704 				return ENOENT;
705 			}
706 			ipcKey = new(std::nothrow) Ipc(key);
707 			if (ipcKey == NULL) {
708 				TRACE_ERROR(("xsi_semget: failed to create new Ipc object "
709 					"for key %d\n",	(int)key));
710 				return ENOMEM;
711 			}
712 		} else {
713 			// The IPC key exist and it already has a semaphore
714 			if ((flags & IPC_CREAT) && (flags & IPC_EXCL)) {
715 				TRACE(("xsi_semget: key %d already exist\n", (int)key));
716 				return EEXIST;
717 			}
718 			int semaphoreSetID = ipcKey->SemaphoreSetID();
719 
720 			MutexLocker semaphoreSetLocker(sXsiSemaphoreSetLock);
721 			semaphoreSet = sSemaphoreHashTable.Lookup(semaphoreSetID);
722 			if (semaphoreSet == NULL) {
723 				TRACE(("xsi_semget: calling process has no semaphore, "
724 					"key %d\n", (int)key));
725 				return EINVAL;
726 			}
727 			if (!semaphoreSet->HasPermission()) {
728 				TRACE(("xsi_semget: calling process has no permission "
729 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
730 					(int)key));
731 				return EACCES;
732 			}
733 			if (numberOfSemaphores > semaphoreSet->NumberOfSemaphores()
734 					&& numberOfSemaphores != 0) {
735 				TRACE(("xsi_semget: numberOfSemaphores greater than the "
736 					"one associated with semaphore %d, key %d\n",
737 					semaphoreSet->ID(), (int)key));
738 				return EINVAL;
739 			}
740 
741 			return semaphoreSet->ID();
742 		}
743 	}
744 
745 	// Create a new sempahore set for this key
746 	if (numberOfSemaphores <= 0
747 			|| numberOfSemaphores >= MAX_XSI_SEMS_PER_TEAM) {
748 		TRACE_ERROR(("xsi_semget: numberOfSemaphores out of range\n"));
749 		delete ipcKey;
750 		return EINVAL;
751 	}
752 	if (sXsiSemaphoreCount >= MAX_XSI_SEMAPHORE
753 			|| sXsiSemaphoreSetCount >= MAX_XSI_SEMAPHORE_SET) {
754 		TRACE_ERROR(("xsi_semget: reached limit of maximum number of "
755 			"semaphores allowed\n"));
756 		delete ipcKey;
757 		return ENOSPC;
758 	}
759 
760 	semaphoreSet = new(std::nothrow) XsiSemaphoreSet(numberOfSemaphores, flags);
761 	if (semaphoreSet == NULL || !semaphoreSet->InitOK()) {
762 		TRACE_ERROR(("xsi_semget: failed to allocate a new xsi "
763 			"semaphore set\n"));
764 		delete semaphoreSet;
765 		delete ipcKey;
766 		return ENOMEM;
767 	}
768 
769 	atomic_add(&sXsiSemaphoreCount, numberOfSemaphores);
770 	atomic_add(&sXsiSemaphoreSetCount, 1);
771 
772 	MutexLocker semaphoreSetLocker(sXsiSemaphoreSetLock);
773 	semaphoreSet->SetID();
774 	if (isPrivate) {
775 		semaphoreSet->SetIpcKey((key_t)-1);
776 	} else {
777 		sIpcHashTable.Insert(ipcKey);
778 		semaphoreSet->SetIpcKey(key);
779 		ipcKey->SetSemaphoreSetID(semaphoreSet);
780 	}
781 	sSemaphoreHashTable.Insert(semaphoreSet);
782 	TRACE(("semget: new set = %d created, sequence = %ld\n",
783 		semaphoreSet->ID(), semaphoreSet->SequenceNumber()));
784 
785 	return semaphoreSet->ID();
786 }
787 
788 
789 int
790 _user_xsi_semctl(int semaphoreID, int semaphoreNumber, int command,
791 	union semun *_args)
792 {
793 	TRACE(("xsi_semctl: semaphoreID = %d, semaphoreNumber = %d, command = %d\n",
794 		semaphoreID, semaphoreNumber, command));
795 
796 	union semun args = {0};
797 	if (_args != NULL) {
798 		if (!IS_USER_ADDRESS(_args)
799 				|| user_memcpy(&args, _args, sizeof(union semun)) != B_OK)
800 			return B_BAD_ADDRESS;
801 	}
802 
803 	MutexLocker ipcHashLocker(sIpcLock);
804 	MutexLocker setHashLocker(sXsiSemaphoreSetLock);
805 	XsiSemaphoreSet *semaphoreSet = sSemaphoreHashTable.Lookup(semaphoreID);
806 	if (semaphoreSet == NULL) {
807 		TRACE(("xsi_semctl: semaphore set id %d not valid\n",
808 			semaphoreID));
809 		return EINVAL;
810 	}
811 	if (semaphoreNumber < 0
812 		|| semaphoreNumber > semaphoreSet->NumberOfSemaphores()) {
813 		TRACE(("xsi_semctl: semaphore number %d not valid for "
814 			"semaphore %d\n", semaphoreNumber, semaphoreID));
815 		return EINVAL;
816 	}
817 
818 	// Lock the semaphore set itself and release both the semaphore
819 	// set hash table lock and the ipc hash table lock _only_ if
820 	// the command it's not IPC_RMID, this prevents undesidered
821 	// situation from happening while (hopefully) improving the
822 	// concurrency.
823 	MutexLocker setLocker;
824 	if (command != IPC_RMID) {
825 		setLocker.SetTo(&semaphoreSet->Lock(), false);
826 		setHashLocker.Unlock();
827 		ipcHashLocker.Unlock();
828 	} else {
829 		// We are about to delete the set along with its mutex, so
830 		// we can't use the MutexLocker class, as the mutex itself
831 		// won't exist on function exit
832 		mutex_lock(&semaphoreSet->Lock());
833 	}
834 
835 	int result = 0;
836 	XsiSemaphore *semaphore = semaphoreSet->Semaphore(semaphoreNumber);
837 	switch (command) {
838 		case GETVAL: {
839 			if (!semaphoreSet->HasReadPermission()) {
840 				TRACE(("xsi_semctl: calling process has not permission "
841 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
842 					(int)semaphoreSet->IpcKey()));
843 				result = EACCES;
844 			} else
845 				result = semaphore->Value();
846 			break;
847 		}
848 
849 		case SETVAL: {
850 			if (!semaphoreSet->HasPermission()) {
851 				TRACE(("xsi_semctl: calling process has not permission "
852 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
853 					(int)semaphoreSet->IpcKey()));
854 				result = EACCES;
855 			} else {
856 				if (args.val > USHRT_MAX) {
857 					TRACE(("xsi_semctl: value %d out of range\n", args.val));
858 					result = ERANGE;
859 				} else {
860 					semaphore->SetValue(args.val);
861 					semaphoreSet->ClearUndo(semaphoreNumber);
862 				}
863 			}
864 			break;
865 		}
866 
867 		case GETPID: {
868 			if (!semaphoreSet->HasReadPermission()) {
869 				TRACE(("xsi_semctl: calling process has not permission "
870 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
871 					(int)semaphoreSet->IpcKey()));
872 				result = EACCES;
873 			} else
874 				result = semaphore->LastPid();
875 			break;
876 		}
877 
878 		case GETNCNT: {
879 			if (!semaphoreSet->HasReadPermission()) {
880 				TRACE(("xsi_semctl: calling process has not permission "
881 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
882 					(int)semaphoreSet->IpcKey()));
883 				result = EACCES;
884 			} else
885 				result = semaphore->ThreadsWaitingToIncrease();
886 			break;
887 		}
888 
889 		case GETZCNT: {
890 			if (!semaphoreSet->HasReadPermission()) {
891 				TRACE(("xsi_semctl: calling process has not permission "
892 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
893 					(int)semaphoreSet->IpcKey()));
894 				result = EACCES;
895 			} else
896 				result = semaphore->ThreadsWaitingToBeZero();
897 			break;
898 		}
899 
900 		case GETALL: {
901 			if (!semaphoreSet->HasReadPermission()) {
902 				TRACE(("xsi_semctl: calling process has not read "
903 					"permission on semaphore %d, key %d\n", semaphoreSet->ID(),
904 					(int)semaphoreSet->IpcKey()));
905 				result = EACCES;
906 			} else
907 				for (int i = 0; i < semaphoreSet->NumberOfSemaphores(); i++) {
908 					semaphore = semaphoreSet->Semaphore(i);
909 					unsigned short value = semaphore->Value();
910 					if (user_memcpy(args.array + i, &value,
911 							sizeof(unsigned short)) != B_OK) {
912 						TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
913 						result = B_BAD_ADDRESS;
914 						break;
915 					}
916 				}
917 			break;
918 		}
919 
920 		case SETALL: {
921 			if (!semaphoreSet->HasPermission()) {
922 				TRACE(("xsi_semctl: calling process has not permission "
923 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
924 					(int)semaphoreSet->IpcKey()));
925 				result = EACCES;
926 			} else {
927 				bool doClear = true;
928 				for (int i = 0; i < semaphoreSet->NumberOfSemaphores(); i++) {
929 					semaphore = semaphoreSet->Semaphore(i);
930 					unsigned short value;
931 					if (user_memcpy(&value, args.array + i,
932 							sizeof(unsigned short)) != B_OK) {
933 						TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
934 						result = B_BAD_ADDRESS;
935 						doClear = false;
936 						break;
937 					} else
938 						semaphore->SetValue(value);
939 				}
940 				if (doClear)
941 					semaphoreSet->ClearUndos();
942 			}
943 			break;
944 		}
945 
946 		case IPC_STAT: {
947 			if (!semaphoreSet->HasReadPermission()) {
948 				TRACE(("xsi_semctl: calling process has not read "
949 					"permission on semaphore %d, key %d\n", semaphoreSet->ID(),
950 					(int)semaphoreSet->IpcKey()));
951 				result = EACCES;
952 			} else {
953 				struct semid_ds sem;
954 				sem.sem_perm = semaphoreSet->IpcPermission();
955 				sem.sem_nsems = semaphoreSet->NumberOfSemaphores();
956 				sem.sem_otime = semaphoreSet->LastSemopTime();
957 				sem.sem_ctime = semaphoreSet->LastSemctlTime();
958 				if (user_memcpy(args.buf, &sem, sizeof(struct semid_ds))
959 						< B_OK) {
960 					TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
961 					result = B_BAD_ADDRESS;
962 				}
963 			}
964 			break;
965 		}
966 
967 		case IPC_SET: {
968 			if (!semaphoreSet->HasPermission()) {
969 				TRACE(("xsi_semctl: calling process has not "
970 					"permission on semaphore %d, key %d\n",
971 					semaphoreSet->ID(), (int)semaphoreSet->IpcKey()));
972 				result = EACCES;
973 			} else {
974 				struct semid_ds sem;
975 				if (user_memcpy(&sem, args.buf, sizeof(struct semid_ds))
976 						!= B_OK) {
977 					TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
978 					result = B_BAD_ADDRESS;
979 				} else
980 					semaphoreSet->DoIpcSet(&sem);
981 			}
982 			break;
983 		}
984 
985 		case IPC_RMID: {
986 			// If this was the command, we are still holding
987 			// the semaphore set hash table lock along with the
988 			// ipc hash table lock and the semaphore set lock
989 			// itself, this way we are sure there is not
990 			// one waiting in the queue of the mutex.
991 			if (!semaphoreSet->HasPermission()) {
992 				TRACE(("xsi_semctl: calling process has not "
993 					"permission on semaphore %d, key %d\n",
994 					semaphoreSet->ID(), (int)semaphoreSet->IpcKey()));
995 				return EACCES;
996 			}
997 			key_t key = semaphoreSet->IpcKey();
998 			Ipc *ipcKey = NULL;
999 			if (key != -1) {
1000 				ipcKey = sIpcHashTable.Lookup(key);
1001 				sIpcHashTable.Remove(ipcKey);
1002 			}
1003 			sSemaphoreHashTable.Remove(semaphoreSet);
1004 			// Wake up of threads waiting on this set
1005 			// happens in the destructor
1006 			if (key != -1)
1007 				delete ipcKey;
1008 			atomic_add(&sXsiSemaphoreCount, -semaphoreSet->NumberOfSemaphores());
1009 			atomic_add(&sXsiSemaphoreSetCount, -1);
1010 			// Remove any sem_undo request
1011 			while (struct sem_undo *entry
1012 					= semaphoreSet->GetUndoList().RemoveHead()) {
1013 				MutexLocker _(entry->team->xsi_sem_context->lock);
1014 				entry->team->xsi_sem_context->undo_list.Remove(entry);
1015 				delete entry;
1016 			}
1017 
1018 			delete semaphoreSet;
1019 			return 0;
1020 		}
1021 
1022 		default:
1023 			TRACE_ERROR(("xsi_semctl: command %d not valid\n", command));
1024 			result = EINVAL;
1025 	}
1026 
1027 	return result;
1028 }
1029 
1030 
1031 status_t
1032 _user_xsi_semop(int semaphoreID, struct sembuf *ops, size_t numOps)
1033 {
1034 	TRACE(("xsi_semop: semaphoreID = %d, ops = %p, numOps = %ld\n",
1035 		semaphoreID, ops, numOps));
1036 	MutexLocker setHashLocker(sXsiSemaphoreSetLock);
1037 	XsiSemaphoreSet *semaphoreSet = sSemaphoreHashTable.Lookup(semaphoreID);
1038 	if (semaphoreSet == NULL) {
1039 		TRACE(("xsi_semop: semaphore set id %d not valid\n",
1040 			semaphoreID));
1041 		return EINVAL;
1042 	}
1043 	MutexLocker setLocker(semaphoreSet->Lock());
1044 	setHashLocker.Unlock();
1045 
1046 	if (!IS_USER_ADDRESS(ops)) {
1047 		TRACE(("xsi_semop: sembuf address is not valid\n"));
1048 		return B_BAD_ADDRESS;
1049 	}
1050 
1051 	if (numOps < 0 || numOps >= MAX_XSI_SEMS_PER_TEAM) {
1052 		TRACE(("xsi_semop: numOps out of range\n"));
1053 		return EINVAL;
1054 	}
1055 
1056 	struct sembuf *operations
1057 		= (struct sembuf *)malloc(sizeof(struct sembuf) * numOps);
1058 	if (operations == NULL) {
1059 		TRACE_ERROR(("xsi_semop: failed to allocate sembuf struct\n"));
1060 		return B_NO_MEMORY;
1061 	}
1062 	MemoryDeleter operationsDeleter(operations);
1063 
1064 	if (user_memcpy(operations, ops,
1065 			(sizeof(struct sembuf) * numOps)) != B_OK) {
1066 		TRACE_ERROR(("xsi_semop: user_memcpy failed\n"));
1067 		return B_BAD_ADDRESS;
1068 	}
1069 
1070 	// We won't do partial request, that is operations
1071 	// only on some sempahores belonging to the set and then
1072 	// going to sleep. If we must wait on a semaphore, we undo
1073 	// all the operations already done and go to sleep, otherwise
1074 	// we may caused some unwanted deadlock among threads
1075 	// fighting for the same set.
1076 	bool notDone = true;
1077 	status_t result = 0;
1078 	while (notDone) {
1079 		XsiSemaphore *semaphore = NULL;
1080 		short numberOfSemaphores = semaphoreSet->NumberOfSemaphores();
1081 		bool goToSleep = false;
1082 
1083 		uint32 i = 0;
1084 		for (; i < numOps; i++) {
1085 			short semaphoreNumber = operations[i].sem_num;
1086 			if (semaphoreNumber >= numberOfSemaphores) {
1087 				TRACE(("xsi_semop: %" B_PRIu32 " invalid semaphore number"
1088 					"\n", i));
1089 				result = EINVAL;
1090 				break;
1091 			}
1092 			semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1093 			unsigned short value = semaphore->Value();
1094 			short operation = operations[i].sem_op;
1095 			TRACE(("xsi_semop: semaphoreNumber = %d, value = %d\n",
1096 				semaphoreNumber, value));
1097 			if (operation < 0) {
1098 				if (semaphore->Add(operation)) {
1099 					if (operations[i].sem_flg & IPC_NOWAIT)
1100 						result = EAGAIN;
1101 					else
1102 						goToSleep = true;
1103 					break;
1104 				}
1105 			} else if (operation == 0) {
1106 				if (value == 0)
1107 					continue;
1108 				else if (operations[i].sem_flg & IPC_NOWAIT) {
1109 					result = EAGAIN;
1110 					break;
1111 				} else {
1112 					goToSleep = true;
1113 					break;
1114 				}
1115 			} else {
1116 				// Operation must be greater than zero,
1117 				// just add the value and continue
1118 				semaphore->Add(operation);
1119 			}
1120 		}
1121 
1122 		// Either we have to wait or an error occured
1123 		if (goToSleep || result != 0) {
1124 			// Undo all previously done operations
1125 			for (uint32 j = 0; j < i; j++) {
1126 				short semaphoreNumber = operations[j].sem_num;
1127 				semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1128 				short operation = operations[j].sem_op;
1129 				if (operation != 0)
1130 					semaphore->Revert(operation);
1131 			}
1132 			if (result != 0)
1133 				return result;
1134 
1135 			// We have to wait: first enqueue the thread
1136 			// in the appropriate set waiting list, then
1137 			// unlock the set itself and block the thread.
1138 			bool waitOnZero = true;
1139 			if (operations[i].sem_op != 0)
1140 				waitOnZero = false;
1141 
1142 			ConditionVariableEntry queueEntry;
1143 			semaphore->Enqueue(&queueEntry, waitOnZero);
1144 
1145 			uint32 sequenceNumber = semaphoreSet->SequenceNumber();
1146 
1147 			TRACE(("xsi_semop: thread %d going to sleep\n", (int)thread->id));
1148 			result = semaphore->BlockAndUnlock(&queueEntry, &setLocker);
1149 			TRACE(("xsi_semop: thread %d back to life\n", (int)thread->id));
1150 
1151 			// We are back to life. Find out why!
1152 			// Make sure the set hasn't been deleted or worst yet replaced.
1153 			setHashLocker.Lock();
1154 			semaphoreSet = sSemaphoreHashTable.Lookup(semaphoreID);
1155 			if (result == EIDRM || semaphoreSet == NULL || (semaphoreSet != NULL
1156 					&& sequenceNumber != semaphoreSet->SequenceNumber())) {
1157 				TRACE(("xsi_semop: semaphore set id %d (sequence = "
1158 					"%" B_PRIu32 ") got destroyed\n", semaphoreID,
1159 					sequenceNumber));
1160 				notDone = false;
1161 				result = EIDRM;
1162 			} else if (result == B_INTERRUPTED) {
1163 				TRACE(("xsi_semop: thread %d got interrupted while "
1164 					"waiting on semaphore set id %d\n", (int)thread_get_current_thread_id(),
1165 					semaphoreID));
1166 				semaphore->Dequeue(&queueEntry, waitOnZero);
1167 				result = EINTR;
1168 				notDone = false;
1169 			} else {
1170 				setLocker.Lock();
1171 				setHashLocker.Unlock();
1172 			}
1173 		} else {
1174 			// everything worked like a charm (so far)
1175 			notDone = false;
1176 			TRACE(("xsi_semop: semaphore acquired succesfully\n"));
1177 			// We acquired the semaphore, now records the sem_undo
1178 			// requests
1179 			XsiSemaphore *semaphore = NULL;
1180 			uint32 i = 0;
1181 			for (; i < numOps; i++) {
1182 				short semaphoreNumber = operations[i].sem_num;
1183 				semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1184 				short operation = operations[i].sem_op;
1185 				if (operations[i].sem_flg & SEM_UNDO)
1186 					if (semaphoreSet->RecordUndo(semaphoreNumber, operation)
1187 						!= B_OK) {
1188 						// Unlikely scenario, but we might get here.
1189 						// Undo everything!
1190 						// Start with semaphore operations
1191 						for (uint32 j = 0; j < numOps; j++) {
1192 							short semaphoreNumber = operations[j].sem_num;
1193 							semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1194 							short operation = operations[j].sem_op;
1195 							if (operation != 0)
1196 								semaphore->Revert(operation);
1197 						}
1198 						// Remove all previously registered sem_undo request
1199 						for (uint32 j = 0; j < i; j++) {
1200 							if (operations[j].sem_flg & SEM_UNDO)
1201 								semaphoreSet->RevertUndo(operations[j].sem_num,
1202 									operations[j].sem_op);
1203 						}
1204 						result = ENOSPC;
1205 					}
1206 			}
1207 		}
1208 	}
1209 
1210 	// We did it. Set the pid of all semaphores used
1211 	if (result == 0) {
1212 		for (uint32 i = 0; i < numOps; i++) {
1213 			short semaphoreNumber = operations[i].sem_num;
1214 			XsiSemaphore *semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1215 			semaphore->SetPid(getpid());
1216 		}
1217 		semaphoreSet->SetLastSemopTime();
1218 	}
1219 	return result;
1220 }
1221