xref: /haiku/src/system/kernel/posix/xsi_semaphore.cpp (revision a629567a9001547736cfe892cdf992be16868fed)
1 /*
2  * Copyright 2008-2011, Haiku, Inc. All rights reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Salvatore Benedetto <salvatore.benedetto@gmail.com>
7  */
8 
9 #include <posix/xsi_semaphore.h>
10 
11 #include <new>
12 
13 #include <sys/ipc.h>
14 #include <sys/types.h>
15 
16 #include <OS.h>
17 
18 #include <kernel.h>
19 #include <syscall_restart.h>
20 
21 #include <util/atomic.h>
22 #include <util/AutoLock.h>
23 #include <util/DoublyLinkedList.h>
24 #include <util/OpenHashTable.h>
25 #include <AutoDeleter.h>
26 
27 
28 //#define TRACE_XSI_SEM
29 #ifdef TRACE_XSI_SEM
30 #	define TRACE(x)			dprintf x
31 #	define TRACE_ERROR(x)	dprintf x
32 #else
33 #	define TRACE(x)			/* nothing */
34 #	define TRACE_ERROR(x)	dprintf x
35 #endif
36 
37 // Queue for holding blocked threads
38 struct queued_thread : DoublyLinkedListLinkImpl<queued_thread> {
39 	queued_thread(Thread *thread, int32 count)
40 		:
41 		thread(thread),
42 		count(count),
43 		queued(false)
44 	{
45 	}
46 
47 	Thread	*thread;
48 	int32	count;
49 	bool	queued;
50 };
51 
52 typedef DoublyLinkedList<queued_thread> ThreadQueue;
53 
54 class XsiSemaphoreSet;
55 
56 struct sem_undo : DoublyLinkedListLinkImpl<sem_undo> {
57 	sem_undo(XsiSemaphoreSet *semaphoreSet, Team *team, int16 *undoValues)
58 		:
59 		semaphore_set(semaphoreSet),
60 		team(team),
61 		undo_values(undoValues)
62 	{
63 	}
64 
65 	DoublyLinkedListLink<sem_undo>		team_link;
66 	XsiSemaphoreSet						*semaphore_set;
67 	Team								*team;
68 	int16								*undo_values;
69 };
70 
71 typedef DoublyLinkedList<sem_undo> UndoList;
72 typedef DoublyLinkedList<sem_undo,
73 	DoublyLinkedListMemberGetLink<sem_undo, &sem_undo::team_link> > TeamList;
74 
75 struct xsi_sem_context {
76 	xsi_sem_context()
77 	{
78 		mutex_init(&lock, "Private team undo_list lock");
79 	}
80 
81 	~xsi_sem_context()
82 	{
83 		mutex_destroy(&lock);
84 	}
85 
86 	TeamList	undo_list;
87 	mutex		lock;
88 };
89 
90 // Xsi semaphore definition
91 class XsiSemaphore {
92 public:
93 	XsiSemaphore()
94 		: fLastPidOperation(0),
95 		fThreadsWaitingToIncrease(0),
96 		fThreadsWaitingToBeZero(0),
97 		fValue(0)
98 	{
99 	}
100 
101 	~XsiSemaphore()
102 	{
103 		// For some reason the semaphore is getting destroyed.
104 		// Wake up any remaing awaiting threads
105 		while (queued_thread *entry = fWaitingToIncreaseQueue.RemoveHead()) {
106 			entry->queued = false;
107 			thread_unblock(entry->thread, EIDRM);
108 		}
109 		while (queued_thread *entry = fWaitingToBeZeroQueue.RemoveHead()) {
110 			entry->queued = false;
111 			thread_unblock(entry->thread, EIDRM);
112 		}
113 		// No need to remove any sem_undo request still
114 		// hanging. When the process exit and doesn't found
115 		// the semaphore set, it'll just ignore the sem_undo
116 		// request. That's better than iterating trough the
117 		// whole sUndoList. Beside we don't know our semaphore
118 		// number nor our semaphore set id.
119 	}
120 
121 	// We return true in case the operation causes the
122 	// caller to wait, so it can undo all the operations
123 	// previously done
124 	bool Add(short value)
125 	{
126 		if ((int)(fValue + value) < 0) {
127 			TRACE(("XsiSemaphore::Add: potentially going to sleep\n"));
128 			return true;
129 		} else {
130 			fValue += value;
131 			if (fValue == 0 && fThreadsWaitingToBeZero > 0)
132 				WakeUpThread(true);
133 			else if (fValue > 0 && fThreadsWaitingToIncrease > 0)
134 				WakeUpThread(false);
135 			return false;
136 		}
137 	}
138 
139 	status_t BlockAndUnlock(Thread *thread, MutexLocker *setLocker)
140 	{
141 		thread_prepare_to_block(thread, B_CAN_INTERRUPT,
142 			THREAD_BLOCK_TYPE_OTHER, (void*)"xsi semaphore");
143 		// Unlock the set before blocking
144 		setLocker->Unlock();
145 
146 // TODO: We've got a serious race condition: If BlockAndUnlock() returned due to
147 // interruption, we will still be queued. A WakeUpThread() at this point will
148 // call thread_unblock() and might thus screw with our trying to re-lock the
149 // mutex.
150 		return thread_block();
151 	}
152 
153 	void Deque(queued_thread *queueEntry, bool waitForZero)
154 	{
155 		if (queueEntry->queued) {
156 			if (waitForZero) {
157 				fWaitingToBeZeroQueue.Remove(queueEntry);
158 				fThreadsWaitingToBeZero--;
159 			} else {
160 				fWaitingToIncreaseQueue.Remove(queueEntry);
161 				fThreadsWaitingToIncrease--;
162 			}
163 		}
164 	}
165 
166 	void Enqueue(queued_thread *queueEntry, bool waitForZero)
167 	{
168 		if (waitForZero) {
169 			fWaitingToBeZeroQueue.Add(queueEntry);
170 			fThreadsWaitingToBeZero++;
171 		} else {
172 			fWaitingToIncreaseQueue.Add(queueEntry);
173 			fThreadsWaitingToIncrease++;
174 		}
175 		queueEntry->queued = true;
176 	}
177 
178 	pid_t LastPid() const
179 	{
180 		return fLastPidOperation;
181 	}
182 
183 	void Revert(short value)
184 	{
185 		fValue -= value;
186 		if (fValue == 0 && fThreadsWaitingToBeZero > 0)
187 			WakeUpThread(true);
188 		else if (fValue > 0 && fThreadsWaitingToIncrease > 0)
189 			WakeUpThread(false);
190 	}
191 
192 	void SetPid(pid_t pid)
193 	{
194 		fLastPidOperation = pid;
195 	}
196 
197 	void SetValue(ushort value)
198 	{
199 		fValue = value;
200 	}
201 
202 	ushort ThreadsWaitingToIncrease() const
203 	{
204 		return fThreadsWaitingToIncrease;
205 	}
206 
207 	ushort ThreadsWaitingToBeZero() const
208 	{
209 		return fThreadsWaitingToBeZero;
210 	}
211 
212 	ushort Value() const
213 	{
214 		return fValue;
215 	}
216 
217 	void WakeUpThread(bool waitingForZero)
218 	{
219 		if (waitingForZero) {
220 			// Wake up all threads waiting on zero
221 			while (queued_thread *entry = fWaitingToBeZeroQueue.RemoveHead()) {
222 				entry->queued = false;
223 				fThreadsWaitingToBeZero--;
224 				thread_unblock(entry->thread, 0);
225 			}
226 		} else {
227 			// Wake up all threads even though they might go back to sleep
228 			while (queued_thread *entry = fWaitingToIncreaseQueue.RemoveHead()) {
229 				entry->queued = false;
230 				fThreadsWaitingToIncrease--;
231 				thread_unblock(entry->thread, 0);
232 			}
233 		}
234 	}
235 
236 private:
237 	pid_t				fLastPidOperation;				// sempid
238 	ushort				fThreadsWaitingToIncrease;		// semncnt
239 	ushort				fThreadsWaitingToBeZero;		// semzcnt
240 	ushort				fValue;							// semval
241 
242 	ThreadQueue			fWaitingToIncreaseQueue;
243 	ThreadQueue			fWaitingToBeZeroQueue;
244 };
245 
246 #define MAX_XSI_SEMS_PER_TEAM	128
247 
248 // Xsi semaphore set definition (semid_ds)
249 class XsiSemaphoreSet {
250 public:
251 	XsiSemaphoreSet(int numberOfSemaphores, int flags)
252 		: fInitOK(false),
253 		fLastSemctlTime((time_t)real_time_clock()),
254 		fLastSemopTime(0),
255 		fNumberOfSemaphores(numberOfSemaphores),
256 		fSemaphores(0)
257 	{
258 		mutex_init(&fLock, "XsiSemaphoreSet private mutex");
259 		SetIpcKey((key_t)-1);
260 		SetPermissions(flags);
261 		fSemaphores = new(std::nothrow) XsiSemaphore[numberOfSemaphores];
262 		if (fSemaphores == NULL) {
263 			TRACE_ERROR(("XsiSemaphoreSet::XsiSemaphore(): failed to allocate "
264 				"XsiSemaphore object\n"));
265 		} else
266 			fInitOK = true;
267 	}
268 
269 	~XsiSemaphoreSet()
270 	{
271 		TRACE(("XsiSemaphoreSet::~XsiSemaphoreSet(): removing semaphore "
272 			"set %d\n", fID));
273 		mutex_destroy(&fLock);
274 		delete[] fSemaphores;
275 	}
276 
277 	void ClearUndo(unsigned short semaphoreNumber)
278 	{
279 		Team *team = thread_get_current_thread()->team;
280 		UndoList::Iterator iterator = fUndoList.GetIterator();
281 		while (iterator.HasNext()) {
282 			struct sem_undo *current = iterator.Next();
283 			if (current->team == team) {
284 				TRACE(("XsiSemaphoreSet::ClearUndo: teamID = %d, "
285 					"semaphoreSetID = %d, semaphoreNumber = %d\n",
286 					fID, semaphoreNumber, (int)team->id));
287 				MutexLocker _(team->xsi_sem_context->lock);
288 				current->undo_values[semaphoreNumber] = 0;
289 				return;
290 			}
291 		}
292 	}
293 
294 	void ClearUndos()
295 	{
296 		// Clear all undo_values (POSIX semadj equivalent)
297 		// of the calling team. This happens only on semctl SETALL.
298 		Team *team = thread_get_current_thread()->team;
299 		DoublyLinkedList<sem_undo>::Iterator iterator = fUndoList.GetIterator();
300 		while (iterator.HasNext()) {
301 			struct sem_undo *current = iterator.Next();
302 			if (current->team == team) {
303 				TRACE(("XsiSemaphoreSet::ClearUndos: teamID = %d, "
304 					"semaphoreSetID = %d\n", (int)team->id, fID));
305 				MutexLocker _(team->xsi_sem_context->lock);
306 				memset(current->undo_values, 0,
307 					sizeof(int16) * fNumberOfSemaphores);
308 				return;
309 			}
310 		}
311 	}
312 
313 	void DoIpcSet(struct semid_ds *result)
314 	{
315 		fPermissions.uid = result->sem_perm.uid;
316 		fPermissions.gid = result->sem_perm.gid;
317 		fPermissions.mode = (fPermissions.mode & ~0x01ff)
318 			| (result->sem_perm.mode & 0x01ff);
319 	}
320 
321 	bool HasPermission() const
322 	{
323 		if ((fPermissions.mode & S_IWOTH) != 0)
324 			return true;
325 
326 		uid_t uid = geteuid();
327 		if (uid == 0 || (uid == fPermissions.uid
328 			&& (fPermissions.mode & S_IWUSR) != 0))
329 			return true;
330 
331 		gid_t gid = getegid();
332 		if (gid == fPermissions.gid && (fPermissions.mode & S_IWGRP) != 0)
333 			return true;
334 
335 		return false;
336 	}
337 
338 	bool HasReadPermission() const
339 	{
340 		// TODO: fix this
341 		return HasPermission();
342 	}
343 
344 	int ID() const
345 	{
346 		return fID;
347 	}
348 
349 	bool InitOK()
350 	{
351 		return fInitOK;
352 	}
353 
354 	key_t IpcKey() const
355 	{
356 		return fPermissions.key;
357 	}
358 
359 	struct ipc_perm IpcPermission() const
360 	{
361 		return fPermissions;
362 	}
363 
364 	time_t LastSemctlTime() const
365 	{
366 		return fLastSemctlTime;
367 	}
368 
369 	time_t LastSemopTime() const
370 	{
371 		return fLastSemopTime;
372 	}
373 
374 	mutex &Lock()
375 	{
376 		return fLock;
377 	}
378 
379 	ushort NumberOfSemaphores() const
380 	{
381 		return fNumberOfSemaphores;
382 	}
383 
384 	// Record the sem_undo operation into our private fUndoList and
385 	// the team undo_list. The only limit here is the memory needed
386 	// for creating a new sem_undo structure.
387 	int RecordUndo(short semaphoreNumber, short value)
388 	{
389 		// Look if there is already a record from the team caller
390 		// for the same semaphore set
391 		bool notFound = true;
392 		Team *team = thread_get_current_thread()->team;
393 		DoublyLinkedList<sem_undo>::Iterator iterator = fUndoList.GetIterator();
394 		while (iterator.HasNext()) {
395 			struct sem_undo *current = iterator.Next();
396 			if (current->team == team) {
397 				// Update its undo value
398 				MutexLocker _(team->xsi_sem_context->lock);
399 				int newValue = current->undo_values[semaphoreNumber] + value;
400 				if (newValue > USHRT_MAX || newValue < -USHRT_MAX) {
401 					TRACE_ERROR(("XsiSemaphoreSet::RecordUndo: newValue %d "
402 						"out of range\n", newValue));
403 					return ERANGE;
404 				}
405 				current->undo_values[semaphoreNumber] = newValue;
406 				notFound = false;
407 				TRACE(("XsiSemaphoreSet::RecordUndo: found record. Team = %d, "
408 					"semaphoreSetID = %d, semaphoreNumber = %d, value = %d\n",
409 					(int)team->id, fID, semaphoreNumber,
410 					current->undo_values[semaphoreNumber]));
411 				break;
412 			}
413 		}
414 
415 		if (notFound) {
416 			// First sem_undo request from this team for this
417 			// semaphore set
418 			int16 *undoValues
419 				= (int16 *)malloc(sizeof(int16) * fNumberOfSemaphores);
420 			if (undoValues == NULL)
421 				return B_NO_MEMORY;
422 			struct sem_undo *request
423 				= new(std::nothrow) sem_undo(this, team, undoValues);
424 			if (request == NULL) {
425 				free(undoValues);
426 				return B_NO_MEMORY;
427 			}
428 			memset(request->undo_values, 0, sizeof(int16) * fNumberOfSemaphores);
429 			request->undo_values[semaphoreNumber] = value;
430 
431 			// Check if it's the very first sem_undo request for this team
432 			xsi_sem_context *context = atomic_pointer_get(&team->xsi_sem_context);
433 			if (context == NULL) {
434 				// Create the context
435 				context = new(std::nothrow) xsi_sem_context;
436 				if (context == NULL) {
437 					free(request->undo_values);
438 					delete request;
439 					return B_NO_MEMORY;
440 				}
441 				// Since we don't hold any global lock, someone
442 				// else could have been quicker than us, so we have
443 				// to delete the one we just created and use the one
444 				// in place.
445 				if (atomic_pointer_test_and_set(&team->xsi_sem_context, context,
446 					(xsi_sem_context *)NULL) != NULL)
447 					delete context;
448 			}
449 
450 			// Add the request to both XsiSemaphoreSet and team list
451 			fUndoList.Add(request);
452 			MutexLocker _(team->xsi_sem_context->lock);
453 			team->xsi_sem_context->undo_list.Add(request);
454 			TRACE(("XsiSemaphoreSet::RecordUndo: new record added. Team = %d, "
455 				"semaphoreSetID = %d, semaphoreNumber = %d, value = %d\n",
456 				(int)team->id, fID, semaphoreNumber, value));
457 		}
458 		return B_OK;
459 	}
460 
461 	void RevertUndo(short semaphoreNumber, short value)
462 	{
463 		// This can be called only when RecordUndo fails.
464 		Team *team = thread_get_current_thread()->team;
465 		DoublyLinkedList<sem_undo>::Iterator iterator = fUndoList.GetIterator();
466 		while (iterator.HasNext()) {
467 			struct sem_undo *current = iterator.Next();
468 			if (current->team == team) {
469 				MutexLocker _(team->xsi_sem_context->lock);
470 				fSemaphores[semaphoreNumber].Revert(value);
471 				break;
472 			}
473 		}
474 	}
475 
476 	XsiSemaphore* Semaphore(int nth) const
477 	{
478 		return &fSemaphores[nth];
479 	}
480 
481 	uint32 SequenceNumber() const
482 	{
483 		return fSequenceNumber;
484 	}
485 
486 	// Implemented after sGlobalSequenceNumber is declared
487 	void SetID();
488 
489 	void SetIpcKey(key_t key)
490 	{
491 		fPermissions.key = key;
492 	}
493 
494 	void SetLastSemctlTime()
495 	{
496 		fLastSemctlTime = real_time_clock();
497 	}
498 
499 	void SetLastSemopTime()
500 	{
501 		fLastSemopTime = real_time_clock();
502 	}
503 
504 	void SetPermissions(int flags)
505 	{
506 		fPermissions.uid = fPermissions.cuid = geteuid();
507 		fPermissions.gid = fPermissions.cgid = getegid();
508 		fPermissions.mode = (flags & 0x01ff);
509 	}
510 
511 	UndoList &GetUndoList()
512 	{
513 		return fUndoList;
514 	}
515 
516 	XsiSemaphoreSet*& Link()
517 	{
518 		return fLink;
519 	}
520 
521 private:
522 	int							fID;					// semaphore set id
523 	bool						fInitOK;
524 	time_t						fLastSemctlTime;		// sem_ctime
525 	time_t						fLastSemopTime;			// sem_otime
526 	mutex 						fLock;					// private lock
527 	ushort						fNumberOfSemaphores;	// sem_nsems
528 	struct ipc_perm				fPermissions;			// sem_perm
529 	XsiSemaphore				*fSemaphores;			// array of semaphores
530 	uint32						fSequenceNumber;		// used as a second id
531 	UndoList					fUndoList;				// undo list requests
532 
533 	XsiSemaphoreSet*			fLink;
534 };
535 
536 // Xsi semaphore set hash table
537 struct SemaphoreHashTableDefinition {
538 	typedef int					KeyType;
539 	typedef XsiSemaphoreSet		ValueType;
540 
541 	size_t HashKey (const int key) const
542 	{
543 		return (size_t)key;
544 	}
545 
546 	size_t Hash(XsiSemaphoreSet *variable) const
547 	{
548 		return (size_t)variable->ID();
549 	}
550 
551 	bool Compare(const int key, XsiSemaphoreSet *variable) const
552 	{
553 		return (int)key == (int)variable->ID();
554 	}
555 
556 	XsiSemaphoreSet*& GetLink(XsiSemaphoreSet *variable) const
557 	{
558 		return variable->Link();
559 	}
560 };
561 
562 
563 // IPC class
564 class Ipc {
565 public:
566 	Ipc(key_t key)
567 		: fKey(key),
568 		fSemaphoreSetId(-1)
569 	{
570 	}
571 
572 	key_t Key() const
573 	{
574 		return fKey;
575 	}
576 
577 	int SemaphoreSetID() const
578 	{
579 		return fSemaphoreSetId;
580 	}
581 
582 	void SetSemaphoreSetID(XsiSemaphoreSet *semaphoreSet)
583 	{
584 		fSemaphoreSetId = semaphoreSet->ID();
585 	}
586 
587 	Ipc*& Link()
588 	{
589 		return fLink;
590 	}
591 
592 private:
593 	key_t				fKey;
594 	int					fSemaphoreSetId;
595 	Ipc*				fLink;
596 };
597 
598 
599 struct IpcHashTableDefinition {
600 	typedef key_t	KeyType;
601 	typedef Ipc		ValueType;
602 
603 	size_t HashKey (const key_t key) const
604 	{
605 		return (size_t)(key);
606 	}
607 
608 	size_t Hash(Ipc *variable) const
609 	{
610 		return (size_t)HashKey(variable->Key());
611 	}
612 
613 	bool Compare(const key_t key, Ipc *variable) const
614 	{
615 		return (key_t)key == (key_t)variable->Key();
616 	}
617 
618 	Ipc*& GetLink(Ipc *variable) const
619 	{
620 		return variable->Link();
621 	}
622 };
623 
624 // Arbitrary limit
625 #define MAX_XSI_SEMAPHORE		4096
626 #define MAX_XSI_SEMAPHORE_SET	2048
627 static BOpenHashTable<IpcHashTableDefinition> sIpcHashTable;
628 static BOpenHashTable<SemaphoreHashTableDefinition> sSemaphoreHashTable;
629 
630 static mutex sIpcLock;
631 static mutex sXsiSemaphoreSetLock;
632 
633 static uint32 sGlobalSequenceNumber = 1;
634 static int32 sXsiSemaphoreCount = 0;
635 static int32 sXsiSemaphoreSetCount = 0;
636 
637 
638 //	#pragma mark -
639 
640 
641 void
642 XsiSemaphoreSet::SetID()
643 {
644 	fID = real_time_clock();
645 	// The lock is held before calling us
646 	while (true) {
647 		if (sSemaphoreHashTable.Lookup(fID) == NULL)
648 			break;
649 		fID = (fID + 1) % INT_MAX;
650 	}
651 	sGlobalSequenceNumber = (sGlobalSequenceNumber + 1) % UINT_MAX;
652 	fSequenceNumber = sGlobalSequenceNumber;
653 }
654 
655 
656 //	#pragma mark - Kernel exported API
657 
658 
659 void
660 xsi_sem_init()
661 {
662 	// Initialize hash tables
663 	status_t status = sIpcHashTable.Init();
664 	if (status != B_OK)
665 		panic("xsi_sem_init() failed to initialize ipc hash table\n");
666 	status =  sSemaphoreHashTable.Init();
667 	if (status != B_OK)
668 		panic("xsi_sem_init() failed to initialize semaphore hash table\n");
669 
670 	mutex_init(&sIpcLock, "global POSIX semaphore IPC table");
671 	mutex_init(&sXsiSemaphoreSetLock, "global POSIX xsi sem table");
672 }
673 
674 
675 /*!	Function called on team exit to process any sem_undo requests */
676 void
677 xsi_sem_undo(Team *team)
678 {
679 	if (team->xsi_sem_context == NULL)
680 		return;
681 
682 	// By acquiring first the semaphore hash table lock
683 	// we make sure the semaphore set in our sem_undo
684 	// list won't get removed by IPC_RMID call
685 	MutexLocker _(sXsiSemaphoreSetLock);
686 
687 	// Process all sem_undo request in the team sem undo list
688 	// if any
689 	TeamList::Iterator iterator
690 		= team->xsi_sem_context->undo_list.GetIterator();
691 	while (iterator.HasNext()) {
692 		struct sem_undo *current = iterator.Next();
693 		XsiSemaphoreSet *semaphoreSet = current->semaphore_set;
694 		// Acquire the set lock in order to prevent race
695 		// condition with RecordUndo
696 		MutexLocker setLocker(semaphoreSet->Lock());
697 		MutexLocker _(team->xsi_sem_context->lock);
698 		// Revert the changes done by this process
699 		for (int i = 0; i < semaphoreSet->NumberOfSemaphores(); i++)
700 			if (current->undo_values[i] != 0) {
701 				TRACE(("xsi_sem_undo: TeamID = %d, SemaphoreSetID = %d, "
702 					"SemaphoreNumber = %d, undo value = %d\n", (int)team->id,
703 					semaphoreSet->ID(), i, (int)current->undo_values[i]));
704 				semaphoreSet->Semaphore(i)->Revert(current->undo_values[i]);
705 			}
706 
707 		// Remove and free the sem_undo structure from both lists
708 		iterator.Remove();
709 		semaphoreSet->GetUndoList().Remove(current);
710 		delete current;
711 	}
712 	delete team->xsi_sem_context;
713 	team->xsi_sem_context = NULL;
714 }
715 
716 
717 //	#pragma mark - Syscalls
718 
719 
720 int
721 _user_xsi_semget(key_t key, int numberOfSemaphores, int flags)
722 {
723 	TRACE(("xsi_semget: key = %d, numberOfSemaphores = %d, flags = %d\n",
724 		(int)key, numberOfSemaphores, flags));
725 	XsiSemaphoreSet *semaphoreSet = NULL;
726 	Ipc *ipcKey = NULL;
727 	// Default assumptions
728 	bool isPrivate = true;
729 	bool create = true;
730 
731 	MutexLocker _(sIpcLock);
732 	if (key != IPC_PRIVATE) {
733 		isPrivate = false;
734 		// Check if key already exist, if it does it already has a semaphore
735 		// set associated with it
736 		ipcKey = sIpcHashTable.Lookup(key);
737 		if (ipcKey == NULL) {
738 			// The ipc key does not exist. Create it and add it to the system
739 			if (!(flags & IPC_CREAT)) {
740 				TRACE_ERROR(("xsi_semget: key %d does not exist, but the "
741 					"caller did not ask for creation\n",(int)key));
742 				return ENOENT;
743 			}
744 			ipcKey = new(std::nothrow) Ipc(key);
745 			if (ipcKey == NULL) {
746 				TRACE_ERROR(("xsi_semget: failed to create new Ipc object "
747 					"for key %d\n",	(int)key));
748 				return ENOMEM;
749 			}
750 			sIpcHashTable.Insert(ipcKey);
751 		} else {
752 			// The IPC key exist and it already has a semaphore
753 			if ((flags & IPC_CREAT) && (flags & IPC_EXCL)) {
754 				TRACE_ERROR(("xsi_semget: key %d already exist\n", (int)key));
755 				return EEXIST;
756 			}
757 			int semaphoreSetID = ipcKey->SemaphoreSetID();
758 
759 			MutexLocker _(sXsiSemaphoreSetLock);
760 			semaphoreSet = sSemaphoreHashTable.Lookup(semaphoreSetID);
761 			if (!semaphoreSet->HasPermission()) {
762 				TRACE_ERROR(("xsi_semget: calling process has not permission "
763 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
764 					(int)key));
765 				return EACCES;
766 			}
767 			if (numberOfSemaphores > semaphoreSet->NumberOfSemaphores()
768 				&& numberOfSemaphores != 0) {
769 				TRACE_ERROR(("xsi_semget: numberOfSemaphores greater than the "
770 					"one associated with semaphore %d, key %d\n",
771 					semaphoreSet->ID(), (int)key));
772 				return EINVAL;
773 			}
774 			create = false;
775 		}
776 	}
777 
778 	if (create) {
779 		// Create a new sempahore set for this key
780 		if (numberOfSemaphores <= 0
781 			|| numberOfSemaphores >= MAX_XSI_SEMS_PER_TEAM) {
782 			TRACE_ERROR(("xsi_semget: numberOfSemaphores out of range\n"));
783 			return EINVAL;
784 		}
785 		if (sXsiSemaphoreCount >= MAX_XSI_SEMAPHORE
786 			|| sXsiSemaphoreSetCount >= MAX_XSI_SEMAPHORE_SET) {
787 			TRACE_ERROR(("xsi_semget: reached limit of maximum number of "
788 				"semaphores allowed\n"));
789 			return ENOSPC;
790 		}
791 
792 		semaphoreSet = new(std::nothrow) XsiSemaphoreSet(numberOfSemaphores,
793 			flags);
794 		if (semaphoreSet == NULL || !semaphoreSet->InitOK()) {
795 			TRACE_ERROR(("xsi_semget: failed to allocate a new xsi "
796 				"semaphore set\n"));
797 			delete semaphoreSet;
798 			return ENOMEM;
799 		}
800 		atomic_add(&sXsiSemaphoreCount, numberOfSemaphores);
801 		atomic_add(&sXsiSemaphoreSetCount, 1);
802 
803 		MutexLocker _(sXsiSemaphoreSetLock);
804 		semaphoreSet->SetID();
805 		if (isPrivate)
806 			semaphoreSet->SetIpcKey((key_t)-1);
807 		else {
808 			semaphoreSet->SetIpcKey(key);
809 			ipcKey->SetSemaphoreSetID(semaphoreSet);
810 		}
811 		sSemaphoreHashTable.Insert(semaphoreSet);
812 		TRACE(("semget: new set = %d created, sequence = %ld\n",
813 			semaphoreSet->ID(), semaphoreSet->SequenceNumber()));
814 	}
815 
816 	return semaphoreSet->ID();
817 }
818 
819 
820 int
821 _user_xsi_semctl(int semaphoreID, int semaphoreNumber, int command,
822 	union semun *args)
823 {
824 	TRACE(("xsi_semctl: semaphoreID = %d, semaphoreNumber = %d, command = %d\n",
825 		semaphoreID, semaphoreNumber, command));
826 	MutexLocker ipcHashLocker(sIpcLock);
827 	MutexLocker setHashLocker(sXsiSemaphoreSetLock);
828 	XsiSemaphoreSet *semaphoreSet = sSemaphoreHashTable.Lookup(semaphoreID);
829 	if (semaphoreSet == NULL) {
830 		TRACE_ERROR(("xsi_semctl: semaphore set id %d not valid\n",
831 			semaphoreID));
832 		return EINVAL;
833 	}
834 	if (semaphoreNumber < 0
835 		|| semaphoreNumber > semaphoreSet->NumberOfSemaphores()) {
836 		TRACE_ERROR(("xsi_semctl: semaphore number %d not valid for "
837 			"semaphore %d\n", semaphoreNumber, semaphoreID));
838 		return EINVAL;
839 	}
840 	if (args != 0 && !IS_USER_ADDRESS(args)) {
841 		TRACE_ERROR(("xsi_semctl: semun address is not valid\n"));
842 		return B_BAD_ADDRESS;
843 	}
844 
845 	// Lock the semaphore set itself and release both the semaphore
846 	// set hash table lock and the ipc hash table lock _only_ if
847 	// the command it's not IPC_RMID, this prevents undesidered
848 	// situation from happening while (hopefully) improving the
849 	// concurrency.
850 	MutexLocker setLocker;
851 	if (command != IPC_RMID) {
852 		setLocker.SetTo(&semaphoreSet->Lock(), false);
853 		setHashLocker.Unlock();
854 		ipcHashLocker.Unlock();
855 	} else
856 		// We are about to delete the set along with its mutex, so
857 		// we can't use the MutexLocker class, as the mutex itself
858 		// won't exist on function exit
859 		mutex_lock(&semaphoreSet->Lock());
860 
861 	int result = 0;
862 	XsiSemaphore *semaphore = semaphoreSet->Semaphore(semaphoreNumber);
863 	switch (command) {
864 		case GETVAL: {
865 			if (!semaphoreSet->HasReadPermission()) {
866 				TRACE_ERROR(("xsi_semctl: calling process has not permission "
867 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
868 					(int)semaphoreSet->IpcKey()));
869 				result = EACCES;
870 			} else
871 				result = semaphore->Value();
872 			break;
873 		}
874 
875 		case SETVAL: {
876 			if (!semaphoreSet->HasPermission()) {
877 				TRACE_ERROR(("xsi_semctl: calling process has not permission "
878 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
879 					(int)semaphoreSet->IpcKey()));
880 				result = EACCES;
881 			} else {
882 				int value;
883 				if (user_memcpy(&value, &args->val, sizeof(int)) < B_OK) {
884 					TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
885 					result = B_BAD_ADDRESS;
886 				} else if (value > USHRT_MAX) {
887 					TRACE_ERROR(("xsi_semctl: value %d out of range\n", value));
888 					result = ERANGE;
889 				} else {
890 					semaphore->SetValue(value);
891 					semaphoreSet->ClearUndo(semaphoreNumber);
892 				}
893 			}
894 			break;
895 		}
896 
897 		case GETPID: {
898 			if (!semaphoreSet->HasReadPermission()) {
899 				TRACE_ERROR(("xsi_semctl: calling process has not permission "
900 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
901 					(int)semaphoreSet->IpcKey()));
902 				result = EACCES;
903 			} else
904 				result = semaphore->LastPid();
905 			break;
906 		}
907 
908 		case GETNCNT: {
909 			if (!semaphoreSet->HasReadPermission()) {
910 				TRACE_ERROR(("xsi_semctl: calling process has not permission "
911 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
912 					(int)semaphoreSet->IpcKey()));
913 				result = EACCES;
914 			} else
915 				result = semaphore->ThreadsWaitingToIncrease();
916 			break;
917 		}
918 
919 		case GETZCNT: {
920 			if (!semaphoreSet->HasReadPermission()) {
921 				TRACE_ERROR(("xsi_semctl: calling process has not permission "
922 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
923 					(int)semaphoreSet->IpcKey()));
924 				result = EACCES;
925 			} else
926 				result = semaphore->ThreadsWaitingToBeZero();
927 			break;
928 		}
929 
930 		case GETALL: {
931 			if (!semaphoreSet->HasReadPermission()) {
932 				TRACE_ERROR(("xsi_semctl: calling process has not read "
933 					"permission on semaphore %d, key %d\n", semaphoreSet->ID(),
934 					(int)semaphoreSet->IpcKey()));
935 				result = EACCES;
936 			} else
937 				for (int i = 0; i < semaphoreSet->NumberOfSemaphores(); i++) {
938 					semaphore = semaphoreSet->Semaphore(i);
939 					unsigned short value = semaphore->Value();
940 					if (user_memcpy(&args->array[i], &value,
941 						sizeof(unsigned short)) < B_OK) {
942 						TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
943 						result = B_BAD_ADDRESS;
944 						break;
945 					}
946 				}
947 			break;
948 		}
949 
950 		case SETALL: {
951 			if (!semaphoreSet->HasPermission()) {
952 				TRACE_ERROR(("xsi_semctl: calling process has not permission "
953 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
954 					(int)semaphoreSet->IpcKey()));
955 				result = EACCES;
956 			} else {
957 				bool doClear = true;
958 				for (int i = 0; i < semaphoreSet->NumberOfSemaphores(); i++) {
959 					semaphore = semaphoreSet->Semaphore(i);
960 					unsigned short value;
961 					if (user_memcpy(&value, &args->array[i], sizeof(unsigned short))
962 						< B_OK) {
963 						TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
964 						result = B_BAD_ADDRESS;
965 						doClear = false;
966 						break;
967 					} else
968 						semaphore->SetValue(value);
969 				}
970 				if (doClear)
971 					semaphoreSet->ClearUndos();
972 			}
973 			break;
974 		}
975 
976 		case IPC_STAT: {
977 			if (!semaphoreSet->HasReadPermission()) {
978 				TRACE_ERROR(("xsi_semctl: calling process has not read "
979 					"permission on semaphore %d, key %d\n", semaphoreSet->ID(),
980 					(int)semaphoreSet->IpcKey()));
981 				result = EACCES;
982 			} else {
983 				struct semid_ds sem;
984 				sem.sem_perm = semaphoreSet->IpcPermission();
985 				sem.sem_nsems = semaphoreSet->NumberOfSemaphores();
986 				sem.sem_otime = semaphoreSet->LastSemopTime();
987 				sem.sem_ctime = semaphoreSet->LastSemctlTime();
988 				if (user_memcpy(args->buf, &sem, sizeof(struct semid_ds))
989 					< B_OK) {
990 					TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
991 					result = B_BAD_ADDRESS;
992 				}
993 			}
994 			break;
995 		}
996 
997 		case IPC_SET: {
998 			if (!semaphoreSet->HasPermission()) {
999 				TRACE_ERROR(("xsi_semctl: calling process has not "
1000 					"permission on semaphore %d, key %d\n",
1001 					semaphoreSet->ID(),	(int)semaphoreSet->IpcKey()));
1002 				result = EACCES;
1003 			} else {
1004 				struct semid_ds sem;
1005 				if (user_memcpy(&sem, args->buf, sizeof(struct semid_ds))
1006 					< B_OK) {
1007 					TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
1008 					result = B_BAD_ADDRESS;
1009 				} else
1010 					semaphoreSet->DoIpcSet(&sem);
1011 			}
1012 			break;
1013 		}
1014 
1015 		case IPC_RMID: {
1016 			// If this was the command, we are still holding
1017 			// the semaphore set hash table lock along with the
1018 			// ipc hash table lock and the semaphore set lock
1019 			// itself, this way we are sure there is not
1020 			// one waiting in the queue of the mutex.
1021 			if (!semaphoreSet->HasPermission()) {
1022 				TRACE_ERROR(("xsi_semctl: calling process has not "
1023 					"permission on semaphore %d, key %d\n",
1024 					semaphoreSet->ID(),	(int)semaphoreSet->IpcKey()));
1025 				return EACCES;
1026 			}
1027 			key_t key = semaphoreSet->IpcKey();
1028 			Ipc *ipcKey = NULL;
1029 			if (key != -1) {
1030 				ipcKey = sIpcHashTable.Lookup(key);
1031 				sIpcHashTable.Remove(ipcKey);
1032 			}
1033 			sSemaphoreHashTable.Remove(semaphoreSet);
1034 			// Wake up of threads waiting on this set
1035 			// happens in the destructor
1036 			if (key != -1)
1037 				delete ipcKey;
1038 			atomic_add(&sXsiSemaphoreCount, -semaphoreSet->NumberOfSemaphores());
1039 			atomic_add(&sXsiSemaphoreSetCount, -1);
1040 			// Remove any sem_undo request
1041 			while (struct sem_undo *entry
1042 					= semaphoreSet->GetUndoList().RemoveHead()) {
1043 				MutexLocker _(entry->team->xsi_sem_context->lock);
1044 				entry->team->xsi_sem_context->undo_list.Remove(entry);
1045 				delete entry;
1046 			}
1047 
1048 			delete semaphoreSet;
1049 			return 0;
1050 		}
1051 
1052 		default:
1053 			TRACE_ERROR(("xsi_semctl: command %d not valid\n", command));
1054 			result = EINVAL;
1055 	}
1056 
1057 	return result;
1058 }
1059 
1060 
1061 status_t
1062 _user_xsi_semop(int semaphoreID, struct sembuf *ops, size_t numOps)
1063 {
1064 	TRACE(("xsi_semop: semaphoreID = %d, ops = %p, numOps = %ld\n",
1065 		semaphoreID, ops, numOps));
1066 	MutexLocker setHashLocker(sXsiSemaphoreSetLock);
1067 	XsiSemaphoreSet *semaphoreSet = sSemaphoreHashTable.Lookup(semaphoreID);
1068 	if (semaphoreSet == NULL) {
1069 		TRACE_ERROR(("xsi_semop: semaphore set id %d not valid\n",
1070 			semaphoreID));
1071 		return EINVAL;
1072 	}
1073 	MutexLocker setLocker(semaphoreSet->Lock());
1074 	setHashLocker.Unlock();
1075 
1076 	if (!IS_USER_ADDRESS(ops)) {
1077 		TRACE_ERROR(("xsi_semop: sembuf address is not valid\n"));
1078 		return B_BAD_ADDRESS;
1079 	}
1080 
1081 	if (numOps < 0 || numOps >= MAX_XSI_SEMS_PER_TEAM) {
1082 		TRACE_ERROR(("xsi_semop: numOps out of range\n"));
1083 		return EINVAL;
1084 	}
1085 
1086 	struct sembuf *operations
1087 		= (struct sembuf *)malloc(sizeof(struct sembuf) * numOps);
1088 	if (operations == NULL) {
1089 		TRACE_ERROR(("xsi_semop: failed to allocate sembuf struct\n"));
1090 		return B_NO_MEMORY;
1091 	}
1092        MemoryDeleter operationsDeleter(operations);
1093 
1094 	if (user_memcpy(operations, ops,
1095 		(sizeof(struct sembuf) * numOps)) < B_OK) {
1096 		TRACE_ERROR(("xsi_semop: user_memcpy failed\n"));
1097 		return B_BAD_ADDRESS;
1098 	}
1099 
1100 	// We won't do partial request, that is operations
1101 	// only on some sempahores belonging to the set and then
1102 	// going to sleep. If we must wait on a semaphore, we undo
1103 	// all the operations already done and go to sleep, otherwise
1104 	// we may caused some unwanted deadlock among threads
1105 	// fighting for the same set.
1106 	bool notDone = true;
1107 	status_t result = 0;
1108 	while (notDone) {
1109 		XsiSemaphore *semaphore = NULL;
1110 		short numberOfSemaphores = semaphoreSet->NumberOfSemaphores();
1111 		bool goToSleep = false;
1112 
1113 		uint32 i = 0;
1114 		for (; i < numOps; i++) {
1115 			short semaphoreNumber = operations[i].sem_num;
1116 			if (semaphoreNumber >= numberOfSemaphores) {
1117 				TRACE_ERROR(("xsi_semop: %" B_PRIu32 " invalid semaphore number"
1118 					"\n", i));
1119 				result = EINVAL;
1120 				break;
1121 			}
1122 			semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1123 			unsigned short value = semaphore->Value();
1124 			short operation = operations[i].sem_op;
1125 			TRACE(("xsi_semop: semaphoreNumber = %d, value = %d\n",
1126 				semaphoreNumber, value));
1127 			if (operation < 0) {
1128 				if (semaphore->Add(operation)) {
1129 					if (operations[i].sem_flg & IPC_NOWAIT)
1130 						result = EAGAIN;
1131 					else
1132 						goToSleep = true;
1133 					break;
1134 				}
1135 			} else if (operation == 0) {
1136 				if (value == 0)
1137 					continue;
1138 				else if (operations[i].sem_flg & IPC_NOWAIT) {
1139 					result = EAGAIN;
1140 					break;
1141 				} else {
1142 					goToSleep = true;
1143 					break;
1144 				}
1145 			} else {
1146 				// Operation must be greater than zero,
1147 				// just add the value and continue
1148 				semaphore->Add(operation);
1149 			}
1150 		}
1151 
1152 		// Either we have to wait or an error occured
1153 		if (goToSleep || result != 0) {
1154 			// Undo all previously done operations
1155 			for (uint32 j = 0; j < i; j++) {
1156 				short semaphoreNumber = operations[j].sem_num;
1157 				semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1158 				short operation = operations[j].sem_op;
1159 				if (operation != 0)
1160 					semaphore->Revert(operation);
1161 			}
1162                        if (result != 0)
1163 				return result;
1164 
1165 			// We have to wait: first enqueue the thread
1166 			// in the appropriate set waiting list, then
1167 			// unlock the set itself and block the thread.
1168 			bool waitOnZero = true;
1169 			if (operations[i].sem_op != 0)
1170 				waitOnZero = false;
1171 
1172 			Thread *thread = thread_get_current_thread();
1173 			queued_thread queueEntry(thread, (int32)operations[i].sem_op);
1174 			semaphore->Enqueue(&queueEntry, waitOnZero);
1175 
1176 			uint32 sequenceNumber = semaphoreSet->SequenceNumber();
1177 
1178 			TRACE(("xsi_semop: thread %d going to sleep\n", (int)thread->id));
1179 			result = semaphore->BlockAndUnlock(thread, &setLocker);
1180 			TRACE(("xsi_semop: thread %d back to life\n", (int)thread->id));
1181 
1182 			// We are back to life. Find out why!
1183 			// Make sure the set hasn't been deleted or worst yet
1184 			// replaced.
1185 			setHashLocker.Lock();
1186 			semaphoreSet = sSemaphoreHashTable.Lookup(semaphoreID);
1187 			if (result == EIDRM || semaphoreSet == NULL || (semaphoreSet != NULL
1188 				&& sequenceNumber != semaphoreSet->SequenceNumber())) {
1189 				TRACE_ERROR(("xsi_semop: semaphore set id %d (sequence = "
1190 					"%" B_PRIu32 ") got destroyed\n", semaphoreID,
1191 					sequenceNumber));
1192 				notDone = false;
1193 				result = EIDRM;
1194 			} else if (result == B_INTERRUPTED) {
1195 				TRACE_ERROR(("xsi_semop: thread %d got interrupted while "
1196 					"waiting on semaphore set id %d\n",(int)thread->id,
1197 					semaphoreID));
1198 				semaphore->Deque(&queueEntry, waitOnZero);
1199 				result = EINTR;
1200 				notDone = false;
1201 			} else {
1202 				setLocker.Lock();
1203 				setHashLocker.Unlock();
1204 			}
1205 		} else {
1206 			// everything worked like a charm (so far)
1207 			notDone = false;
1208 			TRACE(("xsi_semop: semaphore acquired succesfully\n"));
1209 			// We acquired the semaphore, now records the sem_undo
1210 			// requests
1211 			XsiSemaphore *semaphore = NULL;
1212 			uint32 i = 0;
1213 			for (; i < numOps; i++) {
1214 				short semaphoreNumber = operations[i].sem_num;
1215 				semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1216 				short operation = operations[i].sem_op;
1217 				if (operations[i].sem_flg & SEM_UNDO)
1218 					if (semaphoreSet->RecordUndo(semaphoreNumber, operation)
1219 						!= B_OK) {
1220 						// Unlikely scenario, but we might get here.
1221 						// Undo everything!
1222 						// Start with semaphore operations
1223 						for (uint32 j = 0; j < numOps; j++) {
1224 							short semaphoreNumber = operations[j].sem_num;
1225 							semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1226 							short operation = operations[j].sem_op;
1227 							if (operation != 0)
1228 								semaphore->Revert(operation);
1229 						}
1230 						// Remove all previously registered sem_undo request
1231 						for (uint32 j = 0; j < i; j++) {
1232 							if (operations[j].sem_flg & SEM_UNDO)
1233 								semaphoreSet->RevertUndo(operations[j].sem_num,
1234 									operations[j].sem_op);
1235 						}
1236 						result = ENOSPC;
1237 					}
1238 			}
1239 		}
1240 	}
1241 
1242 	// We did it. Set the pid of all semaphores used
1243 	if (result == 0) {
1244 		for (uint32 i = 0; i < numOps; i++) {
1245 			short semaphoreNumber = operations[i].sem_num;
1246 			XsiSemaphore *semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1247 			semaphore->SetPid(getpid());
1248 		}
1249 	}
1250 	return result;
1251 }
1252