xref: /haiku/src/system/kernel/posix/xsi_semaphore.cpp (revision ed24eb5ff12640d052171c6a7feba37fab8a75d1)
1 /*
2  * Copyright 2008-2011, Haiku, Inc. All rights reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Salvatore Benedetto <salvatore.benedetto@gmail.com>
7  */
8 
9 #include <posix/xsi_semaphore.h>
10 
11 #include <new>
12 
13 #include <sys/ipc.h>
14 #include <sys/types.h>
15 
16 #include <OS.h>
17 
18 #include <kernel.h>
19 #include <syscall_restart.h>
20 
21 #include <util/atomic.h>
22 #include <util/AutoLock.h>
23 #include <util/DoublyLinkedList.h>
24 #include <util/OpenHashTable.h>
25 #include <AutoDeleter.h>
26 
27 
28 //#define TRACE_XSI_SEM
29 #ifdef TRACE_XSI_SEM
30 #	define TRACE(x)			dprintf x
31 #	define TRACE_ERROR(x)	dprintf x
32 #else
33 #	define TRACE(x)			/* nothing */
34 #	define TRACE_ERROR(x)	dprintf x
35 #endif
36 
37 
38 namespace {
39 
40 // Queue for holding blocked threads
41 struct queued_thread : DoublyLinkedListLinkImpl<queued_thread> {
42 	queued_thread(Thread *thread, int32 count)
43 		:
44 		thread(thread),
45 		count(count),
46 		queued(false)
47 	{
48 	}
49 
50 	Thread	*thread;
51 	int32	count;
52 	bool	queued;
53 };
54 
55 typedef DoublyLinkedList<queued_thread> ThreadQueue;
56 
57 class XsiSemaphoreSet;
58 
59 struct sem_undo : DoublyLinkedListLinkImpl<sem_undo> {
60 	sem_undo(XsiSemaphoreSet *semaphoreSet, Team *team, int16 *undoValues)
61 		:
62 		semaphore_set(semaphoreSet),
63 		team(team),
64 		undo_values(undoValues)
65 	{
66 	}
67 
68 	DoublyLinkedListLink<sem_undo>		team_link;
69 	XsiSemaphoreSet						*semaphore_set;
70 	Team								*team;
71 	int16								*undo_values;
72 };
73 
74 typedef DoublyLinkedList<sem_undo> UndoList;
75 typedef DoublyLinkedList<sem_undo,
76 	DoublyLinkedListMemberGetLink<sem_undo, &sem_undo::team_link> > TeamList;
77 
78 } // namespace
79 
80 
81 // Forward declared in global namespace.
82 struct xsi_sem_context {
83 	xsi_sem_context()
84 	{
85 		mutex_init(&lock, "Private team undo_list lock");
86 	}
87 
88 	~xsi_sem_context()
89 	{
90 		mutex_destroy(&lock);
91 	}
92 
93 	TeamList	undo_list;
94 	mutex		lock;
95 };
96 
97 
98 namespace {
99 
100 // Xsi semaphore definition
101 class XsiSemaphore {
102 public:
103 	XsiSemaphore()
104 		: fLastPidOperation(0),
105 		fThreadsWaitingToIncrease(0),
106 		fThreadsWaitingToBeZero(0),
107 		fValue(0)
108 	{
109 	}
110 
111 	~XsiSemaphore()
112 	{
113 		// For some reason the semaphore is getting destroyed.
114 		// Wake up any remaing awaiting threads
115 		while (queued_thread *entry = fWaitingToIncreaseQueue.RemoveHead()) {
116 			entry->queued = false;
117 			thread_unblock(entry->thread, EIDRM);
118 		}
119 		while (queued_thread *entry = fWaitingToBeZeroQueue.RemoveHead()) {
120 			entry->queued = false;
121 			thread_unblock(entry->thread, EIDRM);
122 		}
123 		// No need to remove any sem_undo request still
124 		// hanging. When the process exit and doesn't found
125 		// the semaphore set, it'll just ignore the sem_undo
126 		// request. That's better than iterating trough the
127 		// whole sUndoList. Beside we don't know our semaphore
128 		// number nor our semaphore set id.
129 	}
130 
131 	// We return true in case the operation causes the
132 	// caller to wait, so it can undo all the operations
133 	// previously done
134 	bool Add(short value)
135 	{
136 		if ((int)(fValue + value) < 0) {
137 			TRACE(("XsiSemaphore::Add: potentially going to sleep\n"));
138 			return true;
139 		} else {
140 			fValue += value;
141 			if (fValue == 0 && fThreadsWaitingToBeZero > 0)
142 				WakeUpThread(true);
143 			else if (fValue > 0 && fThreadsWaitingToIncrease > 0)
144 				WakeUpThread(false);
145 			return false;
146 		}
147 	}
148 
149 	status_t BlockAndUnlock(Thread *thread, MutexLocker *setLocker)
150 	{
151 		thread_prepare_to_block(thread, B_CAN_INTERRUPT,
152 			THREAD_BLOCK_TYPE_OTHER, (void*)"xsi semaphore");
153 		// Unlock the set before blocking
154 		setLocker->Unlock();
155 
156 // TODO: We've got a serious race condition: If BlockAndUnlock() returned due to
157 // interruption, we will still be queued. A WakeUpThread() at this point will
158 // call thread_unblock() and might thus screw with our trying to re-lock the
159 // mutex.
160 		return thread_block();
161 	}
162 
163 	void Deque(queued_thread *queueEntry, bool waitForZero)
164 	{
165 		if (queueEntry->queued) {
166 			if (waitForZero) {
167 				fWaitingToBeZeroQueue.Remove(queueEntry);
168 				fThreadsWaitingToBeZero--;
169 			} else {
170 				fWaitingToIncreaseQueue.Remove(queueEntry);
171 				fThreadsWaitingToIncrease--;
172 			}
173 		}
174 	}
175 
176 	void Enqueue(queued_thread *queueEntry, bool waitForZero)
177 	{
178 		if (waitForZero) {
179 			fWaitingToBeZeroQueue.Add(queueEntry);
180 			fThreadsWaitingToBeZero++;
181 		} else {
182 			fWaitingToIncreaseQueue.Add(queueEntry);
183 			fThreadsWaitingToIncrease++;
184 		}
185 		queueEntry->queued = true;
186 	}
187 
188 	pid_t LastPid() const
189 	{
190 		return fLastPidOperation;
191 	}
192 
193 	void Revert(short value)
194 	{
195 		fValue -= value;
196 		if (fValue == 0 && fThreadsWaitingToBeZero > 0)
197 			WakeUpThread(true);
198 		else if (fValue > 0 && fThreadsWaitingToIncrease > 0)
199 			WakeUpThread(false);
200 	}
201 
202 	void SetPid(pid_t pid)
203 	{
204 		fLastPidOperation = pid;
205 	}
206 
207 	void SetValue(ushort value)
208 	{
209 		fValue = value;
210 	}
211 
212 	ushort ThreadsWaitingToIncrease() const
213 	{
214 		return fThreadsWaitingToIncrease;
215 	}
216 
217 	ushort ThreadsWaitingToBeZero() const
218 	{
219 		return fThreadsWaitingToBeZero;
220 	}
221 
222 	ushort Value() const
223 	{
224 		return fValue;
225 	}
226 
227 	void WakeUpThread(bool waitingForZero)
228 	{
229 		if (waitingForZero) {
230 			// Wake up all threads waiting on zero
231 			while (queued_thread *entry = fWaitingToBeZeroQueue.RemoveHead()) {
232 				entry->queued = false;
233 				fThreadsWaitingToBeZero--;
234 				thread_unblock(entry->thread, 0);
235 			}
236 		} else {
237 			// Wake up all threads even though they might go back to sleep
238 			while (queued_thread *entry = fWaitingToIncreaseQueue.RemoveHead()) {
239 				entry->queued = false;
240 				fThreadsWaitingToIncrease--;
241 				thread_unblock(entry->thread, 0);
242 			}
243 		}
244 	}
245 
246 private:
247 	pid_t				fLastPidOperation;				// sempid
248 	ushort				fThreadsWaitingToIncrease;		// semncnt
249 	ushort				fThreadsWaitingToBeZero;		// semzcnt
250 	ushort				fValue;							// semval
251 
252 	ThreadQueue			fWaitingToIncreaseQueue;
253 	ThreadQueue			fWaitingToBeZeroQueue;
254 };
255 
256 #define MAX_XSI_SEMS_PER_TEAM	128
257 
258 // Xsi semaphore set definition (semid_ds)
259 class XsiSemaphoreSet {
260 public:
261 	XsiSemaphoreSet(int numberOfSemaphores, int flags)
262 		: fInitOK(false),
263 		fLastSemctlTime((time_t)real_time_clock()),
264 		fLastSemopTime(0),
265 		fNumberOfSemaphores(numberOfSemaphores),
266 		fSemaphores(0)
267 	{
268 		mutex_init(&fLock, "XsiSemaphoreSet private mutex");
269 		SetIpcKey((key_t)-1);
270 		SetPermissions(flags);
271 		fSemaphores = new(std::nothrow) XsiSemaphore[numberOfSemaphores];
272 		if (fSemaphores == NULL) {
273 			TRACE_ERROR(("XsiSemaphoreSet::XsiSemaphore(): failed to allocate "
274 				"XsiSemaphore object\n"));
275 		} else
276 			fInitOK = true;
277 	}
278 
279 	~XsiSemaphoreSet()
280 	{
281 		TRACE(("XsiSemaphoreSet::~XsiSemaphoreSet(): removing semaphore "
282 			"set %d\n", fID));
283 		mutex_destroy(&fLock);
284 		delete[] fSemaphores;
285 	}
286 
287 	void ClearUndo(unsigned short semaphoreNumber)
288 	{
289 		Team *team = thread_get_current_thread()->team;
290 		UndoList::Iterator iterator = fUndoList.GetIterator();
291 		while (iterator.HasNext()) {
292 			struct sem_undo *current = iterator.Next();
293 			if (current->team == team) {
294 				TRACE(("XsiSemaphoreSet::ClearUndo: teamID = %d, "
295 					"semaphoreSetID = %d, semaphoreNumber = %d\n",
296 					fID, semaphoreNumber, (int)team->id));
297 				MutexLocker _(team->xsi_sem_context->lock);
298 				current->undo_values[semaphoreNumber] = 0;
299 				return;
300 			}
301 		}
302 	}
303 
304 	void ClearUndos()
305 	{
306 		// Clear all undo_values (POSIX semadj equivalent)
307 		// of the calling team. This happens only on semctl SETALL.
308 		Team *team = thread_get_current_thread()->team;
309 		DoublyLinkedList<sem_undo>::Iterator iterator = fUndoList.GetIterator();
310 		while (iterator.HasNext()) {
311 			struct sem_undo *current = iterator.Next();
312 			if (current->team == team) {
313 				TRACE(("XsiSemaphoreSet::ClearUndos: teamID = %d, "
314 					"semaphoreSetID = %d\n", (int)team->id, fID));
315 				MutexLocker _(team->xsi_sem_context->lock);
316 				memset(current->undo_values, 0,
317 					sizeof(int16) * fNumberOfSemaphores);
318 				return;
319 			}
320 		}
321 	}
322 
323 	void DoIpcSet(struct semid_ds *result)
324 	{
325 		fPermissions.uid = result->sem_perm.uid;
326 		fPermissions.gid = result->sem_perm.gid;
327 		fPermissions.mode = (fPermissions.mode & ~0x01ff)
328 			| (result->sem_perm.mode & 0x01ff);
329 	}
330 
331 	bool HasPermission() const
332 	{
333 		if ((fPermissions.mode & S_IWOTH) != 0)
334 			return true;
335 
336 		uid_t uid = geteuid();
337 		if (uid == 0 || (uid == fPermissions.uid
338 			&& (fPermissions.mode & S_IWUSR) != 0))
339 			return true;
340 
341 		gid_t gid = getegid();
342 		if (gid == fPermissions.gid && (fPermissions.mode & S_IWGRP) != 0)
343 			return true;
344 
345 		return false;
346 	}
347 
348 	bool HasReadPermission() const
349 	{
350 		// TODO: fix this
351 		return HasPermission();
352 	}
353 
354 	int ID() const
355 	{
356 		return fID;
357 	}
358 
359 	bool InitOK()
360 	{
361 		return fInitOK;
362 	}
363 
364 	key_t IpcKey() const
365 	{
366 		return fPermissions.key;
367 	}
368 
369 	struct ipc_perm IpcPermission() const
370 	{
371 		return fPermissions;
372 	}
373 
374 	time_t LastSemctlTime() const
375 	{
376 		return fLastSemctlTime;
377 	}
378 
379 	time_t LastSemopTime() const
380 	{
381 		return fLastSemopTime;
382 	}
383 
384 	mutex &Lock()
385 	{
386 		return fLock;
387 	}
388 
389 	ushort NumberOfSemaphores() const
390 	{
391 		return fNumberOfSemaphores;
392 	}
393 
394 	// Record the sem_undo operation into our private fUndoList and
395 	// the team undo_list. The only limit here is the memory needed
396 	// for creating a new sem_undo structure.
397 	int RecordUndo(short semaphoreNumber, short value)
398 	{
399 		// Look if there is already a record from the team caller
400 		// for the same semaphore set
401 		bool notFound = true;
402 		Team *team = thread_get_current_thread()->team;
403 		DoublyLinkedList<sem_undo>::Iterator iterator = fUndoList.GetIterator();
404 		while (iterator.HasNext()) {
405 			struct sem_undo *current = iterator.Next();
406 			if (current->team == team) {
407 				// Update its undo value
408 				MutexLocker _(team->xsi_sem_context->lock);
409 				int newValue = current->undo_values[semaphoreNumber] + value;
410 				if (newValue > USHRT_MAX || newValue < -USHRT_MAX) {
411 					TRACE_ERROR(("XsiSemaphoreSet::RecordUndo: newValue %d "
412 						"out of range\n", newValue));
413 					return ERANGE;
414 				}
415 				current->undo_values[semaphoreNumber] = newValue;
416 				notFound = false;
417 				TRACE(("XsiSemaphoreSet::RecordUndo: found record. Team = %d, "
418 					"semaphoreSetID = %d, semaphoreNumber = %d, value = %d\n",
419 					(int)team->id, fID, semaphoreNumber,
420 					current->undo_values[semaphoreNumber]));
421 				break;
422 			}
423 		}
424 
425 		if (notFound) {
426 			// First sem_undo request from this team for this
427 			// semaphore set
428 			int16 *undoValues
429 				= (int16 *)malloc(sizeof(int16) * fNumberOfSemaphores);
430 			if (undoValues == NULL)
431 				return B_NO_MEMORY;
432 			struct sem_undo *request
433 				= new(std::nothrow) sem_undo(this, team, undoValues);
434 			if (request == NULL) {
435 				free(undoValues);
436 				return B_NO_MEMORY;
437 			}
438 			memset(request->undo_values, 0, sizeof(int16) * fNumberOfSemaphores);
439 			request->undo_values[semaphoreNumber] = value;
440 
441 			// Check if it's the very first sem_undo request for this team
442 			xsi_sem_context *context = atomic_pointer_get(&team->xsi_sem_context);
443 			if (context == NULL) {
444 				// Create the context
445 				context = new(std::nothrow) xsi_sem_context;
446 				if (context == NULL) {
447 					free(request->undo_values);
448 					delete request;
449 					return B_NO_MEMORY;
450 				}
451 				// Since we don't hold any global lock, someone
452 				// else could have been quicker than us, so we have
453 				// to delete the one we just created and use the one
454 				// in place.
455 				if (atomic_pointer_test_and_set(&team->xsi_sem_context, context,
456 					(xsi_sem_context *)NULL) != NULL)
457 					delete context;
458 			}
459 
460 			// Add the request to both XsiSemaphoreSet and team list
461 			fUndoList.Add(request);
462 			MutexLocker _(team->xsi_sem_context->lock);
463 			team->xsi_sem_context->undo_list.Add(request);
464 			TRACE(("XsiSemaphoreSet::RecordUndo: new record added. Team = %d, "
465 				"semaphoreSetID = %d, semaphoreNumber = %d, value = %d\n",
466 				(int)team->id, fID, semaphoreNumber, value));
467 		}
468 		return B_OK;
469 	}
470 
471 	void RevertUndo(short semaphoreNumber, short value)
472 	{
473 		// This can be called only when RecordUndo fails.
474 		Team *team = thread_get_current_thread()->team;
475 		DoublyLinkedList<sem_undo>::Iterator iterator = fUndoList.GetIterator();
476 		while (iterator.HasNext()) {
477 			struct sem_undo *current = iterator.Next();
478 			if (current->team == team) {
479 				MutexLocker _(team->xsi_sem_context->lock);
480 				fSemaphores[semaphoreNumber].Revert(value);
481 				break;
482 			}
483 		}
484 	}
485 
486 	XsiSemaphore* Semaphore(int nth) const
487 	{
488 		return &fSemaphores[nth];
489 	}
490 
491 	uint32 SequenceNumber() const
492 	{
493 		return fSequenceNumber;
494 	}
495 
496 	// Implemented after sGlobalSequenceNumber is declared
497 	void SetID();
498 
499 	void SetIpcKey(key_t key)
500 	{
501 		fPermissions.key = key;
502 	}
503 
504 	void SetLastSemctlTime()
505 	{
506 		fLastSemctlTime = real_time_clock();
507 	}
508 
509 	void SetLastSemopTime()
510 	{
511 		fLastSemopTime = real_time_clock();
512 	}
513 
514 	void SetPermissions(int flags)
515 	{
516 		fPermissions.uid = fPermissions.cuid = geteuid();
517 		fPermissions.gid = fPermissions.cgid = getegid();
518 		fPermissions.mode = (flags & 0x01ff);
519 	}
520 
521 	UndoList &GetUndoList()
522 	{
523 		return fUndoList;
524 	}
525 
526 	XsiSemaphoreSet*& Link()
527 	{
528 		return fLink;
529 	}
530 
531 private:
532 	int							fID;					// semaphore set id
533 	bool						fInitOK;
534 	time_t						fLastSemctlTime;		// sem_ctime
535 	time_t						fLastSemopTime;			// sem_otime
536 	mutex 						fLock;					// private lock
537 	ushort						fNumberOfSemaphores;	// sem_nsems
538 	struct ipc_perm				fPermissions;			// sem_perm
539 	XsiSemaphore				*fSemaphores;			// array of semaphores
540 	uint32						fSequenceNumber;		// used as a second id
541 	UndoList					fUndoList;				// undo list requests
542 
543 	XsiSemaphoreSet*			fLink;
544 };
545 
546 // Xsi semaphore set hash table
547 struct SemaphoreHashTableDefinition {
548 	typedef int					KeyType;
549 	typedef XsiSemaphoreSet		ValueType;
550 
551 	size_t HashKey (const int key) const
552 	{
553 		return (size_t)key;
554 	}
555 
556 	size_t Hash(XsiSemaphoreSet *variable) const
557 	{
558 		return (size_t)variable->ID();
559 	}
560 
561 	bool Compare(const int key, XsiSemaphoreSet *variable) const
562 	{
563 		return (int)key == (int)variable->ID();
564 	}
565 
566 	XsiSemaphoreSet*& GetLink(XsiSemaphoreSet *variable) const
567 	{
568 		return variable->Link();
569 	}
570 };
571 
572 
573 // IPC class
574 class Ipc {
575 public:
576 	Ipc(key_t key)
577 		: fKey(key),
578 		fSemaphoreSetId(-1)
579 	{
580 	}
581 
582 	key_t Key() const
583 	{
584 		return fKey;
585 	}
586 
587 	int SemaphoreSetID() const
588 	{
589 		return fSemaphoreSetId;
590 	}
591 
592 	void SetSemaphoreSetID(XsiSemaphoreSet *semaphoreSet)
593 	{
594 		fSemaphoreSetId = semaphoreSet->ID();
595 	}
596 
597 	Ipc*& Link()
598 	{
599 		return fLink;
600 	}
601 
602 private:
603 	key_t				fKey;
604 	int					fSemaphoreSetId;
605 	Ipc*				fLink;
606 };
607 
608 
609 struct IpcHashTableDefinition {
610 	typedef key_t	KeyType;
611 	typedef Ipc		ValueType;
612 
613 	size_t HashKey (const key_t key) const
614 	{
615 		return (size_t)(key);
616 	}
617 
618 	size_t Hash(Ipc *variable) const
619 	{
620 		return (size_t)HashKey(variable->Key());
621 	}
622 
623 	bool Compare(const key_t key, Ipc *variable) const
624 	{
625 		return (key_t)key == (key_t)variable->Key();
626 	}
627 
628 	Ipc*& GetLink(Ipc *variable) const
629 	{
630 		return variable->Link();
631 	}
632 };
633 
634 } // namespace
635 
636 
637 // Arbitrary limit
638 #define MAX_XSI_SEMAPHORE		4096
639 #define MAX_XSI_SEMAPHORE_SET	2048
640 static BOpenHashTable<IpcHashTableDefinition> sIpcHashTable;
641 static BOpenHashTable<SemaphoreHashTableDefinition> sSemaphoreHashTable;
642 
643 static mutex sIpcLock;
644 static mutex sXsiSemaphoreSetLock;
645 
646 static uint32 sGlobalSequenceNumber = 1;
647 static int32 sXsiSemaphoreCount = 0;
648 static int32 sXsiSemaphoreSetCount = 0;
649 
650 
651 //	#pragma mark -
652 
653 
654 void
655 XsiSemaphoreSet::SetID()
656 {
657 	fID = real_time_clock();
658 	// The lock is held before calling us
659 	while (true) {
660 		if (sSemaphoreHashTable.Lookup(fID) == NULL)
661 			break;
662 		fID = (fID + 1) % INT_MAX;
663 	}
664 	sGlobalSequenceNumber = (sGlobalSequenceNumber + 1) % UINT_MAX;
665 	fSequenceNumber = sGlobalSequenceNumber;
666 }
667 
668 
669 //	#pragma mark - Kernel exported API
670 
671 
672 void
673 xsi_sem_init()
674 {
675 	// Initialize hash tables
676 	status_t status = sIpcHashTable.Init();
677 	if (status != B_OK)
678 		panic("xsi_sem_init() failed to initialize ipc hash table\n");
679 	status =  sSemaphoreHashTable.Init();
680 	if (status != B_OK)
681 		panic("xsi_sem_init() failed to initialize semaphore hash table\n");
682 
683 	mutex_init(&sIpcLock, "global POSIX semaphore IPC table");
684 	mutex_init(&sXsiSemaphoreSetLock, "global POSIX xsi sem table");
685 }
686 
687 
688 /*!	Function called on team exit to process any sem_undo requests */
689 void
690 xsi_sem_undo(Team *team)
691 {
692 	if (team->xsi_sem_context == NULL)
693 		return;
694 
695 	// By acquiring first the semaphore hash table lock
696 	// we make sure the semaphore set in our sem_undo
697 	// list won't get removed by IPC_RMID call
698 	MutexLocker _(sXsiSemaphoreSetLock);
699 
700 	// Process all sem_undo request in the team sem undo list
701 	// if any
702 	TeamList::Iterator iterator
703 		= team->xsi_sem_context->undo_list.GetIterator();
704 	while (iterator.HasNext()) {
705 		struct sem_undo *current = iterator.Next();
706 		XsiSemaphoreSet *semaphoreSet = current->semaphore_set;
707 		// Acquire the set lock in order to prevent race
708 		// condition with RecordUndo
709 		MutexLocker setLocker(semaphoreSet->Lock());
710 		MutexLocker _(team->xsi_sem_context->lock);
711 		// Revert the changes done by this process
712 		for (int i = 0; i < semaphoreSet->NumberOfSemaphores(); i++)
713 			if (current->undo_values[i] != 0) {
714 				TRACE(("xsi_sem_undo: TeamID = %d, SemaphoreSetID = %d, "
715 					"SemaphoreNumber = %d, undo value = %d\n", (int)team->id,
716 					semaphoreSet->ID(), i, (int)current->undo_values[i]));
717 				semaphoreSet->Semaphore(i)->Revert(current->undo_values[i]);
718 			}
719 
720 		// Remove and free the sem_undo structure from both lists
721 		iterator.Remove();
722 		semaphoreSet->GetUndoList().Remove(current);
723 		delete current;
724 	}
725 	delete team->xsi_sem_context;
726 	team->xsi_sem_context = NULL;
727 }
728 
729 
730 //	#pragma mark - Syscalls
731 
732 
733 int
734 _user_xsi_semget(key_t key, int numberOfSemaphores, int flags)
735 {
736 	TRACE(("xsi_semget: key = %d, numberOfSemaphores = %d, flags = %d\n",
737 		(int)key, numberOfSemaphores, flags));
738 	XsiSemaphoreSet *semaphoreSet = NULL;
739 	Ipc *ipcKey = NULL;
740 
741 	// Default assumption
742 	bool isPrivate = true;
743 
744 	MutexLocker ipcLocker(sIpcLock);
745 	if (key != IPC_PRIVATE) {
746 		isPrivate = false;
747 		// Check if key already exist, if it does it already has a semaphore
748 		// set associated with it
749 		ipcKey = sIpcHashTable.Lookup(key);
750 		if (ipcKey == NULL) {
751 			// The ipc key does not exist. Create it and add it to the system
752 			if (!(flags & IPC_CREAT)) {
753 				TRACE_ERROR(("xsi_semget: key %d does not exist, but the "
754 					"caller did not ask for creation\n",(int)key));
755 				return ENOENT;
756 			}
757 			ipcKey = new(std::nothrow) Ipc(key);
758 			if (ipcKey == NULL) {
759 				TRACE_ERROR(("xsi_semget: failed to create new Ipc object "
760 					"for key %d\n",	(int)key));
761 				return ENOMEM;
762 			}
763 		} else {
764 			// The IPC key exist and it already has a semaphore
765 			if ((flags & IPC_CREAT) && (flags & IPC_EXCL)) {
766 				TRACE_ERROR(("xsi_semget: key %d already exist\n", (int)key));
767 				return EEXIST;
768 			}
769 			int semaphoreSetID = ipcKey->SemaphoreSetID();
770 
771 			MutexLocker semaphoreSetLocker(sXsiSemaphoreSetLock);
772 			semaphoreSet = sSemaphoreHashTable.Lookup(semaphoreSetID);
773 			if (semaphoreSet == NULL) {
774 				TRACE_ERROR(("xsi_semget: calling process has no semaphore, "
775 					"key %d\n", (int)key));
776 				return EINVAL;
777 			}
778 			if (!semaphoreSet->HasPermission()) {
779 				TRACE_ERROR(("xsi_semget: calling process has no permission "
780 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
781 					(int)key));
782 				return EACCES;
783 			}
784 			if (numberOfSemaphores > semaphoreSet->NumberOfSemaphores()
785 				&& numberOfSemaphores != 0) {
786 				TRACE_ERROR(("xsi_semget: numberOfSemaphores greater than the "
787 					"one associated with semaphore %d, key %d\n",
788 					semaphoreSet->ID(), (int)key));
789 				return EINVAL;
790 			}
791 
792 			return semaphoreSet->ID();
793 		}
794 	}
795 
796 	// Create a new sempahore set for this key
797 	if (numberOfSemaphores <= 0
798 		|| numberOfSemaphores >= MAX_XSI_SEMS_PER_TEAM) {
799 		TRACE_ERROR(("xsi_semget: numberOfSemaphores out of range\n"));
800 		delete ipcKey;
801 		return EINVAL;
802 	}
803 	if (sXsiSemaphoreCount >= MAX_XSI_SEMAPHORE
804 		|| sXsiSemaphoreSetCount >= MAX_XSI_SEMAPHORE_SET) {
805 		TRACE_ERROR(("xsi_semget: reached limit of maximum number of "
806 			"semaphores allowed\n"));
807 		delete ipcKey;
808 		return ENOSPC;
809 	}
810 
811 	semaphoreSet = new(std::nothrow) XsiSemaphoreSet(numberOfSemaphores,
812 		flags);
813 	if (semaphoreSet == NULL || !semaphoreSet->InitOK()) {
814 		TRACE_ERROR(("xsi_semget: failed to allocate a new xsi "
815 			"semaphore set\n"));
816 		delete semaphoreSet;
817 		delete ipcKey;
818 		return ENOMEM;
819 	}
820 
821 	atomic_add(&sXsiSemaphoreCount, numberOfSemaphores);
822 	atomic_add(&sXsiSemaphoreSetCount, 1);
823 
824 	MutexLocker semaphoreSetLocker(sXsiSemaphoreSetLock);
825 	semaphoreSet->SetID();
826 	if (isPrivate) {
827 		semaphoreSet->SetIpcKey((key_t)-1);
828 	} else {
829 		sIpcHashTable.Insert(ipcKey);
830 		semaphoreSet->SetIpcKey(key);
831 		ipcKey->SetSemaphoreSetID(semaphoreSet);
832 	}
833 	sSemaphoreHashTable.Insert(semaphoreSet);
834 	TRACE(("semget: new set = %d created, sequence = %ld\n",
835 		semaphoreSet->ID(), semaphoreSet->SequenceNumber()));
836 
837 	return semaphoreSet->ID();
838 }
839 
840 
841 int
842 _user_xsi_semctl(int semaphoreID, int semaphoreNumber, int command,
843 	union semun *_args)
844 {
845 	TRACE(("xsi_semctl: semaphoreID = %d, semaphoreNumber = %d, command = %d\n",
846 		semaphoreID, semaphoreNumber, command));
847 
848 	union semun args = {0};
849 	if (_args != NULL) {
850 		if (!IS_USER_ADDRESS(_args)
851 				|| user_memcpy(&args, _args, sizeof(union semun)) != B_OK)
852 			return B_BAD_ADDRESS;
853 	}
854 
855 	MutexLocker ipcHashLocker(sIpcLock);
856 	MutexLocker setHashLocker(sXsiSemaphoreSetLock);
857 	XsiSemaphoreSet *semaphoreSet = sSemaphoreHashTable.Lookup(semaphoreID);
858 	if (semaphoreSet == NULL) {
859 		TRACE_ERROR(("xsi_semctl: semaphore set id %d not valid\n",
860 			semaphoreID));
861 		return EINVAL;
862 	}
863 	if (semaphoreNumber < 0
864 		|| semaphoreNumber > semaphoreSet->NumberOfSemaphores()) {
865 		TRACE_ERROR(("xsi_semctl: semaphore number %d not valid for "
866 			"semaphore %d\n", semaphoreNumber, semaphoreID));
867 		return EINVAL;
868 	}
869 
870 	// Lock the semaphore set itself and release both the semaphore
871 	// set hash table lock and the ipc hash table lock _only_ if
872 	// the command it's not IPC_RMID, this prevents undesidered
873 	// situation from happening while (hopefully) improving the
874 	// concurrency.
875 	MutexLocker setLocker;
876 	if (command != IPC_RMID) {
877 		setLocker.SetTo(&semaphoreSet->Lock(), false);
878 		setHashLocker.Unlock();
879 		ipcHashLocker.Unlock();
880 	} else {
881 		// We are about to delete the set along with its mutex, so
882 		// we can't use the MutexLocker class, as the mutex itself
883 		// won't exist on function exit
884 		mutex_lock(&semaphoreSet->Lock());
885 	}
886 
887 	int result = 0;
888 	XsiSemaphore *semaphore = semaphoreSet->Semaphore(semaphoreNumber);
889 	switch (command) {
890 		case GETVAL: {
891 			if (!semaphoreSet->HasReadPermission()) {
892 				TRACE_ERROR(("xsi_semctl: calling process has not permission "
893 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
894 					(int)semaphoreSet->IpcKey()));
895 				result = EACCES;
896 			} else
897 				result = semaphore->Value();
898 			break;
899 		}
900 
901 		case SETVAL: {
902 			if (!semaphoreSet->HasPermission()) {
903 				TRACE_ERROR(("xsi_semctl: calling process has not permission "
904 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
905 					(int)semaphoreSet->IpcKey()));
906 				result = EACCES;
907 			} else {
908 				if (args.val > USHRT_MAX) {
909 					TRACE_ERROR(("xsi_semctl: value %d out of range\n", args.val));
910 					result = ERANGE;
911 				} else {
912 					semaphore->SetValue(args.val);
913 					semaphoreSet->ClearUndo(semaphoreNumber);
914 				}
915 			}
916 			break;
917 		}
918 
919 		case GETPID: {
920 			if (!semaphoreSet->HasReadPermission()) {
921 				TRACE_ERROR(("xsi_semctl: calling process has not permission "
922 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
923 					(int)semaphoreSet->IpcKey()));
924 				result = EACCES;
925 			} else
926 				result = semaphore->LastPid();
927 			break;
928 		}
929 
930 		case GETNCNT: {
931 			if (!semaphoreSet->HasReadPermission()) {
932 				TRACE_ERROR(("xsi_semctl: calling process has not permission "
933 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
934 					(int)semaphoreSet->IpcKey()));
935 				result = EACCES;
936 			} else
937 				result = semaphore->ThreadsWaitingToIncrease();
938 			break;
939 		}
940 
941 		case GETZCNT: {
942 			if (!semaphoreSet->HasReadPermission()) {
943 				TRACE_ERROR(("xsi_semctl: calling process has not permission "
944 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
945 					(int)semaphoreSet->IpcKey()));
946 				result = EACCES;
947 			} else
948 				result = semaphore->ThreadsWaitingToBeZero();
949 			break;
950 		}
951 
952 		case GETALL: {
953 			if (!semaphoreSet->HasReadPermission()) {
954 				TRACE_ERROR(("xsi_semctl: calling process has not read "
955 					"permission on semaphore %d, key %d\n", semaphoreSet->ID(),
956 					(int)semaphoreSet->IpcKey()));
957 				result = EACCES;
958 			} else
959 				for (int i = 0; i < semaphoreSet->NumberOfSemaphores(); i++) {
960 					semaphore = semaphoreSet->Semaphore(i);
961 					unsigned short value = semaphore->Value();
962 					if (user_memcpy(args.array + i, &value,
963 							sizeof(unsigned short)) != B_OK) {
964 						TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
965 						result = B_BAD_ADDRESS;
966 						break;
967 					}
968 				}
969 			break;
970 		}
971 
972 		case SETALL: {
973 			if (!semaphoreSet->HasPermission()) {
974 				TRACE_ERROR(("xsi_semctl: calling process has not permission "
975 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
976 					(int)semaphoreSet->IpcKey()));
977 				result = EACCES;
978 			} else {
979 				bool doClear = true;
980 				for (int i = 0; i < semaphoreSet->NumberOfSemaphores(); i++) {
981 					semaphore = semaphoreSet->Semaphore(i);
982 					unsigned short value;
983 					if (user_memcpy(&value, args.array + i,
984 							sizeof(unsigned short)) != B_OK) {
985 						TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
986 						result = B_BAD_ADDRESS;
987 						doClear = false;
988 						break;
989 					} else
990 						semaphore->SetValue(value);
991 				}
992 				if (doClear)
993 					semaphoreSet->ClearUndos();
994 			}
995 			break;
996 		}
997 
998 		case IPC_STAT: {
999 			if (!semaphoreSet->HasReadPermission()) {
1000 				TRACE_ERROR(("xsi_semctl: calling process has not read "
1001 					"permission on semaphore %d, key %d\n", semaphoreSet->ID(),
1002 					(int)semaphoreSet->IpcKey()));
1003 				result = EACCES;
1004 			} else {
1005 				struct semid_ds sem;
1006 				sem.sem_perm = semaphoreSet->IpcPermission();
1007 				sem.sem_nsems = semaphoreSet->NumberOfSemaphores();
1008 				sem.sem_otime = semaphoreSet->LastSemopTime();
1009 				sem.sem_ctime = semaphoreSet->LastSemctlTime();
1010 				if (user_memcpy(args.buf, &sem, sizeof(struct semid_ds))
1011 						< B_OK) {
1012 					TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
1013 					result = B_BAD_ADDRESS;
1014 				}
1015 			}
1016 			break;
1017 		}
1018 
1019 		case IPC_SET: {
1020 			if (!semaphoreSet->HasPermission()) {
1021 				TRACE_ERROR(("xsi_semctl: calling process has not "
1022 					"permission on semaphore %d, key %d\n",
1023 					semaphoreSet->ID(), (int)semaphoreSet->IpcKey()));
1024 				result = EACCES;
1025 			} else {
1026 				struct semid_ds sem;
1027 				if (user_memcpy(&sem, args.buf, sizeof(struct semid_ds))
1028 						!= B_OK) {
1029 					TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
1030 					result = B_BAD_ADDRESS;
1031 				} else
1032 					semaphoreSet->DoIpcSet(&sem);
1033 			}
1034 			break;
1035 		}
1036 
1037 		case IPC_RMID: {
1038 			// If this was the command, we are still holding
1039 			// the semaphore set hash table lock along with the
1040 			// ipc hash table lock and the semaphore set lock
1041 			// itself, this way we are sure there is not
1042 			// one waiting in the queue of the mutex.
1043 			if (!semaphoreSet->HasPermission()) {
1044 				TRACE_ERROR(("xsi_semctl: calling process has not "
1045 					"permission on semaphore %d, key %d\n",
1046 					semaphoreSet->ID(), (int)semaphoreSet->IpcKey()));
1047 				return EACCES;
1048 			}
1049 			key_t key = semaphoreSet->IpcKey();
1050 			Ipc *ipcKey = NULL;
1051 			if (key != -1) {
1052 				ipcKey = sIpcHashTable.Lookup(key);
1053 				sIpcHashTable.Remove(ipcKey);
1054 			}
1055 			sSemaphoreHashTable.Remove(semaphoreSet);
1056 			// Wake up of threads waiting on this set
1057 			// happens in the destructor
1058 			if (key != -1)
1059 				delete ipcKey;
1060 			atomic_add(&sXsiSemaphoreCount, -semaphoreSet->NumberOfSemaphores());
1061 			atomic_add(&sXsiSemaphoreSetCount, -1);
1062 			// Remove any sem_undo request
1063 			while (struct sem_undo *entry
1064 					= semaphoreSet->GetUndoList().RemoveHead()) {
1065 				MutexLocker _(entry->team->xsi_sem_context->lock);
1066 				entry->team->xsi_sem_context->undo_list.Remove(entry);
1067 				delete entry;
1068 			}
1069 
1070 			delete semaphoreSet;
1071 			return 0;
1072 		}
1073 
1074 		default:
1075 			TRACE_ERROR(("xsi_semctl: command %d not valid\n", command));
1076 			result = EINVAL;
1077 	}
1078 
1079 	return result;
1080 }
1081 
1082 
1083 status_t
1084 _user_xsi_semop(int semaphoreID, struct sembuf *ops, size_t numOps)
1085 {
1086 	TRACE(("xsi_semop: semaphoreID = %d, ops = %p, numOps = %ld\n",
1087 		semaphoreID, ops, numOps));
1088 	MutexLocker setHashLocker(sXsiSemaphoreSetLock);
1089 	XsiSemaphoreSet *semaphoreSet = sSemaphoreHashTable.Lookup(semaphoreID);
1090 	if (semaphoreSet == NULL) {
1091 		TRACE_ERROR(("xsi_semop: semaphore set id %d not valid\n",
1092 			semaphoreID));
1093 		return EINVAL;
1094 	}
1095 	MutexLocker setLocker(semaphoreSet->Lock());
1096 	setHashLocker.Unlock();
1097 
1098 	if (!IS_USER_ADDRESS(ops)) {
1099 		TRACE_ERROR(("xsi_semop: sembuf address is not valid\n"));
1100 		return B_BAD_ADDRESS;
1101 	}
1102 
1103 	if (numOps < 0 || numOps >= MAX_XSI_SEMS_PER_TEAM) {
1104 		TRACE_ERROR(("xsi_semop: numOps out of range\n"));
1105 		return EINVAL;
1106 	}
1107 
1108 	struct sembuf *operations
1109 		= (struct sembuf *)malloc(sizeof(struct sembuf) * numOps);
1110 	if (operations == NULL) {
1111 		TRACE_ERROR(("xsi_semop: failed to allocate sembuf struct\n"));
1112 		return B_NO_MEMORY;
1113 	}
1114 	MemoryDeleter operationsDeleter(operations);
1115 
1116 	if (user_memcpy(operations, ops,
1117 			(sizeof(struct sembuf) * numOps)) != B_OK) {
1118 		TRACE_ERROR(("xsi_semop: user_memcpy failed\n"));
1119 		return B_BAD_ADDRESS;
1120 	}
1121 
1122 	// We won't do partial request, that is operations
1123 	// only on some sempahores belonging to the set and then
1124 	// going to sleep. If we must wait on a semaphore, we undo
1125 	// all the operations already done and go to sleep, otherwise
1126 	// we may caused some unwanted deadlock among threads
1127 	// fighting for the same set.
1128 	bool notDone = true;
1129 	status_t result = 0;
1130 	while (notDone) {
1131 		XsiSemaphore *semaphore = NULL;
1132 		short numberOfSemaphores = semaphoreSet->NumberOfSemaphores();
1133 		bool goToSleep = false;
1134 
1135 		uint32 i = 0;
1136 		for (; i < numOps; i++) {
1137 			short semaphoreNumber = operations[i].sem_num;
1138 			if (semaphoreNumber >= numberOfSemaphores) {
1139 				TRACE_ERROR(("xsi_semop: %" B_PRIu32 " invalid semaphore number"
1140 					"\n", i));
1141 				result = EINVAL;
1142 				break;
1143 			}
1144 			semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1145 			unsigned short value = semaphore->Value();
1146 			short operation = operations[i].sem_op;
1147 			TRACE(("xsi_semop: semaphoreNumber = %d, value = %d\n",
1148 				semaphoreNumber, value));
1149 			if (operation < 0) {
1150 				if (semaphore->Add(operation)) {
1151 					if (operations[i].sem_flg & IPC_NOWAIT)
1152 						result = EAGAIN;
1153 					else
1154 						goToSleep = true;
1155 					break;
1156 				}
1157 			} else if (operation == 0) {
1158 				if (value == 0)
1159 					continue;
1160 				else if (operations[i].sem_flg & IPC_NOWAIT) {
1161 					result = EAGAIN;
1162 					break;
1163 				} else {
1164 					goToSleep = true;
1165 					break;
1166 				}
1167 			} else {
1168 				// Operation must be greater than zero,
1169 				// just add the value and continue
1170 				semaphore->Add(operation);
1171 			}
1172 		}
1173 
1174 		// Either we have to wait or an error occured
1175 		if (goToSleep || result != 0) {
1176 			// Undo all previously done operations
1177 			for (uint32 j = 0; j < i; j++) {
1178 				short semaphoreNumber = operations[j].sem_num;
1179 				semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1180 				short operation = operations[j].sem_op;
1181 				if (operation != 0)
1182 					semaphore->Revert(operation);
1183 			}
1184 			if (result != 0)
1185 				return result;
1186 
1187 			// We have to wait: first enqueue the thread
1188 			// in the appropriate set waiting list, then
1189 			// unlock the set itself and block the thread.
1190 			bool waitOnZero = true;
1191 			if (operations[i].sem_op != 0)
1192 				waitOnZero = false;
1193 
1194 			Thread *thread = thread_get_current_thread();
1195 			queued_thread queueEntry(thread, (int32)operations[i].sem_op);
1196 			semaphore->Enqueue(&queueEntry, waitOnZero);
1197 
1198 			uint32 sequenceNumber = semaphoreSet->SequenceNumber();
1199 
1200 			TRACE(("xsi_semop: thread %d going to sleep\n", (int)thread->id));
1201 			result = semaphore->BlockAndUnlock(thread, &setLocker);
1202 			TRACE(("xsi_semop: thread %d back to life\n", (int)thread->id));
1203 
1204 			// We are back to life. Find out why!
1205 			// Make sure the set hasn't been deleted or worst yet
1206 			// replaced.
1207 			setHashLocker.Lock();
1208 			semaphoreSet = sSemaphoreHashTable.Lookup(semaphoreID);
1209 			if (result == EIDRM || semaphoreSet == NULL || (semaphoreSet != NULL
1210 				&& sequenceNumber != semaphoreSet->SequenceNumber())) {
1211 				TRACE_ERROR(("xsi_semop: semaphore set id %d (sequence = "
1212 					"%" B_PRIu32 ") got destroyed\n", semaphoreID,
1213 					sequenceNumber));
1214 				notDone = false;
1215 				result = EIDRM;
1216 			} else if (result == B_INTERRUPTED) {
1217 				TRACE_ERROR(("xsi_semop: thread %d got interrupted while "
1218 					"waiting on semaphore set id %d\n",(int)thread->id,
1219 					semaphoreID));
1220 				semaphore->Deque(&queueEntry, waitOnZero);
1221 				result = EINTR;
1222 				notDone = false;
1223 			} else {
1224 				setLocker.Lock();
1225 				setHashLocker.Unlock();
1226 			}
1227 		} else {
1228 			// everything worked like a charm (so far)
1229 			notDone = false;
1230 			TRACE(("xsi_semop: semaphore acquired succesfully\n"));
1231 			// We acquired the semaphore, now records the sem_undo
1232 			// requests
1233 			XsiSemaphore *semaphore = NULL;
1234 			uint32 i = 0;
1235 			for (; i < numOps; i++) {
1236 				short semaphoreNumber = operations[i].sem_num;
1237 				semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1238 				short operation = operations[i].sem_op;
1239 				if (operations[i].sem_flg & SEM_UNDO)
1240 					if (semaphoreSet->RecordUndo(semaphoreNumber, operation)
1241 						!= B_OK) {
1242 						// Unlikely scenario, but we might get here.
1243 						// Undo everything!
1244 						// Start with semaphore operations
1245 						for (uint32 j = 0; j < numOps; j++) {
1246 							short semaphoreNumber = operations[j].sem_num;
1247 							semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1248 							short operation = operations[j].sem_op;
1249 							if (operation != 0)
1250 								semaphore->Revert(operation);
1251 						}
1252 						// Remove all previously registered sem_undo request
1253 						for (uint32 j = 0; j < i; j++) {
1254 							if (operations[j].sem_flg & SEM_UNDO)
1255 								semaphoreSet->RevertUndo(operations[j].sem_num,
1256 									operations[j].sem_op);
1257 						}
1258 						result = ENOSPC;
1259 					}
1260 			}
1261 		}
1262 	}
1263 
1264 	// We did it. Set the pid of all semaphores used
1265 	if (result == 0) {
1266 		for (uint32 i = 0; i < numOps; i++) {
1267 			short semaphoreNumber = operations[i].sem_num;
1268 			XsiSemaphore *semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1269 			semaphore->SetPid(getpid());
1270 		}
1271 		semaphoreSet->SetLastSemopTime();
1272 	}
1273 	return result;
1274 }
1275