xref: /haiku/src/system/kernel/posix/xsi_semaphore.cpp (revision b6b0567fbd186f8ce8a0c90bdc7a7b5b4c649678)
1 /*
2  * Copyright 2008, Haiku Inc. All rights reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Salvatore Benedetto <salvatore.benedetto@gmail.com>
7  */
8 
9 #include <posix/xsi_semaphore.h>
10 
11 #include <new>
12 
13 #include <sys/ipc.h>
14 #include <sys/types.h>
15 
16 #include <OS.h>
17 
18 #include <kernel.h>
19 #include <syscall_restart.h>
20 
21 #include <util/atomic.h>
22 #include <util/AutoLock.h>
23 #include <util/DoublyLinkedList.h>
24 #include <util/OpenHashTable.h>
25 
26 
27 //#define TRACE_XSI_SEM
28 #ifdef TRACE_XSI_SEM
29 #	define TRACE(x)			dprintf x
30 #	define TRACE_ERROR(x)	dprintf x
31 #else
32 #	define TRACE(x)			/* nothing */
33 #	define TRACE_ERROR(x)	dprintf x
34 #endif
35 
36 // Queue for holding blocked threads
37 struct queued_thread : DoublyLinkedListLinkImpl<queued_thread> {
38 	queued_thread(struct thread *thread, int32 count)
39 		:
40 		thread(thread),
41 		count(count),
42 		queued(false)
43 	{
44 	}
45 
46 	struct thread	*thread;
47 	int32			count;
48 	bool			queued;
49 };
50 
51 typedef DoublyLinkedList<queued_thread> ThreadQueue;
52 
53 class XsiSemaphoreSet;
54 
55 struct sem_undo : DoublyLinkedListLinkImpl<sem_undo> {
56 	sem_undo(XsiSemaphoreSet *semaphoreSet, struct team *team, int16 *undoValues)
57 		:
58 		semaphore_set(semaphoreSet),
59 		team(team),
60 		undo_values(undoValues)
61 	{
62 	}
63 
64 	DoublyLinkedListLink<sem_undo>		team_link;
65 	XsiSemaphoreSet						*semaphore_set;
66 	struct team							*team;
67 	int16								*undo_values;
68 };
69 
70 typedef DoublyLinkedList<sem_undo> UndoList;
71 typedef DoublyLinkedList<sem_undo,
72 	DoublyLinkedListMemberGetLink<sem_undo, &sem_undo::team_link> > TeamList;
73 
74 struct xsi_sem_context {
75 	xsi_sem_context()
76 	{
77 		mutex_init(&lock, "Private team undo_list lock");
78 	}
79 
80 	~xsi_sem_context()
81 	{
82 		mutex_destroy(&lock);
83 	}
84 
85 	TeamList	undo_list;
86 	mutex		lock;
87 };
88 
89 // Xsi semaphore definition
90 class XsiSemaphore {
91 public:
92 	XsiSemaphore()
93 		: fLastPidOperation(0),
94 		fThreadsWaitingToIncrease(0),
95 		fThreadsWaitingToBeZero(0),
96 		fValue(0)
97 	{
98 	}
99 
100 	~XsiSemaphore()
101 	{
102 		// For some reason the semaphore is getting destroyed.
103 		// Wake up any remaing awaiting threads
104 		InterruptsSpinLocker _(gThreadSpinlock);
105 		while (queued_thread *entry = fWaitingToIncreaseQueue.RemoveHead()) {
106 			entry->queued = false;
107 			thread_unblock_locked(entry->thread, EIDRM);
108 		}
109 		while (queued_thread *entry = fWaitingToBeZeroQueue.RemoveHead()) {
110 			entry->queued = false;
111 			thread_unblock_locked(entry->thread, EIDRM);
112 		}
113 		// No need to remove any sem_undo request still
114 		// hanging. When the process exit and doesn't found
115 		// the semaphore set, it'll just ignore the sem_undo
116 		// request. That's better than iterating trough the
117 		// whole sUndoList. Beside we don't know our semaphore
118 		// number nor our semaphore set id.
119 	}
120 
121 	// We return true in case the operation causes the
122 	// caller to wait, so it can undo all the operations
123 	// previously done
124 	bool Add(short value)
125 	{
126 		if ((int)(fValue + value) < 0) {
127 			TRACE(("XsiSemaphore::Add: potentially going to sleep\n"));
128 			return true;
129 		} else {
130 			fValue += value;
131 			if (fValue == 0 && fThreadsWaitingToBeZero > 0)
132 				WakeUpThread(true);
133 			else if (fValue > 0 && fThreadsWaitingToIncrease > 0)
134 				WakeUpThread(false);
135 			return false;
136 		}
137 	}
138 
139 	status_t BlockAndUnlock(struct thread *thread, MutexLocker *setLocker)
140 	{
141 		thread_prepare_to_block(thread, B_CAN_INTERRUPT,
142 			THREAD_BLOCK_TYPE_OTHER, (void*)"xsi semaphore");
143 		// Unlock the set before blocking
144 		setLocker->Unlock();
145 
146 		InterruptsSpinLocker _(gThreadSpinlock);
147 // TODO: We've got a serious race condition: If BlockAndUnlock() returned due to
148 // interruption, we will still be queued. A WakeUpThread() at this point will
149 // call thread_unblock() and might thus screw with our trying to re-lock the
150 // mutex.
151 		return thread_block_locked(thread);
152 	}
153 
154 	void Deque(queued_thread *queueEntry, bool waitForZero)
155 	{
156 		if (queueEntry->queued) {
157 			if (waitForZero) {
158 				fWaitingToBeZeroQueue.Remove(queueEntry);
159 				fThreadsWaitingToBeZero--;
160 			} else {
161 				fWaitingToIncreaseQueue.Remove(queueEntry);
162 				fThreadsWaitingToIncrease--;
163 			}
164 		}
165 	}
166 
167 	void Enqueue(queued_thread *queueEntry, bool waitForZero)
168 	{
169 		if (waitForZero) {
170 			fWaitingToBeZeroQueue.Add(queueEntry);
171 			fThreadsWaitingToBeZero++;
172 		} else {
173 			fWaitingToIncreaseQueue.Add(queueEntry);
174 			fThreadsWaitingToIncrease++;
175 		}
176 		queueEntry->queued = true;
177 	}
178 
179 	pid_t LastPid() const
180 	{
181 		return fLastPidOperation;
182 	}
183 
184 	void Revert(short value)
185 	{
186 		fValue -= value;
187 		if (fValue == 0 && fThreadsWaitingToBeZero > 0)
188 			WakeUpThread(true);
189 		else if (fValue > 0 && fThreadsWaitingToIncrease > 0)
190 			WakeUpThread(false);
191 	}
192 
193 	void SetPid(pid_t pid)
194 	{
195 		fLastPidOperation = pid;
196 	}
197 
198 	void SetValue(ushort value)
199 	{
200 		fValue = value;
201 	}
202 
203 	ushort ThreadsWaitingToIncrease() const
204 	{
205 		return fThreadsWaitingToIncrease;
206 	}
207 
208 	ushort ThreadsWaitingToBeZero() const
209 	{
210 		return fThreadsWaitingToBeZero;
211 	}
212 
213 	ushort Value() const
214 	{
215 		return fValue;
216 	}
217 
218 	void WakeUpThread(bool waitingForZero)
219 	{
220 		InterruptsSpinLocker _(gThreadSpinlock);
221 		if (waitingForZero) {
222 			// Wake up all threads waiting on zero
223 			while (queued_thread *entry = fWaitingToBeZeroQueue.RemoveHead()) {
224 				entry->queued = false;
225 				fThreadsWaitingToBeZero--;
226 				thread_unblock_locked(entry->thread, 0);
227 			}
228 		} else {
229 			// Wake up all threads even though they might go back to sleep
230 			while (queued_thread *entry = fWaitingToIncreaseQueue.RemoveHead()) {
231 				entry->queued = false;
232 				fThreadsWaitingToIncrease--;
233 				thread_unblock_locked(entry->thread, 0);
234 			}
235 		}
236 	}
237 
238 private:
239 	pid_t				fLastPidOperation;				// sempid
240 	ushort				fThreadsWaitingToIncrease;		// semncnt
241 	ushort				fThreadsWaitingToBeZero;		// semzcnt
242 	ushort				fValue;							// semval
243 
244 	ThreadQueue			fWaitingToIncreaseQueue;
245 	ThreadQueue			fWaitingToBeZeroQueue;
246 };
247 
248 #define MAX_XSI_SEMS_PER_TEAM	128
249 
250 // Xsi semaphore set definition (semid_ds)
251 class XsiSemaphoreSet {
252 public:
253 	XsiSemaphoreSet(int numberOfSemaphores, int flags)
254 		: fInitOK(false),
255 		fLastSemctlTime((time_t)real_time_clock()),
256 		fLastSemopTime(0),
257 		fNumberOfSemaphores(numberOfSemaphores),
258 		fSemaphores(0)
259 	{
260 		mutex_init(&fLock, "XsiSemaphoreSet private mutex");
261 		SetIpcKey((key_t)-1);
262 		SetPermissions(flags);
263 		fSemaphores = new(std::nothrow) XsiSemaphore[numberOfSemaphores];
264 		if (fSemaphores == NULL) {
265 			TRACE_ERROR(("XsiSemaphoreSet::XsiSemaphore(): failed to allocate "
266 				"XsiSemaphore object\n"));
267 		} else
268 			fInitOK = true;
269 	}
270 
271 	~XsiSemaphoreSet()
272 	{
273 		TRACE(("XsiSemaphoreSet::~XsiSemaphoreSet(): removing semaphore "
274 			"set %d\n", fID));
275 		mutex_destroy(&fLock);
276 		delete[] fSemaphores;
277 	}
278 
279 	void ClearUndo(unsigned short semaphoreNumber)
280 	{
281 		struct team *team = thread_get_current_thread()->team;
282 		UndoList::Iterator iterator = fUndoList.GetIterator();
283 		while (iterator.HasNext()) {
284 			struct sem_undo *current = iterator.Next();
285 			if (current->team == team) {
286 				TRACE(("XsiSemaphoreSet::ClearUndo: teamID = %d, "
287 					"semaphoreSetID = %d, semaphoreNumber = %d\n",
288 					fID, semaphoreNumber, (int)team->id));
289 				MutexLocker _(team->xsi_sem_context->lock);
290 				current->undo_values[semaphoreNumber] = 0;
291 				return;
292 			}
293 		}
294 	}
295 
296 	void ClearUndos()
297 	{
298 		// Clear all undo_values (POSIX semadj equivalent)
299 		// of the calling team. This happens only on semctl SETALL.
300 		struct team *team = thread_get_current_thread()->team;
301 		DoublyLinkedList<sem_undo>::Iterator iterator = fUndoList.GetIterator();
302 		while (iterator.HasNext()) {
303 			struct sem_undo *current = iterator.Next();
304 			if (current->team == team) {
305 				TRACE(("XsiSemaphoreSet::ClearUndos: teamID = %d, "
306 					"semaphoreSetID = %d\n", (int)team->id, fID));
307 				MutexLocker _(team->xsi_sem_context->lock);
308 				memset(current->undo_values, 0,
309 					sizeof(int16) * fNumberOfSemaphores);
310 				return;
311 			}
312 		}
313 	}
314 
315 	void DoIpcSet(struct semid_ds *result)
316 	{
317 		fPermissions.uid = result->sem_perm.uid;
318 		fPermissions.gid = result->sem_perm.gid;
319 		fPermissions.mode = (fPermissions.mode & ~0x01ff)
320 			| (result->sem_perm.mode & 0x01ff);
321 	}
322 
323 	bool HasPermission() const
324 	{
325 		if ((fPermissions.mode & S_IWOTH) != 0)
326 			return true;
327 
328 		uid_t uid = geteuid();
329 		if (uid == 0 || (uid == fPermissions.uid
330 			&& (fPermissions.mode & S_IWUSR) != 0))
331 			return true;
332 
333 		gid_t gid = getegid();
334 		if (gid == fPermissions.gid && (fPermissions.mode & S_IWGRP) != 0)
335 			return true;
336 
337 		return false;
338 	}
339 
340 	bool HasReadPermission() const
341 	{
342 		// TODO: fix this
343 		return HasPermission();
344 	}
345 
346 	int ID() const
347 	{
348 		return fID;
349 	}
350 
351 	bool InitOK()
352 	{
353 		return fInitOK;
354 	}
355 
356 	key_t IpcKey() const
357 	{
358 		return fPermissions.key;
359 	}
360 
361 	struct ipc_perm IpcPermission() const
362 	{
363 		return fPermissions;
364 	}
365 
366 	time_t LastSemctlTime() const
367 	{
368 		return fLastSemctlTime;
369 	}
370 
371 	time_t LastSemopTime() const
372 	{
373 		return fLastSemopTime;
374 	}
375 
376 	mutex &Lock()
377 	{
378 		return fLock;
379 	}
380 
381 	ushort NumberOfSemaphores() const
382 	{
383 		return fNumberOfSemaphores;
384 	}
385 
386 	// Record the sem_undo operation into our private fUndoList and
387 	// the team undo_list. The only limit here is the memory needed
388 	// for creating a new sem_undo structure.
389 	int RecordUndo(short semaphoreNumber, short value)
390 	{
391 		// Look if there is already a record from the team caller
392 		// for the same semaphore set
393 		bool notFound = true;
394 		struct team *team = thread_get_current_thread()->team;
395 		DoublyLinkedList<sem_undo>::Iterator iterator = fUndoList.GetIterator();
396 		while (iterator.HasNext()) {
397 			struct sem_undo *current = iterator.Next();
398 			if (current->team == team) {
399 				// Update its undo value
400 				MutexLocker _(team->xsi_sem_context->lock);
401 				int newValue = current->undo_values[semaphoreNumber] + value;
402 				if (newValue > USHRT_MAX || newValue < -USHRT_MAX) {
403 					TRACE_ERROR(("XsiSemaphoreSet::RecordUndo: newValue %d "
404 						"out of range\n", newValue));
405 					return ERANGE;
406 				}
407 				current->undo_values[semaphoreNumber] = newValue;
408 				notFound = false;
409 				TRACE(("XsiSemaphoreSet::RecordUndo: found record. Team = %d, "
410 					"semaphoreSetID = %d, semaphoreNumber = %d, value = %d\n",
411 					(int)team->id, fID, semaphoreNumber,
412 					current->undo_values[semaphoreNumber]));
413 				break;
414 			}
415 		}
416 
417 		if (notFound) {
418 			// First sem_undo request from this team for this
419 			// semaphore set
420 			int16 *undoValues
421 				= (int16 *)malloc(sizeof(int16) * fNumberOfSemaphores);
422 			if (undoValues == NULL)
423 				return B_NO_MEMORY;
424 			struct sem_undo *request
425 				= new(std::nothrow) sem_undo(this, team, undoValues);
426 			if (request == NULL) {
427 				free(undoValues);
428 				return B_NO_MEMORY;
429 			}
430 			memset(request->undo_values, 0, sizeof(int16) * fNumberOfSemaphores);
431 			request->undo_values[semaphoreNumber] = value;
432 
433 			// Check if it's the very first sem_undo request for this team
434 			xsi_sem_context *context = atomic_pointer_get(&team->xsi_sem_context);
435 			if (context == NULL) {
436 				// Create the context
437 				context = new(std::nothrow) xsi_sem_context;
438 				if (context == NULL) {
439 					free(request->undo_values);
440 					delete request;
441 					return B_NO_MEMORY;
442 				}
443 				// Since we don't hold any global lock, someone
444 				// else could have been quicker than us, so we have
445 				// to delete the one we just created and use the one
446 				// in place.
447 				if (atomic_pointer_test_and_set(&team->xsi_sem_context, context,
448 					(xsi_sem_context *)NULL) != NULL)
449 					delete context;
450 			}
451 
452 			// Add the request to both XsiSemaphoreSet and team list
453 			fUndoList.Add(request);
454 			MutexLocker _(team->xsi_sem_context->lock);
455 			team->xsi_sem_context->undo_list.Add(request);
456 			TRACE(("XsiSemaphoreSet::RecordUndo: new record added. Team = %d, "
457 				"semaphoreSetID = %d, semaphoreNumber = %d, value = %d\n",
458 				(int)team->id, fID, semaphoreNumber, value));
459 		}
460 		return B_OK;
461 	}
462 
463 	void RevertUndo(short semaphoreNumber, short value)
464 	{
465 		// This can be called only when RecordUndo fails.
466 		struct team *team = thread_get_current_thread()->team;
467 		DoublyLinkedList<sem_undo>::Iterator iterator = fUndoList.GetIterator();
468 		while (iterator.HasNext()) {
469 			struct sem_undo *current = iterator.Next();
470 			if (current->team == team) {
471 				MutexLocker _(team->xsi_sem_context->lock);
472 				fSemaphores[semaphoreNumber].Revert(value);
473 				break;
474 			}
475 		}
476 	}
477 
478 	XsiSemaphore* Semaphore(int nth) const
479 	{
480 		return &fSemaphores[nth];
481 	}
482 
483 	uint32 SequenceNumber() const
484 	{
485 		return fSequenceNumber;
486 	}
487 
488 	// Implemented after sGlobalSequenceNumber is declared
489 	void SetID();
490 
491 	void SetIpcKey(key_t key)
492 	{
493 		fPermissions.key = key;
494 	}
495 
496 	void SetLastSemctlTime()
497 	{
498 		fLastSemctlTime = real_time_clock();
499 	}
500 
501 	void SetLastSemopTime()
502 	{
503 		fLastSemopTime = real_time_clock();
504 	}
505 
506 	void SetPermissions(int flags)
507 	{
508 		fPermissions.uid = fPermissions.cuid = geteuid();
509 		fPermissions.gid = fPermissions.cgid = getegid();
510 		fPermissions.mode = (flags & 0x01ff);
511 	}
512 
513 	UndoList &GetUndoList()
514 	{
515 		return fUndoList;
516 	}
517 
518 	XsiSemaphoreSet*& Link()
519 	{
520 		return fLink;
521 	}
522 
523 private:
524 	int							fID;					// semaphore set id
525 	bool						fInitOK;
526 	time_t						fLastSemctlTime;		// sem_ctime
527 	time_t						fLastSemopTime;			// sem_otime
528 	mutex 						fLock;					// private lock
529 	ushort						fNumberOfSemaphores;	// sem_nsems
530 	struct ipc_perm				fPermissions;			// sem_perm
531 	XsiSemaphore				*fSemaphores;			// array of semaphores
532 	uint32						fSequenceNumber;		// used as a second id
533 	UndoList					fUndoList;				// undo list requests
534 
535 	XsiSemaphoreSet*			fLink;
536 };
537 
538 // Xsi semaphore set hash table
539 struct SemaphoreHashTableDefinition {
540 	typedef int					KeyType;
541 	typedef XsiSemaphoreSet		ValueType;
542 
543 	size_t HashKey (const int key) const
544 	{
545 		return (size_t)key;
546 	}
547 
548 	size_t Hash(XsiSemaphoreSet *variable) const
549 	{
550 		return (size_t)variable->ID();
551 	}
552 
553 	bool Compare(const int key, XsiSemaphoreSet *variable) const
554 	{
555 		return (int)key == (int)variable->ID();
556 	}
557 
558 	XsiSemaphoreSet*& GetLink(XsiSemaphoreSet *variable) const
559 	{
560 		return variable->Link();
561 	}
562 };
563 
564 
565 // IPC class
566 class Ipc {
567 public:
568 	Ipc(key_t key)
569 		: fKey(key),
570 		fSemaphoreSetId(-1)
571 	{
572 	}
573 
574 	key_t Key() const
575 	{
576 		return fKey;
577 	}
578 
579 	int SemaphoreSetID() const
580 	{
581 		return fSemaphoreSetId;
582 	}
583 
584 	void SetSemaphoreSetID(XsiSemaphoreSet *semaphoreSet)
585 	{
586 		fSemaphoreSetId = semaphoreSet->ID();
587 	}
588 
589 	Ipc*& Link()
590 	{
591 		return fLink;
592 	}
593 
594 private:
595 	key_t				fKey;
596 	int					fSemaphoreSetId;
597 	Ipc*				fLink;
598 };
599 
600 
601 struct IpcHashTableDefinition {
602 	typedef key_t	KeyType;
603 	typedef Ipc		ValueType;
604 
605 	size_t HashKey (const key_t key) const
606 	{
607 		return (size_t)(key);
608 	}
609 
610 	size_t Hash(Ipc *variable) const
611 	{
612 		return (size_t)HashKey(variable->Key());
613 	}
614 
615 	bool Compare(const key_t key, Ipc *variable) const
616 	{
617 		return (key_t)key == (key_t)variable->Key();
618 	}
619 
620 	Ipc*& GetLink(Ipc *variable) const
621 	{
622 		return variable->Link();
623 	}
624 };
625 
626 // Arbitrary limit
627 #define MAX_XSI_SEMAPHORE		4096
628 #define MAX_XSI_SEMAPHORE_SET	2048
629 static BOpenHashTable<IpcHashTableDefinition> sIpcHashTable;
630 static BOpenHashTable<SemaphoreHashTableDefinition> sSemaphoreHashTable;
631 
632 static mutex sIpcLock;
633 static mutex sXsiSemaphoreSetLock;
634 
635 static uint32 sGlobalSequenceNumber = 1;
636 static vint32 sXsiSemaphoreCount = 0;
637 static vint32 sXsiSemaphoreSetCount = 0;
638 
639 
640 //	#pragma mark -
641 
642 
643 void
644 XsiSemaphoreSet::SetID()
645 {
646 	fID = real_time_clock();
647 	// The lock is held before calling us
648 	while (true) {
649 		if (sSemaphoreHashTable.Lookup(fID) == NULL)
650 			break;
651 		fID = (fID + 1) % INT_MAX;
652 	}
653 	sGlobalSequenceNumber = (sGlobalSequenceNumber + 1) % UINT_MAX;
654 	fSequenceNumber = sGlobalSequenceNumber;
655 }
656 
657 
658 //	#pragma mark - Kernel exported API
659 
660 
661 void
662 xsi_sem_init()
663 {
664 	// Initialize hash tables
665 	status_t status = sIpcHashTable.Init();
666 	if (status != B_OK)
667 		panic("xsi_sem_init() failed to initialize ipc hash table\n");
668 	status =  sSemaphoreHashTable.Init();
669 	if (status != B_OK)
670 		panic("xsi_sem_init() failed to initialize semaphore hash table\n");
671 
672 	mutex_init(&sIpcLock, "global POSIX semaphore IPC table");
673 	mutex_init(&sXsiSemaphoreSetLock, "global POSIX xsi sem table");
674 }
675 
676 
677 /*!	Function called on team exit to process any sem_undo requests */
678 void
679 xsi_sem_undo(struct team *team)
680 {
681 	if (team->xsi_sem_context == NULL)
682 		return;
683 
684 	// By acquiring first the semaphore hash table lock
685 	// we make sure the semaphore set in our sem_undo
686 	// list won't get removed by IPC_RMID call
687 	MutexLocker _(sXsiSemaphoreSetLock);
688 
689 	// Process all sem_undo request in the team sem undo list
690 	// if any
691 	TeamList::Iterator iterator
692 		= team->xsi_sem_context->undo_list.GetIterator();
693 	while (iterator.HasNext()) {
694 		struct sem_undo *current = iterator.Next();
695 		XsiSemaphoreSet *semaphoreSet = current->semaphore_set;
696 		// Acquire the set lock in order to prevent race
697 		// condition with RecordUndo
698 		MutexLocker setLocker(semaphoreSet->Lock());
699 		MutexLocker _(team->xsi_sem_context->lock);
700 		// Revert the changes done by this process
701 		for (int i = 0; i < semaphoreSet->NumberOfSemaphores(); i++)
702 			if (current->undo_values[i] != 0) {
703 				TRACE(("xsi_sem_undo: TeamID = %d, SemaphoreSetID = %d, "
704 					"SemaphoreNumber = %d, undo value = %d\n", (int)team->id,
705 					semaphoreSet->ID(), i, (int)current->undo_values[i]));
706 				semaphoreSet->Semaphore(i)->Revert(current->undo_values[i]);
707 			}
708 
709 		// Remove and free the sem_undo structure from both lists
710 		iterator.Remove();
711 		semaphoreSet->GetUndoList().Remove(current);
712 		delete current;
713 	}
714 	delete team->xsi_sem_context;
715 	team->xsi_sem_context = NULL;
716 }
717 
718 
719 //	#pragma mark - Syscalls
720 
721 
722 int
723 _user_xsi_semget(key_t key, int numberOfSemaphores, int flags)
724 {
725 	TRACE(("xsi_semget: key = %d, numberOfSemaphores = %d, flags = %d\n",
726 		(int)key, numberOfSemaphores, flags));
727 	XsiSemaphoreSet *semaphoreSet = NULL;
728 	Ipc *ipcKey = NULL;
729 	// Default assumptions
730 	bool isPrivate = true;
731 	bool create = true;
732 
733 	MutexLocker _(sIpcLock);
734 	if (key != IPC_PRIVATE) {
735 		isPrivate = false;
736 		// Check if key already exist, if it does it already has a semaphore
737 		// set associated with it
738 		ipcKey = sIpcHashTable.Lookup(key);
739 		if (ipcKey == NULL) {
740 			// The ipc key does not exist. Create it and add it to the system
741 			if (!(flags & IPC_CREAT)) {
742 				TRACE_ERROR(("xsi_semget: key %d does not exist, but the "
743 					"caller did not ask for creation\n",(int)key));
744 				return ENOENT;
745 			}
746 			ipcKey = new(std::nothrow) Ipc(key);
747 			if (ipcKey == NULL) {
748 				TRACE_ERROR(("xsi_semget: failed to create new Ipc object "
749 					"for key %d\n",	(int)key));
750 				return ENOMEM;
751 			}
752 			sIpcHashTable.Insert(ipcKey);
753 		} else {
754 			// The IPC key exist and it already has a semaphore
755 			if ((flags & IPC_CREAT) && (flags & IPC_EXCL)) {
756 				TRACE_ERROR(("xsi_semget: key %d already exist\n", (int)key));
757 				return EEXIST;
758 			}
759 			int semaphoreSetID = ipcKey->SemaphoreSetID();
760 
761 			MutexLocker _(sXsiSemaphoreSetLock);
762 			semaphoreSet = sSemaphoreHashTable.Lookup(semaphoreSetID);
763 			if (!semaphoreSet->HasPermission()) {
764 				TRACE_ERROR(("xsi_semget: calling process has not permission "
765 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
766 					(int)key));
767 				return EACCES;
768 			}
769 			if (numberOfSemaphores > semaphoreSet->NumberOfSemaphores()
770 				&& numberOfSemaphores != 0) {
771 				TRACE_ERROR(("xsi_semget: numberOfSemaphores greater than the "
772 					"one associated with semaphore %d, key %d\n",
773 					semaphoreSet->ID(), (int)key));
774 				return EINVAL;
775 			}
776 			create = false;
777 		}
778 	}
779 
780 	if (create) {
781 		// Create a new sempahore set for this key
782 		if (numberOfSemaphores <= 0
783 			|| numberOfSemaphores >= MAX_XSI_SEMS_PER_TEAM) {
784 			TRACE_ERROR(("xsi_semget: numberOfSemaphores out of range\n"));
785 			return EINVAL;
786 		}
787 		if (sXsiSemaphoreCount >= MAX_XSI_SEMAPHORE
788 			|| sXsiSemaphoreSetCount >= MAX_XSI_SEMAPHORE_SET) {
789 			TRACE_ERROR(("xsi_semget: reached limit of maximum number of "
790 				"semaphores allowed\n"));
791 			return ENOSPC;
792 		}
793 
794 		semaphoreSet = new(std::nothrow) XsiSemaphoreSet(numberOfSemaphores,
795 			flags);
796 		if (semaphoreSet == NULL || !semaphoreSet->InitOK()) {
797 			TRACE_ERROR(("xsi_semget: failed to allocate a new xsi "
798 				"semaphore set\n"));
799 			return ENOMEM;
800 		}
801 		atomic_add(&sXsiSemaphoreCount, numberOfSemaphores);
802 		atomic_add(&sXsiSemaphoreSetCount, 1);
803 
804 		MutexLocker _(sXsiSemaphoreSetLock);
805 		semaphoreSet->SetID();
806 		if (isPrivate)
807 			semaphoreSet->SetIpcKey((key_t)-1);
808 		else {
809 			semaphoreSet->SetIpcKey(key);
810 			ipcKey->SetSemaphoreSetID(semaphoreSet);
811 		}
812 		sSemaphoreHashTable.Insert(semaphoreSet);
813 		TRACE(("semget: new set = %d created, sequence = %ld\n",
814 			semaphoreSet->ID(), semaphoreSet->SequenceNumber()));
815 	}
816 
817 	return semaphoreSet->ID();
818 }
819 
820 
821 int
822 _user_xsi_semctl(int semaphoreID, int semaphoreNumber, int command,
823 	union semun *args)
824 {
825 	TRACE(("xsi_semctl: semaphoreID = %d, semaphoreNumber = %d, command = %d\n",
826 		semaphoreID, semaphoreNumber, command));
827 	MutexLocker ipcHashLocker(sIpcLock);
828 	MutexLocker setHashLocker(sXsiSemaphoreSetLock);
829 	XsiSemaphoreSet *semaphoreSet = sSemaphoreHashTable.Lookup(semaphoreID);
830 	if (semaphoreSet == NULL) {
831 		TRACE_ERROR(("xsi_semctl: semaphore set id %d not valid\n",
832 			semaphoreID));
833 		return EINVAL;
834 	}
835 	if (semaphoreNumber < 0
836 		|| semaphoreNumber > semaphoreSet->NumberOfSemaphores()) {
837 		TRACE_ERROR(("xsi_semctl: semaphore number %d not valid for "
838 			"semaphore %d\n", semaphoreNumber, semaphoreID));
839 		return EINVAL;
840 	}
841 	if (args != 0 && !IS_USER_ADDRESS(args)) {
842 		TRACE_ERROR(("xsi_semctl: semun address is not valid\n"));
843 		return B_BAD_ADDRESS;
844 	}
845 
846 	// Lock the semaphore set itself and release both the semaphore
847 	// set hash table lock and the ipc hash table lock _only_ if
848 	// the command it's not IPC_RMID, this prevents undesidered
849 	// situation from happening while (hopefully) improving the
850 	// concurrency.
851 	MutexLocker setLocker;
852 	if (command != IPC_RMID) {
853 		setLocker.SetTo(&semaphoreSet->Lock(), false);
854 		setHashLocker.Unlock();
855 		ipcHashLocker.Unlock();
856 	} else
857 		// We are about to delete the set along with its mutex, so
858 		// we can't use the MutexLocker class, as the mutex itself
859 		// won't exist on function exit
860 		mutex_lock(&semaphoreSet->Lock());
861 
862 	int result = 0;
863 	XsiSemaphore *semaphore = semaphoreSet->Semaphore(semaphoreNumber);
864 	switch (command) {
865 		case GETVAL: {
866 			if (!semaphoreSet->HasReadPermission()) {
867 				TRACE_ERROR(("xsi_semctl: calling process has not permission "
868 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
869 					(int)semaphoreSet->IpcKey()));
870 				result = EACCES;
871 			} else
872 				result = semaphore->Value();
873 			break;
874 		}
875 
876 		case SETVAL: {
877 			if (!semaphoreSet->HasPermission()) {
878 				TRACE_ERROR(("xsi_semctl: calling process has not permission "
879 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
880 					(int)semaphoreSet->IpcKey()));
881 				result = EACCES;
882 			} else {
883 				int value;
884 				if (user_memcpy(&value, &args->val, sizeof(int)) < B_OK) {
885 					TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
886 					result = B_BAD_ADDRESS;
887 				} else if (value > USHRT_MAX) {
888 					TRACE_ERROR(("xsi_semctl: value %d out of range\n", value));
889 					result = ERANGE;
890 				} else {
891 					semaphore->SetValue(value);
892 					semaphoreSet->ClearUndo(semaphoreNumber);
893 				}
894 			}
895 			break;
896 		}
897 
898 		case GETPID: {
899 			if (!semaphoreSet->HasReadPermission()) {
900 				TRACE_ERROR(("xsi_semctl: calling process has not permission "
901 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
902 					(int)semaphoreSet->IpcKey()));
903 				result = EACCES;
904 			} else
905 				result = semaphore->LastPid();
906 			break;
907 		}
908 
909 		case GETNCNT: {
910 			if (!semaphoreSet->HasReadPermission()) {
911 				TRACE_ERROR(("xsi_semctl: calling process has not permission "
912 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
913 					(int)semaphoreSet->IpcKey()));
914 				result = EACCES;
915 			} else
916 				result = semaphore->ThreadsWaitingToIncrease();
917 			break;
918 		}
919 
920 		case GETZCNT: {
921 			if (!semaphoreSet->HasReadPermission()) {
922 				TRACE_ERROR(("xsi_semctl: calling process has not permission "
923 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
924 					(int)semaphoreSet->IpcKey()));
925 				result = EACCES;
926 			} else
927 				result = semaphore->ThreadsWaitingToBeZero();
928 			break;
929 		}
930 
931 		case GETALL: {
932 			if (!semaphoreSet->HasReadPermission()) {
933 				TRACE_ERROR(("xsi_semctl: calling process has not read "
934 					"permission on semaphore %d, key %d\n", semaphoreSet->ID(),
935 					(int)semaphoreSet->IpcKey()));
936 				result = EACCES;
937 			} else
938 				for (int i = 0; i < semaphoreSet->NumberOfSemaphores(); i++) {
939 					semaphore = semaphoreSet->Semaphore(i);
940 					unsigned short value = semaphore->Value();
941 					if (user_memcpy(&args->array[i], &value,
942 						sizeof(unsigned short)) < B_OK) {
943 						TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
944 						result = B_BAD_ADDRESS;
945 						break;
946 					}
947 				}
948 			break;
949 		}
950 
951 		case SETALL: {
952 			if (!semaphoreSet->HasPermission()) {
953 				TRACE_ERROR(("xsi_semctl: calling process has not permission "
954 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
955 					(int)semaphoreSet->IpcKey()));
956 				result = EACCES;
957 			} else {
958 				bool doClear = true;
959 				for (int i = 0; i < semaphoreSet->NumberOfSemaphores(); i++) {
960 					semaphore = semaphoreSet->Semaphore(i);
961 					unsigned short value;
962 					if (user_memcpy(&value, &args->array[i], sizeof(unsigned short))
963 						< B_OK) {
964 						TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
965 						result = B_BAD_ADDRESS;
966 						doClear = false;
967 						break;
968 					} else
969 						semaphore->SetValue(value);
970 				}
971 				if (doClear)
972 					semaphoreSet->ClearUndos();
973 			}
974 			break;
975 		}
976 
977 		case IPC_STAT: {
978 			if (!semaphoreSet->HasReadPermission()) {
979 				TRACE_ERROR(("xsi_semctl: calling process has not read "
980 					"permission on semaphore %d, key %d\n", semaphoreSet->ID(),
981 					(int)semaphoreSet->IpcKey()));
982 				result = EACCES;
983 			} else {
984 				struct semid_ds sem;
985 				sem.sem_perm = semaphoreSet->IpcPermission();
986 				sem.sem_nsems = semaphoreSet->NumberOfSemaphores();
987 				sem.sem_otime = semaphoreSet->LastSemopTime();
988 				sem.sem_ctime = semaphoreSet->LastSemctlTime();
989 				if (user_memcpy(args->buf, &sem, sizeof(struct semid_ds))
990 					< B_OK) {
991 					TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
992 					result = B_BAD_ADDRESS;
993 				}
994 			}
995 			break;
996 		}
997 
998 		case IPC_SET: {
999 			if (!semaphoreSet->HasPermission()) {
1000 				TRACE_ERROR(("xsi_semctl: calling process has not "
1001 					"permission on semaphore %d, key %d\n",
1002 					semaphoreSet->ID(),	(int)semaphoreSet->IpcKey()));
1003 				result = EACCES;
1004 			} else {
1005 				struct semid_ds sem;
1006 				if (user_memcpy(&sem, args->buf, sizeof(struct semid_ds))
1007 					< B_OK) {
1008 					TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
1009 					result = B_BAD_ADDRESS;
1010 				} else
1011 					semaphoreSet->DoIpcSet(&sem);
1012 			}
1013 			break;
1014 		}
1015 
1016 		case IPC_RMID: {
1017 			// If this was the command, we are still holding
1018 			// the semaphore set hash table lock along with the
1019 			// ipc hash table lock and the semaphore set lock
1020 			// itself, this way we are sure there is not
1021 			// one waiting in the queue of the mutex.
1022 			if (!semaphoreSet->HasPermission()) {
1023 				TRACE_ERROR(("xsi_semctl: calling process has not "
1024 					"permission on semaphore %d, key %d\n",
1025 					semaphoreSet->ID(),	(int)semaphoreSet->IpcKey()));
1026 				return EACCES;
1027 			}
1028 			key_t key = semaphoreSet->IpcKey();
1029 			Ipc *ipcKey = NULL;
1030 			if (key != -1) {
1031 				ipcKey = sIpcHashTable.Lookup(key);
1032 				sIpcHashTable.Remove(ipcKey);
1033 			}
1034 			sSemaphoreHashTable.Remove(semaphoreSet);
1035 			// Wake up of threads waiting on this set
1036 			// happens in the destructor
1037 			if (key != -1)
1038 				delete ipcKey;
1039 			atomic_add(&sXsiSemaphoreCount, -semaphoreSet->NumberOfSemaphores());
1040 			atomic_add(&sXsiSemaphoreSetCount, -1);
1041 			// Remove any sem_undo request
1042 			while (struct sem_undo *entry
1043 					= semaphoreSet->GetUndoList().RemoveHead()) {
1044 				MutexLocker _(entry->team->xsi_sem_context->lock);
1045 				entry->team->xsi_sem_context->undo_list.Remove(entry);
1046 				delete entry;
1047 			}
1048 
1049 			delete semaphoreSet;
1050 			return 0;
1051 		}
1052 
1053 		default:
1054 			TRACE_ERROR(("xsi_semctl: command %d not valid\n", command));
1055 			result = EINVAL;
1056 	}
1057 
1058 	return result;
1059 }
1060 
1061 
1062 status_t
1063 _user_xsi_semop(int semaphoreID, struct sembuf *ops, size_t numOps)
1064 {
1065 	TRACE(("xsi_semop: semaphoreID = %d, ops = %p, numOps = %ld\n",
1066 		semaphoreID, ops, numOps));
1067 	MutexLocker setHashLocker(sXsiSemaphoreSetLock);
1068 	XsiSemaphoreSet *semaphoreSet = sSemaphoreHashTable.Lookup(semaphoreID);
1069 	if (semaphoreSet == NULL) {
1070 		TRACE_ERROR(("xsi_semop: semaphore set id %d not valid\n",
1071 			semaphoreID));
1072 		return EINVAL;
1073 	}
1074 	MutexLocker setLocker(semaphoreSet->Lock());
1075 	setHashLocker.Unlock();
1076 
1077 	if (!IS_USER_ADDRESS(ops)) {
1078 		TRACE_ERROR(("xsi_semop: sembuf address is not valid\n"));
1079 		return B_BAD_ADDRESS;
1080 	}
1081 
1082 	if (numOps < 0 || numOps >= MAX_XSI_SEMS_PER_TEAM) {
1083 		TRACE_ERROR(("xsi_semop: numOps out of range\n"));
1084 		return EINVAL;
1085 	}
1086 
1087 	struct sembuf *operations
1088 		= (struct sembuf *)malloc(sizeof(struct sembuf) * numOps);
1089 	if (operations == NULL) {
1090 		TRACE_ERROR(("xsi_semop: failed to allocate sembuf struct\n"));
1091 		return B_NO_MEMORY;
1092 	}
1093 
1094 	if (user_memcpy(operations, ops,
1095 		(sizeof(struct sembuf) * numOps)) < B_OK) {
1096 		TRACE_ERROR(("xsi_semop: user_memcpy failed\n"));
1097 		free(operations);
1098 		return B_BAD_ADDRESS;
1099 	}
1100 
1101 	// We won't do partial request, that is operations
1102 	// only on some sempahores belonging to the set and then
1103 	// going to sleep. If we must wait on a semaphore, we undo
1104 	// all the operations already done and go to sleep, otherwise
1105 	// we may caused some unwanted deadlock among threads
1106 	// fighting for the same set.
1107 	bool notDone = true;
1108 	status_t result = 0;
1109 	while (notDone) {
1110 		XsiSemaphore *semaphore = NULL;
1111 		short numberOfSemaphores = semaphoreSet->NumberOfSemaphores();
1112 		bool goToSleep = false;
1113 
1114 		uint32 i = 0;
1115 		for (; i < numOps; i++) {
1116 			short semaphoreNumber = operations[i].sem_num;
1117 			if (semaphoreNumber >= numberOfSemaphores) {
1118 				TRACE_ERROR(("xsi_semop: %ld invalid semaphore number\n", i));
1119 				result = EINVAL;
1120 				break;
1121 			}
1122 			semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1123 			unsigned short value = semaphore->Value();
1124 			short operation = operations[i].sem_op;
1125 			TRACE(("xsi_semop: semaphoreNumber = %d, value = %d\n",
1126 				semaphoreNumber, value));
1127 			if (operation < 0) {
1128 				if (semaphore->Add(operation)) {
1129 					if (operations[i].sem_flg & IPC_NOWAIT)
1130 						result = EAGAIN;
1131 					else
1132 						goToSleep = true;
1133 					break;
1134 				}
1135 			} else if (operation == 0) {
1136 				if (value == 0)
1137 					continue;
1138 				else if (operations[i].sem_flg & IPC_NOWAIT) {
1139 					result = EAGAIN;
1140 					break;
1141 				} else {
1142 					goToSleep = true;
1143 					break;
1144 				}
1145 			} else {
1146 				// Operation must be greater than zero,
1147 				// just add the value and continue
1148 				semaphore->Add(operation);
1149 			}
1150 		}
1151 
1152 		// Either we have to wait or an error occured
1153 		if (goToSleep || result != 0) {
1154 			// Undo all previously done operations
1155 			for (uint32 j = 0; j < i; j++) {
1156 				short semaphoreNumber = operations[j].sem_num;
1157 				semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1158 				short operation = operations[j].sem_op;
1159 				if (operation != 0)
1160 					semaphore->Revert(operation);
1161 			}
1162 			if (result != 0)
1163 				return result;
1164 
1165 			// We have to wait: first enqueue the thread
1166 			// in the appropriate set waiting list, then
1167 			// unlock the set itself and block the thread.
1168 			bool waitOnZero = true;
1169 			if (operations[i].sem_op != 0)
1170 				waitOnZero = false;
1171 
1172 			struct thread *thread = thread_get_current_thread();
1173 			queued_thread queueEntry(thread, (int32)operations[i].sem_op);
1174 			semaphore->Enqueue(&queueEntry, waitOnZero);
1175 
1176 			uint32 sequenceNumber = semaphoreSet->SequenceNumber();
1177 
1178 			TRACE(("xsi_semop: thread %d going to sleep\n", (int)thread->id));
1179 			result = semaphore->BlockAndUnlock(thread, &setLocker);
1180 			TRACE(("xsi_semop: thread %d back to life\n", (int)thread->id));
1181 
1182 			// We are back to life. Find out why!
1183 			// Make sure the set hasn't been deleted or worst yet
1184 			// replaced.
1185 			setHashLocker.Lock();
1186 			semaphoreSet = sSemaphoreHashTable.Lookup(semaphoreID);
1187 			if (result == EIDRM || semaphoreSet == NULL || (semaphoreSet != NULL
1188 				&& sequenceNumber != semaphoreSet->SequenceNumber())) {
1189 				TRACE_ERROR(("xsi_semop: semaphore set id %d (sequence = %ld) "
1190 					"got destroyed\n", semaphoreID, sequenceNumber));
1191 				notDone = false;
1192 				result = EIDRM;
1193 			} else if (result == B_INTERRUPTED) {
1194 				TRACE_ERROR(("xsi_semop: thread %d got interrupted while "
1195 					"waiting on semaphore set id %d\n",(int)thread->id,
1196 					semaphoreID));
1197 				semaphore->Deque(&queueEntry, waitOnZero);
1198 				result = EINTR;
1199 				notDone = false;
1200 			} else {
1201 				setLocker.Lock();
1202 				setHashLocker.Unlock();
1203 			}
1204 		} else {
1205 			// everything worked like a charm (so far)
1206 			notDone = false;
1207 			TRACE(("xsi_semop: semaphore acquired succesfully\n"));
1208 			// We acquired the semaphore, now records the sem_undo
1209 			// requests
1210 			XsiSemaphore *semaphore = NULL;
1211 			uint32 i = 0;
1212 			for (; i < numOps; i++) {
1213 				short semaphoreNumber = operations[i].sem_num;
1214 				semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1215 				short operation = operations[i].sem_op;
1216 				if (operations[i].sem_flg & SEM_UNDO)
1217 					if (semaphoreSet->RecordUndo(semaphoreNumber, operation)
1218 						!= B_OK) {
1219 						// Unlikely scenario, but we might get here.
1220 						// Undo everything!
1221 						// Start with semaphore operations
1222 						for (uint32 j = 0; j < numOps; j++) {
1223 							short semaphoreNumber = operations[j].sem_num;
1224 							semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1225 							short operation = operations[j].sem_op;
1226 							if (operation != 0)
1227 								semaphore->Revert(operation);
1228 						}
1229 						// Remove all previously registered sem_undo request
1230 						for (uint32 j = 0; j < i; j++) {
1231 							if (operations[j].sem_flg & SEM_UNDO)
1232 								semaphoreSet->RevertUndo(operations[j].sem_num,
1233 									operations[j].sem_op);
1234 						}
1235 						result = ENOSPC;
1236 					}
1237 			}
1238 		}
1239 	}
1240 
1241 	// We did it. Set the pid of all semaphores used
1242 	if (result == 0) {
1243 		for (uint32 i = 0; i < numOps; i++) {
1244 			short semaphoreNumber = operations[i].sem_num;
1245 			XsiSemaphore *semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1246 			semaphore->SetPid(getpid());
1247 		}
1248 	}
1249 	return result;
1250 }
1251