xref: /haiku/src/system/kernel/posix/xsi_semaphore.cpp (revision e5d65858f2361fe0552495b61620c84dcee6bc00)
1 /*
2  * Copyright 2008-2011, Haiku, Inc. All rights reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Salvatore Benedetto <salvatore.benedetto@gmail.com>
7  */
8 
9 #include <posix/xsi_semaphore.h>
10 
11 #include <new>
12 
13 #include <sys/ipc.h>
14 #include <sys/types.h>
15 
16 #include <OS.h>
17 
18 #include <kernel.h>
19 #include <syscall_restart.h>
20 
21 #include <util/atomic.h>
22 #include <util/AutoLock.h>
23 #include <util/DoublyLinkedList.h>
24 #include <util/OpenHashTable.h>
25 
26 
27 //#define TRACE_XSI_SEM
28 #ifdef TRACE_XSI_SEM
29 #	define TRACE(x)			dprintf x
30 #	define TRACE_ERROR(x)	dprintf x
31 #else
32 #	define TRACE(x)			/* nothing */
33 #	define TRACE_ERROR(x)	dprintf x
34 #endif
35 
36 // Queue for holding blocked threads
37 struct queued_thread : DoublyLinkedListLinkImpl<queued_thread> {
38 	queued_thread(Thread *thread, int32 count)
39 		:
40 		thread(thread),
41 		count(count),
42 		queued(false)
43 	{
44 	}
45 
46 	Thread	*thread;
47 	int32	count;
48 	bool	queued;
49 };
50 
51 typedef DoublyLinkedList<queued_thread> ThreadQueue;
52 
53 class XsiSemaphoreSet;
54 
55 struct sem_undo : DoublyLinkedListLinkImpl<sem_undo> {
56 	sem_undo(XsiSemaphoreSet *semaphoreSet, Team *team, int16 *undoValues)
57 		:
58 		semaphore_set(semaphoreSet),
59 		team(team),
60 		undo_values(undoValues)
61 	{
62 	}
63 
64 	DoublyLinkedListLink<sem_undo>		team_link;
65 	XsiSemaphoreSet						*semaphore_set;
66 	Team								*team;
67 	int16								*undo_values;
68 };
69 
70 typedef DoublyLinkedList<sem_undo> UndoList;
71 typedef DoublyLinkedList<sem_undo,
72 	DoublyLinkedListMemberGetLink<sem_undo, &sem_undo::team_link> > TeamList;
73 
74 struct xsi_sem_context {
75 	xsi_sem_context()
76 	{
77 		mutex_init(&lock, "Private team undo_list lock");
78 	}
79 
80 	~xsi_sem_context()
81 	{
82 		mutex_destroy(&lock);
83 	}
84 
85 	TeamList	undo_list;
86 	mutex		lock;
87 };
88 
89 // Xsi semaphore definition
90 class XsiSemaphore {
91 public:
92 	XsiSemaphore()
93 		: fLastPidOperation(0),
94 		fThreadsWaitingToIncrease(0),
95 		fThreadsWaitingToBeZero(0),
96 		fValue(0)
97 	{
98 	}
99 
100 	~XsiSemaphore()
101 	{
102 		// For some reason the semaphore is getting destroyed.
103 		// Wake up any remaing awaiting threads
104 		InterruptsSpinLocker schedulerLocker(gSchedulerLock);
105 
106 		while (queued_thread *entry = fWaitingToIncreaseQueue.RemoveHead()) {
107 			entry->queued = false;
108 			thread_unblock_locked(entry->thread, EIDRM);
109 		}
110 		while (queued_thread *entry = fWaitingToBeZeroQueue.RemoveHead()) {
111 			entry->queued = false;
112 			thread_unblock_locked(entry->thread, EIDRM);
113 		}
114 		// No need to remove any sem_undo request still
115 		// hanging. When the process exit and doesn't found
116 		// the semaphore set, it'll just ignore the sem_undo
117 		// request. That's better than iterating trough the
118 		// whole sUndoList. Beside we don't know our semaphore
119 		// number nor our semaphore set id.
120 	}
121 
122 	// We return true in case the operation causes the
123 	// caller to wait, so it can undo all the operations
124 	// previously done
125 	bool Add(short value)
126 	{
127 		if ((int)(fValue + value) < 0) {
128 			TRACE(("XsiSemaphore::Add: potentially going to sleep\n"));
129 			return true;
130 		} else {
131 			fValue += value;
132 			if (fValue == 0 && fThreadsWaitingToBeZero > 0)
133 				WakeUpThread(true);
134 			else if (fValue > 0 && fThreadsWaitingToIncrease > 0)
135 				WakeUpThread(false);
136 			return false;
137 		}
138 	}
139 
140 	status_t BlockAndUnlock(Thread *thread, MutexLocker *setLocker)
141 	{
142 		thread_prepare_to_block(thread, B_CAN_INTERRUPT,
143 			THREAD_BLOCK_TYPE_OTHER, (void*)"xsi semaphore");
144 		// Unlock the set before blocking
145 		setLocker->Unlock();
146 
147 		InterruptsSpinLocker schedulerLocker(gSchedulerLock);
148 // TODO: We've got a serious race condition: If BlockAndUnlock() returned due to
149 // interruption, we will still be queued. A WakeUpThread() at this point will
150 // call thread_unblock() and might thus screw with our trying to re-lock the
151 // mutex.
152 		return thread_block_locked(thread);
153 	}
154 
155 	void Deque(queued_thread *queueEntry, bool waitForZero)
156 	{
157 		if (queueEntry->queued) {
158 			if (waitForZero) {
159 				fWaitingToBeZeroQueue.Remove(queueEntry);
160 				fThreadsWaitingToBeZero--;
161 			} else {
162 				fWaitingToIncreaseQueue.Remove(queueEntry);
163 				fThreadsWaitingToIncrease--;
164 			}
165 		}
166 	}
167 
168 	void Enqueue(queued_thread *queueEntry, bool waitForZero)
169 	{
170 		if (waitForZero) {
171 			fWaitingToBeZeroQueue.Add(queueEntry);
172 			fThreadsWaitingToBeZero++;
173 		} else {
174 			fWaitingToIncreaseQueue.Add(queueEntry);
175 			fThreadsWaitingToIncrease++;
176 		}
177 		queueEntry->queued = true;
178 	}
179 
180 	pid_t LastPid() const
181 	{
182 		return fLastPidOperation;
183 	}
184 
185 	void Revert(short value)
186 	{
187 		fValue -= value;
188 		if (fValue == 0 && fThreadsWaitingToBeZero > 0)
189 			WakeUpThread(true);
190 		else if (fValue > 0 && fThreadsWaitingToIncrease > 0)
191 			WakeUpThread(false);
192 	}
193 
194 	void SetPid(pid_t pid)
195 	{
196 		fLastPidOperation = pid;
197 	}
198 
199 	void SetValue(ushort value)
200 	{
201 		fValue = value;
202 	}
203 
204 	ushort ThreadsWaitingToIncrease() const
205 	{
206 		return fThreadsWaitingToIncrease;
207 	}
208 
209 	ushort ThreadsWaitingToBeZero() const
210 	{
211 		return fThreadsWaitingToBeZero;
212 	}
213 
214 	ushort Value() const
215 	{
216 		return fValue;
217 	}
218 
219 	void WakeUpThread(bool waitingForZero)
220 	{
221 		InterruptsSpinLocker schedulerLocker(gSchedulerLock);
222 		if (waitingForZero) {
223 			// Wake up all threads waiting on zero
224 			while (queued_thread *entry = fWaitingToBeZeroQueue.RemoveHead()) {
225 				entry->queued = false;
226 				fThreadsWaitingToBeZero--;
227 				thread_unblock_locked(entry->thread, 0);
228 			}
229 		} else {
230 			// Wake up all threads even though they might go back to sleep
231 			while (queued_thread *entry = fWaitingToIncreaseQueue.RemoveHead()) {
232 				entry->queued = false;
233 				fThreadsWaitingToIncrease--;
234 				thread_unblock_locked(entry->thread, 0);
235 			}
236 		}
237 	}
238 
239 private:
240 	pid_t				fLastPidOperation;				// sempid
241 	ushort				fThreadsWaitingToIncrease;		// semncnt
242 	ushort				fThreadsWaitingToBeZero;		// semzcnt
243 	ushort				fValue;							// semval
244 
245 	ThreadQueue			fWaitingToIncreaseQueue;
246 	ThreadQueue			fWaitingToBeZeroQueue;
247 };
248 
249 #define MAX_XSI_SEMS_PER_TEAM	128
250 
251 // Xsi semaphore set definition (semid_ds)
252 class XsiSemaphoreSet {
253 public:
254 	XsiSemaphoreSet(int numberOfSemaphores, int flags)
255 		: fInitOK(false),
256 		fLastSemctlTime((time_t)real_time_clock()),
257 		fLastSemopTime(0),
258 		fNumberOfSemaphores(numberOfSemaphores),
259 		fSemaphores(0)
260 	{
261 		mutex_init(&fLock, "XsiSemaphoreSet private mutex");
262 		SetIpcKey((key_t)-1);
263 		SetPermissions(flags);
264 		fSemaphores = new(std::nothrow) XsiSemaphore[numberOfSemaphores];
265 		if (fSemaphores == NULL) {
266 			TRACE_ERROR(("XsiSemaphoreSet::XsiSemaphore(): failed to allocate "
267 				"XsiSemaphore object\n"));
268 		} else
269 			fInitOK = true;
270 	}
271 
272 	~XsiSemaphoreSet()
273 	{
274 		TRACE(("XsiSemaphoreSet::~XsiSemaphoreSet(): removing semaphore "
275 			"set %d\n", fID));
276 		mutex_destroy(&fLock);
277 		delete[] fSemaphores;
278 	}
279 
280 	void ClearUndo(unsigned short semaphoreNumber)
281 	{
282 		Team *team = thread_get_current_thread()->team;
283 		UndoList::Iterator iterator = fUndoList.GetIterator();
284 		while (iterator.HasNext()) {
285 			struct sem_undo *current = iterator.Next();
286 			if (current->team == team) {
287 				TRACE(("XsiSemaphoreSet::ClearUndo: teamID = %d, "
288 					"semaphoreSetID = %d, semaphoreNumber = %d\n",
289 					fID, semaphoreNumber, (int)team->id));
290 				MutexLocker _(team->xsi_sem_context->lock);
291 				current->undo_values[semaphoreNumber] = 0;
292 				return;
293 			}
294 		}
295 	}
296 
297 	void ClearUndos()
298 	{
299 		// Clear all undo_values (POSIX semadj equivalent)
300 		// of the calling team. This happens only on semctl SETALL.
301 		Team *team = thread_get_current_thread()->team;
302 		DoublyLinkedList<sem_undo>::Iterator iterator = fUndoList.GetIterator();
303 		while (iterator.HasNext()) {
304 			struct sem_undo *current = iterator.Next();
305 			if (current->team == team) {
306 				TRACE(("XsiSemaphoreSet::ClearUndos: teamID = %d, "
307 					"semaphoreSetID = %d\n", (int)team->id, fID));
308 				MutexLocker _(team->xsi_sem_context->lock);
309 				memset(current->undo_values, 0,
310 					sizeof(int16) * fNumberOfSemaphores);
311 				return;
312 			}
313 		}
314 	}
315 
316 	void DoIpcSet(struct semid_ds *result)
317 	{
318 		fPermissions.uid = result->sem_perm.uid;
319 		fPermissions.gid = result->sem_perm.gid;
320 		fPermissions.mode = (fPermissions.mode & ~0x01ff)
321 			| (result->sem_perm.mode & 0x01ff);
322 	}
323 
324 	bool HasPermission() const
325 	{
326 		if ((fPermissions.mode & S_IWOTH) != 0)
327 			return true;
328 
329 		uid_t uid = geteuid();
330 		if (uid == 0 || (uid == fPermissions.uid
331 			&& (fPermissions.mode & S_IWUSR) != 0))
332 			return true;
333 
334 		gid_t gid = getegid();
335 		if (gid == fPermissions.gid && (fPermissions.mode & S_IWGRP) != 0)
336 			return true;
337 
338 		return false;
339 	}
340 
341 	bool HasReadPermission() const
342 	{
343 		// TODO: fix this
344 		return HasPermission();
345 	}
346 
347 	int ID() const
348 	{
349 		return fID;
350 	}
351 
352 	bool InitOK()
353 	{
354 		return fInitOK;
355 	}
356 
357 	key_t IpcKey() const
358 	{
359 		return fPermissions.key;
360 	}
361 
362 	struct ipc_perm IpcPermission() const
363 	{
364 		return fPermissions;
365 	}
366 
367 	time_t LastSemctlTime() const
368 	{
369 		return fLastSemctlTime;
370 	}
371 
372 	time_t LastSemopTime() const
373 	{
374 		return fLastSemopTime;
375 	}
376 
377 	mutex &Lock()
378 	{
379 		return fLock;
380 	}
381 
382 	ushort NumberOfSemaphores() const
383 	{
384 		return fNumberOfSemaphores;
385 	}
386 
387 	// Record the sem_undo operation into our private fUndoList and
388 	// the team undo_list. The only limit here is the memory needed
389 	// for creating a new sem_undo structure.
390 	int RecordUndo(short semaphoreNumber, short value)
391 	{
392 		// Look if there is already a record from the team caller
393 		// for the same semaphore set
394 		bool notFound = true;
395 		Team *team = thread_get_current_thread()->team;
396 		DoublyLinkedList<sem_undo>::Iterator iterator = fUndoList.GetIterator();
397 		while (iterator.HasNext()) {
398 			struct sem_undo *current = iterator.Next();
399 			if (current->team == team) {
400 				// Update its undo value
401 				MutexLocker _(team->xsi_sem_context->lock);
402 				int newValue = current->undo_values[semaphoreNumber] + value;
403 				if (newValue > USHRT_MAX || newValue < -USHRT_MAX) {
404 					TRACE_ERROR(("XsiSemaphoreSet::RecordUndo: newValue %d "
405 						"out of range\n", newValue));
406 					return ERANGE;
407 				}
408 				current->undo_values[semaphoreNumber] = newValue;
409 				notFound = false;
410 				TRACE(("XsiSemaphoreSet::RecordUndo: found record. Team = %d, "
411 					"semaphoreSetID = %d, semaphoreNumber = %d, value = %d\n",
412 					(int)team->id, fID, semaphoreNumber,
413 					current->undo_values[semaphoreNumber]));
414 				break;
415 			}
416 		}
417 
418 		if (notFound) {
419 			// First sem_undo request from this team for this
420 			// semaphore set
421 			int16 *undoValues
422 				= (int16 *)malloc(sizeof(int16) * fNumberOfSemaphores);
423 			if (undoValues == NULL)
424 				return B_NO_MEMORY;
425 			struct sem_undo *request
426 				= new(std::nothrow) sem_undo(this, team, undoValues);
427 			if (request == NULL) {
428 				free(undoValues);
429 				return B_NO_MEMORY;
430 			}
431 			memset(request->undo_values, 0, sizeof(int16) * fNumberOfSemaphores);
432 			request->undo_values[semaphoreNumber] = value;
433 
434 			// Check if it's the very first sem_undo request for this team
435 			xsi_sem_context *context = atomic_pointer_get(&team->xsi_sem_context);
436 			if (context == NULL) {
437 				// Create the context
438 				context = new(std::nothrow) xsi_sem_context;
439 				if (context == NULL) {
440 					free(request->undo_values);
441 					delete request;
442 					return B_NO_MEMORY;
443 				}
444 				// Since we don't hold any global lock, someone
445 				// else could have been quicker than us, so we have
446 				// to delete the one we just created and use the one
447 				// in place.
448 				if (atomic_pointer_test_and_set(&team->xsi_sem_context, context,
449 					(xsi_sem_context *)NULL) != NULL)
450 					delete context;
451 			}
452 
453 			// Add the request to both XsiSemaphoreSet and team list
454 			fUndoList.Add(request);
455 			MutexLocker _(team->xsi_sem_context->lock);
456 			team->xsi_sem_context->undo_list.Add(request);
457 			TRACE(("XsiSemaphoreSet::RecordUndo: new record added. Team = %d, "
458 				"semaphoreSetID = %d, semaphoreNumber = %d, value = %d\n",
459 				(int)team->id, fID, semaphoreNumber, value));
460 		}
461 		return B_OK;
462 	}
463 
464 	void RevertUndo(short semaphoreNumber, short value)
465 	{
466 		// This can be called only when RecordUndo fails.
467 		Team *team = thread_get_current_thread()->team;
468 		DoublyLinkedList<sem_undo>::Iterator iterator = fUndoList.GetIterator();
469 		while (iterator.HasNext()) {
470 			struct sem_undo *current = iterator.Next();
471 			if (current->team == team) {
472 				MutexLocker _(team->xsi_sem_context->lock);
473 				fSemaphores[semaphoreNumber].Revert(value);
474 				break;
475 			}
476 		}
477 	}
478 
479 	XsiSemaphore* Semaphore(int nth) const
480 	{
481 		return &fSemaphores[nth];
482 	}
483 
484 	uint32 SequenceNumber() const
485 	{
486 		return fSequenceNumber;
487 	}
488 
489 	// Implemented after sGlobalSequenceNumber is declared
490 	void SetID();
491 
492 	void SetIpcKey(key_t key)
493 	{
494 		fPermissions.key = key;
495 	}
496 
497 	void SetLastSemctlTime()
498 	{
499 		fLastSemctlTime = real_time_clock();
500 	}
501 
502 	void SetLastSemopTime()
503 	{
504 		fLastSemopTime = real_time_clock();
505 	}
506 
507 	void SetPermissions(int flags)
508 	{
509 		fPermissions.uid = fPermissions.cuid = geteuid();
510 		fPermissions.gid = fPermissions.cgid = getegid();
511 		fPermissions.mode = (flags & 0x01ff);
512 	}
513 
514 	UndoList &GetUndoList()
515 	{
516 		return fUndoList;
517 	}
518 
519 	XsiSemaphoreSet*& Link()
520 	{
521 		return fLink;
522 	}
523 
524 private:
525 	int							fID;					// semaphore set id
526 	bool						fInitOK;
527 	time_t						fLastSemctlTime;		// sem_ctime
528 	time_t						fLastSemopTime;			// sem_otime
529 	mutex 						fLock;					// private lock
530 	ushort						fNumberOfSemaphores;	// sem_nsems
531 	struct ipc_perm				fPermissions;			// sem_perm
532 	XsiSemaphore				*fSemaphores;			// array of semaphores
533 	uint32						fSequenceNumber;		// used as a second id
534 	UndoList					fUndoList;				// undo list requests
535 
536 	XsiSemaphoreSet*			fLink;
537 };
538 
539 // Xsi semaphore set hash table
540 struct SemaphoreHashTableDefinition {
541 	typedef int					KeyType;
542 	typedef XsiSemaphoreSet		ValueType;
543 
544 	size_t HashKey (const int key) const
545 	{
546 		return (size_t)key;
547 	}
548 
549 	size_t Hash(XsiSemaphoreSet *variable) const
550 	{
551 		return (size_t)variable->ID();
552 	}
553 
554 	bool Compare(const int key, XsiSemaphoreSet *variable) const
555 	{
556 		return (int)key == (int)variable->ID();
557 	}
558 
559 	XsiSemaphoreSet*& GetLink(XsiSemaphoreSet *variable) const
560 	{
561 		return variable->Link();
562 	}
563 };
564 
565 
566 // IPC class
567 class Ipc {
568 public:
569 	Ipc(key_t key)
570 		: fKey(key),
571 		fSemaphoreSetId(-1)
572 	{
573 	}
574 
575 	key_t Key() const
576 	{
577 		return fKey;
578 	}
579 
580 	int SemaphoreSetID() const
581 	{
582 		return fSemaphoreSetId;
583 	}
584 
585 	void SetSemaphoreSetID(XsiSemaphoreSet *semaphoreSet)
586 	{
587 		fSemaphoreSetId = semaphoreSet->ID();
588 	}
589 
590 	Ipc*& Link()
591 	{
592 		return fLink;
593 	}
594 
595 private:
596 	key_t				fKey;
597 	int					fSemaphoreSetId;
598 	Ipc*				fLink;
599 };
600 
601 
602 struct IpcHashTableDefinition {
603 	typedef key_t	KeyType;
604 	typedef Ipc		ValueType;
605 
606 	size_t HashKey (const key_t key) const
607 	{
608 		return (size_t)(key);
609 	}
610 
611 	size_t Hash(Ipc *variable) const
612 	{
613 		return (size_t)HashKey(variable->Key());
614 	}
615 
616 	bool Compare(const key_t key, Ipc *variable) const
617 	{
618 		return (key_t)key == (key_t)variable->Key();
619 	}
620 
621 	Ipc*& GetLink(Ipc *variable) const
622 	{
623 		return variable->Link();
624 	}
625 };
626 
627 // Arbitrary limit
628 #define MAX_XSI_SEMAPHORE		4096
629 #define MAX_XSI_SEMAPHORE_SET	2048
630 static BOpenHashTable<IpcHashTableDefinition> sIpcHashTable;
631 static BOpenHashTable<SemaphoreHashTableDefinition> sSemaphoreHashTable;
632 
633 static mutex sIpcLock;
634 static mutex sXsiSemaphoreSetLock;
635 
636 static uint32 sGlobalSequenceNumber = 1;
637 static vint32 sXsiSemaphoreCount = 0;
638 static vint32 sXsiSemaphoreSetCount = 0;
639 
640 
641 //	#pragma mark -
642 
643 
644 void
645 XsiSemaphoreSet::SetID()
646 {
647 	fID = real_time_clock();
648 	// The lock is held before calling us
649 	while (true) {
650 		if (sSemaphoreHashTable.Lookup(fID) == NULL)
651 			break;
652 		fID = (fID + 1) % INT_MAX;
653 	}
654 	sGlobalSequenceNumber = (sGlobalSequenceNumber + 1) % UINT_MAX;
655 	fSequenceNumber = sGlobalSequenceNumber;
656 }
657 
658 
659 //	#pragma mark - Kernel exported API
660 
661 
662 void
663 xsi_sem_init()
664 {
665 	// Initialize hash tables
666 	status_t status = sIpcHashTable.Init();
667 	if (status != B_OK)
668 		panic("xsi_sem_init() failed to initialize ipc hash table\n");
669 	status =  sSemaphoreHashTable.Init();
670 	if (status != B_OK)
671 		panic("xsi_sem_init() failed to initialize semaphore hash table\n");
672 
673 	mutex_init(&sIpcLock, "global POSIX semaphore IPC table");
674 	mutex_init(&sXsiSemaphoreSetLock, "global POSIX xsi sem table");
675 }
676 
677 
678 /*!	Function called on team exit to process any sem_undo requests */
679 void
680 xsi_sem_undo(Team *team)
681 {
682 	if (team->xsi_sem_context == NULL)
683 		return;
684 
685 	// By acquiring first the semaphore hash table lock
686 	// we make sure the semaphore set in our sem_undo
687 	// list won't get removed by IPC_RMID call
688 	MutexLocker _(sXsiSemaphoreSetLock);
689 
690 	// Process all sem_undo request in the team sem undo list
691 	// if any
692 	TeamList::Iterator iterator
693 		= team->xsi_sem_context->undo_list.GetIterator();
694 	while (iterator.HasNext()) {
695 		struct sem_undo *current = iterator.Next();
696 		XsiSemaphoreSet *semaphoreSet = current->semaphore_set;
697 		// Acquire the set lock in order to prevent race
698 		// condition with RecordUndo
699 		MutexLocker setLocker(semaphoreSet->Lock());
700 		MutexLocker _(team->xsi_sem_context->lock);
701 		// Revert the changes done by this process
702 		for (int i = 0; i < semaphoreSet->NumberOfSemaphores(); i++)
703 			if (current->undo_values[i] != 0) {
704 				TRACE(("xsi_sem_undo: TeamID = %d, SemaphoreSetID = %d, "
705 					"SemaphoreNumber = %d, undo value = %d\n", (int)team->id,
706 					semaphoreSet->ID(), i, (int)current->undo_values[i]));
707 				semaphoreSet->Semaphore(i)->Revert(current->undo_values[i]);
708 			}
709 
710 		// Remove and free the sem_undo structure from both lists
711 		iterator.Remove();
712 		semaphoreSet->GetUndoList().Remove(current);
713 		delete current;
714 	}
715 	delete team->xsi_sem_context;
716 	team->xsi_sem_context = NULL;
717 }
718 
719 
720 //	#pragma mark - Syscalls
721 
722 
723 int
724 _user_xsi_semget(key_t key, int numberOfSemaphores, int flags)
725 {
726 	TRACE(("xsi_semget: key = %d, numberOfSemaphores = %d, flags = %d\n",
727 		(int)key, numberOfSemaphores, flags));
728 	XsiSemaphoreSet *semaphoreSet = NULL;
729 	Ipc *ipcKey = NULL;
730 	// Default assumptions
731 	bool isPrivate = true;
732 	bool create = true;
733 
734 	MutexLocker _(sIpcLock);
735 	if (key != IPC_PRIVATE) {
736 		isPrivate = false;
737 		// Check if key already exist, if it does it already has a semaphore
738 		// set associated with it
739 		ipcKey = sIpcHashTable.Lookup(key);
740 		if (ipcKey == NULL) {
741 			// The ipc key does not exist. Create it and add it to the system
742 			if (!(flags & IPC_CREAT)) {
743 				TRACE_ERROR(("xsi_semget: key %d does not exist, but the "
744 					"caller did not ask for creation\n",(int)key));
745 				return ENOENT;
746 			}
747 			ipcKey = new(std::nothrow) Ipc(key);
748 			if (ipcKey == NULL) {
749 				TRACE_ERROR(("xsi_semget: failed to create new Ipc object "
750 					"for key %d\n",	(int)key));
751 				return ENOMEM;
752 			}
753 			sIpcHashTable.Insert(ipcKey);
754 		} else {
755 			// The IPC key exist and it already has a semaphore
756 			if ((flags & IPC_CREAT) && (flags & IPC_EXCL)) {
757 				TRACE_ERROR(("xsi_semget: key %d already exist\n", (int)key));
758 				return EEXIST;
759 			}
760 			int semaphoreSetID = ipcKey->SemaphoreSetID();
761 
762 			MutexLocker _(sXsiSemaphoreSetLock);
763 			semaphoreSet = sSemaphoreHashTable.Lookup(semaphoreSetID);
764 			if (!semaphoreSet->HasPermission()) {
765 				TRACE_ERROR(("xsi_semget: calling process has not permission "
766 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
767 					(int)key));
768 				return EACCES;
769 			}
770 			if (numberOfSemaphores > semaphoreSet->NumberOfSemaphores()
771 				&& numberOfSemaphores != 0) {
772 				TRACE_ERROR(("xsi_semget: numberOfSemaphores greater than the "
773 					"one associated with semaphore %d, key %d\n",
774 					semaphoreSet->ID(), (int)key));
775 				return EINVAL;
776 			}
777 			create = false;
778 		}
779 	}
780 
781 	if (create) {
782 		// Create a new sempahore set for this key
783 		if (numberOfSemaphores <= 0
784 			|| numberOfSemaphores >= MAX_XSI_SEMS_PER_TEAM) {
785 			TRACE_ERROR(("xsi_semget: numberOfSemaphores out of range\n"));
786 			return EINVAL;
787 		}
788 		if (sXsiSemaphoreCount >= MAX_XSI_SEMAPHORE
789 			|| sXsiSemaphoreSetCount >= MAX_XSI_SEMAPHORE_SET) {
790 			TRACE_ERROR(("xsi_semget: reached limit of maximum number of "
791 				"semaphores allowed\n"));
792 			return ENOSPC;
793 		}
794 
795 		semaphoreSet = new(std::nothrow) XsiSemaphoreSet(numberOfSemaphores,
796 			flags);
797 		if (semaphoreSet == NULL || !semaphoreSet->InitOK()) {
798 			TRACE_ERROR(("xsi_semget: failed to allocate a new xsi "
799 				"semaphore set\n"));
800 			delete semaphoreSet;
801 			return ENOMEM;
802 		}
803 		atomic_add(&sXsiSemaphoreCount, numberOfSemaphores);
804 		atomic_add(&sXsiSemaphoreSetCount, 1);
805 
806 		MutexLocker _(sXsiSemaphoreSetLock);
807 		semaphoreSet->SetID();
808 		if (isPrivate)
809 			semaphoreSet->SetIpcKey((key_t)-1);
810 		else {
811 			semaphoreSet->SetIpcKey(key);
812 			ipcKey->SetSemaphoreSetID(semaphoreSet);
813 		}
814 		sSemaphoreHashTable.Insert(semaphoreSet);
815 		TRACE(("semget: new set = %d created, sequence = %ld\n",
816 			semaphoreSet->ID(), semaphoreSet->SequenceNumber()));
817 	}
818 
819 	return semaphoreSet->ID();
820 }
821 
822 
823 int
824 _user_xsi_semctl(int semaphoreID, int semaphoreNumber, int command,
825 	union semun *args)
826 {
827 	TRACE(("xsi_semctl: semaphoreID = %d, semaphoreNumber = %d, command = %d\n",
828 		semaphoreID, semaphoreNumber, command));
829 	MutexLocker ipcHashLocker(sIpcLock);
830 	MutexLocker setHashLocker(sXsiSemaphoreSetLock);
831 	XsiSemaphoreSet *semaphoreSet = sSemaphoreHashTable.Lookup(semaphoreID);
832 	if (semaphoreSet == NULL) {
833 		TRACE_ERROR(("xsi_semctl: semaphore set id %d not valid\n",
834 			semaphoreID));
835 		return EINVAL;
836 	}
837 	if (semaphoreNumber < 0
838 		|| semaphoreNumber > semaphoreSet->NumberOfSemaphores()) {
839 		TRACE_ERROR(("xsi_semctl: semaphore number %d not valid for "
840 			"semaphore %d\n", semaphoreNumber, semaphoreID));
841 		return EINVAL;
842 	}
843 	if (args != 0 && !IS_USER_ADDRESS(args)) {
844 		TRACE_ERROR(("xsi_semctl: semun address is not valid\n"));
845 		return B_BAD_ADDRESS;
846 	}
847 
848 	// Lock the semaphore set itself and release both the semaphore
849 	// set hash table lock and the ipc hash table lock _only_ if
850 	// the command it's not IPC_RMID, this prevents undesidered
851 	// situation from happening while (hopefully) improving the
852 	// concurrency.
853 	MutexLocker setLocker;
854 	if (command != IPC_RMID) {
855 		setLocker.SetTo(&semaphoreSet->Lock(), false);
856 		setHashLocker.Unlock();
857 		ipcHashLocker.Unlock();
858 	} else
859 		// We are about to delete the set along with its mutex, so
860 		// we can't use the MutexLocker class, as the mutex itself
861 		// won't exist on function exit
862 		mutex_lock(&semaphoreSet->Lock());
863 
864 	int result = 0;
865 	XsiSemaphore *semaphore = semaphoreSet->Semaphore(semaphoreNumber);
866 	switch (command) {
867 		case GETVAL: {
868 			if (!semaphoreSet->HasReadPermission()) {
869 				TRACE_ERROR(("xsi_semctl: calling process has not permission "
870 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
871 					(int)semaphoreSet->IpcKey()));
872 				result = EACCES;
873 			} else
874 				result = semaphore->Value();
875 			break;
876 		}
877 
878 		case SETVAL: {
879 			if (!semaphoreSet->HasPermission()) {
880 				TRACE_ERROR(("xsi_semctl: calling process has not permission "
881 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
882 					(int)semaphoreSet->IpcKey()));
883 				result = EACCES;
884 			} else {
885 				int value;
886 				if (user_memcpy(&value, &args->val, sizeof(int)) < B_OK) {
887 					TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
888 					result = B_BAD_ADDRESS;
889 				} else if (value > USHRT_MAX) {
890 					TRACE_ERROR(("xsi_semctl: value %d out of range\n", value));
891 					result = ERANGE;
892 				} else {
893 					semaphore->SetValue(value);
894 					semaphoreSet->ClearUndo(semaphoreNumber);
895 				}
896 			}
897 			break;
898 		}
899 
900 		case GETPID: {
901 			if (!semaphoreSet->HasReadPermission()) {
902 				TRACE_ERROR(("xsi_semctl: calling process has not permission "
903 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
904 					(int)semaphoreSet->IpcKey()));
905 				result = EACCES;
906 			} else
907 				result = semaphore->LastPid();
908 			break;
909 		}
910 
911 		case GETNCNT: {
912 			if (!semaphoreSet->HasReadPermission()) {
913 				TRACE_ERROR(("xsi_semctl: calling process has not permission "
914 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
915 					(int)semaphoreSet->IpcKey()));
916 				result = EACCES;
917 			} else
918 				result = semaphore->ThreadsWaitingToIncrease();
919 			break;
920 		}
921 
922 		case GETZCNT: {
923 			if (!semaphoreSet->HasReadPermission()) {
924 				TRACE_ERROR(("xsi_semctl: calling process has not permission "
925 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
926 					(int)semaphoreSet->IpcKey()));
927 				result = EACCES;
928 			} else
929 				result = semaphore->ThreadsWaitingToBeZero();
930 			break;
931 		}
932 
933 		case GETALL: {
934 			if (!semaphoreSet->HasReadPermission()) {
935 				TRACE_ERROR(("xsi_semctl: calling process has not read "
936 					"permission on semaphore %d, key %d\n", semaphoreSet->ID(),
937 					(int)semaphoreSet->IpcKey()));
938 				result = EACCES;
939 			} else
940 				for (int i = 0; i < semaphoreSet->NumberOfSemaphores(); i++) {
941 					semaphore = semaphoreSet->Semaphore(i);
942 					unsigned short value = semaphore->Value();
943 					if (user_memcpy(&args->array[i], &value,
944 						sizeof(unsigned short)) < B_OK) {
945 						TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
946 						result = B_BAD_ADDRESS;
947 						break;
948 					}
949 				}
950 			break;
951 		}
952 
953 		case SETALL: {
954 			if (!semaphoreSet->HasPermission()) {
955 				TRACE_ERROR(("xsi_semctl: calling process has not permission "
956 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
957 					(int)semaphoreSet->IpcKey()));
958 				result = EACCES;
959 			} else {
960 				bool doClear = true;
961 				for (int i = 0; i < semaphoreSet->NumberOfSemaphores(); i++) {
962 					semaphore = semaphoreSet->Semaphore(i);
963 					unsigned short value;
964 					if (user_memcpy(&value, &args->array[i], sizeof(unsigned short))
965 						< B_OK) {
966 						TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
967 						result = B_BAD_ADDRESS;
968 						doClear = false;
969 						break;
970 					} else
971 						semaphore->SetValue(value);
972 				}
973 				if (doClear)
974 					semaphoreSet->ClearUndos();
975 			}
976 			break;
977 		}
978 
979 		case IPC_STAT: {
980 			if (!semaphoreSet->HasReadPermission()) {
981 				TRACE_ERROR(("xsi_semctl: calling process has not read "
982 					"permission on semaphore %d, key %d\n", semaphoreSet->ID(),
983 					(int)semaphoreSet->IpcKey()));
984 				result = EACCES;
985 			} else {
986 				struct semid_ds sem;
987 				sem.sem_perm = semaphoreSet->IpcPermission();
988 				sem.sem_nsems = semaphoreSet->NumberOfSemaphores();
989 				sem.sem_otime = semaphoreSet->LastSemopTime();
990 				sem.sem_ctime = semaphoreSet->LastSemctlTime();
991 				if (user_memcpy(args->buf, &sem, sizeof(struct semid_ds))
992 					< B_OK) {
993 					TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
994 					result = B_BAD_ADDRESS;
995 				}
996 			}
997 			break;
998 		}
999 
1000 		case IPC_SET: {
1001 			if (!semaphoreSet->HasPermission()) {
1002 				TRACE_ERROR(("xsi_semctl: calling process has not "
1003 					"permission on semaphore %d, key %d\n",
1004 					semaphoreSet->ID(),	(int)semaphoreSet->IpcKey()));
1005 				result = EACCES;
1006 			} else {
1007 				struct semid_ds sem;
1008 				if (user_memcpy(&sem, args->buf, sizeof(struct semid_ds))
1009 					< B_OK) {
1010 					TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
1011 					result = B_BAD_ADDRESS;
1012 				} else
1013 					semaphoreSet->DoIpcSet(&sem);
1014 			}
1015 			break;
1016 		}
1017 
1018 		case IPC_RMID: {
1019 			// If this was the command, we are still holding
1020 			// the semaphore set hash table lock along with the
1021 			// ipc hash table lock and the semaphore set lock
1022 			// itself, this way we are sure there is not
1023 			// one waiting in the queue of the mutex.
1024 			if (!semaphoreSet->HasPermission()) {
1025 				TRACE_ERROR(("xsi_semctl: calling process has not "
1026 					"permission on semaphore %d, key %d\n",
1027 					semaphoreSet->ID(),	(int)semaphoreSet->IpcKey()));
1028 				return EACCES;
1029 			}
1030 			key_t key = semaphoreSet->IpcKey();
1031 			Ipc *ipcKey = NULL;
1032 			if (key != -1) {
1033 				ipcKey = sIpcHashTable.Lookup(key);
1034 				sIpcHashTable.Remove(ipcKey);
1035 			}
1036 			sSemaphoreHashTable.Remove(semaphoreSet);
1037 			// Wake up of threads waiting on this set
1038 			// happens in the destructor
1039 			if (key != -1)
1040 				delete ipcKey;
1041 			atomic_add(&sXsiSemaphoreCount, -semaphoreSet->NumberOfSemaphores());
1042 			atomic_add(&sXsiSemaphoreSetCount, -1);
1043 			// Remove any sem_undo request
1044 			while (struct sem_undo *entry
1045 					= semaphoreSet->GetUndoList().RemoveHead()) {
1046 				MutexLocker _(entry->team->xsi_sem_context->lock);
1047 				entry->team->xsi_sem_context->undo_list.Remove(entry);
1048 				delete entry;
1049 			}
1050 
1051 			delete semaphoreSet;
1052 			return 0;
1053 		}
1054 
1055 		default:
1056 			TRACE_ERROR(("xsi_semctl: command %d not valid\n", command));
1057 			result = EINVAL;
1058 	}
1059 
1060 	return result;
1061 }
1062 
1063 
1064 status_t
1065 _user_xsi_semop(int semaphoreID, struct sembuf *ops, size_t numOps)
1066 {
1067 	TRACE(("xsi_semop: semaphoreID = %d, ops = %p, numOps = %ld\n",
1068 		semaphoreID, ops, numOps));
1069 	MutexLocker setHashLocker(sXsiSemaphoreSetLock);
1070 	XsiSemaphoreSet *semaphoreSet = sSemaphoreHashTable.Lookup(semaphoreID);
1071 	if (semaphoreSet == NULL) {
1072 		TRACE_ERROR(("xsi_semop: semaphore set id %d not valid\n",
1073 			semaphoreID));
1074 		return EINVAL;
1075 	}
1076 	MutexLocker setLocker(semaphoreSet->Lock());
1077 	setHashLocker.Unlock();
1078 
1079 	if (!IS_USER_ADDRESS(ops)) {
1080 		TRACE_ERROR(("xsi_semop: sembuf address is not valid\n"));
1081 		return B_BAD_ADDRESS;
1082 	}
1083 
1084 	if (numOps < 0 || numOps >= MAX_XSI_SEMS_PER_TEAM) {
1085 		TRACE_ERROR(("xsi_semop: numOps out of range\n"));
1086 		return EINVAL;
1087 	}
1088 
1089 	struct sembuf *operations
1090 		= (struct sembuf *)malloc(sizeof(struct sembuf) * numOps);
1091 	if (operations == NULL) {
1092 		TRACE_ERROR(("xsi_semop: failed to allocate sembuf struct\n"));
1093 		return B_NO_MEMORY;
1094 	}
1095 
1096 	if (user_memcpy(operations, ops,
1097 		(sizeof(struct sembuf) * numOps)) < B_OK) {
1098 		TRACE_ERROR(("xsi_semop: user_memcpy failed\n"));
1099 		free(operations);
1100 		return B_BAD_ADDRESS;
1101 	}
1102 
1103 	// We won't do partial request, that is operations
1104 	// only on some sempahores belonging to the set and then
1105 	// going to sleep. If we must wait on a semaphore, we undo
1106 	// all the operations already done and go to sleep, otherwise
1107 	// we may caused some unwanted deadlock among threads
1108 	// fighting for the same set.
1109 	bool notDone = true;
1110 	status_t result = 0;
1111 	while (notDone) {
1112 		XsiSemaphore *semaphore = NULL;
1113 		short numberOfSemaphores = semaphoreSet->NumberOfSemaphores();
1114 		bool goToSleep = false;
1115 
1116 		uint32 i = 0;
1117 		for (; i < numOps; i++) {
1118 			short semaphoreNumber = operations[i].sem_num;
1119 			if (semaphoreNumber >= numberOfSemaphores) {
1120 				TRACE_ERROR(("xsi_semop: %" B_PRIu32 " invalid semaphore number"
1121 					"\n", i));
1122 				result = EINVAL;
1123 				break;
1124 			}
1125 			semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1126 			unsigned short value = semaphore->Value();
1127 			short operation = operations[i].sem_op;
1128 			TRACE(("xsi_semop: semaphoreNumber = %d, value = %d\n",
1129 				semaphoreNumber, value));
1130 			if (operation < 0) {
1131 				if (semaphore->Add(operation)) {
1132 					if (operations[i].sem_flg & IPC_NOWAIT)
1133 						result = EAGAIN;
1134 					else
1135 						goToSleep = true;
1136 					break;
1137 				}
1138 			} else if (operation == 0) {
1139 				if (value == 0)
1140 					continue;
1141 				else if (operations[i].sem_flg & IPC_NOWAIT) {
1142 					result = EAGAIN;
1143 					break;
1144 				} else {
1145 					goToSleep = true;
1146 					break;
1147 				}
1148 			} else {
1149 				// Operation must be greater than zero,
1150 				// just add the value and continue
1151 				semaphore->Add(operation);
1152 			}
1153 		}
1154 
1155 		// Either we have to wait or an error occured
1156 		if (goToSleep || result != 0) {
1157 			// Undo all previously done operations
1158 			for (uint32 j = 0; j < i; j++) {
1159 				short semaphoreNumber = operations[j].sem_num;
1160 				semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1161 				short operation = operations[j].sem_op;
1162 				if (operation != 0)
1163 					semaphore->Revert(operation);
1164 			}
1165 			if (result != 0)
1166 				return result;
1167 
1168 			// We have to wait: first enqueue the thread
1169 			// in the appropriate set waiting list, then
1170 			// unlock the set itself and block the thread.
1171 			bool waitOnZero = true;
1172 			if (operations[i].sem_op != 0)
1173 				waitOnZero = false;
1174 
1175 			Thread *thread = thread_get_current_thread();
1176 			queued_thread queueEntry(thread, (int32)operations[i].sem_op);
1177 			semaphore->Enqueue(&queueEntry, waitOnZero);
1178 
1179 			uint32 sequenceNumber = semaphoreSet->SequenceNumber();
1180 
1181 			TRACE(("xsi_semop: thread %d going to sleep\n", (int)thread->id));
1182 			result = semaphore->BlockAndUnlock(thread, &setLocker);
1183 			TRACE(("xsi_semop: thread %d back to life\n", (int)thread->id));
1184 
1185 			// We are back to life. Find out why!
1186 			// Make sure the set hasn't been deleted or worst yet
1187 			// replaced.
1188 			setHashLocker.Lock();
1189 			semaphoreSet = sSemaphoreHashTable.Lookup(semaphoreID);
1190 			if (result == EIDRM || semaphoreSet == NULL || (semaphoreSet != NULL
1191 				&& sequenceNumber != semaphoreSet->SequenceNumber())) {
1192 				TRACE_ERROR(("xsi_semop: semaphore set id %d (sequence = "
1193 					"%" B_PRIu32 ") got destroyed\n", semaphoreID,
1194 					sequenceNumber));
1195 				notDone = false;
1196 				result = EIDRM;
1197 			} else if (result == B_INTERRUPTED) {
1198 				TRACE_ERROR(("xsi_semop: thread %d got interrupted while "
1199 					"waiting on semaphore set id %d\n",(int)thread->id,
1200 					semaphoreID));
1201 				semaphore->Deque(&queueEntry, waitOnZero);
1202 				result = EINTR;
1203 				notDone = false;
1204 			} else {
1205 				setLocker.Lock();
1206 				setHashLocker.Unlock();
1207 			}
1208 		} else {
1209 			// everything worked like a charm (so far)
1210 			notDone = false;
1211 			TRACE(("xsi_semop: semaphore acquired succesfully\n"));
1212 			// We acquired the semaphore, now records the sem_undo
1213 			// requests
1214 			XsiSemaphore *semaphore = NULL;
1215 			uint32 i = 0;
1216 			for (; i < numOps; i++) {
1217 				short semaphoreNumber = operations[i].sem_num;
1218 				semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1219 				short operation = operations[i].sem_op;
1220 				if (operations[i].sem_flg & SEM_UNDO)
1221 					if (semaphoreSet->RecordUndo(semaphoreNumber, operation)
1222 						!= B_OK) {
1223 						// Unlikely scenario, but we might get here.
1224 						// Undo everything!
1225 						// Start with semaphore operations
1226 						for (uint32 j = 0; j < numOps; j++) {
1227 							short semaphoreNumber = operations[j].sem_num;
1228 							semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1229 							short operation = operations[j].sem_op;
1230 							if (operation != 0)
1231 								semaphore->Revert(operation);
1232 						}
1233 						// Remove all previously registered sem_undo request
1234 						for (uint32 j = 0; j < i; j++) {
1235 							if (operations[j].sem_flg & SEM_UNDO)
1236 								semaphoreSet->RevertUndo(operations[j].sem_num,
1237 									operations[j].sem_op);
1238 						}
1239 						result = ENOSPC;
1240 					}
1241 			}
1242 		}
1243 	}
1244 
1245 	// We did it. Set the pid of all semaphores used
1246 	if (result == 0) {
1247 		for (uint32 i = 0; i < numOps; i++) {
1248 			short semaphoreNumber = operations[i].sem_num;
1249 			XsiSemaphore *semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1250 			semaphore->SetPid(getpid());
1251 		}
1252 	}
1253 	return result;
1254 }
1255