xref: /haiku/src/system/kernel/posix/xsi_semaphore.cpp (revision 8a6724a0ee3803f1e9f487d8111bb3f6cb8d16db)
1 /*
2  * Copyright 2008-2011, Haiku, Inc. All rights reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Salvatore Benedetto <salvatore.benedetto@gmail.com>
7  */
8 
9 #include <posix/xsi_semaphore.h>
10 
11 #include <new>
12 
13 #include <sys/ipc.h>
14 #include <sys/types.h>
15 
16 #include <OS.h>
17 
18 #include <kernel.h>
19 #include <syscall_restart.h>
20 
21 #include <util/atomic.h>
22 #include <util/AutoLock.h>
23 #include <util/DoublyLinkedList.h>
24 #include <util/OpenHashTable.h>
25 #include <AutoDeleter.h>
26 
27 
28 //#define TRACE_XSI_SEM
29 #ifdef TRACE_XSI_SEM
30 #	define TRACE(x)			dprintf x
31 #	define TRACE_ERROR(x)	dprintf x
32 #else
33 #	define TRACE(x)			/* nothing */
34 #	define TRACE_ERROR(x)	dprintf x
35 #endif
36 
37 
38 namespace {
39 
40 // Queue for holding blocked threads
41 struct queued_thread : DoublyLinkedListLinkImpl<queued_thread> {
42 	queued_thread(Thread *thread, int32 count)
43 		:
44 		thread(thread),
45 		count(count),
46 		queued(false)
47 	{
48 	}
49 
50 	Thread	*thread;
51 	int32	count;
52 	bool	queued;
53 };
54 
55 typedef DoublyLinkedList<queued_thread> ThreadQueue;
56 
57 class XsiSemaphoreSet;
58 
59 struct sem_undo : DoublyLinkedListLinkImpl<sem_undo> {
60 	sem_undo(XsiSemaphoreSet *semaphoreSet, Team *team, int16 *undoValues)
61 		:
62 		semaphore_set(semaphoreSet),
63 		team(team),
64 		undo_values(undoValues)
65 	{
66 	}
67 
68 	DoublyLinkedListLink<sem_undo>		team_link;
69 	XsiSemaphoreSet						*semaphore_set;
70 	Team								*team;
71 	int16								*undo_values;
72 };
73 
74 typedef DoublyLinkedList<sem_undo> UndoList;
75 typedef DoublyLinkedList<sem_undo,
76 	DoublyLinkedListMemberGetLink<sem_undo, &sem_undo::team_link> > TeamList;
77 
78 } // namespace
79 
80 
81 // Forward declared in global namespace.
82 struct xsi_sem_context {
83 	xsi_sem_context()
84 	{
85 		mutex_init(&lock, "Private team undo_list lock");
86 	}
87 
88 	~xsi_sem_context()
89 	{
90 		mutex_destroy(&lock);
91 	}
92 
93 	TeamList	undo_list;
94 	mutex		lock;
95 };
96 
97 
98 namespace {
99 
100 // Xsi semaphore definition
101 class XsiSemaphore {
102 public:
103 	XsiSemaphore()
104 		: fLastPidOperation(0),
105 		fThreadsWaitingToIncrease(0),
106 		fThreadsWaitingToBeZero(0),
107 		fValue(0)
108 	{
109 	}
110 
111 	~XsiSemaphore()
112 	{
113 		// For some reason the semaphore is getting destroyed.
114 		// Wake up any remaing awaiting threads
115 		while (queued_thread *entry = fWaitingToIncreaseQueue.RemoveHead()) {
116 			entry->queued = false;
117 			thread_unblock(entry->thread, EIDRM);
118 		}
119 		while (queued_thread *entry = fWaitingToBeZeroQueue.RemoveHead()) {
120 			entry->queued = false;
121 			thread_unblock(entry->thread, EIDRM);
122 		}
123 		// No need to remove any sem_undo request still
124 		// hanging. When the process exit and doesn't found
125 		// the semaphore set, it'll just ignore the sem_undo
126 		// request. That's better than iterating trough the
127 		// whole sUndoList. Beside we don't know our semaphore
128 		// number nor our semaphore set id.
129 	}
130 
131 	// We return true in case the operation causes the
132 	// caller to wait, so it can undo all the operations
133 	// previously done
134 	bool Add(short value)
135 	{
136 		if ((int)(fValue + value) < 0) {
137 			TRACE(("XsiSemaphore::Add: potentially going to sleep\n"));
138 			return true;
139 		} else {
140 			fValue += value;
141 			if (fValue == 0 && fThreadsWaitingToBeZero > 0)
142 				WakeUpThread(true);
143 			else if (fValue > 0 && fThreadsWaitingToIncrease > 0)
144 				WakeUpThread(false);
145 			return false;
146 		}
147 	}
148 
149 	status_t BlockAndUnlock(Thread *thread, MutexLocker *setLocker)
150 	{
151 		thread_prepare_to_block(thread, B_CAN_INTERRUPT,
152 			THREAD_BLOCK_TYPE_OTHER, (void*)"xsi semaphore");
153 		// Unlock the set before blocking
154 		setLocker->Unlock();
155 
156 // TODO: We've got a serious race condition: If BlockAndUnlock() returned due to
157 // interruption, we will still be queued. A WakeUpThread() at this point will
158 // call thread_unblock() and might thus screw with our trying to re-lock the
159 // mutex.
160 		return thread_block();
161 	}
162 
163 	void Deque(queued_thread *queueEntry, bool waitForZero)
164 	{
165 		if (queueEntry->queued) {
166 			if (waitForZero) {
167 				fWaitingToBeZeroQueue.Remove(queueEntry);
168 				fThreadsWaitingToBeZero--;
169 			} else {
170 				fWaitingToIncreaseQueue.Remove(queueEntry);
171 				fThreadsWaitingToIncrease--;
172 			}
173 		}
174 	}
175 
176 	void Enqueue(queued_thread *queueEntry, bool waitForZero)
177 	{
178 		if (waitForZero) {
179 			fWaitingToBeZeroQueue.Add(queueEntry);
180 			fThreadsWaitingToBeZero++;
181 		} else {
182 			fWaitingToIncreaseQueue.Add(queueEntry);
183 			fThreadsWaitingToIncrease++;
184 		}
185 		queueEntry->queued = true;
186 	}
187 
188 	pid_t LastPid() const
189 	{
190 		return fLastPidOperation;
191 	}
192 
193 	void Revert(short value)
194 	{
195 		fValue -= value;
196 		if (fValue == 0 && fThreadsWaitingToBeZero > 0)
197 			WakeUpThread(true);
198 		else if (fValue > 0 && fThreadsWaitingToIncrease > 0)
199 			WakeUpThread(false);
200 	}
201 
202 	void SetPid(pid_t pid)
203 	{
204 		fLastPidOperation = pid;
205 	}
206 
207 	void SetValue(ushort value)
208 	{
209 		fValue = value;
210 	}
211 
212 	ushort ThreadsWaitingToIncrease() const
213 	{
214 		return fThreadsWaitingToIncrease;
215 	}
216 
217 	ushort ThreadsWaitingToBeZero() const
218 	{
219 		return fThreadsWaitingToBeZero;
220 	}
221 
222 	ushort Value() const
223 	{
224 		return fValue;
225 	}
226 
227 	void WakeUpThread(bool waitingForZero)
228 	{
229 		if (waitingForZero) {
230 			// Wake up all threads waiting on zero
231 			while (queued_thread *entry = fWaitingToBeZeroQueue.RemoveHead()) {
232 				entry->queued = false;
233 				fThreadsWaitingToBeZero--;
234 				thread_unblock(entry->thread, 0);
235 			}
236 		} else {
237 			// Wake up all threads even though they might go back to sleep
238 			while (queued_thread *entry = fWaitingToIncreaseQueue.RemoveHead()) {
239 				entry->queued = false;
240 				fThreadsWaitingToIncrease--;
241 				thread_unblock(entry->thread, 0);
242 			}
243 		}
244 	}
245 
246 private:
247 	pid_t				fLastPidOperation;				// sempid
248 	ushort				fThreadsWaitingToIncrease;		// semncnt
249 	ushort				fThreadsWaitingToBeZero;		// semzcnt
250 	ushort				fValue;							// semval
251 
252 	ThreadQueue			fWaitingToIncreaseQueue;
253 	ThreadQueue			fWaitingToBeZeroQueue;
254 };
255 
256 #define MAX_XSI_SEMS_PER_TEAM	128
257 
258 // Xsi semaphore set definition (semid_ds)
259 class XsiSemaphoreSet {
260 public:
261 	XsiSemaphoreSet(int numberOfSemaphores, int flags)
262 		: fInitOK(false),
263 		fLastSemctlTime((time_t)real_time_clock()),
264 		fLastSemopTime(0),
265 		fNumberOfSemaphores(numberOfSemaphores),
266 		fSemaphores(0)
267 	{
268 		mutex_init(&fLock, "XsiSemaphoreSet private mutex");
269 		SetIpcKey((key_t)-1);
270 		SetPermissions(flags);
271 		fSemaphores = new(std::nothrow) XsiSemaphore[numberOfSemaphores];
272 		if (fSemaphores == NULL) {
273 			TRACE_ERROR(("XsiSemaphoreSet::XsiSemaphore(): failed to allocate "
274 				"XsiSemaphore object\n"));
275 		} else
276 			fInitOK = true;
277 	}
278 
279 	~XsiSemaphoreSet()
280 	{
281 		TRACE(("XsiSemaphoreSet::~XsiSemaphoreSet(): removing semaphore "
282 			"set %d\n", fID));
283 		mutex_destroy(&fLock);
284 		delete[] fSemaphores;
285 	}
286 
287 	void ClearUndo(unsigned short semaphoreNumber)
288 	{
289 		Team *team = thread_get_current_thread()->team;
290 		UndoList::Iterator iterator = fUndoList.GetIterator();
291 		while (iterator.HasNext()) {
292 			struct sem_undo *current = iterator.Next();
293 			if (current->team == team) {
294 				TRACE(("XsiSemaphoreSet::ClearUndo: teamID = %d, "
295 					"semaphoreSetID = %d, semaphoreNumber = %d\n",
296 					fID, semaphoreNumber, (int)team->id));
297 				MutexLocker _(team->xsi_sem_context->lock);
298 				current->undo_values[semaphoreNumber] = 0;
299 				return;
300 			}
301 		}
302 	}
303 
304 	void ClearUndos()
305 	{
306 		// Clear all undo_values (POSIX semadj equivalent)
307 		// of the calling team. This happens only on semctl SETALL.
308 		Team *team = thread_get_current_thread()->team;
309 		DoublyLinkedList<sem_undo>::Iterator iterator = fUndoList.GetIterator();
310 		while (iterator.HasNext()) {
311 			struct sem_undo *current = iterator.Next();
312 			if (current->team == team) {
313 				TRACE(("XsiSemaphoreSet::ClearUndos: teamID = %d, "
314 					"semaphoreSetID = %d\n", (int)team->id, fID));
315 				MutexLocker _(team->xsi_sem_context->lock);
316 				memset(current->undo_values, 0,
317 					sizeof(int16) * fNumberOfSemaphores);
318 				return;
319 			}
320 		}
321 	}
322 
323 	void DoIpcSet(struct semid_ds *result)
324 	{
325 		fPermissions.uid = result->sem_perm.uid;
326 		fPermissions.gid = result->sem_perm.gid;
327 		fPermissions.mode = (fPermissions.mode & ~0x01ff)
328 			| (result->sem_perm.mode & 0x01ff);
329 	}
330 
331 	bool HasPermission() const
332 	{
333 		if ((fPermissions.mode & S_IWOTH) != 0)
334 			return true;
335 
336 		uid_t uid = geteuid();
337 		if (uid == 0 || (uid == fPermissions.uid
338 			&& (fPermissions.mode & S_IWUSR) != 0))
339 			return true;
340 
341 		gid_t gid = getegid();
342 		if (gid == fPermissions.gid && (fPermissions.mode & S_IWGRP) != 0)
343 			return true;
344 
345 		return false;
346 	}
347 
348 	bool HasReadPermission() const
349 	{
350 		// TODO: fix this
351 		return HasPermission();
352 	}
353 
354 	int ID() const
355 	{
356 		return fID;
357 	}
358 
359 	bool InitOK()
360 	{
361 		return fInitOK;
362 	}
363 
364 	key_t IpcKey() const
365 	{
366 		return fPermissions.key;
367 	}
368 
369 	struct ipc_perm IpcPermission() const
370 	{
371 		return fPermissions;
372 	}
373 
374 	time_t LastSemctlTime() const
375 	{
376 		return fLastSemctlTime;
377 	}
378 
379 	time_t LastSemopTime() const
380 	{
381 		return fLastSemopTime;
382 	}
383 
384 	mutex &Lock()
385 	{
386 		return fLock;
387 	}
388 
389 	ushort NumberOfSemaphores() const
390 	{
391 		return fNumberOfSemaphores;
392 	}
393 
394 	// Record the sem_undo operation into our private fUndoList and
395 	// the team undo_list. The only limit here is the memory needed
396 	// for creating a new sem_undo structure.
397 	int RecordUndo(short semaphoreNumber, short value)
398 	{
399 		// Look if there is already a record from the team caller
400 		// for the same semaphore set
401 		bool notFound = true;
402 		Team *team = thread_get_current_thread()->team;
403 		DoublyLinkedList<sem_undo>::Iterator iterator = fUndoList.GetIterator();
404 		while (iterator.HasNext()) {
405 			struct sem_undo *current = iterator.Next();
406 			if (current->team == team) {
407 				// Update its undo value
408 				MutexLocker _(team->xsi_sem_context->lock);
409 				int newValue = current->undo_values[semaphoreNumber] + value;
410 				if (newValue > USHRT_MAX || newValue < -USHRT_MAX) {
411 					TRACE_ERROR(("XsiSemaphoreSet::RecordUndo: newValue %d "
412 						"out of range\n", newValue));
413 					return ERANGE;
414 				}
415 				current->undo_values[semaphoreNumber] = newValue;
416 				notFound = false;
417 				TRACE(("XsiSemaphoreSet::RecordUndo: found record. Team = %d, "
418 					"semaphoreSetID = %d, semaphoreNumber = %d, value = %d\n",
419 					(int)team->id, fID, semaphoreNumber,
420 					current->undo_values[semaphoreNumber]));
421 				break;
422 			}
423 		}
424 
425 		if (notFound) {
426 			// First sem_undo request from this team for this
427 			// semaphore set
428 			int16 *undoValues
429 				= (int16 *)malloc(sizeof(int16) * fNumberOfSemaphores);
430 			if (undoValues == NULL)
431 				return B_NO_MEMORY;
432 			struct sem_undo *request
433 				= new(std::nothrow) sem_undo(this, team, undoValues);
434 			if (request == NULL) {
435 				free(undoValues);
436 				return B_NO_MEMORY;
437 			}
438 			memset(request->undo_values, 0, sizeof(int16) * fNumberOfSemaphores);
439 			request->undo_values[semaphoreNumber] = value;
440 
441 			// Check if it's the very first sem_undo request for this team
442 			xsi_sem_context *context = atomic_pointer_get(&team->xsi_sem_context);
443 			if (context == NULL) {
444 				// Create the context
445 				context = new(std::nothrow) xsi_sem_context;
446 				if (context == NULL) {
447 					free(request->undo_values);
448 					delete request;
449 					return B_NO_MEMORY;
450 				}
451 				// Since we don't hold any global lock, someone
452 				// else could have been quicker than us, so we have
453 				// to delete the one we just created and use the one
454 				// in place.
455 				if (atomic_pointer_test_and_set(&team->xsi_sem_context, context,
456 					(xsi_sem_context *)NULL) != NULL)
457 					delete context;
458 			}
459 
460 			// Add the request to both XsiSemaphoreSet and team list
461 			fUndoList.Add(request);
462 			MutexLocker _(team->xsi_sem_context->lock);
463 			team->xsi_sem_context->undo_list.Add(request);
464 			TRACE(("XsiSemaphoreSet::RecordUndo: new record added. Team = %d, "
465 				"semaphoreSetID = %d, semaphoreNumber = %d, value = %d\n",
466 				(int)team->id, fID, semaphoreNumber, value));
467 		}
468 		return B_OK;
469 	}
470 
471 	void RevertUndo(short semaphoreNumber, short value)
472 	{
473 		// This can be called only when RecordUndo fails.
474 		Team *team = thread_get_current_thread()->team;
475 		DoublyLinkedList<sem_undo>::Iterator iterator = fUndoList.GetIterator();
476 		while (iterator.HasNext()) {
477 			struct sem_undo *current = iterator.Next();
478 			if (current->team == team) {
479 				MutexLocker _(team->xsi_sem_context->lock);
480 				fSemaphores[semaphoreNumber].Revert(value);
481 				break;
482 			}
483 		}
484 	}
485 
486 	XsiSemaphore* Semaphore(int nth) const
487 	{
488 		return &fSemaphores[nth];
489 	}
490 
491 	uint32 SequenceNumber() const
492 	{
493 		return fSequenceNumber;
494 	}
495 
496 	// Implemented after sGlobalSequenceNumber is declared
497 	void SetID();
498 
499 	void SetIpcKey(key_t key)
500 	{
501 		fPermissions.key = key;
502 	}
503 
504 	void SetLastSemctlTime()
505 	{
506 		fLastSemctlTime = real_time_clock();
507 	}
508 
509 	void SetLastSemopTime()
510 	{
511 		fLastSemopTime = real_time_clock();
512 	}
513 
514 	void SetPermissions(int flags)
515 	{
516 		fPermissions.uid = fPermissions.cuid = geteuid();
517 		fPermissions.gid = fPermissions.cgid = getegid();
518 		fPermissions.mode = (flags & 0x01ff);
519 	}
520 
521 	UndoList &GetUndoList()
522 	{
523 		return fUndoList;
524 	}
525 
526 	XsiSemaphoreSet*& Link()
527 	{
528 		return fLink;
529 	}
530 
531 private:
532 	int							fID;					// semaphore set id
533 	bool						fInitOK;
534 	time_t						fLastSemctlTime;		// sem_ctime
535 	time_t						fLastSemopTime;			// sem_otime
536 	mutex 						fLock;					// private lock
537 	ushort						fNumberOfSemaphores;	// sem_nsems
538 	struct ipc_perm				fPermissions;			// sem_perm
539 	XsiSemaphore				*fSemaphores;			// array of semaphores
540 	uint32						fSequenceNumber;		// used as a second id
541 	UndoList					fUndoList;				// undo list requests
542 
543 	XsiSemaphoreSet*			fLink;
544 };
545 
546 // Xsi semaphore set hash table
547 struct SemaphoreHashTableDefinition {
548 	typedef int					KeyType;
549 	typedef XsiSemaphoreSet		ValueType;
550 
551 	size_t HashKey (const int key) const
552 	{
553 		return (size_t)key;
554 	}
555 
556 	size_t Hash(XsiSemaphoreSet *variable) const
557 	{
558 		return (size_t)variable->ID();
559 	}
560 
561 	bool Compare(const int key, XsiSemaphoreSet *variable) const
562 	{
563 		return (int)key == (int)variable->ID();
564 	}
565 
566 	XsiSemaphoreSet*& GetLink(XsiSemaphoreSet *variable) const
567 	{
568 		return variable->Link();
569 	}
570 };
571 
572 
573 // IPC class
574 class Ipc {
575 public:
576 	Ipc(key_t key)
577 		: fKey(key),
578 		fSemaphoreSetId(-1)
579 	{
580 	}
581 
582 	key_t Key() const
583 	{
584 		return fKey;
585 	}
586 
587 	int SemaphoreSetID() const
588 	{
589 		return fSemaphoreSetId;
590 	}
591 
592 	void SetSemaphoreSetID(XsiSemaphoreSet *semaphoreSet)
593 	{
594 		fSemaphoreSetId = semaphoreSet->ID();
595 	}
596 
597 	Ipc*& Link()
598 	{
599 		return fLink;
600 	}
601 
602 private:
603 	key_t				fKey;
604 	int					fSemaphoreSetId;
605 	Ipc*				fLink;
606 };
607 
608 
609 struct IpcHashTableDefinition {
610 	typedef key_t	KeyType;
611 	typedef Ipc		ValueType;
612 
613 	size_t HashKey (const key_t key) const
614 	{
615 		return (size_t)(key);
616 	}
617 
618 	size_t Hash(Ipc *variable) const
619 	{
620 		return (size_t)HashKey(variable->Key());
621 	}
622 
623 	bool Compare(const key_t key, Ipc *variable) const
624 	{
625 		return (key_t)key == (key_t)variable->Key();
626 	}
627 
628 	Ipc*& GetLink(Ipc *variable) const
629 	{
630 		return variable->Link();
631 	}
632 };
633 
634 } // namespace
635 
636 
637 // Arbitrary limit
638 #define MAX_XSI_SEMAPHORE		4096
639 #define MAX_XSI_SEMAPHORE_SET	2048
640 static BOpenHashTable<IpcHashTableDefinition> sIpcHashTable;
641 static BOpenHashTable<SemaphoreHashTableDefinition> sSemaphoreHashTable;
642 
643 static mutex sIpcLock;
644 static mutex sXsiSemaphoreSetLock;
645 
646 static uint32 sGlobalSequenceNumber = 1;
647 static int32 sXsiSemaphoreCount = 0;
648 static int32 sXsiSemaphoreSetCount = 0;
649 
650 
651 //	#pragma mark -
652 
653 
654 void
655 XsiSemaphoreSet::SetID()
656 {
657 	fID = real_time_clock();
658 	// The lock is held before calling us
659 	while (true) {
660 		if (sSemaphoreHashTable.Lookup(fID) == NULL)
661 			break;
662 		fID = (fID + 1) % INT_MAX;
663 	}
664 	sGlobalSequenceNumber = (sGlobalSequenceNumber + 1) % UINT_MAX;
665 	fSequenceNumber = sGlobalSequenceNumber;
666 }
667 
668 
669 //	#pragma mark - Kernel exported API
670 
671 
672 void
673 xsi_sem_init()
674 {
675 	// Initialize hash tables
676 	status_t status = sIpcHashTable.Init();
677 	if (status != B_OK)
678 		panic("xsi_sem_init() failed to initialize ipc hash table\n");
679 	status =  sSemaphoreHashTable.Init();
680 	if (status != B_OK)
681 		panic("xsi_sem_init() failed to initialize semaphore hash table\n");
682 
683 	mutex_init(&sIpcLock, "global POSIX semaphore IPC table");
684 	mutex_init(&sXsiSemaphoreSetLock, "global POSIX xsi sem table");
685 }
686 
687 
688 /*!	Function called on team exit to process any sem_undo requests */
689 void
690 xsi_sem_undo(Team *team)
691 {
692 	if (team->xsi_sem_context == NULL)
693 		return;
694 
695 	// By acquiring first the semaphore hash table lock
696 	// we make sure the semaphore set in our sem_undo
697 	// list won't get removed by IPC_RMID call
698 	MutexLocker _(sXsiSemaphoreSetLock);
699 
700 	// Process all sem_undo request in the team sem undo list
701 	// if any
702 	TeamList::Iterator iterator
703 		= team->xsi_sem_context->undo_list.GetIterator();
704 	while (iterator.HasNext()) {
705 		struct sem_undo *current = iterator.Next();
706 		XsiSemaphoreSet *semaphoreSet = current->semaphore_set;
707 		// Acquire the set lock in order to prevent race
708 		// condition with RecordUndo
709 		MutexLocker setLocker(semaphoreSet->Lock());
710 		MutexLocker _(team->xsi_sem_context->lock);
711 		// Revert the changes done by this process
712 		for (int i = 0; i < semaphoreSet->NumberOfSemaphores(); i++)
713 			if (current->undo_values[i] != 0) {
714 				TRACE(("xsi_sem_undo: TeamID = %d, SemaphoreSetID = %d, "
715 					"SemaphoreNumber = %d, undo value = %d\n", (int)team->id,
716 					semaphoreSet->ID(), i, (int)current->undo_values[i]));
717 				semaphoreSet->Semaphore(i)->Revert(current->undo_values[i]);
718 			}
719 
720 		// Remove and free the sem_undo structure from both lists
721 		iterator.Remove();
722 		semaphoreSet->GetUndoList().Remove(current);
723 		delete current;
724 	}
725 	delete team->xsi_sem_context;
726 	team->xsi_sem_context = NULL;
727 }
728 
729 
730 //	#pragma mark - Syscalls
731 
732 
733 int
734 _user_xsi_semget(key_t key, int numberOfSemaphores, int flags)
735 {
736 	TRACE(("xsi_semget: key = %d, numberOfSemaphores = %d, flags = %d\n",
737 		(int)key, numberOfSemaphores, flags));
738 	XsiSemaphoreSet *semaphoreSet = NULL;
739 	Ipc *ipcKey = NULL;
740 	// Default assumptions
741 	bool isPrivate = true;
742 	bool create = true;
743 
744 	MutexLocker _(sIpcLock);
745 	if (key != IPC_PRIVATE) {
746 		isPrivate = false;
747 		// Check if key already exist, if it does it already has a semaphore
748 		// set associated with it
749 		ipcKey = sIpcHashTable.Lookup(key);
750 		if (ipcKey == NULL) {
751 			// The ipc key does not exist. Create it and add it to the system
752 			if (!(flags & IPC_CREAT)) {
753 				TRACE_ERROR(("xsi_semget: key %d does not exist, but the "
754 					"caller did not ask for creation\n",(int)key));
755 				return ENOENT;
756 			}
757 			ipcKey = new(std::nothrow) Ipc(key);
758 			if (ipcKey == NULL) {
759 				TRACE_ERROR(("xsi_semget: failed to create new Ipc object "
760 					"for key %d\n",	(int)key));
761 				return ENOMEM;
762 			}
763 			sIpcHashTable.Insert(ipcKey);
764 		} else {
765 			// The IPC key exist and it already has a semaphore
766 			if ((flags & IPC_CREAT) && (flags & IPC_EXCL)) {
767 				TRACE_ERROR(("xsi_semget: key %d already exist\n", (int)key));
768 				return EEXIST;
769 			}
770 			int semaphoreSetID = ipcKey->SemaphoreSetID();
771 
772 			MutexLocker _(sXsiSemaphoreSetLock);
773 			semaphoreSet = sSemaphoreHashTable.Lookup(semaphoreSetID);
774 			if (!semaphoreSet->HasPermission()) {
775 				TRACE_ERROR(("xsi_semget: calling process has not permission "
776 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
777 					(int)key));
778 				return EACCES;
779 			}
780 			if (numberOfSemaphores > semaphoreSet->NumberOfSemaphores()
781 				&& numberOfSemaphores != 0) {
782 				TRACE_ERROR(("xsi_semget: numberOfSemaphores greater than the "
783 					"one associated with semaphore %d, key %d\n",
784 					semaphoreSet->ID(), (int)key));
785 				return EINVAL;
786 			}
787 			create = false;
788 		}
789 	}
790 
791 	if (create) {
792 		// Create a new sempahore set for this key
793 		if (numberOfSemaphores <= 0
794 			|| numberOfSemaphores >= MAX_XSI_SEMS_PER_TEAM) {
795 			TRACE_ERROR(("xsi_semget: numberOfSemaphores out of range\n"));
796 			return EINVAL;
797 		}
798 		if (sXsiSemaphoreCount >= MAX_XSI_SEMAPHORE
799 			|| sXsiSemaphoreSetCount >= MAX_XSI_SEMAPHORE_SET) {
800 			TRACE_ERROR(("xsi_semget: reached limit of maximum number of "
801 				"semaphores allowed\n"));
802 			return ENOSPC;
803 		}
804 
805 		semaphoreSet = new(std::nothrow) XsiSemaphoreSet(numberOfSemaphores,
806 			flags);
807 		if (semaphoreSet == NULL || !semaphoreSet->InitOK()) {
808 			TRACE_ERROR(("xsi_semget: failed to allocate a new xsi "
809 				"semaphore set\n"));
810 			delete semaphoreSet;
811 			return ENOMEM;
812 		}
813 		atomic_add(&sXsiSemaphoreCount, numberOfSemaphores);
814 		atomic_add(&sXsiSemaphoreSetCount, 1);
815 
816 		MutexLocker _(sXsiSemaphoreSetLock);
817 		semaphoreSet->SetID();
818 		if (isPrivate)
819 			semaphoreSet->SetIpcKey((key_t)-1);
820 		else {
821 			semaphoreSet->SetIpcKey(key);
822 			ipcKey->SetSemaphoreSetID(semaphoreSet);
823 		}
824 		sSemaphoreHashTable.Insert(semaphoreSet);
825 		TRACE(("semget: new set = %d created, sequence = %ld\n",
826 			semaphoreSet->ID(), semaphoreSet->SequenceNumber()));
827 	}
828 
829 	return semaphoreSet->ID();
830 }
831 
832 
833 int
834 _user_xsi_semctl(int semaphoreID, int semaphoreNumber, int command,
835 	union semun *args)
836 {
837 	TRACE(("xsi_semctl: semaphoreID = %d, semaphoreNumber = %d, command = %d\n",
838 		semaphoreID, semaphoreNumber, command));
839 	MutexLocker ipcHashLocker(sIpcLock);
840 	MutexLocker setHashLocker(sXsiSemaphoreSetLock);
841 	XsiSemaphoreSet *semaphoreSet = sSemaphoreHashTable.Lookup(semaphoreID);
842 	if (semaphoreSet == NULL) {
843 		TRACE_ERROR(("xsi_semctl: semaphore set id %d not valid\n",
844 			semaphoreID));
845 		return EINVAL;
846 	}
847 	if (semaphoreNumber < 0
848 		|| semaphoreNumber > semaphoreSet->NumberOfSemaphores()) {
849 		TRACE_ERROR(("xsi_semctl: semaphore number %d not valid for "
850 			"semaphore %d\n", semaphoreNumber, semaphoreID));
851 		return EINVAL;
852 	}
853 	if (args != 0 && !IS_USER_ADDRESS(args)) {
854 		TRACE_ERROR(("xsi_semctl: semun address is not valid\n"));
855 		return B_BAD_ADDRESS;
856 	}
857 
858 	// Lock the semaphore set itself and release both the semaphore
859 	// set hash table lock and the ipc hash table lock _only_ if
860 	// the command it's not IPC_RMID, this prevents undesidered
861 	// situation from happening while (hopefully) improving the
862 	// concurrency.
863 	MutexLocker setLocker;
864 	if (command != IPC_RMID) {
865 		setLocker.SetTo(&semaphoreSet->Lock(), false);
866 		setHashLocker.Unlock();
867 		ipcHashLocker.Unlock();
868 	} else
869 		// We are about to delete the set along with its mutex, so
870 		// we can't use the MutexLocker class, as the mutex itself
871 		// won't exist on function exit
872 		mutex_lock(&semaphoreSet->Lock());
873 
874 	int result = 0;
875 	XsiSemaphore *semaphore = semaphoreSet->Semaphore(semaphoreNumber);
876 	switch (command) {
877 		case GETVAL: {
878 			if (!semaphoreSet->HasReadPermission()) {
879 				TRACE_ERROR(("xsi_semctl: calling process has not permission "
880 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
881 					(int)semaphoreSet->IpcKey()));
882 				result = EACCES;
883 			} else
884 				result = semaphore->Value();
885 			break;
886 		}
887 
888 		case SETVAL: {
889 			if (!semaphoreSet->HasPermission()) {
890 				TRACE_ERROR(("xsi_semctl: calling process has not permission "
891 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
892 					(int)semaphoreSet->IpcKey()));
893 				result = EACCES;
894 			} else {
895 				int value;
896 				if (user_memcpy(&value, &args->val, sizeof(int)) < B_OK) {
897 					TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
898 					result = B_BAD_ADDRESS;
899 				} else if (value > USHRT_MAX) {
900 					TRACE_ERROR(("xsi_semctl: value %d out of range\n", value));
901 					result = ERANGE;
902 				} else {
903 					semaphore->SetValue(value);
904 					semaphoreSet->ClearUndo(semaphoreNumber);
905 				}
906 			}
907 			break;
908 		}
909 
910 		case GETPID: {
911 			if (!semaphoreSet->HasReadPermission()) {
912 				TRACE_ERROR(("xsi_semctl: calling process has not permission "
913 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
914 					(int)semaphoreSet->IpcKey()));
915 				result = EACCES;
916 			} else
917 				result = semaphore->LastPid();
918 			break;
919 		}
920 
921 		case GETNCNT: {
922 			if (!semaphoreSet->HasReadPermission()) {
923 				TRACE_ERROR(("xsi_semctl: calling process has not permission "
924 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
925 					(int)semaphoreSet->IpcKey()));
926 				result = EACCES;
927 			} else
928 				result = semaphore->ThreadsWaitingToIncrease();
929 			break;
930 		}
931 
932 		case GETZCNT: {
933 			if (!semaphoreSet->HasReadPermission()) {
934 				TRACE_ERROR(("xsi_semctl: calling process has not permission "
935 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
936 					(int)semaphoreSet->IpcKey()));
937 				result = EACCES;
938 			} else
939 				result = semaphore->ThreadsWaitingToBeZero();
940 			break;
941 		}
942 
943 		case GETALL: {
944 			if (!semaphoreSet->HasReadPermission()) {
945 				TRACE_ERROR(("xsi_semctl: calling process has not read "
946 					"permission on semaphore %d, key %d\n", semaphoreSet->ID(),
947 					(int)semaphoreSet->IpcKey()));
948 				result = EACCES;
949 			} else
950 				for (int i = 0; i < semaphoreSet->NumberOfSemaphores(); i++) {
951 					semaphore = semaphoreSet->Semaphore(i);
952 					unsigned short value = semaphore->Value();
953 					if (user_memcpy(&args->array[i], &value,
954 						sizeof(unsigned short)) < B_OK) {
955 						TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
956 						result = B_BAD_ADDRESS;
957 						break;
958 					}
959 				}
960 			break;
961 		}
962 
963 		case SETALL: {
964 			if (!semaphoreSet->HasPermission()) {
965 				TRACE_ERROR(("xsi_semctl: calling process has not permission "
966 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
967 					(int)semaphoreSet->IpcKey()));
968 				result = EACCES;
969 			} else {
970 				bool doClear = true;
971 				for (int i = 0; i < semaphoreSet->NumberOfSemaphores(); i++) {
972 					semaphore = semaphoreSet->Semaphore(i);
973 					unsigned short value;
974 					if (user_memcpy(&value, &args->array[i], sizeof(unsigned short))
975 						< B_OK) {
976 						TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
977 						result = B_BAD_ADDRESS;
978 						doClear = false;
979 						break;
980 					} else
981 						semaphore->SetValue(value);
982 				}
983 				if (doClear)
984 					semaphoreSet->ClearUndos();
985 			}
986 			break;
987 		}
988 
989 		case IPC_STAT: {
990 			if (!semaphoreSet->HasReadPermission()) {
991 				TRACE_ERROR(("xsi_semctl: calling process has not read "
992 					"permission on semaphore %d, key %d\n", semaphoreSet->ID(),
993 					(int)semaphoreSet->IpcKey()));
994 				result = EACCES;
995 			} else {
996 				struct semid_ds sem;
997 				sem.sem_perm = semaphoreSet->IpcPermission();
998 				sem.sem_nsems = semaphoreSet->NumberOfSemaphores();
999 				sem.sem_otime = semaphoreSet->LastSemopTime();
1000 				sem.sem_ctime = semaphoreSet->LastSemctlTime();
1001 				if (user_memcpy(args->buf, &sem, sizeof(struct semid_ds))
1002 					< B_OK) {
1003 					TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
1004 					result = B_BAD_ADDRESS;
1005 				}
1006 			}
1007 			break;
1008 		}
1009 
1010 		case IPC_SET: {
1011 			if (!semaphoreSet->HasPermission()) {
1012 				TRACE_ERROR(("xsi_semctl: calling process has not "
1013 					"permission on semaphore %d, key %d\n",
1014 					semaphoreSet->ID(),	(int)semaphoreSet->IpcKey()));
1015 				result = EACCES;
1016 			} else {
1017 				struct semid_ds sem;
1018 				if (user_memcpy(&sem, args->buf, sizeof(struct semid_ds))
1019 					< B_OK) {
1020 					TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
1021 					result = B_BAD_ADDRESS;
1022 				} else
1023 					semaphoreSet->DoIpcSet(&sem);
1024 			}
1025 			break;
1026 		}
1027 
1028 		case IPC_RMID: {
1029 			// If this was the command, we are still holding
1030 			// the semaphore set hash table lock along with the
1031 			// ipc hash table lock and the semaphore set lock
1032 			// itself, this way we are sure there is not
1033 			// one waiting in the queue of the mutex.
1034 			if (!semaphoreSet->HasPermission()) {
1035 				TRACE_ERROR(("xsi_semctl: calling process has not "
1036 					"permission on semaphore %d, key %d\n",
1037 					semaphoreSet->ID(),	(int)semaphoreSet->IpcKey()));
1038 				return EACCES;
1039 			}
1040 			key_t key = semaphoreSet->IpcKey();
1041 			Ipc *ipcKey = NULL;
1042 			if (key != -1) {
1043 				ipcKey = sIpcHashTable.Lookup(key);
1044 				sIpcHashTable.Remove(ipcKey);
1045 			}
1046 			sSemaphoreHashTable.Remove(semaphoreSet);
1047 			// Wake up of threads waiting on this set
1048 			// happens in the destructor
1049 			if (key != -1)
1050 				delete ipcKey;
1051 			atomic_add(&sXsiSemaphoreCount, -semaphoreSet->NumberOfSemaphores());
1052 			atomic_add(&sXsiSemaphoreSetCount, -1);
1053 			// Remove any sem_undo request
1054 			while (struct sem_undo *entry
1055 					= semaphoreSet->GetUndoList().RemoveHead()) {
1056 				MutexLocker _(entry->team->xsi_sem_context->lock);
1057 				entry->team->xsi_sem_context->undo_list.Remove(entry);
1058 				delete entry;
1059 			}
1060 
1061 			delete semaphoreSet;
1062 			return 0;
1063 		}
1064 
1065 		default:
1066 			TRACE_ERROR(("xsi_semctl: command %d not valid\n", command));
1067 			result = EINVAL;
1068 	}
1069 
1070 	return result;
1071 }
1072 
1073 
1074 status_t
1075 _user_xsi_semop(int semaphoreID, struct sembuf *ops, size_t numOps)
1076 {
1077 	TRACE(("xsi_semop: semaphoreID = %d, ops = %p, numOps = %ld\n",
1078 		semaphoreID, ops, numOps));
1079 	MutexLocker setHashLocker(sXsiSemaphoreSetLock);
1080 	XsiSemaphoreSet *semaphoreSet = sSemaphoreHashTable.Lookup(semaphoreID);
1081 	if (semaphoreSet == NULL) {
1082 		TRACE_ERROR(("xsi_semop: semaphore set id %d not valid\n",
1083 			semaphoreID));
1084 		return EINVAL;
1085 	}
1086 	MutexLocker setLocker(semaphoreSet->Lock());
1087 	setHashLocker.Unlock();
1088 
1089 	if (!IS_USER_ADDRESS(ops)) {
1090 		TRACE_ERROR(("xsi_semop: sembuf address is not valid\n"));
1091 		return B_BAD_ADDRESS;
1092 	}
1093 
1094 	if (numOps < 0 || numOps >= MAX_XSI_SEMS_PER_TEAM) {
1095 		TRACE_ERROR(("xsi_semop: numOps out of range\n"));
1096 		return EINVAL;
1097 	}
1098 
1099 	struct sembuf *operations
1100 		= (struct sembuf *)malloc(sizeof(struct sembuf) * numOps);
1101 	if (operations == NULL) {
1102 		TRACE_ERROR(("xsi_semop: failed to allocate sembuf struct\n"));
1103 		return B_NO_MEMORY;
1104 	}
1105        MemoryDeleter operationsDeleter(operations);
1106 
1107 	if (user_memcpy(operations, ops,
1108 		(sizeof(struct sembuf) * numOps)) < B_OK) {
1109 		TRACE_ERROR(("xsi_semop: user_memcpy failed\n"));
1110 		return B_BAD_ADDRESS;
1111 	}
1112 
1113 	// We won't do partial request, that is operations
1114 	// only on some sempahores belonging to the set and then
1115 	// going to sleep. If we must wait on a semaphore, we undo
1116 	// all the operations already done and go to sleep, otherwise
1117 	// we may caused some unwanted deadlock among threads
1118 	// fighting for the same set.
1119 	bool notDone = true;
1120 	status_t result = 0;
1121 	while (notDone) {
1122 		XsiSemaphore *semaphore = NULL;
1123 		short numberOfSemaphores = semaphoreSet->NumberOfSemaphores();
1124 		bool goToSleep = false;
1125 
1126 		uint32 i = 0;
1127 		for (; i < numOps; i++) {
1128 			short semaphoreNumber = operations[i].sem_num;
1129 			if (semaphoreNumber >= numberOfSemaphores) {
1130 				TRACE_ERROR(("xsi_semop: %" B_PRIu32 " invalid semaphore number"
1131 					"\n", i));
1132 				result = EINVAL;
1133 				break;
1134 			}
1135 			semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1136 			unsigned short value = semaphore->Value();
1137 			short operation = operations[i].sem_op;
1138 			TRACE(("xsi_semop: semaphoreNumber = %d, value = %d\n",
1139 				semaphoreNumber, value));
1140 			if (operation < 0) {
1141 				if (semaphore->Add(operation)) {
1142 					if (operations[i].sem_flg & IPC_NOWAIT)
1143 						result = EAGAIN;
1144 					else
1145 						goToSleep = true;
1146 					break;
1147 				}
1148 			} else if (operation == 0) {
1149 				if (value == 0)
1150 					continue;
1151 				else if (operations[i].sem_flg & IPC_NOWAIT) {
1152 					result = EAGAIN;
1153 					break;
1154 				} else {
1155 					goToSleep = true;
1156 					break;
1157 				}
1158 			} else {
1159 				// Operation must be greater than zero,
1160 				// just add the value and continue
1161 				semaphore->Add(operation);
1162 			}
1163 		}
1164 
1165 		// Either we have to wait or an error occured
1166 		if (goToSleep || result != 0) {
1167 			// Undo all previously done operations
1168 			for (uint32 j = 0; j < i; j++) {
1169 				short semaphoreNumber = operations[j].sem_num;
1170 				semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1171 				short operation = operations[j].sem_op;
1172 				if (operation != 0)
1173 					semaphore->Revert(operation);
1174 			}
1175                        if (result != 0)
1176 				return result;
1177 
1178 			// We have to wait: first enqueue the thread
1179 			// in the appropriate set waiting list, then
1180 			// unlock the set itself and block the thread.
1181 			bool waitOnZero = true;
1182 			if (operations[i].sem_op != 0)
1183 				waitOnZero = false;
1184 
1185 			Thread *thread = thread_get_current_thread();
1186 			queued_thread queueEntry(thread, (int32)operations[i].sem_op);
1187 			semaphore->Enqueue(&queueEntry, waitOnZero);
1188 
1189 			uint32 sequenceNumber = semaphoreSet->SequenceNumber();
1190 
1191 			TRACE(("xsi_semop: thread %d going to sleep\n", (int)thread->id));
1192 			result = semaphore->BlockAndUnlock(thread, &setLocker);
1193 			TRACE(("xsi_semop: thread %d back to life\n", (int)thread->id));
1194 
1195 			// We are back to life. Find out why!
1196 			// Make sure the set hasn't been deleted or worst yet
1197 			// replaced.
1198 			setHashLocker.Lock();
1199 			semaphoreSet = sSemaphoreHashTable.Lookup(semaphoreID);
1200 			if (result == EIDRM || semaphoreSet == NULL || (semaphoreSet != NULL
1201 				&& sequenceNumber != semaphoreSet->SequenceNumber())) {
1202 				TRACE_ERROR(("xsi_semop: semaphore set id %d (sequence = "
1203 					"%" B_PRIu32 ") got destroyed\n", semaphoreID,
1204 					sequenceNumber));
1205 				notDone = false;
1206 				result = EIDRM;
1207 			} else if (result == B_INTERRUPTED) {
1208 				TRACE_ERROR(("xsi_semop: thread %d got interrupted while "
1209 					"waiting on semaphore set id %d\n",(int)thread->id,
1210 					semaphoreID));
1211 				semaphore->Deque(&queueEntry, waitOnZero);
1212 				result = EINTR;
1213 				notDone = false;
1214 			} else {
1215 				setLocker.Lock();
1216 				setHashLocker.Unlock();
1217 			}
1218 		} else {
1219 			// everything worked like a charm (so far)
1220 			notDone = false;
1221 			TRACE(("xsi_semop: semaphore acquired succesfully\n"));
1222 			// We acquired the semaphore, now records the sem_undo
1223 			// requests
1224 			XsiSemaphore *semaphore = NULL;
1225 			uint32 i = 0;
1226 			for (; i < numOps; i++) {
1227 				short semaphoreNumber = operations[i].sem_num;
1228 				semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1229 				short operation = operations[i].sem_op;
1230 				if (operations[i].sem_flg & SEM_UNDO)
1231 					if (semaphoreSet->RecordUndo(semaphoreNumber, operation)
1232 						!= B_OK) {
1233 						// Unlikely scenario, but we might get here.
1234 						// Undo everything!
1235 						// Start with semaphore operations
1236 						for (uint32 j = 0; j < numOps; j++) {
1237 							short semaphoreNumber = operations[j].sem_num;
1238 							semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1239 							short operation = operations[j].sem_op;
1240 							if (operation != 0)
1241 								semaphore->Revert(operation);
1242 						}
1243 						// Remove all previously registered sem_undo request
1244 						for (uint32 j = 0; j < i; j++) {
1245 							if (operations[j].sem_flg & SEM_UNDO)
1246 								semaphoreSet->RevertUndo(operations[j].sem_num,
1247 									operations[j].sem_op);
1248 						}
1249 						result = ENOSPC;
1250 					}
1251 			}
1252 		}
1253 	}
1254 
1255 	// We did it. Set the pid of all semaphores used
1256 	if (result == 0) {
1257 		for (uint32 i = 0; i < numOps; i++) {
1258 			short semaphoreNumber = operations[i].sem_num;
1259 			XsiSemaphore *semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1260 			semaphore->SetPid(getpid());
1261 		}
1262 	}
1263 	return result;
1264 }
1265