xref: /haiku/src/system/kernel/posix/xsi_semaphore.cpp (revision f75a7bf508f3156d63a14f8fd77c5e0ca4d08c42)
1 /*
2  * Copyright 2008, Haiku Inc. All rights reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Salvatore Benedetto <salvatore.benedetto@gmail.com>
7  */
8 
9 #include <posix/xsi_semaphore.h>
10 
11 #include <new>
12 
13 #include <sys/ipc.h>
14 #include <sys/types.h>
15 
16 #include <OS.h>
17 
18 #include <kernel.h>
19 #include <syscall_restart.h>
20 
21 #include <util/atomic.h>
22 #include <util/AutoLock.h>
23 #include <util/DoublyLinkedList.h>
24 #include <util/OpenHashTable.h>
25 
26 
27 //#define TRACE_XSI_SEM
28 #ifdef TRACE_XSI_SEM
29 #	define TRACE(x)			dprintf x
30 #	define TRACE_ERROR(x)	dprintf x
31 #else
32 #	define TRACE(x)			/* nothing */
33 #	define TRACE_ERROR(x)	dprintf x
34 #endif
35 
36 // Queue for holding blocked threads
37 struct queued_thread : DoublyLinkedListLinkImpl<queued_thread> {
38 	queued_thread(struct thread *thread, int32 count)
39 		:
40 		thread(thread),
41 		count(count),
42 		queued(false)
43 	{
44 	}
45 
46 	struct thread	*thread;
47 	int32			count;
48 	bool			queued;
49 };
50 
51 typedef DoublyLinkedList<queued_thread> ThreadQueue;
52 
53 class XsiSemaphoreSet;
54 
55 struct sem_undo : DoublyLinkedListLinkImpl<sem_undo> {
56 	sem_undo(XsiSemaphoreSet *semaphoreSet, struct team *team, int16 *undoValues)
57 		:
58 		semaphore_set(semaphoreSet),
59 		team(team),
60 		undo_values(undoValues)
61 	{
62 	}
63 
64 	DoublyLinkedListLink<sem_undo>		team_link;
65 	XsiSemaphoreSet						*semaphore_set;
66 	struct team							*team;
67 	int16								*undo_values;
68 };
69 
70 typedef DoublyLinkedList<sem_undo> UndoList;
71 typedef DoublyLinkedList<sem_undo,
72 	DoublyLinkedListMemberGetLink<sem_undo, &sem_undo::team_link> > TeamList;
73 
74 struct xsi_sem_context {
75 	xsi_sem_context()
76 	{
77 		mutex_init(&lock, "Private team undo_list lock");
78 	}
79 
80 	~xsi_sem_context()
81 	{
82 		mutex_destroy(&lock);
83 	}
84 
85 	TeamList	undo_list;
86 	mutex		lock;
87 };
88 
89 // Xsi semaphore definition
90 class XsiSemaphore {
91 public:
92 	XsiSemaphore()
93 		: fLastPidOperation(0),
94 		fThreadsWaitingToIncrease(0),
95 		fThreadsWaitingToBeZero(0),
96 		fValue(0)
97 	{
98 	}
99 
100 	~XsiSemaphore()
101 	{
102 		// For some reason the semaphore is getting destroyed.
103 		// Wake up any remaing awaiting threads
104 		InterruptsSpinLocker _(gThreadSpinlock);
105 		while (queued_thread *entry = fWaitingToIncreaseQueue.RemoveHead()) {
106 			entry->queued = false;
107 			thread_unblock_locked(entry->thread, EIDRM);
108 		}
109 		while (queued_thread *entry = fWaitingToBeZeroQueue.RemoveHead()) {
110 			entry->queued = false;
111 			thread_unblock_locked(entry->thread, EIDRM);
112 		}
113 		// No need to remove any sem_undo request still
114 		// hanging. When the process exit and doesn't found
115 		// the semaphore set, it'll just ignore the sem_undo
116 		// request. That's better than iterating trough the
117 		// whole sUndoList. Beside we don't know our semaphore
118 		// number nor our semaphore set id.
119 	}
120 
121 	// We return true in case the operation causes the
122 	// caller to wait, so it can undo all the operations
123 	// previously done
124 	bool Add(short value)
125 	{
126 		if ((int)(fValue + value) < 0) {
127 			TRACE(("XsiSemaphore::Add: potentially going to sleep\n"));
128 			return true;
129 		} else {
130 			fValue += value;
131 			if (fValue == 0 && fThreadsWaitingToBeZero > 0)
132 				WakeUpThread(true);
133 			else if (fValue > 0 && fThreadsWaitingToIncrease > 0)
134 				WakeUpThread(false);
135 			return false;
136 		}
137 	}
138 
139 	status_t BlockAndUnlock(struct thread *thread, MutexLocker *setLocker)
140 	{
141 		thread_prepare_to_block(thread, B_CAN_INTERRUPT,
142 			THREAD_BLOCK_TYPE_OTHER, (void*)"xsi semaphore");
143 		// Unlock the set before blocking
144 		setLocker->Unlock();
145 
146 		InterruptsSpinLocker _(gThreadSpinlock);
147 		return thread_block_locked(thread);
148 	}
149 
150 	void Deque(queued_thread *queueEntry, bool waitForZero)
151 	{
152 		if (queueEntry->queued) {
153 			if (waitForZero) {
154 				fWaitingToBeZeroQueue.Remove(queueEntry);
155 				fThreadsWaitingToBeZero--;
156 			} else {
157 				fWaitingToIncreaseQueue.Remove(queueEntry);
158 				fThreadsWaitingToIncrease--;
159 			}
160 		}
161 	}
162 
163 	void Enqueue(queued_thread *queueEntry, bool waitForZero)
164 	{
165 		if (waitForZero) {
166 			fWaitingToBeZeroQueue.Add(queueEntry);
167 			fThreadsWaitingToBeZero++;
168 		} else {
169 			fWaitingToIncreaseQueue.Add(queueEntry);
170 			fThreadsWaitingToIncrease++;
171 		}
172 		queueEntry->queued = true;
173 	}
174 
175 	pid_t LastPid() const
176 	{
177 		return fLastPidOperation;
178 	}
179 
180 	void Revert(short value)
181 	{
182 		fValue -= value;
183 		if (fValue == 0 && fThreadsWaitingToBeZero > 0)
184 			WakeUpThread(true);
185 		else if (fValue > 0 && fThreadsWaitingToIncrease > 0)
186 			WakeUpThread(false);
187 	}
188 
189 	void SetPid(pid_t pid)
190 	{
191 		fLastPidOperation = pid;
192 	}
193 
194 	void SetValue(ushort value)
195 	{
196 		fValue = value;
197 	}
198 
199 	ushort ThreadsWaitingToIncrease() const
200 	{
201 		return fThreadsWaitingToIncrease;
202 	}
203 
204 	ushort ThreadsWaitingToBeZero() const
205 	{
206 		return fThreadsWaitingToBeZero;
207 	}
208 
209 	ushort Value() const
210 	{
211 		return fValue;
212 	}
213 
214 	void WakeUpThread(bool waitingForZero)
215 	{
216 		InterruptsSpinLocker _(gThreadSpinlock);
217 		if (waitingForZero) {
218 			// Wake up all threads waiting on zero
219 			while (queued_thread *entry = fWaitingToBeZeroQueue.RemoveHead()) {
220 				entry->queued = false;
221 				fThreadsWaitingToBeZero--;
222 				thread_unblock_locked(entry->thread, 0);
223 			}
224 		} else {
225 			// Wake up all threads even though they might go back to sleep
226 			while (queued_thread *entry = fWaitingToIncreaseQueue.RemoveHead()) {
227 				entry->queued = false;
228 				fThreadsWaitingToIncrease--;
229 				thread_unblock_locked(entry->thread, 0);
230 			}
231 		}
232 	}
233 
234 private:
235 	pid_t				fLastPidOperation;				// sempid
236 	ushort				fThreadsWaitingToIncrease;		// semncnt
237 	ushort				fThreadsWaitingToBeZero;		// semzcnt
238 	ushort				fValue;							// semval
239 
240 	ThreadQueue			fWaitingToIncreaseQueue;
241 	ThreadQueue			fWaitingToBeZeroQueue;
242 };
243 
244 #define MAX_XSI_SEMS_PER_TEAM	128
245 
246 // Xsi semaphore set definition (semid_ds)
247 class XsiSemaphoreSet {
248 public:
249 	XsiSemaphoreSet(int numberOfSemaphores, int flags)
250 		: fInitOK(false),
251 		fLastSemctlTime((time_t)real_time_clock()),
252 		fLastSemopTime(0),
253 		fNumberOfSemaphores(numberOfSemaphores),
254 		fSemaphores(0)
255 	{
256 		mutex_init(&fLock, "XsiSemaphoreSet private mutex");
257 		SetIpcKey((key_t)-1);
258 		SetPermissions(flags);
259 		fSemaphores = new(std::nothrow) XsiSemaphore[numberOfSemaphores];
260 		if (fSemaphores == NULL) {
261 			TRACE_ERROR(("XsiSemaphoreSet::XsiSemaphore(): failed to allocate "
262 				"XsiSemaphore object\n"));
263 		} else
264 			fInitOK = true;
265 	}
266 
267 	~XsiSemaphoreSet()
268 	{
269 		TRACE(("XsiSemaphoreSet::~XsiSemaphoreSet(): removing semaphore "
270 			"set %d\n", fID));
271 		mutex_destroy(&fLock);
272 		delete[] fSemaphores;
273 	}
274 
275 	void ClearUndo(unsigned short semaphoreNumber)
276 	{
277 		struct team *team = thread_get_current_thread()->team;
278 		UndoList::Iterator iterator = fUndoList.GetIterator();
279 		while (iterator.HasNext()) {
280 			struct sem_undo *current = iterator.Next();
281 			if (current->team == team) {
282 				TRACE(("XsiSemaphoreSet::ClearUndo: teamID = %d, "
283 					"semaphoreSetID = %d, semaphoreNumber = %d\n",
284 					fID, semaphoreNumber, (int)team->id));
285 				MutexLocker _(team->xsi_sem_context->lock);
286 				current->undo_values[semaphoreNumber] = 0;
287 				return;
288 			}
289 		}
290 	}
291 
292 	void ClearUndos()
293 	{
294 		// Clear all undo_values (POSIX semadj equivalent)
295 		// of the calling team. This happens only on semctl SETALL.
296 		struct team *team = thread_get_current_thread()->team;
297 		DoublyLinkedList<sem_undo>::Iterator iterator = fUndoList.GetIterator();
298 		while (iterator.HasNext()) {
299 			struct sem_undo *current = iterator.Next();
300 			if (current->team == team) {
301 				TRACE(("XsiSemaphoreSet::ClearUndos: teamID = %d, "
302 					"semaphoreSetID = %d\n", (int)team->id, fID));
303 				MutexLocker _(team->xsi_sem_context->lock);
304 				memset(current->undo_values, 0,
305 					sizeof(int16) * fNumberOfSemaphores);
306 				return;
307 			}
308 		}
309 	}
310 
311 	void DoIpcSet(struct semid_ds *result)
312 	{
313 		fPermissions.uid = result->sem_perm.uid;
314 		fPermissions.gid = result->sem_perm.gid;
315 		fPermissions.mode = (fPermissions.mode & ~0x01ff)
316 			| (result->sem_perm.mode & 0x01ff);
317 	}
318 
319 	bool HasPermission() const
320 	{
321 		if ((fPermissions.mode & S_IWOTH) != 0)
322 			return true;
323 
324 		uid_t uid = geteuid();
325 		if (uid == 0 || (uid == fPermissions.uid
326 			&& (fPermissions.mode & S_IWUSR) != 0))
327 			return true;
328 
329 		gid_t gid = getegid();
330 		if (gid == fPermissions.gid && (fPermissions.mode & S_IWGRP) != 0)
331 			return true;
332 
333 		return false;
334 	}
335 
336 	bool HasReadPermission() const
337 	{
338 		// TODO: fix this
339 		return HasPermission();
340 	}
341 
342 	int ID() const
343 	{
344 		return fID;
345 	}
346 
347 	bool InitOK()
348 	{
349 		return fInitOK;
350 	}
351 
352 	key_t IpcKey() const
353 	{
354 		return fPermissions.key;
355 	}
356 
357 	struct ipc_perm IpcPermission() const
358 	{
359 		return fPermissions;
360 	}
361 
362 	time_t LastSemctlTime() const
363 	{
364 		return fLastSemctlTime;
365 	}
366 
367 	time_t LastSemopTime() const
368 	{
369 		return fLastSemopTime;
370 	}
371 
372 	mutex &Lock()
373 	{
374 		return fLock;
375 	}
376 
377 	ushort NumberOfSemaphores() const
378 	{
379 		return fNumberOfSemaphores;
380 	}
381 
382 	// Record the sem_undo operation into our private fUndoList and
383 	// the team undo_list. The only limit here is the memory needed
384 	// for creating a new sem_undo structure.
385 	int RecordUndo(short semaphoreNumber, short value)
386 	{
387 		// Look if there is already a record from the team caller
388 		// for the same semaphore set
389 		bool notFound = true;
390 		struct team *team = thread_get_current_thread()->team;
391 		DoublyLinkedList<sem_undo>::Iterator iterator = fUndoList.GetIterator();
392 		while (iterator.HasNext()) {
393 			struct sem_undo *current = iterator.Next();
394 			if (current->team == team) {
395 				// Update its undo value
396 				MutexLocker _(team->xsi_sem_context->lock);
397 				int newValue = current->undo_values[semaphoreNumber] + value;
398 				if (newValue > USHRT_MAX || newValue < -USHRT_MAX) {
399 					TRACE_ERROR(("XsiSemaphoreSet::RecordUndo: newValue %d "
400 						"out of range\n", newValue));
401 					return ERANGE;
402 				}
403 				current->undo_values[semaphoreNumber] = newValue;
404 				notFound = false;
405 				TRACE(("XsiSemaphoreSet::RecordUndo: found record. Team = %d, "
406 					"semaphoreSetID = %d, semaphoreNumber = %d, value = %d\n",
407 					(int)team->id, fID, semaphoreNumber,
408 					current->undo_values[semaphoreNumber]));
409 				break;
410 			}
411 		}
412 
413 		if (notFound) {
414 			// First sem_undo request from this team for this
415 			// semaphore set
416 			int16 *undoValues
417 				= (int16 *)malloc(sizeof(int16) * fNumberOfSemaphores);
418 			if (undoValues == NULL)
419 				return B_NO_MEMORY;
420 			struct sem_undo *request
421 				= new(std::nothrow) sem_undo(this, team, undoValues);
422 			if (request == NULL) {
423 				free(undoValues);
424 				return B_NO_MEMORY;
425 			}
426 			memset(request->undo_values, 0, sizeof(int16) * fNumberOfSemaphores);
427 			request->undo_values[semaphoreNumber] = value;
428 
429 			// Check if it's the very first sem_undo request for this team
430 			xsi_sem_context *context = atomic_pointer_get(&team->xsi_sem_context);
431 			if (context == NULL) {
432 				// Create the context
433 				context = new(std::nothrow) xsi_sem_context;
434 				if (context == NULL) {
435 					free(request->undo_values);
436 					delete request;
437 					return B_NO_MEMORY;
438 				}
439 				// Since we don't hold any global lock, someone
440 				// else could have been quicker than us, so we have
441 				// to delete the one we just created and use the one
442 				// in place.
443 				if (atomic_pointer_test_and_set(&team->xsi_sem_context, context,
444 					(xsi_sem_context *)NULL) != NULL)
445 					delete context;
446 			}
447 
448 			// Add the request to both XsiSemaphoreSet and team list
449 			fUndoList.Add(request);
450 			MutexLocker _(team->xsi_sem_context->lock);
451 			team->xsi_sem_context->undo_list.Add(request);
452 			TRACE(("XsiSemaphoreSet::RecordUndo: new record added. Team = %d, "
453 				"semaphoreSetID = %d, semaphoreNumber = %d, value = %d\n",
454 				(int)team->id, fID, semaphoreNumber, value));
455 		}
456 		return B_OK;
457 	}
458 
459 	void RevertUndo(short semaphoreNumber, short value)
460 	{
461 		// This can be called only when RecordUndo fails.
462 		struct team *team = thread_get_current_thread()->team;
463 		DoublyLinkedList<sem_undo>::Iterator iterator = fUndoList.GetIterator();
464 		while (iterator.HasNext()) {
465 			struct sem_undo *current = iterator.Next();
466 			if (current->team == team) {
467 				MutexLocker _(team->xsi_sem_context->lock);
468 				fSemaphores[semaphoreNumber].Revert(value);
469 				break;
470 			}
471 		}
472 	}
473 
474 	XsiSemaphore* Semaphore(int nth) const
475 	{
476 		return &fSemaphores[nth];
477 	}
478 
479 	uint32 SequenceNumber() const
480 	{
481 		return fSequenceNumber;
482 	}
483 
484 	// Implemented after sGlobalSequenceNumber is declared
485 	void SetID();
486 
487 	void SetIpcKey(key_t key)
488 	{
489 		fPermissions.key = key;
490 	}
491 
492 	void SetLastSemctlTime()
493 	{
494 		fLastSemctlTime = real_time_clock();
495 	}
496 
497 	void SetLastSemopTime()
498 	{
499 		fLastSemopTime = real_time_clock();
500 	}
501 
502 	void SetPermissions(int flags)
503 	{
504 		fPermissions.uid = fPermissions.cuid = geteuid();
505 		fPermissions.gid = fPermissions.cgid = getegid();
506 		fPermissions.mode = (flags & 0x01ff);
507 	}
508 
509 	UndoList &GetUndoList()
510 	{
511 		return fUndoList;
512 	}
513 
514 	HashTableLink<XsiSemaphoreSet>* Link()
515 	{
516 		return &fLink;
517 	}
518 
519 private:
520 	bool						fInitOK;
521 	int							fID;					// semaphore set id
522 	time_t						fLastSemctlTime;		// sem_ctime
523 	time_t						fLastSemopTime;			// sem_otime
524 	ushort						fNumberOfSemaphores;	// sem_nsems
525 	struct ipc_perm				fPermissions;			// sem_perm
526 	uint32						fSequenceNumber;		// used as a second id
527 	XsiSemaphore				*fSemaphores;			// array of semaphores
528 	UndoList					fUndoList;				// undo list requests
529 	mutex 						fLock;					// private lock
530 
531 	::HashTableLink<XsiSemaphoreSet> fLink;
532 };
533 
534 // Xsi semaphore set hash table
535 struct SemaphoreHashTableDefinition {
536 	typedef int					KeyType;
537 	typedef XsiSemaphoreSet		ValueType;
538 
539 	size_t HashKey (const int key) const
540 	{
541 		return (size_t)key;
542 	}
543 
544 	size_t Hash(XsiSemaphoreSet *variable) const
545 	{
546 		return (size_t)variable->ID();
547 	}
548 
549 	bool Compare(const int key, XsiSemaphoreSet *variable) const
550 	{
551 		return (int)key == (int)variable->ID();
552 	}
553 
554 	HashTableLink<XsiSemaphoreSet>* GetLink(XsiSemaphoreSet *variable) const
555 	{
556 		return variable->Link();
557 	}
558 };
559 
560 
561 // IPC class
562 class Ipc {
563 public:
564 	Ipc(key_t key)
565 		: fKey(key),
566 		fSemaphoreSetId(-1)
567 	{
568 	}
569 
570 	key_t Key() const
571 	{
572 		return fKey;
573 	}
574 
575 	int SemaphoreSetID() const
576 	{
577 		return fSemaphoreSetId;
578 	}
579 
580 	void SetSemaphoreSetID(XsiSemaphoreSet *semaphoreSet)
581 	{
582 		fSemaphoreSetId = semaphoreSet->ID();
583 	}
584 
585 	HashTableLink<Ipc>* Link()
586 	{
587 		return &fLink;
588 	}
589 
590 private:
591 	key_t				fKey;
592 	int					fSemaphoreSetId;
593 	HashTableLink<Ipc>	fLink;
594 };
595 
596 
597 struct IpcHashTableDefinition {
598 	typedef key_t	KeyType;
599 	typedef Ipc		ValueType;
600 
601 	size_t HashKey (const key_t key) const
602 	{
603 		return (size_t)(key);
604 	}
605 
606 	size_t Hash(Ipc *variable) const
607 	{
608 		return (size_t)HashKey(variable->Key());
609 	}
610 
611 	bool Compare(const key_t key, Ipc *variable) const
612 	{
613 		return (key_t)key == (key_t)variable->Key();
614 	}
615 
616 	HashTableLink<Ipc>* GetLink(Ipc *variable) const
617 	{
618 		return variable->Link();
619 	}
620 };
621 
622 // Arbitrary limit
623 #define MAX_XSI_SEMAPHORE		4096
624 #define MAX_XSI_SEMAPHORE_SET	2048
625 static OpenHashTable<IpcHashTableDefinition> sIpcHashTable;
626 static OpenHashTable<SemaphoreHashTableDefinition> sSemaphoreHashTable;
627 
628 static mutex sIpcLock;
629 static mutex sXsiSemaphoreSetLock;
630 
631 static uint32 sGlobalSequenceNumber = 1;
632 static vint32 sXsiSemaphoreCount = 0;
633 static vint32 sXsiSemaphoreSetCount = 0;
634 
635 
636 //	#pragma mark -
637 
638 
639 void
640 XsiSemaphoreSet::SetID()
641 {
642 	fID = real_time_clock();
643 	// The lock is held before calling us
644 	while (true) {
645 		if (sSemaphoreHashTable.Lookup(fID) == NULL)
646 			break;
647 		fID = (fID + 1) % INT_MAX;
648 	}
649 	sGlobalSequenceNumber = (sGlobalSequenceNumber + 1) % UINT_MAX;
650 	fSequenceNumber = sGlobalSequenceNumber;
651 }
652 
653 
654 //	#pragma mark - Kernel exported API
655 
656 
657 void
658 xsi_sem_init()
659 {
660 	// Initialize hash tables
661 	status_t status = sIpcHashTable.Init();
662 	if (status != B_OK)
663 		panic("xsi_sem_init() failed to initialize ipc hash table\n");
664 	status =  sSemaphoreHashTable.Init();
665 	if (status != B_OK)
666 		panic("xsi_sem_init() failed to initialize semaphore hash table\n");
667 
668 	mutex_init(&sIpcLock, "global POSIX semaphore IPC table");
669 	mutex_init(&sXsiSemaphoreSetLock, "global POSIX xsi sem table");
670 }
671 
672 
673 /*!	Function called on team exit to process any sem_undo requests */
674 void
675 xsi_sem_undo(struct team *team)
676 {
677 	if (team->xsi_sem_context == NULL)
678 		return;
679 
680 	// By acquiring first the semaphore hash table lock
681 	// we make sure the semaphore set in our sem_undo
682 	// list won't get removed by IPC_RMID call
683 	MutexLocker _(sXsiSemaphoreSetLock);
684 
685 	// Process all sem_undo request in the team sem undo list
686 	// if any
687 	TeamList::Iterator iterator
688 		= team->xsi_sem_context->undo_list.GetIterator();
689 	while (iterator.HasNext()) {
690 		struct sem_undo *current = iterator.Next();
691 		XsiSemaphoreSet *semaphoreSet = current->semaphore_set;
692 		// Acquire the set lock in order to prevent race
693 		// condition with RecordUndo
694 		MutexLocker setLocker(semaphoreSet->Lock());
695 		MutexLocker _(team->xsi_sem_context->lock);
696 		// Revert the changes done by this process
697 		for (int i = 0; i < semaphoreSet->NumberOfSemaphores(); i++)
698 			if (current->undo_values[i] != 0) {
699 				TRACE(("xsi_sem_undo: TeamID = %d, SemaphoreSetID = %d, "
700 					"SemaphoreNumber = %d, undo value = %d\n", (int)team->id,
701 					semaphoreSet->ID(), i, (int)current->undo_values[i]));
702 				semaphoreSet->Semaphore(i)->Revert(current->undo_values[i]);
703 			}
704 
705 		// Remove and free the sem_undo structure from both lists
706 		iterator.Remove();
707 		semaphoreSet->GetUndoList().Remove(current);
708 		delete current;
709 	}
710 	delete team->xsi_sem_context;
711 	team->xsi_sem_context = NULL;
712 }
713 
714 
715 //	#pragma mark - Syscalls
716 
717 
718 int
719 _user_xsi_semget(key_t key, int numberOfSemaphores, int flags)
720 {
721 	TRACE(("xsi_semget: key = %d, numberOfSemaphores = %d, flags = %d\n",
722 		(int)key, numberOfSemaphores, flags));
723 	XsiSemaphoreSet *semaphoreSet = NULL;
724 	Ipc *ipcKey = NULL;
725 	// Default assumptions
726 	bool isPrivate = true;
727 	bool create = true;
728 
729 	MutexLocker _(sIpcLock);
730 	if (key != IPC_PRIVATE) {
731 		isPrivate = false;
732 		// Check if key already exist, if it does it already has a semaphore
733 		// set associated with it
734 		ipcKey = sIpcHashTable.Lookup(key);
735 		if (ipcKey == NULL) {
736 			// The ipc key does not exist. Create it and add it to the system
737 			if (!(flags & IPC_CREAT)) {
738 				TRACE_ERROR(("xsi_semget: key %d does not exist, but the "
739 					"caller did not ask for creation\n",(int)key));
740 				return ENOENT;
741 			}
742 			ipcKey = new(std::nothrow) Ipc(key);
743 			if (ipcKey == NULL) {
744 				TRACE_ERROR(("xsi_semget: failed to create new Ipc object "
745 					"for key %d\n",	(int)key));
746 				return ENOMEM;
747 			}
748 			sIpcHashTable.Insert(ipcKey);
749 		} else {
750 			// The IPC key exist and it already has a semaphore
751 			if ((flags & IPC_CREAT) && (flags & IPC_EXCL)) {
752 				TRACE_ERROR(("xsi_semget: key %d already exist\n", (int)key));
753 				return EEXIST;
754 			}
755 			int semaphoreSetID = ipcKey->SemaphoreSetID();
756 
757 			MutexLocker _(sXsiSemaphoreSetLock);
758 			semaphoreSet = sSemaphoreHashTable.Lookup(semaphoreSetID);
759 			if (!semaphoreSet->HasPermission()) {
760 				TRACE_ERROR(("xsi_semget: calling process has not permission "
761 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
762 					(int)key));
763 				return EACCES;
764 			}
765 			if (numberOfSemaphores > semaphoreSet->NumberOfSemaphores()
766 				&& numberOfSemaphores != 0) {
767 				TRACE_ERROR(("xsi_semget: numberOfSemaphores greater than the "
768 					"one associated with semaphore %d, key %d\n",
769 					semaphoreSet->ID(), (int)key));
770 				return EINVAL;
771 			}
772 			create = false;
773 		}
774 	}
775 
776 	if (create) {
777 		// Create a new sempahore set for this key
778 		if (numberOfSemaphores <= 0
779 			|| numberOfSemaphores >= MAX_XSI_SEMS_PER_TEAM) {
780 			TRACE_ERROR(("xsi_semget: numberOfSemaphores out of range\n"));
781 			return EINVAL;
782 		}
783 		if (sXsiSemaphoreCount >= MAX_XSI_SEMAPHORE
784 			|| sXsiSemaphoreSetCount >= MAX_XSI_SEMAPHORE_SET) {
785 			TRACE_ERROR(("xsi_semget: reached limit of maximum number of "
786 				"semaphores allowed\n"));
787 			return ENOSPC;
788 		}
789 
790 		semaphoreSet = new(std::nothrow) XsiSemaphoreSet(numberOfSemaphores,
791 			flags);
792 		if (semaphoreSet == NULL || !semaphoreSet->InitOK()) {
793 			TRACE_ERROR(("xsi_semget: failed to allocate a new xsi "
794 				"semaphore set\n"));
795 			return ENOMEM;
796 		}
797 		atomic_add(&sXsiSemaphoreCount, numberOfSemaphores);
798 		atomic_add(&sXsiSemaphoreSetCount, 1);
799 
800 		MutexLocker _(sXsiSemaphoreSetLock);
801 		semaphoreSet->SetID();
802 		if (isPrivate)
803 			semaphoreSet->SetIpcKey((key_t)-1);
804 		else {
805 			semaphoreSet->SetIpcKey(key);
806 			ipcKey->SetSemaphoreSetID(semaphoreSet);
807 		}
808 		sSemaphoreHashTable.Insert(semaphoreSet);
809 		TRACE(("semget: new set = %d created, sequence = %ld\n",
810 			semaphoreSet->ID(), semaphoreSet->SequenceNumber()));
811 	}
812 
813 	return semaphoreSet->ID();
814 }
815 
816 
817 int
818 _user_xsi_semctl(int semaphoreID, int semaphoreNumber, int command,
819 	union semun *args)
820 {
821 	TRACE(("xsi_semctl: semaphoreID = %d, semaphoreNumber = %d, command = %d\n",
822 		semaphoreID, semaphoreNumber, command));
823 	MutexLocker ipcHashLocker(sIpcLock);
824 	MutexLocker setHashLocker(sXsiSemaphoreSetLock);
825 	XsiSemaphoreSet *semaphoreSet = sSemaphoreHashTable.Lookup(semaphoreID);
826 	if (semaphoreSet == NULL) {
827 		TRACE_ERROR(("xsi_semctl: semaphore set id %d not valid\n",
828 			semaphoreID));
829 		return EINVAL;
830 	}
831 	if (semaphoreNumber < 0
832 		|| semaphoreNumber > semaphoreSet->NumberOfSemaphores()) {
833 		TRACE_ERROR(("xsi_semctl: semaphore number %d not valid for "
834 			"semaphore %d\n", semaphoreNumber, semaphoreID));
835 		return EINVAL;
836 	}
837 	if (args != 0 && !IS_USER_ADDRESS(args)) {
838 		TRACE_ERROR(("xsi_semctl: semun address is not valid\n"));
839 		return B_BAD_ADDRESS;
840 	}
841 
842 	// Lock the semaphore set itself and release both the semaphore
843 	// set hash table lock and the ipc hash table lock _only_ if
844 	// the command it's not IPC_RMID, this prevents undesidered
845 	// situation from happening while (hopefully) improving the
846 	// concurrency.
847 	MutexLocker setLocker;
848 	if (command != IPC_RMID) {
849 		setLocker.SetTo(&semaphoreSet->Lock(), false);
850 		setHashLocker.Unlock();
851 		ipcHashLocker.Unlock();
852 	} else
853 		// We are about to delete the set along with its mutex, so
854 		// we can't use the MutexLocker class, as the mutex itself
855 		// won't exist on function exit
856 		mutex_lock(&semaphoreSet->Lock());
857 
858 	int result = 0;
859 	XsiSemaphore *semaphore = semaphoreSet->Semaphore(semaphoreNumber);
860 	switch (command) {
861 		case GETVAL: {
862 			if (!semaphoreSet->HasReadPermission()) {
863 				TRACE_ERROR(("xsi_semctl: calling process has not permission "
864 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
865 					(int)semaphoreSet->IpcKey()));
866 				result = EACCES;
867 			} else
868 				result = semaphore->Value();
869 			break;
870 		}
871 
872 		case SETVAL: {
873 			if (!semaphoreSet->HasPermission()) {
874 				TRACE_ERROR(("xsi_semctl: calling process has not permission "
875 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
876 					(int)semaphoreSet->IpcKey()));
877 				result = EACCES;
878 			} else {
879 				int value;
880 				if (user_memcpy(&value, &args->val, sizeof(int)) < B_OK) {
881 					TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
882 					result = B_BAD_ADDRESS;
883 				} else if (value > USHRT_MAX) {
884 					TRACE_ERROR(("xsi_semctl: value %d out of range\n", value));
885 					result = ERANGE;
886 				} else {
887 					semaphore->SetValue(value);
888 					semaphoreSet->ClearUndo(semaphoreNumber);
889 				}
890 			}
891 			break;
892 		}
893 
894 		case GETPID: {
895 			if (!semaphoreSet->HasReadPermission()) {
896 				TRACE_ERROR(("xsi_semctl: calling process has not permission "
897 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
898 					(int)semaphoreSet->IpcKey()));
899 				result = EACCES;
900 			} else
901 				result = semaphore->LastPid();
902 			break;
903 		}
904 
905 		case GETNCNT: {
906 			if (!semaphoreSet->HasReadPermission()) {
907 				TRACE_ERROR(("xsi_semctl: calling process has not permission "
908 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
909 					(int)semaphoreSet->IpcKey()));
910 				result = EACCES;
911 			} else
912 				result = semaphore->ThreadsWaitingToIncrease();
913 			break;
914 		}
915 
916 		case GETZCNT: {
917 			if (!semaphoreSet->HasReadPermission()) {
918 				TRACE_ERROR(("xsi_semctl: calling process has not permission "
919 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
920 					(int)semaphoreSet->IpcKey()));
921 				result = EACCES;
922 			} else
923 				result = semaphore->ThreadsWaitingToBeZero();
924 			break;
925 		}
926 
927 		case GETALL: {
928 			if (!semaphoreSet->HasReadPermission()) {
929 				TRACE_ERROR(("xsi_semctl: calling process has not read "
930 					"permission on semaphore %d, key %d\n", semaphoreSet->ID(),
931 					(int)semaphoreSet->IpcKey()));
932 				result = EACCES;
933 			} else
934 				for (int i = 0; i < semaphoreSet->NumberOfSemaphores(); i++) {
935 					semaphore = semaphoreSet->Semaphore(i);
936 					unsigned short value = semaphore->Value();
937 					if (user_memcpy(&args->array[i], &value,
938 						sizeof(unsigned short)) < B_OK) {
939 						TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
940 						result = B_BAD_ADDRESS;
941 						break;
942 					}
943 				}
944 			break;
945 		}
946 
947 		case SETALL: {
948 			if (!semaphoreSet->HasPermission()) {
949 				TRACE_ERROR(("xsi_semctl: calling process has not permission "
950 					"on semaphore %d, key %d\n", semaphoreSet->ID(),
951 					(int)semaphoreSet->IpcKey()));
952 				result = EACCES;
953 			} else {
954 				bool doClear = true;
955 				for (int i = 0; i < semaphoreSet->NumberOfSemaphores(); i++) {
956 					semaphore = semaphoreSet->Semaphore(i);
957 					unsigned short value;
958 					if (user_memcpy(&value, &args->array[i], sizeof(unsigned short))
959 						< B_OK) {
960 						TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
961 						result = B_BAD_ADDRESS;
962 						doClear = false;
963 						break;
964 					} else
965 						semaphore->SetValue(value);
966 				}
967 				if (doClear)
968 					semaphoreSet->ClearUndos();
969 			}
970 			break;
971 		}
972 
973 		case IPC_STAT: {
974 			if (!semaphoreSet->HasReadPermission()) {
975 				TRACE_ERROR(("xsi_semctl: calling process has not read "
976 					"permission on semaphore %d, key %d\n", semaphoreSet->ID(),
977 					(int)semaphoreSet->IpcKey()));
978 				result = EACCES;
979 			} else {
980 				struct semid_ds sem;
981 				sem.sem_perm = semaphoreSet->IpcPermission();
982 				sem.sem_nsems = semaphoreSet->NumberOfSemaphores();
983 				sem.sem_otime = semaphoreSet->LastSemopTime();
984 				sem.sem_ctime = semaphoreSet->LastSemctlTime();
985 				if (user_memcpy(args->buf, &sem, sizeof(struct semid_ds))
986 					< B_OK) {
987 					TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
988 					result = B_BAD_ADDRESS;
989 				}
990 			}
991 			break;
992 		}
993 
994 		case IPC_SET: {
995 			if (!semaphoreSet->HasPermission()) {
996 				TRACE_ERROR(("xsi_semctl: calling process has not "
997 					"permission on semaphore %d, key %d\n",
998 					semaphoreSet->ID(),	(int)semaphoreSet->IpcKey()));
999 				result = EACCES;
1000 			} else {
1001 				struct semid_ds sem;
1002 				if (user_memcpy(&sem, args->buf, sizeof(struct semid_ds))
1003 					< B_OK) {
1004 					TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
1005 					result = B_BAD_ADDRESS;
1006 				} else
1007 					semaphoreSet->DoIpcSet(&sem);
1008 			}
1009 			break;
1010 		}
1011 
1012 		case IPC_RMID: {
1013 			// If this was the command, we are still holding
1014 			// the semaphore set hash table lock along with the
1015 			// ipc hash table lock and the semaphore set lock
1016 			// itself, this way we are sure there is not
1017 			// one waiting in the queue of the mutex.
1018 			if (!semaphoreSet->HasPermission()) {
1019 				TRACE_ERROR(("xsi_semctl: calling process has not "
1020 					"permission on semaphore %d, key %d\n",
1021 					semaphoreSet->ID(),	(int)semaphoreSet->IpcKey()));
1022 				return EACCES;
1023 			}
1024 			key_t key = semaphoreSet->IpcKey();
1025 			Ipc *ipcKey = NULL;
1026 			if (key != -1) {
1027 				ipcKey = sIpcHashTable.Lookup(key);
1028 				sIpcHashTable.Remove(ipcKey);
1029 			}
1030 			sSemaphoreHashTable.Remove(semaphoreSet);
1031 			// Wake up of threads waiting on this set
1032 			// happens in the destructor
1033 			if (key != -1)
1034 				delete ipcKey;
1035 			atomic_add(&sXsiSemaphoreCount, -semaphoreSet->NumberOfSemaphores());
1036 			atomic_add(&sXsiSemaphoreSetCount, -1);
1037 			// Remove any sem_undo request
1038 			while (struct sem_undo *entry
1039 					= semaphoreSet->GetUndoList().RemoveHead()) {
1040 				MutexLocker _(entry->team->xsi_sem_context->lock);
1041 				entry->team->xsi_sem_context->undo_list.Remove(entry);
1042 				delete entry;
1043 			}
1044 
1045 			delete semaphoreSet;
1046 			return 0;
1047 		}
1048 
1049 		default:
1050 			TRACE_ERROR(("xsi_semctl: command %d not valid\n", command));
1051 			result = EINVAL;
1052 	}
1053 
1054 	return result;
1055 }
1056 
1057 
1058 status_t
1059 _user_xsi_semop(int semaphoreID, struct sembuf *ops, size_t numOps)
1060 {
1061 	TRACE(("xsi_semop: semaphoreID = %d, ops = %p, numOps = %ld\n",
1062 		semaphoreID, ops, numOps));
1063 	MutexLocker setHashLocker(sXsiSemaphoreSetLock);
1064 	XsiSemaphoreSet *semaphoreSet = sSemaphoreHashTable.Lookup(semaphoreID);
1065 	if (semaphoreSet == NULL) {
1066 		TRACE_ERROR(("xsi_semop: semaphore set id %d not valid\n",
1067 			semaphoreID));
1068 		return EINVAL;
1069 	}
1070 	MutexLocker setLocker(semaphoreSet->Lock());
1071 	setHashLocker.Unlock();
1072 
1073 	if (!IS_USER_ADDRESS(ops)) {
1074 		TRACE_ERROR(("xsi_semop: sembuf address is not valid\n"));
1075 		return B_BAD_ADDRESS;
1076 	}
1077 
1078 	if (numOps < 0 || numOps >= MAX_XSI_SEMS_PER_TEAM) {
1079 		TRACE_ERROR(("xsi_semop: numOps out of range\n"));
1080 		return EINVAL;
1081 	}
1082 
1083 	struct sembuf *operations
1084 		= (struct sembuf *)malloc(sizeof(struct sembuf) * numOps);
1085 	if (operations == NULL) {
1086 		TRACE_ERROR(("xsi_semop: failed to allocate sembuf struct\n"));
1087 		return B_NO_MEMORY;
1088 	}
1089 
1090 	if (user_memcpy(operations, ops,
1091 		(sizeof(struct sembuf) * numOps)) < B_OK) {
1092 		TRACE_ERROR(("xsi_semop: user_memcpy failed\n"));
1093 		free(operations);
1094 		return B_BAD_ADDRESS;
1095 	}
1096 
1097 	// We won't do partial request, that is operations
1098 	// only on some sempahores belonging to the set and then
1099 	// going to sleep. If we must wait on a semaphore, we undo
1100 	// all the operations already done and go to sleep, otherwise
1101 	// we may caused some unwanted deadlock among threads
1102 	// fighting for the same set.
1103 	bool notDone = true;
1104 	status_t result = 0;
1105 	while (notDone) {
1106 		XsiSemaphore *semaphore = NULL;
1107 		short numberOfSemaphores = semaphoreSet->NumberOfSemaphores();
1108 		bool goToSleep = false;
1109 
1110 		uint32 i = 0;
1111 		for (; i < numOps; i++) {
1112 			short semaphoreNumber = operations[i].sem_num;
1113 			if (semaphoreNumber >= numberOfSemaphores) {
1114 				TRACE_ERROR(("xsi_semop: %ld invalid semaphore number\n", i));
1115 				result = EINVAL;
1116 				break;
1117 			}
1118 			semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1119 			unsigned short value = semaphore->Value();
1120 			short operation = operations[i].sem_op;
1121 			TRACE(("xsi_semop: semaphoreNumber = %d, value = %d\n",
1122 				semaphoreNumber, value));
1123 			if (operation < 0) {
1124 				if (semaphore->Add(operation)) {
1125 					if (operations[i].sem_flg & IPC_NOWAIT)
1126 						result = EAGAIN;
1127 					else
1128 						goToSleep = true;
1129 					break;
1130 				}
1131 			} else if (operation == 0) {
1132 				if (value == 0)
1133 					continue;
1134 				else if (operations[i].sem_flg & IPC_NOWAIT) {
1135 					result = EAGAIN;
1136 					break;
1137 				} else {
1138 					goToSleep = true;
1139 					break;
1140 				}
1141 			} else {
1142 				// Operation must be greater than zero,
1143 				// just add the value and continue
1144 				semaphore->Add(operation);
1145 			}
1146 		}
1147 
1148 		// Either we have to wait or an error occured
1149 		if (goToSleep || result != 0) {
1150 			// Undo all previously done operations
1151 			for (uint32 j = 0; j < i; j++) {
1152 				short semaphoreNumber = operations[j].sem_num;
1153 				semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1154 				short operation = operations[j].sem_op;
1155 				if (operation != 0)
1156 					semaphore->Revert(operation);
1157 			}
1158 			if (result != 0)
1159 				return result;
1160 
1161 			// We have to wait: first enqueue the thread
1162 			// in the appropriate set waiting list, then
1163 			// unlock the set itself and block the thread.
1164 			bool waitOnZero = true;
1165 			if (operations[i].sem_op != 0)
1166 				waitOnZero = false;
1167 
1168 			struct thread *thread = thread_get_current_thread();
1169 			queued_thread queueEntry(thread, (int32)operations[i].sem_op);
1170 			semaphore->Enqueue(&queueEntry, waitOnZero);
1171 
1172 			uint32 sequenceNumber = semaphoreSet->SequenceNumber();
1173 
1174 			TRACE(("xsi_semop: thread %d going to sleep\n", (int)thread->id));
1175 			result = semaphore->BlockAndUnlock(thread, &setLocker);
1176 			TRACE(("xsi_semop: thread %d back to life\n", (int)thread->id));
1177 
1178 			// We are back to life. Find out why!
1179 			// Make sure the set hasn't been deleted or worst yet
1180 			// replaced.
1181 			setHashLocker.Lock();
1182 			semaphoreSet = sSemaphoreHashTable.Lookup(semaphoreID);
1183 			if (result == EIDRM || semaphoreSet == NULL || (semaphoreSet != NULL
1184 				&& sequenceNumber != semaphoreSet->SequenceNumber())) {
1185 				TRACE_ERROR(("xsi_semop: semaphore set id %d (sequence = %ld) "
1186 					"got destroyed\n", semaphoreID, sequenceNumber));
1187 				notDone = false;
1188 				result = EIDRM;
1189 			} else if (result == B_INTERRUPTED) {
1190 				TRACE_ERROR(("xsi_semop: thread %d got interrupted while "
1191 					"waiting on semaphore set id %d\n",(int)thread->id,
1192 					semaphoreID));
1193 				semaphore->Deque(&queueEntry, waitOnZero);
1194 				result = EINTR;
1195 				notDone = false;
1196 			} else {
1197 				setLocker.Lock();
1198 				setHashLocker.Unlock();
1199 			}
1200 		} else {
1201 			// everything worked like a charm (so far)
1202 			notDone = false;
1203 			TRACE(("xsi_semop: semaphore acquired succesfully\n"));
1204 			// We acquired the semaphore, now records the sem_undo
1205 			// requests
1206 			XsiSemaphore *semaphore = NULL;
1207 			uint32 i = 0;
1208 			for (; i < numOps; i++) {
1209 				short semaphoreNumber = operations[i].sem_num;
1210 				semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1211 				short operation = operations[i].sem_op;
1212 				if (operations[i].sem_flg & SEM_UNDO)
1213 					if (semaphoreSet->RecordUndo(semaphoreNumber, operation)
1214 						!= B_OK) {
1215 						// Unlikely scenario, but we might get here.
1216 						// Undo everything!
1217 						// Start with semaphore operations
1218 						for (uint32 j = 0; j < numOps; j++) {
1219 							short semaphoreNumber = operations[j].sem_num;
1220 							semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1221 							short operation = operations[j].sem_op;
1222 							if (operation != 0)
1223 								semaphore->Revert(operation);
1224 						}
1225 						// Remove all previously registered sem_undo request
1226 						for (uint32 j = 0; j < i; j++) {
1227 							if (operations[j].sem_flg & SEM_UNDO)
1228 								semaphoreSet->RevertUndo(operations[j].sem_num,
1229 									operations[j].sem_op);
1230 						}
1231 						result = ENOSPC;
1232 					}
1233 			}
1234 		}
1235 	}
1236 
1237 	// We did it. Set the pid of all semaphores used
1238 	if (result == 0) {
1239 		for (uint32 i = 0; i < numOps; i++) {
1240 			short semaphoreNumber = operations[i].sem_num;
1241 			XsiSemaphore *semaphore = semaphoreSet->Semaphore(semaphoreNumber);
1242 			semaphore->SetPid(getpid());
1243 		}
1244 	}
1245 	return result;
1246 }
1247