xref: /haiku/src/system/kernel/debug/system_profiler.cpp (revision 3be9edf8da228afd9fec0390f408c964766122aa)
/*
 * Copyright 2009, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Distributed under the terms of the MIT License.
 */

#include <system_profiler.h>

#include <AutoDeleter.h>
#include <Referenceable.h>

#include <util/AutoLock.h>

#include <system_profiler_defs.h>

#include <cpu.h>
#include <kernel.h>
#include <kimage.h>
#include <kscheduler.h>
#include <listeners.h>
#include <Notifications.h>
#include <sem.h>
#include <team.h>
#include <thread.h>
#include <user_debugger.h>
#include <vm.h>

#include <arch/debug.h>


// This is the kernel-side implementation of the system profiling support.
// A userland team can register as the system profiler, providing an area as
// a buffer for events. Those events are team, thread, and image changes
// (added/removed) and periodic sampling of the return address stack of each
// CPU.
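//
// A rough sketch of how a userland profiler is expected to drive this code
// (simplified; the _kern_*() calls are the syscall stubs corresponding to
// the _user_*() functions at the end of this file):
//
//	area_id buffer = create_area("profiling buffer", &address, B_ANY_ADDRESS,
//		bufferSize, B_NO_LOCK, B_READ_AREA | B_WRITE_AREA);
//
//	system_profiler_parameters parameters;
//	parameters.buffer_area = buffer;
//	parameters.flags = B_SYSTEM_PROFILER_TEAM_EVENTS | ...;
//	parameters.interval = interval;
//	parameters.stack_depth = stackDepth;
//	_kern_system_profiler_start(&parameters);
//
//	while (profiling) {
//		_kern_system_profiler_next_buffer(bytesProcessed, &droppedEvents);
//		// process the events between header->start and
//		// header->start + header->size (offsets wrap at the capacity)
//	}
//
//	_kern_system_profiler_stop();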


class SystemProfiler;


// minimum/maximum size of the table used for wait object caching
#define MIN_WAIT_OBJECT_COUNT	128
#define MAX_WAIT_OBJECT_COUNT	1024


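// At most one system profiler may be installed at a time. sProfilerLock
// guards installation and removal of sProfiler.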
static spinlock sProfilerLock = B_SPINLOCK_INITIALIZER;
static SystemProfiler* sProfiler = NULL;
static struct system_profiler_parameters* sRecordedParameters = NULL;


class SystemProfiler : public Referenceable, private NotificationListener,
	private SchedulerListener, private WaitObjectListener {
public:
								SystemProfiler(team_id team,
									const area_info& userAreaInfo,
									const system_profiler_parameters&
										parameters);
								~SystemProfiler();

			team_id				Team() const	{ return fTeam; }

			status_t			Init();
			status_t			NextBuffer(size_t bytesRead,
									uint64* _droppedEvents);

private:
	virtual	void				EventOccurred(NotificationService& service,
									const KMessage* event);

	virtual	void				ThreadEnqueuedInRunQueue(struct thread* thread);
	virtual	void				ThreadRemovedFromRunQueue(
									struct thread* thread);
	virtual	void				ThreadScheduled(struct thread* oldThread,
									struct thread* newThread);

	virtual	void				SemaphoreCreated(sem_id id,
									const char* name);
	virtual	void				ConditionVariableInitialized(
									ConditionVariable* variable);
	virtual	void				MutexInitialized(mutex* lock);
	virtual	void				RWLockInitialized(rw_lock* lock);

			bool				_TeamAdded(struct team* team);
			bool				_TeamRemoved(struct team* team);
			bool				_TeamExec(struct team* team);

			bool				_ThreadAdded(struct thread* thread);
			bool				_ThreadRemoved(struct thread* thread);

			bool				_ImageAdded(struct image* image);
			bool				_ImageRemoved(struct image* image);

			void				_WaitObjectCreated(addr_t object, uint32 type);
			void				_WaitObjectUsed(addr_t object, uint32 type);

	inline	void				_MaybeNotifyProfilerThreadLocked();
	inline	void				_MaybeNotifyProfilerThread();

	static	bool				_InitialTeamIterator(struct team* team,
									void* cookie);
	static	bool				_InitialThreadIterator(struct thread* thread,
									void* cookie);
	static	bool				_InitialImageIterator(struct image* image,
									void* cookie);

			void*				_AllocateBuffer(size_t size, int event, int cpu,
									int count);

	static	void				_InitTimers(void* cookie, int cpu);
	static	void				_UninitTimers(void* cookie, int cpu);
			void				_ScheduleTimer(int cpu);

			void				_DoSample();

	static	int32				_ProfilingEvent(struct timer* timer);

private:
			struct CPUProfileData {
				struct timer	timer;
				bigtime_t		timerEnd;
				bool			timerScheduled;
				addr_t			buffer[B_DEBUG_STACK_TRACE_DEPTH];
			};

			struct WaitObjectKey {
				addr_t	object;
				uint32	type;
			};

			struct WaitObject : DoublyLinkedListLinkImpl<WaitObject>,
					WaitObjectKey {
				struct WaitObject* hash_link;
			};

			struct WaitObjectTableDefinition {
				typedef WaitObjectKey	KeyType;
				typedef WaitObject		ValueType;

				size_t HashKey(const WaitObjectKey& key) const
				{
					return (size_t)key.object ^ (size_t)key.type;
				}

				size_t Hash(const WaitObject* value) const
				{
					return HashKey(*value);
				}

				bool Compare(const WaitObjectKey& key,
					const WaitObject* value) const
				{
					return value->type == key.type
						&& value->object == key.object;
				}

				WaitObject*& GetLink(WaitObject* value) const
				{
					return value->hash_link;
				}
			};

			typedef DoublyLinkedList<WaitObject> WaitObjectList;
			typedef BOpenHashTable<WaitObjectTableDefinition> WaitObjectTable;

private:
			spinlock			fLock;
			team_id				fTeam;
			area_id				fUserArea;
			area_id				fKernelArea;
			size_t				fAreaSize;
			uint32				fFlags;
			uint32				fStackDepth;
			bigtime_t			fInterval;
			system_profiler_buffer_header* fHeader;
			uint8*				fBufferBase;
			size_t				fBufferCapacity;
			size_t				fBufferStart;
			size_t				fBufferSize;
			uint64				fDroppedEvents;
			bool				fTeamNotificationsRequested;
			bool				fTeamNotificationsEnabled;
			bool				fThreadNotificationsRequested;
			bool				fThreadNotificationsEnabled;
			bool				fImageNotificationsRequested;
			bool				fImageNotificationsEnabled;
			bool				fSchedulerNotificationsRequested;
			bool				fWaitObjectNotificationsRequested;
			ConditionVariable	fProfilerWaitCondition;
			bool				fProfilerWaiting;
			bool				fProfilingActive;
			bool				fReentered[B_MAX_CPU_COUNT];
			CPUProfileData		fCPUData[B_MAX_CPU_COUNT];
			struct thread**		fRunningThreads;
			WaitObject*			fWaitObjectBuffer;
			int32				fWaitObjectCount;
			WaitObjectList		fUsedWaitObjects;
			WaitObjectList		fFreeWaitObjects;
			WaitObjectTable		fWaitObjectTable;
};


inline void
SystemProfiler::_MaybeNotifyProfilerThreadLocked()
{
	// If the buffer is full enough, notify the profiler.
	if (fProfilerWaiting && fBufferSize > fBufferCapacity / 2) {
		fProfilerWaiting = false;
		int cpu = smp_get_current_cpu();
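		// Mark this CPU re-entered, so that the scheduler hooks triggered
		// by NotifyOne() below skip acquiring fLock (see the SpinLocker
		// calls in those hooks), which we already hold.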
		fReentered[cpu] = true;
		fProfilerWaitCondition.NotifyOne(true);
		fReentered[cpu] = false;
	}
}


inline void
SystemProfiler::_MaybeNotifyProfilerThread()
{
	if (!fProfilerWaiting)
		return;

	InterruptsSpinLocker threadsLocker(gThreadSpinlock);
	SpinLocker locker(fLock);

	_MaybeNotifyProfilerThreadLocked();
}


SystemProfiler::SystemProfiler(team_id team, const area_info& userAreaInfo,
	const system_profiler_parameters& parameters)
	:
	fTeam(team),
	fUserArea(userAreaInfo.area),
	fKernelArea(-1),
	fAreaSize(userAreaInfo.size),
	fFlags(parameters.flags),
	fStackDepth(parameters.stack_depth),
	fInterval(parameters.interval),
	fHeader(NULL),
	fBufferBase(NULL),
	fBufferCapacity(0),
	fBufferStart(0),
	fBufferSize(0),
	fDroppedEvents(0),
	fTeamNotificationsRequested(false),
	fTeamNotificationsEnabled(false),
	fThreadNotificationsRequested(false),
	fThreadNotificationsEnabled(false),
	fImageNotificationsRequested(false),
	fImageNotificationsEnabled(false),
	fSchedulerNotificationsRequested(false),
	fWaitObjectNotificationsRequested(false),
	fProfilerWaiting(false),
	fProfilingActive(false),
	fWaitObjectBuffer(NULL),
	fWaitObjectCount(0),
	fUsedWaitObjects(),
	fFreeWaitObjects(),
	fWaitObjectTable()
{
	B_INITIALIZE_SPINLOCK(&fLock);
	fProfilerWaitCondition.Init(this, "system profiler");

	memset(fReentered, 0, sizeof(fReentered));

	// compute the number of wait objects we want to cache
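	// (Each cached object costs its WaitObject plus about 1.5 pointers of
	// hash table, since the table is sized at 3/2 the object count in
	// Init().)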
	if ((fFlags & B_SYSTEM_PROFILER_SCHEDULING_EVENTS) != 0) {
		fWaitObjectCount = parameters.locking_lookup_size
			/ (sizeof(WaitObject) + (sizeof(void*) * 3 / 2));
		if (fWaitObjectCount < MIN_WAIT_OBJECT_COUNT)
			fWaitObjectCount = MIN_WAIT_OBJECT_COUNT;
		if (fWaitObjectCount > MAX_WAIT_OBJECT_COUNT)
			fWaitObjectCount = MAX_WAIT_OBJECT_COUNT;
	}
}


SystemProfiler::~SystemProfiler()
{
	// Wake up the user thread, if it is waiting, and mark profiling
	// inactive.
	InterruptsSpinLocker locker(fLock);
	if (fProfilerWaiting) {
		fProfilerWaiting = false;
		fProfilerWaitCondition.NotifyAll();
	}
	fProfilingActive = false;
	locker.Unlock();

	// stop scheduler listening
	if (fSchedulerNotificationsRequested) {
		InterruptsSpinLocker threadsLocker(gThreadSpinlock);
		scheduler_remove_listener(this);
	}

	// stop wait object listening
	if (fWaitObjectNotificationsRequested) {
		InterruptsSpinLocker locker(gWaitObjectListenerLock);
		remove_wait_object_listener(this);
	}

	// deactivate the profiling timers on all CPUs
	if ((fFlags & B_SYSTEM_PROFILER_SAMPLING_EVENTS) != 0)
		call_all_cpus(_UninitTimers, this);

	// cancel notifications
	NotificationManager& notificationManager
		= NotificationManager::Manager();

	// images
	if (fImageNotificationsRequested) {
		fImageNotificationsRequested = false;
		notificationManager.RemoveListener("images", NULL, *this);
	}

	// threads
	if (fThreadNotificationsRequested) {
		fThreadNotificationsRequested = false;
		notificationManager.RemoveListener("threads", NULL, *this);
	}

	// teams
	if (fTeamNotificationsRequested) {
		fTeamNotificationsRequested = false;
		notificationManager.RemoveListener("teams", NULL, *this);
	}

	// delete wait object related allocations
	fWaitObjectTable.Clear();
	delete[] fWaitObjectBuffer;

	// unlock the memory and delete the area
	if (fKernelArea >= 0) {
		unlock_memory(fHeader, fAreaSize, B_READ_DEVICE);
		delete_area(fKernelArea);
		fKernelArea = -1;
	}
}


status_t
SystemProfiler::Init()
{
	// clone the user area
	void* areaBase;
	fKernelArea = clone_area("profiling samples", &areaBase,
		B_ANY_KERNEL_ADDRESS, B_READ_AREA | B_WRITE_AREA,
		fUserArea);
	if (fKernelArea < 0)
		return fKernelArea;

	// we need the memory locked
	status_t error = lock_memory(areaBase, fAreaSize, B_READ_DEVICE);
	if (error != B_OK) {
		delete_area(fKernelArea);
		fKernelArea = -1;
		return error;
	}

	// the buffer is ready for use
	fHeader = (system_profiler_buffer_header*)areaBase;
	fBufferBase = (uint8*)(fHeader + 1);
	fBufferCapacity = fAreaSize - (fBufferBase - (uint8*)areaBase);
	fHeader->start = 0;
	fHeader->size = 0;

	// allocate the wait object buffer and init the hash table
	if (fWaitObjectCount > 0) {
		fWaitObjectBuffer = new(std::nothrow) WaitObject[fWaitObjectCount];
		if (fWaitObjectBuffer == NULL)
			return B_NO_MEMORY;

		for (int32 i = 0; i < fWaitObjectCount; i++)
			fFreeWaitObjects.Add(fWaitObjectBuffer + i);

		error = fWaitObjectTable.Init(fWaitObjectCount * 3 / 2);
		if (error != B_OK)
			return error;
	}

	// start listening for notifications

	// teams
	NotificationManager& notificationManager
		= NotificationManager::Manager();
	if ((fFlags & B_SYSTEM_PROFILER_TEAM_EVENTS) != 0) {
		error = notificationManager.AddListener("teams",
			TEAM_ADDED | TEAM_REMOVED | TEAM_EXEC, *this);
		if (error != B_OK)
			return error;
		fTeamNotificationsRequested = true;
	}

	// threads
	if ((fFlags & B_SYSTEM_PROFILER_THREAD_EVENTS) != 0) {
		error = notificationManager.AddListener("threads",
			THREAD_ADDED | THREAD_REMOVED, *this);
		if (error != B_OK)
			return error;
		fThreadNotificationsRequested = true;
	}

	// images
	if ((fFlags & B_SYSTEM_PROFILER_IMAGE_EVENTS) != 0) {
		error = notificationManager.AddListener("images",
			IMAGE_ADDED | IMAGE_REMOVED, *this);
		if (error != B_OK)
			return error;
		fImageNotificationsRequested = true;
	}

	// We need to fill the buffer with the initial state of teams, threads,
	// and images.

	// teams
	if ((fFlags & B_SYSTEM_PROFILER_TEAM_EVENTS) != 0) {
		InterruptsSpinLocker teamsLocker(gTeamSpinlock);
		if (team_iterate_through_teams(&_InitialTeamIterator, this) != NULL)
			return B_BUFFER_OVERFLOW;
		fTeamNotificationsEnabled = true;
		teamsLocker.Unlock();
	}

	// images
	if ((fFlags & B_SYSTEM_PROFILER_IMAGE_EVENTS) != 0) {
		if (image_iterate_through_images(&_InitialImageIterator, this) != NULL)
			return B_BUFFER_OVERFLOW;
	}

	// threads
	struct thread* runningThreads[B_MAX_CPU_COUNT];
	memset(runningThreads, 0, sizeof(runningThreads));
	fRunningThreads = runningThreads;
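		// (points into this stack frame; used only while iterating below)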

	InterruptsSpinLocker threadsLocker(gThreadSpinlock);
	if ((fFlags & B_SYSTEM_PROFILER_THREAD_EVENTS) != 0
		|| (fFlags & B_SYSTEM_PROFILER_SCHEDULING_EVENTS) != 0) {
		if (thread_iterate_through_threads(&_InitialThreadIterator, this)
				!= NULL) {
			return B_BUFFER_OVERFLOW;
		}
		fThreadNotificationsEnabled
			= (fFlags & B_SYSTEM_PROFILER_THREAD_EVENTS) != 0;
	}

	fProfilingActive = true;

	// start scheduler and wait object listening
	if ((fFlags & B_SYSTEM_PROFILER_SCHEDULING_EVENTS) != 0) {
		scheduler_add_listener(this);
		fSchedulerNotificationsRequested = true;

		SpinLocker waitObjectLocker(gWaitObjectListenerLock);
		add_wait_object_listener(this);
		fWaitObjectNotificationsRequested = true;
		waitObjectLocker.Unlock();

		// fake schedule events for the initially running threads
		int32 cpuCount = smp_get_num_cpus();
		for (int32 i = 0; i < cpuCount; i++) {
			if (runningThreads[i] != NULL)
				ThreadScheduled(runningThreads[i], runningThreads[i]);
		}
	}

	threadsLocker.Unlock();

	// activate the profiling timers on all CPUs
	if ((fFlags & B_SYSTEM_PROFILER_SAMPLING_EVENTS) != 0)
		call_all_cpus(_InitTimers, this);

	return B_OK;
}


status_t
SystemProfiler::NextBuffer(size_t bytesRead, uint64* _droppedEvents)
{
	InterruptsSpinLocker locker(fLock);

	if (fProfilerWaiting || !fProfilingActive || bytesRead > fBufferSize)
		return B_BAD_VALUE;

	fBufferSize -= bytesRead;
	fBufferStart += bytesRead;
	// wrap around; a start that lands exactly at the capacity must wrap, too
	if (fBufferStart >= fBufferCapacity)
		fBufferStart -= fBufferCapacity;
	fHeader->size = fBufferSize;
	fHeader->start = fBufferStart;

	// already enough data in the buffer to return?
	if (fBufferSize > fBufferCapacity / 2)
		return B_OK;

	// Wait until the buffer is full enough or an error or a timeout occurs.
	while (true) {
		ConditionVariableEntry waitEntry;
		fProfilerWaitCondition.Add(&waitEntry);

		fProfilerWaiting = true;

		locker.Unlock();

		status_t error = waitEntry.Wait(
			B_CAN_INTERRUPT | B_RELATIVE_TIMEOUT, 1000000);

		locker.Lock();

		if (error == B_OK) {
			// the notifier has unset fProfilerWaiting for us
			break;
		}

		fProfilerWaiting = false;

		if (error != B_TIMED_OUT)
			return error;

		// just the timeout -- return if the buffer is not empty
		if (fBufferSize > 0)
			break;
	}

	if (_droppedEvents != NULL) {
		*_droppedEvents = fDroppedEvents;
		fDroppedEvents = 0;
	}

	return B_OK;
}


void
SystemProfiler::EventOccurred(NotificationService& service,
	const KMessage* event)
{
	int32 eventCode;
	if (event->FindInt32("event", &eventCode) != B_OK)
		return;

	if (strcmp(service.Name(), "teams") == 0) {
		if (!fTeamNotificationsEnabled)
			return;

		struct team* team = (struct team*)event->GetPointer("teamStruct",
			NULL);
		if (team == NULL)
			return;

		switch (eventCode) {
			case TEAM_ADDED:
				_TeamAdded(team);
				break;

			case TEAM_REMOVED:
				if (team->id == fTeam) {
					// The profiling team is gone -- uninstall the profiler!
					InterruptsSpinLocker locker(sProfilerLock);
					if (sProfiler != this)
						return;

					sProfiler = NULL;
					locker.Unlock();

					RemoveReference();
				} else
					_TeamRemoved(team);

				break;

			case TEAM_EXEC:
				_TeamExec(team);
				break;
		}
	} else if (strcmp(service.Name(), "threads") == 0) {
		if (!fThreadNotificationsEnabled)
			return;

		struct thread* thread = (struct thread*)event->GetPointer(
			"threadStruct", NULL);
		if (thread == NULL)
			return;

		switch (eventCode) {
			case THREAD_ADDED:
				_ThreadAdded(thread);
				break;

			case THREAD_REMOVED:
				_ThreadRemoved(thread);
				break;
		}
	} else if (strcmp(service.Name(), "images") == 0) {
		if (!fImageNotificationsEnabled)
			return;

		struct image* image = (struct image*)event->GetPointer(
			"imageStruct", NULL);
		if (image == NULL)
			return;

		switch (eventCode) {
			case IMAGE_ADDED:
				_ImageAdded(image);
				break;

			case IMAGE_REMOVED:
				_ImageRemoved(image);
				break;
		}
	}

	_MaybeNotifyProfilerThread();
}


void
SystemProfiler::ThreadEnqueuedInRunQueue(struct thread* thread)
{
	int cpu = smp_get_current_cpu();

	SpinLocker locker(fLock, false, !fReentered[cpu]);
		// When re-entering, we already hold the lock.

	system_profiler_thread_enqueued_in_run_queue* event
		= (system_profiler_thread_enqueued_in_run_queue*)
			_AllocateBuffer(
				sizeof(system_profiler_thread_enqueued_in_run_queue),
				B_SYSTEM_PROFILER_THREAD_ENQUEUED_IN_RUN_QUEUE, cpu, 0);
	if (event == NULL)
		return;

	event->time = system_time();
	event->thread = thread->id;
	event->priority = thread->priority;

	fHeader->size = fBufferSize;

	// Unblock the profiler thread if necessary, but not when the enqueued
	// thread was waiting on a condition variable, since then we'd likely
	// deadlock in ConditionVariable::NotifyOne(), as it acquires a static
	// spinlock.
	if (thread->wait.type != THREAD_BLOCK_TYPE_CONDITION_VARIABLE)
		_MaybeNotifyProfilerThreadLocked();
}


void
SystemProfiler::ThreadRemovedFromRunQueue(struct thread* thread)
{
	int cpu = smp_get_current_cpu();

	SpinLocker locker(fLock, false, !fReentered[cpu]);
		// When re-entering, we already hold the lock.

	system_profiler_thread_removed_from_run_queue* event
		= (system_profiler_thread_removed_from_run_queue*)
			_AllocateBuffer(
				sizeof(system_profiler_thread_removed_from_run_queue),
				B_SYSTEM_PROFILER_THREAD_REMOVED_FROM_RUN_QUEUE, cpu, 0);
	if (event == NULL)
		return;

	event->time = system_time();
	event->thread = thread->id;

	fHeader->size = fBufferSize;

	// unblock the profiler thread, if necessary
	_MaybeNotifyProfilerThreadLocked();
}


void
SystemProfiler::ThreadScheduled(struct thread* oldThread,
	struct thread* newThread)
{
	int cpu = smp_get_current_cpu();

	SpinLocker locker(fLock, false, !fReentered[cpu]);
		// When re-entering, we already hold the lock.

	// If the old thread starts waiting, handle the wait object.
	if (oldThread->state == B_THREAD_WAITING)
		_WaitObjectUsed((addr_t)oldThread->wait.object, oldThread->wait.type);

	system_profiler_thread_scheduled* event
		= (system_profiler_thread_scheduled*)
			_AllocateBuffer(sizeof(system_profiler_thread_scheduled),
				B_SYSTEM_PROFILER_THREAD_SCHEDULED, cpu, 0);
	if (event == NULL)
		return;

	event->time = system_time();
	event->thread = newThread->id;
	event->previous_thread = oldThread->id;
	event->previous_thread_state = oldThread->state;
	event->previous_thread_wait_object_type = oldThread->wait.type;
	event->previous_thread_wait_object = (addr_t)oldThread->wait.object;

	fHeader->size = fBufferSize;

	// unblock the profiler thread, if necessary
	_MaybeNotifyProfilerThreadLocked();
}


void
SystemProfiler::SemaphoreCreated(sem_id id, const char* name)
{
	_WaitObjectCreated((addr_t)id, THREAD_BLOCK_TYPE_SEMAPHORE);
}


void
SystemProfiler::ConditionVariableInitialized(ConditionVariable* variable)
{
	_WaitObjectCreated((addr_t)variable, THREAD_BLOCK_TYPE_CONDITION_VARIABLE);
}


void
SystemProfiler::MutexInitialized(mutex* lock)
{
	_WaitObjectCreated((addr_t)lock, THREAD_BLOCK_TYPE_MUTEX);
}


void
SystemProfiler::RWLockInitialized(rw_lock* lock)
{
	_WaitObjectCreated((addr_t)lock, THREAD_BLOCK_TYPE_RW_LOCK);
}


bool
SystemProfiler::_TeamAdded(struct team* team)
{
	InterruptsSpinLocker locker(fLock);

	size_t nameLen = strlen(team->name);
	size_t argsLen = strlen(team->args);

	system_profiler_team_added* event = (system_profiler_team_added*)
		_AllocateBuffer(
			sizeof(system_profiler_team_added) + nameLen + 1 + argsLen,
			B_SYSTEM_PROFILER_TEAM_ADDED, 0, 0);
	if (event == NULL)
		return false;

	event->team = team->id;
	strcpy(event->name, team->name);
	event->args_offset = nameLen + 1;
	strcpy(event->name + nameLen + 1, team->args);

	fHeader->size = fBufferSize;

	return true;
}


bool
SystemProfiler::_TeamRemoved(struct team* team)
{
	InterruptsSpinLocker locker(fLock);

	system_profiler_team_removed* event = (system_profiler_team_removed*)
		_AllocateBuffer(sizeof(system_profiler_team_removed),
			B_SYSTEM_PROFILER_TEAM_REMOVED, 0, 0);
	if (event == NULL)
		return false;

	event->team = team->id;

	fHeader->size = fBufferSize;

	return true;
}


bool
SystemProfiler::_TeamExec(struct team* team)
{
	InterruptsSpinLocker locker(fLock);

	size_t argsLen = strlen(team->args);

	system_profiler_team_exec* event = (system_profiler_team_exec*)
		_AllocateBuffer(sizeof(system_profiler_team_exec) + argsLen,
			B_SYSTEM_PROFILER_TEAM_EXEC, 0, 0);
	if (event == NULL)
		return false;

	event->team = team->id;
	strlcpy(event->thread_name, team->main_thread->name,
		sizeof(event->thread_name));
	strcpy(event->args, team->args);

	fHeader->size = fBufferSize;

	return true;
}


bool
SystemProfiler::_ThreadAdded(struct thread* thread)
{
	InterruptsSpinLocker locker(fLock);

	system_profiler_thread_added* event = (system_profiler_thread_added*)
		_AllocateBuffer(sizeof(system_profiler_thread_added),
			B_SYSTEM_PROFILER_THREAD_ADDED, 0, 0);
	if (event == NULL)
		return false;

	event->team = thread->team->id;
	event->thread = thread->id;
	strlcpy(event->name, thread->name, sizeof(event->name));

	fHeader->size = fBufferSize;

	return true;
}


bool
SystemProfiler::_ThreadRemoved(struct thread* thread)
{
	InterruptsSpinLocker locker(fLock);

	system_profiler_thread_removed* event
		= (system_profiler_thread_removed*)
			_AllocateBuffer(sizeof(system_profiler_thread_removed),
				B_SYSTEM_PROFILER_THREAD_REMOVED, 0, 0);
	if (event == NULL)
		return false;

	event->team = thread->team->id;
	event->thread = thread->id;

	fHeader->size = fBufferSize;

	return true;
}


bool
SystemProfiler::_ImageAdded(struct image* image)
{
	InterruptsSpinLocker locker(fLock);

	system_profiler_image_added* event = (system_profiler_image_added*)
		_AllocateBuffer(sizeof(system_profiler_image_added),
			B_SYSTEM_PROFILER_IMAGE_ADDED, 0, 0);
	if (event == NULL)
		return false;

	event->team = image->team;
	event->info = image->info;

	fHeader->size = fBufferSize;

	return true;
}


bool
SystemProfiler::_ImageRemoved(struct image* image)
{
	InterruptsSpinLocker locker(fLock);

	system_profiler_image_removed* event = (system_profiler_image_removed*)
		_AllocateBuffer(sizeof(system_profiler_image_removed),
			B_SYSTEM_PROFILER_IMAGE_REMOVED, 0, 0);
	if (event == NULL)
		return false;

	event->team = image->team;
	event->image = image->info.id;

	fHeader->size = fBufferSize;

	return true;
}


void
SystemProfiler::_WaitObjectCreated(addr_t object, uint32 type)
{
	SpinLocker locker(fLock);

	// look up the object
	WaitObjectKey key;
	key.object = object;
	key.type = type;
	WaitObject* waitObject = fWaitObjectTable.Lookup(key);

	// If found, remove it and add it to the free list. This might sound
	// weird, but it makes sense, since we lazily track *used* wait objects
	// only. That is, the object in the table is now guaranteed to be
	// obsolete.
	if (waitObject != NULL) {
		fWaitObjectTable.RemoveUnchecked(waitObject);
		fUsedWaitObjects.Remove(waitObject);
		fFreeWaitObjects.Add(waitObject, false);
	}
}


void
SystemProfiler::_WaitObjectUsed(addr_t object, uint32 type)
{
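	// Caller must hold fLock -- the scheduler hooks call this while they
	// hold it.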
	// look up the object
	WaitObjectKey key;
	key.object = object;
	key.type = type;
	WaitObject* waitObject = fWaitObjectTable.Lookup(key);

	// If already known, re-queue it as most recently used and be done.
	if (waitObject != NULL) {
		fUsedWaitObjects.Remove(waitObject);
		fUsedWaitObjects.Add(waitObject);
		return;
	}

	// not known yet -- get the info
	const char* name = NULL;
	const void* referencedObject = NULL;

	switch (type) {
		case THREAD_BLOCK_TYPE_SEMAPHORE:
		{
			name = sem_get_name_unsafe((sem_id)object);
			break;
		}

		case THREAD_BLOCK_TYPE_CONDITION_VARIABLE:
		{
			ConditionVariable* variable = (ConditionVariable*)object;
			name = variable->ObjectType();
			referencedObject = variable->Object();
			break;
		}

		case THREAD_BLOCK_TYPE_MUTEX:
		{
			mutex* lock = (mutex*)object;
			name = lock->name;
			break;
		}

		case THREAD_BLOCK_TYPE_RW_LOCK:
		{
			rw_lock* lock = (rw_lock*)object;
			name = lock->name;
			break;
		}

		case THREAD_BLOCK_TYPE_OTHER:
		{
			name = (const char*)(void*)object;
			break;
		}

		case THREAD_BLOCK_TYPE_SNOOZE:
		case THREAD_BLOCK_TYPE_SIGNAL:
		default:
			return;
	}

	// add the event
	size_t nameLen = name != NULL ? strlen(name) : 0;

	system_profiler_wait_object_info* event
		= (system_profiler_wait_object_info*)
			_AllocateBuffer(sizeof(system_profiler_wait_object_info) + nameLen,
				B_SYSTEM_PROFILER_WAIT_OBJECT_INFO, 0, 0);
	if (event == NULL)
		return;

	event->type = type;
	event->object = object;
	event->referenced_object = (addr_t)referencedObject;
	if (name != NULL)
		strcpy(event->name, name);
	else
		event->name[0] = '\0';

	fHeader->size = fBufferSize;

	// add the wait object

	// get a free one or steal the least recently used one
	waitObject = fFreeWaitObjects.RemoveHead();
	if (waitObject == NULL) {
		waitObject = fUsedWaitObjects.RemoveHead();
		fWaitObjectTable.RemoveUnchecked(waitObject);
	}

	waitObject->object = object;
	waitObject->type = type;
	fWaitObjectTable.InsertUnchecked(waitObject);
	fUsedWaitObjects.Add(waitObject);
}


/*static*/ bool
SystemProfiler::_InitialTeamIterator(struct team* team, void* cookie)
{
	SystemProfiler* self = (SystemProfiler*)cookie;
	return !self->_TeamAdded(team);
}


/*static*/ bool
SystemProfiler::_InitialThreadIterator(struct thread* thread, void* cookie)
{
	SystemProfiler* self = (SystemProfiler*)cookie;

	if ((self->fFlags & B_SYSTEM_PROFILER_SCHEDULING_EVENTS) != 0
		&& thread->state == B_THREAD_RUNNING && thread->cpu != NULL) {
		self->fRunningThreads[thread->cpu->cpu_num] = thread;
	}

	return !self->_ThreadAdded(thread);
}


/*static*/ bool
SystemProfiler::_InitialImageIterator(struct image* image, void* cookie)
{
	SystemProfiler* self = (SystemProfiler*)cookie;
	self->fImageNotificationsEnabled = true;
		// Set that here, since the image lock is being held now.
	return !self->_ImageAdded(image);
}


void*
SystemProfiler::_AllocateBuffer(size_t size, int event, int cpu, int count)
{
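	// Round the payload up to a multiple of 4 bytes and add the event
	// header. ("count" is currently unused; the sample count is already
	// reflected in "size".)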
	size = (size + 3) / 4 * 4;
	size += sizeof(system_profiler_event_header);

	size_t end = fBufferStart + fBufferSize;
	if (end + size > fBufferCapacity) {
		// Buffer is wrapped or needs wrapping.
		if (end < fBufferCapacity) {
			// not wrapped yet, but needed
			system_profiler_event_header* header
				= (system_profiler_event_header*)(fBufferBase + end);
			header->event = B_SYSTEM_PROFILER_BUFFER_END;
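				// (the consumer is expected to skip from this marker to
				// offset 0)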
			fBufferSize = fBufferCapacity - fBufferStart;
			end = 0;
		} else
			end -= fBufferCapacity;

		if (end + size > fBufferStart) {
			fDroppedEvents++;
			return NULL;
		}
	}

	system_profiler_event_header* header
		= (system_profiler_event_header*)(fBufferBase + end);
	header->event = event;
	header->cpu = cpu;
	header->size = size - sizeof(system_profiler_event_header);

	fBufferSize += size;

	return header + 1;
}


/*static*/ void
SystemProfiler::_InitTimers(void* cookie, int cpu)
{
	SystemProfiler* self = (SystemProfiler*)cookie;
	self->_ScheduleTimer(cpu);
}


/*static*/ void
SystemProfiler::_UninitTimers(void* cookie, int cpu)
{
	SystemProfiler* self = (SystemProfiler*)cookie;

	CPUProfileData& cpuData = self->fCPUData[cpu];
	cancel_timer(&cpuData.timer);
	cpuData.timerScheduled = false;
}


void
SystemProfiler::_ScheduleTimer(int cpu)
{
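	// (re)arm the one-shot profiling timer for this CPU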
	CPUProfileData& cpuData = fCPUData[cpu];
	cpuData.timerEnd = system_time() + fInterval;
	cpuData.timer.user_data = this;
	add_timer(&cpuData.timer, &_ProfilingEvent, fInterval,
		B_ONE_SHOT_RELATIVE_TIMER);
	cpuData.timerScheduled = true;
}


void
SystemProfiler::_DoSample()
{
	struct thread* thread = thread_get_current_thread();
	int cpu = thread->cpu->cpu_num;
	CPUProfileData& cpuData = fCPUData[cpu];

	// get the samples
	int32 count = arch_debug_get_stack_trace(cpuData.buffer, fStackDepth, 1,
		0, false);

	InterruptsSpinLocker locker(fLock);

	system_profiler_samples* event = (system_profiler_samples*)
		_AllocateBuffer(sizeof(system_profiler_samples)
				+ count * sizeof(addr_t),
			B_SYSTEM_PROFILER_SAMPLES, cpu, count);
	if (event == NULL)
		return;

	event->thread = thread->id;
	memcpy(event->samples, cpuData.buffer, count * sizeof(addr_t));

	fHeader->size = fBufferSize;
}


/*static*/ int32
SystemProfiler::_ProfilingEvent(struct timer* timer)
{
	SystemProfiler* self = (SystemProfiler*)timer->user_data;

	self->_DoSample();
	self->_ScheduleTimer(timer->cpu);

	return B_HANDLED_INTERRUPT;
}


// #pragma mark - private kernel API


status_t
start_system_profiler(size_t areaSize, uint32 stackDepth, bigtime_t interval)
{
	struct ParameterDeleter {
		ParameterDeleter(area_id area)
			:
			fArea(area),
			fDetached(false)
		{
		}

		~ParameterDeleter()
		{
			if (!fDetached) {
				delete_area(fArea);
				delete sRecordedParameters;
				sRecordedParameters = NULL;
			}
		}

		void Detach()
		{
			fDetached = true;
		}

	private:
		area_id	fArea;
		bool	fDetached;
	};

	void* address;
	area_id area = create_area("kernel profile data", &address,
		B_ANY_KERNEL_ADDRESS, areaSize, B_FULL_LOCK,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
	if (area < 0)
		return area;

	ParameterDeleter parameterDeleter(area);

	sRecordedParameters = new(std::nothrow) system_profiler_parameters;
	if (sRecordedParameters == NULL)
		return B_NO_MEMORY;

	sRecordedParameters->buffer_area = area;
	sRecordedParameters->flags = B_SYSTEM_PROFILER_TEAM_EVENTS
		| B_SYSTEM_PROFILER_THREAD_EVENTS | B_SYSTEM_PROFILER_IMAGE_EVENTS
		| B_SYSTEM_PROFILER_SAMPLING_EVENTS;
	sRecordedParameters->locking_lookup_size = 4096;
	sRecordedParameters->interval = interval;
	sRecordedParameters->stack_depth = stackDepth;

	area_info areaInfo;
	get_area_info(area, &areaInfo);

	// initialize the profiler
	SystemProfiler* profiler = new(std::nothrow) SystemProfiler(B_SYSTEM_TEAM,
		areaInfo, *sRecordedParameters);
	if (profiler == NULL)
		return B_NO_MEMORY;

	ObjectDeleter<SystemProfiler> profilerDeleter(profiler);

	status_t error = profiler->Init();
	if (error != B_OK)
		return error;

	// set the new profiler
	InterruptsSpinLocker locker(sProfilerLock);
	if (sProfiler != NULL)
		return B_BUSY;

	parameterDeleter.Detach();
	profilerDeleter.Detach();
	sProfiler = profiler;
	locker.Unlock();

	return B_OK;
}


void
stop_system_profiler()
{
	InterruptsSpinLocker locker(sProfilerLock);
	if (sProfiler == NULL)
		return;

	SystemProfiler* profiler = sProfiler;
	sProfiler = NULL;
	locker.Unlock();

	profiler->RemoveReference();
}


// #pragma mark - syscalls


status_t
_user_system_profiler_start(struct system_profiler_parameters* userParameters)
{
	// copy params to the kernel
	struct system_profiler_parameters parameters;
	if (userParameters == NULL || !IS_USER_ADDRESS(userParameters)
		|| user_memcpy(&parameters, userParameters, sizeof(parameters))
			!= B_OK) {
		return B_BAD_ADDRESS;
	}

	// check the parameters
	team_id team = thread_get_current_thread()->team->id;

	area_info areaInfo;
	status_t error = get_area_info(parameters.buffer_area, &areaInfo);
	if (error != B_OK)
		return error;

	if (areaInfo.team != team)
		return B_BAD_VALUE;

	if ((parameters.flags & B_SYSTEM_PROFILER_SAMPLING_EVENTS) != 0) {
		if (parameters.stack_depth < 1)
			return B_BAD_VALUE;

		if (parameters.interval < B_DEBUG_MIN_PROFILE_INTERVAL)
			parameters.interval = B_DEBUG_MIN_PROFILE_INTERVAL;

		if (parameters.stack_depth > B_DEBUG_STACK_TRACE_DEPTH)
			parameters.stack_depth = B_DEBUG_STACK_TRACE_DEPTH;
	}

	// quick check to see whether we already have a profiler installed
	InterruptsSpinLocker locker(sProfilerLock);
	if (sProfiler != NULL)
		return B_BUSY;
	locker.Unlock();

	// initialize the profiler
	SystemProfiler* profiler = new(std::nothrow) SystemProfiler(team, areaInfo,
		parameters);
	if (profiler == NULL)
		return B_NO_MEMORY;
	ObjectDeleter<SystemProfiler> profilerDeleter(profiler);

	error = profiler->Init();
	if (error != B_OK)
		return error;

	// set the new profiler
	locker.Lock();
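	// recheck -- someone might have installed a profiler in the meantime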
	if (sProfiler != NULL)
		return B_BUSY;

	profilerDeleter.Detach();
	sProfiler = profiler;
	locker.Unlock();

	return B_OK;
}


status_t
_user_system_profiler_next_buffer(size_t bytesRead, uint64* _droppedEvents)
{
	if (_droppedEvents != NULL && !IS_USER_ADDRESS(_droppedEvents))
		return B_BAD_ADDRESS;

	team_id team = thread_get_current_thread()->team->id;

	InterruptsSpinLocker locker(sProfilerLock);
	if (sProfiler == NULL || sProfiler->Team() != team)
		return B_BAD_VALUE;

	// get a reference to the profiler
	SystemProfiler* profiler = sProfiler;
	Reference<SystemProfiler> reference(profiler);
	locker.Unlock();

	uint64 droppedEvents;
	status_t error = profiler->NextBuffer(bytesRead,
		_droppedEvents != NULL ? &droppedEvents : NULL);
	if (error == B_OK && _droppedEvents != NULL)
		user_memcpy(_droppedEvents, &droppedEvents, sizeof(droppedEvents));

	return error;
}


status_t
_user_system_profiler_stop()
{
	team_id team = thread_get_current_thread()->team->id;

	InterruptsSpinLocker locker(sProfilerLock);
	if (sProfiler == NULL || sProfiler->Team() != team)
		return B_BAD_VALUE;

	SystemProfiler* profiler = sProfiler;
	sProfiler = NULL;
	locker.Unlock();

	profiler->RemoveReference();

	return B_OK;
}


status_t
_user_system_profiler_recorded(struct system_profiler_parameters* userParameters)
{
	if (userParameters == NULL || !IS_USER_ADDRESS(userParameters))
		return B_BAD_ADDRESS;
	if (sRecordedParameters == NULL)
		return B_ERROR;

	// Transfer the area to the userland process

	void* address;
	area_id newArea = transfer_area(sRecordedParameters->buffer_area, &address,
		B_ANY_ADDRESS, team_get_current_team_id(), true);
	if (newArea < 0)
		return newArea;

	status_t status = set_area_protection(newArea, B_READ_AREA);
	if (status == B_OK) {
		sRecordedParameters->buffer_area = newArea;

		status = user_memcpy(userParameters, sRecordedParameters,
			sizeof(system_profiler_parameters));
	}
	if (status != B_OK)
		delete_area(newArea);

	delete sRecordedParameters;
	sRecordedParameters = NULL;

	return status;
}