/*
 * Copyright (c) 1996
 * Silicon Graphics Computer Systems, Inc.
 *
 * Permission to use, copy, modify, distribute and sell this software
 * and its documentation for any purpose is hereby granted without fee,
 * provided that the above copyright notice appear in all copies and
 * that both that copyright notice and this permission notice appear
 * in supporting documentation.  Silicon Graphics makes no
 * representations about the suitability of this software for any
 * purpose.  It is provided "as is" without express or implied warranty.
 */

#ifndef __SGI_STL_PTHREAD_ALLOC
#define __SGI_STL_PTHREAD_ALLOC

// Pthread-specific node allocator.
// This is similar to the default allocator, except that free-list
// information is kept separately for each thread, avoiding locking.
// This should be reasonably fast even in the presence of threads.
// The downside is that storage may not be well utilized.
// It is not an error to allocate memory in thread A and deallocate
// it in thread B.  But this effectively transfers ownership of the memory,
// so that it can only be reused by thread B.  Thus this can effectively
// result in a storage leak if it's done on a regular basis.
// It can also result in frequent sharing of
// cache lines among processors, with potentially serious performance
// consequences.
// A brief usage sketch appears at the end of this file.

#include <stl_config.h>
#include <stl_alloc.h>
#ifndef __RESTRICT
#  define __RESTRICT
#endif

__STL_BEGIN_NAMESPACE

#define __STL_DATA_ALIGNMENT 8

union _Pthread_alloc_obj {
    union _Pthread_alloc_obj * __free_list_link;
    char __client_data[__STL_DATA_ALIGNMENT];    /* The client sees this. */
};

// Pthread allocators don't appear to the client to have meaningful
// instances.  We do in fact need to associate some state with each
// thread.  That state is represented by
// _Pthread_alloc_per_thread_state<_Max_size>.

template<size_t _Max_size>
struct _Pthread_alloc_per_thread_state {
    typedef _Pthread_alloc_obj __obj;
    enum { _S_NFREELISTS = _Max_size/__STL_DATA_ALIGNMENT };
    _Pthread_alloc_obj* volatile __free_list[_S_NFREELISTS];
    _Pthread_alloc_per_thread_state<_Max_size>* __next;
        // Free list link for the list of available per-thread structures.
        // When one of these becomes available for reuse due to thread
        // termination, any objects in its free lists remain associated
        // with it.  The whole structure may then be used by a newly
        // created thread.
    _Pthread_alloc_per_thread_state() : __next(0)
    {
        memset((void *)__free_list, 0, _S_NFREELISTS * sizeof(__obj *));
    }
    // Returns an object of size __n, and possibly adds to the size-__n free list.
    void *_M_refill(size_t __n);
};

// Pthread-specific allocator.
// The argument specifies the largest object size allocated from per-thread
// free lists.  Larger objects are allocated using malloc_alloc.
// _Max_size must be a power of 2.
template <size_t _Max_size = 128>
class _Pthread_alloc_template {

public: // but only for internal use:

    typedef _Pthread_alloc_obj __obj;

    // Allocates a chunk for __nobjs objects of size __size.  __nobjs may be
    // reduced if it is inconvenient to allocate the requested number.
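    // For example, a request for 20 objects of size 16 may come back with
    // __nobjs set to 12 if only 12 such objects fit in what is left of the
    // current chunk; the caller copes with whatever was actually granted.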
    static char *_S_chunk_alloc(size_t __size, int &__nobjs);

    enum { _S_ALIGN = __STL_DATA_ALIGNMENT };

    static size_t _S_round_up(size_t __bytes) {
        return (((__bytes) + _S_ALIGN - 1) & ~(_S_ALIGN - 1));
    }
    static size_t _S_freelist_index(size_t __bytes) {
        return (((__bytes) + _S_ALIGN - 1)/_S_ALIGN - 1);
    }

private:
    // Chunk allocation state, and other shared state.
    // Protected by _S_chunk_allocator_lock.
    static pthread_mutex_t _S_chunk_allocator_lock;
    static char *_S_start_free;
    static char *_S_end_free;
    static size_t _S_heap_size;
    static _Pthread_alloc_per_thread_state<_Max_size>* _S_free_per_thread_states;
        // Per-thread allocator states that are currently unclaimed by any thread.
    static pthread_key_t _S_key;
        // Pthread key under which the per-thread state is stored.
    static bool _S_key_initialized;
    static void _S_destructor(void *__instance);
        // Function to be called on thread exit to reclaim the per-thread
        // state.
    static _Pthread_alloc_per_thread_state<_Max_size> *_S_new_per_thread_state();
        // Return a recycled or new per-thread state.
    static _Pthread_alloc_per_thread_state<_Max_size> *_S_get_per_thread_state();
        // Ensure that the current thread has an associated
        // per-thread state.
    friend class _M_lock;
    class _M_lock {
    public:
        _M_lock() { pthread_mutex_lock(&_S_chunk_allocator_lock); }
        ~_M_lock() { pthread_mutex_unlock(&_S_chunk_allocator_lock); }
    };

public:

    /* __n must be > 0 */
    static void * allocate(size_t __n)
    {
        __obj * volatile * __my_free_list;
        __obj * __RESTRICT __result;
        _Pthread_alloc_per_thread_state<_Max_size>* __a;

        if (__n > _Max_size) {
            return (malloc_alloc::allocate(__n));
        }
        if (!_S_key_initialized ||
            !(__a = (_Pthread_alloc_per_thread_state<_Max_size>*)
                        pthread_getspecific(_S_key))) {
            __a = _S_get_per_thread_state();
        }
        __my_free_list = __a->__free_list + _S_freelist_index(__n);
        __result = *__my_free_list;
        if (__result == 0) {
            void *__r = __a->_M_refill(_S_round_up(__n));
            return __r;
        }
        *__my_free_list = __result->__free_list_link;
        return (__result);
    }

    /* __p may not be 0 */
    static void deallocate(void *__p, size_t __n)
    {
        __obj *__q = (__obj *)__p;
        __obj * volatile * __my_free_list;
        _Pthread_alloc_per_thread_state<_Max_size>* __a;

        if (__n > _Max_size) {
            malloc_alloc::deallocate(__p, __n);
            return;
        }
        if (!_S_key_initialized ||
            !(__a = (_Pthread_alloc_per_thread_state<_Max_size> *)
                        pthread_getspecific(_S_key))) {
            __a = _S_get_per_thread_state();
        }
        __my_free_list = __a->__free_list + _S_freelist_index(__n);
        __q->__free_list_link = *__my_free_list;
        *__my_free_list = __q;
    }

    static void * reallocate(void *__p, size_t __old_sz, size_t __new_sz);

};

typedef _Pthread_alloc_template<> pthread_alloc;


template <size_t _Max_size>
void _Pthread_alloc_template<_Max_size>::_S_destructor(void *__instance)
{
    _M_lock __lock_instance;    // Need to acquire lock here.
    _Pthread_alloc_per_thread_state<_Max_size>* __s =
        (_Pthread_alloc_per_thread_state<_Max_size> *)__instance;
    __s->__next = _S_free_per_thread_states;
    _S_free_per_thread_states = __s;
}

template <size_t _Max_size>
_Pthread_alloc_per_thread_state<_Max_size> *
_Pthread_alloc_template<_Max_size>::_S_new_per_thread_state()
{
    /* Lock already held here. */
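    /* (This is called only from _S_get_per_thread_state, which acquires
       _S_chunk_allocator_lock before calling it.) */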
    if (0 != _S_free_per_thread_states) {
        _Pthread_alloc_per_thread_state<_Max_size> *__result =
            _S_free_per_thread_states;
        _S_free_per_thread_states = _S_free_per_thread_states->__next;
        return __result;
    } else {
        return new _Pthread_alloc_per_thread_state<_Max_size>;
    }
}

template <size_t _Max_size>
_Pthread_alloc_per_thread_state<_Max_size> *
_Pthread_alloc_template<_Max_size>::_S_get_per_thread_state()
{
    /*REFERENCED*/
    _M_lock __lock_instance;    // Need to acquire lock here.
    _Pthread_alloc_per_thread_state<_Max_size> * __result;
    if (!_S_key_initialized) {
        if (pthread_key_create(&_S_key, _S_destructor)) {
            abort();  // failed
        }
        _S_key_initialized = true;
    }
    __result = _S_new_per_thread_state();
    if (pthread_setspecific(_S_key, __result)) abort();
    return __result;
}

/* We allocate memory in large chunks in order to avoid fragmenting */
/* the malloc heap too much.                                        */
/* We assume that size is properly aligned.                         */
template <size_t _Max_size>
char *_Pthread_alloc_template<_Max_size>
::_S_chunk_alloc(size_t __size, int &__nobjs)
{
  {
    char * __result;
    size_t __total_bytes;
    size_t __bytes_left;
    /*REFERENCED*/
    _M_lock __lock_instance;    // Acquire lock for this routine

    __total_bytes = __size * __nobjs;
    __bytes_left = _S_end_free - _S_start_free;
    if (__bytes_left >= __total_bytes) {
        __result = _S_start_free;
        _S_start_free += __total_bytes;
        return (__result);
    } else if (__bytes_left >= __size) {
        __nobjs = __bytes_left/__size;
        __total_bytes = __size * __nobjs;
        __result = _S_start_free;
        _S_start_free += __total_bytes;
        return (__result);
    } else {
        size_t __bytes_to_get =
            2 * __total_bytes + _S_round_up(_S_heap_size >> 4);
        // Try to make use of the left-over piece.
        if (__bytes_left > 0) {
            _Pthread_alloc_per_thread_state<_Max_size>* __a =
                (_Pthread_alloc_per_thread_state<_Max_size>*)
                    pthread_getspecific(_S_key);
            __obj * volatile * __my_free_list =
                __a->__free_list + _S_freelist_index(__bytes_left);

            ((__obj *)_S_start_free)->__free_list_link = *__my_free_list;
            *__my_free_list = (__obj *)_S_start_free;
        }
#       ifdef _SGI_SOURCE
        // Try to get memory that's aligned on something like a
        // cache line boundary, so as to avoid parceling out
        // parts of the same line to different threads and thus
        // possibly different processors.
        {
            const int __cache_line_size = 128;  // probable upper bound
            __bytes_to_get &= ~(__cache_line_size - 1);
            _S_start_free = (char *)memalign(__cache_line_size, __bytes_to_get);
            if (0 == _S_start_free) {
                _S_start_free = (char *)malloc_alloc::allocate(__bytes_to_get);
            }
        }
#       else  /* !SGI_SOURCE */
        _S_start_free = (char *)malloc_alloc::allocate(__bytes_to_get);
#       endif
        _S_heap_size += __bytes_to_get;
        _S_end_free = _S_start_free + __bytes_to_get;
    }
  }
  // lock is released here
  return (_S_chunk_alloc(__size, __nobjs));
}


/* Returns an object of size n, and optionally adds to size n free list. */
/* We assume that n is properly aligned.                                 */
/* We hold the allocation lock.                                          */
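/* The first object in the new chunk is returned to the caller; the      */
/* remaining objects, if any, are threaded onto this thread's free list  */
/* for size n.                                                            */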
template <size_t _Max_size>
void *_Pthread_alloc_per_thread_state<_Max_size>
::_M_refill(size_t __n)
{
    int __nobjs = 128;
    char * __chunk =
        _Pthread_alloc_template<_Max_size>::_S_chunk_alloc(__n, __nobjs);
    __obj * volatile * __my_free_list;
    __obj * __result;
    __obj * __current_obj, * __next_obj;
    int __i;

    if (1 == __nobjs) {
        return (__chunk);
    }
    __my_free_list = __free_list
        + _Pthread_alloc_template<_Max_size>::_S_freelist_index(__n);

    /* Build free list in chunk */
    __result = (__obj *)__chunk;
    *__my_free_list = __next_obj = (__obj *)(__chunk + __n);
    for (__i = 1; ; __i++) {
        __current_obj = __next_obj;
        __next_obj = (__obj *)((char *)__next_obj + __n);
        if (__nobjs - 1 == __i) {
            __current_obj->__free_list_link = 0;
            break;
        } else {
            __current_obj->__free_list_link = __next_obj;
        }
    }
    return (__result);
}

template <size_t _Max_size>
void *_Pthread_alloc_template<_Max_size>
::reallocate(void *__p, size_t __old_sz, size_t __new_sz)
{
    void * __result;
    size_t __copy_sz;

    if (__old_sz > _Max_size
        && __new_sz > _Max_size) {
        return (realloc(__p, __new_sz));
    }
    if (_S_round_up(__old_sz) == _S_round_up(__new_sz)) return (__p);
    __result = allocate(__new_sz);
    __copy_sz = __new_sz > __old_sz ? __old_sz : __new_sz;
    memcpy(__result, __p, __copy_sz);
    deallocate(__p, __old_sz);
    return (__result);
}

template <size_t _Max_size>
_Pthread_alloc_per_thread_state<_Max_size> *
_Pthread_alloc_template<_Max_size>::_S_free_per_thread_states = 0;

template <size_t _Max_size>
pthread_key_t _Pthread_alloc_template<_Max_size>::_S_key;

template <size_t _Max_size>
bool _Pthread_alloc_template<_Max_size>::_S_key_initialized = false;

template <size_t _Max_size>
pthread_mutex_t _Pthread_alloc_template<_Max_size>::_S_chunk_allocator_lock
    = PTHREAD_MUTEX_INITIALIZER;

template <size_t _Max_size>
char *_Pthread_alloc_template<_Max_size>
::_S_start_free = 0;

template <size_t _Max_size>
char *_Pthread_alloc_template<_Max_size>
::_S_end_free = 0;

template <size_t _Max_size>
size_t _Pthread_alloc_template<_Max_size>
::_S_heap_size = 0;

#ifdef __STL_USE_STD_ALLOCATORS

template <class _Tp>
class pthread_allocator {
  typedef pthread_alloc _S_Alloc;          // The underlying allocator.
public:
  typedef size_t     size_type;
  typedef ptrdiff_t  difference_type;
  typedef _Tp*       pointer;
  typedef const _Tp* const_pointer;
  typedef _Tp&       reference;
  typedef const _Tp& const_reference;
  typedef _Tp        value_type;

  template <class _Up> struct rebind {
    typedef pthread_allocator<_Up> other;
  };

  pthread_allocator() __STL_NOTHROW {}
  pthread_allocator(const pthread_allocator&) __STL_NOTHROW {}
  template <class _Up> pthread_allocator(const pthread_allocator<_Up>&)
    __STL_NOTHROW {}
  ~pthread_allocator() __STL_NOTHROW {}

  pointer address(reference __x) const { return &__x; }
  const_pointer address(const_reference __x) const { return &__x; }

  // __n is permitted to be 0.  The C++ standard says nothing about what
  // the return value is when __n == 0.
  _Tp* allocate(size_type __n, const void* = 0) {
    return __n != 0 ? static_cast<_Tp*>(_S_Alloc::allocate(__n * sizeof(_Tp)))
                    : 0;
  }

  // __p is not permitted to be a null pointer.
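  // __n must be the same value that was passed to the matching call to
  // allocate(); it is used to find the per-thread free list that __p
  // belongs to.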
  void deallocate(pointer __p, size_type __n)
    { _S_Alloc::deallocate(__p, __n * sizeof(_Tp)); }

  size_type max_size() const __STL_NOTHROW
    { return size_t(-1) / sizeof(_Tp); }

  void construct(pointer __p, const _Tp& __val) { new(__p) _Tp(__val); }
  void destroy(pointer __p) { __p->~_Tp(); }
};

template<>
class pthread_allocator<void> {
public:
  typedef size_t      size_type;
  typedef ptrdiff_t   difference_type;
  typedef void*       pointer;
  typedef const void* const_pointer;
  typedef void        value_type;

  template <class _Up> struct rebind {
    typedef pthread_allocator<_Up> other;
  };
};

template <size_t _Max_size>
inline bool operator==(const _Pthread_alloc_template<_Max_size>&,
                       const _Pthread_alloc_template<_Max_size>&)
{
  return true;
}

template <class _T1, class _T2>
inline bool operator==(const pthread_allocator<_T1>&,
                       const pthread_allocator<_T2>&)
{
  return true;
}

template <class _T1, class _T2>
inline bool operator!=(const pthread_allocator<_T1>&,
                       const pthread_allocator<_T2>&)
{
  return false;
}

template <class _Tp, size_t _Max_size>
struct _Alloc_traits<_Tp, _Pthread_alloc_template<_Max_size> >
{
  static const bool _S_instanceless = true;
  typedef simple_alloc<_Tp, _Pthread_alloc_template<_Max_size> > _Alloc_type;
  typedef __allocator<_Tp, _Pthread_alloc_template<_Max_size> >
          allocator_type;
};

template <class _Tp, class _Up, size_t _Max>
struct _Alloc_traits<_Tp, __allocator<_Up, _Pthread_alloc_template<_Max> > >
{
  static const bool _S_instanceless = true;
  typedef simple_alloc<_Tp, _Pthread_alloc_template<_Max> > _Alloc_type;
  typedef __allocator<_Tp, _Pthread_alloc_template<_Max> > allocator_type;
};

template <class _Tp, class _Up>
struct _Alloc_traits<_Tp, pthread_allocator<_Up> >
{
  static const bool _S_instanceless = true;
  typedef simple_alloc<_Tp, _Pthread_alloc_template<> > _Alloc_type;
  typedef pthread_allocator<_Tp> allocator_type;
};


#endif /* __STL_USE_STD_ALLOCATORS */

__STL_END_NAMESPACE

#endif /* __SGI_STL_PTHREAD_ALLOC */

// Local Variables:
// mode:C++
// End:
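
// Usage sketch.  This is illustrative only: the header name <pthread_alloc>
// and the unqualified container names assume an SGI-style STL installation;
// adjust both to match the local setup.
//
//     #include <pthread_alloc>
//     #include <list>
//
//     // Standard-style interface (available when __STL_USE_STD_ALLOCATORS
//     // is defined): each list node comes from the calling thread's pools.
//     list<int, pthread_allocator<int> > __l;
//
//     // SGI-style static interface:
//     void* __p = pthread_alloc::allocate(40);  // served from this thread's
//                                               // size-40 free list
//     pthread_alloc::deallocate(__p, 40);       // the size must match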