thread_local.cc 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  7. // Use of this source code is governed by a BSD-style license that can be
  8. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  9. #include "util/thread_local.h"
  10. #include "util/mutexlock.h"
  11. #include "port/likely.h"
  12. #include <stdlib.h>
  13. namespace ROCKSDB_NAMESPACE {
  // One slot in a thread's table: the raw pointer that a single ThreadLocalPtr
  // instance currently holds for that thread.
  struct Entry {
    std::atomic<void*> ptr{nullptr};

    // A fresh slot holds no pointer.
    Entry() = default;

    // std::atomic is not copyable, so copy the stored value by hand. The
    // explicit copy constructor is what lets Entry live in a std::vector that
    // gets resized.
    Entry(const Entry& other)
        : ptr(other.ptr.load(std::memory_order_relaxed)) {}
  };
  19. class StaticMeta;
  // This is the structure that is declared as "thread_local" storage.
  // Its vector keeps one atomic pointer per ThreadLocalPtr instance for the
  // "current" thread. The vector is indexed by an Id that is unique within the
  // process and associated with exactly one ThreadLocalPtr instance. The Id is
  // assigned by the global StaticMeta singleton. So if we instantiated 3
  // ThreadLocalPtr instances, each thread will have a ThreadData with a vector
  // of (up to) size 3; a thread's vector is grown lazily, the first time that
  // thread writes to an Id beyond its current size:
  //     -----------------------------------------------------
  //     |          | instance 1 | instance 2 | instance 3 |
  //     -----------------------------------------------------
  //     | thread 1 |   void*    |   void*    |   void*    | <- ThreadData
  //     -----------------------------------------------------
  //     | thread 2 |   void*    |   void*    |   void*    | <- ThreadData
  //     -----------------------------------------------------
  //     | thread 3 |   void*    |   void*    |   void*    | <- ThreadData
  //     -----------------------------------------------------
  35. struct ThreadData {
  36. explicit ThreadData(ThreadLocalPtr::StaticMeta* _inst)
  37. : entries(),
  38. next(nullptr),
  39. prev(nullptr),
  40. inst(_inst) {}
  41. std::vector<Entry> entries;
  42. ThreadData* next;
  43. ThreadData* prev;
  44. ThreadLocalPtr::StaticMeta* inst;
  45. };
  46. class ThreadLocalPtr::StaticMeta {
  47. public:
  48. StaticMeta();
  49. // Return the next available Id
  50. uint32_t GetId();
  51. // Return the next available Id without claiming it
  52. uint32_t PeekId() const;
  53. // Return the given Id back to the free pool. This also triggers
  54. // UnrefHandler for associated pointer value (if not NULL) for all threads.
  55. void ReclaimId(uint32_t id);
  56. // Return the pointer value for the given id for the current thread.
  57. void* Get(uint32_t id) const;
  58. // Reset the pointer value for the given id for the current thread.
  59. void Reset(uint32_t id, void* ptr);
  60. // Atomically swap the supplied ptr and return the previous value
  61. void* Swap(uint32_t id, void* ptr);
  62. // Atomically compare and swap the provided value only if it equals
  63. // to expected value.
  64. bool CompareAndSwap(uint32_t id, void* ptr, void*& expected);
  65. // Reset all thread local data to replacement, and return non-nullptr
  66. // data for all existing threads
  67. void Scrape(uint32_t id, autovector<void*>* ptrs, void* const replacement);
  68. // Update res by applying func on each thread-local value. Holds a lock that
  69. // prevents unref handler from running during this call, but clients must
  70. // still provide external synchronization since the owning thread can
  71. // access the values without internal locking, e.g., via Get() and Reset().
  72. void Fold(uint32_t id, FoldFunc func, void* res);
  73. // Register the UnrefHandler for id
  74. void SetHandler(uint32_t id, UnrefHandler handler);
  75. // protect inst, next_instance_id_, free_instance_ids_, head_,
  76. // ThreadData.entries
  77. //
  78. // Note that here we prefer function static variable instead of the usual
  79. // global static variable. The reason is that c++ destruction order of
  80. // static variables in the reverse order of their construction order.
  81. // However, C++ does not guarantee any construction order when global
  82. // static variables are defined in different files, while the function
  83. // static variables are initialized when their function are first called.
  84. // As a result, the construction order of the function static variables
  85. // can be controlled by properly invoke their first function calls in
  86. // the right order.
  87. //
  88. // For instance, the following function contains a function static
  89. // variable. We place a dummy function call of this inside
  90. // Env::Default() to ensure the construction order of the construction
  91. // order.
  92. static port::Mutex* Mutex();
  93. // Returns the member mutex of the current StaticMeta. In general,
  94. // Mutex() should be used instead of this one. However, in case where
  95. // the static variable inside Instance() goes out of scope, MemberMutex()
  96. // should be used. One example is OnThreadExit() function.
  97. port::Mutex* MemberMutex() { return &mutex_; }
  98. private:
  99. // Get UnrefHandler for id with acquiring mutex
  100. // REQUIRES: mutex locked
  101. UnrefHandler GetHandler(uint32_t id);
  102. // Triggered before a thread terminates
  103. static void OnThreadExit(void* ptr);
  104. // Add current thread's ThreadData to the global chain
  105. // REQUIRES: mutex locked
  106. void AddThreadData(ThreadData* d);
  107. // Remove current thread's ThreadData from the global chain
  108. // REQUIRES: mutex locked
  109. void RemoveThreadData(ThreadData* d);
  110. static ThreadData* GetThreadLocal();
  111. uint32_t next_instance_id_;
  112. // Used to recycle Ids in case ThreadLocalPtr is instantiated and destroyed
  113. // frequently. This also prevents it from blowing up the vector space.
  114. autovector<uint32_t> free_instance_ids_;
  115. // Chain all thread local structure together. This is necessary since
  116. // when one ThreadLocalPtr gets destroyed, we need to loop over each
  117. // thread's version of pointer corresponding to that instance and
  118. // call UnrefHandler for it.
  119. ThreadData head_;
  120. std::unordered_map<uint32_t, UnrefHandler> handler_map_;
  121. // The private mutex. Developers should always use Mutex() instead of
  122. // using this variable directly.
  123. port::Mutex mutex_;
  124. #ifdef ROCKSDB_SUPPORT_THREAD_LOCAL
  125. // Thread local storage
  126. static __thread ThreadData* tls_;
  127. #endif
  128. // Used to make thread exit trigger possible if !defined(OS_MACOSX).
  129. // Otherwise, used to retrieve thread data.
  130. pthread_key_t pthread_key_;
  131. };
  #ifdef ROCKSDB_SUPPORT_THREAD_LOCAL
  // Out-of-class definition of the per-thread ThreadData pointer; each thread
  // starts with nullptr until GetThreadLocal() creates its ThreadData.
  __thread ThreadData* ThreadLocalPtr::StaticMeta::tls_ = nullptr;
  #endif  // ROCKSDB_SUPPORT_THREAD_LOCAL
  // Windows doesn't support a per-thread destructor with its
  // TLS primitives. So, we build it manually by inserting a
  // function to be called on each thread's exit.
  // See http://www.codeproject.com/Articles/8113/Thread-Local-Storage-The-C-Way
  // and http://www.nynaeve.net/?p=183
  //
  // Really, we do this to have a clear conscience, since using TLS with
  // thread pools is iffy (although it is OK within a request); otherwise,
  // threads have no identity in their modern use.
  // This runs on Windows only, and is called from the System Loader.
  146. #ifdef OS_WIN
  // The Windows cleanup routine is invoked from the System Loader with a
  // different signature, so we cannot directly hook up the original
  // OnThreadExit, which is a private member. Instead, the StaticMeta class
  // shares the address of that function with us so that we can invoke it.
  152. namespace wintlscleanup {
  153. // This is set to OnThreadExit in StaticMeta singleton constructor
  154. UnrefHandler thread_local_inclass_routine = nullptr;
  155. pthread_key_t thread_local_key = pthread_key_t (-1);
  156. // Static callback function to call with each thread termination.
  157. void NTAPI WinOnThreadExit(PVOID module, DWORD reason, PVOID reserved) {
  158. // We decided to punt on PROCESS_EXIT
  159. if (DLL_THREAD_DETACH == reason) {
  160. if (thread_local_key != pthread_key_t(-1) &&
  161. thread_local_inclass_routine != nullptr) {
  162. void* tls = TlsGetValue(thread_local_key);
  163. if (tls != nullptr) {
  164. thread_local_inclass_routine(tls);
  165. }
  166. }
  167. }
  168. }
  169. } // wintlscleanup
  // extern "C" suppresses C++ name mangling so we know the symbol name for the
  // linker /INCLUDE:symbol pragma below.
  172. extern "C" {
  173. #ifdef _MSC_VER
  174. // The linker must not discard thread_callback_on_exit. (We force a reference
  175. // to this variable with a linker /include:symbol pragma to ensure that.) If
  176. // this variable is discarded, the OnThreadExit function will never be called.
  177. #ifndef _X86_
  178. // .CRT section is merged with .rdata on x64 so it must be constant data.
  179. #pragma const_seg(".CRT$XLB")
  180. // When defining a const variable, it must have external linkage to be sure the
  181. // linker doesn't discard it.
  182. extern const PIMAGE_TLS_CALLBACK p_thread_callback_on_exit;
  183. const PIMAGE_TLS_CALLBACK p_thread_callback_on_exit =
  184. wintlscleanup::WinOnThreadExit;
  185. // Reset the default section.
  186. #pragma const_seg()
  187. #pragma comment(linker, "/include:_tls_used")
  188. #pragma comment(linker, "/include:p_thread_callback_on_exit")
  189. #else // _X86_
  190. #pragma data_seg(".CRT$XLB")
  191. PIMAGE_TLS_CALLBACK p_thread_callback_on_exit = wintlscleanup::WinOnThreadExit;
  192. // Reset the default section.
  193. #pragma data_seg()
  194. #pragma comment(linker, "/INCLUDE:__tls_used")
  195. #pragma comment(linker, "/INCLUDE:_p_thread_callback_on_exit")
  196. #endif // _X86_
  197. #else
  198. // https://github.com/couchbase/gperftools/blob/master/src/windows/port.cc
  199. BOOL WINAPI DllMain(HINSTANCE h, DWORD dwReason, PVOID pv) {
  200. if (dwReason == DLL_THREAD_DETACH)
  201. wintlscleanup::WinOnThreadExit(h, dwReason, pv);
  202. return TRUE;
  203. }
  204. #endif
  205. } // extern "C"
  206. #endif // OS_WIN
  207. void ThreadLocalPtr::InitSingletons() { ThreadLocalPtr::Instance(); }
  208. ThreadLocalPtr::StaticMeta* ThreadLocalPtr::Instance() {
  209. // Here we prefer function static variable instead of global
  210. // static variable as function static variable is initialized
  211. // when the function is first call. As a result, we can properly
  212. // control their construction order by properly preparing their
  213. // first function call.
  214. //
  215. // Note that here we decide to make "inst" a static pointer w/o deleting
  216. // it at the end instead of a static variable. This is to avoid the following
  217. // destruction order disaster happens when a child thread using ThreadLocalPtr
  218. // dies AFTER the main thread dies: When a child thread happens to use
  219. // ThreadLocalPtr, it will try to delete its thread-local data on its
  220. // OnThreadExit when the child thread dies. However, OnThreadExit depends
  221. // on the following variable. As a result, if the main thread dies before any
  222. // child thread happen to use ThreadLocalPtr dies, then the destruction of
  223. // the following variable will go first, then OnThreadExit, therefore causing
  224. // invalid access.
  225. //
  226. // The above problem can be solved by using thread_local to store tls_ instead
  227. // of using __thread. The major difference between thread_local and __thread
  228. // is that thread_local supports dynamic construction and destruction of
  229. // non-primitive typed variables. As a result, we can guarantee the
  230. // destruction order even when the main thread dies before any child threads.
  231. // However, thread_local is not supported in all compilers that accept -std=c++11
  232. // (e.g., eg Mac with XCode < 8. XCode 8+ supports thread_local).
  233. static ThreadLocalPtr::StaticMeta* inst = new ThreadLocalPtr::StaticMeta();
  234. return inst;
  235. }
  236. port::Mutex* ThreadLocalPtr::StaticMeta::Mutex() { return &Instance()->mutex_; }
  237. void ThreadLocalPtr::StaticMeta::OnThreadExit(void* ptr) {
  238. auto* tls = static_cast<ThreadData*>(ptr);
  239. assert(tls != nullptr);
  240. // Use the cached StaticMeta::Instance() instead of directly calling
  241. // the variable inside StaticMeta::Instance() might already go out of
  242. // scope here in case this OnThreadExit is called after the main thread
  243. // dies.
  244. auto* inst = tls->inst;
  245. pthread_setspecific(inst->pthread_key_, nullptr);
  246. MutexLock l(inst->MemberMutex());
  247. inst->RemoveThreadData(tls);
  248. // Unref stored pointers of current thread from all instances
  249. uint32_t id = 0;
  250. for (auto& e : tls->entries) {
  251. void* raw = e.ptr.load();
  252. if (raw != nullptr) {
  253. auto unref = inst->GetHandler(id);
  254. if (unref != nullptr) {
  255. unref(raw);
  256. }
  257. }
  258. ++id;
  259. }
  260. // Delete thread local structure no matter if it is Mac platform
  261. delete tls;
  262. }
  263. ThreadLocalPtr::StaticMeta::StaticMeta()
  264. : next_instance_id_(0),
  265. head_(this),
  266. pthread_key_(0) {
  267. if (pthread_key_create(&pthread_key_, &OnThreadExit) != 0) {
  268. abort();
  269. }
  270. // OnThreadExit is not getting called on the main thread.
  271. // Call through the static destructor mechanism to avoid memory leak.
  272. //
  273. // Caveats: ~A() will be invoked _after_ ~StaticMeta for the global
  274. // singleton (destructors are invoked in reverse order of constructor
  275. // _completion_); the latter must not mutate internal members. This
  276. // cleanup mechanism inherently relies on use-after-release of the
  277. // StaticMeta, and is brittle with respect to compiler-specific handling
  278. // of memory backing destructed statically-scoped objects. Perhaps
  279. // registering with atexit(3) would be more robust.
  280. //
  281. // This is not required on Windows.
  282. #if !defined(OS_WIN)
  283. static struct A {
  284. ~A() {
  285. #ifndef ROCKSDB_SUPPORT_THREAD_LOCAL
  286. ThreadData* tls_ =
  287. static_cast<ThreadData*>(pthread_getspecific(Instance()->pthread_key_));
  288. #endif
  289. if (tls_) {
  290. OnThreadExit(tls_);
  291. }
  292. }
  293. } a;
  294. #endif // !defined(OS_WIN)
  295. head_.next = &head_;
  296. head_.prev = &head_;
  297. #ifdef OS_WIN
  298. // Share with Windows its cleanup routine and the key
  299. wintlscleanup::thread_local_inclass_routine = OnThreadExit;
  300. wintlscleanup::thread_local_key = pthread_key_;
  301. #endif
  302. }
  303. void ThreadLocalPtr::StaticMeta::AddThreadData(ThreadData* d) {
  304. Mutex()->AssertHeld();
  305. d->next = &head_;
  306. d->prev = head_.prev;
  307. head_.prev->next = d;
  308. head_.prev = d;
  309. }
  310. void ThreadLocalPtr::StaticMeta::RemoveThreadData(
  311. ThreadData* d) {
  312. Mutex()->AssertHeld();
  313. d->next->prev = d->prev;
  314. d->prev->next = d->next;
  315. d->next = d->prev = d;
  316. }
  317. ThreadData* ThreadLocalPtr::StaticMeta::GetThreadLocal() {
  318. #ifndef ROCKSDB_SUPPORT_THREAD_LOCAL
  319. // Make this local variable name look like a member variable so that we
  320. // can share all the code below
  321. ThreadData* tls_ =
  322. static_cast<ThreadData*>(pthread_getspecific(Instance()->pthread_key_));
  323. #endif
  324. if (UNLIKELY(tls_ == nullptr)) {
  325. auto* inst = Instance();
  326. tls_ = new ThreadData(inst);
  327. {
  328. // Register it in the global chain, needs to be done before thread exit
  329. // handler registration
  330. MutexLock l(Mutex());
  331. inst->AddThreadData(tls_);
  332. }
  333. // Even it is not OS_MACOSX, need to register value for pthread_key_ so that
  334. // its exit handler will be triggered.
  335. if (pthread_setspecific(inst->pthread_key_, tls_) != 0) {
  336. {
  337. MutexLock l(Mutex());
  338. inst->RemoveThreadData(tls_);
  339. }
  340. delete tls_;
  341. abort();
  342. }
  343. }
  344. return tls_;
  345. }
  346. void* ThreadLocalPtr::StaticMeta::Get(uint32_t id) const {
  347. auto* tls = GetThreadLocal();
  348. if (UNLIKELY(id >= tls->entries.size())) {
  349. return nullptr;
  350. }
  351. return tls->entries[id].ptr.load(std::memory_order_acquire);
  352. }
  353. void ThreadLocalPtr::StaticMeta::Reset(uint32_t id, void* ptr) {
  354. auto* tls = GetThreadLocal();
  355. if (UNLIKELY(id >= tls->entries.size())) {
  356. // Need mutex to protect entries access within ReclaimId
  357. MutexLock l(Mutex());
  358. tls->entries.resize(id + 1);
  359. }
  360. tls->entries[id].ptr.store(ptr, std::memory_order_release);
  361. }
  362. void* ThreadLocalPtr::StaticMeta::Swap(uint32_t id, void* ptr) {
  363. auto* tls = GetThreadLocal();
  364. if (UNLIKELY(id >= tls->entries.size())) {
  365. // Need mutex to protect entries access within ReclaimId
  366. MutexLock l(Mutex());
  367. tls->entries.resize(id + 1);
  368. }
  369. return tls->entries[id].ptr.exchange(ptr, std::memory_order_acquire);
  370. }
  371. bool ThreadLocalPtr::StaticMeta::CompareAndSwap(uint32_t id, void* ptr,
  372. void*& expected) {
  373. auto* tls = GetThreadLocal();
  374. if (UNLIKELY(id >= tls->entries.size())) {
  375. // Need mutex to protect entries access within ReclaimId
  376. MutexLock l(Mutex());
  377. tls->entries.resize(id + 1);
  378. }
  379. return tls->entries[id].ptr.compare_exchange_strong(
  380. expected, ptr, std::memory_order_release, std::memory_order_relaxed);
  381. }
  382. void ThreadLocalPtr::StaticMeta::Scrape(uint32_t id, autovector<void*>* ptrs,
  383. void* const replacement) {
  384. MutexLock l(Mutex());
  385. for (ThreadData* t = head_.next; t != &head_; t = t->next) {
  386. if (id < t->entries.size()) {
  387. void* ptr =
  388. t->entries[id].ptr.exchange(replacement, std::memory_order_acquire);
  389. if (ptr != nullptr) {
  390. ptrs->push_back(ptr);
  391. }
  392. }
  393. }
  394. }
  395. void ThreadLocalPtr::StaticMeta::Fold(uint32_t id, FoldFunc func, void* res) {
  396. MutexLock l(Mutex());
  397. for (ThreadData* t = head_.next; t != &head_; t = t->next) {
  398. if (id < t->entries.size()) {
  399. void* ptr = t->entries[id].ptr.load();
  400. if (ptr != nullptr) {
  401. func(ptr, res);
  402. }
  403. }
  404. }
  405. }
  406. uint32_t ThreadLocalPtr::TEST_PeekId() {
  407. return Instance()->PeekId();
  408. }
  409. void ThreadLocalPtr::StaticMeta::SetHandler(uint32_t id, UnrefHandler handler) {
  410. MutexLock l(Mutex());
  411. handler_map_[id] = handler;
  412. }
  413. UnrefHandler ThreadLocalPtr::StaticMeta::GetHandler(uint32_t id) {
  414. Mutex()->AssertHeld();
  415. auto iter = handler_map_.find(id);
  416. if (iter == handler_map_.end()) {
  417. return nullptr;
  418. }
  419. return iter->second;
  420. }
  421. uint32_t ThreadLocalPtr::StaticMeta::GetId() {
  422. MutexLock l(Mutex());
  423. if (free_instance_ids_.empty()) {
  424. return next_instance_id_++;
  425. }
  426. uint32_t id = free_instance_ids_.back();
  427. free_instance_ids_.pop_back();
  428. return id;
  429. }
  430. uint32_t ThreadLocalPtr::StaticMeta::PeekId() const {
  431. MutexLock l(Mutex());
  432. if (!free_instance_ids_.empty()) {
  433. return free_instance_ids_.back();
  434. }
  435. return next_instance_id_;
  436. }
  437. void ThreadLocalPtr::StaticMeta::ReclaimId(uint32_t id) {
  438. // This id is not used, go through all thread local data and release
  439. // corresponding value
  440. MutexLock l(Mutex());
  441. auto unref = GetHandler(id);
  442. for (ThreadData* t = head_.next; t != &head_; t = t->next) {
  443. if (id < t->entries.size()) {
  444. void* ptr = t->entries[id].ptr.exchange(nullptr);
  445. if (ptr != nullptr && unref != nullptr) {
  446. unref(ptr);
  447. }
  448. }
  449. }
  450. handler_map_[id] = nullptr;
  451. free_instance_ids_.push_back(id);
  452. }
  453. ThreadLocalPtr::ThreadLocalPtr(UnrefHandler handler)
  454. : id_(Instance()->GetId()) {
  455. if (handler != nullptr) {
  456. Instance()->SetHandler(id_, handler);
  457. }
  458. }
  459. ThreadLocalPtr::~ThreadLocalPtr() {
  460. Instance()->ReclaimId(id_);
  461. }
  462. void* ThreadLocalPtr::Get() const {
  463. return Instance()->Get(id_);
  464. }
  465. void ThreadLocalPtr::Reset(void* ptr) {
  466. Instance()->Reset(id_, ptr);
  467. }
  468. void* ThreadLocalPtr::Swap(void* ptr) {
  469. return Instance()->Swap(id_, ptr);
  470. }
  471. bool ThreadLocalPtr::CompareAndSwap(void* ptr, void*& expected) {
  472. return Instance()->CompareAndSwap(id_, ptr, expected);
  473. }
  474. void ThreadLocalPtr::Scrape(autovector<void*>* ptrs, void* const replacement) {
  475. Instance()->Scrape(id_, ptrs, replacement);
  476. }
  477. void ThreadLocalPtr::Fold(FoldFunc func, void* res) {
  478. Instance()->Fold(id_, func, res);
  479. }
  480. } // namespace ROCKSDB_NAMESPACE