// util/thread_local.cc
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  7. // Use of this source code is governed by a BSD-style license that can be
  8. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  9. #include "util/thread_local.h"
  10. #include <cstdlib>
  11. #include "port/likely.h"
  12. #include "util/mutexlock.h"
  13. namespace ROCKSDB_NAMESPACE {
  14. struct Entry {
  15. Entry() : ptr(nullptr) {}
  16. Entry(const Entry& e) : ptr(e.ptr.load(std::memory_order_relaxed)) {}
  17. std::atomic<void*> ptr;
  18. };
  19. class StaticMeta;
  20. // This is the structure that is declared as "thread_local" storage.
  21. // The vector keep list of atomic pointer for all instances for "current"
  22. // thread. The vector is indexed by an Id that is unique in process and
  23. // associated with one ThreadLocalPtr instance. The Id is assigned by a
  24. // global StaticMeta singleton. So if we instantiated 3 ThreadLocalPtr
  25. // instances, each thread will have a ThreadData with a vector of size 3:
  26. // ---------------------------------------------------
  27. // | | instance 1 | instance 2 | instance 3 |
  28. // ---------------------------------------------------
  29. // | thread 1 | void* | void* | void* | <- ThreadData
  30. // ---------------------------------------------------
  31. // | thread 2 | void* | void* | void* | <- ThreadData
  32. // ---------------------------------------------------
  33. // | thread 3 | void* | void* | void* | <- ThreadData
  34. // ---------------------------------------------------
  35. struct ThreadData {
  36. explicit ThreadData(ThreadLocalPtr::StaticMeta* _inst)
  37. : entries(), next(nullptr), prev(nullptr), inst(_inst) {}
  38. std::vector<Entry> entries;
  39. ThreadData* next;
  40. ThreadData* prev;
  41. ThreadLocalPtr::StaticMeta* inst;
  42. };
// Process-wide singleton that owns all bookkeeping shared by every
// ThreadLocalPtr instance: id allocation/recycling, the per-id UnrefHandler
// registry, and the doubly-linked list of every live thread's ThreadData.
class ThreadLocalPtr::StaticMeta {
 public:
  StaticMeta();

  // Return the next available Id
  uint32_t GetId();
  // Return the next available Id without claiming it
  uint32_t PeekId() const;
  // Return the given Id back to the free pool. This also triggers
  // UnrefHandler for associated pointer value (if not NULL) for all threads.
  void ReclaimId(uint32_t id);

  // Return the pointer value for the given id for the current thread.
  void* Get(uint32_t id) const;
  // Reset the pointer value for the given id for the current thread.
  void Reset(uint32_t id, void* ptr);
  // Atomically swap the supplied ptr and return the previous value
  void* Swap(uint32_t id, void* ptr);
  // Atomically compare and swap the provided value only if it equals
  // to expected value.
  bool CompareAndSwap(uint32_t id, void* ptr, void*& expected);
  // Reset all thread local data to replacement, and return non-nullptr
  // data for all existing threads
  void Scrape(uint32_t id, autovector<void*>* ptrs, void* const replacement);
  // Update res by applying func on each thread-local value. Holds a lock that
  // prevents unref handler from running during this call, but clients must
  // still provide external synchronization since the owning thread can
  // access the values without internal locking, e.g., via Get() and Reset().
  void Fold(uint32_t id, FoldFunc func, void* res);

  // Register the UnrefHandler for id
  void SetHandler(uint32_t id, UnrefHandler handler);

  // Protects inst, next_instance_id_, free_instance_ids_, head_,
  // ThreadData.entries
  //
  // Note that here we prefer a function-static variable to the usual global
  // static variable. The reason is that C++ destroys static variables in the
  // reverse order of their construction, but guarantees no particular
  // construction order when global static variables are defined in different
  // files, while function-static variables are initialized when their
  // function is first called. As a result, the construction order of the
  // function-static variables can be controlled by invoking their functions
  // for the first time in the right order.
  //
  // For instance, the following function contains a function-static
  // variable. We place a dummy call of this function inside Env::Default()
  // to ensure its construction happens first.
  static port::Mutex* Mutex();

  // Returns the member mutex of the current StaticMeta. In general,
  // Mutex() should be used instead of this one. However, in case where
  // the static variable inside Instance() goes out of scope, MemberMutex()
  // should be used. One example is OnThreadExit() function.
  port::Mutex* MemberMutex() { return &mutex_; }

 private:
  // Get UnrefHandler for id with acquiring mutex
  // REQUIRES: mutex locked
  UnrefHandler GetHandler(uint32_t id);

  // Triggered before a thread terminates
  static void OnThreadExit(void* ptr);

  // Add current thread's ThreadData to the global chain
  // REQUIRES: mutex locked
  void AddThreadData(ThreadData* d);

  // Remove current thread's ThreadData from the global chain
  // REQUIRES: mutex locked
  void RemoveThreadData(ThreadData* d);

  // Lazily create (and globally register) the calling thread's ThreadData.
  static ThreadData* GetThreadLocal();

  // The id handed out next when free_instance_ids_ is empty.
  uint32_t next_instance_id_;
  // Used to recycle Ids in case ThreadLocalPtr is instantiated and destroyed
  // frequently. This also prevents it from blowing up the vector space.
  autovector<uint32_t> free_instance_ids_;
  // Chain all thread local structure together. This is necessary since
  // when one ThreadLocalPtr gets destroyed, we need to loop over each
  // thread's version of pointer corresponding to that instance and
  // call UnrefHandler for it.
  ThreadData head_;

  // Maps an instance id to its registered UnrefHandler (may be nullptr).
  std::unordered_map<uint32_t, UnrefHandler> handler_map_;

  // The private mutex. Developers should always use Mutex() instead of
  // using this variable directly.
  port::Mutex mutex_;

  // Thread local storage
  static thread_local ThreadData* tls_;

  // Used to make thread exit trigger possible if !defined(OS_MACOSX).
  // Otherwise, used to retrieve thread data.
  pthread_key_t pthread_key_;
};

// Each thread starts with no ThreadData; it is created lazily on first use
// (see GetThreadLocal()).
thread_local ThreadData* ThreadLocalPtr::StaticMeta::tls_ = nullptr;
// Windows doesn't support a per-thread destructor with its
// TLS primitives. So, we build it manually by inserting a
// function to be called on each thread's exit.
// See http://www.codeproject.com/Articles/8113/Thread-Local-Storage-The-C-Way
// and http://www.nynaeve.net/?p=183
//
// Really we do this to have a clear conscience since using TLS with
// thread-pools is iffy, although OK within a request. But otherwise,
// threads have no identity in their modern use.

// This runs on Windows only, called from the System Loader.
#ifdef OS_WIN

// The Windows cleanup routine is invoked from the System Loader with a
// different signature, so we cannot directly hook up the original
// OnThreadExit (which is a private member). Instead StaticMeta shares the
// address of that function with us so we can invoke it.
namespace wintlscleanup {

// This is set to OnThreadExit in StaticMeta singleton constructor
UnrefHandler thread_local_inclass_routine = nullptr;
pthread_key_t thread_local_key = pthread_key_t(-1);

// Static callback function to call with each thread termination.
void NTAPI WinOnThreadExit(PVOID module, DWORD reason, PVOID reserved) {
  // We decided to punt on PROCESS_EXIT
  if (DLL_THREAD_DETACH == reason) {
    if (thread_local_key != pthread_key_t(-1) &&
        thread_local_inclass_routine != nullptr) {
      void* tls = TlsGetValue(thread_local_key);
      if (tls != nullptr) {
        thread_local_inclass_routine(tls);
      }
    }
  }
}

}  // namespace wintlscleanup

// extern "C" suppresses C++ name mangling so we know the symbol name for the
// linker /INCLUDE:symbol pragma below.
extern "C" {

#ifdef _MSC_VER
// The linker must not discard thread_callback_on_exit. (We force a reference
// to this variable with a linker /include:symbol pragma to ensure that.) If
// this variable is discarded, the OnThreadExit function will never be called.
#ifndef _X86_

// .CRT section is merged with .rdata on x64 so it must be constant data.
#pragma const_seg(".CRT$XLB")
// When defining a const variable, it must have external linkage to be sure the
// linker doesn't discard it.
extern const PIMAGE_TLS_CALLBACK p_thread_callback_on_exit;
const PIMAGE_TLS_CALLBACK p_thread_callback_on_exit =
    wintlscleanup::WinOnThreadExit;
// Reset the default section.
#pragma const_seg()

#pragma comment(linker, "/include:_tls_used")
#pragma comment(linker, "/include:p_thread_callback_on_exit")

#else  // _X86_

#pragma data_seg(".CRT$XLB")
PIMAGE_TLS_CALLBACK p_thread_callback_on_exit = wintlscleanup::WinOnThreadExit;
// Reset the default section.
#pragma data_seg()

#pragma comment(linker, "/INCLUDE:__tls_used")
#pragma comment(linker, "/INCLUDE:_p_thread_callback_on_exit")

#endif  // _X86_

#else
// Non-MSVC Windows toolchains: register the cleanup through DllMain instead.
// https://github.com/couchbase/gperftools/blob/master/src/windows/port.cc
BOOL WINAPI DllMain(HINSTANCE h, DWORD dwReason, PVOID pv) {
  if (dwReason == DLL_THREAD_DETACH)
    wintlscleanup::WinOnThreadExit(h, dwReason, pv);
  return TRUE;
}
#endif
}  // extern "C"

#endif  // OS_WIN
  200. void ThreadLocalPtr::InitSingletons() { ThreadLocalPtr::Instance(); }
ThreadLocalPtr::StaticMeta* ThreadLocalPtr::Instance() {
  // Here we prefer a function-static variable to a global static variable,
  // as a function-static variable is initialized when the function is first
  // called. As a result, we can properly control its construction order by
  // properly preparing its first function call.
  //
  // Note that here we decide to make "inst" a static pointer w/o deleting
  // it at the end instead of a static variable. This is to avoid the
  // following destruction-order disaster that happens when a child thread
  // using ThreadLocalPtr dies AFTER the main thread dies: when a child
  // thread happens to use ThreadLocalPtr, it will try to delete its
  // thread-local data in OnThreadExit when the child thread dies. However,
  // OnThreadExit depends on the following variable. As a result, if the
  // main thread dies before any child thread that happens to use
  // ThreadLocalPtr, then the destruction of the following variable would go
  // first, then OnThreadExit, therefore causing an invalid access.
  //
  // The above problem can be solved by using thread_local to store tls_.
  // thread_local supports dynamic construction and destruction of
  // non-primitive typed variables. As a result, we can guarantee the
  // destruction order even when the main thread dies before any child
  // threads.
  static ThreadLocalPtr::StaticMeta* inst = new ThreadLocalPtr::StaticMeta();
  return inst;
}
  226. port::Mutex* ThreadLocalPtr::StaticMeta::Mutex() { return &Instance()->mutex_; }
// Per-thread teardown: unregister this thread's ThreadData, unref every
// stored value, and free the structure. Installed via pthread_key_create
// (and, on Windows, via the wintlscleanup machinery).
void ThreadLocalPtr::StaticMeta::OnThreadExit(void* ptr) {
  auto* tls = static_cast<ThreadData*>(ptr);
  assert(tls != nullptr);
  // Use the StaticMeta pointer cached inside ThreadData instead of calling
  // Instance(): the static variable inside Instance() might already have
  // gone out of scope in case this OnThreadExit is called after the main
  // thread dies.
  auto* inst = tls->inst;
  // Clear the key so the destructor is not re-invoked for this thread.
  pthread_setspecific(inst->pthread_key_, nullptr);

  // MemberMutex() (not Mutex()) for the same lifetime reason as above.
  MutexLock l(inst->MemberMutex());
  inst->RemoveThreadData(tls);
  // Unref stored pointers of current thread from all instances
  uint32_t id = 0;
  for (auto& e : tls->entries) {
    void* raw = e.ptr.load();
    if (raw != nullptr) {
      auto unref = inst->GetHandler(id);
      if (unref != nullptr) {
        unref(raw);
      }
    }
    ++id;
  }
  // Delete thread local structure no matter if it is Mac platform
  delete tls;
}
ThreadLocalPtr::StaticMeta::StaticMeta()
    : next_instance_id_(0), head_(this), pthread_key_(0) {
  if (pthread_key_create(&pthread_key_, &OnThreadExit) != 0) {
    // Without a working TLS destructor key the whole mechanism is broken;
    // there is no way to continue safely.
    abort();
  }

  // OnThreadExit is not getting called on the main thread.
  // Call through the static destructor mechanism to avoid memory leak.
  //
  // Caveats: ~A() will be invoked _after_ ~StaticMeta for the global
  // singleton (destructors are invoked in reverse order of constructor
  // _completion_); the latter must not mutate internal members. This
  // cleanup mechanism inherently relies on use-after-release of the
  // StaticMeta, and is brittle with respect to compiler-specific handling
  // of memory backing destructed statically-scoped objects. Perhaps
  // registering with atexit(3) would be more robust.
  //
  // This is not required on Windows.
#if !defined(OS_WIN)
  static struct A {
    ~A() {
      if (tls_) {
        OnThreadExit(tls_);
      }
    }
  } a;
#endif  // !defined(OS_WIN)

  // head_ is the sentinel node of a circular doubly-linked list; an empty
  // list points to itself.
  head_.next = &head_;
  head_.prev = &head_;

#ifdef OS_WIN
  // Share with Windows its cleanup routine and the key
  wintlscleanup::thread_local_inclass_routine = OnThreadExit;
  wintlscleanup::thread_local_key = pthread_key_;
#endif
}
  287. void ThreadLocalPtr::StaticMeta::AddThreadData(ThreadData* d) {
  288. Mutex()->AssertHeld();
  289. d->next = &head_;
  290. d->prev = head_.prev;
  291. head_.prev->next = d;
  292. head_.prev = d;
  293. }
  294. void ThreadLocalPtr::StaticMeta::RemoveThreadData(ThreadData* d) {
  295. Mutex()->AssertHeld();
  296. d->next->prev = d->prev;
  297. d->prev->next = d->next;
  298. d->next = d->prev = d;
  299. }
// Return the calling thread's ThreadData, creating and registering it on
// first use.
ThreadData* ThreadLocalPtr::StaticMeta::GetThreadLocal() {
  if (UNLIKELY(tls_ == nullptr)) {
    auto* inst = Instance();
    tls_ = new ThreadData(inst);
    {
      // Register it in the global chain, needs to be done before thread exit
      // handler registration
      MutexLock l(Mutex());
      inst->AddThreadData(tls_);
    }
    // Even if it is not OS_MACOSX, need to register value for pthread_key_
    // so that its exit handler will be triggered.
    if (pthread_setspecific(inst->pthread_key_, tls_) != 0) {
      // Roll back the registration before dying so the global chain does not
      // keep a dangling pointer.
      {
        MutexLock l(Mutex());
        inst->RemoveThreadData(tls_);
      }
      delete tls_;
      abort();
    }
  }
  return tls_;
}
  323. void* ThreadLocalPtr::StaticMeta::Get(uint32_t id) const {
  324. auto* tls = GetThreadLocal();
  325. if (UNLIKELY(id >= tls->entries.size())) {
  326. return nullptr;
  327. }
  328. return tls->entries[id].ptr.load(std::memory_order_acquire);
  329. }
  330. void ThreadLocalPtr::StaticMeta::Reset(uint32_t id, void* ptr) {
  331. auto* tls = GetThreadLocal();
  332. if (UNLIKELY(id >= tls->entries.size())) {
  333. // Need mutex to protect entries access within ReclaimId
  334. MutexLock l(Mutex());
  335. tls->entries.resize(id + 1);
  336. }
  337. tls->entries[id].ptr.store(ptr, std::memory_order_release);
  338. }
  339. void* ThreadLocalPtr::StaticMeta::Swap(uint32_t id, void* ptr) {
  340. auto* tls = GetThreadLocal();
  341. if (UNLIKELY(id >= tls->entries.size())) {
  342. // Need mutex to protect entries access within ReclaimId
  343. MutexLock l(Mutex());
  344. tls->entries.resize(id + 1);
  345. }
  346. return tls->entries[id].ptr.exchange(ptr, std::memory_order_acquire);
  347. }
  348. bool ThreadLocalPtr::StaticMeta::CompareAndSwap(uint32_t id, void* ptr,
  349. void*& expected) {
  350. auto* tls = GetThreadLocal();
  351. if (UNLIKELY(id >= tls->entries.size())) {
  352. // Need mutex to protect entries access within ReclaimId
  353. MutexLock l(Mutex());
  354. tls->entries.resize(id + 1);
  355. }
  356. return tls->entries[id].ptr.compare_exchange_strong(
  357. expected, ptr, std::memory_order_release, std::memory_order_relaxed);
  358. }
  359. void ThreadLocalPtr::StaticMeta::Scrape(uint32_t id, autovector<void*>* ptrs,
  360. void* const replacement) {
  361. MutexLock l(Mutex());
  362. for (ThreadData* t = head_.next; t != &head_; t = t->next) {
  363. if (id < t->entries.size()) {
  364. void* ptr =
  365. t->entries[id].ptr.exchange(replacement, std::memory_order_acquire);
  366. if (ptr != nullptr) {
  367. ptrs->push_back(ptr);
  368. }
  369. }
  370. }
  371. }
  372. void ThreadLocalPtr::StaticMeta::Fold(uint32_t id, FoldFunc func, void* res) {
  373. MutexLock l(Mutex());
  374. for (ThreadData* t = head_.next; t != &head_; t = t->next) {
  375. if (id < t->entries.size()) {
  376. void* ptr = t->entries[id].ptr.load();
  377. if (ptr != nullptr) {
  378. func(ptr, res);
  379. }
  380. }
  381. }
  382. }
  383. uint32_t ThreadLocalPtr::TEST_PeekId() { return Instance()->PeekId(); }
  384. void ThreadLocalPtr::StaticMeta::SetHandler(uint32_t id, UnrefHandler handler) {
  385. MutexLock l(Mutex());
  386. handler_map_[id] = handler;
  387. }
  388. UnrefHandler ThreadLocalPtr::StaticMeta::GetHandler(uint32_t id) {
  389. Mutex()->AssertHeld();
  390. auto iter = handler_map_.find(id);
  391. if (iter == handler_map_.end()) {
  392. return nullptr;
  393. }
  394. return iter->second;
  395. }
  396. uint32_t ThreadLocalPtr::StaticMeta::GetId() {
  397. MutexLock l(Mutex());
  398. if (free_instance_ids_.empty()) {
  399. return next_instance_id_++;
  400. }
  401. uint32_t id = free_instance_ids_.back();
  402. free_instance_ids_.pop_back();
  403. return id;
  404. }
  405. uint32_t ThreadLocalPtr::StaticMeta::PeekId() const {
  406. MutexLock l(Mutex());
  407. if (!free_instance_ids_.empty()) {
  408. return free_instance_ids_.back();
  409. }
  410. return next_instance_id_;
  411. }
  412. void ThreadLocalPtr::StaticMeta::ReclaimId(uint32_t id) {
  413. // This id is not used, go through all thread local data and release
  414. // corresponding value
  415. MutexLock l(Mutex());
  416. auto unref = GetHandler(id);
  417. for (ThreadData* t = head_.next; t != &head_; t = t->next) {
  418. if (id < t->entries.size()) {
  419. void* ptr = t->entries[id].ptr.exchange(nullptr);
  420. if (ptr != nullptr && unref != nullptr) {
  421. unref(ptr);
  422. }
  423. }
  424. }
  425. handler_map_[id] = nullptr;
  426. free_instance_ids_.push_back(id);
  427. }
  428. ThreadLocalPtr::ThreadLocalPtr(UnrefHandler handler)
  429. : id_(Instance()->GetId()) {
  430. if (handler != nullptr) {
  431. Instance()->SetHandler(id_, handler);
  432. }
  433. }
  434. ThreadLocalPtr::~ThreadLocalPtr() { Instance()->ReclaimId(id_); }
  435. void* ThreadLocalPtr::Get() const { return Instance()->Get(id_); }
  436. void ThreadLocalPtr::Reset(void* ptr) { Instance()->Reset(id_, ptr); }
  437. void* ThreadLocalPtr::Swap(void* ptr) { return Instance()->Swap(id_, ptr); }
  438. bool ThreadLocalPtr::CompareAndSwap(void* ptr, void*& expected) {
  439. return Instance()->CompareAndSwap(id_, ptr, expected);
  440. }
  441. void ThreadLocalPtr::Scrape(autovector<void*>* ptrs, void* const replacement) {
  442. Instance()->Scrape(id_, ptrs, replacement);
  443. }
  444. void ThreadLocalPtr::Fold(FoldFunc func, void* res) {
  445. Instance()->Fold(id_, func, res);
  446. }
  447. } // namespace ROCKSDB_NAMESPACE