backup_engine.cc 150 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  7. // Use of this source code is governed by a BSD-style license that can be
  8. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  9. #include <algorithm>
  10. #include <atomic>
  11. #include <cinttypes>
  12. #include <cstdlib>
  13. #include <exception>
  14. #include <functional>
  15. #include <future>
  16. #include <limits>
  17. #include <map>
  18. #include <mutex>
  19. #include <optional>
  20. #include <sstream>
  21. #include <string>
  22. #include <thread>
  23. #include <unordered_map>
  24. #include <unordered_set>
  25. #include <vector>
  26. #include "env/composite_env_wrapper.h"
  27. #include "env/fs_readonly.h"
  28. #include "env/fs_remap.h"
  29. #include "file/filename.h"
  30. #include "file/line_file_reader.h"
  31. #include "file/sequence_file_reader.h"
  32. #include "file/writable_file_writer.h"
  33. #include "logging/logging.h"
  34. #include "monitoring/iostats_context_imp.h"
  35. #include "options/options_helper.h"
  36. #include "port/port.h"
  37. #include "rocksdb/advanced_options.h"
  38. #include "rocksdb/env.h"
  39. #include "rocksdb/rate_limiter.h"
  40. #include "rocksdb/statistics.h"
  41. #include "rocksdb/transaction_log.h"
  42. #include "table/sst_file_dumper.h"
  43. #include "test_util/sync_point.h"
  44. #include "util/cast_util.h"
  45. #include "util/channel.h"
  46. #include "util/coding.h"
  47. #include "util/crc32c.h"
  48. #include "util/math.h"
  49. #include "util/rate_limiter_impl.h"
  50. #include "util/string_util.h"
  51. #include "utilities/backup/backup_engine_impl.h"
  52. #include "utilities/checkpoint/checkpoint_impl.h"
  53. namespace ROCKSDB_NAMESPACE {
  54. namespace {
  55. using ShareFilesNaming = BackupEngineOptions::ShareFilesNaming;
  56. constexpr BackupID kLatestBackupIDMarker = static_cast<BackupID>(-2);
  57. inline uint32_t ChecksumHexToInt32(const std::string& checksum_hex) {
  58. std::string checksum_str;
  59. Slice(checksum_hex).DecodeHex(&checksum_str);
  60. return EndianSwapValue(DecodeFixed32(checksum_str.c_str()));
  61. }
  62. inline std::string ChecksumStrToHex(const std::string& checksum_str) {
  63. return Slice(checksum_str).ToString(true);
  64. }
  65. inline std::string ChecksumInt32ToHex(const uint32_t& checksum_value) {
  66. std::string checksum_str;
  67. PutFixed32(&checksum_str, EndianSwapValue(checksum_value));
  68. return ChecksumStrToHex(checksum_str);
  69. }
  70. const std::string kPrivateDirName = "private";
  71. const std::string kMetaDirName = "meta";
  72. const std::string kSharedDirName = "shared";
  73. const std::string kSharedChecksumDirName = "shared_checksum";
  74. const std::string kPrivateDirSlash = kPrivateDirName + "/";
  75. const std::string kMetaDirSlash = kMetaDirName + "/";
  76. const std::string kSharedDirSlash = kSharedDirName + "/";
  77. const std::string kSharedChecksumDirSlash = kSharedChecksumDirName + "/";
  78. } // namespace
  79. void BackupStatistics::IncrementNumberSuccessBackup() {
  80. number_success_backup++;
  81. }
  82. void BackupStatistics::IncrementNumberFailBackup() { number_fail_backup++; }
  83. uint32_t BackupStatistics::GetNumberSuccessBackup() const {
  84. return number_success_backup;
  85. }
  86. uint32_t BackupStatistics::GetNumberFailBackup() const {
  87. return number_fail_backup;
  88. }
  89. std::string BackupStatistics::ToString() const {
  90. char result[50];
  91. snprintf(result, sizeof(result), "# success backup: %u, # fail backup: %u",
  92. GetNumberSuccessBackup(), GetNumberFailBackup());
  93. return result;
  94. }
  95. void BackupEngineOptions::Dump(Logger* logger) const {
  96. ROCKS_LOG_INFO(logger, " Options.backup_dir: %s",
  97. backup_dir.c_str());
  98. ROCKS_LOG_INFO(logger, " Options.backup_env: %p", backup_env);
  99. ROCKS_LOG_INFO(logger, " Options.share_table_files: %d",
  100. static_cast<int>(share_table_files));
  101. ROCKS_LOG_INFO(logger, " Options.info_log: %p", info_log);
  102. ROCKS_LOG_INFO(logger, " Options.sync: %d",
  103. static_cast<int>(sync));
  104. ROCKS_LOG_INFO(logger, " Options.destroy_old_data: %d",
  105. static_cast<int>(destroy_old_data));
  106. ROCKS_LOG_INFO(logger, " Options.backup_log_files: %d",
  107. static_cast<int>(backup_log_files));
  108. ROCKS_LOG_INFO(logger, " Options.backup_rate_limit: %" PRIu64,
  109. backup_rate_limit);
  110. ROCKS_LOG_INFO(logger, " Options.restore_rate_limit: %" PRIu64,
  111. restore_rate_limit);
  112. ROCKS_LOG_INFO(logger, "Options.max_background_operations: %d",
  113. max_background_operations);
  114. }
  115. namespace {
  116. // -------- BackupEngineImpl class ---------
  117. class BackupEngineImpl {
  118. public:
  119. BackupEngineImpl(const BackupEngineOptions& options, Env* db_env,
  120. bool read_only = false);
  121. ~BackupEngineImpl();
  122. IOStatus CreateNewBackupWithMetadata(const CreateBackupOptions& options,
  123. DB* db, const std::string& app_metadata,
  124. BackupID* new_backup_id_ptr);
  125. IOStatus PurgeOldBackups(uint32_t num_backups_to_keep);
  126. IOStatus DeleteBackup(BackupID backup_id);
  127. void StopBackup() { stop_backup_.store(true, std::memory_order_release); }
  128. IOStatus GarbageCollect();
  129. // The returned BackupInfos are in chronological order, which means the
  130. // latest backup comes last.
  131. void GetBackupInfo(std::vector<BackupInfo>* backup_info,
  132. bool include_file_details) const;
  133. Status GetBackupInfo(BackupID backup_id, BackupInfo* backup_info,
  134. bool include_file_details = false) const;
  135. void GetCorruptedBackups(std::vector<BackupID>* corrupt_backup_ids) const;
  136. IOStatus RestoreDBFromBackup(
  137. const RestoreOptions& options, BackupID backup_id,
  138. const std::string& db_dir, const std::string& wal_dir,
  139. const std::list<const BackupEngineImpl*>& locked_restore_from_dirs) const;
  140. IOStatus VerifyBackup(BackupID backup_id,
  141. bool verify_with_checksum = false) const;
  142. IOStatus Initialize();
  143. ShareFilesNaming GetNamingNoFlags() const {
  144. return options_.share_files_with_checksum_naming &
  145. BackupEngineOptions::kMaskNoNamingFlags;
  146. }
  147. ShareFilesNaming GetNamingFlags() const {
  148. return options_.share_files_with_checksum_naming &
  149. BackupEngineOptions::kMaskNamingFlags;
  150. }
  151. void TEST_SetDefaultRateLimitersClock(
  152. const std::shared_ptr<SystemClock>& backup_rate_limiter_clock,
  153. const std::shared_ptr<SystemClock>& restore_rate_limiter_clock) {
  154. if (backup_rate_limiter_clock) {
  155. static_cast<GenericRateLimiter*>(options_.backup_rate_limiter.get())
  156. ->TEST_SetClock(backup_rate_limiter_clock);
  157. }
  158. if (restore_rate_limiter_clock) {
  159. static_cast<GenericRateLimiter*>(options_.restore_rate_limiter.get())
  160. ->TEST_SetClock(restore_rate_limiter_clock);
  161. }
  162. }
  163. private:
  164. void DeleteChildren(const std::string& dir,
  165. const std::unordered_set<uint64_t>& files_to_keep,
  166. uint32_t file_type_filter = 0) const;
  167. IOStatus DeleteBackupNoGC(BackupID backup_id);
  168. // Extends the "result" map with pathname->size mappings for the contents of
  169. // "dir" in "env". Pathnames are prefixed with "dir".
  170. IOStatus ReadChildFileCurrentSizes(
  171. const std::string& dir, const std::shared_ptr<FileSystem>&,
  172. std::unordered_map<std::string, uint64_t>* result) const;
  173. struct FileInfo {
  174. FileInfo(const std::string& fname, uint64_t sz, const std::string& checksum,
  175. const std::string& id, const std::string& sid, Temperature _temp)
  176. : refs(0),
  177. filename(fname),
  178. size(sz),
  179. checksum_hex(checksum),
  180. db_id(id),
  181. db_session_id(sid),
  182. temp(_temp) {}
  183. FileInfo(const FileInfo&) = delete;
  184. FileInfo& operator=(const FileInfo&) = delete;
  185. int refs;
  186. // Relative path from backup dir
  187. const std::string filename;
  188. const uint64_t size;
  189. // crc32c checksum as hex. empty == unknown / unavailable
  190. std::string checksum_hex;
  191. // DB identities
  192. // db_id is obtained for potential usage in the future but not used
  193. // currently
  194. const std::string db_id;
  195. // db_session_id appears in the backup SST filename if the table naming
  196. // option is kUseDbSessionId
  197. const std::string db_session_id;
  198. Temperature temp;
  199. std::string GetDbFileName() const {
  200. std::string rv;
  201. // extract the filename part
  202. size_t slash = filename.find_last_of('/');
  203. // file will either be shared/<file>, shared_checksum/<file_crc32c_size>,
  204. // shared_checksum/<file_session>, shared_checksum/<file_crc32c_session>,
  205. // or private/<number>/<file>
  206. assert(slash != std::string::npos);
  207. rv = filename.substr(slash + 1);
  208. // if the file was in shared_checksum, extract the real file name
  209. // in this case the file is <number>_<checksum>_<size>.<type>,
  210. // <number>_<session>.<type>, or <number>_<checksum>_<session>.<type>
  211. if (filename.substr(0, slash) == kSharedChecksumDirName) {
  212. rv = GetFileFromChecksumFile(rv);
  213. }
  214. return rv;
  215. }
  216. };
  217. // TODO: deprecate this function once we migrate all BackupEngine's rate
  218. // limiting to lower-level ones (i.e, ones in file access wrapper level like
  219. // `WritableFileWriter`)
  220. static void LoopRateLimitRequestHelper(const size_t total_bytes_to_request,
  221. RateLimiter* rate_limiter,
  222. const Env::IOPriority pri,
  223. Statistics* stats,
  224. const RateLimiter::OpType op_type);
  225. static inline std::string WithoutTrailingSlash(const std::string& path) {
  226. if (path.empty() || path.back() != '/') {
  227. return path;
  228. } else {
  229. return path.substr(path.size() - 1);
  230. }
  231. }
  232. static inline std::string WithTrailingSlash(const std::string& path) {
  233. if (path.empty() || path.back() != '/') {
  234. return path + '/';
  235. } else {
  236. return path;
  237. }
  238. }
  239. // A filesystem wrapper that makes shared backup files appear to be in the
  240. // private backup directory (dst_dir), so that the private backup dir can
  241. // be opened as a read-only DB.
  242. class RemapSharedFileSystem : public RemapFileSystem {
  243. public:
  244. RemapSharedFileSystem(const std::shared_ptr<FileSystem>& base,
  245. const std::string& dst_dir,
  246. const std::string& src_base_dir,
  247. const std::vector<std::shared_ptr<FileInfo>>& files)
  248. : RemapFileSystem(base),
  249. dst_dir_(WithoutTrailingSlash(dst_dir)),
  250. dst_dir_slash_(WithTrailingSlash(dst_dir)),
  251. src_base_dir_(WithTrailingSlash(src_base_dir)) {
  252. for (auto& info : files) {
  253. if (!StartsWith(info->filename, kPrivateDirSlash)) {
  254. assert(StartsWith(info->filename, kSharedDirSlash) ||
  255. StartsWith(info->filename, kSharedChecksumDirSlash));
  256. remaps_[info->GetDbFileName()] = info;
  257. }
  258. }
  259. }
  260. const char* Name() const override {
  261. return "BackupEngineImpl::RemapSharedFileSystem";
  262. }
  263. // Sometimes a directory listing is required in opening a DB
  264. IOStatus GetChildren(const std::string& dir, const IOOptions& options,
  265. std::vector<std::string>* result,
  266. IODebugContext* dbg) override {
  267. IOStatus s = RemapFileSystem::GetChildren(dir, options, result, dbg);
  268. if (s.ok() && (dir == dst_dir_ || dir == dst_dir_slash_)) {
  269. // Assume remapped files exist
  270. for (auto& r : remaps_) {
  271. result->push_back(r.first);
  272. }
  273. }
  274. return s;
  275. }
  276. // Sometimes a directory listing is required in opening a DB
  277. IOStatus GetChildrenFileAttributes(const std::string& dir,
  278. const IOOptions& options,
  279. std::vector<FileAttributes>* result,
  280. IODebugContext* dbg) override {
  281. IOStatus s =
  282. RemapFileSystem::GetChildrenFileAttributes(dir, options, result, dbg);
  283. if (s.ok() && (dir == dst_dir_ || dir == dst_dir_slash_)) {
  284. // Assume remapped files exist with recorded size
  285. for (auto& r : remaps_) {
  286. result->emplace_back(); // clean up with C++20
  287. FileAttributes& attr = result->back();
  288. attr.name = r.first;
  289. attr.size_bytes = r.second->size;
  290. }
  291. }
  292. return s;
  293. }
  294. protected:
  295. // When a file in dst_dir is requested, see if we need to remap to shared
  296. // file path.
  297. std::pair<IOStatus, std::string> EncodePath(
  298. const std::string& path) override {
  299. if (path.empty() || path[0] != '/') {
  300. return {IOStatus::InvalidArgument(path, "Not an absolute path"), ""};
  301. }
  302. std::pair<IOStatus, std::string> rv{IOStatus(), path};
  303. if (StartsWith(path, dst_dir_slash_)) {
  304. std::string relative = path.substr(dst_dir_slash_.size());
  305. auto it = remaps_.find(relative);
  306. if (it != remaps_.end()) {
  307. rv.second = src_base_dir_ + it->second->filename;
  308. }
  309. }
  310. return rv;
  311. }
  312. private:
  313. // Absolute path to a directory that some extra files will be mapped into.
  314. const std::string dst_dir_;
  315. // Includes a trailing slash.
  316. const std::string dst_dir_slash_;
  317. // Absolute path to a directory containing some files to be mapped into
  318. // dst_dir_. Includes a trailing slash.
  319. const std::string src_base_dir_;
  320. // If remaps_[x] exists, attempt to read dst_dir_ / x should instead read
  321. // src_base_dir_ / remaps_[x]->filename. FileInfo is used to maximize
  322. // sharing with other backup data in memory.
  323. std::unordered_map<std::string, std::shared_ptr<FileInfo>> remaps_;
  324. };
  325. class BackupMeta {
  326. public:
  327. BackupMeta(
  328. const std::string& meta_filename, const std::string& meta_tmp_filename,
  329. std::unordered_map<std::string, std::shared_ptr<FileInfo>>* file_infos,
  330. Env* env, const std::shared_ptr<FileSystem>& fs)
  331. : timestamp_(0),
  332. sequence_number_(0),
  333. size_(0),
  334. meta_filename_(meta_filename),
  335. meta_tmp_filename_(meta_tmp_filename),
  336. file_infos_(file_infos),
  337. env_(env),
  338. fs_(fs) {}
  339. BackupMeta(const BackupMeta&) = delete;
  340. BackupMeta& operator=(const BackupMeta&) = delete;
  341. ~BackupMeta() = default;
  342. void RecordTimestamp() {
  343. // Best effort
  344. Status s = env_->GetCurrentTime(&timestamp_);
  345. if (!s.ok()) {
  346. timestamp_ = /* something clearly fabricated */ 1;
  347. }
  348. }
  349. int64_t GetTimestamp() const { return timestamp_; }
  350. uint64_t GetSize() const { return size_; }
  351. uint32_t GetNumberFiles() const {
  352. return static_cast<uint32_t>(files_.size());
  353. }
  354. void SetSequenceNumber(uint64_t sequence_number) {
  355. sequence_number_ = sequence_number;
  356. }
  357. uint64_t GetSequenceNumber() const { return sequence_number_; }
  358. const std::string& GetAppMetadata() const { return app_metadata_; }
  359. void SetAppMetadata(const std::string& app_metadata) {
  360. app_metadata_ = app_metadata;
  361. }
  362. IOStatus AddFile(std::shared_ptr<FileInfo> file_info);
  363. void AddExcludedFile(const std::string& relative_file) {
  364. excluded_files_.emplace_back(relative_file);
  365. }
  366. IOStatus Delete(bool delete_meta = true);
  367. bool Empty() const { return files_.empty(); }
  368. std::shared_ptr<FileInfo> GetFile(const std::string& filename) const {
  369. auto it = file_infos_->find(filename);
  370. if (it == file_infos_->end()) {
  371. return nullptr;
  372. }
  373. return it->second;
  374. }
  375. const std::vector<std::shared_ptr<FileInfo>>& GetFiles() const {
  376. return files_;
  377. }
  378. const std::vector<BackupExcludedFileInfo>& GetExcludedFiles() const {
  379. return excluded_files_;
  380. }
  381. // @param abs_path_to_size Pre-fetched file sizes (bytes).
  382. IOStatus LoadFromFile(
  383. const std::string& backup_dir,
  384. const std::unordered_map<std::string, uint64_t>& abs_path_to_size,
  385. RateLimiter* rate_limiter, Logger* info_log,
  386. std::unordered_set<std::string>* reported_ignored_fields);
  387. IOStatus StoreToFile(
  388. bool sync, int schema_version,
  389. const TEST_BackupMetaSchemaOptions* schema_test_options);
  390. std::string GetInfoString() {
  391. std::ostringstream ss;
  392. ss << "Timestamp: " << timestamp_ << std::endl;
  393. char human_size[16];
  394. AppendHumanBytes(size_, human_size, sizeof(human_size));
  395. ss << "Size: " << human_size << std::endl;
  396. ss << "Files:" << std::endl;
  397. for (const auto& file : files_) {
  398. AppendHumanBytes(file->size, human_size, sizeof(human_size));
  399. ss << file->filename << ", size " << human_size << ", refs "
  400. << file->refs << std::endl;
  401. }
  402. return ss.str();
  403. }
  404. const std::shared_ptr<Env>& GetEnvForOpen() const {
  405. if (!env_for_open_) {
  406. // Lazy initialize
  407. // Find directories
  408. std::string dst_dir = meta_filename_;
  409. auto i = dst_dir.rfind(kMetaDirSlash);
  410. assert(i != std::string::npos);
  411. std::string src_base_dir = dst_dir.substr(0, i);
  412. dst_dir.replace(i, kMetaDirSlash.size(), kPrivateDirSlash);
  413. // Make the RemapSharedFileSystem
  414. std::shared_ptr<FileSystem> remap_fs =
  415. std::make_shared<RemapSharedFileSystem>(fs_, dst_dir, src_base_dir,
  416. files_);
  417. // Make it read-only for safety
  418. remap_fs = std::make_shared<ReadOnlyFileSystem>(remap_fs);
  419. // Make an Env wrapper
  420. env_for_open_ = std::make_shared<CompositeEnvWrapper>(env_, remap_fs);
  421. }
  422. return env_for_open_;
  423. }
  424. private:
  // Printed verbatim as "Timestamp" by GetInfoString().
  // NOTE(review): units are not visible here -- confirm where it is set.
  int64_t timestamp_;
  // sequence number is only approximate, should not be used
  // by clients
  uint64_t sequence_number_;
  // Printed (human-readable) as "Size" by GetInfoString().
  uint64_t size_;
  std::string app_metadata_;
  // Path of this backup's meta file. GetEnvForOpen() requires it to contain
  // kMetaDirSlash.
  std::string const meta_filename_;
  // NOTE(review): presumably the temporary path used while writing the meta
  // file -- confirm against StoreToFile().
  std::string const meta_tmp_filename_;
  // files with relative paths (without "/" prefix!!)
  std::vector<std::shared_ptr<FileInfo>> files_;
  std::vector<BackupExcludedFileInfo> excluded_files_;
  // Lookup table from file name to FileInfo, consulted by GetFile(). Held as
  // a raw pointer; BackupEngineImpl::Initialize() constructs each BackupMeta
  // with the address of the engine's backuped_file_infos_ map.
  std::unordered_map<std::string, std::shared_ptr<FileInfo>>* file_infos_;
  Env* env_;
  // Built lazily inside the const method GetEnvForOpen(), hence mutable.
  mutable std::shared_ptr<Env> env_for_open_;
  std::shared_ptr<FileSystem> fs_;
  IOOptions iooptions_ = IOOptions();
  441. }; // BackupMeta
  // Fills `backup_info` from the metadata `meta` of backup `id`; the
  // definition lives outside this view.
  // NOTE(review): `include_file_details` presumably controls whether
  // per-file information is copied as well -- confirm against the definition.
  void SetBackupInfoFromBackupMeta(BackupID id, const BackupMeta& meta,
                                   BackupInfo* backup_info,
                                   bool include_file_details) const;
  // Infers the set of existing destination files that can be retained
  // during an incremental restore, in accordance with the user-selected
  // strategy (RestoreOptions::Mode). A file can be retained only if it is
  // deemed to exist in the referenced backup set.
  //
  // NOTE(review): `files_to_keep` looks like the output set (keyed by
  // uint64_t, presumably file numbers) and `unowned_backups` is taken by
  // non-const reference, so it may be modified -- confirm against the
  // definition.
  void InferDBFilesToRetainInRestore(
      const std::vector<std::pair<const BackupEngineImpl*, const FileInfo*>>&
          restore_file_infos,
      std::unordered_set<std::string>& unowned_backups,
      const std::string& db_dir, RestoreOptions::Mode mode,
      std::unordered_set<uint64_t>& files_to_keep) const;
  455. inline std::string GetAbsolutePath(
  456. const std::string& relative_path = "") const {
  457. assert(relative_path.size() == 0 || relative_path[0] != '/');
  458. return options_.backup_dir + "/" + relative_path;
  459. }
  460. inline std::string GetPrivateFileRel(BackupID backup_id, bool tmp = false,
  461. const std::string& file = "") const {
  462. assert(file.size() == 0 || file[0] != '/');
  463. return kPrivateDirSlash + std::to_string(backup_id) + (tmp ? ".tmp" : "") +
  464. "/" + file;
  465. }
  466. inline std::string GetSharedFileRel(const std::string& file = "",
  467. bool tmp = false) const {
  468. assert(file.size() == 0 || file[0] != '/');
  469. return kSharedDirSlash + std::string(tmp ? "." : "") + file +
  470. (tmp ? ".tmp" : "");
  471. }
  472. inline std::string GetSharedFileWithChecksumRel(const std::string& file = "",
  473. bool tmp = false) const {
  474. assert(file.size() == 0 || file[0] != '/');
  475. return kSharedChecksumDirSlash + std::string(tmp ? "." : "") + file +
  476. (tmp ? ".tmp" : "");
  477. }
  478. inline bool UseLegacyNaming(const std::string& sid) const {
  479. return GetNamingNoFlags() ==
  480. BackupEngineOptions::kLegacyCrc32cAndFileSize ||
  481. sid.empty();
  482. }
  483. inline std::string GetSharedFileWithChecksum(
  484. const std::string& file, const std::string& checksum_hex,
  485. const uint64_t file_size, const std::string& db_session_id) const {
  486. assert(file.size() == 0 || file[0] != '/');
  487. std::string file_copy = file;
  488. if (UseLegacyNaming(db_session_id)) {
  489. assert(!checksum_hex.empty());
  490. file_copy.insert(file_copy.find_last_of('.'),
  491. "_" + std::to_string(ChecksumHexToInt32(checksum_hex)) +
  492. "_" + std::to_string(file_size));
  493. } else {
  494. file_copy.insert(file_copy.find_last_of('.'), "_s" + db_session_id);
  495. if (GetNamingFlags() & BackupEngineOptions::kFlagIncludeFileSize) {
  496. file_copy.insert(file_copy.find_last_of('.'),
  497. "_" + std::to_string(file_size));
  498. }
  499. }
  500. return file_copy;
  501. }
  502. inline std::string GenerateSharedFileWithDbSessionIdAndSize(
  503. const std::string& file, const uint64_t file_size,
  504. const std::string& db_session_id) const {
  505. assert(file.size() == 0 || file[0] != '/');
  506. std::string file_copy = file;
  507. file_copy.insert(file_copy.find_last_of('.'), "_s" + db_session_id);
  508. file_copy.insert(file_copy.find_last_of('.'),
  509. "_" + std::to_string(file_size));
  510. return kSharedChecksumDirSlash + file_copy;
  511. }
  // Recovers the plain file name from a shared-with-checksum style name by
  // removing everything from the first '_' up to (but not including) the
  // last '.', e.g. "000010_1234_5678.sst" -> "000010.sst".
  //
  // A name without any '_' carries no suffix to strip and is returned
  // unchanged, rather than handing npos to std::string::erase (which throws
  // std::out_of_range). When no '.' follows the first '_', everything from
  // that '_' to the end of the name is removed.
  //
  // `file` must not begin with '/'.
  static inline std::string GetFileFromChecksumFile(const std::string& file) {
    assert(file.size() == 0 || file[0] != '/');
    const size_t first_underscore = file.find('_');
    if (first_underscore == std::string::npos) {
      return file;
    }
    const size_t last_dot = file.rfind('.');
    // No usable extension after the suffix: strip through the end.
    const size_t count =
        (last_dot == std::string::npos || last_dot < first_underscore)
            ? std::string::npos
            : last_dot - first_underscore;
    std::string file_copy = file;
    file_copy.erase(first_underscore, count);
    return file_copy;
  }
  519. inline std::string GetBackupMetaFile(BackupID backup_id, bool tmp) const {
  520. return GetAbsolutePath(kMetaDirName) + "/" + (tmp ? "." : "") +
  521. std::to_string(backup_id) + (tmp ? ".tmp" : "");
  522. }
  // Copies the file `src` to `dst`, or creates `dst` from `contents`.
  //
  // If size_limit == 0, there is no size limit, copy everything.
  //
  // Exactly one of src and contents must be non-empty.
  //
  // @param src If non-empty, the file is copied from this pathname.
  // @param dst Pathname of the file to write.
  // @param contents If non-empty, the file will be created with these contents.
  // @param src_temperature Pass in expected temperature of src, return back
  // temperature reported by FileSystem
  // NOTE(review): `size` and `checksum_hex` look like output parameters and
  // `bytes_toward_next_callback` like a running counter used to pace
  // `progress_callback` -- confirm against the definition.
  IOStatus CopyOrCreateFile(const std::string& src, const std::string& dst,
                            const std::string& contents, uint64_t size_limit,
                            Env* src_env, Env* dst_env,
                            const EnvOptions& src_env_options, bool sync,
                            RateLimiter* rate_limiter,
                            std::function<void()> progress_callback,
                            Temperature* src_temperature,
                            Temperature dst_temperature,
                            uint64_t* bytes_toward_next_callback,
                            uint64_t* size, std::string* checksum_hex);
  // Returns the I/O buffer size to use; defined outside this view.
  // NOTE(review): presumably derived from `rate_limiter` with
  // kDefaultCopyFileBufferSize as a fallback -- confirm against the
  // definition.
  uint64_t CalculateIOBufferSize(RateLimiter* rate_limiter) const;
  // Reads the file `src` through `src_fs` and reports its checksum via
  // `checksum_hex`; defined outside this view.
  // NOTE(review): `size_limit` presumably follows the CopyOrCreateFile
  // convention (0 means no limit) -- confirm against the definition.
  IOStatus ReadFileAndComputeChecksum(const std::string& src,
                                      const std::shared_ptr<FileSystem>& src_fs,
                                      const EnvOptions& src_env_options,
                                      uint64_t size_limit,
                                      std::string* checksum_hex,
                                      const Temperature src_temperature) const;
  // Obtain db_id and db_session_id from the table properties of file_path.
  // The values are returned through the `db_id` and `db_session_id` output
  // parameters; the definition lives outside this view.
  Status GetFileDbIdentities(Env* src_env, const EnvOptions& src_env_options,
                             const std::string& file_path,
                             Temperature file_temp, RateLimiter* rate_limiter,
                             std::string* db_id,
                             std::string* db_session_id) const;
  554. struct WorkItemResult {
  555. WorkItemResult()
  556. : size(0),
  557. expected_src_temperature(Temperature::kUnknown),
  558. current_src_temperature(Temperature::kUnknown) {}
  559. WorkItemResult(const WorkItemResult& other) = delete;
  560. WorkItemResult& operator=(const WorkItemResult& other) = delete;
  561. WorkItemResult(WorkItemResult&& o) noexcept { *this = std::move(o); }
  562. WorkItemResult& operator=(WorkItemResult&& o) noexcept {
  563. size = o.size;
  564. checksum_hex = std::move(o.checksum_hex);
  565. db_id = std::move(o.db_id);
  566. db_session_id = std::move(o.db_session_id);
  567. io_status = std::move(o.io_status);
  568. expected_src_temperature = o.expected_src_temperature;
  569. current_src_temperature = o.current_src_temperature;
  570. return *this;
  571. }
  572. ~WorkItemResult() {
  573. // The Status needs to be ignored here for two reasons.
  574. // First, if the BackupEngineImpl shuts down with jobs outstanding, then
  575. // it is possible that the Status in the future/promise is never read,
  576. // resulting in an unchecked Status. Second, if there are items in the
  577. // channel when the BackupEngineImpl is shutdown, these will also have
  578. // Status that have not been checked. This
  579. // TODO: Fix those issues so that the Status
  580. io_status.PermitUncheckedError();
  581. }
  582. uint64_t size;
  583. std::string checksum_hex;
  584. std::string db_id;
  585. std::string db_session_id;
  586. IOStatus io_status;
  587. Temperature expected_src_temperature = Temperature::kUnknown;
  588. Temperature current_src_temperature = Temperature::kUnknown;
  589. };
  // Kind of job carried by a WorkItem. Per the comment in Initialize(), the
  // background threads perform file creation / copy or checksum computations
  // from work_items_.
  enum WorkItemType : uint64_t {
    // Copy a file, or create one from in-memory contents. Default for
    // WorkItem.
    CopyOrCreate = 1U,
    // Compute a file's checksum.
    ComputeChecksum = 2U,
  };
  594. // Exactly one of src_path and contents must be non-empty. If src_path is
  595. // non-empty, the file is copied from this pathname. Otherwise, if contents is
  596. // non-empty, the file will be created at dst_path with these contents.
  597. struct WorkItem {
  598. std::string src_path;
  599. std::string dst_path;
  600. Temperature src_temperature;
  601. Temperature dst_temperature;
  602. std::string contents;
  603. Env* src_env;
  604. Env* dst_env;
  605. EnvOptions src_env_options;
  606. bool sync;
  607. RateLimiter* rate_limiter;
  608. uint64_t size_limit;
  609. Statistics* stats;
  610. std::promise<WorkItemResult> result;
  611. std::function<void()> progress_callback;
  612. std::string src_checksum_func_name;
  613. std::string src_checksum_hex;
  614. std::string db_id;
  615. std::string db_session_id;
  616. WorkItemType type;
  617. WorkItem()
  618. : src_temperature(Temperature::kUnknown),
  619. dst_temperature(Temperature::kUnknown),
  620. src_env(nullptr),
  621. dst_env(nullptr),
  622. src_env_options(),
  623. sync(false),
  624. rate_limiter(nullptr),
  625. size_limit(0),
  626. stats(nullptr),
  627. src_checksum_func_name(kUnknownFileChecksumFuncName),
  628. type(WorkItemType::CopyOrCreate) {}
  629. WorkItem(const WorkItem&) = delete;
  630. WorkItem& operator=(const WorkItem&) = delete;
  631. WorkItem(WorkItem&& o) noexcept { *this = std::move(o); }
  632. WorkItem& operator=(WorkItem&& o) noexcept {
  633. src_path = std::move(o.src_path);
  634. dst_path = std::move(o.dst_path);
  635. src_temperature = std::move(o.src_temperature);
  636. dst_temperature = std::move(o.dst_temperature);
  637. contents = std::move(o.contents);
  638. src_env = o.src_env;
  639. dst_env = o.dst_env;
  640. src_env_options = std::move(o.src_env_options);
  641. sync = o.sync;
  642. rate_limiter = o.rate_limiter;
  643. size_limit = o.size_limit;
  644. stats = o.stats;
  645. result = std::move(o.result);
  646. progress_callback = std::move(o.progress_callback);
  647. src_checksum_func_name = std::move(o.src_checksum_func_name);
  648. src_checksum_hex = std::move(o.src_checksum_hex);
  649. db_id = std::move(o.db_id);
  650. db_session_id = std::move(o.db_session_id);
  651. src_temperature = o.src_temperature;
  652. type = std::move(o.type);
  653. return *this;
  654. }
  655. WorkItem(std::string _src_path, std::string _dst_path,
  656. const Temperature _src_temperature,
  657. const Temperature _dst_temperature, std::string _contents,
  658. Env* _src_env, Env* _dst_env, EnvOptions _src_env_options,
  659. bool _sync, RateLimiter* _rate_limiter, uint64_t _size_limit,
  660. Statistics* _stats, std::function<void()> _progress_callback = {},
  661. const std::string& _src_checksum_func_name =
  662. kUnknownFileChecksumFuncName,
  663. const std::string& _src_checksum_hex = "",
  664. const std::string& _db_id = "",
  665. const std::string& _db_session_id = "",
  666. WorkItemType _type = WorkItemType::CopyOrCreate)
  667. : src_path(std::move(_src_path)),
  668. dst_path(std::move(_dst_path)),
  669. src_temperature(_src_temperature),
  670. dst_temperature(_dst_temperature),
  671. contents(std::move(_contents)),
  672. src_env(_src_env),
  673. dst_env(_dst_env),
  674. src_env_options(std::move(_src_env_options)),
  675. sync(_sync),
  676. rate_limiter(_rate_limiter),
  677. size_limit(_size_limit),
  678. stats(_stats),
  679. progress_callback(_progress_callback),
  680. src_checksum_func_name(_src_checksum_func_name),
  681. src_checksum_hex(_src_checksum_hex),
  682. db_id(_db_id),
  683. db_session_id(_db_session_id),
  684. type(_type) {}
  685. ~WorkItem() = default;
  686. };
  687. struct BackupAfterCopyOrCreateWorkItem {
  688. std::future<WorkItemResult> result;
  689. bool shared;
  690. bool needed_to_copy;
  691. Env* backup_env;
  692. std::string dst_path_tmp;
  693. std::string dst_path;
  694. std::string dst_relative;
  695. BackupAfterCopyOrCreateWorkItem()
  696. : shared(false), needed_to_copy(false), backup_env(nullptr) {}
  697. BackupAfterCopyOrCreateWorkItem(
  698. BackupAfterCopyOrCreateWorkItem&& o) noexcept {
  699. *this = std::move(o);
  700. }
  701. BackupAfterCopyOrCreateWorkItem& operator=(
  702. BackupAfterCopyOrCreateWorkItem&& o) noexcept {
  703. result = std::move(o.result);
  704. shared = o.shared;
  705. needed_to_copy = o.needed_to_copy;
  706. backup_env = o.backup_env;
  707. dst_path_tmp = std::move(o.dst_path_tmp);
  708. dst_path = std::move(o.dst_path);
  709. dst_relative = std::move(o.dst_relative);
  710. return *this;
  711. }
  712. BackupAfterCopyOrCreateWorkItem(std::future<WorkItemResult>&& _result,
  713. bool _shared, bool _needed_to_copy,
  714. Env* _backup_env, std::string _dst_path_tmp,
  715. std::string _dst_path,
  716. std::string _dst_relative)
  717. : result(std::move(_result)),
  718. shared(_shared),
  719. needed_to_copy(_needed_to_copy),
  720. backup_env(_backup_env),
  721. dst_path_tmp(std::move(_dst_path_tmp)),
  722. dst_path(std::move(_dst_path)),
  723. dst_relative(std::move(_dst_relative)) {}
  724. };
  // A WorkItem bundled with its BackupAfterCopyOrCreateWorkItem; this is the
  // element type of the `excludable_items` deque taken by
  // AddBackupFileWorkItem().
  using BackupWorkItemPair =
      std::pair<WorkItem, BackupAfterCopyOrCreateWorkItem>;
  727. struct ComputeChecksumWorkItem {
  728. std::future<WorkItemResult> result;
  729. std::string file_path;
  730. uint64_t file_number;
  731. ComputeChecksumWorkItem(std::future<WorkItemResult>&& _result,
  732. const std::string& _file_path,
  733. uint64_t _file_number)
  734. : result(std::move(_result)),
  735. file_path(_file_path),
  736. file_number(_file_number) {}
  737. ComputeChecksumWorkItem(const ComputeChecksumWorkItem&) = delete;
  738. ComputeChecksumWorkItem& operator=(const ComputeChecksumWorkItem&) = delete;
  739. ComputeChecksumWorkItem(ComputeChecksumWorkItem&& o) noexcept {
  740. *this = std::move(o);
  741. }
  742. ComputeChecksumWorkItem& operator=(ComputeChecksumWorkItem&& o) noexcept {
  743. result = std::move(o.result);
  744. file_path = std::move(o.file_path);
  745. file_number = o.file_number;
  746. return *this;
  747. }
  748. ~ComputeChecksumWorkItem() = default;
  749. };
  750. struct RestoreAfterCopyOrCreateWorkItem {
  751. std::future<WorkItemResult> result;
  752. std::string from_file;
  753. std::string to_file;
  754. std::string checksum_hex;
  755. RestoreAfterCopyOrCreateWorkItem() {}
  756. RestoreAfterCopyOrCreateWorkItem(std::future<WorkItemResult>&& _result,
  757. const std::string& _from_file,
  758. const std::string& _to_file,
  759. const std::string& _checksum_hex)
  760. : result(std::move(_result)),
  761. from_file(_from_file),
  762. to_file(_to_file),
  763. checksum_hex(_checksum_hex) {}
  764. RestoreAfterCopyOrCreateWorkItem(
  765. RestoreAfterCopyOrCreateWorkItem&& o) noexcept {
  766. *this = std::move(o);
  767. }
  768. RestoreAfterCopyOrCreateWorkItem& operator=(
  769. RestoreAfterCopyOrCreateWorkItem&& o) noexcept {
  770. result = std::move(o.result);
  771. checksum_hex = std::move(o.checksum_hex);
  772. return *this;
  773. }
  774. };
  // False after construction. Initialize() asserts that it is still false
  // and then sets it, so Initialize() is meant to run only once.
  bool initialized_;
  std::mutex byte_report_mutex_;
  // Queue of WorkItems for the background threads_. The destructor sends EOF
  // on it before joining the threads.
  mutable channel<WorkItem> work_items_;
  // Background workers, started by Initialize() (one per
  // options_.max_background_operations) and joined in the destructor.
  std::vector<port::Thread> threads_;
  // Set to CpuPriority::kNormal by Initialize() before the workers start.
  std::atomic<CpuPriority> threads_cpu_priority_;
  // Certain operations like PurgeOldBackups and DeleteBackup will trigger
  // automatic GarbageCollect (true) unless we've already done one in this
  // session and have not failed to delete backup files since then (false).
  bool might_need_garbage_collect_ = true;
  // Adds a file to the backup work queue to be copied or created if it doesn't
  // already exist.
  //
  // Exactly one of src_dir and contents must be non-empty.
  //
  // @param src_dir If non-empty, the file in this directory named fname will be
  // copied.
  // @param fname Name of destination file and, in case of copy, source file.
  // Starts with "/".
  // @param contents If non-empty, the file will be created with these contents.
  // NOTE(review): `live_dst_paths`, `backup_items_to_finish` and
  // `excludable_items` are mutable containers that this call presumably
  // appends to -- confirm against the definition.
  IOStatus AddBackupFileWorkItem(
      std::unordered_set<std::string>& live_dst_paths,
      std::deque<BackupAfterCopyOrCreateWorkItem>& backup_items_to_finish,
      std::deque<BackupWorkItemPair>* excludable_items, BackupID backup_id,
      bool shared, const std::string& src_dir,
      const std::string& fname,  // starts with "/"
      const EnvOptions& src_env_options, RateLimiter* rate_limiter,
      FileType file_type, uint64_t size_bytes, Statistics* stats,
      uint64_t size_limit = 0, bool shared_checksum = false,
      std::function<void()> progress_callback = {},
      const std::string& contents = std::string(),
      const std::string& src_checksum_func_name = kUnknownFileChecksumFuncName,
      const std::string& src_checksum_str = kUnknownFileChecksum,
      const Temperature src_temperature = Temperature::kUnknown);
  // backup state data
  // Highest backup id found in the meta directory by Initialize(), whether
  // or not that backup loaded successfully (0 if there is none).
  BackupID latest_backup_id_;
  // Highest backup id that Initialize() loaded successfully (0 if none).
  BackupID latest_valid_backup_id_;
  // Backups keyed by id. Initialize() moves backups that fail to load with a
  // Corruption or NotSupported status into corrupt_backups_.
  std::map<BackupID, std::unique_ptr<BackupMeta>> backups_;
  // Corrupt backups keyed by id, each with the status that marked it corrupt.
  // The destructor permits these statuses to go unchecked.
  std::map<BackupID, std::pair<IOStatus, std::unique_ptr<BackupMeta>>>
      corrupt_backups_;
  // File-name -> FileInfo table; its address is handed to every BackupMeta
  // that Initialize() creates.
  std::unordered_map<std::string, std::shared_ptr<FileInfo>>
      backuped_file_infos_;
  // Initialized to false by the constructor.
  std::atomic<bool> stop_backup_;
  // options data
  BackupEngineOptions options_;
  Env* db_env_;
  // options.backup_env when that is non-null, otherwise db_env_.
  Env* backup_env_;
  // directories
  // Opened by Initialize() only when the engine is not read-only;
  // shared_directory_ additionally requires options_.share_table_files.
  std::unique_ptr<FSDirectory> backup_directory_;
  std::unique_ptr<FSDirectory> shared_directory_;
  std::unique_ptr<FSDirectory> meta_directory_;
  std::unique_ptr<FSDirectory> private_directory_;
  static const size_t kDefaultCopyFileBufferSize = 5 * 1024 * 1024LL;  // 5MB
  bool read_only_;
  BackupStatistics backup_statistics_;
  // Passed to BackupMeta::LoadFromFile() for every backup loaded by
  // Initialize().
  std::unordered_set<std::string> reported_ignored_fields_;
  static const size_t kMaxAppMetaSize = 1024 * 1024;  // 1MB
  // File systems of db_env_ and backup_env_, cached by the constructor.
  std::shared_ptr<FileSystem> db_fs_;
  std::shared_ptr<FileSystem> backup_fs_;
  IOOptions io_options_ = IOOptions();
 public:
  // Test-only hook, set through
  // BackupEngineImplThreadSafe::TEST_SetBackupMetaSchemaOptions().
  std::unique_ptr<TEST_BackupMetaSchemaOptions> schema_test_options_;
  835. };
  836. // -------- BackupEngineImplThreadSafe class ---------
  837. // This locking layer for thread safety in the public API is layered on
  838. // top to prevent accidental recursive locking with RWMutex, which is UB.
  839. // Note: BackupEngineReadOnlyBase inherited twice, but has no fields
  840. class BackupEngineImplThreadSafe : public BackupEngine,
  841. public BackupEngineReadOnly {
  842. public:
  843. BackupEngineImplThreadSafe(const BackupEngineOptions& options, Env* db_env,
  844. bool read_only = false)
  845. : impl_(options, db_env, read_only) {}
  846. ~BackupEngineImplThreadSafe() override = default;
  847. using BackupEngine::CreateNewBackupWithMetadata;
  848. IOStatus CreateNewBackupWithMetadata(const CreateBackupOptions& options,
  849. DB* db, const std::string& app_metadata,
  850. BackupID* new_backup_id) override {
  851. WriteLock lock(&mutex_);
  852. return impl_.CreateNewBackupWithMetadata(options, db, app_metadata,
  853. new_backup_id);
  854. }
  855. IOStatus PurgeOldBackups(uint32_t num_backups_to_keep) override {
  856. WriteLock lock(&mutex_);
  857. return impl_.PurgeOldBackups(num_backups_to_keep);
  858. }
  859. IOStatus DeleteBackup(BackupID backup_id) override {
  860. WriteLock lock(&mutex_);
  861. return impl_.DeleteBackup(backup_id);
  862. }
  863. void StopBackup() override {
  864. // No locking needed
  865. impl_.StopBackup();
  866. }
  867. IOStatus GarbageCollect() override {
  868. WriteLock lock(&mutex_);
  869. return impl_.GarbageCollect();
  870. }
  871. Status GetLatestBackupInfo(BackupInfo* backup_info,
  872. bool include_file_details = false) const override {
  873. ReadLock lock(&mutex_);
  874. return impl_.GetBackupInfo(kLatestBackupIDMarker, backup_info,
  875. include_file_details);
  876. }
  877. Status GetBackupInfo(BackupID backup_id, BackupInfo* backup_info,
  878. bool include_file_details = false) const override {
  879. ReadLock lock(&mutex_);
  880. return impl_.GetBackupInfo(backup_id, backup_info, include_file_details);
  881. }
  882. void GetBackupInfo(std::vector<BackupInfo>* backup_info,
  883. bool include_file_details) const override {
  884. ReadLock lock(&mutex_);
  885. impl_.GetBackupInfo(backup_info, include_file_details);
  886. }
  887. void GetCorruptedBackups(
  888. std::vector<BackupID>* corrupt_backup_ids) const override {
  889. ReadLock lock(&mutex_);
  890. impl_.GetCorruptedBackups(corrupt_backup_ids);
  891. }
  892. using BackupEngine::RestoreDBFromBackup;
  893. IOStatus RestoreDBFromBackup(const RestoreOptions& options,
  894. BackupID backup_id, const std::string& db_dir,
  895. const std::string& wal_dir) const override {
  896. // TSAN reports a lock inversion (potential deadlock) if we acquire read
  897. // locks in different orders. Assuming the implementation of RWMutex
  898. // allows simultaneous read locks, there should be no deadlock, because
  899. // there is no write lock involved here. Nevertheless, to appease TSAN and
  900. // in case of degraded RWMutex implementation, we lock the BackupEngines
  901. // including this one and those in options.alternate_dirs in a consistent
  902. // order.
  903. // However, locked_restore_from_dirs is kept in "search" order.
  904. std::list<const BackupEngineImpl*> locked_restore_from_dirs;
  905. std::vector<port::RWMutex*> mutexes;
  906. // Add `this`
  907. locked_restore_from_dirs.emplace_back(&impl_);
  908. mutexes.push_back(&mutex_);
  909. // Add alternates
  910. for (BackupEngineReadOnlyBase* be : options.alternate_dirs) {
  911. BackupEngineImplThreadSafe* bets =
  912. static_cast_with_check<BackupEngineImplThreadSafe>(
  913. be->AsBackupEngine());
  914. locked_restore_from_dirs.emplace_back(&bets->impl_);
  915. mutexes.push_back(&bets->mutex_);
  916. }
  917. // Acquire read locks in pointer order
  918. std::sort(mutexes.begin(), mutexes.end());
  919. std::vector<ReadLock> locks(mutexes.begin(), mutexes.end());
  920. // Impl
  921. return impl_.RestoreDBFromBackup(options, backup_id, db_dir, wal_dir,
  922. locked_restore_from_dirs);
  923. }
  924. using BackupEngine::RestoreDBFromLatestBackup;
  925. IOStatus RestoreDBFromLatestBackup(
  926. const RestoreOptions& options, const std::string& db_dir,
  927. const std::string& wal_dir) const override {
  928. // Defer to above function, which locks
  929. return RestoreDBFromBackup(options, kLatestBackupIDMarker, db_dir, wal_dir);
  930. }
  931. IOStatus VerifyBackup(BackupID backup_id,
  932. bool verify_with_checksum = false) const override {
  933. ReadLock lock(&mutex_);
  934. return impl_.VerifyBackup(backup_id, verify_with_checksum);
  935. }
  936. BackupEngine* AsBackupEngine() override { return this; }
  937. // Not public API but needed
  938. IOStatus Initialize() {
  939. // No locking needed
  940. return impl_.Initialize();
  941. }
  942. // Not public API but used in testing
  943. void TEST_SetBackupMetaSchemaOptions(
  944. const TEST_BackupMetaSchemaOptions& options) {
  945. impl_.schema_test_options_.reset(new TEST_BackupMetaSchemaOptions(options));
  946. }
  947. // Not public API but used in testing
  948. void TEST_SetDefaultRateLimitersClock(
  949. const std::shared_ptr<SystemClock>& backup_rate_limiter_clock = nullptr,
  950. const std::shared_ptr<SystemClock>& restore_rate_limiter_clock =
  951. nullptr) {
  952. impl_.TEST_SetDefaultRateLimitersClock(backup_rate_limiter_clock,
  953. restore_rate_limiter_clock);
  954. }
  955. private:
  956. mutable port::RWMutex mutex_;
  957. BackupEngineImpl impl_;
  958. };
  959. } // namespace
  960. IOStatus BackupEngine::Open(const BackupEngineOptions& options, Env* env,
  961. BackupEngine** backup_engine_ptr) {
  962. std::unique_ptr<BackupEngineImplThreadSafe> backup_engine(
  963. new BackupEngineImplThreadSafe(options, env));
  964. auto s = backup_engine->Initialize();
  965. if (!s.ok()) {
  966. *backup_engine_ptr = nullptr;
  967. return s;
  968. }
  969. *backup_engine_ptr = backup_engine.release();
  970. return IOStatus::OK();
  971. }
  972. namespace {
  973. BackupEngineImpl::BackupEngineImpl(const BackupEngineOptions& options,
  974. Env* db_env, bool read_only)
  975. : initialized_(false),
  976. threads_cpu_priority_(),
  977. latest_backup_id_(0),
  978. latest_valid_backup_id_(0),
  979. stop_backup_(false),
  980. options_(options),
  981. db_env_(db_env),
  982. backup_env_(options.backup_env != nullptr ? options.backup_env : db_env_),
  983. read_only_(read_only) {
  984. if (options_.backup_rate_limiter == nullptr &&
  985. options_.backup_rate_limit > 0) {
  986. options_.backup_rate_limiter.reset(
  987. NewGenericRateLimiter(options_.backup_rate_limit));
  988. }
  989. if (options_.restore_rate_limiter == nullptr &&
  990. options_.restore_rate_limit > 0) {
  991. options_.restore_rate_limiter.reset(
  992. NewGenericRateLimiter(options_.restore_rate_limit));
  993. }
  994. db_fs_ = db_env_->GetFileSystem();
  995. backup_fs_ = backup_env_->GetFileSystem();
  996. }
  997. BackupEngineImpl::~BackupEngineImpl() {
  998. work_items_.sendEof();
  999. for (auto& t : threads_) {
  1000. t.join();
  1001. }
  1002. LogFlush(options_.info_log);
  1003. for (const auto& it : corrupt_backups_) {
  1004. it.second.first.PermitUncheckedError();
  1005. }
  1006. }
  1007. IOStatus BackupEngineImpl::Initialize() {
  1008. assert(!initialized_);
  1009. initialized_ = true;
  1010. if (read_only_) {
  1011. ROCKS_LOG_INFO(options_.info_log, "Starting read_only backup engine");
  1012. }
  1013. options_.Dump(options_.info_log);
  1014. auto meta_path = GetAbsolutePath(kMetaDirName);
  1015. if (!read_only_) {
  1016. // we might need to clean up from previous crash or I/O errors
  1017. might_need_garbage_collect_ = true;
  1018. if (options_.max_valid_backups_to_open !=
  1019. std::numeric_limits<int32_t>::max()) {
  1020. options_.max_valid_backups_to_open = std::numeric_limits<int32_t>::max();
  1021. ROCKS_LOG_WARN(
  1022. options_.info_log,
  1023. "`max_valid_backups_to_open` is not set to the default value. "
  1024. "Ignoring its value since BackupEngine is not read-only.");
  1025. }
  1026. // gather the list of directories that we need to create
  1027. std::vector<std::pair<std::string, std::unique_ptr<FSDirectory>*>>
  1028. directories;
  1029. directories.emplace_back(GetAbsolutePath(), &backup_directory_);
  1030. if (options_.share_table_files) {
  1031. if (options_.share_files_with_checksum) {
  1032. directories.emplace_back(
  1033. GetAbsolutePath(GetSharedFileWithChecksumRel()),
  1034. &shared_directory_);
  1035. } else {
  1036. directories.emplace_back(GetAbsolutePath(GetSharedFileRel()),
  1037. &shared_directory_);
  1038. }
  1039. }
  1040. directories.emplace_back(GetAbsolutePath(kPrivateDirName),
  1041. &private_directory_);
  1042. directories.emplace_back(meta_path, &meta_directory_);
  1043. // create all the dirs we need
  1044. for (const auto& d : directories) {
  1045. IOStatus io_s =
  1046. backup_fs_->CreateDirIfMissing(d.first, io_options_, nullptr);
  1047. if (io_s.ok()) {
  1048. io_s =
  1049. backup_fs_->NewDirectory(d.first, io_options_, d.second, nullptr);
  1050. }
  1051. if (!io_s.ok()) {
  1052. return io_s;
  1053. }
  1054. }
  1055. }
  1056. std::vector<std::string> backup_meta_files;
  1057. {
  1058. IOStatus io_s = backup_fs_->GetChildren(meta_path, io_options_,
  1059. &backup_meta_files, nullptr);
  1060. if (io_s.IsNotFound()) {
  1061. return IOStatus::NotFound(meta_path + " is missing");
  1062. } else if (!io_s.ok()) {
  1063. return io_s;
  1064. }
  1065. }
  1066. // create backups_ structure
  1067. for (auto& file : backup_meta_files) {
  1068. ROCKS_LOG_INFO(options_.info_log, "Detected backup %s", file.c_str());
  1069. BackupID backup_id = 0;
  1070. sscanf(file.c_str(), "%u", &backup_id);
  1071. if (backup_id == 0 || file != std::to_string(backup_id)) {
  1072. // Invalid file name, will be deleted with auto-GC when user
  1073. // initiates an append or write operation. (Behave as read-only until
  1074. // then.)
  1075. ROCKS_LOG_INFO(options_.info_log, "Skipping unrecognized meta file %s",
  1076. file.c_str());
  1077. continue;
  1078. }
  1079. assert(backups_.find(backup_id) == backups_.end());
  1080. // Insert all the (backup_id, BackupMeta) that will be loaded later
  1081. // The loading performed later will check whether there are corrupt backups
  1082. // and move the corrupt backups to corrupt_backups_
  1083. backups_.insert(std::make_pair(
  1084. backup_id, std::unique_ptr<BackupMeta>(new BackupMeta(
  1085. GetBackupMetaFile(backup_id, false /* tmp */),
  1086. GetBackupMetaFile(backup_id, true /* tmp */),
  1087. &backuped_file_infos_, backup_env_, backup_fs_))));
  1088. }
  1089. latest_backup_id_ = 0;
  1090. latest_valid_backup_id_ = 0;
  1091. if (options_.destroy_old_data) { // Destroy old data
  1092. assert(!read_only_);
  1093. ROCKS_LOG_INFO(
  1094. options_.info_log,
  1095. "Backup Engine started with destroy_old_data == true, deleting all "
  1096. "backups");
  1097. IOStatus io_s = PurgeOldBackups(0);
  1098. if (io_s.ok()) {
  1099. io_s = GarbageCollect();
  1100. }
  1101. if (!io_s.ok()) {
  1102. return io_s;
  1103. }
  1104. } else { // Load data from storage
  1105. // abs_path_to_size: maps absolute paths of files in backup directory to
  1106. // their corresponding sizes
  1107. std::unordered_map<std::string, uint64_t> abs_path_to_size;
  1108. // Insert files and their sizes in backup sub-directories (shared and
  1109. // shared_checksum) to abs_path_to_size
  1110. for (const auto& rel_dir :
  1111. {GetSharedFileRel(), GetSharedFileWithChecksumRel()}) {
  1112. const auto abs_dir = GetAbsolutePath(rel_dir);
  1113. IOStatus io_s =
  1114. ReadChildFileCurrentSizes(abs_dir, backup_fs_, &abs_path_to_size);
  1115. if (!io_s.ok()) {
  1116. // I/O error likely impacting all backups
  1117. return io_s;
  1118. }
  1119. }
  1120. // load the backups if any, until valid_backups_to_open of the latest
  1121. // non-corrupted backups have been successfully opened.
  1122. int valid_backups_to_open = options_.max_valid_backups_to_open;
  1123. for (auto backup_iter = backups_.rbegin(); backup_iter != backups_.rend();
  1124. ++backup_iter) {
  1125. assert(latest_backup_id_ == 0 || latest_backup_id_ > backup_iter->first);
  1126. if (latest_backup_id_ == 0) {
  1127. latest_backup_id_ = backup_iter->first;
  1128. }
  1129. if (valid_backups_to_open == 0) {
  1130. break;
  1131. }
  1132. // Insert files and their sizes in backup sub-directories
  1133. // (private/backup_id) to abs_path_to_size
  1134. IOStatus io_s = ReadChildFileCurrentSizes(
  1135. GetAbsolutePath(GetPrivateFileRel(backup_iter->first)), backup_fs_,
  1136. &abs_path_to_size);
  1137. if (io_s.ok()) {
  1138. io_s = backup_iter->second->LoadFromFile(
  1139. options_.backup_dir, abs_path_to_size,
  1140. options_.backup_rate_limiter.get(), options_.info_log,
  1141. &reported_ignored_fields_);
  1142. }
  1143. if (io_s.IsCorruption() || io_s.IsNotSupported()) {
  1144. ROCKS_LOG_INFO(options_.info_log, "Backup %u corrupted -- %s",
  1145. backup_iter->first, io_s.ToString().c_str());
  1146. corrupt_backups_.insert(std::make_pair(
  1147. backup_iter->first,
  1148. std::make_pair(io_s, std::move(backup_iter->second))));
  1149. } else if (!io_s.ok()) {
  1150. // Distinguish corruption errors from errors in the backup Env.
  1151. // Errors in the backup Env (i.e., this code path) will cause Open() to
  1152. // fail, whereas corruption errors would not cause Open() failures.
  1153. return io_s;
  1154. } else {
  1155. ROCKS_LOG_INFO(options_.info_log, "Loading backup %" PRIu32 " OK:\n%s",
  1156. backup_iter->first,
  1157. backup_iter->second->GetInfoString().c_str());
  1158. assert(latest_valid_backup_id_ == 0 ||
  1159. latest_valid_backup_id_ > backup_iter->first);
  1160. if (latest_valid_backup_id_ == 0) {
  1161. latest_valid_backup_id_ = backup_iter->first;
  1162. }
  1163. --valid_backups_to_open;
  1164. }
  1165. }
  1166. for (const auto& corrupt : corrupt_backups_) {
  1167. backups_.erase(backups_.find(corrupt.first));
  1168. }
  1169. // erase the backups before max_valid_backups_to_open
  1170. int num_unopened_backups;
  1171. if (options_.max_valid_backups_to_open == 0) {
  1172. num_unopened_backups = 0;
  1173. } else {
  1174. num_unopened_backups =
  1175. std::max(0, static_cast<int>(backups_.size()) -
  1176. options_.max_valid_backups_to_open);
  1177. }
  1178. for (int i = 0; i < num_unopened_backups; ++i) {
  1179. assert(backups_.begin()->second->Empty());
  1180. backups_.erase(backups_.begin());
  1181. }
  1182. }
  1183. ROCKS_LOG_INFO(options_.info_log, "Latest backup is %u", latest_backup_id_);
  1184. ROCKS_LOG_INFO(options_.info_log, "Latest valid backup is %u",
  1185. latest_valid_backup_id_);
  1186. // set up threads to perform file creation / copy or checksum computations
  1187. // from work_items_ in the background.
  1188. threads_cpu_priority_ = CpuPriority::kNormal;
  1189. threads_.reserve(options_.max_background_operations);
  1190. for (int t = 0; t < options_.max_background_operations; t++) {
  1191. threads_.emplace_back([this]() {
  1192. #if defined(_GNU_SOURCE) && defined(__GLIBC_PREREQ)
  1193. #if __GLIBC_PREREQ(2, 12)
  1194. pthread_setname_np(pthread_self(), "backup_engine");
  1195. #endif
  1196. #endif
  1197. CpuPriority current_priority = CpuPriority::kNormal;
  1198. WorkItem work_item;
  1199. uint64_t bytes_toward_next_callback = 0;
  1200. while (work_items_.read(work_item)) {
  1201. CpuPriority priority = threads_cpu_priority_;
  1202. if (current_priority != priority) {
  1203. TEST_SYNC_POINT_CALLBACK(
  1204. "BackupEngineImpl::Initialize:SetCpuPriority", &priority);
  1205. port::SetCpuPriority(0, priority);
  1206. current_priority = priority;
  1207. }
  1208. // `bytes_read` and `bytes_written` stats are enabled based on
  1209. // compile-time support and cannot be dynamically toggled. So we do not
  1210. // need to worry about `PerfLevel` here, unlike many other
  1211. // `IOStatsContext` / `PerfContext` stats.
  1212. uint64_t prev_bytes_read = IOSTATS(bytes_read);
  1213. uint64_t prev_bytes_written = IOSTATS(bytes_written);
  1214. WorkItemResult result;
  1215. if (work_item.type == WorkItemType::CopyOrCreate) {
  1216. Temperature temp = work_item.src_temperature;
  1217. result.io_status = CopyOrCreateFile(
  1218. work_item.src_path, work_item.dst_path, work_item.contents,
  1219. work_item.size_limit, work_item.src_env, work_item.dst_env,
  1220. work_item.src_env_options, work_item.sync, work_item.rate_limiter,
  1221. work_item.progress_callback, &temp, work_item.dst_temperature,
  1222. &bytes_toward_next_callback, &result.size, &result.checksum_hex);
  1223. RecordTick(work_item.stats, BACKUP_READ_BYTES,
  1224. IOSTATS(bytes_read) - prev_bytes_read);
  1225. RecordTick(work_item.stats, BACKUP_WRITE_BYTES,
  1226. IOSTATS(bytes_written) - prev_bytes_written);
  1227. result.db_id = work_item.db_id;
  1228. result.db_session_id = work_item.db_session_id;
  1229. result.expected_src_temperature = work_item.src_temperature;
  1230. result.current_src_temperature = temp;
  1231. if (result.io_status.ok() && !work_item.src_checksum_hex.empty()) {
  1232. // unknown checksum function name implies no db table file checksum
  1233. // in db manifest; work_item.src_checksum_hex not empty means backup
  1234. // engine has calculated its crc32c checksum for the table file;
  1235. // therefore, we are able to compare the checksums.
  1236. if (work_item.src_checksum_func_name ==
  1237. kUnknownFileChecksumFuncName ||
  1238. work_item.src_checksum_func_name == kDbFileChecksumFuncName) {
  1239. if (work_item.src_checksum_hex != result.checksum_hex) {
  1240. std::string checksum_info(
  1241. "Expected checksum is " + work_item.src_checksum_hex +
  1242. " while computed checksum is " + result.checksum_hex);
  1243. result.io_status = IOStatus::Corruption(
  1244. "Checksum mismatch after copying to " + work_item.dst_path +
  1245. ": " + checksum_info);
  1246. }
  1247. } else {
  1248. // FIXME(peterd): dead code?
  1249. std::string checksum_function_info(
  1250. "Existing checksum function is " +
  1251. work_item.src_checksum_func_name +
  1252. " while provided checksum function is " +
  1253. kBackupFileChecksumFuncName);
  1254. ROCKS_LOG_INFO(
  1255. options_.info_log,
  1256. "Unable to verify checksum after copying to %s: %s\n",
  1257. work_item.dst_path.c_str(), checksum_function_info.c_str());
  1258. }
  1259. }
  1260. } else if (work_item.type == ComputeChecksum) {
  1261. result.io_status = ReadFileAndComputeChecksum(
  1262. work_item.src_path, work_item.src_env->GetFileSystem(),
  1263. work_item.src_env_options, work_item.size_limit,
  1264. &result.checksum_hex, work_item.src_temperature);
  1265. result.db_id = work_item.db_id;
  1266. result.db_session_id = work_item.db_session_id;
  1267. } else {
  1268. result.io_status = IOStatus::InvalidArgument(
  1269. "Unknown work item type: " + std::to_string(work_item.type));
  1270. }
  1271. work_item.result.set_value(std::move(result));
  1272. }
  1273. });
  1274. }
  1275. ROCKS_LOG_INFO(options_.info_log, "Initialized BackupEngine");
  1276. return IOStatus::OK();
  1277. }
  1278. IOStatus BackupEngineImpl::CreateNewBackupWithMetadata(
  1279. const CreateBackupOptions& options, DB* db, const std::string& app_metadata,
  1280. BackupID* new_backup_id_ptr) {
  1281. assert(initialized_);
  1282. assert(!read_only_);
  1283. if (app_metadata.size() > kMaxAppMetaSize) {
  1284. return IOStatus::InvalidArgument("App metadata too large");
  1285. }
  1286. bool maybe_exclude_items = bool{options.exclude_files_callback};
  1287. if (maybe_exclude_items && options_.schema_version < 2) {
  1288. return IOStatus::InvalidArgument(
  1289. "exclude_files_callback requires schema_version >= 2");
  1290. }
  1291. if (options.decrease_background_thread_cpu_priority) {
  1292. if (options.background_thread_cpu_priority < threads_cpu_priority_) {
  1293. threads_cpu_priority_.store(options.background_thread_cpu_priority);
  1294. }
  1295. }
  1296. BackupID new_backup_id = latest_backup_id_ + 1;
  1297. // `bytes_read` and `bytes_written` stats are enabled based on compile-time
  1298. // support and cannot be dynamically toggled. So we do not need to worry about
  1299. // `PerfLevel` here, unlike many other `IOStatsContext` / `PerfContext` stats.
  1300. uint64_t prev_bytes_read = IOSTATS(bytes_read);
  1301. uint64_t prev_bytes_written = IOSTATS(bytes_written);
  1302. assert(backups_.find(new_backup_id) == backups_.end());
  1303. auto private_dir = GetAbsolutePath(GetPrivateFileRel(new_backup_id));
  1304. IOStatus io_s = backup_fs_->FileExists(private_dir, io_options_, nullptr);
  1305. if (io_s.ok()) {
  1306. // maybe last backup failed and left partial state behind, clean it up.
  1307. // need to do this before updating backups_ such that a private dir
  1308. // named after new_backup_id will be cleaned up.
  1309. // (If an incomplete new backup is followed by an incomplete delete
  1310. // of the latest full backup, then there could be more than one next
  1311. // id with a private dir, the last thing to be deleted in delete
  1312. // backup, but all will be cleaned up with a GarbageCollect.)
  1313. io_s = GarbageCollect();
  1314. } else if (io_s.IsNotFound()) {
  1315. // normal case, the new backup's private dir doesn't exist yet
  1316. io_s = IOStatus::OK();
  1317. }
  1318. auto ret = backups_.insert(std::make_pair(
  1319. new_backup_id, std::unique_ptr<BackupMeta>(new BackupMeta(
  1320. GetBackupMetaFile(new_backup_id, false /* tmp */),
  1321. GetBackupMetaFile(new_backup_id, true /* tmp */),
  1322. &backuped_file_infos_, backup_env_, backup_fs_))));
  1323. assert(ret.second == true);
  1324. auto& new_backup = ret.first->second;
  1325. new_backup->RecordTimestamp();
  1326. new_backup->SetAppMetadata(app_metadata);
  1327. auto start_backup = backup_env_->NowMicros();
  1328. ROCKS_LOG_INFO(options_.info_log,
  1329. "Started the backup process -- creating backup %u",
  1330. new_backup_id);
  1331. if (options_.share_table_files && !options_.share_files_with_checksum) {
  1332. ROCKS_LOG_WARN(options_.info_log,
  1333. "BackupEngineOptions::share_files_with_checksum=false is "
  1334. "DEPRECATED and could lead to data loss.");
  1335. }
  1336. if (io_s.ok()) {
  1337. io_s = backup_fs_->CreateDir(private_dir, io_options_, nullptr);
  1338. }
  1339. // A set into which we will insert the dst_paths that are calculated for live
  1340. // files and live WAL files.
  1341. // This is used to check whether a live files shares a dst_path with another
  1342. // live file.
  1343. std::unordered_set<std::string> live_dst_paths;
  1344. std::deque<BackupWorkItemPair> excludable_items;
  1345. std::deque<BackupAfterCopyOrCreateWorkItem> backup_items_to_finish;
  1346. // Add a WorkItem to the channel for each live file
  1347. Status disabled = db->DisableFileDeletions();
  1348. DBOptions db_options = db->GetDBOptions();
  1349. Statistics* stats = db_options.statistics.get();
  1350. if (io_s.ok()) {
  1351. CheckpointImpl checkpoint(db);
  1352. uint64_t sequence_number = 0;
  1353. FileChecksumGenFactory* db_checksum_factory =
  1354. db_options.file_checksum_gen_factory.get();
  1355. const std::string kFileChecksumGenFactoryName =
  1356. "FileChecksumGenCrc32cFactory";
  1357. bool compare_checksum =
  1358. db_checksum_factory != nullptr &&
  1359. db_checksum_factory->Name() == kFileChecksumGenFactoryName
  1360. ? true
  1361. : false;
  1362. EnvOptions src_raw_env_options(db_options);
  1363. RateLimiter* rate_limiter = options_.backup_rate_limiter.get();
  1364. io_s = status_to_io_status(checkpoint.CreateCustomCheckpoint(
  1365. [&](const std::string& /*src_dirname*/, const std::string& /*fname*/,
  1366. FileType) {
  1367. // custom checkpoint will switch to calling copy_file_cb after it sees
  1368. // NotSupported returned from link_file_cb.
  1369. return IOStatus::NotSupported();
  1370. } /* link_file_cb */,
  1371. [&](const std::string& src_dirname, const std::string& fname,
  1372. uint64_t size_limit_bytes, FileType type,
  1373. const std::string& checksum_func_name,
  1374. const std::string& checksum_val,
  1375. const Temperature src_temperature) {
  1376. if (type == kWalFile && !options_.backup_log_files) {
  1377. return IOStatus::OK();
  1378. }
  1379. Log(options_.info_log, "add file for backup %s", fname.c_str());
  1380. uint64_t size_bytes = 0;
  1381. IOStatus io_st;
  1382. if (type == kTableFile || type == kBlobFile) {
  1383. io_st = db_fs_->GetFileSize(src_dirname + "/" + fname, io_options_,
  1384. &size_bytes, nullptr);
  1385. if (!io_st.ok()) {
  1386. Log(options_.info_log, "GetFileSize is failed: %s",
  1387. io_st.ToString().c_str());
  1388. return io_st;
  1389. }
  1390. }
  1391. EnvOptions src_env_options;
  1392. switch (type) {
  1393. case kWalFile:
  1394. src_env_options =
  1395. db_env_->OptimizeForLogRead(src_raw_env_options);
  1396. break;
  1397. case kTableFile:
  1398. src_env_options = db_env_->OptimizeForCompactionTableRead(
  1399. src_raw_env_options, ImmutableDBOptions(db_options));
  1400. break;
  1401. case kDescriptorFile:
  1402. src_env_options =
  1403. db_env_->OptimizeForManifestRead(src_raw_env_options);
  1404. break;
  1405. case kBlobFile:
  1406. src_env_options = db_env_->OptimizeForBlobFileRead(
  1407. src_raw_env_options, ImmutableDBOptions(db_options));
  1408. break;
  1409. default:
  1410. // Other backed up files (like options file) are not read by live
  1411. // DB, so don't need to worry about avoiding mixing buffered and
  1412. // direct I/O. Just use plain defaults.
  1413. src_env_options = src_raw_env_options;
  1414. break;
  1415. }
  1416. io_st = AddBackupFileWorkItem(
  1417. live_dst_paths, backup_items_to_finish,
  1418. maybe_exclude_items ? &excludable_items : nullptr, new_backup_id,
  1419. options_.share_table_files &&
  1420. (type == kTableFile || type == kBlobFile),
  1421. src_dirname, fname, src_env_options, rate_limiter, type,
  1422. size_bytes, db_options.statistics.get(), size_limit_bytes,
  1423. options_.share_files_with_checksum &&
  1424. (type == kTableFile || type == kBlobFile),
  1425. options.progress_callback, "" /* contents */, checksum_func_name,
  1426. checksum_val, src_temperature);
  1427. return io_st;
  1428. } /* copy_file_cb */,
  1429. [&](const std::string& fname, const std::string& contents,
  1430. FileType type) {
  1431. Log(options_.info_log, "add file for backup %s", fname.c_str());
  1432. return AddBackupFileWorkItem(
  1433. live_dst_paths, backup_items_to_finish,
  1434. maybe_exclude_items ? &excludable_items : nullptr, new_backup_id,
  1435. false /* shared */, "" /* src_dir */, fname,
  1436. EnvOptions() /* src_env_options */, rate_limiter, type,
  1437. contents.size(), db_options.statistics.get(), 0 /* size_limit */,
  1438. false /* shared_checksum */, options.progress_callback, contents);
  1439. } /* create_file_cb */,
  1440. &sequence_number,
  1441. options.flush_before_backup ? 0 : std::numeric_limits<uint64_t>::max(),
  1442. compare_checksum));
  1443. if (io_s.ok()) {
  1444. new_backup->SetSequenceNumber(sequence_number);
  1445. }
  1446. }
  1447. ROCKS_LOG_INFO(options_.info_log, "add files for backup done.");
  1448. if (io_s.ok() && maybe_exclude_items) {
  1449. assert(options.exclude_files_callback);
  1450. size_t count = excludable_items.size();
  1451. std::vector<MaybeExcludeBackupFile> maybe_exclude_files;
  1452. maybe_exclude_files.reserve(count);
  1453. for (auto& e : excludable_items) {
  1454. maybe_exclude_files.emplace_back(
  1455. BackupExcludedFileInfo(e.second.dst_relative));
  1456. }
  1457. if (count > 0) {
  1458. try {
  1459. options.exclude_files_callback(
  1460. &maybe_exclude_files.front(),
  1461. /*end pointer*/ &maybe_exclude_files.back() + 1);
  1462. } catch (const std::exception& exn) {
  1463. io_s = IOStatus::Aborted("Exception in exclude_files_callback: " +
  1464. std::string(exn.what()));
  1465. } catch (...) {
  1466. io_s = IOStatus::Aborted("Unknown exception in exclude_files_callback");
  1467. }
  1468. }
  1469. if (io_s.ok()) {
  1470. for (size_t i = 0; i < count; ++i) {
  1471. auto& e = excludable_items[i];
  1472. if (maybe_exclude_files[i].exclude_decision) {
  1473. new_backup.get()->AddExcludedFile(e.second.dst_relative);
  1474. } else {
  1475. work_items_.write(std::move(e.first));
  1476. backup_items_to_finish.push_back(std::move(e.second));
  1477. }
  1478. }
  1479. }
  1480. excludable_items.clear();
  1481. } else {
  1482. assert(!options.exclude_files_callback);
  1483. assert(excludable_items.empty());
  1484. }
  1485. ROCKS_LOG_INFO(options_.info_log,
  1486. "dispatch files for backup done, wait for finish.");
  1487. for (auto& item : backup_items_to_finish) {
  1488. item.result.wait();
  1489. auto result = item.result.get();
  1490. IOStatus item_io_status = result.io_status;
  1491. Temperature temp = result.expected_src_temperature;
  1492. if (result.current_src_temperature != Temperature::kUnknown &&
  1493. (temp == Temperature::kUnknown ||
  1494. options_.current_temperatures_override_manifest)) {
  1495. temp = result.current_src_temperature;
  1496. }
  1497. if (item_io_status.ok() && item.shared && item.needed_to_copy) {
  1498. item_io_status = item.backup_env->GetFileSystem()->RenameFile(
  1499. item.dst_path_tmp, item.dst_path, io_options_, nullptr);
  1500. }
  1501. if (item_io_status.ok()) {
  1502. item_io_status = new_backup.get()->AddFile(std::make_shared<FileInfo>(
  1503. item.dst_relative, result.size, result.checksum_hex, result.db_id,
  1504. result.db_session_id, temp));
  1505. }
  1506. if (!item_io_status.ok()) {
  1507. io_s = std::move(item_io_status);
  1508. io_s.MustCheck();
  1509. }
  1510. }
  1511. // we copied all the files, enable file deletions
  1512. if (disabled.ok()) { // If we successfully disabled file deletions
  1513. db->EnableFileDeletions().PermitUncheckedError();
  1514. }
  1515. auto backup_time = backup_env_->NowMicros() - start_backup;
  1516. if (io_s.ok()) {
  1517. // persist the backup metadata on the disk
  1518. io_s = new_backup->StoreToFile(options_.sync, options_.schema_version,
  1519. schema_test_options_.get());
  1520. }
  1521. if (io_s.ok() && options_.sync) {
  1522. std::unique_ptr<FSDirectory> backup_private_directory;
  1523. backup_fs_
  1524. ->NewDirectory(GetAbsolutePath(GetPrivateFileRel(new_backup_id, false)),
  1525. io_options_, &backup_private_directory, nullptr)
  1526. .PermitUncheckedError();
  1527. if (backup_private_directory != nullptr) {
  1528. io_s = backup_private_directory->FsyncWithDirOptions(io_options_, nullptr,
  1529. DirFsyncOptions());
  1530. }
  1531. if (io_s.ok() && private_directory_ != nullptr) {
  1532. io_s = private_directory_->FsyncWithDirOptions(io_options_, nullptr,
  1533. DirFsyncOptions());
  1534. }
  1535. if (io_s.ok() && meta_directory_ != nullptr) {
  1536. io_s = meta_directory_->FsyncWithDirOptions(io_options_, nullptr,
  1537. DirFsyncOptions());
  1538. }
  1539. if (io_s.ok() && shared_directory_ != nullptr) {
  1540. io_s = shared_directory_->FsyncWithDirOptions(io_options_, nullptr,
  1541. DirFsyncOptions());
  1542. }
  1543. if (io_s.ok() && backup_directory_ != nullptr) {
  1544. io_s = backup_directory_->FsyncWithDirOptions(io_options_, nullptr,
  1545. DirFsyncOptions());
  1546. }
  1547. }
  1548. if (io_s.ok()) {
  1549. backup_statistics_.IncrementNumberSuccessBackup();
  1550. // here we know that we succeeded and installed the new backup
  1551. latest_backup_id_ = new_backup_id;
  1552. latest_valid_backup_id_ = new_backup_id;
  1553. if (new_backup_id_ptr) {
  1554. *new_backup_id_ptr = new_backup_id;
  1555. }
  1556. ROCKS_LOG_INFO(options_.info_log, "Backup DONE. All is good");
  1557. // backup_speed is in byte/second
  1558. double backup_speed = new_backup->GetSize() / (1.048576 * backup_time);
  1559. ROCKS_LOG_INFO(options_.info_log, "Backup number of files: %u",
  1560. new_backup->GetNumberFiles());
  1561. char human_size[16];
  1562. AppendHumanBytes(new_backup->GetSize(), human_size, sizeof(human_size));
  1563. ROCKS_LOG_INFO(options_.info_log, "Backup size: %s", human_size);
  1564. ROCKS_LOG_INFO(options_.info_log, "Backup time: %" PRIu64 " microseconds",
  1565. backup_time);
  1566. ROCKS_LOG_INFO(options_.info_log, "Backup speed: %.3f MB/s", backup_speed);
  1567. ROCKS_LOG_INFO(options_.info_log, "Backup Statistics %s",
  1568. backup_statistics_.ToString().c_str());
  1569. } else {
  1570. backup_statistics_.IncrementNumberFailBackup();
  1571. // clean all the files we might have created
  1572. ROCKS_LOG_INFO(options_.info_log, "Backup failed -- %s",
  1573. io_s.ToString().c_str());
  1574. ROCKS_LOG_INFO(options_.info_log, "Backup Statistics %s\n",
  1575. backup_statistics_.ToString().c_str());
  1576. // delete files that we might have already written
  1577. might_need_garbage_collect_ = true;
  1578. DeleteBackup(new_backup_id).PermitUncheckedError();
  1579. }
  1580. RecordTick(stats, BACKUP_READ_BYTES, IOSTATS(bytes_read) - prev_bytes_read);
  1581. RecordTick(stats, BACKUP_WRITE_BYTES,
  1582. IOSTATS(bytes_written) - prev_bytes_written);
  1583. return io_s;
  1584. }
  1585. IOStatus BackupEngineImpl::PurgeOldBackups(uint32_t num_backups_to_keep) {
  1586. assert(initialized_);
  1587. assert(!read_only_);
  1588. // Best effort deletion even with errors
  1589. IOStatus overall_status = IOStatus::OK();
  1590. ROCKS_LOG_INFO(options_.info_log, "Purging old backups, keeping %u",
  1591. num_backups_to_keep);
  1592. std::vector<BackupID> to_delete;
  1593. auto itr = backups_.begin();
  1594. while ((backups_.size() - to_delete.size()) > num_backups_to_keep) {
  1595. to_delete.push_back(itr->first);
  1596. itr++;
  1597. }
  1598. for (auto backup_id : to_delete) {
  1599. // Do not GC until end
  1600. IOStatus io_s = DeleteBackupNoGC(backup_id);
  1601. if (!io_s.ok()) {
  1602. overall_status = io_s;
  1603. }
  1604. }
  1605. // Clean up after any incomplete backup deletion, potentially from
  1606. // earlier session.
  1607. if (might_need_garbage_collect_) {
  1608. IOStatus io_s = GarbageCollect();
  1609. if (!io_s.ok() && overall_status.ok()) {
  1610. overall_status = io_s;
  1611. }
  1612. }
  1613. return overall_status;
  1614. }
  1615. IOStatus BackupEngineImpl::DeleteBackup(BackupID backup_id) {
  1616. IOStatus s1 = DeleteBackupNoGC(backup_id);
  1617. IOStatus s2 = IOStatus::OK();
  1618. // Clean up after any incomplete backup deletion, potentially from
  1619. // earlier session.
  1620. if (might_need_garbage_collect_) {
  1621. s2 = GarbageCollect();
  1622. }
  1623. if (!s1.ok()) {
  1624. // Any failure in the primary objective trumps any failure in the
  1625. // secondary objective.
  1626. s2.PermitUncheckedError();
  1627. return s1;
  1628. } else {
  1629. return s2;
  1630. }
  1631. }
  1632. // Does not auto-GarbageCollect nor lock
  1633. IOStatus BackupEngineImpl::DeleteBackupNoGC(BackupID backup_id) {
  1634. assert(initialized_);
  1635. assert(!read_only_);
  1636. ROCKS_LOG_INFO(options_.info_log, "Deleting backup %u", backup_id);
  1637. auto backup = backups_.find(backup_id);
  1638. if (backup != backups_.end()) {
  1639. IOStatus io_s = backup->second->Delete();
  1640. if (!io_s.ok()) {
  1641. return io_s;
  1642. }
  1643. backups_.erase(backup);
  1644. if (backups_.empty()) {
  1645. latest_valid_backup_id_ = 0;
  1646. } else {
  1647. latest_valid_backup_id_ = backups_.rbegin()->first;
  1648. }
  1649. } else {
  1650. auto corrupt = corrupt_backups_.find(backup_id);
  1651. if (corrupt == corrupt_backups_.end()) {
  1652. return IOStatus::NotFound("Backup not found");
  1653. }
  1654. IOStatus io_s = corrupt->second.second->Delete();
  1655. if (!io_s.ok()) {
  1656. return io_s;
  1657. }
  1658. corrupt->second.first.PermitUncheckedError();
  1659. corrupt_backups_.erase(corrupt);
  1660. }
  1661. // After removing meta file, best effort deletion even with errors.
  1662. // (Don't delete other files if we can't delete the meta file right
  1663. // now.)
  1664. std::vector<std::string> to_delete;
  1665. for (auto& itr : backuped_file_infos_) {
  1666. if (itr.second->refs == 0) {
  1667. IOStatus io_s = backup_fs_->DeleteFile(GetAbsolutePath(itr.first),
  1668. io_options_, nullptr);
  1669. ROCKS_LOG_INFO(options_.info_log, "Deleting %s -- %s", itr.first.c_str(),
  1670. io_s.ToString().c_str());
  1671. to_delete.push_back(itr.first);
  1672. if (!io_s.ok()) {
  1673. // Trying again later might work
  1674. might_need_garbage_collect_ = true;
  1675. }
  1676. }
  1677. }
  1678. for (auto& td : to_delete) {
  1679. backuped_file_infos_.erase(td);
  1680. }
  1681. // take care of private dirs -- GarbageCollect() will take care of them
  1682. // if they are not empty
  1683. std::string private_dir = GetPrivateFileRel(backup_id);
  1684. IOStatus io_s =
  1685. backup_fs_->DeleteDir(GetAbsolutePath(private_dir), io_options_, nullptr);
  1686. ROCKS_LOG_INFO(options_.info_log, "Deleting private dir %s -- %s",
  1687. private_dir.c_str(), io_s.ToString().c_str());
  1688. if (!io_s.ok()) {
  1689. // Full gc or trying again later might work
  1690. might_need_garbage_collect_ = true;
  1691. }
  1692. return IOStatus::OK();
  1693. }
  1694. void BackupEngineImpl::SetBackupInfoFromBackupMeta(
  1695. BackupID id, const BackupMeta& meta, BackupInfo* backup_info,
  1696. bool include_file_details) const {
  1697. *backup_info = BackupInfo(id, meta.GetTimestamp(), meta.GetSize(),
  1698. meta.GetNumberFiles(), meta.GetAppMetadata());
  1699. std::string dir =
  1700. options_.backup_dir + "/" + kPrivateDirSlash + std::to_string(id);
  1701. if (include_file_details) {
  1702. auto& file_details = backup_info->file_details;
  1703. file_details.reserve(meta.GetFiles().size());
  1704. for (auto& file_ptr : meta.GetFiles()) {
  1705. BackupFileInfo& finfo = file_details.emplace_back();
  1706. finfo.relative_filename = file_ptr->filename;
  1707. finfo.size = file_ptr->size;
  1708. finfo.directory = dir;
  1709. uint64_t number;
  1710. FileType type;
  1711. bool ok = ParseFileName(file_ptr->filename, &number, &type);
  1712. if (ok) {
  1713. finfo.file_number = number;
  1714. finfo.file_type = type;
  1715. }
  1716. // TODO: temperature, file_checksum, file_checksum_func_name
  1717. // finfo.temperature = file_ptr->temp;
  1718. }
  1719. backup_info->excluded_files = meta.GetExcludedFiles();
  1720. backup_info->name_for_open = GetAbsolutePath(GetPrivateFileRel(id));
  1721. backup_info->name_for_open.pop_back(); // remove trailing '/'
  1722. backup_info->env_for_open = meta.GetEnvForOpen();
  1723. }
  1724. }
  1725. Status BackupEngineImpl::GetBackupInfo(BackupID backup_id,
  1726. BackupInfo* backup_info,
  1727. bool include_file_details) const {
  1728. assert(initialized_);
  1729. if (backup_id == kLatestBackupIDMarker) {
  1730. // Note: Read latest_valid_backup_id_ inside of lock
  1731. backup_id = latest_valid_backup_id_;
  1732. }
  1733. auto corrupt_itr = corrupt_backups_.find(backup_id);
  1734. if (corrupt_itr != corrupt_backups_.end()) {
  1735. return Status::Corruption(corrupt_itr->second.first.ToString());
  1736. }
  1737. auto backup_itr = backups_.find(backup_id);
  1738. if (backup_itr == backups_.end()) {
  1739. return Status::NotFound("Backup not found");
  1740. }
  1741. auto& backup = backup_itr->second;
  1742. if (backup->Empty()) {
  1743. return Status::NotFound("Backup not found");
  1744. }
  1745. SetBackupInfoFromBackupMeta(backup_id, *backup, backup_info,
  1746. include_file_details);
  1747. return Status::OK();
  1748. }
  1749. void BackupEngineImpl::GetBackupInfo(std::vector<BackupInfo>* backup_info,
  1750. bool include_file_details) const {
  1751. assert(initialized_);
  1752. backup_info->resize(backups_.size());
  1753. size_t i = 0;
  1754. for (auto& backup : backups_) {
  1755. const BackupMeta& meta = *backup.second;
  1756. if (!meta.Empty()) {
  1757. SetBackupInfoFromBackupMeta(backup.first, meta, &backup_info->at(i++),
  1758. include_file_details);
  1759. }
  1760. }
  1761. }
  1762. void BackupEngineImpl::GetCorruptedBackups(
  1763. std::vector<BackupID>* corrupt_backup_ids) const {
  1764. assert(initialized_);
  1765. corrupt_backup_ids->reserve(corrupt_backups_.size());
  1766. for (auto& backup : corrupt_backups_) {
  1767. corrupt_backup_ids->push_back(backup.first);
  1768. }
  1769. }
  1770. IOStatus BackupEngineImpl::RestoreDBFromBackup(
  1771. const RestoreOptions& options, BackupID backup_id,
  1772. const std::string& db_dir, const std::string& wal_dir,
  1773. const std::list<const BackupEngineImpl*>& locked_restore_from_dirs) const {
  1774. assert(initialized_);
  1775. if (backup_id == kLatestBackupIDMarker) {
  1776. // Note: Read latest_valid_backup_id_ inside of lock
  1777. backup_id = latest_valid_backup_id_;
  1778. }
  1779. auto corrupt_itr = corrupt_backups_.find(backup_id);
  1780. if (corrupt_itr != corrupt_backups_.end()) {
  1781. return corrupt_itr->second.first;
  1782. }
  1783. auto backup_itr = backups_.find(backup_id);
  1784. if (backup_itr == backups_.end()) {
  1785. return IOStatus::NotFound("Backup not found");
  1786. }
  1787. auto& backup = backup_itr->second;
  1788. if (backup->Empty()) {
  1789. return IOStatus::NotFound("Backup not found");
  1790. }
  1791. ROCKS_LOG_INFO(options_.info_log, "Restoring backup id %u\n", backup_id);
  1792. ROCKS_LOG_INFO(options_.info_log, "keep_log_files: %d\n",
  1793. static_cast<int>(options.keep_log_files));
  1794. // just in case. Ignore errors
  1795. db_fs_->CreateDirIfMissing(db_dir, io_options_, nullptr)
  1796. .PermitUncheckedError();
  1797. db_fs_->CreateDirIfMissing(wal_dir, io_options_, nullptr)
  1798. .PermitUncheckedError();
  1799. // Files to restore, and from where (taking into account excluded files)
  1800. std::vector<std::pair<const BackupEngineImpl*, const FileInfo*>>
  1801. restore_file_infos;
  1802. restore_file_infos.reserve(backup->GetFiles().size() +
  1803. backup->GetExcludedFiles().size());
  1804. std::unordered_set<std::string> unowned_backups;
  1805. for (const auto& ef : backup->GetExcludedFiles()) {
  1806. const std::string& file = ef.relative_file;
  1807. bool found = false;
  1808. for (auto be : locked_restore_from_dirs) {
  1809. auto it = be->backuped_file_infos_.find(file);
  1810. if (it != backuped_file_infos_.end()) {
  1811. restore_file_infos.emplace_back(be, &*it->second);
  1812. found = true;
  1813. break;
  1814. }
  1815. }
  1816. if (!found) {
  1817. // In `kKeepLatestDbSessionIdFiles` restore mode, it's not strictly
  1818. // required for the corresponding backup file to be present for as long
  1819. // as existing, on-disk db file metadata matches this unowned backup file
  1820. // db_session_id and size.
  1821. if (options.mode == RestoreOptions::Mode::kKeepLatestDbSessionIdFiles) {
  1822. unowned_backups.insert(ef.relative_file);
  1823. continue;
  1824. }
  1825. return IOStatus::InvalidArgument(
  1826. "Excluded file " + file + " not found in any of %d" +
  1827. std::to_string(locked_restore_from_dirs.size() - 1) +
  1828. "backup directories!");
  1829. }
  1830. }
  1831. // Non-excluded files
  1832. for (const auto& file_info_shared : backup->GetFiles()) {
  1833. restore_file_infos.emplace_back(this, &*file_info_shared);
  1834. }
  1835. std::unordered_set<uint64_t> files_to_keep;
  1836. InferDBFilesToRetainInRestore(restore_file_infos, unowned_backups, db_dir,
  1837. options.mode, files_to_keep);
  1838. if (!unowned_backups.empty()) {
  1839. return IOStatus::InvalidArgument(
  1840. "Excluded file " + *unowned_backups.begin() + " (one amongst " +
  1841. std::to_string(unowned_backups.size()) + ") not found in any of" +
  1842. std::to_string(locked_restore_from_dirs.size() - 1) +
  1843. "backup directories!");
  1844. }
  1845. if (options.keep_log_files) {
  1846. // delete non-matching files in db_dir, but keep all the log files
  1847. DeleteChildren(db_dir, files_to_keep, 1 << kWalFile);
  1848. // move all the files from archive dir to wal_dir
  1849. std::string archive_dir = ArchivalDirectory(wal_dir);
  1850. std::vector<std::string> archive_files;
  1851. db_fs_->GetChildren(archive_dir, io_options_, &archive_files, nullptr)
  1852. .PermitUncheckedError(); // ignore errors
  1853. for (const auto& f : archive_files) {
  1854. uint64_t number;
  1855. FileType type;
  1856. bool ok = ParseFileName(f, &number, &type);
  1857. if (ok && type == kWalFile) {
  1858. ROCKS_LOG_INFO(options_.info_log,
  1859. "Moving log file from archive/ to wal_dir: %s",
  1860. f.c_str());
  1861. IOStatus io_s = db_fs_->RenameFile(
  1862. archive_dir + "/" + f, wal_dir + "/" + f, io_options_, nullptr);
  1863. if (!io_s.ok()) {
  1864. // if we can't move log file from archive_dir to wal_dir,
  1865. // we should fail, since it might mean data loss
  1866. return io_s;
  1867. }
  1868. }
  1869. }
  1870. } else {
  1871. DeleteChildren(wal_dir, files_to_keep);
  1872. DeleteChildren(ArchivalDirectory(wal_dir), files_to_keep);
  1873. DeleteChildren(db_dir, files_to_keep);
  1874. }
  1875. IOStatus io_s;
  1876. std::vector<RestoreAfterCopyOrCreateWorkItem> restore_items_to_finish;
  1877. std::string temporary_current_file;
  1878. std::string final_current_file;
  1879. std::unique_ptr<FSDirectory> db_dir_for_fsync;
  1880. std::unique_ptr<FSDirectory> wal_dir_for_fsync;
  1881. for (const auto& engine_and_file_info : restore_file_infos) {
  1882. const FileInfo* file_info = engine_and_file_info.second;
  1883. const std::string& file = file_info->filename;
  1884. std::string absolute_file =
  1885. engine_and_file_info.first->GetAbsolutePath(file);
  1886. Env* src_env = engine_and_file_info.first->backup_env_;
  1887. // 1. get DB filename
  1888. std::string dst = file_info->GetDbFileName();
  1889. // 2. find the filetype
  1890. uint64_t number;
  1891. FileType type;
  1892. bool ok = ParseFileName(dst, &number, &type);
  1893. if (!ok) {
  1894. return IOStatus::Corruption("Backup corrupted: Fail to parse filename " +
  1895. dst);
  1896. }
  1897. // `files_to_keep` identifies existing database files with contents
  1898. // 'identical' to their respective backup files (standard or excluded)
  1899. // as per user-selected RestoreOptions::Mode.
  1900. if (files_to_keep.find(number) != files_to_keep.end()) {
  1901. // This file is already in the destination directory. Skip restore.
  1902. continue;
  1903. }
  1904. // 3. Construct the final path
  1905. // kWalFile lives in wal_dir and all the rest live in db_dir
  1906. if (type == kWalFile) {
  1907. dst = wal_dir + "/" + dst;
  1908. if (options_.sync && !wal_dir_for_fsync) {
  1909. io_s = db_fs_->NewDirectory(wal_dir, io_options_, &wal_dir_for_fsync,
  1910. nullptr);
  1911. if (!io_s.ok()) {
  1912. return io_s;
  1913. }
  1914. }
  1915. } else {
  1916. dst = db_dir + "/" + dst;
  1917. if (options_.sync && !db_dir_for_fsync) {
  1918. io_s = db_fs_->NewDirectory(db_dir, io_options_, &db_dir_for_fsync,
  1919. nullptr);
  1920. if (!io_s.ok()) {
  1921. return io_s;
  1922. }
  1923. }
  1924. }
  1925. // For atomicity, initially restore CURRENT file to a temporary name.
  1926. // This is useful even without options_.sync e.g. in case the restore
  1927. // process is interrupted.
  1928. if (type == kCurrentFile) {
  1929. final_current_file = dst;
  1930. dst = temporary_current_file = dst + ".tmp";
  1931. }
  1932. ROCKS_LOG_INFO(options_.info_log, "Restoring %s to %s\n", file.c_str(),
  1933. dst.c_str());
  1934. // When file is being copied over, it means that it was either non-existent,
  1935. // purged or its' original on-disk representation didn't meet incremental
  1936. // restore tiering criteria. As such, we need to unconditionally recompute
  1937. // the checksum on the newly restored files - even if checksum was already
  1938. // computed on its' seed backup file in early assessment phase. Protection
  1939. // is put in place to ensure that there are no bugs in the actual restore /
  1940. // file copy logic and we're not producing garbage db files.
  1941. WorkItem copy_or_create_work_item(
  1942. absolute_file, dst, Temperature::kUnknown /* src_temp */,
  1943. file_info->temp, "" /* contents */, src_env, db_env_,
  1944. EnvOptions() /* src_env_options */, options_.sync,
  1945. options_.restore_rate_limiter.get(), file_info->size,
  1946. nullptr /* stats */);
  1947. RestoreAfterCopyOrCreateWorkItem after_copy_or_create_work_item(
  1948. copy_or_create_work_item.result.get_future(), file, dst,
  1949. file_info->checksum_hex);
  1950. work_items_.write(std::move(copy_or_create_work_item));
  1951. restore_items_to_finish.push_back(
  1952. std::move(after_copy_or_create_work_item));
  1953. }
  1954. IOStatus item_io_status;
  1955. for (auto& item : restore_items_to_finish) {
  1956. item.result.wait();
  1957. auto result = item.result.get();
  1958. item_io_status = result.io_status;
  1959. // Note: It is possible that both of the following bad-status cases occur
  1960. // during copying. But, we only return one status.
  1961. if (!item_io_status.ok()) {
  1962. io_s = item_io_status;
  1963. break;
  1964. } else if (!item.checksum_hex.empty() &&
  1965. item.checksum_hex != result.checksum_hex) {
  1966. io_s = IOStatus::Corruption(
  1967. "While restoring " + item.from_file + " -> " + item.to_file +
  1968. ": expected checksum is " + item.checksum_hex +
  1969. " while computed checksum is " + result.checksum_hex);
  1970. break;
  1971. }
  1972. }
  1973. // When enabled, the first FsyncWithDirOptions is to ensure all files are
  1974. // fully persisted before renaming CURRENT.tmp
  1975. if (io_s.ok() && db_dir_for_fsync) {
  1976. ROCKS_LOG_INFO(options_.info_log, "Restore: fsync\n");
  1977. io_s = db_dir_for_fsync->FsyncWithDirOptions(io_options_, nullptr,
  1978. DirFsyncOptions());
  1979. }
  1980. if (io_s.ok() && wal_dir_for_fsync) {
  1981. io_s = wal_dir_for_fsync->FsyncWithDirOptions(io_options_, nullptr,
  1982. DirFsyncOptions());
  1983. }
  1984. if (io_s.ok() && !temporary_current_file.empty()) {
  1985. ROCKS_LOG_INFO(options_.info_log, "Restore: atomic rename CURRENT.tmp\n");
  1986. assert(!final_current_file.empty());
  1987. io_s = db_fs_->RenameFile(temporary_current_file, final_current_file,
  1988. io_options_, nullptr);
  1989. }
  1990. if (io_s.ok() && db_dir_for_fsync && !temporary_current_file.empty()) {
  1991. // Second FsyncWithDirOptions is to ensure the final atomic rename of DB
  1992. // restore is fully persisted even if power goes out right after restore
  1993. // operation returns success
  1994. assert(db_dir_for_fsync);
  1995. io_s = db_dir_for_fsync->FsyncWithDirOptions(
  1996. io_options_, nullptr, DirFsyncOptions(final_current_file));
  1997. }
  1998. ROCKS_LOG_INFO(options_.info_log, "Restoring done -- %s\n",
  1999. io_s.ToString().c_str());
  2000. return io_s;
  2001. }
  2002. IOStatus BackupEngineImpl::VerifyBackup(BackupID backup_id,
  2003. bool verify_with_checksum) const {
  2004. assert(initialized_);
  2005. // Check if backup_id is corrupted, or valid and registered
  2006. auto corrupt_itr = corrupt_backups_.find(backup_id);
  2007. if (corrupt_itr != corrupt_backups_.end()) {
  2008. return corrupt_itr->second.first;
  2009. }
  2010. auto backup_itr = backups_.find(backup_id);
  2011. if (backup_itr == backups_.end()) {
  2012. return IOStatus::NotFound();
  2013. }
  2014. auto& backup = backup_itr->second;
  2015. if (backup->Empty()) {
  2016. return IOStatus::NotFound();
  2017. }
  2018. ROCKS_LOG_INFO(options_.info_log, "Verifying backup id %u\n", backup_id);
  2019. // Find all existing backup files belong to backup_id
  2020. std::unordered_map<std::string, uint64_t> curr_abs_path_to_size;
  2021. for (const auto& rel_dir : {GetPrivateFileRel(backup_id), GetSharedFileRel(),
  2022. GetSharedFileWithChecksumRel()}) {
  2023. const auto abs_dir = GetAbsolutePath(rel_dir);
  2024. // Shared directories allowed to be missing in some cases. Expected but
  2025. // missing files will be reported a few lines down.
  2026. ReadChildFileCurrentSizes(abs_dir, backup_fs_, &curr_abs_path_to_size)
  2027. .PermitUncheckedError();
  2028. }
  2029. // For all files registered in backup
  2030. std::vector<ComputeChecksumWorkItem> backup_verification_checksum_work_items;
  2031. std::unordered_map<std::string, std::string>
  2032. file_abs_path_to_checksum_hex_map;
  2033. for (const auto& file_info : backup->GetFiles()) {
  2034. const auto abs_path = GetAbsolutePath(file_info->filename);
  2035. // check existence of the file
  2036. if (curr_abs_path_to_size.find(abs_path) == curr_abs_path_to_size.end()) {
  2037. return IOStatus::NotFound("File missing: " + abs_path);
  2038. }
  2039. // verify file size
  2040. if (file_info->size != curr_abs_path_to_size[abs_path]) {
  2041. std::string size_info("Expected file size is " +
  2042. std::to_string(file_info->size) +
  2043. " while found file size is " +
  2044. std::to_string(curr_abs_path_to_size[abs_path]));
  2045. return IOStatus::Corruption("File corrupted: File size mismatch for " +
  2046. abs_path + ": " + size_info);
  2047. }
  2048. if (verify_with_checksum && !file_info->checksum_hex.empty()) {
  2049. const std::string filename = file_info->GetDbFileName();
  2050. uint64_t number;
  2051. FileType type;
  2052. if (!ParseFileName(filename, &number, &type)) {
  2053. // In case of checksum verification, file parsing and its' number
  2054. // retrieval are not strictly required. Rather, it's just a best effort
  2055. // to preserve all the file related metadata within the task scope.
  2056. number = 0;
  2057. }
  2058. file_abs_path_to_checksum_hex_map[abs_path] = file_info->checksum_hex;
  2059. WorkItem backup_file_work_item(
  2060. abs_path, "" /* dst_path */, Temperature::kUnknown,
  2061. Temperature::kUnknown /* dst_temperature */, "" /* contents */,
  2062. backup_env_, nullptr /* dst_env */, EnvOptions(), false /* sync */,
  2063. options_.backup_rate_limiter.get(), 0 /* size_limit */,
  2064. nullptr /* stats */, {} /* progress_callback */,
  2065. kUnknownFileChecksumFuncName /* src_checksum_func_name */,
  2066. "" /* src_checksum_hex */, "" /* db_id */, "" /* db_session_id*/,
  2067. WorkItemType::ComputeChecksum);
  2068. ComputeChecksumWorkItem backup_file_checksum_work_item(
  2069. backup_file_work_item.result.get_future(), abs_path, number);
  2070. ROCKS_LOG_INFO(options_.info_log,
  2071. "Scheduling checksum evaluation for %s...\n",
  2072. abs_path.c_str());
  2073. work_items_.write(std::move(backup_file_work_item));
  2074. backup_verification_checksum_work_items.push_back(
  2075. std::move(backup_file_checksum_work_item));
  2076. }
  2077. }
  2078. IOStatus io_s = IOStatus::OK();
  2079. if (verify_with_checksum) {
  2080. for (auto& item : backup_verification_checksum_work_items) {
  2081. // Given the limitations of the existing simple thread pooling model
  2082. // we deliberately decided to wait on each file checksum computation.
  2083. // Please refer to the comment below for more.
  2084. item.result.wait();
  2085. auto result = item.result.get();
  2086. if (result.io_status.ok()) {
  2087. auto find_it = file_abs_path_to_checksum_hex_map.find(item.file_path);
  2088. assert(find_it != file_abs_path_to_checksum_hex_map.end());
  2089. if (result.checksum_hex == find_it->second) {
  2090. ROCKS_LOG_INFO(options_.info_log,
  2091. "Checksum successfully validated for %s\n",
  2092. item.file_path.c_str());
  2093. continue;
  2094. }
  2095. }
  2096. std::string err_msg;
  2097. if (!result.io_status.ok()) {
  2098. err_msg =
  2099. "Failed to compute checksum for " + item.file_path +
  2100. ", IOStatus(code: ," + std::to_string(result.io_status.code()) +
  2101. ", subcode: " + std::to_string(result.io_status.subcode()) + ")";
  2102. } else { // checksum mismatch
  2103. err_msg =
  2104. "File corruption! Checksum mismatch for " + item.file_path + ". " +
  2105. "Expected: " + file_abs_path_to_checksum_hex_map[item.file_path] +
  2106. ", got: " + result.checksum_hex;
  2107. }
  2108. ROCKS_LOG_WARN(options_.info_log, "%s", err_msg.c_str());
  2109. if (io_s.ok()) {
  2110. // Memoize only the first corruption for reporting purpose.
  2111. io_s = IOStatus::Corruption(err_msg);
  2112. } else {
  2113. // Ideally, we want to bail out as early as possible upon encountering
  2114. // the very first mismatch, which would not only reduce the observed
  2115. // user latency, but also limit (potentially remote) read IO to the
  2116. // absolute minimum and allow the thread pool to reclaim the resources
  2117. // earlier. Even better, if we could cancel all in-progress threads!
  2118. //
  2119. // Unfortunately, with our current simple thread pool implementation
  2120. // we do not have by-tag control / handle over running threads.
  2121. // Having the choice of 1) returning to the caller earlier and having
  2122. // dangling threads occupied in evaluating checksums in the background
  2123. // and 2) waiting for all threads to finish, we choose 2) for cleaner
  2124. // and more intuitive semantics.
  2125. //
  2126. // TODO: Reevaluate after onboarding backup engine to a more
  2127. // sophisticated thread pool abstraction.
  2128. }
  2129. }
  2130. }
  2131. return io_s;
  2132. }
  2133. IOStatus BackupEngineImpl::CopyOrCreateFile(
  2134. const std::string& src, const std::string& dst, const std::string& contents,
  2135. uint64_t size_limit, Env* src_env, Env* dst_env,
  2136. const EnvOptions& src_env_options, bool sync, RateLimiter* rate_limiter,
  2137. std::function<void()> progress_callback, Temperature* src_temperature,
  2138. Temperature dst_temperature, uint64_t* bytes_toward_next_callback,
  2139. uint64_t* size, std::string* checksum_hex) {
  2140. assert(src.empty() != contents.empty());
  2141. IOStatus io_s;
  2142. std::unique_ptr<FSWritableFile> dst_file;
  2143. std::unique_ptr<FSSequentialFile> src_file;
  2144. FileOptions dst_file_options;
  2145. dst_file_options.use_mmap_writes = false;
  2146. dst_file_options.temperature = dst_temperature;
  2147. // TODO:(gzh) maybe use direct reads/writes here if possible
  2148. if (size != nullptr) {
  2149. *size = 0;
  2150. }
  2151. uint32_t checksum_value = 0;
  2152. // Check if size limit is set. if not, set it to very big number
  2153. if (size_limit == 0) {
  2154. size_limit = std::numeric_limits<uint64_t>::max();
  2155. }
  2156. io_s = dst_env->GetFileSystem()->NewWritableFile(dst, dst_file_options,
  2157. &dst_file, nullptr);
  2158. if (!io_s.ok()) {
  2159. return io_s;
  2160. }
  2161. if (!src.empty()) {
  2162. auto src_file_options = FileOptions(src_env_options);
  2163. src_file_options.temperature = *src_temperature;
  2164. io_s = src_env->GetFileSystem()->NewSequentialFile(src, src_file_options,
  2165. &src_file, nullptr);
  2166. }
  2167. if (io_s.IsPathNotFound() && *src_temperature != Temperature::kUnknown) {
  2168. // Retry without temperature hint in case the FileSystem is strict with
  2169. // non-kUnknown temperature option
  2170. io_s = src_env->GetFileSystem()->NewSequentialFile(
  2171. src, FileOptions(src_env_options), &src_file, nullptr);
  2172. }
  2173. if (!io_s.ok()) {
  2174. return io_s;
  2175. }
  2176. size_t buf_size = CalculateIOBufferSize(rate_limiter);
  2177. TEST_SYNC_POINT_CALLBACK(
  2178. "BackupEngineImpl::CopyOrCreateFile:CalculateIOBufferSize", &buf_size);
  2179. // TODO: pass in Histograms if the destination file is sst or blob
  2180. std::unique_ptr<WritableFileWriter> dest_writer(
  2181. new WritableFileWriter(std::move(dst_file), dst, dst_file_options));
  2182. std::unique_ptr<SequentialFileReader> src_reader;
  2183. std::unique_ptr<char[]> buf;
  2184. if (!src.empty()) {
  2185. // Return back current temperature in FileSystem
  2186. *src_temperature = src_file->GetTemperature();
  2187. src_reader.reset(new SequentialFileReader(
  2188. std::move(src_file), src, nullptr /* io_tracer */, {}, rate_limiter));
  2189. buf.reset(new char[buf_size]);
  2190. }
  2191. Slice data;
  2192. const IOOptions opts;
  2193. do {
  2194. if (stop_backup_.load(std::memory_order_acquire)) {
  2195. return status_to_io_status(Status::Incomplete("Backup stopped"));
  2196. }
  2197. if (!src.empty()) {
  2198. size_t buffer_to_read =
  2199. (buf_size < size_limit) ? buf_size : static_cast<size_t>(size_limit);
  2200. io_s = src_reader->Read(buffer_to_read, &data, buf.get(),
  2201. Env::IO_LOW /* rate_limiter_priority */);
  2202. *bytes_toward_next_callback += data.size();
  2203. } else {
  2204. data = contents;
  2205. }
  2206. size_limit -= data.size();
  2207. TEST_SYNC_POINT_CALLBACK(
  2208. "BackupEngineImpl::CopyOrCreateFile:CorruptionDuringBackup",
  2209. (src.length() > 4 && src.rfind(".sst") == src.length() - 4) ? &data
  2210. : nullptr);
  2211. if (!io_s.ok()) {
  2212. return io_s;
  2213. }
  2214. if (size != nullptr) {
  2215. *size += data.size();
  2216. }
  2217. if (checksum_hex != nullptr) {
  2218. checksum_value = crc32c::Extend(checksum_value, data.data(), data.size());
  2219. }
  2220. io_s = dest_writer->Append(opts, data);
  2221. if (rate_limiter != nullptr) {
  2222. if (!src.empty()) {
  2223. rate_limiter->Request(data.size(), Env::IO_LOW, nullptr /* stats */,
  2224. RateLimiter::OpType::kWrite);
  2225. } else {
  2226. LoopRateLimitRequestHelper(data.size(), rate_limiter, Env::IO_LOW,
  2227. nullptr /* stats */,
  2228. RateLimiter::OpType::kWrite);
  2229. }
  2230. }
  2231. while (*bytes_toward_next_callback >=
  2232. options_.callback_trigger_interval_size) {
  2233. *bytes_toward_next_callback -= options_.callback_trigger_interval_size;
  2234. if (progress_callback) {
  2235. std::lock_guard<std::mutex> lock(byte_report_mutex_);
  2236. try {
  2237. progress_callback();
  2238. } catch (const std::exception& exn) {
  2239. io_s = IOStatus::Aborted("Exception in progress_callback: " +
  2240. std::string(exn.what()));
  2241. break;
  2242. } catch (...) {
  2243. io_s = IOStatus::Aborted("Unknown exception in progress_callback");
  2244. break;
  2245. }
  2246. }
  2247. }
  2248. } while (io_s.ok() && contents.empty() && data.size() > 0 && size_limit > 0);
  2249. // Convert uint32_t checksum to hex checksum
  2250. if (checksum_hex != nullptr) {
  2251. checksum_hex->assign(ChecksumInt32ToHex(checksum_value));
  2252. }
  2253. if (io_s.ok() && sync) {
  2254. io_s = dest_writer->Sync(opts, false);
  2255. }
  2256. if (io_s.ok()) {
  2257. io_s = dest_writer->Close(opts);
  2258. }
  2259. return io_s;
  2260. }
  2261. uint64_t BackupEngineImpl::CalculateIOBufferSize(
  2262. RateLimiter* rate_limiter) const {
  2263. if (options_.io_buffer_size > 0) {
  2264. return options_.io_buffer_size;
  2265. }
  2266. return rate_limiter != nullptr
  2267. ? static_cast<size_t>(rate_limiter->GetSingleBurstBytes())
  2268. : kDefaultCopyFileBufferSize;
  2269. }
  2270. // fname will always start with "/"
  2271. IOStatus BackupEngineImpl::AddBackupFileWorkItem(
  2272. std::unordered_set<std::string>& live_dst_paths,
  2273. std::deque<BackupAfterCopyOrCreateWorkItem>& backup_items_to_finish,
  2274. std::deque<BackupWorkItemPair>* excludable_items, BackupID backup_id,
  2275. bool shared, const std::string& src_dir, const std::string& fname,
  2276. const EnvOptions& src_env_options, RateLimiter* rate_limiter,
  2277. FileType file_type, uint64_t size_bytes, Statistics* stats,
  2278. uint64_t size_limit, bool shared_checksum,
  2279. std::function<void()> progress_callback, const std::string& contents,
  2280. const std::string& src_checksum_func_name,
  2281. const std::string& src_checksum_str, const Temperature src_temperature) {
  2282. assert(contents.empty() != src_dir.empty());
  2283. std::string src_path = src_dir + "/" + fname;
  2284. std::string dst_relative;
  2285. std::string dst_relative_tmp;
  2286. std::string db_id;
  2287. std::string db_session_id;
  2288. // crc32c checksum in hex. empty == unavailable / unknown
  2289. std::string checksum_hex;
  2290. // Whenever a default checksum function name is passed in, we will compares
  2291. // the corresponding checksum values after copying. Note that only table and
  2292. // blob files may have a known checksum function name passed in.
  2293. //
  2294. // If no default checksum function name is passed in and db session id is not
  2295. // available, we will calculate the checksum *before* copying in two cases
  2296. // (we always calcuate checksums when copying or creating for any file types):
  2297. // a) share_files_with_checksum is true and file type is table;
  2298. // b) share_table_files is true and the file exists already.
  2299. //
  2300. // Step 0: Check if default checksum function name is passed in
  2301. if (kDbFileChecksumFuncName == src_checksum_func_name) {
  2302. if (src_checksum_str == kUnknownFileChecksum) {
  2303. return status_to_io_status(
  2304. Status::Aborted("Unknown checksum value for " + fname));
  2305. }
  2306. checksum_hex = ChecksumStrToHex(src_checksum_str);
  2307. }
  2308. // Step 1: Prepare the relative path to destination
  2309. if (shared && shared_checksum) {
  2310. if (GetNamingNoFlags() != BackupEngineOptions::kLegacyCrc32cAndFileSize &&
  2311. file_type != kBlobFile) {
  2312. // Prepare db_session_id to add to the file name
  2313. Status s = GetFileDbIdentities(db_env_, src_env_options, src_path,
  2314. src_temperature, rate_limiter, &db_id,
  2315. &db_session_id);
  2316. if (s.IsPathNotFound()) {
  2317. // Retry with any temperature
  2318. s = GetFileDbIdentities(db_env_, src_env_options, src_path,
  2319. Temperature::kUnknown, rate_limiter, &db_id,
  2320. &db_session_id);
  2321. }
  2322. if (s.IsNotFound()) {
  2323. // db_id and db_session_id will be empty, which is OK for old files
  2324. } else if (!s.ok()) {
  2325. return status_to_io_status(std::move(s));
  2326. }
  2327. }
  2328. // Calculate checksum if checksum and db session id are not available.
  2329. // If db session id is available, we will not calculate the checksum
  2330. // since the session id should suffice to avoid file name collision in
  2331. // the shared_checksum directory.
  2332. if (checksum_hex.empty() && db_session_id.empty()) {
  2333. IOStatus io_s = ReadFileAndComputeChecksum(
  2334. src_path, db_fs_, src_env_options, size_limit, &checksum_hex,
  2335. src_temperature);
  2336. if (!io_s.ok()) {
  2337. return io_s;
  2338. }
  2339. }
  2340. if (size_bytes == std::numeric_limits<uint64_t>::max()) {
  2341. return IOStatus::NotFound("File missing: " + src_path);
  2342. }
  2343. // dst_relative depends on the following conditions:
  2344. // 1) the naming scheme is kUseDbSessionId,
  2345. // 2) db_session_id is not empty,
  2346. // 3) checksum is available in the DB manifest.
  2347. // If 1,2,3) are satisfied, then dst_relative will be of the form:
  2348. // shared_checksum/<file_number>_<checksum>_<db_session_id>.sst
  2349. // If 1,2) are satisfied, then dst_relative will be of the form:
  2350. // shared_checksum/<file_number>_<db_session_id>.sst
  2351. // Otherwise, dst_relative is of the form
  2352. // shared_checksum/<file_number>_<checksum>_<size>.sst
  2353. //
  2354. // For blob files, db_session_id is not supported with the blob file format.
  2355. // It uses original/legacy naming scheme.
  2356. // dst_relative will be of the form:
  2357. // shared_checksum/<file_number>_<checksum>_<size>.blob
  2358. dst_relative = GetSharedFileWithChecksum(fname, checksum_hex, size_bytes,
  2359. db_session_id);
  2360. dst_relative_tmp = GetSharedFileWithChecksumRel(dst_relative, true);
  2361. dst_relative = GetSharedFileWithChecksumRel(dst_relative, false);
  2362. } else if (shared) {
  2363. dst_relative_tmp = GetSharedFileRel(fname, true);
  2364. dst_relative = GetSharedFileRel(fname, false);
  2365. } else {
  2366. dst_relative = GetPrivateFileRel(backup_id, false, fname);
  2367. }
  2368. // We copy into `temp_dest_path` and, once finished, rename it to
  2369. // `final_dest_path`. This allows files to atomically appear at
  2370. // `final_dest_path`. We can copy directly to the final path when atomicity
  2371. // is unnecessary, like for files in private backup directories.
  2372. const std::string* copy_dest_path;
  2373. std::string temp_dest_path;
  2374. std::string final_dest_path = GetAbsolutePath(dst_relative);
  2375. if (!dst_relative_tmp.empty()) {
  2376. temp_dest_path = GetAbsolutePath(dst_relative_tmp);
  2377. copy_dest_path = &temp_dest_path;
  2378. } else {
  2379. copy_dest_path = &final_dest_path;
  2380. }
  2381. // Step 2: Determine whether to copy or not
  2382. // if it's shared, we also need to check if it exists -- if it does, no need
  2383. // to copy it again.
  2384. bool need_to_copy = true;
  2385. // true if final_dest_path is the same path as another live file
  2386. const bool same_path =
  2387. live_dst_paths.find(final_dest_path) != live_dst_paths.end();
  2388. bool file_exists = false;
  2389. if (shared && !same_path) {
  2390. // Should be in shared directory but not a live path, check existence in
  2391. // shared directory
  2392. IOStatus exist =
  2393. backup_fs_->FileExists(final_dest_path, io_options_, nullptr);
  2394. if (exist.ok()) {
  2395. file_exists = true;
  2396. } else if (exist.IsNotFound()) {
  2397. file_exists = false;
  2398. } else {
  2399. return exist;
  2400. }
  2401. }
  2402. if (!contents.empty()) {
  2403. need_to_copy = false;
  2404. } else if (shared && (same_path || file_exists)) {
  2405. need_to_copy = false;
  2406. auto find_result = backuped_file_infos_.find(dst_relative);
  2407. if (find_result == backuped_file_infos_.end() && !same_path) {
  2408. // file exists but not referenced
  2409. ROCKS_LOG_INFO(
  2410. options_.info_log,
  2411. "%s already present, but not referenced by any backup. We will "
  2412. "overwrite the file.",
  2413. fname.c_str());
  2414. need_to_copy = true;
  2415. // Defer any failure reporting to when we try to write the file
  2416. backup_fs_->DeleteFile(final_dest_path, io_options_, nullptr)
  2417. .PermitUncheckedError();
  2418. } else {
  2419. // file exists and referenced
  2420. if (checksum_hex.empty()) {
  2421. // same_path should not happen for a standard DB, so OK to
  2422. // read file contents to check for checksum mismatch between
  2423. // two files from same DB getting same name.
  2424. // For compatibility with future meta file that might not have
  2425. // crc32c checksum available, consider it might be empty, but
  2426. // we don't currently generate meta file without crc32c checksum.
  2427. // Therefore we have to read & compute it if we don't have it.
  2428. if (!same_path && !find_result->second->checksum_hex.empty()) {
  2429. assert(find_result != backuped_file_infos_.end());
  2430. // Note: to save I/O on incremental backups, we copy prior known
  2431. // checksum of the file instead of reading entire file contents
  2432. // to recompute it.
  2433. checksum_hex = find_result->second->checksum_hex;
  2434. // Regarding corruption detection, consider:
  2435. // (a) the DB file is corrupt (since previous backup) and the backup
  2436. // file is OK: we failed to detect, but the backup is safe. DB can
  2437. // be repaired/restored once its corruption is detected.
  2438. // (b) the backup file is corrupt (since previous backup) and the
  2439. // db file is OK: we failed to detect, but the backup is corrupt.
  2440. // CreateNewBackup should support fast incremental backups and
  2441. // there's no way to support that without reading all the files.
  2442. // We might add an option for extra checks on incremental backup,
  2443. // but until then, use VerifyBackups to check existing backup data.
  2444. // (c) file name collision with legitimately different content.
  2445. // This is almost inconceivable with a well-generated DB session
  2446. // ID, but even in that case, we double check the file sizes in
  2447. // BackupMeta::AddFile.
  2448. } else {
  2449. IOStatus io_s = ReadFileAndComputeChecksum(
  2450. src_path, db_fs_, src_env_options, size_limit, &checksum_hex,
  2451. src_temperature);
  2452. if (!io_s.ok()) {
  2453. return io_s;
  2454. }
  2455. }
  2456. }
  2457. if (!db_session_id.empty()) {
  2458. ROCKS_LOG_INFO(options_.info_log,
  2459. "%s already present, with checksum %s, size %" PRIu64
  2460. " and DB session identity %s",
  2461. fname.c_str(), checksum_hex.c_str(), size_bytes,
  2462. db_session_id.c_str());
  2463. } else {
  2464. ROCKS_LOG_INFO(options_.info_log,
  2465. "%s already present, with checksum %s and size %" PRIu64,
  2466. fname.c_str(), checksum_hex.c_str(), size_bytes);
  2467. }
  2468. }
  2469. }
  2470. live_dst_paths.insert(final_dest_path);
  2471. // Step 3: Add work item
  2472. if (!contents.empty() || need_to_copy) {
  2473. WorkItem copy_or_create_work_item(
  2474. src_dir.empty() ? "" : src_path, *copy_dest_path, src_temperature,
  2475. Temperature::kUnknown /*dst_temp*/, contents, db_env_, backup_env_,
  2476. src_env_options, options_.sync, rate_limiter, size_limit, stats,
  2477. progress_callback, src_checksum_func_name, checksum_hex, db_id,
  2478. db_session_id);
  2479. BackupAfterCopyOrCreateWorkItem after_copy_or_create_work_item(
  2480. copy_or_create_work_item.result.get_future(), shared, need_to_copy,
  2481. backup_env_, temp_dest_path, final_dest_path, dst_relative);
  2482. if (excludable_items != nullptr && shared && shared_checksum &&
  2483. need_to_copy) {
  2484. ROCKS_LOG_INFO(options_.info_log, "Copying (if not excluded) %s to %s",
  2485. fname.c_str(), copy_dest_path->c_str());
  2486. excludable_items->emplace_back(std::move(copy_or_create_work_item),
  2487. std::move(after_copy_or_create_work_item));
  2488. } else {
  2489. // For files known not excluded, can start copying even before finishing
  2490. // the checkpoint
  2491. ROCKS_LOG_INFO(options_.info_log, "Copying %s to %s", fname.c_str(),
  2492. copy_dest_path->c_str());
  2493. work_items_.write(std::move(copy_or_create_work_item));
  2494. backup_items_to_finish.push_back(
  2495. std::move(after_copy_or_create_work_item));
  2496. }
  2497. } else {
  2498. std::promise<WorkItemResult> promise_result;
  2499. BackupAfterCopyOrCreateWorkItem after_copy_or_create_work_item(
  2500. promise_result.get_future(), shared, need_to_copy, backup_env_,
  2501. temp_dest_path, final_dest_path, dst_relative);
  2502. backup_items_to_finish.push_back(std::move(after_copy_or_create_work_item));
  2503. WorkItemResult result;
  2504. result.io_status = IOStatus::OK();
  2505. result.size = size_bytes;
  2506. result.checksum_hex = std::move(checksum_hex);
  2507. result.db_id = std::move(db_id);
  2508. result.db_session_id = std::move(db_session_id);
  2509. promise_result.set_value(std::move(result));
  2510. }
  2511. return IOStatus::OK();
  2512. }
  2513. IOStatus BackupEngineImpl::ReadFileAndComputeChecksum(
  2514. const std::string& src, const std::shared_ptr<FileSystem>& src_fs,
  2515. const EnvOptions& src_env_options, uint64_t size_limit,
  2516. std::string* checksum_hex, const Temperature src_temperature) const {
  2517. if (checksum_hex == nullptr) {
  2518. return status_to_io_status(Status::Aborted("Checksum pointer is null"));
  2519. }
  2520. uint32_t checksum_value = 0;
  2521. if (size_limit == 0) {
  2522. size_limit = std::numeric_limits<uint64_t>::max();
  2523. }
  2524. std::unique_ptr<SequentialFileReader> src_reader;
  2525. auto file_options = FileOptions(src_env_options);
  2526. file_options.temperature = src_temperature;
  2527. RateLimiter* rate_limiter = options_.backup_rate_limiter.get();
  2528. IOStatus io_s = SequentialFileReader::Create(
  2529. src_fs, src, file_options, &src_reader, nullptr /* dbg */, rate_limiter);
  2530. if (io_s.IsPathNotFound() && src_temperature != Temperature::kUnknown) {
  2531. // Retry without temperature hint in case the FileSystem is strict with
  2532. // non-kUnknown temperature option
  2533. file_options.temperature = Temperature::kUnknown;
  2534. io_s = SequentialFileReader::Create(src_fs, src, file_options, &src_reader,
  2535. nullptr /* dbg */, rate_limiter);
  2536. }
  2537. if (!io_s.ok()) {
  2538. return io_s;
  2539. }
  2540. size_t buf_size = CalculateIOBufferSize(rate_limiter);
  2541. std::unique_ptr<char[]> buf(new char[buf_size]);
  2542. Slice data;
  2543. do {
  2544. if (stop_backup_.load(std::memory_order_acquire)) {
  2545. return status_to_io_status(Status::Incomplete("Backup stopped"));
  2546. }
  2547. size_t buffer_to_read =
  2548. (buf_size < size_limit) ? buf_size : static_cast<size_t>(size_limit);
  2549. io_s = src_reader->Read(buffer_to_read, &data, buf.get(),
  2550. Env::IO_LOW /* rate_limiter_priority */);
  2551. if (!io_s.ok()) {
  2552. return io_s;
  2553. }
  2554. size_limit -= data.size();
  2555. checksum_value = crc32c::Extend(checksum_value, data.data(), data.size());
  2556. } while (data.size() > 0 && size_limit > 0);
  2557. checksum_hex->assign(ChecksumInt32ToHex(checksum_value));
  2558. return io_s;
  2559. }
  2560. Status BackupEngineImpl::GetFileDbIdentities(Env* src_env,
  2561. const EnvOptions& src_env_options,
  2562. const std::string& file_path,
  2563. Temperature file_temp,
  2564. RateLimiter* rate_limiter,
  2565. std::string* db_id,
  2566. std::string* db_session_id) const {
  2567. assert(db_id != nullptr || db_session_id != nullptr);
  2568. Options options;
  2569. options.env = src_env;
  2570. SstFileDumper sst_reader(options, file_path, file_temp,
  2571. 2 * 1024 * 1024
  2572. /* readahead_size */,
  2573. true /* verify_checksum */, false /* output_hex */,
  2574. false /* decode_blob_index */, src_env_options,
  2575. true /* silent */);
  2576. const TableProperties* table_properties = nullptr;
  2577. std::shared_ptr<const TableProperties> tp;
  2578. Status s = sst_reader.getStatus();
  2579. if (s.ok()) {
  2580. // Try to get table properties from the table reader of sst_reader
  2581. if (!sst_reader.ReadTableProperties(&tp).ok()) {
  2582. // FIXME (peterd): this logic is untested and seems obsolete.
  2583. // Try to use table properties from the initialization of sst_reader
  2584. table_properties = sst_reader.GetInitTableProperties();
  2585. } else {
  2586. table_properties = tp.get();
  2587. if (table_properties != nullptr && rate_limiter != nullptr) {
  2588. // sizeof(*table_properties) is a sufficent but far-from-exact
  2589. // approximation of read bytes due to metaindex block, std::string
  2590. // properties and varint compression
  2591. LoopRateLimitRequestHelper(sizeof(*table_properties), rate_limiter,
  2592. Env::IO_LOW, nullptr /* stats */,
  2593. RateLimiter::OpType::kRead);
  2594. }
  2595. }
  2596. } else {
  2597. ROCKS_LOG_INFO(options_.info_log, "Failed to read %s: %s",
  2598. file_path.c_str(), s.ToString().c_str());
  2599. return s;
  2600. }
  2601. if (table_properties != nullptr) {
  2602. if (db_id != nullptr) {
  2603. db_id->assign(table_properties->db_id);
  2604. }
  2605. if (db_session_id != nullptr) {
  2606. db_session_id->assign(table_properties->db_session_id);
  2607. if (db_session_id->empty()) {
  2608. s = Status::NotFound("DB session identity not found in " + file_path);
  2609. ROCKS_LOG_INFO(options_.info_log, "%s", s.ToString().c_str());
  2610. return s;
  2611. }
  2612. }
  2613. return Status::OK();
  2614. } else {
  2615. s = Status::Corruption("Table properties missing in " + file_path);
  2616. ROCKS_LOG_INFO(options_.info_log, "%s", s.ToString().c_str());
  2617. return s;
  2618. }
  2619. }
  2620. void BackupEngineImpl::LoopRateLimitRequestHelper(
  2621. const size_t total_bytes_to_request, RateLimiter* rate_limiter,
  2622. const Env::IOPriority pri, Statistics* stats,
  2623. const RateLimiter::OpType op_type) {
  2624. assert(rate_limiter != nullptr);
  2625. size_t remaining_bytes = total_bytes_to_request;
  2626. size_t request_bytes = 0;
  2627. while (remaining_bytes > 0) {
  2628. request_bytes =
  2629. std::min(static_cast<size_t>(rate_limiter->GetSingleBurstBytes()),
  2630. remaining_bytes);
  2631. rate_limiter->Request(request_bytes, pri, stats, op_type);
  2632. remaining_bytes -= request_bytes;
  2633. }
  2634. }
  2635. void BackupEngineImpl::InferDBFilesToRetainInRestore(
  2636. const std::vector<std::pair<const BackupEngineImpl*, const FileInfo*>>&
  2637. restore_file_infos,
  2638. std::unordered_set<std::string>& unowned_backups, const std::string& db_dir,
  2639. RestoreOptions::Mode mode,
  2640. std::unordered_set<uint64_t>& files_to_keep) const {
  2641. if (mode == RestoreOptions::Mode::kPurgeAllFiles) {
  2642. return;
  2643. }
  2644. ROCKS_LOG_INFO(options_.info_log,
  2645. "Starting incremental restore evaluation in %" PRIu32 " mode",
  2646. mode);
  2647. ROCKS_LOG_INFO(options_.info_log, "Constructing backup files mapping...");
  2648. std::unordered_map<uint64_t,
  2649. std::pair<const BackupEngineImpl*, const FileInfo*>>
  2650. file_num_to_engine_infos;
  2651. for (const auto& engine_and_file_info : restore_file_infos) {
  2652. uint64_t number;
  2653. FileType type;
  2654. std::string filename = engine_and_file_info.second->GetDbFileName();
  2655. if (!ParseFileName(filename, &number, &type)) {
  2656. continue;
  2657. }
  2658. // We only care to optimize restore for large files - like SSTs and blobs.
  2659. // Blobs are only supported in kVerifyChecksum.
  2660. if (type == kTableFile ||
  2661. (type == kBlobFile && mode == RestoreOptions::Mode::kVerifyChecksum)) {
  2662. file_num_to_engine_infos[number] = engine_and_file_info;
  2663. }
  2664. }
  2665. ROCKS_LOG_INFO(
  2666. options_.info_log,
  2667. "Evaluating existing .sst%s files restore retention eligibility...",
  2668. mode == RestoreOptions::Mode::kVerifyChecksum ? " and .blob files" : "");
  2669. std::vector<std::string> children;
  2670. db_fs_->GetChildren(db_dir, io_options_, &children, nullptr)
  2671. .PermitUncheckedError(); // ignore errors
  2672. std::vector<ComputeChecksumWorkItem> backup_files_compute_checksum_work_items;
  2673. std::vector<ComputeChecksumWorkItem> db_files_compute_checksum_work_items;
  2674. std::unordered_map<uint64_t, std::string> backup_file_num_to_checksum;
  2675. for (const auto& f : children) {
  2676. uint64_t number;
  2677. FileType type;
  2678. bool ok = ParseFileName(f, &number, &type);
  2679. if (!ok) {
  2680. // Couldn't parse existing file name. We deliberately choose to sliently
  2681. // skip here to avoid noisy & excessive logging in user controlled envs.
  2682. continue;
  2683. }
  2684. if (type != kTableFile && type != kBlobFile) {
  2685. // We only care to optimize restore for large files - like SSTs / blobs.
  2686. continue;
  2687. }
  2688. if (type == kBlobFile && mode != RestoreOptions::Mode::kVerifyChecksum) {
  2689. // Blob files are only supported in kVerifyChecksum mode.
  2690. continue;
  2691. }
  2692. uint64_t size_bytes = 0;
  2693. std::string db_file_path = db_dir + "/" + f;
  2694. IOStatus io_st = db_fs_->GetFileSize(db_file_path, io_options_, &size_bytes,
  2695. nullptr /* dbg */);
  2696. if (!io_st.ok()) {
  2697. Log(options_.info_log,
  2698. "Failed to get the file size for existing file: '%s'. IO status: %s",
  2699. f.c_str(), io_st.ToString().c_str());
  2700. continue;
  2701. }
  2702. RateLimiter* rate_limiter = options_.restore_rate_limiter.get();
  2703. if (mode == RestoreOptions::Mode::kKeepLatestDbSessionIdFiles) {
  2704. // On-disk existing db file names require direct file footer query
  2705. // as they don't follow same naming convention as backups.
  2706. std::string db_id;
  2707. std::string db_session_id;
  2708. Status s = GetFileDbIdentities(
  2709. db_env_, EnvOptions() /* src_env_options */,
  2710. db_file_path /* file_path */, Temperature::kUnknown /* src_temp */,
  2711. rate_limiter, &db_id, &db_session_id);
  2712. if (!s.ok()) {
  2713. Log(options_.info_log,
  2714. "Encountered IO error while obtaining db session id metadata for "
  2715. "existing file '%s'.",
  2716. db_file_path.c_str());
  2717. continue;
  2718. }
  2719. const std::string checksum_hex = "";
  2720. std::string shared_file_name = GenerateSharedFileWithDbSessionIdAndSize(
  2721. f, size_bytes, db_session_id);
  2722. bool found = false;
  2723. const auto& f_ei = file_num_to_engine_infos.find(number);
  2724. if (f_ei != file_num_to_engine_infos.end()) {
  2725. found = f_ei->second.second->filename == shared_file_name;
  2726. }
  2727. if (!found) {
  2728. const auto& uo_sst_bfn = unowned_backups.find(shared_file_name);
  2729. if (uo_sst_bfn != unowned_backups.end()) {
  2730. // Db file has been successfully associated with the excluded backup.
  2731. unowned_backups.erase(shared_file_name);
  2732. found = true;
  2733. }
  2734. }
  2735. if (found) {
  2736. files_to_keep.insert(number);
  2737. ROCKS_LOG_INFO(options_.info_log,
  2738. "Existing db file '%s' is retained for restore.",
  2739. f.c_str());
  2740. }
  2741. } else if (mode == RestoreOptions::Mode::kVerifyChecksum) {
  2742. const auto& f_ei = file_num_to_engine_infos.find(number);
  2743. if (f_ei == file_num_to_engine_infos.end() ||
  2744. f_ei->second.second->GetDbFileName() != f) {
  2745. Log(options_.info_log,
  2746. "Existing file '%s' is not present in the backup!", f.c_str());
  2747. continue;
  2748. }
  2749. auto backup_engine_impl = f_ei->second.first;
  2750. auto backup_file_info = f_ei->second.second;
  2751. DBOptions db_options;
  2752. std::string backup_file_path =
  2753. backup_engine_impl->GetAbsolutePath(backup_file_info->filename);
  2754. std::string backup_file_checksum = backup_file_info->checksum_hex;
  2755. if (!backup_file_checksum.empty()) {
  2756. backup_file_num_to_checksum[number] = backup_file_checksum;
  2757. } else {
  2758. // Backup file checksum is missing in the backup metadata.
  2759. // Given explicit requirement, compute it asynchronously.
  2760. EnvOptions backup_env_options;
  2761. if (type == kBlobFile) {
  2762. backup_engine_impl->backup_env_->OptimizeForBlobFileRead(
  2763. backup_env_options, ImmutableDBOptions(db_options));
  2764. } else if (type == kTableFile) {
  2765. backup_engine_impl->backup_env_->OptimizeForCompactionTableRead(
  2766. backup_env_options, ImmutableDBOptions(db_options));
  2767. }
  2768. WorkItem backup_file_work_item(
  2769. backup_file_path, "" /* dst_path */, backup_file_info->temp,
  2770. Temperature::kUnknown /* dst_temperature */, "" /* contents */,
  2771. backup_engine_impl->backup_env_, nullptr /* dst_env */,
  2772. backup_env_options, false /* sync */,
  2773. options_.restore_rate_limiter.get(), 0 /* size_limit */,
  2774. nullptr /* stats */, {} /* progress_callback */,
  2775. kUnknownFileChecksumFuncName /* src_checksum_func_name */,
  2776. "" /* src_checksum_hex */, "" /* db_id */, "" /* db_session_id*/,
  2777. WorkItemType::ComputeChecksum);
  2778. ComputeChecksumWorkItem backup_file_checksum_work_item(
  2779. backup_file_work_item.result.get_future(),
  2780. backup_file_info->filename, number);
  2781. work_items_.write(std::move(backup_file_work_item));
  2782. backup_files_compute_checksum_work_items.push_back(
  2783. std::move(backup_file_checksum_work_item));
  2784. Log(options_.info_log,
  2785. "Checksum is missing in '%s' backup file metadata."
  2786. "Scheduled async computation...",
  2787. backup_file_info->filename.c_str());
  2788. }
  2789. // Unconditionally compute checksum for existing file.
  2790. EnvOptions db_env_options;
  2791. if (type == kBlobFile) {
  2792. db_env_->OptimizeForBlobFileRead(db_env_options,
  2793. ImmutableDBOptions(db_options));
  2794. } else if (type == kTableFile) {
  2795. db_env_->OptimizeForCompactionTableRead(db_env_options,
  2796. ImmutableDBOptions(db_options));
  2797. }
  2798. WorkItem db_file_work_item(
  2799. db_file_path, "" /* dst_path */, backup_file_info->temp,
  2800. Temperature::kUnknown /* dst_temperature */, "" /* contents */,
  2801. db_env_, nullptr /* dst_env */, db_env_options, false /* sync */,
  2802. options_.restore_rate_limiter.get(), 0 /* size_limit */,
  2803. nullptr /* stats */, {} /* progress_callback */,
  2804. kUnknownFileChecksumFuncName /* src_checksum_func_name */,
  2805. "" /* src_checksum_hex */, "" /* db_id */, "" /* db_session_id*/,
  2806. WorkItemType::ComputeChecksum);
  2807. ComputeChecksumWorkItem db_file_checksum_work_item(
  2808. db_file_work_item.result.get_future(), db_file_path, number);
  2809. work_items_.write(std::move(db_file_work_item));
  2810. db_files_compute_checksum_work_items.push_back(
  2811. std::move(db_file_checksum_work_item));
  2812. Log(options_.info_log,
  2813. "Schedule async checksum computation for file '%s'", f.c_str());
  2814. }
  2815. }
  2816. if (mode == RestoreOptions::Mode::kVerifyChecksum) {
  2817. // First loop through checksum computation results for backup files.
  2818. for (auto& item : backup_files_compute_checksum_work_items) {
  2819. item.result.wait();
  2820. auto result = item.result.get();
  2821. IOStatus item_io_status = result.io_status;
  2822. if (!item_io_status.ok()) {
  2823. // Failed computation for backup file will result in purging
  2824. // the existing file and restoring the backup file.
  2825. Log(options_.info_log,
  2826. "Encountered IO error while computing checksum for "
  2827. "backup file '%s'.",
  2828. item.file_path.c_str());
  2829. continue;
  2830. }
  2831. backup_file_num_to_checksum[item.file_number] = result.checksum_hex;
  2832. }
  2833. // Loop through db files checksum computation results.
  2834. for (auto& item : db_files_compute_checksum_work_items) {
  2835. item.result.wait();
  2836. auto result = item.result.get();
  2837. IOStatus item_io_status = result.io_status;
  2838. if (!item_io_status.ok()) {
  2839. // Failed computation for existing file will result in purging
  2840. // and restoring it from the corresponding backup file.
  2841. Log(options_.info_log,
  2842. "Encountered IO error while computing checksum for "
  2843. "existing file '%s'.",
  2844. item.file_path.c_str());
  2845. continue;
  2846. }
  2847. auto it = backup_file_num_to_checksum.find(item.file_number);
  2848. if (it == backup_file_num_to_checksum.end()) {
  2849. Log(options_.info_log,
  2850. "Failed to find backup file checksum for existing file '%s'.",
  2851. item.file_path.c_str());
  2852. continue;
  2853. }
  2854. if (it->second != result.checksum_hex) {
  2855. Log(options_.info_log,
  2856. "Checksum mismatch between backup file and existing file '%s'.",
  2857. item.file_path.c_str());
  2858. continue;
  2859. }
  2860. files_to_keep.insert(item.file_number);
  2861. Log(options_.info_log, "Existing file '%s' is retained for restore.",
  2862. item.file_path.c_str());
  2863. }
  2864. }
  2865. ROCKS_LOG_INFO(options_.info_log,
  2866. "Done with incremental restore evaluation. "
  2867. "Retained %zu files.",
  2868. files_to_keep.size());
  2869. }
  2870. void BackupEngineImpl::DeleteChildren(
  2871. const std::string& dir, const std::unordered_set<uint64_t>& files_to_keep,
  2872. uint32_t file_type_filter) const {
  2873. std::vector<std::string> children;
  2874. db_fs_->GetChildren(dir, io_options_, &children, nullptr)
  2875. .PermitUncheckedError(); // ignore errors
  2876. for (const auto& f : children) {
  2877. uint64_t number;
  2878. FileType type;
  2879. bool ok = ParseFileName(f, &number, &type);
  2880. if (ok && (files_to_keep.find(number) != files_to_keep.end())) {
  2881. // don't delete file with this number.
  2882. continue;
  2883. }
  2884. if (ok && (file_type_filter & (1 << type))) {
  2885. // don't delete this file type.
  2886. continue;
  2887. }
  2888. db_fs_->DeleteFile(dir + "/" + f, io_options_, nullptr)
  2889. .PermitUncheckedError(); // ignore errors
  2890. }
  2891. }
  2892. IOStatus BackupEngineImpl::ReadChildFileCurrentSizes(
  2893. const std::string& dir, const std::shared_ptr<FileSystem>& fs,
  2894. std::unordered_map<std::string, uint64_t>* result) const {
  2895. assert(result != nullptr);
  2896. std::vector<Env::FileAttributes> files_attrs;
  2897. IOStatus io_status = fs->FileExists(dir, io_options_, nullptr);
  2898. if (io_status.ok()) {
  2899. io_status =
  2900. fs->GetChildrenFileAttributes(dir, io_options_, &files_attrs, nullptr);
  2901. } else if (io_status.IsNotFound()) {
  2902. // Insert no entries can be considered success
  2903. io_status = IOStatus::OK();
  2904. }
  2905. const bool slash_needed = dir.empty() || dir.back() != '/';
  2906. for (const auto& file_attrs : files_attrs) {
  2907. result->emplace(dir + (slash_needed ? "/" : "") + file_attrs.name,
  2908. file_attrs.size_bytes);
  2909. }
  2910. return io_status;
  2911. }
  2912. IOStatus BackupEngineImpl::GarbageCollect() {
  2913. assert(!read_only_);
  2914. // We will make a best effort to remove all garbage even in the presence
  2915. // of inconsistencies or I/O failures that inhibit finding garbage.
  2916. IOStatus overall_status = IOStatus::OK();
  2917. // If all goes well, we don't need another auto-GC this session
  2918. might_need_garbage_collect_ = false;
  2919. ROCKS_LOG_INFO(options_.info_log, "Starting garbage collection");
  2920. // delete obsolete shared files
  2921. for (bool with_checksum : {false, true}) {
  2922. std::vector<std::string> shared_children;
  2923. {
  2924. std::string shared_path;
  2925. if (with_checksum) {
  2926. shared_path = GetAbsolutePath(GetSharedFileWithChecksumRel());
  2927. } else {
  2928. shared_path = GetAbsolutePath(GetSharedFileRel());
  2929. }
  2930. IOStatus io_s = backup_fs_->FileExists(shared_path, io_options_, nullptr);
  2931. if (io_s.ok()) {
  2932. io_s = backup_fs_->GetChildren(shared_path, io_options_,
  2933. &shared_children, nullptr);
  2934. } else if (io_s.IsNotFound()) {
  2935. io_s = IOStatus::OK();
  2936. }
  2937. if (!io_s.ok()) {
  2938. overall_status = io_s;
  2939. // Trying again later might work
  2940. might_need_garbage_collect_ = true;
  2941. }
  2942. }
  2943. for (auto& child : shared_children) {
  2944. std::string rel_fname;
  2945. if (with_checksum) {
  2946. rel_fname = GetSharedFileWithChecksumRel(child);
  2947. } else {
  2948. rel_fname = GetSharedFileRel(child);
  2949. }
  2950. auto child_itr = backuped_file_infos_.find(rel_fname);
  2951. // if it's not refcounted, delete it
  2952. if (child_itr == backuped_file_infos_.end() ||
  2953. child_itr->second->refs == 0) {
  2954. // this might be a directory, but DeleteFile will just fail in that
  2955. // case, so we're good
  2956. IOStatus io_s = backup_fs_->DeleteFile(GetAbsolutePath(rel_fname),
  2957. io_options_, nullptr);
  2958. ROCKS_LOG_INFO(options_.info_log, "Deleting %s -- %s",
  2959. rel_fname.c_str(), io_s.ToString().c_str());
  2960. backuped_file_infos_.erase(rel_fname);
  2961. if (!io_s.ok()) {
  2962. // Trying again later might work
  2963. might_need_garbage_collect_ = true;
  2964. }
  2965. }
  2966. }
  2967. }
  2968. // delete obsolete private files
  2969. std::vector<std::string> private_children;
  2970. {
  2971. IOStatus io_s =
  2972. backup_fs_->GetChildren(GetAbsolutePath(kPrivateDirName), io_options_,
  2973. &private_children, nullptr);
  2974. if (!io_s.ok()) {
  2975. overall_status = io_s;
  2976. // Trying again later might work
  2977. might_need_garbage_collect_ = true;
  2978. }
  2979. }
  2980. for (auto& child : private_children) {
  2981. BackupID backup_id = 0;
  2982. bool tmp_dir = child.find(".tmp") != std::string::npos;
  2983. sscanf(child.c_str(), "%u", &backup_id);
  2984. if (!tmp_dir && // if it's tmp_dir, delete it
  2985. (backup_id == 0 || backups_.find(backup_id) != backups_.end())) {
  2986. // it's either not a number or it's still alive. continue
  2987. continue;
  2988. }
  2989. // here we have to delete the dir and all its children
  2990. std::string full_private_path =
  2991. GetAbsolutePath(GetPrivateFileRel(backup_id));
  2992. std::vector<std::string> subchildren;
  2993. if (backup_fs_
  2994. ->GetChildren(full_private_path, io_options_, &subchildren, nullptr)
  2995. .ok()) {
  2996. for (auto& subchild : subchildren) {
  2997. IOStatus io_s = backup_fs_->DeleteFile(full_private_path + subchild,
  2998. io_options_, nullptr);
  2999. ROCKS_LOG_INFO(options_.info_log, "Deleting %s -- %s",
  3000. (full_private_path + subchild).c_str(),
  3001. io_s.ToString().c_str());
  3002. if (!io_s.ok()) {
  3003. // Trying again later might work
  3004. might_need_garbage_collect_ = true;
  3005. }
  3006. }
  3007. }
  3008. // finally delete the private dir
  3009. IOStatus io_s =
  3010. backup_fs_->DeleteDir(full_private_path, io_options_, nullptr);
  3011. ROCKS_LOG_INFO(options_.info_log, "Deleting dir %s -- %s",
  3012. full_private_path.c_str(), io_s.ToString().c_str());
  3013. if (!io_s.ok()) {
  3014. // Trying again later might work
  3015. might_need_garbage_collect_ = true;
  3016. }
  3017. }
  3018. assert(overall_status.ok() || might_need_garbage_collect_);
  3019. return overall_status;
  3020. }
  3021. // ------- BackupMeta class --------
  3022. IOStatus BackupEngineImpl::BackupMeta::AddFile(
  3023. std::shared_ptr<FileInfo> file_info) {
  3024. auto itr = file_infos_->find(file_info->filename);
  3025. if (itr == file_infos_->end()) {
  3026. auto ret = file_infos_->insert({file_info->filename, file_info});
  3027. if (ret.second) {
  3028. itr = ret.first;
  3029. itr->second->refs = 1;
  3030. } else {
  3031. // if this happens, something is seriously wrong
  3032. return IOStatus::Corruption("In memory metadata insertion error");
  3033. }
  3034. } else {
  3035. // Compare sizes, because we scanned that off the filesystem on both
  3036. // ends. This is like a check in VerifyBackup.
  3037. if (itr->second->size != file_info->size) {
  3038. std::string msg = "Size mismatch for existing backup file: ";
  3039. msg.append(file_info->filename);
  3040. msg.append(" Size in backup is " + std::to_string(itr->second->size) +
  3041. " while size in DB is " + std::to_string(file_info->size));
  3042. msg.append(
  3043. " If this DB file checks as not corrupt, try deleting old"
  3044. " backups or backing up to a different backup directory.");
  3045. return IOStatus::Corruption(msg);
  3046. }
  3047. if (file_info->checksum_hex.empty()) {
  3048. // No checksum available to check
  3049. } else if (itr->second->checksum_hex.empty()) {
  3050. // Remember checksum if newly acquired
  3051. itr->second->checksum_hex = file_info->checksum_hex;
  3052. } else if (itr->second->checksum_hex != file_info->checksum_hex) {
  3053. // Note: to save I/O, these will be equal trivially on already backed
  3054. // up files that don't have the checksum in their name. And it should
  3055. // never fail for files that do have checksum in their name.
  3056. // Should never reach here, but produce an appropriate corruption
  3057. // message in case we do in a release build.
  3058. assert(false);
  3059. std::string msg = "Checksum mismatch for existing backup file: ";
  3060. msg.append(file_info->filename);
  3061. msg.append(" Expected checksum is " + itr->second->checksum_hex +
  3062. " while computed checksum is " + file_info->checksum_hex);
  3063. msg.append(
  3064. " If this DB file checks as not corrupt, try deleting old"
  3065. " backups or backing up to a different backup directory.");
  3066. return IOStatus::Corruption(msg);
  3067. }
  3068. ++itr->second->refs; // increase refcount if already present
  3069. }
  3070. size_ += file_info->size;
  3071. files_.push_back(itr->second);
  3072. return IOStatus::OK();
  3073. }
  3074. IOStatus BackupEngineImpl::BackupMeta::Delete(bool delete_meta) {
  3075. IOStatus io_s;
  3076. for (const auto& file : files_) {
  3077. --file->refs; // decrease refcount
  3078. }
  3079. files_.clear();
  3080. // delete meta file
  3081. if (delete_meta) {
  3082. io_s = fs_->FileExists(meta_filename_, iooptions_, nullptr);
  3083. if (io_s.ok()) {
  3084. io_s = fs_->DeleteFile(meta_filename_, iooptions_, nullptr);
  3085. } else if (io_s.IsNotFound()) {
  3086. io_s = IOStatus::OK(); // nothing to delete
  3087. }
  3088. }
  3089. timestamp_ = 0;
  3090. return io_s;
  3091. }
  // Literal tokens of the backup meta file schema (see LoadFromFile)

  // Prefix of the optional first line that announces the schema version.
  const std::string kSchemaVersionPrefix = "schema_version ";
  // Line separating the file entries from the footer fields.
  const std::string kFooterMarker = "// FOOTER";

  // Top-level meta field carrying the application metadata.
  const std::string kAppMetaDataFieldName = "metadata";

  // Per-file fields.
  // WART: The checksums are crc32c but named "crc32"
  const std::string kFileCrc32cFieldName = "crc32";
  const std::string kFileSizeFieldName = "size";
  const std::string kTemperatureFieldName = "temp";
  const std::string kExcludedFieldName = "ni::excluded";

  // Marks a (future) field that should cause failure if not recognized.
  // Other fields are assumed to be ignorable. For example, in the future
  // we might add
  //   ni::file_name_escape uri_percent
  // to indicate all file names have had spaces and special characters
  // escaped using a URI percent encoding.
  const std::string kNonIgnorableFieldPrefix = "ni::";
  3108. // Each backup meta file is of the format (schema version 1):
  3109. //----------------------------------------------------------
  3110. // <timestamp>
  3111. // <seq number>
  3112. // metadata <metadata> (optional)
  3113. // <number of files>
  3114. // <file1> crc32 <crc32c_as_unsigned_decimal>
  3115. // <file2> crc32 <crc32c_as_unsigned_decimal>
  3116. // ...
  3117. //----------------------------------------------------------
  3118. //
  3119. // For schema version 2.x:
  3120. //----------------------------------------------------------
  3121. // schema_version <ver>
  3122. // <timestamp>
  3123. // <seq number>
  3124. // [<field name> <field data>]
  3125. // ...
  3126. // <number of files>
  3127. // <file1>( <field name> <field data no spaces>)*
  3128. // <file2>( <field name> <field data no spaces>)*
  3129. // ...
  3130. // [// FOOTER]
  3131. // [<field name> <field data>]
  3132. // ...
  3133. //----------------------------------------------------------
  3134. // where
  // <ver> ::= [0-9]+([.][0-9]+)?
  // <field name> ::= [A-Za-z_][A-Za-z_0-9.:]+
  3137. // <field data> is anything but newline
  3138. // <field data no spaces> is anything but space and newline
  3139. // Although "// FOOTER" wouldn't strictly be required as a delimiter
  3140. // given the number of files is included, it is there for parsing
  3141. // sanity in case of corruption. It is only required if followed
  3142. // by footer fields, such as a checksum of the meta file (so far).
  3143. // Unrecognized fields are ignored, to support schema evolution on
  3144. // non-critical features with forward compatibility. Update schema
  3145. // major version for breaking changes. Schema minor versions are indicated
  3146. // only for diagnostic/debugging purposes.
  3147. //
  3148. // Fields in schema version 2.0:
  3149. // * Top-level meta fields:
  3150. // * Only "metadata" as in schema version 1
  3151. // * File meta fields:
  3152. // * "crc32" - a crc32c checksum as in schema version 1
  3153. // * "size" - the size of the file (new)
  3154. // * Footer meta fields:
  3155. // * None yet (future use for meta file checksum anticipated)
  3156. //
  3157. IOStatus BackupEngineImpl::BackupMeta::LoadFromFile(
  3158. const std::string& backup_dir,
  3159. const std::unordered_map<std::string, uint64_t>& abs_path_to_size,
  3160. RateLimiter* rate_limiter, Logger* info_log,
  3161. std::unordered_set<std::string>* reported_ignored_fields) {
  3162. assert(reported_ignored_fields);
  3163. assert(Empty());
  3164. std::unique_ptr<LineFileReader> backup_meta_reader;
  3165. {
  3166. IOStatus io_s = LineFileReader::Create(fs_, meta_filename_, FileOptions(),
  3167. &backup_meta_reader,
  3168. nullptr /* dbg */, rate_limiter);
  3169. if (!io_s.ok()) {
  3170. return io_s;
  3171. }
  3172. }
  3173. // If we don't read an explicit schema_version, that implies version 1,
  3174. // which is what we call the original backup meta schema.
  3175. int schema_major_version = 1;
  3176. // Failures handled at the end
  3177. std::string line;
  3178. if (backup_meta_reader->ReadLine(&line,
  3179. Env::IO_LOW /* rate_limiter_priority */)) {
  3180. if (StartsWith(line, kSchemaVersionPrefix)) {
  3181. std::string ver = line.substr(kSchemaVersionPrefix.size());
  3182. if (ver == "2" || StartsWith(ver, "2.")) {
  3183. schema_major_version = 2;
  3184. } else {
  3185. return IOStatus::NotSupported(
  3186. "Unsupported/unrecognized schema version: " + ver);
  3187. }
  3188. line.clear();
  3189. } else if (line.empty()) {
  3190. return IOStatus::Corruption("Unexpected empty line");
  3191. }
  3192. }
  3193. if (!line.empty()) {
  3194. timestamp_ = std::strtoull(line.c_str(), nullptr, /*base*/ 10);
  3195. } else if (backup_meta_reader->ReadLine(
  3196. &line, Env::IO_LOW /* rate_limiter_priority */)) {
  3197. timestamp_ = std::strtoull(line.c_str(), nullptr, /*base*/ 10);
  3198. }
  3199. if (backup_meta_reader->ReadLine(&line,
  3200. Env::IO_LOW /* rate_limiter_priority */)) {
  3201. sequence_number_ = std::strtoull(line.c_str(), nullptr, /*base*/ 10);
  3202. }
  3203. uint32_t num_files = UINT32_MAX;
  3204. while (backup_meta_reader->ReadLine(
  3205. &line, Env::IO_LOW /* rate_limiter_priority */)) {
  3206. if (line.empty()) {
  3207. return IOStatus::Corruption("Unexpected empty line");
  3208. }
  3209. // Number -> number of files -> exit loop reading optional meta fields
  3210. if (line[0] >= '0' && line[0] <= '9') {
  3211. num_files = static_cast<uint32_t>(strtoul(line.c_str(), nullptr, 10));
  3212. break;
  3213. }
  3214. // else, must be a meta field assignment
  3215. auto space_pos = line.find_first_of(' ');
  3216. if (space_pos == std::string::npos) {
  3217. return IOStatus::Corruption("Expected number of files or meta field");
  3218. }
  3219. std::string field_name = line.substr(0, space_pos);
  3220. std::string field_data = line.substr(space_pos + 1);
  3221. if (field_name == kAppMetaDataFieldName) {
  3222. // app metadata present
  3223. bool decode_success = Slice(field_data).DecodeHex(&app_metadata_);
  3224. if (!decode_success) {
  3225. return IOStatus::Corruption(
  3226. "Failed to decode stored hex encoded app metadata");
  3227. }
  3228. } else if (schema_major_version < 2) {
  3229. return IOStatus::Corruption("Expected number of files or \"" +
  3230. kAppMetaDataFieldName + "\" field");
  3231. } else if (StartsWith(field_name, kNonIgnorableFieldPrefix)) {
  3232. return IOStatus::NotSupported("Unrecognized non-ignorable meta field " +
  3233. field_name + " (from future version?)");
  3234. } else {
  3235. // Warn the first time we see any particular unrecognized meta field
  3236. if (reported_ignored_fields->insert("meta:" + field_name).second) {
  3237. ROCKS_LOG_WARN(info_log, "Ignoring unrecognized backup meta field %s",
  3238. field_name.c_str());
  3239. }
  3240. }
  3241. }
  3242. std::vector<std::shared_ptr<FileInfo>> files;
  3243. bool footer_present = false;
  3244. while (backup_meta_reader->ReadLine(
  3245. &line, Env::IO_LOW /* rate_limiter_priority */)) {
  3246. std::vector<std::string> components = StringSplit(line, ' ');
  3247. if (components.size() < 1) {
  3248. return IOStatus::Corruption("Empty line instead of file entry.");
  3249. }
  3250. if (schema_major_version >= 2 && components.size() == 2 &&
  3251. line == kFooterMarker) {
  3252. footer_present = true;
  3253. break;
  3254. }
  3255. const std::string& filename = components[0];
  3256. if (schema_major_version >= 2) {
  3257. if (components.size() % 2 != 1) {
  3258. return IOStatus::Corruption(
  3259. "Bad number of line components for file entry.");
  3260. }
  3261. } else {
  3262. // Check restricted original schema
  3263. if (components.size() < 3) {
  3264. return IOStatus::Corruption("File checksum is missing for " + filename +
  3265. " in " + meta_filename_);
  3266. }
  3267. if (components[1] != kFileCrc32cFieldName) {
  3268. return IOStatus::Corruption("Unknown checksum type for " + filename +
  3269. " in " + meta_filename_);
  3270. }
  3271. if (components.size() > 3) {
  3272. return IOStatus::Corruption("Extra data for entry " + filename +
  3273. " in " + meta_filename_);
  3274. }
  3275. }
  3276. std::optional<uint64_t> expected_size{};
  3277. std::string checksum_hex;
  3278. Temperature temp = Temperature::kUnknown;
  3279. bool excluded = false;
  3280. for (unsigned i = 1; i < components.size(); i += 2) {
  3281. const std::string& field_name = components[i];
  3282. const std::string& field_data = components[i + 1];
  3283. if (field_name == kFileCrc32cFieldName) {
  3284. uint32_t checksum_value =
  3285. static_cast<uint32_t>(strtoul(field_data.c_str(), nullptr, 10));
  3286. if (field_data != std::to_string(checksum_value)) {
  3287. return IOStatus::Corruption("Invalid checksum value for " + filename +
  3288. " in " + meta_filename_);
  3289. }
  3290. checksum_hex = ChecksumInt32ToHex(checksum_value);
  3291. } else if (field_name == kFileSizeFieldName) {
  3292. expected_size = std::strtoull(field_data.c_str(), nullptr, /*base*/ 10);
  3293. } else if (field_name == kTemperatureFieldName) {
  3294. auto iter = temperature_string_map.find(field_data);
  3295. if (iter != temperature_string_map.end()) {
  3296. temp = iter->second;
  3297. } else {
  3298. // Could report corruption, but in case of new temperatures added
  3299. // in future, letting those map to kUnknown which should generally
  3300. // be safe.
  3301. temp = Temperature::kUnknown;
  3302. }
  3303. } else if (field_name == kExcludedFieldName) {
  3304. if (field_data == "true") {
  3305. excluded = true;
  3306. } else if (field_data == "false") {
  3307. excluded = false;
  3308. } else {
  3309. return IOStatus::NotSupported("Unrecognized value \"" + field_data +
  3310. "\" for field " + field_name);
  3311. }
  3312. } else if (StartsWith(field_name, kNonIgnorableFieldPrefix)) {
  3313. return IOStatus::NotSupported("Unrecognized non-ignorable file field " +
  3314. field_name + " (from future version?)");
  3315. } else {
  3316. // Warn the first time we see any particular unrecognized file field
  3317. if (reported_ignored_fields->insert("file:" + field_name).second) {
  3318. ROCKS_LOG_WARN(info_log, "Ignoring unrecognized backup file field %s",
  3319. field_name.c_str());
  3320. }
  3321. }
  3322. }
  3323. if (excluded) {
  3324. excluded_files_.emplace_back(filename);
  3325. } else {
  3326. // Verify file exists, with expected size
  3327. std::string abs_path = backup_dir + "/" + filename;
  3328. auto e = abs_path_to_size.find(abs_path);
  3329. if (e == abs_path_to_size.end()) {
  3330. return IOStatus::Corruption(
  3331. "Pathname in meta file not found on disk: " + abs_path);
  3332. }
  3333. uint64_t actual_size = e->second;
  3334. if (expected_size.has_value() && *expected_size != actual_size) {
  3335. return IOStatus::Corruption("For file " + filename + " expected size " +
  3336. std::to_string(*expected_size) +
  3337. " but found size" +
  3338. std::to_string(actual_size));
  3339. }
  3340. // NOTE: FileInfo will be coalesced for sharing later (AddFile below)
  3341. files.emplace_back(
  3342. std::make_shared<FileInfo>(filename, actual_size, checksum_hex,
  3343. /*id*/ "", /*sid*/ "", temp));
  3344. }
  3345. }
  3346. if (footer_present) {
  3347. assert(schema_major_version >= 2);
  3348. while (backup_meta_reader->ReadLine(
  3349. &line, Env::IO_LOW /* rate_limiter_priority */)) {
  3350. if (line.empty()) {
  3351. return IOStatus::Corruption("Unexpected empty line");
  3352. }
  3353. auto space_pos = line.find_first_of(' ');
  3354. if (space_pos == std::string::npos) {
  3355. return IOStatus::Corruption("Expected footer field");
  3356. }
  3357. std::string field_name = line.substr(0, space_pos);
  3358. std::string field_data = line.substr(space_pos + 1);
  3359. if (StartsWith(field_name, kNonIgnorableFieldPrefix)) {
  3360. return IOStatus::NotSupported("Unrecognized non-ignorable field " +
  3361. field_name + " (from future version?)");
  3362. } else if (reported_ignored_fields->insert("footer:" + field_name)
  3363. .second) {
  3364. // Warn the first time we see any particular unrecognized footer field
  3365. ROCKS_LOG_WARN(info_log,
  3366. "Ignoring unrecognized backup meta footer field %s",
  3367. field_name.c_str());
  3368. }
  3369. }
  3370. }
  3371. {
  3372. IOStatus io_s = backup_meta_reader->GetStatus();
  3373. if (!io_s.ok()) {
  3374. return io_s;
  3375. }
  3376. }
  3377. if (num_files != files.size()) {
  3378. return IOStatus::Corruption(
  3379. "Inconsistent number of files or missing/incomplete header in " +
  3380. meta_filename_);
  3381. }
  3382. files_.reserve(files.size());
  3383. for (const auto& file_info : files) {
  3384. IOStatus io_s = AddFile(file_info);
  3385. if (!io_s.ok()) {
  3386. return io_s;
  3387. }
  3388. }
  3389. return IOStatus::OK();
  3390. }
  // Version string emitted after the schema version prefix in the meta file,
  // looked up by schema major version (see StoreToFile). An empty entry means
  // no schema version line is written for that major version.
  const std::vector<std::string> minor_version_strings = {
      /* major version 0 (invalid) */ "",
      /* major version 1 (implicit) */ "",
      /* major version 2 */ "2.1",
  };
  3396. IOStatus BackupEngineImpl::BackupMeta::StoreToFile(
  3397. bool sync, int schema_version,
  3398. const TEST_BackupMetaSchemaOptions* schema_test_options) {
  3399. if (schema_version < 1) {
  3400. return IOStatus::InvalidArgument(
  3401. "BackupEngineOptions::schema_version must be >= 1");
  3402. }
  3403. if (schema_version > static_cast<int>(minor_version_strings.size() - 1)) {
  3404. return IOStatus::NotSupported(
  3405. "Only BackupEngineOptions::schema_version <= " +
  3406. std::to_string(minor_version_strings.size() - 1) + " is supported");
  3407. }
  3408. std::string ver = minor_version_strings[schema_version];
  3409. // Need schema_version >= 2 for TEST_BackupMetaSchemaOptions
  3410. assert(schema_version >= 2 || schema_test_options == nullptr);
  3411. IOStatus io_s;
  3412. std::unique_ptr<FSWritableFile> backup_meta_file;
  3413. FileOptions file_options;
  3414. file_options.use_mmap_writes = false;
  3415. file_options.use_direct_writes = false;
  3416. io_s = fs_->NewWritableFile(meta_tmp_filename_, file_options,
  3417. &backup_meta_file, nullptr);
  3418. if (!io_s.ok()) {
  3419. return io_s;
  3420. }
  3421. std::ostringstream buf;
  3422. if (schema_test_options) {
  3423. // override for testing
  3424. ver = schema_test_options->version;
  3425. }
  3426. if (!ver.empty()) {
  3427. assert(schema_version >= 2);
  3428. buf << kSchemaVersionPrefix << ver << "\n";
  3429. }
  3430. buf << static_cast<unsigned long long>(timestamp_) << "\n";
  3431. buf << sequence_number_ << "\n";
  3432. if (!app_metadata_.empty()) {
  3433. std::string hex_encoded_metadata =
  3434. Slice(app_metadata_).ToString(/* hex */ true);
  3435. buf << kAppMetaDataFieldName << " " << hex_encoded_metadata << "\n";
  3436. }
  3437. if (schema_test_options) {
  3438. for (auto& e : schema_test_options->meta_fields) {
  3439. buf << e.first << " " << e.second << "\n";
  3440. }
  3441. }
  3442. buf << files_.size() << "\n";
  3443. for (const auto& file : files_) {
  3444. buf << file->filename;
  3445. if (schema_test_options == nullptr ||
  3446. schema_test_options->crc32c_checksums) {
  3447. // use crc32c for now, switch to something else if needed
  3448. buf << " " << kFileCrc32cFieldName << " "
  3449. << ChecksumHexToInt32(file->checksum_hex);
  3450. }
  3451. if (schema_version >= 2 && file->temp != Temperature::kUnknown) {
  3452. buf << " " << kTemperatureFieldName << " "
  3453. << temperature_to_string[file->temp];
  3454. }
  3455. if (schema_test_options && schema_test_options->file_sizes) {
  3456. buf << " " << kFileSizeFieldName << " " << std::to_string(file->size);
  3457. }
  3458. if (schema_test_options) {
  3459. for (auto& e : schema_test_options->file_fields) {
  3460. buf << " " << e.first << " " << e.second;
  3461. }
  3462. }
  3463. buf << "\n";
  3464. }
  3465. for (const auto& file : excluded_files_) {
  3466. assert(schema_version >= 2);
  3467. buf << file.relative_file << " " << kExcludedFieldName << " true\n";
  3468. }
  3469. if (schema_test_options && !schema_test_options->footer_fields.empty()) {
  3470. buf << kFooterMarker << "\n";
  3471. for (auto& e : schema_test_options->footer_fields) {
  3472. buf << e.first << " " << e.second << "\n";
  3473. }
  3474. }
  3475. io_s = backup_meta_file->Append(Slice(buf.str()), iooptions_, nullptr);
  3476. IOSTATS_ADD(bytes_written, buf.str().size());
  3477. if (io_s.ok() && sync) {
  3478. io_s = backup_meta_file->Sync(iooptions_, nullptr);
  3479. }
  3480. if (io_s.ok()) {
  3481. io_s = backup_meta_file->Close(iooptions_, nullptr);
  3482. }
  3483. if (io_s.ok()) {
  3484. io_s = fs_->RenameFile(meta_tmp_filename_, meta_filename_, iooptions_,
  3485. nullptr);
  3486. }
  3487. return io_s;
  3488. }
  3489. } // namespace
  3490. IOStatus BackupEngineReadOnly::Open(const BackupEngineOptions& options,
  3491. Env* env,
  3492. BackupEngineReadOnly** backup_engine_ptr) {
  3493. if (options.destroy_old_data) {
  3494. return IOStatus::InvalidArgument(
  3495. "Can't destroy old data with ReadOnly BackupEngine");
  3496. }
  3497. std::unique_ptr<BackupEngineImplThreadSafe> backup_engine(
  3498. new BackupEngineImplThreadSafe(options, env, true /*read_only*/));
  3499. auto s = backup_engine->Initialize();
  3500. if (!s.ok()) {
  3501. *backup_engine_ptr = nullptr;
  3502. return s;
  3503. }
  3504. *backup_engine_ptr = backup_engine.release();
  3505. return IOStatus::OK();
  3506. }
  3507. void TEST_SetBackupMetaSchemaOptions(
  3508. BackupEngine* engine, const TEST_BackupMetaSchemaOptions& options) {
  3509. BackupEngineImplThreadSafe* impl =
  3510. static_cast_with_check<BackupEngineImplThreadSafe>(engine);
  3511. impl->TEST_SetBackupMetaSchemaOptions(options);
  3512. }
  3513. void TEST_SetDefaultRateLimitersClock(
  3514. BackupEngine* engine,
  3515. const std::shared_ptr<SystemClock>& backup_rate_limiter_clock,
  3516. const std::shared_ptr<SystemClock>& restore_rate_limiter_clock) {
  3517. BackupEngineImplThreadSafe* impl =
  3518. static_cast_with_check<BackupEngineImplThreadSafe>(engine);
  3519. impl->TEST_SetDefaultRateLimitersClock(backup_rate_limiter_clock,
  3520. restore_rate_limiter_clock);
  3521. }
  3522. } // namespace ROCKSDB_NAMESPACE