db_test2.cc
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include <atomic>
#include <cstdlib>
#include <functional>
#include <memory>

#include "db/db_test_util.h"
#include "db/read_callback.h"
#include "db/version_edit.h"
#include "env/fs_readonly.h"
#include "options/options_helper.h"
#include "port/port.h"
#include "port/stack_trace.h"
#include "rocksdb/experimental.h"
#include "rocksdb/iostats_context.h"
#include "rocksdb/persistent_cache.h"
#include "rocksdb/trace_record.h"
#include "rocksdb/trace_record_result.h"
#include "rocksdb/utilities/replayer.h"
#include "rocksdb/wal_filter.h"
#include "test_util/testutil.h"
#include "util/defer.h"
#include "util/random.h"
#include "utilities/fault_injection_env.h"

namespace ROCKSDB_NAMESPACE {

class DBTest2 : public DBTestBase {
 public:
  DBTest2() : DBTestBase("db_test2", /*env_do_fsync=*/true) {}
};

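// OpenForReadOnly requires an existing DB, but a failed open can still
// leave the <dbname> directory behind when create_if_missing is true. The
// test verifies both the failure and that the leftover directory can be
// cleaned up.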
TEST_F(DBTest2, OpenForReadOnly) {
  DB* db_ptr = nullptr;
  std::string dbname = test::PerThreadDBPath("db_readonly");
  Options options = CurrentOptions();
  options.create_if_missing = true;

  // OpenForReadOnly should fail but will create <dbname> in the file system
  ASSERT_NOK(DB::OpenForReadOnly(options, dbname, &db_ptr));

  // Since <dbname> is created, we should be able to delete the dir
  // We first get the list of files under <dbname>
  // There should not be any subdirectories -- this is not checked here
  std::vector<std::string> files;
  ASSERT_OK(env_->GetChildren(dbname, &files));
  for (auto& f : files) {
    ASSERT_OK(env_->DeleteFile(dbname + "/" + f));
  }
  // <dbname> should be empty now and we should be able to delete it
  ASSERT_OK(env_->DeleteDir(dbname));

  options.create_if_missing = false;
  // OpenForReadOnly should fail since <dbname> was successfully deleted
  ASSERT_NOK(DB::OpenForReadOnly(options, dbname, &db_ptr));
  // With create_if_missing false, there should not be a dir in the file system
  ASSERT_NOK(env_->FileExists(dbname));
}

TEST_F(DBTest2, OpenForReadOnlyWithColumnFamilies) {
  DB* db_ptr = nullptr;
  std::string dbname = test::PerThreadDBPath("db_readonly");
  Options options = CurrentOptions();
  options.create_if_missing = true;

  ColumnFamilyOptions cf_options(options);
  std::vector<ColumnFamilyDescriptor> column_families;
  column_families.emplace_back(kDefaultColumnFamilyName, cf_options);
  column_families.emplace_back("goku", cf_options);
  std::vector<ColumnFamilyHandle*> handles;

  // OpenForReadOnly should fail but will create <dbname> in the file system
  ASSERT_NOK(
      DB::OpenForReadOnly(options, dbname, column_families, &handles, &db_ptr));

  // Since <dbname> is created, we should be able to delete the dir
  // We first get the list of files under <dbname>
  // There should not be any subdirectories -- this is not checked here
  std::vector<std::string> files;
  ASSERT_OK(env_->GetChildren(dbname, &files));
  for (auto& f : files) {
    ASSERT_OK(env_->DeleteFile(dbname + "/" + f));
  }
  // <dbname> should be empty now and we should be able to delete it
  ASSERT_OK(env_->DeleteDir(dbname));

  options.create_if_missing = false;
  // OpenForReadOnly should fail since <dbname> was successfully deleted
  ASSERT_NOK(
      DB::OpenForReadOnly(options, dbname, column_families, &handles, &db_ptr));
  // With create_if_missing false, there should not be a dir in the file system
  ASSERT_NOK(env_->FileExists(dbname));
}

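// Listener for the partitioned-index test below: every completed flush must
// have produced an SST with more than one index partition, with index keys
// stored as internal keys rather than user keys.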
class PartitionedIndexTestListener : public EventListener {
 public:
  void OnFlushCompleted(DB* /*db*/, const FlushJobInfo& info) override {
    ASSERT_GT(info.table_properties.index_partitions, 1);
    ASSERT_EQ(info.table_properties.index_key_is_user_key, 0);
  }
};

TEST_F(DBTest2, PartitionedIndexUserToInternalKey) {
  const int kValueSize = 10500;
  const int kNumEntriesPerFile = 1000;
  const int kNumFiles = 3;
  const int kNumDistinctKeys = 30;

  BlockBasedTableOptions table_options;
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch;
  PartitionedIndexTestListener* listener = new PartitionedIndexTestListener();
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  options.listeners.emplace_back(listener);
  std::vector<const Snapshot*> snapshots;
  Reopen(options);
  Random rnd(301);

  for (int i = 0; i < kNumFiles; i++) {
    for (int j = 0; j < kNumEntriesPerFile; j++) {
      int key_id = (i * kNumEntriesPerFile + j) % kNumDistinctKeys;
      std::string value = rnd.RandomString(kValueSize);
      ASSERT_OK(Put("keykey_" + std::to_string(key_id), value));
      snapshots.push_back(db_->GetSnapshot());
    }
    ASSERT_OK(Flush());
  }

  for (auto s : snapshots) {
    db_->ReleaseSnapshot(s);
  }
}

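// Parameterized on whether filter blocks go through the block cache
// (cache_index_and_filter_blocks with a deliberately tiny 1-byte LRU cache)
// or stay pinned in table-reader memory. Either way, prefix bloom filters
// must respect the reverse bytewise comparator during Seek().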
class PrefixFullBloomWithReverseComparator
    : public DBTestBase,
      public ::testing::WithParamInterface<bool> {
 public:
  PrefixFullBloomWithReverseComparator()
      : DBTestBase("prefix_bloom_reverse", /*env_do_fsync=*/true) {}
  void SetUp() override { if_cache_filter_ = GetParam(); }
  bool if_cache_filter_;
};

TEST_P(PrefixFullBloomWithReverseComparator,
       PrefixFullBloomWithReverseComparator) {
  Options options = last_options_;
  options.comparator = ReverseBytewiseComparator();
  options.prefix_extractor.reset(NewCappedPrefixTransform(3));
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  BlockBasedTableOptions bbto;
  if (if_cache_filter_) {
    bbto.no_block_cache = false;
    bbto.cache_index_and_filter_blocks = true;
    bbto.block_cache = NewLRUCache(1);
  }
  bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
  bbto.whole_key_filtering = false;
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  DestroyAndReopen(options);

  ASSERT_OK(dbfull()->Put(WriteOptions(), "bar123", "foo"));
  ASSERT_OK(dbfull()->Put(WriteOptions(), "bar234", "foo2"));
  ASSERT_OK(dbfull()->Put(WriteOptions(), "foo123", "foo3"));

  ASSERT_OK(dbfull()->Flush(FlushOptions()));

  if (bbto.block_cache) {
    bbto.block_cache->EraseUnRefEntries();
  }

  std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions()));
  iter->Seek("bar345");
  ASSERT_OK(iter->status());
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("bar234", iter->key().ToString());
  ASSERT_EQ("foo2", iter->value().ToString());

  iter->Next();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("bar123", iter->key().ToString());
  ASSERT_EQ("foo", iter->value().ToString());

  iter->Seek("foo234");
  ASSERT_OK(iter->status());
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("foo123", iter->key().ToString());
  ASSERT_EQ("foo3", iter->value().ToString());

  iter->Seek("bar");
  ASSERT_OK(iter->status());
  ASSERT_TRUE(!iter->Valid());
}

INSTANTIATE_TEST_CASE_P(PrefixFullBloomWithReverseComparator,
                        PrefixFullBloomWithReverseComparator, testing::Bool());

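// "rocksdb.iterator.super-version-number" exposes the super version an
// iterator is pinned to. Expectations below: a flush installs a new super
// version, so iterators created afterwards report a larger number; a plain
// Put does not, and existing iterators keep the number they started with.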
TEST_F(DBTest2, IteratorPropertyVersionNumber) {
  ASSERT_OK(Put("", ""));
  Iterator* iter1 = db_->NewIterator(ReadOptions());
  ASSERT_OK(iter1->status());
  std::string prop_value;
  ASSERT_OK(
      iter1->GetProperty("rocksdb.iterator.super-version-number", &prop_value));
  uint64_t version_number1 =
      static_cast<uint64_t>(std::atoi(prop_value.c_str()));

  ASSERT_OK(Put("", ""));
  ASSERT_OK(Flush());

  Iterator* iter2 = db_->NewIterator(ReadOptions());
  ASSERT_OK(iter2->status());
  ASSERT_OK(
      iter2->GetProperty("rocksdb.iterator.super-version-number", &prop_value));
  uint64_t version_number2 =
      static_cast<uint64_t>(std::atoi(prop_value.c_str()));

  ASSERT_GT(version_number2, version_number1);

  ASSERT_OK(Put("", ""));

  Iterator* iter3 = db_->NewIterator(ReadOptions());
  ASSERT_OK(iter3->status());
  ASSERT_OK(
      iter3->GetProperty("rocksdb.iterator.super-version-number", &prop_value));
  uint64_t version_number3 =
      static_cast<uint64_t>(std::atoi(prop_value.c_str()));

  ASSERT_EQ(version_number2, version_number3);

  iter1->SeekToFirst();
  ASSERT_OK(
      iter1->GetProperty("rocksdb.iterator.super-version-number", &prop_value));
  uint64_t version_number1_new =
      static_cast<uint64_t>(std::atoi(prop_value.c_str()));
  ASSERT_EQ(version_number1, version_number1_new);

  delete iter1;
  delete iter2;
  delete iter3;
}

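// Smoke test: with cache_index_and_filter_blocks set, a Get immediately
// after reopening the DB should repopulate index/filter blocks into the
// block cache without incident (the test only checks nothing fails).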
TEST_F(DBTest2, CacheIndexAndFilterWithDBRestart) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  BlockBasedTableOptions table_options;
  table_options.cache_index_and_filter_blocks = true;
  table_options.filter_policy.reset(NewBloomFilterPolicy(20));
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  CreateAndReopenWithCF({"pikachu"}, options);

  ASSERT_OK(Put(1, "a", "begin"));
  ASSERT_OK(Put(1, "z", "end"));
  ASSERT_OK(Flush(1));
  ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options));

  std::string value;
  value = Get(1, "a");
}

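// max_successive_merges bounds how many consecutive Merge operands may
// accumulate for a key before they are folded into a single value.
// Lowering it from 3 to 2 across a reopen exercises WAL recovery under a
// changed setting.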
TEST_F(DBTest2, MaxSuccessiveMergesChangeWithDBRecovery) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  options.max_successive_merges = 3;
  options.merge_operator = MergeOperators::CreatePutOperator();
  options.disable_auto_compactions = true;
  DestroyAndReopen(options);
  ASSERT_OK(Put("poi", "Finch"));
  ASSERT_OK(db_->Merge(WriteOptions(), "poi", "Reese"));
  ASSERT_OK(db_->Merge(WriteOptions(), "poi", "Shaw"));
  ASSERT_OK(db_->Merge(WriteOptions(), "poi", "Root"));
  options.max_successive_merges = 2;
  Reopen(options);
}

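// The write-buffer tests below drive WriteBufferManager, which enforces a
// shared memory budget across all memtables registered with it. A minimal
// sketch of the intended production usage (capacities here are
// illustrative, not taken from the tests):
//
//   auto wbm = std::make_shared<WriteBufferManager>(
//       64 << 20 /* buffer_size */, NewLRUCache(128 << 20) /* cache */);
//   Options opts;
//   opts.write_buffer_manager = wbm;  // share across CFs and DB instances
//
// The tests instead use small budgets plus sync points that force 1-byte
// arena blocks, so the limit is hit deterministically.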
class DBTestSharedWriteBufferAcrossCFs
    : public DBTestBase,
      public testing::WithParamInterface<std::tuple<bool, bool>> {
 public:
  DBTestSharedWriteBufferAcrossCFs()
      : DBTestBase("db_test_shared_write_buffer", /*env_do_fsync=*/true) {}
  void SetUp() override {
    use_old_interface_ = std::get<0>(GetParam());
    cost_cache_ = std::get<1>(GetParam());
  }
  bool use_old_interface_;
  bool cost_cache_;
};

TEST_P(DBTestSharedWriteBufferAcrossCFs, SharedWriteBufferAcrossCFs) {
  Options options = CurrentOptions();
  options.arena_block_size = 4096;
  auto flush_listener = std::make_shared<FlushCounterListener>();
  options.listeners.push_back(flush_listener);
  // Don't trip the listener at shutdown.
  options.avoid_flush_during_shutdown = true;
  // Avoid nondeterministic results from malloc_usable_size() by forcing the
  // arena block size to 1.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "Arena::Arena:0", [&](void* arg) {
        size_t* block_size = static_cast<size_t*>(arg);
        *block_size = 1;
      });

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "Arena::AllocateNewBlock:0", [&](void* arg) {
        // Report the requested size as the allocated size to keep memory
        // accounting deterministic.
        std::pair<size_t*, size_t*>* pair =
            static_cast<std::pair<size_t*, size_t*>*>(arg);
        *std::get<0>(*pair) = *std::get<1>(*pair);
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // The total soft write buffer size is about 105000
  std::shared_ptr<Cache> cache = NewLRUCache(4 * 1024 * 1024, 2);
  ASSERT_LT(cache->GetUsage(), 256 * 1024);

  if (use_old_interface_) {
    options.db_write_buffer_size = 120000;  // this is the real limit
  } else if (!cost_cache_) {
    options.write_buffer_manager.reset(new WriteBufferManager(114285));
  } else {
    options.write_buffer_manager.reset(new WriteBufferManager(114285, cache));
  }
  options.write_buffer_size = 500000;  // this is never hit
  CreateAndReopenWithCF({"pikachu", "dobrynia", "nikitich"}, options);

  WriteOptions wo;
  wo.disableWAL = true;

  std::function<void()> wait_flush = [&]() {
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[0]));
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[1]));
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[2]));
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[3]));
    // Ensure background work is fully finished including listener callbacks
    // before accessing listener state.
    ASSERT_OK(dbfull()->TEST_WaitForBackgroundWork());
  };

  // Create some data and flush "default" and "nikitich" so that they
  // hold the most recently created memtables.
  flush_listener->expected_flush_reason = FlushReason::kManualFlush;
  ASSERT_OK(Put(3, Key(1), DummyString(1), wo));
  ASSERT_OK(Flush(3));
  ASSERT_OK(Put(3, Key(1), DummyString(1), wo));
  ASSERT_OK(Put(0, Key(1), DummyString(1), wo));
  ASSERT_OK(Flush(0));
  ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
            static_cast<uint64_t>(1));
  ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
            static_cast<uint64_t>(1));

  flush_listener->expected_flush_reason = FlushReason::kWriteBufferManager;
  ASSERT_OK(Put(3, Key(1), DummyString(30000), wo));
  if (cost_cache_) {
    ASSERT_GE(cache->GetUsage(), 256 * 1024);
    ASSERT_LE(cache->GetUsage(), 2 * 256 * 1024);
  }
  wait_flush();
  ASSERT_OK(Put(0, Key(1), DummyString(60000), wo));
  if (cost_cache_) {
    ASSERT_GE(cache->GetUsage(), 256 * 1024);
    ASSERT_LE(cache->GetUsage(), 2 * 256 * 1024);
  }
  wait_flush();
  ASSERT_OK(Put(2, Key(1), DummyString(1), wo));
  // No flush should trigger
  wait_flush();
  {
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
              static_cast<uint64_t>(1));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
              static_cast<uint64_t>(0));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
              static_cast<uint64_t>(0));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
              static_cast<uint64_t>(1));
  }
  // Trigger a flush. Flushing "nikitich".
  ASSERT_OK(Put(3, Key(2), DummyString(30000), wo));
  wait_flush();
  ASSERT_OK(Put(0, Key(1), DummyString(1), wo));
  wait_flush();
  {
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
              static_cast<uint64_t>(1));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
              static_cast<uint64_t>(0));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
              static_cast<uint64_t>(0));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
              static_cast<uint64_t>(2));
  }

  // Without hitting the threshold, no flush should trigger.
  ASSERT_OK(Put(2, Key(1), DummyString(30000), wo));
  wait_flush();
  ASSERT_OK(Put(2, Key(1), DummyString(1), wo));
  wait_flush();
  ASSERT_OK(Put(2, Key(1), DummyString(1), wo));
  wait_flush();
  {
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
              static_cast<uint64_t>(1));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
              static_cast<uint64_t>(0));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
              static_cast<uint64_t>(0));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
              static_cast<uint64_t>(2));
  }

  // Hit the write buffer limit again. "default"
  // will have been flushed.
  ASSERT_OK(Put(2, Key(2), DummyString(10000), wo));
  wait_flush();
  ASSERT_OK(Put(3, Key(1), DummyString(1), wo));
  wait_flush();
  ASSERT_OK(Put(0, Key(1), DummyString(1), wo));
  wait_flush();
  ASSERT_OK(Put(0, Key(1), DummyString(1), wo));
  wait_flush();
  ASSERT_OK(Put(0, Key(1), DummyString(1), wo));
  wait_flush();
  {
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
              static_cast<uint64_t>(2));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
              static_cast<uint64_t>(0));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
              static_cast<uint64_t>(0));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
              static_cast<uint64_t>(2));
  }
  // Trigger another flush. This time "dobrynia" is picked. "pikachu" should
  // not be flushed, even though it has never been flushed.
  ASSERT_OK(Put(1, Key(1), DummyString(1), wo));
  wait_flush();
  ASSERT_OK(Put(2, Key(1), DummyString(80000), wo));
  wait_flush();
  ASSERT_OK(Put(1, Key(1), DummyString(1), wo));
  wait_flush();
  ASSERT_OK(Put(2, Key(1), DummyString(1), wo));
  wait_flush();
  {
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
              static_cast<uint64_t>(2));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
              static_cast<uint64_t>(0));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
              static_cast<uint64_t>(1));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
              static_cast<uint64_t>(2));
  }

  if (cost_cache_) {
    ASSERT_GE(cache->GetUsage(), 256 * 1024);
    Close();
    options.write_buffer_manager.reset();
    last_options_.write_buffer_manager.reset();
    ASSERT_LT(cache->GetUsage(), 256 * 1024);
  }
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

INSTANTIATE_TEST_CASE_P(DBTestSharedWriteBufferAcrossCFs,
                        DBTestSharedWriteBufferAcrossCFs,
                        ::testing::Values(std::make_tuple(true, false),
                                          std::make_tuple(false, false),
                                          std::make_tuple(false, true)));

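// Same budget enforcement, but with one WriteBufferManager shared by two
// separate DB instances: once the shared limit is hit, a write against
// either DB can trigger a flush in the other.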
TEST_F(DBTest2, SharedWriteBufferLimitAcrossDB) {
  std::string dbname2 = test::PerThreadDBPath("db_shared_wb_db2");
  Options options = CurrentOptions();
  options.arena_block_size = 4096;
  auto flush_listener = std::make_shared<FlushCounterListener>();
  options.listeners.push_back(flush_listener);
  // Don't trip the listener at shutdown.
  options.avoid_flush_during_shutdown = true;
  // Avoid nondeterministic results from malloc_usable_size() by forcing the
  // arena block size to 1.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "Arena::Arena:0", [&](void* arg) {
        size_t* block_size = static_cast<size_t*>(arg);
        *block_size = 1;
      });

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "Arena::AllocateNewBlock:0", [&](void* arg) {
        // Report the requested size as the allocated size to keep memory
        // accounting deterministic.
        std::pair<size_t*, size_t*>* pair =
            static_cast<std::pair<size_t*, size_t*>*>(arg);
        *std::get<0>(*pair) = *std::get<1>(*pair);
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  options.write_buffer_size = 500000;  // this is never hit
  // Use a write buffer total size so that the soft limit is about
  // 105000.
  options.write_buffer_manager.reset(new WriteBufferManager(120000));
  CreateAndReopenWithCF({"cf1", "cf2"}, options);

  ASSERT_OK(DestroyDB(dbname2, options));
  DB* db2 = nullptr;
  ASSERT_OK(DB::Open(options, dbname2, &db2));

  WriteOptions wo;
  wo.disableWAL = true;

  std::function<void()> wait_flush = [&]() {
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[0]));
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[1]));
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[2]));
    ASSERT_OK(static_cast<DBImpl*>(db2)->TEST_WaitForFlushMemTable());
    // Ensure background work is fully finished including listener callbacks
    // before accessing listener state.
    ASSERT_OK(dbfull()->TEST_WaitForBackgroundWork());
    ASSERT_OK(
        static_cast_with_check<DBImpl>(db2)->TEST_WaitForBackgroundWork());
  };

  // Trigger a flush on cf2
  flush_listener->expected_flush_reason = FlushReason::kWriteBufferManager;
  ASSERT_OK(Put(2, Key(1), DummyString(70000), wo));
  wait_flush();
  ASSERT_OK(Put(0, Key(1), DummyString(20000), wo));
  wait_flush();

  // Insert to DB2
  ASSERT_OK(db2->Put(wo, Key(2), DummyString(20000)));
  wait_flush();

  ASSERT_OK(Put(2, Key(1), DummyString(1), wo));
  wait_flush();
  ASSERT_OK(static_cast<DBImpl*>(db2)->TEST_WaitForFlushMemTable());
  {
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default") +
                  GetNumberOfSstFilesForColumnFamily(db_, "cf1") +
                  GetNumberOfSstFilesForColumnFamily(db_, "cf2"),
              static_cast<uint64_t>(1));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db2, "default"),
              static_cast<uint64_t>(0));
  }

  // Trigger a flush of another CF in DB1.
  ASSERT_OK(db2->Put(wo, Key(2), DummyString(70000)));
  wait_flush();
  ASSERT_OK(Put(2, Key(1), DummyString(1), wo));
  wait_flush();
  {
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
              static_cast<uint64_t>(1));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "cf1"),
              static_cast<uint64_t>(0));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "cf2"),
              static_cast<uint64_t>(1));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db2, "default"),
              static_cast<uint64_t>(0));
  }

  // Trigger a flush in DB2.
  ASSERT_OK(db2->Put(wo, Key(3), DummyString(40000)));
  wait_flush();
  ASSERT_OK(db2->Put(wo, Key(1), DummyString(1)));
  wait_flush();
  ASSERT_OK(static_cast<DBImpl*>(db2)->TEST_WaitForFlushMemTable());
  {
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
              static_cast<uint64_t>(1));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "cf1"),
              static_cast<uint64_t>(0));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "cf2"),
              static_cast<uint64_t>(1));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db2, "default"),
              static_cast<uint64_t>(1));
  }

  delete db2;
  ASSERT_OK(DestroyDB(dbname2, options));

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

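// With buffer_size == 0 the manager enforces no flush limit, but since a
// cache is attached it still charges memtable memory to that cache in
// 256KB dummy entries, which is what the usage assertion observes.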
TEST_F(DBTest2, TestWriteBufferNoLimitWithCache) {
  Options options = CurrentOptions();
  options.arena_block_size = 4096;
  std::shared_ptr<Cache> cache = NewLRUCache(LRUCacheOptions(
      10000000 /* capacity */, 1 /* num_shard_bits */,
      false /* strict_capacity_limit */, 0.0 /* high_pri_pool_ratio */,
      nullptr /* memory_allocator */, kDefaultToAdaptiveMutex,
      kDontChargeCacheMetadata));

  options.write_buffer_size = 50000;  // this is never hit
  // A write buffer manager with buffer_size == 0 imposes no limit; it only
  // charges memtable memory to the cache.
  options.write_buffer_manager.reset(new WriteBufferManager(0, cache));
  Reopen(options);

  ASSERT_OK(Put("foo", "bar"));
  // One dummy entry is 256KB.
  ASSERT_GT(cache->GetUsage(), 128000);
}

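// Helper shared by the WAL filter tests below: MultiGets both key sets and
// asserts that every key in keys_must_exist is found and every key in
// keys_must_not_exist returns NotFound.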
namespace {
void ValidateKeyExistence(DB* db, const std::vector<Slice>& keys_must_exist,
                          const std::vector<Slice>& keys_must_not_exist) {
  // Ensure that expected keys exist
  std::vector<std::string> values;
  if (keys_must_exist.size() > 0) {
    std::vector<Status> status_list =
        db->MultiGet(ReadOptions(), keys_must_exist, &values);
    for (size_t i = 0; i < keys_must_exist.size(); i++) {
      ASSERT_OK(status_list[i]);
    }
  }

  // Ensure that given keys don't exist
  if (keys_must_not_exist.size() > 0) {
    std::vector<Status> status_list =
        db->MultiGet(ReadOptions(), keys_must_not_exist, &values);
    for (size_t i = 0; i < keys_must_not_exist.size(); i++) {
      ASSERT_TRUE(status_list[i].IsNotFound());
    }
  }
}
}  // anonymous namespace

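// WalFilter lets an application inspect, rewrite, or skip WAL records
// during recovery. It is installed as a raw pointer that must outlive the
// DB, as in this test. A minimal sketch (the filter behavior here is
// illustrative only):
//
//   class SkipAllFilter : public WalFilter {
//    public:
//     WalProcessingOption LogRecord(const WriteBatch& /*batch*/,
//                                   WriteBatch* /*new_batch*/,
//                                   bool* /*batch_changed*/) const override {
//       // Drop every record replayed from the WAL.
//       return WalProcessingOption::kIgnoreCurrentRecord;
//     }
//     const char* Name() const override { return "SkipAllFilter"; }
//   };
//   ...
//   SkipAllFilter filter;
//   options.wal_filter = &filter;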
TEST_F(DBTest2, WalFilterTest) {
  class TestWalFilter : public WalFilter {
   private:
    // Processing option that is requested to be applied at the given index
    WalFilter::WalProcessingOption wal_processing_option_;
    // Index at which to apply wal_processing_option_
    // At other indexes default wal_processing_option::kContinueProcessing is
    // returned.
    size_t apply_option_at_record_index_;
    // Current record index, incremented with each record encountered.
    size_t current_record_index_;

   public:
    TestWalFilter(WalFilter::WalProcessingOption wal_processing_option,
                  size_t apply_option_for_record_index)
        : wal_processing_option_(wal_processing_option),
          apply_option_at_record_index_(apply_option_for_record_index),
          current_record_index_(0) {}

    WalProcessingOption LogRecord(const WriteBatch& /*batch*/,
                                  WriteBatch* /*new_batch*/,
                                  bool* /*batch_changed*/) const override {
      WalFilter::WalProcessingOption option_to_return;

      if (current_record_index_ == apply_option_at_record_index_) {
        option_to_return = wal_processing_option_;
      } else {
        option_to_return = WalProcessingOption::kContinueProcessing;
      }

      // Filter is passed as a const object for RocksDB to not modify the
      // object, however we modify it for our own purpose here and hence
      // cast the constness away.
      (const_cast<TestWalFilter*>(this)->current_record_index_)++;

      return option_to_return;
    }

    const char* Name() const override { return "TestWalFilter"; }
  };

  // Create 3 batches with two keys each
  std::vector<std::vector<std::string>> batch_keys(3);

  batch_keys[0].push_back("key1");
  batch_keys[0].push_back("key2");
  batch_keys[1].push_back("key3");
  batch_keys[1].push_back("key4");
  batch_keys[2].push_back("key5");
  batch_keys[2].push_back("key6");

  // Test with all WAL processing options
  for (int option = 0;
       option < static_cast<int>(
                    WalFilter::WalProcessingOption::kWalProcessingOptionMax);
       option++) {
    Options options = OptionsForLogIterTest();
    DestroyAndReopen(options);
    CreateAndReopenWithCF({"pikachu"}, options);

    // Write given keys in given batches
    for (size_t i = 0; i < batch_keys.size(); i++) {
      WriteBatch batch;
      for (size_t j = 0; j < batch_keys[i].size(); j++) {
        ASSERT_OK(batch.Put(handles_[0], batch_keys[i][j], DummyString(1024)));
      }
      ASSERT_OK(dbfull()->Write(WriteOptions(), &batch));
    }

    WalFilter::WalProcessingOption wal_processing_option =
        static_cast<WalFilter::WalProcessingOption>(option);

    // Create a test filter that applies wal_processing_option at record
    // index 1
    size_t apply_option_for_record_index = 1;
    TestWalFilter test_wal_filter(wal_processing_option,
                                  apply_option_for_record_index);

    // Reopen database with option to use WAL filter
    options = OptionsForLogIterTest();
    options.wal_filter = &test_wal_filter;
    Status status =
        TryReopenWithColumnFamilies({"default", "pikachu"}, options);
    if (wal_processing_option ==
        WalFilter::WalProcessingOption::kCorruptedRecord) {
      ASSERT_NOK(status);
      // In case of corruption we can turn off paranoid_checks to reopen
      // the database
      options.paranoid_checks = false;
      ReopenWithColumnFamilies({"default", "pikachu"}, options);
    } else {
      ASSERT_OK(status);
    }

  644. // Compute which keys we expect to be found
  645. // and which we expect not to be found after recovery.
  646. std::vector<Slice> keys_must_exist;
  647. std::vector<Slice> keys_must_not_exist;
  648. switch (wal_processing_option) {
  649. case WalFilter::WalProcessingOption::kCorruptedRecord:
  650. case WalFilter::WalProcessingOption::kContinueProcessing: {
  651. fprintf(stderr, "Testing with complete WAL processing\n");
  652. // we expect all records to be processed
  653. for (size_t i = 0; i < batch_keys.size(); i++) {
  654. for (size_t j = 0; j < batch_keys[i].size(); j++) {
  655. keys_must_exist.emplace_back(batch_keys[i][j]);
  656. }
  657. }
  658. break;
  659. }
  660. case WalFilter::WalProcessingOption::kIgnoreCurrentRecord: {
  661. fprintf(stderr,
  662. "Testing with ignoring record %" ROCKSDB_PRIszt " only\n",
  663. apply_option_for_record_index);
  664. // We expect the record with apply_option_for_record_index to be not
  665. // found.
  666. for (size_t i = 0; i < batch_keys.size(); i++) {
  667. for (size_t j = 0; j < batch_keys[i].size(); j++) {
  668. if (i == apply_option_for_record_index) {
  669. keys_must_not_exist.emplace_back(batch_keys[i][j]);
  670. } else {
  671. keys_must_exist.emplace_back(batch_keys[i][j]);
  672. }
  673. }
  674. }
  675. break;
  676. }
  677. case WalFilter::WalProcessingOption::kStopReplay: {
  678. fprintf(stderr,
  679. "Testing with stopping replay from record %" ROCKSDB_PRIszt
  680. "\n",
  681. apply_option_for_record_index);
  682. // We expect records beyond apply_option_for_record_index to be not
  683. // found.
  684. for (size_t i = 0; i < batch_keys.size(); i++) {
  685. for (size_t j = 0; j < batch_keys[i].size(); j++) {
  686. if (i >= apply_option_for_record_index) {
  687. keys_must_not_exist.emplace_back(batch_keys[i][j]);
  688. } else {
  689. keys_must_exist.emplace_back(batch_keys[i][j]);
  690. }
  691. }
  692. }
  693. break;
  694. }
  695. default:
  696. FAIL(); // unhandled case
  697. }
  698. bool checked_after_reopen = false;
  699. while (true) {
  700. // Ensure that expected keys exists
  701. // and not expected keys don't exist after recovery
  702. ValidateKeyExistence(db_, keys_must_exist, keys_must_not_exist);
  703. if (checked_after_reopen) {
  704. break;
  705. }
  706. // reopen database again to make sure previous log(s) are not used
  707. //(even if they were skipped)
  708. // reopn database with option to use WAL filter
  709. options = OptionsForLogIterTest();
  710. ReopenWithColumnFamilies({"default", "pikachu"}, options);
  711. checked_after_reopen = true;
  712. }
  713. }
  714. }
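// A minimal sketch of the smallest useful WalFilter, distilled from the test
// above: only LogRecord() and Name() need to be overridden. The class name
// SkipAllRecordsWalFilter is illustrative and not used by any test here; it
// simply drops every WAL record during recovery.
class SkipAllRecordsWalFilter : public WalFilter {
 public:
  WalProcessingOption LogRecord(const WriteBatch& /*batch*/,
                                WriteBatch* /*new_batch*/,
                                bool* /*batch_changed*/) const override {
    // Drop the current record; recovery continues with the next one.
    return WalProcessingOption::kIgnoreCurrentRecord;
  }
  const char* Name() const override { return "SkipAllRecordsWalFilter"; }
};
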
TEST_F(DBTest2, WalFilterTestWithChangeBatch) {
  class ChangeBatchHandler : public WriteBatch::Handler {
   private:
    // Batch to insert keys in
    WriteBatch* new_write_batch_;
    // Number of keys to add in the new batch
    size_t num_keys_to_add_in_new_batch_;
    // Number of keys added to new batch
    size_t num_keys_added_;

   public:
    ChangeBatchHandler(WriteBatch* new_write_batch,
                       size_t num_keys_to_add_in_new_batch)
        : new_write_batch_(new_write_batch),
          num_keys_to_add_in_new_batch_(num_keys_to_add_in_new_batch),
          num_keys_added_(0) {}

    void Put(const Slice& key, const Slice& value) override {
      if (num_keys_added_ < num_keys_to_add_in_new_batch_) {
        ASSERT_OK(new_write_batch_->Put(key, value));
        ++num_keys_added_;
      }
    }
  };

  class TestWalFilterWithChangeBatch : public WalFilter {
   private:
    // Index at which to start changing records
    size_t change_records_from_index_;
    // Number of keys to add in the new batch
    size_t num_keys_to_add_in_new_batch_;
    // Current record index, incremented with each record encountered.
    size_t current_record_index_;

   public:
    TestWalFilterWithChangeBatch(size_t change_records_from_index,
                                 size_t num_keys_to_add_in_new_batch)
        : change_records_from_index_(change_records_from_index),
          num_keys_to_add_in_new_batch_(num_keys_to_add_in_new_batch),
          current_record_index_(0) {}

    WalProcessingOption LogRecord(const WriteBatch& batch,
                                  WriteBatch* new_batch,
                                  bool* batch_changed) const override {
      if (current_record_index_ >= change_records_from_index_) {
        ChangeBatchHandler handler(new_batch, num_keys_to_add_in_new_batch_);
        Status s = batch.Iterate(&handler);
        if (s.ok()) {
          *batch_changed = true;
        } else {
          assert(false);
        }
      }

      // The filter is passed as a const object so that RocksDB cannot modify
      // it; we mutate it here for our own bookkeeping, hence the const_cast.
      (const_cast<TestWalFilterWithChangeBatch*>(this)
           ->current_record_index_)++;

      return WalProcessingOption::kContinueProcessing;
    }

    const char* Name() const override { return "TestWalFilterWithChangeBatch"; }
  };

  std::vector<std::vector<std::string>> batch_keys(3);
  batch_keys[0].push_back("key1");
  batch_keys[0].push_back("key2");
  batch_keys[1].push_back("key3");
  batch_keys[1].push_back("key4");
  batch_keys[2].push_back("key5");
  batch_keys[2].push_back("key6");

  Options options = OptionsForLogIterTest();
  DestroyAndReopen(options);
  CreateAndReopenWithCF({"pikachu"}, options);

  // Write given keys in given batches
  for (size_t i = 0; i < batch_keys.size(); i++) {
    WriteBatch batch;
    for (size_t j = 0; j < batch_keys[i].size(); j++) {
      ASSERT_OK(batch.Put(handles_[0], batch_keys[i][j], DummyString(1024)));
    }
    ASSERT_OK(dbfull()->Write(WriteOptions(), &batch));
  }

  // Create a test filter that rewrites records from
  // change_records_from_index onward, keeping only
  // num_keys_to_add_in_new_batch keys per rewritten batch.
  size_t change_records_from_index = 1;
  size_t num_keys_to_add_in_new_batch = 1;
  TestWalFilterWithChangeBatch test_wal_filter_with_change_batch(
      change_records_from_index, num_keys_to_add_in_new_batch);

  // Reopen database with option to use WAL filter
  options = OptionsForLogIterTest();
  options.wal_filter = &test_wal_filter_with_change_batch;
  ReopenWithColumnFamilies({"default", "pikachu"}, options);

  // Ensure that all keys exist before change_records_from_index_,
  // and that from that index on only a single key per batch exists,
  // as our filter keeps only a single key for each rewritten batch.
  std::vector<Slice> keys_must_exist;
  std::vector<Slice> keys_must_not_exist;

  for (size_t i = 0; i < batch_keys.size(); i++) {
    for (size_t j = 0; j < batch_keys[i].size(); j++) {
      if (i >= change_records_from_index && j >= num_keys_to_add_in_new_batch) {
        keys_must_not_exist.emplace_back(batch_keys[i][j]);
      } else {
        keys_must_exist.emplace_back(batch_keys[i][j]);
      }
    }
  }

  bool checked_after_reopen = false;

  while (true) {
    // Ensure that expected keys exist
    // and unexpected keys don't exist after recovery.
    ValidateKeyExistence(db_, keys_must_exist, keys_must_not_exist);

    if (checked_after_reopen) {
      break;
    }

    // Reopen the database again to make sure previous log(s) are not used
    // (even if they were skipped), this time without the WAL filter.
    options = OptionsForLogIterTest();
    ReopenWithColumnFamilies({"default", "pikachu"}, options);

    checked_after_reopen = true;
  }
}

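// A minimal sketch of the WriteBatch::Handler pattern used by
// ChangeBatchHandler above: WriteBatch::Iterate() replays every operation in
// the batch against the handler's callbacks, so a handler can inspect (or
// selectively copy) a batch without decoding it by hand. PutCounter is
// illustrative and not referenced by the tests.
class PutCounter : public WriteBatch::Handler {
 public:
  size_t puts = 0;
  void Put(const Slice& /*key*/, const Slice& /*value*/) override { ++puts; }
};
// Usage, assuming an existing WriteBatch batch:
//   PutCounter counter;
//   Status s = batch.Iterate(&counter);
//   // On success, counter.puts holds the number of Put operations.
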
TEST_F(DBTest2, WalFilterTestWithChangeBatchExtraKeys) {
  class TestWalFilterWithChangeBatchAddExtraKeys : public WalFilter {
   public:
    WalProcessingOption LogRecord(const WriteBatch& batch,
                                  WriteBatch* new_batch,
                                  bool* batch_changed) const override {
      *new_batch = batch;
      Status s = new_batch->Put("key_extra", "value_extra");
      if (s.ok()) {
        *batch_changed = true;
      } else {
        assert(false);
      }
      return WalProcessingOption::kContinueProcessing;
    }

    const char* Name() const override {
      return "WalFilterTestWithChangeBatchExtraKeys";
    }
  };

  std::vector<std::vector<std::string>> batch_keys(3);
  batch_keys[0].push_back("key1");
  batch_keys[0].push_back("key2");
  batch_keys[1].push_back("key3");
  batch_keys[1].push_back("key4");
  batch_keys[2].push_back("key5");
  batch_keys[2].push_back("key6");

  Options options = OptionsForLogIterTest();
  DestroyAndReopen(options);
  CreateAndReopenWithCF({"pikachu"}, options);

  // Write given keys in given batches
  for (size_t i = 0; i < batch_keys.size(); i++) {
    WriteBatch batch;
    for (size_t j = 0; j < batch_keys[i].size(); j++) {
      ASSERT_OK(batch.Put(handles_[0], batch_keys[i][j], DummyString(1024)));
    }
    ASSERT_OK(dbfull()->Write(WriteOptions(), &batch));
  }

  // Create a test filter that would add extra keys
  TestWalFilterWithChangeBatchAddExtraKeys test_wal_filter_extra_keys;

  // Reopen database with option to use WAL filter
  options = OptionsForLogIterTest();
  options.wal_filter = &test_wal_filter_extra_keys;
  Status status = TryReopenWithColumnFamilies({"default", "pikachu"}, options);
  ASSERT_TRUE(status.IsNotSupported());

  // Reopen without the filter; now reopen should succeed, since the previous
  // attempt to open must not have altered the db.
  options = OptionsForLogIterTest();
  ReopenWithColumnFamilies({"default", "pikachu"}, options);

  std::vector<Slice> keys_must_exist;
  std::vector<Slice> keys_must_not_exist;  // empty vector

  for (size_t i = 0; i < batch_keys.size(); i++) {
    for (size_t j = 0; j < batch_keys[i].size(); j++) {
      keys_must_exist.emplace_back(batch_keys[i][j]);
    }
  }

  ValidateKeyExistence(db_, keys_must_exist, keys_must_not_exist);
}

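// Hedged note on the NotSupported outcome above: this test demonstrates that
// recovery rejects a WAL filter whose rewritten batch grows (here every batch
// gains one extra "key_extra" Put). A plausible reason is that adding entries
// would shift the sequence numbers of all subsequently recovered writes; the
// precise condition is an inference from the observed status, not spelled out
// in this file.
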
TEST_F(DBTest2, WalFilterTestWithColumnFamilies) {
  class TestWalFilterWithColumnFamilies : public WalFilter {
   private:
    // column_family_id -> log_number map (provided to WALFilter)
    std::map<uint32_t, uint64_t> cf_log_number_map_;
    // column_family_name -> column_family_id map (provided to WALFilter)
    std::map<std::string, uint32_t> cf_name_id_map_;
    // column_family_id -> keys_found_in_wal map
    // We store keys that are applicable to the column_family
    // during recovery (i.e. aren't already flushed to SST file(s))
    // for verification against the keys we expect.
    std::map<uint32_t, std::vector<std::string>> cf_wal_keys_;

   public:
    void ColumnFamilyLogNumberMap(
        const std::map<uint32_t, uint64_t>& cf_lognumber_map,
        const std::map<std::string, uint32_t>& cf_name_id_map) override {
      cf_log_number_map_ = cf_lognumber_map;
      cf_name_id_map_ = cf_name_id_map;
    }

    WalProcessingOption LogRecordFound(unsigned long long log_number,
                                       const std::string& /*log_file_name*/,
                                       const WriteBatch& batch,
                                       WriteBatch* /*new_batch*/,
                                       bool* /*batch_changed*/) override {
      class LogRecordBatchHandler : public WriteBatch::Handler {
       private:
        const std::map<uint32_t, uint64_t>& cf_log_number_map_;
        std::map<uint32_t, std::vector<std::string>>& cf_wal_keys_;
        unsigned long long log_number_;

       public:
        LogRecordBatchHandler(
            unsigned long long current_log_number,
            const std::map<uint32_t, uint64_t>& cf_log_number_map,
            std::map<uint32_t, std::vector<std::string>>& cf_wal_keys)
            : cf_log_number_map_(cf_log_number_map),
              cf_wal_keys_(cf_wal_keys),
              log_number_(current_log_number) {}

        Status PutCF(uint32_t column_family_id, const Slice& key,
                     const Slice& /*value*/) override {
          auto it = cf_log_number_map_.find(column_family_id);
          assert(it != cf_log_number_map_.end());
          unsigned long long log_number_for_cf = it->second;
          // If the current record is applicable for column_family_id
          // (i.e. isn't flushed to SST file(s) for column_family_id)
          // add it to the cf_wal_keys_ map for verification.
          if (log_number_ >= log_number_for_cf) {
            cf_wal_keys_[column_family_id].push_back(
                std::string(key.data(), key.size()));
          }
          return Status::OK();
        }
      } handler(log_number, cf_log_number_map_, cf_wal_keys_);

      Status s = batch.Iterate(&handler);
      if (!s.ok()) {
        // TODO(AR) is this ok?
        return WalProcessingOption::kCorruptedRecord;
      }

      return WalProcessingOption::kContinueProcessing;
    }

    const char* Name() const override {
      return "WalFilterTestWithColumnFamilies";
    }

    const std::map<uint32_t, std::vector<std::string>>& GetColumnFamilyKeys() {
      return cf_wal_keys_;
    }

    const std::map<std::string, uint32_t>& GetColumnFamilyNameIdMap() {
      return cf_name_id_map_;
    }
  };

  std::vector<std::vector<std::string>> batch_keys_pre_flush(3);
  batch_keys_pre_flush[0].push_back("key1");
  batch_keys_pre_flush[0].push_back("key2");
  batch_keys_pre_flush[1].push_back("key3");
  batch_keys_pre_flush[1].push_back("key4");
  batch_keys_pre_flush[2].push_back("key5");
  batch_keys_pre_flush[2].push_back("key6");

  Options options = OptionsForLogIterTest();
  DestroyAndReopen(options);
  CreateAndReopenWithCF({"pikachu"}, options);

  // Write given keys in given batches
  for (size_t i = 0; i < batch_keys_pre_flush.size(); i++) {
    WriteBatch batch;
    for (size_t j = 0; j < batch_keys_pre_flush[i].size(); j++) {
      ASSERT_OK(batch.Put(handles_[0], batch_keys_pre_flush[i][j],
                          DummyString(1024)));
      ASSERT_OK(batch.Put(handles_[1], batch_keys_pre_flush[i][j],
                          DummyString(1024)));
    }
    ASSERT_OK(dbfull()->Write(WriteOptions(), &batch));
  }

  // Flush default column-family
  ASSERT_OK(db_->Flush(FlushOptions(), handles_[0]));

  // Do some more writes
  std::vector<std::vector<std::string>> batch_keys_post_flush(3);
  batch_keys_post_flush[0].push_back("key7");
  batch_keys_post_flush[0].push_back("key8");
  batch_keys_post_flush[1].push_back("key9");
  batch_keys_post_flush[1].push_back("key10");
  batch_keys_post_flush[2].push_back("key11");
  batch_keys_post_flush[2].push_back("key12");

  // Write given keys in given batches
  for (size_t i = 0; i < batch_keys_post_flush.size(); i++) {
    WriteBatch batch;
    for (size_t j = 0; j < batch_keys_post_flush[i].size(); j++) {
      ASSERT_OK(batch.Put(handles_[0], batch_keys_post_flush[i][j],
                          DummyString(1024)));
      ASSERT_OK(batch.Put(handles_[1], batch_keys_post_flush[i][j],
                          DummyString(1024)));
    }
    ASSERT_OK(dbfull()->Write(WriteOptions(), &batch));
  }

  // On recovery we should find only the second batch of keys applicable to
  // the default CF, but both batches applicable to the pikachu CF.

  // Create a test filter that records, per column family, the keys it sees
  // during recovery.
  TestWalFilterWithColumnFamilies test_wal_filter_column_families;

  // Reopen database with option to use WAL filter
  options = OptionsForLogIterTest();
  options.wal_filter = &test_wal_filter_column_families;
  Status status = TryReopenWithColumnFamilies({"default", "pikachu"}, options);
  ASSERT_TRUE(status.ok());

  // Verify that handles_[0] only has post_flush keys
  // while handles_[1] has pre and post flush keys.
  auto cf_wal_keys = test_wal_filter_column_families.GetColumnFamilyKeys();
  auto name_id_map = test_wal_filter_column_families.GetColumnFamilyNameIdMap();
  size_t index = 0;
  auto keys_cf = cf_wal_keys[name_id_map[kDefaultColumnFamilyName]];
  // default column-family, only post_flush keys are expected
  for (size_t i = 0; i < batch_keys_post_flush.size(); i++) {
    for (size_t j = 0; j < batch_keys_post_flush[i].size(); j++) {
      Slice key_from_the_log(keys_cf[index++]);
      Slice batch_key(batch_keys_post_flush[i][j]);
      ASSERT_EQ(key_from_the_log.compare(batch_key), 0);
    }
  }
  ASSERT_EQ(index, keys_cf.size());

  index = 0;
  keys_cf = cf_wal_keys[name_id_map["pikachu"]];
  // pikachu column-family, all keys are expected
  for (size_t i = 0; i < batch_keys_pre_flush.size(); i++) {
    for (size_t j = 0; j < batch_keys_pre_flush[i].size(); j++) {
      Slice key_from_the_log(keys_cf[index++]);
      Slice batch_key(batch_keys_pre_flush[i][j]);
      ASSERT_EQ(key_from_the_log.compare(batch_key), 0);
    }
  }

  for (size_t i = 0; i < batch_keys_post_flush.size(); i++) {
    for (size_t j = 0; j < batch_keys_post_flush[i].size(); j++) {
      Slice key_from_the_log(keys_cf[index++]);
      Slice batch_key(batch_keys_post_flush[i][j]);
      ASSERT_EQ(key_from_the_log.compare(batch_key), 0);
    }
  }
  ASSERT_EQ(index, keys_cf.size());
}

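// Worked example of the log-number rule applied by LogRecordBatchHandler
// above. Flushing the default CF advances its recorded log number past the
// first WAL, while pikachu's stays at the old WAL, so for a record in the
// first log:
//   log_number <  cf_log_number_map_[default id]  -> already in an SST, skip
//   log_number >= cf_log_number_map_[pikachu id]  -> still recovered from WAL
// which is exactly why the default CF reports only post-flush keys and
// pikachu reports both batches.
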
class CompactionStallTestListener : public EventListener {
 public:
  CompactionStallTestListener()
      : compacting_files_cnt_(0), compacted_files_cnt_(0) {}

  void OnCompactionBegin(DB* /*db*/, const CompactionJobInfo& ci) override {
    ASSERT_EQ(ci.cf_name, "default");
    ASSERT_EQ(ci.base_input_level, 0);
    ASSERT_EQ(ci.compaction_reason, CompactionReason::kLevelL0FilesNum);
    compacting_files_cnt_ += ci.input_files.size();
  }

  void OnCompactionCompleted(DB* /*db*/, const CompactionJobInfo& ci) override {
    ASSERT_EQ(ci.cf_name, "default");
    ASSERT_EQ(ci.base_input_level, 0);
    ASSERT_EQ(ci.compaction_reason, CompactionReason::kLevelL0FilesNum);
    compacted_files_cnt_ += ci.input_files.size();
  }

  std::atomic<size_t> compacting_files_cnt_;
  std::atomic<size_t> compacted_files_cnt_;
};

TEST_F(DBTest2, CompactionStall) {
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"DBImpl::BGWorkCompaction", "DBTest2::CompactionStall:0"},
       {"DBImpl::BGWorkCompaction", "DBTest2::CompactionStall:1"},
       {"DBTest2::CompactionStall:2",
        "DBImpl::NotifyOnCompactionBegin::UnlockMutex"},
       {"DBTest2::CompactionStall:3",
        "DBImpl::NotifyOnCompactionCompleted::UnlockMutex"}});
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  Options options = CurrentOptions();
  options.level0_file_num_compaction_trigger = 4;
  options.max_background_compactions = 40;
  CompactionStallTestListener* listener = new CompactionStallTestListener();
  options.listeners.emplace_back(listener);
  DestroyAndReopen(options);
  // Make sure all background compaction jobs can be scheduled.
  auto stop_token =
      dbfull()->TEST_write_controler().GetCompactionPressureToken();

  Random rnd(301);

  // 4 files in L0
  for (int i = 0; i < 4; i++) {
    for (int j = 0; j < 10; j++) {
      ASSERT_OK(Put(rnd.RandomString(10), rnd.RandomString(10)));
    }
    ASSERT_OK(Flush());
  }

  // Wait for compaction to be triggered
  TEST_SYNC_POINT("DBTest2::CompactionStall:0");

  // Clear the "DBImpl::BGWorkCompaction" sync point since we want to hold it
  // again at DBTest2::CompactionStall:1.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace();

  // Another 6 L0 files to trigger compaction again
  for (int i = 0; i < 6; i++) {
    for (int j = 0; j < 10; j++) {
      ASSERT_OK(Put(rnd.RandomString(10), rnd.RandomString(10)));
    }
    ASSERT_OK(Flush());
  }

  // Wait for another compaction to be triggered
  TEST_SYNC_POINT("DBTest2::CompactionStall:1");

  // Hold NotifyOnCompactionBegin in the unlock mutex section
  TEST_SYNC_POINT("DBTest2::CompactionStall:2");

  // Hold NotifyOnCompactionCompleted in the unlock mutex section
  TEST_SYNC_POINT("DBTest2::CompactionStall:3");

  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_LT(NumTableFilesAtLevel(0),
            options.level0_file_num_compaction_trigger);
  ASSERT_GT(listener->compacted_files_cnt_.load(),
            10 - options.level0_file_num_compaction_trigger);
  ASSERT_EQ(listener->compacting_files_cnt_.load(),
            listener->compacted_files_cnt_.load());

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

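// A minimal sketch of the sync-point dependency mechanism this test leans on:
// LoadDependency({{"A", "B"}}) makes any thread reaching TEST_SYNC_POINT("B")
// block until some thread has passed TEST_SYNC_POINT("A"). The names
// "Sketch:A" and "Sketch:B" below are illustrative, not real sync points.
//
//   ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
//       {{"Sketch:A", "Sketch:B"}});
//   ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
//   port::Thread t([] { TEST_SYNC_POINT("Sketch:A"); });
//   TEST_SYNC_POINT("Sketch:B");  // blocks until t has passed Sketch:A
//   t.join();
//   ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
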
TEST_F(DBTest2, FirstSnapshotTest) {
  Options options;
  options.write_buffer_size = 100000;  // Small write buffer
  options = CurrentOptions(options);
  CreateAndReopenWithCF({"pikachu"}, options);

  // This snapshot will have sequence number 0, which is the expected
  // behaviour.
  const Snapshot* s1 = db_->GetSnapshot();
  ASSERT_OK(Put(1, "k1", std::string(100000, 'x')));  // Fill memtable
  ASSERT_OK(Put(1, "k2", std::string(100000, 'y')));  // Trigger flush

  db_->ReleaseSnapshot(s1);
}

TEST_F(DBTest2, DuplicateSnapshot) {
  Options options;
  options = CurrentOptions(options);
  std::vector<const Snapshot*> snapshots;
  DBImpl* dbi = static_cast_with_check<DBImpl>(db_);
  SequenceNumber oldest_ww_snap, first_ww_snap;

  ASSERT_OK(Put("k", "v"));  // inc seq
  snapshots.push_back(db_->GetSnapshot());
  snapshots.push_back(db_->GetSnapshot());
  ASSERT_OK(Put("k", "v"));  // inc seq
  snapshots.push_back(db_->GetSnapshot());
  snapshots.push_back(dbi->GetSnapshotForWriteConflictBoundary());
  first_ww_snap = snapshots.back()->GetSequenceNumber();
  ASSERT_OK(Put("k", "v"));  // inc seq
  snapshots.push_back(dbi->GetSnapshotForWriteConflictBoundary());
  snapshots.push_back(db_->GetSnapshot());
  ASSERT_OK(Put("k", "v"));  // inc seq
  snapshots.push_back(db_->GetSnapshot());

  {
    InstrumentedMutexLock l(dbi->mutex());
    auto seqs = dbi->snapshots().GetAll(&oldest_ww_snap);
    ASSERT_EQ(seqs.size(), 4);  // duplicates are not counted
    ASSERT_EQ(oldest_ww_snap, first_ww_snap);
  }

  for (auto s : snapshots) {
    db_->ReleaseSnapshot(s);
  }
}

class PinL0IndexAndFilterBlocksTest
    : public DBTestBase,
      public testing::WithParamInterface<std::tuple<bool, bool>> {
 public:
  PinL0IndexAndFilterBlocksTest()
      : DBTestBase("db_pin_l0_index_bloom_test", /*env_do_fsync=*/true) {}

  void SetUp() override {
    infinite_max_files_ = std::get<0>(GetParam());
    disallow_preload_ = std::get<1>(GetParam());
  }

  void CreateTwoLevels(Options* options, bool close_afterwards) {
    if (infinite_max_files_) {
      options->max_open_files = -1;
    }
    options->create_if_missing = true;
    options->statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
    BlockBasedTableOptions table_options;
    table_options.cache_index_and_filter_blocks = true;
    table_options.pin_l0_filter_and_index_blocks_in_cache = true;
    table_options.filter_policy.reset(NewBloomFilterPolicy(20));
    options->table_factory.reset(NewBlockBasedTableFactory(table_options));
    CreateAndReopenWithCF({"pikachu"}, *options);

    ASSERT_OK(Put(1, "a", "begin"));
    ASSERT_OK(Put(1, "z", "end"));
    ASSERT_OK(Flush(1));
    // move this table to L1
    ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]));
    ASSERT_EQ(1, NumTableFilesAtLevel(1, 1));

    // reset block cache
    table_options.block_cache = NewLRUCache(64 * 1024);
    options->table_factory.reset(NewBlockBasedTableFactory(table_options));
    ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, *options));
    // create new table at L0
    ASSERT_OK(Put(1, "a2", "begin2"));
    ASSERT_OK(Put(1, "z2", "end2"));
    ASSERT_OK(Flush(1));

    if (close_afterwards) {
      Close();  // This ensures that there is no ref to block cache entries
    }
    table_options.block_cache->EraseUnRefEntries();
  }

  bool infinite_max_files_;
  bool disallow_preload_;
};

TEST_P(PinL0IndexAndFilterBlocksTest,
       IndexAndFilterBlocksOfNewTableAddedToCacheWithPinning) {
  Options options = CurrentOptions();
  if (infinite_max_files_) {
    options.max_open_files = -1;
  }
  options.create_if_missing = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  BlockBasedTableOptions table_options;
  table_options.cache_index_and_filter_blocks = true;
  table_options.pin_l0_filter_and_index_blocks_in_cache = true;
  table_options.filter_policy.reset(NewBloomFilterPolicy(20));
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  CreateAndReopenWithCF({"pikachu"}, options);

  ASSERT_OK(Put(1, "key", "val"));
  // Create a new table.
  ASSERT_OK(Flush(1));

  // index/filter blocks added to block cache right after table creation.
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));

  // only index/filter were added
  ASSERT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_ADD));
  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_MISS));

  // Miss and hit count should remain the same, they're all pinned.
  ASSERT_TRUE(db_->KeyMayExist(ReadOptions(), handles_[1], "key", nullptr));
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));

  // Miss and hit count should remain the same, they're all pinned.
  std::string value = Get(1, "key");
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));
}

TEST_P(PinL0IndexAndFilterBlocksTest,
       MultiLevelIndexAndFilterBlocksCachedWithPinning) {
  Options options = CurrentOptions();
  PinL0IndexAndFilterBlocksTest::CreateTwoLevels(&options, false);
  // get base cache values
  uint64_t fm = TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS);
  uint64_t fh = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT);
  uint64_t im = TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS);
  uint64_t ih = TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT);

  std::string value;
  // this should be read from L0
  // so cache values don't change
  value = Get(1, "a2");
  ASSERT_EQ(fm, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
  ASSERT_EQ(fh, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
  ASSERT_EQ(im, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
  ASSERT_EQ(ih, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));

  // This should be read from L1: when the file is opened, prefetching its
  // filter and index results in cache misses; the blocks are loaded and added
  // to the cache, and the Get() then hits them. Even with infinite
  // max_open_files there are still cache misses here, because we have reset
  // the block cache.
  value = Get(1, "a");
  ASSERT_EQ(fm + 1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
  ASSERT_EQ(im + 1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
}

TEST_P(PinL0IndexAndFilterBlocksTest, DisablePrefetchingNonL0IndexAndFilter) {
  Options options = CurrentOptions();
  // This ensures that db does not ref anything in the block cache, so
  // EraseUnRefEntries could clear them up.
  bool close_afterwards = true;
  PinL0IndexAndFilterBlocksTest::CreateTwoLevels(&options, close_afterwards);

  // Get base cache values
  uint64_t fm = TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS);
  uint64_t fh = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT);
  uint64_t im = TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS);
  uint64_t ih = TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT);

  if (disallow_preload_) {
    // Now we have two files. We narrow the max open files to allow 3 entries
    // so that preloading SST files won't happen.
    options.max_open_files = 13;
    // RocksDB sanitizes max_open_files to at least 20. Modify it back.
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
        "SanitizeOptions::AfterChangeMaxOpenFiles", [&](void* arg) {
          int* max_open_files = static_cast<int*>(arg);
          *max_open_files = 13;
        });
  }
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // Reopen database. If max_open_files is set as -1, table readers will be
  // preloaded. This will trigger a BlockBasedTable::Open() and prefetch
  // L0 index and filter. Level 1's prefetching is disabled in DB::Open().
  ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options));

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();

  if (!disallow_preload_) {
    // After reopen, cache misses are increased by one because we read (and
    // only read) the filter and index on L0.
    ASSERT_EQ(fm + 1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
    ASSERT_EQ(fh, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
    ASSERT_EQ(im + 1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
    ASSERT_EQ(ih, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));
  } else {
    // If max_open_files is not -1, we do not preload table readers, so there
    // is no change.
    ASSERT_EQ(fm, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
    ASSERT_EQ(fh, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
    ASSERT_EQ(im, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
    ASSERT_EQ(ih, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));
  }
  std::string value;
  // this should be read from L0
  value = Get(1, "a2");
  // If max_open_files is -1, we have pinned index and filter in Rep, so there
  // will not be changes in index and filter misses or hits. If max_open_files
  // is not -1, Get() will open a TableReader and prefetch index and filter.
  ASSERT_EQ(fm + 1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
  ASSERT_EQ(fh, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
  ASSERT_EQ(im + 1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
  ASSERT_EQ(ih, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));

  // this should be read from L1
  value = Get(1, "a");
  if (!disallow_preload_) {
    // In the infinite max files case, there's a cache miss in executing Get()
    // because the index and filter are not prefetched beforehand.
    ASSERT_EQ(fm + 2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
    ASSERT_EQ(fh, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
    ASSERT_EQ(im + 2, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
    ASSERT_EQ(ih, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));
  } else {
    // In this case, cache misses will be increased by one in
    // BlockBasedTable::Open() because this is not in the DB::Open() code
    // path, so we will prefetch L1's index and filter. Cache hits will also
    // be increased by one because Get() will read the index and filter from
    // the block cache, prefetched in the previous Open() call.
    ASSERT_EQ(fm + 2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
    ASSERT_EQ(fh + 1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
    ASSERT_EQ(im + 2, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
    ASSERT_EQ(ih + 1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));
  }
  // Force a full compaction to one single file. There will be a block
  // cache read for both of index and filter. If prefetch doesn't explicitly
  // happen, it will happen when verifying the file.
  Compact(1, "a", "zzzzz");
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  if (!disallow_preload_) {
    ASSERT_EQ(fm + 3, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
    ASSERT_EQ(fh, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
    ASSERT_EQ(im + 3, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
    ASSERT_EQ(ih + 2, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));
  } else {
    ASSERT_EQ(fm + 3, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
    ASSERT_EQ(fh + 1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
    ASSERT_EQ(im + 3, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
    ASSERT_EQ(ih + 3, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));
  }

  // Bloom and index hit will happen when a Get() happens.
  value = Get(1, "a");
  if (!disallow_preload_) {
    ASSERT_EQ(fm + 3, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
    ASSERT_EQ(fh + 1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
    ASSERT_EQ(im + 3, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
    ASSERT_EQ(ih + 3, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));
  } else {
    ASSERT_EQ(fm + 3, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
    ASSERT_EQ(fh + 2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
    ASSERT_EQ(im + 3, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
    ASSERT_EQ(ih + 4, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));
  }
}

INSTANTIATE_TEST_CASE_P(PinL0IndexAndFilterBlocksTest,
                        PinL0IndexAndFilterBlocksTest,
                        ::testing::Values(std::make_tuple(true, false),
                                          std::make_tuple(false, false),
                                          std::make_tuple(false, true)));

TEST_F(DBTest2, MaxCompactionBytesTest) {
  Options options = CurrentOptions();
  options.memtable_factory.reset(test::NewSpecialSkipListFactory(
      DBTestBase::kNumKeysByGenerateNewRandomFile));
  options.compaction_style = kCompactionStyleLevel;
  options.write_buffer_size = 200 << 10;
  options.arena_block_size = 4 << 10;
  options.level0_file_num_compaction_trigger = 4;
  options.num_levels = 4;
  options.compression = kNoCompression;
  options.max_bytes_for_level_base = 450 << 10;
  options.target_file_size_base = 100 << 10;
  // Infinite for full compaction.
  options.max_compaction_bytes = options.target_file_size_base * 100;

  Reopen(options);

  Random rnd(301);

  for (int num = 0; num < 8; num++) {
    GenerateNewRandomFile(&rnd);
  }
  CompactRangeOptions cro;
  cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
  ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
  ASSERT_EQ("0,0,8", FilesPerLevel(0));

  // When compacting from Ln to Ln+1, cut an output file if it would overlap
  // with more than three files in Ln+1.
  options.max_compaction_bytes = options.target_file_size_base * 3;
  Reopen(options);

  GenerateNewRandomFile(&rnd);
  // Add three more small files that overlap with the previous file
  for (int i = 0; i < 3; i++) {
    ASSERT_OK(Put("a", "z"));
    ASSERT_OK(Flush());
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  // Output files to L1 are cut to 4 pieces, according to
  // options.max_compaction_bytes (300K).
  // There are 8 files on L2 (the grandparents level), each one ~100K. The
  // first L1 output file overlaps grandparents a and b, whose combined size
  // stays under the 300K max_compaction_bytes limit; the second overlaps d
  // and e, likewise under 300K. Including any additional grandparent file
  // would make a future compaction larger than 300K.
  // L1: [  1  ] [  2  ] [  3  ] [  4  ]
  // L2: [a] [b] [c] [d] [e] [f] [g] [h]
  ASSERT_EQ("0,4,8", FilesPerLevel(0));
}

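// Worked numbers behind the expected file count above: target_file_size_base
// is 100K, so max_compaction_bytes = 3 * 100K = 300K. Each of the 8 L2
// grandparent files is ~100K, so an L1 output file can overlap at most two
// grandparents (~200K of overlap) before adding a third would push a future
// compaction past 300K; 8 grandparents / 2 per output file = 4 L1 files.
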
static void UniqueIdCallback(void* arg) {
  int* result = static_cast<int*>(arg);
  if (*result == -1) {
    *result = 0;
  }

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "GetUniqueIdFromFile:FS_IOC_GETVERSION", UniqueIdCallback);
}

class MockPersistentCache : public PersistentCache {
 public:
  explicit MockPersistentCache(const bool is_compressed, const size_t max_size)
      : is_compressed_(is_compressed), max_size_(max_size) {
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
        "GetUniqueIdFromFile:FS_IOC_GETVERSION", UniqueIdCallback);
  }

  ~MockPersistentCache() override = default;

  PersistentCache::StatsType Stats() override {
    return PersistentCache::StatsType();
  }

  uint64_t NewId() override {
    return last_id_.fetch_add(1, std::memory_order_relaxed);
  }

  Status Insert(const Slice& page_key, const char* data,
                const size_t size) override {
    MutexLock _(&lock_);
    if (size_ > max_size_) {
      size_ -= data_.begin()->second.size();
      data_.erase(data_.begin());
    }

    data_.insert(std::make_pair(page_key.ToString(), std::string(data, size)));
    size_ += size;
    return Status::OK();
  }

  Status Lookup(const Slice& page_key, std::unique_ptr<char[]>* data,
                size_t* size) override {
    MutexLock _(&lock_);
    auto it = data_.find(page_key.ToString());
    if (it == data_.end()) {
      return Status::NotFound();
    }

    assert(page_key.ToString() == it->first);
    data->reset(new char[it->second.size()]);
    memcpy(data->get(), it->second.c_str(), it->second.size());
    *size = it->second.size();
    return Status::OK();
  }

  bool IsCompressed() override { return is_compressed_; }

  std::string GetPrintableOptions() const override {
    return "MockPersistentCache";
  }

  port::Mutex lock_;
  std::map<std::string, std::string> data_;
  const bool is_compressed_ = true;
  size_t size_ = 0;
  const size_t max_size_ = 10 * 1024;  // 10KiB
  std::atomic<uint64_t> last_id_{1};
};

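// Note on the mock above: Insert() stores pages in a std::map keyed by page
// key and, when over max_size_, evicts a single entry from the front of the
// map, i.e. eviction is by key order rather than LRU. That is deliberate
// simplicity: the tests below only need hit/miss accounting, not a realistic
// replacement policy.
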
#ifdef OS_LINUX
// Make sure that for the CPU-time perf context counters, Env::NowCPUNanos()
// is used, rather than the wall-clock Env::NowNanos().
TEST_F(DBTest2, TestPerfContextGetCpuTime) {
  // force resizing table cache so table handle is not preloaded so that
  // we can measure find_table_nanos during Get().
  dbfull()->TEST_table_cache()->SetCapacity(0);
  ASSERT_OK(Put("foo", "bar"));
  ASSERT_OK(Flush());
  env_->now_cpu_count_.store(0);
  env_->SetMockSleep();

  // NOTE: Presumed unnecessary and removed: resetting mock time in env

  // CPU timing is not enabled with kEnableTimeExceptForMutex
  SetPerfLevel(PerfLevel::kEnableTimeExceptForMutex);
  ASSERT_EQ("bar", Get("foo"));
  ASSERT_EQ(0, get_perf_context()->get_cpu_nanos);
  ASSERT_EQ(0, env_->now_cpu_count_.load());

  constexpr uint64_t kDummyAddonSeconds = uint64_t{1000000};
  constexpr uint64_t kDummyAddonNanos = 1000000000U * kDummyAddonSeconds;

  // Add time to NowNanos() reading.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "TableCache::FindTable:0",
      [&](void* /*arg*/) { env_->MockSleepForSeconds(kDummyAddonSeconds); });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  SetPerfLevel(PerfLevel::kEnableTimeAndCPUTimeExceptForMutex);
  ASSERT_EQ("bar", Get("foo"));
  ASSERT_GT(env_->now_cpu_count_.load(), 2);
  ASSERT_LT(get_perf_context()->get_cpu_nanos, kDummyAddonNanos);
  ASSERT_GT(get_perf_context()->find_table_nanos, kDummyAddonNanos);

  SetPerfLevel(PerfLevel::kDisable);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

TEST_F(DBTest2, TestPerfContextIterCpuTime) {
  DestroyAndReopen(CurrentOptions());
  // force resizing table cache so table handle is not preloaded so that
  // we can measure find_table_nanos during iteration
  dbfull()->TEST_table_cache()->SetCapacity(0);

  const size_t kNumEntries = 10;
  for (size_t i = 0; i < kNumEntries; ++i) {
    ASSERT_OK(Put("k" + std::to_string(i), "v" + std::to_string(i)));
  }
  ASSERT_OK(Flush());
  for (size_t i = 0; i < kNumEntries; ++i) {
    ASSERT_EQ("v" + std::to_string(i), Get("k" + std::to_string(i)));
  }
  std::string last_key = "k" + std::to_string(kNumEntries - 1);
  std::string last_value = "v" + std::to_string(kNumEntries - 1);
  env_->now_cpu_count_.store(0);
  env_->SetMockSleep();

  // NOTE: Presumed unnecessary and removed: resetting mock time in env

  // CPU timing is not enabled with kEnableTimeExceptForMutex
  SetPerfLevel(PerfLevel::kEnableTimeExceptForMutex);
  Iterator* iter = db_->NewIterator(ReadOptions());
  iter->Seek("k0");
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("v0", iter->value().ToString());
  iter->SeekForPrev(last_key);
  ASSERT_TRUE(iter->Valid());
  iter->SeekToLast();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(last_value, iter->value().ToString());
  iter->SeekToFirst();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("v0", iter->value().ToString());
  ASSERT_EQ(0, get_perf_context()->iter_seek_cpu_nanos);
  iter->Next();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("v1", iter->value().ToString());
  ASSERT_EQ(0, get_perf_context()->iter_next_cpu_nanos);
  iter->Prev();
  ASSERT_TRUE(iter->Valid());
  ASSERT_OK(iter->status());
  ASSERT_EQ("v0", iter->value().ToString());
  ASSERT_EQ(0, get_perf_context()->iter_prev_cpu_nanos);
  ASSERT_EQ(0, env_->now_cpu_count_.load());
  delete iter;

  constexpr uint64_t kDummyAddonSeconds = uint64_t{1000000};
  constexpr uint64_t kDummyAddonNanos = 1000000000U * kDummyAddonSeconds;

  // Add time to NowNanos() reading.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "TableCache::FindTable:0",
      [&](void* /*arg*/) { env_->MockSleepForSeconds(kDummyAddonSeconds); });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  SetPerfLevel(PerfLevel::kEnableTimeAndCPUTimeExceptForMutex);
  iter = db_->NewIterator(ReadOptions());
  iter->Seek("k0");
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("v0", iter->value().ToString());
  iter->SeekForPrev(last_key);
  ASSERT_TRUE(iter->Valid());
  iter->SeekToLast();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(last_value, iter->value().ToString());
  iter->SeekToFirst();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("v0", iter->value().ToString());
  ASSERT_GT(get_perf_context()->iter_seek_cpu_nanos, 0);
  ASSERT_LT(get_perf_context()->iter_seek_cpu_nanos, kDummyAddonNanos);
  iter->Next();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("v1", iter->value().ToString());
  ASSERT_GT(get_perf_context()->iter_next_cpu_nanos, 0);
  ASSERT_LT(get_perf_context()->iter_next_cpu_nanos, kDummyAddonNanos);
  iter->Prev();
  ASSERT_TRUE(iter->Valid());
  ASSERT_OK(iter->status());
  ASSERT_EQ("v0", iter->value().ToString());
  ASSERT_GT(get_perf_context()->iter_prev_cpu_nanos, 0);
  ASSERT_LT(get_perf_context()->iter_prev_cpu_nanos, kDummyAddonNanos);
  ASSERT_GE(env_->now_cpu_count_.load(), 12);
  ASSERT_GT(get_perf_context()->find_table_nanos, kDummyAddonNanos);

  SetPerfLevel(PerfLevel::kDisable);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  delete iter;
}
#endif  // OS_LINUX

#if !defined OS_SOLARIS
TEST_F(DBTest2, PersistentCache) {
  int num_iter = 80;

  Options options;
  options.write_buffer_size = 64 * 1024;  // small write buffer
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  options = CurrentOptions(options);

  auto bsizes = {/*no block cache*/ 0, /*1M*/ 1 * 1024 * 1024};
  auto types = {/*compressed*/ 1, /*uncompressed*/ 0};
  for (auto bsize : bsizes) {
    for (auto type : types) {
      BlockBasedTableOptions table_options;
      table_options.persistent_cache.reset(
          new MockPersistentCache(type, 10 * 1024));
      table_options.no_block_cache = true;
      table_options.block_cache = bsize ? NewLRUCache(bsize) : nullptr;
      options.table_factory.reset(NewBlockBasedTableFactory(table_options));

      DestroyAndReopen(options);
      CreateAndReopenWithCF({"pikachu"}, options);
      // default column family doesn't have block cache
      Options no_block_cache_opts;
      no_block_cache_opts.statistics = options.statistics;
      no_block_cache_opts = CurrentOptions(no_block_cache_opts);
      BlockBasedTableOptions table_options_no_bc;
      table_options_no_bc.no_block_cache = true;
      no_block_cache_opts.table_factory.reset(
          NewBlockBasedTableFactory(table_options_no_bc));
      ReopenWithColumnFamilies(
          {"default", "pikachu"},
          std::vector<Options>({no_block_cache_opts, options}));

      Random rnd(301);

      // Write 80 values; each is ~1KB and is reused for four consecutive
      // keys to get a high compression ratio.
      ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
      std::vector<std::string> values;
      std::string str;
      for (int i = 0; i < num_iter; i++) {
        if (i % 4 == 0) {  // high compression ratio
          str = rnd.RandomString(1000);
        }
        values.push_back(str);
        ASSERT_OK(Put(1, Key(i), values[i]));
      }

      // Flush all data from the memtable so that reads go to the table files
      // (and hence through the persistent cache).
      ASSERT_OK(Flush(1));

      for (int i = 0; i < num_iter; i++) {
        ASSERT_EQ(Get(1, Key(i)), values[i]);
      }

      auto hit = options.statistics->getTickerCount(PERSISTENT_CACHE_HIT);
      auto miss = options.statistics->getTickerCount(PERSISTENT_CACHE_MISS);

      ASSERT_GT(hit, 0);
      ASSERT_GT(miss, 0);
    }
  }
}
#endif  // !defined OS_SOLARIS

namespace {
void CountSyncPoint() {
  TEST_SYNC_POINT_CALLBACK("DBTest2::MarkedPoint", nullptr /* arg */);
}
}  // anonymous namespace

TEST_F(DBTest2, SyncPointMarker) {
  std::atomic<int> sync_point_called(0);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBTest2::MarkedPoint",
      [&](void* /*arg*/) { sync_point_called.fetch_add(1); });

  // The dependency makes thread 2's MarkedPoint wait until thread 1 has
  // passed Thread1First. The marker ties MarkedPoint to Marker, so only a
  // thread that has passed Marker (thread 2) processes MarkedPoint; thread
  // 1's MarkedPoint is disabled and its callback does not fire.
  // Execution order:
  // | Thread 1     | Thread 2    |
  // |              | Marker      |
  // | MarkedPoint  |             |
  // | Thread1First |             |
  // |              | MarkedPoint |
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependencyAndMarkers(
      {{"DBTest2::SyncPointMarker:Thread1First", "DBTest2::MarkedPoint"}},
      {{"DBTest2::SyncPointMarker:Marker", "DBTest2::MarkedPoint"}});

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  std::function<void()> func1 = [&]() {
    CountSyncPoint();
    TEST_SYNC_POINT("DBTest2::SyncPointMarker:Thread1First");
  };

  std::function<void()> func2 = [&]() {
    TEST_SYNC_POINT("DBTest2::SyncPointMarker:Marker");
    CountSyncPoint();
  };

  auto thread1 = port::Thread(func1);
  auto thread2 = port::Thread(func2);
  thread1.join();
  thread2.join();

  // Callback is only executed once
  ASSERT_EQ(sync_point_called.load(), 1);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

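// Hedged summary of the marker mechanism exercised above: in
// LoadDependencyAndMarkers(dependencies, markers), a marker pair {"M", "P"}
// restricts sync point "P" so that it is processed only on a thread that has
// already passed "M"; on any other thread "P" is a no-op. That is why thread
// 1's CountSyncPoint() never fires the callback and the final count is 1.
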
size_t GetEncodedEntrySize(size_t key_size, size_t value_size) {
  std::string buffer;
  PutVarint32(&buffer, static_cast<uint32_t>(0));
  PutVarint32(&buffer, static_cast<uint32_t>(key_size));
  PutVarint32(&buffer, static_cast<uint32_t>(value_size));
  return buffer.size() + key_size + value_size;
}

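// Worked example: for key_size = 16 and value_size = 100, each of the three
// varint32 values (0, 16, 100) is below 128 and therefore encodes in a single
// byte (a varint32 uses one byte per 7 bits of payload), so the encoded entry
// size is 3 + 16 + 100 = 119 bytes.
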
TEST_F(DBTest2, ReadAmpBitmap) {
  Options options = CurrentOptions();
  BlockBasedTableOptions bbto;
  uint32_t bytes_per_bit[2] = {1, 16};
  for (size_t k = 0; k < 2; k++) {
    // Disable delta encoding to make it easier to calculate read amplification
    bbto.use_delta_encoding = false;
    // Huge block cache to make it easier to calculate read amplification
    bbto.block_cache = NewLRUCache(1024 * 1024 * 1024);
    bbto.read_amp_bytes_per_bit = bytes_per_bit[k];
    options.table_factory.reset(NewBlockBasedTableFactory(bbto));
    options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
    DestroyAndReopen(options);

    const size_t kNumEntries = 10000;

    Random rnd(301);
    for (size_t i = 0; i < kNumEntries; i++) {
      ASSERT_OK(Put(Key(static_cast<int>(i)), rnd.RandomString(100)));
    }
    ASSERT_OK(Flush());

    Close();
    Reopen(options);

    // Read keys/values randomly and verify that the reported read amp error
    // is less than 2%.
    uint64_t total_useful_bytes = 0;
    std::set<int> read_keys;
    std::string value;
    for (size_t i = 0; i < kNumEntries * 5; i++) {
      int key_idx = rnd.Next() % kNumEntries;
      std::string key = Key(key_idx);
      ASSERT_OK(db_->Get(ReadOptions(), key, &value));

      if (read_keys.find(key_idx) == read_keys.end()) {
        auto internal_key = InternalKey(key, 0, ValueType::kTypeValue);
        total_useful_bytes +=
            GetEncodedEntrySize(internal_key.size(), value.size());
        read_keys.insert(key_idx);
      }

      double expected_read_amp =
          static_cast<double>(total_useful_bytes) /
          options.statistics->getTickerCount(READ_AMP_TOTAL_READ_BYTES);

      double read_amp =
          static_cast<double>(options.statistics->getTickerCount(
              READ_AMP_ESTIMATE_USEFUL_BYTES)) /
          options.statistics->getTickerCount(READ_AMP_TOTAL_READ_BYTES);

      double error_pct = fabs(expected_read_amp - read_amp) * 100;
      // Error between reported read amp and real read amp should be less
      // than 2%.
      EXPECT_LE(error_pct, 2);
    }

    // Make sure we read everything in the DB (which is smaller than our cache)
    Iterator* iter = db_->NewIterator(ReadOptions());
    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
      ASSERT_EQ(iter->value().ToString(), Get(iter->key().ToString()));
    }
    ASSERT_OK(iter->status());
    delete iter;

    // Read amp is on average 100% since we read everything we loaded into
    // memory.
    if (k == 0) {
      ASSERT_EQ(
          options.statistics->getTickerCount(READ_AMP_ESTIMATE_USEFUL_BYTES),
          options.statistics->getTickerCount(READ_AMP_TOTAL_READ_BYTES));
    } else {
      ASSERT_NEAR(
          options.statistics->getTickerCount(READ_AMP_ESTIMATE_USEFUL_BYTES) *
              1.0f /
              options.statistics->getTickerCount(READ_AMP_TOTAL_READ_BYTES),
          1, .01);
    }
  }
}

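// Hedged note on the two parameterizations above: with
// read_amp_bytes_per_bit = 1, every byte of a data block gets its own bitmap
// bit, so the useful-bytes estimate is exact and the k == 0 branch can assert
// strict equality. With 16, one bit covers 16 bytes and touching any byte
// marks all 16 as useful, so the k == 1 branch only requires the ratio to
// land within 1% of 1.0.
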
  1757. #ifndef OS_SOLARIS // GetUniqueIdFromFile is not implemented
TEST_F(DBTest2, ReadAmpBitmapLiveInCacheAfterDBClose) {
  {
    const int kIdBufLen = 100;
    char id_buf[kIdBufLen];
    Status s = Status::NotSupported();
#ifndef OS_WIN
    // You can't open a directory on Windows using a random access file
    std::unique_ptr<RandomAccessFile> file;
    s = env_->NewRandomAccessFile(dbname_, &file, EnvOptions());
    if (s.ok()) {
      if (file->GetUniqueId(id_buf, kIdBufLen) == 0) {
        // The fs holding the db directory doesn't support unique file ids.
        // Running this test would fail because lru_cache would reload the
        // blocks even though they are already in the cache.
        return;
      }
    }
#endif
    if (!s.ok()) {
      std::unique_ptr<Directory> dir;
      ASSERT_OK(env_->NewDirectory(dbname_, &dir));
      if (dir->GetUniqueId(id_buf, kIdBufLen) == 0) {
        // The fs holding the db directory doesn't support unique file ids.
        // Running this test would fail because lru_cache would reload the
        // blocks even though they are already in the cache.
        return;
      }
    }
  }

  uint32_t bytes_per_bit[2] = {1, 16};
  for (size_t k = 0; k < 2; k++) {
    std::shared_ptr<Cache> lru_cache = NewLRUCache(1024 * 1024 * 1024);
    std::shared_ptr<Statistics> stats =
        ROCKSDB_NAMESPACE::CreateDBStatistics();

    Options options = CurrentOptions();
    BlockBasedTableOptions bbto;
    // Disable delta encoding to make it easier to calculate read amplification
    bbto.use_delta_encoding = false;
    // Huge block cache to make it easier to calculate read amplification
    bbto.block_cache = lru_cache;
    bbto.read_amp_bytes_per_bit = bytes_per_bit[k];
    options.table_factory.reset(NewBlockBasedTableFactory(bbto));
    options.statistics = stats;
    DestroyAndReopen(options);

    const int kNumEntries = 10000;

    Random rnd(301);
    for (int i = 0; i < kNumEntries; i++) {
      ASSERT_OK(Put(Key(i), rnd.RandomString(100)));
    }
    ASSERT_OK(Flush());

    Close();
    Reopen(options);

    std::set<int> read_keys;
    std::string value;
    // Iter1: Read half the DB, Read even keys
    // Key(0), Key(2), Key(4), Key(6), Key(8), ...
    for (int i = 0; i < kNumEntries; i += 2) {
      std::string key = Key(i);
      ASSERT_OK(db_->Get(ReadOptions(), key, &value));

      if (read_keys.find(i) == read_keys.end()) {
        auto internal_key = InternalKey(key, 0, ValueType::kTypeValue);
        read_keys.insert(i);
      }
    }

    size_t total_useful_bytes_iter1 =
        options.statistics->getTickerCount(READ_AMP_ESTIMATE_USEFUL_BYTES);
    size_t total_loaded_bytes_iter1 =
        options.statistics->getTickerCount(READ_AMP_TOTAL_READ_BYTES);

    Close();
    std::shared_ptr<Statistics> new_statistics =
        ROCKSDB_NAMESPACE::CreateDBStatistics();
    // Destroy the old statistics obj that the blocks in lru_cache are
    // pointing to
    options.statistics.reset();
    // Use the statistics object that we just created
    options.statistics = new_statistics;
    Reopen(options);

    // Iter2: Read half the DB, Read odd keys
    // Key(1), Key(3), Key(5), Key(7), Key(9), ...
    for (int i = 1; i < kNumEntries; i += 2) {
      std::string key = Key(i);
      ASSERT_OK(db_->Get(ReadOptions(), key, &value));

      if (read_keys.find(i) == read_keys.end()) {
        auto internal_key = InternalKey(key, 0, ValueType::kTypeValue);
        read_keys.insert(i);
      }
    }

    size_t total_useful_bytes_iter2 =
        options.statistics->getTickerCount(READ_AMP_ESTIMATE_USEFUL_BYTES);
    size_t total_loaded_bytes_iter2 =
        options.statistics->getTickerCount(READ_AMP_TOTAL_READ_BYTES);

    // Read amp is on average 100% since we read everything that we loaded
    // into memory
    if (k == 0) {
      ASSERT_EQ(total_useful_bytes_iter1 + total_useful_bytes_iter2,
                total_loaded_bytes_iter1 + total_loaded_bytes_iter2);
    } else {
      ASSERT_NEAR((total_useful_bytes_iter1 + total_useful_bytes_iter2) *
                      1.0f /
                      (total_loaded_bytes_iter1 + total_loaded_bytes_iter2),
                  1, .01);
    }
  }
}
#endif  // !OS_SOLARIS

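// Verifies that automatic trivial moves of new files into L2 while a manual
// compaction of L2 is running do not overlap with the compaction and break
// LSM consistency, and that the stats properties reflect the final state.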
TEST_F(DBTest2, AutomaticCompactionOverlapManualCompaction) {
  Options options = CurrentOptions();
  options.num_levels = 3;
  options.IncreaseParallelism(20);
  DestroyAndReopen(options);

  ASSERT_OK(Put(Key(0), "a"));
  ASSERT_OK(Put(Key(5), "a"));
  ASSERT_OK(Flush());

  ASSERT_OK(Put(Key(10), "a"));
  ASSERT_OK(Put(Key(15), "a"));
  ASSERT_OK(Flush());

  CompactRangeOptions cro;
  cro.change_level = true;
  cro.target_level = 2;
  ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));

  auto get_stat = [](std::string level_str, LevelStatType type,
                     std::map<std::string, std::string> props) {
    auto prop_str =
        "compaction." + level_str + "." +
        InternalStats::compaction_level_stats.at(type).property_name.c_str();
    auto prop_item = props.find(prop_str);
    return prop_item == props.end() ? 0 : std::stod(prop_item->second);
  };

  // Trivial move 2 files to L2
  ASSERT_EQ("0,0,2", FilesPerLevel());
  // Also test that the stats GetMapProperty API reports the same result
  {
    std::map<std::string, std::string> prop;
    ASSERT_TRUE(dbfull()->GetMapProperty("rocksdb.cfstats", &prop));
    ASSERT_EQ(0, get_stat("L0", LevelStatType::NUM_FILES, prop));
    ASSERT_EQ(0, get_stat("L1", LevelStatType::NUM_FILES, prop));
    ASSERT_EQ(2, get_stat("L2", LevelStatType::NUM_FILES, prop));
    ASSERT_EQ(2, get_stat("Sum", LevelStatType::NUM_FILES, prop));
  }

  // While the compaction is running, we will create 2 new files that
  // can fit in L2; these 2 files would be moved to L2, overlap with
  // the running compaction, and break the LSM consistency.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "CompactionJob::Run():Start", [&](void* /*arg*/) {
        ASSERT_OK(
            dbfull()->SetOptions({{"level0_file_num_compaction_trigger", "2"},
                                  {"max_bytes_for_level_base", "1"}}));
        ASSERT_OK(Put(Key(6), "a"));
        ASSERT_OK(Put(Key(7), "a"));
        ASSERT_OK(Flush());

        ASSERT_OK(Put(Key(8), "a"));
        ASSERT_OK(Put(Key(9), "a"));
        ASSERT_OK(Flush());
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // Run a manual compaction that will compact the 2 files in L2
  // into 1 file in L2
  cro.exclusive_manual_compaction = false;
  cro.bottommost_level_compaction = BottommostLevelCompaction::kForceOptimized;
  ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();

  // Test that the stats GetMapProperty API reports 1 file in L2
  {
    std::map<std::string, std::string> prop;
    ASSERT_TRUE(dbfull()->GetMapProperty("rocksdb.cfstats", &prop));
    ASSERT_EQ(1, get_stat("L2", LevelStatType::NUM_FILES, prop));
  }
}

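// Same hazard as above, but the overlapping work comes from a second,
// non-exclusive manual compaction started from a background thread while
// the first manual compaction is still running.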
TEST_F(DBTest2, ManualCompactionOverlapManualCompaction) {
  Options options = CurrentOptions();
  options.num_levels = 2;
  options.IncreaseParallelism(20);
  options.disable_auto_compactions = true;
  DestroyAndReopen(options);

  ASSERT_OK(Put(Key(0), "a"));
  ASSERT_OK(Put(Key(5), "a"));
  ASSERT_OK(Flush());

  ASSERT_OK(Put(Key(10), "a"));
  ASSERT_OK(Put(Key(15), "a"));
  ASSERT_OK(Flush());

  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));

  // Trivial move 2 files to L1
  ASSERT_EQ("0,2", FilesPerLevel());

  std::function<void()> bg_manual_compact = [&]() {
    std::string k1 = Key(6);
    std::string k2 = Key(9);
    Slice k1s(k1);
    Slice k2s(k2);
    CompactRangeOptions cro;
    cro.exclusive_manual_compaction = false;
    ASSERT_OK(db_->CompactRange(cro, &k1s, &k2s));
  };
  ROCKSDB_NAMESPACE::port::Thread bg_thread;

  // While the compaction is running, we will create 2 new files that
  // can fit in L1; these 2 files would be moved to L1, overlap with
  // the running compaction, and break the LSM consistency.
  std::atomic<bool> flag(false);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "CompactionJob::Run():Start", [&](void* /*arg*/) {
        if (flag.exchange(true)) {
          // We want to make sure to call this callback only once
          return;
        }
        ASSERT_OK(Put(Key(6), "a"));
        ASSERT_OK(Put(Key(7), "a"));
        ASSERT_OK(Flush());

        ASSERT_OK(Put(Key(8), "a"));
        ASSERT_OK(Put(Key(9), "a"));
        ASSERT_OK(Flush());

        // Start a non-exclusive manual compaction in a bg thread
        bg_thread = port::Thread(bg_manual_compact);
        // This manual compaction conflicts with the other manual compaction,
        // so it should wait until the first compaction finishes
        env_->SleepForMicroseconds(1000000);
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // Run a manual compaction that will compact the 2 files in L1
  // into 1 file in L1
  CompactRangeOptions cro;
  cro.exclusive_manual_compaction = false;
  cro.bottommost_level_compaction = BottommostLevelCompaction::kForceOptimized;
  ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));

  bg_thread.join();

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

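// Verifies that canceling a manual compaction mid-run via the test sync
// point leaves the file set untouched, and that a subsequent CompactFiles()
// call returns IsManualCompactionPaused() at its entry point.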
TEST_F(DBTest2, PausingManualCompaction1) {
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.num_levels = 7;

  DestroyAndReopen(options);
  Random rnd(301);
  // Generate a file containing 10 keys.
  for (int i = 0; i < 10; i++) {
    ASSERT_OK(Put(Key(i), rnd.RandomString(50)));
  }
  ASSERT_OK(Flush());

  // Generate another file containing the same keys
  for (int i = 0; i < 10; i++) {
    ASSERT_OK(Put(Key(i), rnd.RandomString(50)));
  }
  ASSERT_OK(Flush());

  int manual_compactions_paused = 0;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "CompactionJob::Run():PausingManualCompaction:1", [&](void* arg) {
        auto canceled = static_cast<std::atomic<bool>*>(arg);
        // CompactRange triggers a manual compaction; cancel the compaction
        // by setting *canceled to true
        if (canceled != nullptr) {
          canceled->store(true, std::memory_order_release);
        }
        manual_compactions_paused += 1;
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "TestCompactFiles:PausingManualCompaction:3", [&](void* arg) {
        auto paused = static_cast<std::atomic<int>*>(arg);
        // CompactFiles() relies on manual_compactions_paused to
        // determine if this compaction should be paused or not
        ASSERT_EQ(0, paused->load(std::memory_order_acquire));
        paused->fetch_add(1, std::memory_order_release);
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  std::vector<std::string> files_before_compact, files_after_compact;
  // Remember file names before compaction is triggered
  std::vector<LiveFileMetaData> files_meta;
  dbfull()->GetLiveFilesMetaData(&files_meta);
  for (const auto& file : files_meta) {
    files_before_compact.push_back(file.name);
  }

  // OK, now trigger a manual compaction
  ASSERT_TRUE(dbfull()
                  ->CompactRange(CompactRangeOptions(), nullptr, nullptr)
                  .IsManualCompactionPaused());

  // Wait for compactions to get scheduled and stopped
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  // Get file names after compaction is stopped
  files_meta.clear();
  dbfull()->GetLiveFilesMetaData(&files_meta);
  for (const auto& file : files_meta) {
    files_after_compact.push_back(file.name);
  }

  // As if nothing happened
  ASSERT_EQ(files_before_compact, files_after_compact);
  ASSERT_EQ(manual_compactions_paused, 1);
  manual_compactions_paused = 0;

  // Now make sure CompactFiles does not run either
  ASSERT_TRUE(dbfull()
                  ->CompactFiles(ROCKSDB_NAMESPACE::CompactionOptions(),
                                 files_before_compact, 0)
                  .IsManualCompactionPaused());
  // Wait for the manual compaction to get scheduled and finish
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  files_meta.clear();
  files_after_compact.clear();
  dbfull()->GetLiveFilesMetaData(&files_meta);
  for (const auto& file : files_meta) {
    files_after_compact.push_back(file.name);
  }
  ASSERT_EQ(files_before_compact, files_after_compact);
  // CompactFiles returns at its entry point
  ASSERT_EQ(manual_compactions_paused, 0);

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

// PausingManualCompaction does not affect auto compaction
TEST_F(DBTest2, PausingManualCompaction2) {
  Options options = CurrentOptions();
  options.level0_file_num_compaction_trigger = 2;
  options.disable_auto_compactions = false;

  DestroyAndReopen(options);
  dbfull()->DisableManualCompaction();

  Random rnd(301);
  for (int i = 0; i < 2; i++) {
    // Generate a file containing 100 keys.
    for (int j = 0; j < 100; j++) {
      ASSERT_OK(Put(Key(j), rnd.RandomString(50)));
    }
    ASSERT_OK(Flush());
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  std::vector<LiveFileMetaData> files_meta;
  dbfull()->GetLiveFilesMetaData(&files_meta);
  ASSERT_EQ(files_meta.size(), 1);
}

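// Verifies that DisableManualCompaction() called before CompactRange()
// makes the request return IsManualCompactionPaused() without ever reaching
// the compaction job, and that EnableManualCompaction() restores normal
// behavior.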
TEST_F(DBTest2, PausingManualCompaction3) {
  CompactRangeOptions compact_options;
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.num_levels = 7;

  Random rnd(301);
  auto generate_files = [&]() {
    for (int i = 0; i < options.num_levels; i++) {
      for (int j = 0; j < options.num_levels - i + 1; j++) {
        for (int k = 0; k < 1000; k++) {
          ASSERT_OK(Put(Key(k + j * 1000), rnd.RandomString(50)));
        }
        ASSERT_OK(Flush());
      }
      for (int l = 1; l < options.num_levels - i; l++) {
        MoveFilesToLevel(l);
      }
    }
  };

  DestroyAndReopen(options);
  generate_files();
  ASSERT_EQ("2,3,4,5,6,7,8", FilesPerLevel());
  int run_manual_compactions = 0;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "CompactionJob::Run():PausingManualCompaction:1",
      [&](void* /*arg*/) { run_manual_compactions++; });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  dbfull()->DisableManualCompaction();
  ASSERT_TRUE(dbfull()
                  ->CompactRange(compact_options, nullptr, nullptr)
                  .IsManualCompactionPaused());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  // Since manual compaction is disabled, the sync point is never reached
  ASSERT_EQ(run_manual_compactions, 0);
  ASSERT_EQ("2,3,4,5,6,7,8", FilesPerLevel());

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearCallBack(
      "CompactionJob::Run():PausingManualCompaction:1");
  dbfull()->EnableManualCompaction();
  ASSERT_OK(dbfull()->CompactRange(compact_options, nullptr, nullptr));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ("0,0,0,0,0,0,2", FilesPerLevel());

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

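// Like PausingManualCompaction3, but the cancellation happens from inside a
// running compaction job (sync point ...:2), so exactly one job observes
// the pause before the callback is cleared and compaction proceeds.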
TEST_F(DBTest2, PausingManualCompaction4) {
  CompactRangeOptions compact_options;
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.num_levels = 7;

  Random rnd(301);
  auto generate_files = [&]() {
    for (int i = 0; i < options.num_levels; i++) {
      for (int j = 0; j < options.num_levels - i + 1; j++) {
        for (int k = 0; k < 1000; k++) {
          ASSERT_OK(Put(Key(k + j * 1000), rnd.RandomString(50)));
        }
        ASSERT_OK(Flush());
      }
      for (int l = 1; l < options.num_levels - i; l++) {
        MoveFilesToLevel(l);
      }
    }
  };

  DestroyAndReopen(options);
  generate_files();
  ASSERT_EQ("2,3,4,5,6,7,8", FilesPerLevel());
  int run_manual_compactions = 0;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "CompactionJob::Run():PausingManualCompaction:2", [&](void* arg) {
        auto canceled = static_cast<std::atomic<bool>*>(arg);
        // CompactRange triggers a manual compaction; cancel the compaction
        // by setting *canceled to true
        if (canceled != nullptr) {
          canceled->store(true, std::memory_order_release);
        }
        run_manual_compactions++;
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "TestCompactFiles:PausingManualCompaction:3", [&](void* arg) {
        auto paused = static_cast<std::atomic<int>*>(arg);
        // CompactFiles() relies on manual_compactions_paused to
        // determine if this compaction should be paused or not
        ASSERT_EQ(0, paused->load(std::memory_order_acquire));
        paused->fetch_add(1, std::memory_order_release);
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  ASSERT_TRUE(dbfull()
                  ->CompactRange(compact_options, nullptr, nullptr)
                  .IsManualCompactionPaused());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ(run_manual_compactions, 1);
  ASSERT_EQ("2,3,4,5,6,7,8", FilesPerLevel());

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearCallBack(
      "CompactionJob::Run():PausingManualCompaction:2");
  ASSERT_OK(dbfull()->CompactRange(compact_options, nullptr, nullptr));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ("0,0,0,0,0,0,2", FilesPerLevel());

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

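// Verifies the user-facing cancellation knob: a manual compaction started
// with CompactRangeOptions::canceled already true never runs, one canceled
// after three scheduled compactions stops early, and clearing the flag lets
// compactions proceed again.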
TEST_F(DBTest2, CancelManualCompaction1) {
  CompactRangeOptions compact_options;
  auto canceledPtr =
      std::unique_ptr<std::atomic<bool>>(new std::atomic<bool>{true});
  compact_options.canceled = canceledPtr.get();

  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.num_levels = 7;

  Random rnd(301);
  auto generate_files = [&]() {
    for (int i = 0; i < options.num_levels; i++) {
      for (int j = 0; j < options.num_levels - i + 1; j++) {
        for (int k = 0; k < 1000; k++) {
          ASSERT_OK(Put(Key(k + j * 1000), rnd.RandomString(50)));
        }
        ASSERT_OK(Flush());
      }
      for (int l = 1; l < options.num_levels - i; l++) {
        MoveFilesToLevel(l);
      }
    }
  };

  DestroyAndReopen(options);
  generate_files();
  ASSERT_EQ("2,3,4,5,6,7,8", FilesPerLevel());

  int run_manual_compactions = 0;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "CompactionJob::Run():PausingManualCompaction:1",
      [&](void* /*arg*/) { run_manual_compactions++; });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // Set up a callback to count how many compactions get scheduled
  int compactions_run = 0;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::RunManualCompaction()::1",
      [&](void* /*arg*/) { ++compactions_run; });

  ASSERT_TRUE(dbfull()
                  ->CompactRange(compact_options, nullptr, nullptr)
                  .IsManualCompactionPaused());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  // Since the compaction was canceled up front, we shouldn't start
  // compacting at all; the compaction callback should never fire.
  ASSERT_EQ(compactions_run, 0);
  ASSERT_EQ(run_manual_compactions, 0);
  ASSERT_EQ("2,3,4,5,6,7,8", FilesPerLevel());

  compactions_run = 0;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearCallBack(
      "DBImpl::RunManualCompaction()::1");
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::RunManualCompaction()::1", [&](void* /*arg*/) {
        ++compactions_run;
        // Cancel after 3 compactions have run
        if (compactions_run == 3) {
          compact_options.canceled->store(true, std::memory_order_release);
        }
      });

  compact_options.canceled->store(false, std::memory_order_release);
  ASSERT_TRUE(dbfull()
                  ->CompactRange(compact_options, nullptr, nullptr)
                  .IsManualCompactionPaused());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ(compactions_run, 3);

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearCallBack(
      "DBImpl::RunManualCompaction()::1");
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearCallBack(
      "CompactionJob::Run():PausingManualCompaction:1");

  // Compactions should work again if we re-enable them.
  compact_options.canceled->store(false, std::memory_order_relaxed);
  ASSERT_OK(dbfull()->CompactRange(compact_options, nullptr, nullptr));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ("0,0,0,0,0,0,2", FilesPerLevel());

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

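// Verifies cancellation from inside the compaction itself: with a single
// subcompaction, storing true to the canceled flag from the
// CompactionIterator:ProcessKV callback stops key processing
// deterministically one KV after the store.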
TEST_F(DBTest2, CancelManualCompaction2) {
  CompactRangeOptions compact_options;
  auto canceledPtr =
      std::unique_ptr<std::atomic<bool>>(new std::atomic<bool>{true});
  compact_options.canceled = canceledPtr.get();
  compact_options.max_subcompactions = 1;

  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.num_levels = 7;

  Random rnd(301);
  auto generate_files = [&]() {
    for (int i = 0; i < options.num_levels; i++) {
      for (int j = 0; j < options.num_levels - i + 1; j++) {
        for (int k = 0; k < 1000; k++) {
          ASSERT_OK(Put(Key(k + j * 1000), rnd.RandomString(50)));
        }
        ASSERT_OK(Flush());
      }
      for (int l = 1; l < options.num_levels - i; l++) {
        MoveFilesToLevel(l);
      }
    }
  };

  DestroyAndReopen(options);
  generate_files();
  ASSERT_EQ("2,3,4,5,6,7,8", FilesPerLevel());

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  int compactions_run = 0;
  std::atomic<int> kv_compactions{0};
  int compactions_stopped_at = 0;
  int kv_compactions_stopped_at = 0;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::RunManualCompaction()::1",
      [&](void* /*arg*/) { ++compactions_run; });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "CompactionIterator:ProcessKV", [&](void* /*arg*/) {
        int kv_compactions_run =
            kv_compactions.fetch_add(1, std::memory_order_release);
        if (kv_compactions_run == 5) {
          compact_options.canceled->store(true, std::memory_order_release);
          kv_compactions_stopped_at = kv_compactions_run;
          compactions_stopped_at = compactions_run;
        }
      });

  compact_options.canceled->store(false, std::memory_order_release);
  ASSERT_TRUE(dbfull()
                  ->CompactRange(compact_options, nullptr, nullptr)
                  .IsManualCompactionPaused());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  // NOTE: as we set compact_options.max_subcompactions = 1, and store true to
  // the canceled variable from the single compacting thread (via callback),
  // this value is deterministically kv_compactions_stopped_at + 1.
  ASSERT_EQ(kv_compactions, kv_compactions_stopped_at + 1);
  ASSERT_EQ(compactions_run, compactions_stopped_at);

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearCallBack(
      "CompactionIterator:ProcessKV");
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearCallBack(
      "DBImpl::RunManualCompaction()::1");
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearCallBack(
      "CompactionJob::Run():PausingManualCompaction:1");

  // Compactions should work again if we re-enable them.
  compact_options.canceled->store(false, std::memory_order_relaxed);
  ASSERT_OK(dbfull()->CompactRange(compact_options, nullptr, nullptr));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ("0,0,0,0,0,0,2", FilesPerLevel());

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

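// Listener that records compaction begin/end notifications and checks that
// each completed compaction reports the expected status code and subcode.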
class CancelCompactionListener : public EventListener {
 public:
  CancelCompactionListener()
      : num_compaction_started_(0), num_compaction_ended_(0) {}

  void OnCompactionBegin(DB* /*db*/, const CompactionJobInfo& ci) override {
    ASSERT_EQ(ci.cf_name, "default");
    ASSERT_EQ(ci.base_input_level, 0);
    num_compaction_started_++;
  }

  void OnCompactionCompleted(DB* /*db*/,
                             const CompactionJobInfo& ci) override {
    ASSERT_EQ(ci.cf_name, "default");
    ASSERT_EQ(ci.base_input_level, 0);
    ASSERT_EQ(ci.status.code(), code_);
    ASSERT_EQ(ci.status.subcode(), subcode_);
    num_compaction_ended_++;
  }

  std::atomic<size_t> num_compaction_started_;
  std::atomic<size_t> num_compaction_ended_;
  Status::Code code_;
  Status::SubCode subcode_;
};

TEST_F(DBTest2, CancelManualCompactionWithListener) {
  CompactRangeOptions compact_options;
  auto canceledPtr =
      std::unique_ptr<std::atomic<bool>>(new std::atomic<bool>{true});
  compact_options.canceled = canceledPtr.get();
  compact_options.max_subcompactions = 1;

  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  CancelCompactionListener* listener = new CancelCompactionListener();
  options.listeners.emplace_back(listener);

  DestroyAndReopen(options);

  Random rnd(301);
  for (int i = 0; i < 10; i++) {
    for (int j = 0; j < 10; j++) {
      ASSERT_OK(Put(Key(i + j * 10), rnd.RandomString(50)));
    }
    ASSERT_OK(Flush());
  }

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "CompactionIterator:ProcessKV", [&](void* /*arg*/) {
        compact_options.canceled->store(true, std::memory_order_release);
      });
  int running_compaction = 0;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "CompactionJob::FinishCompactionOutputFile1",
      [&](void* /*arg*/) { running_compaction++; });

  // Case I: (1) compaction begin is notified, (2) the callback sets
  // *canceled to true to cancel the manual compaction, (3) the compaction
  // does not run, (4) compaction end is notified.
  listener->code_ = Status::kIncomplete;
  listener->subcode_ = Status::SubCode::kManualCompactionPaused;

  compact_options.canceled->store(false, std::memory_order_release);
  ASSERT_TRUE(dbfull()
                  ->CompactRange(compact_options, nullptr, nullptr)
                  .IsManualCompactionPaused());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  ASSERT_GT(listener->num_compaction_started_, 0);
  ASSERT_EQ(listener->num_compaction_started_,
            listener->num_compaction_ended_);
  ASSERT_EQ(running_compaction, 0);
  listener->num_compaction_started_ = 0;
  listener->num_compaction_ended_ = 0;

  // Case II: *canceled is already true when the compaction is scheduled, so
  // both the begin and end notifications return without notifying.
  ASSERT_TRUE(dbfull()
                  ->CompactRange(compact_options, nullptr, nullptr)
                  .IsManualCompactionPaused());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  ASSERT_EQ(listener->num_compaction_started_, 0);
  ASSERT_EQ(listener->num_compaction_started_,
            listener->num_compaction_ended_);
  ASSERT_EQ(running_compaction, 0);

  // Case III: (1) compaction begin is notified, (2) the compaction runs,
  // (3) the callback sets *canceled to true only after the keys have been
  // processed, (4) compaction end is notified.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearCallBack(
      "CompactionIterator:ProcessKV");
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "CompactionJob::Run:BeforeVerify", [&](void* /*arg*/) {
        compact_options.canceled->store(true, std::memory_order_release);
      });

  listener->code_ = Status::kOk;
  listener->subcode_ = Status::SubCode::kNone;

  compact_options.canceled->store(false, std::memory_order_release);
  ASSERT_OK(dbfull()->CompactRange(compact_options, nullptr, nullptr));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  ASSERT_GT(listener->num_compaction_started_, 0);
  ASSERT_EQ(listener->num_compaction_started_,
            listener->num_compaction_ended_);

  // Compaction job will succeed.
  ASSERT_GT(running_compaction, 0);

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

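// Verifies that a universal compaction forwarded to the bottom-priority
// thread pool still fires the compaction listener callbacks and completes
// as a single compaction job.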
TEST_F(DBTest2, CompactionOnBottomPriorityWithListener) {
  int num_levels = 3;
  const int kNumFilesTrigger = 4;

  Options options = CurrentOptions();
  env_->SetBackgroundThreads(0, Env::Priority::HIGH);
  env_->SetBackgroundThreads(0, Env::Priority::LOW);
  env_->SetBackgroundThreads(1, Env::Priority::BOTTOM);
  options.env = env_;
  options.compaction_style = kCompactionStyleUniversal;
  options.num_levels = num_levels;
  options.write_buffer_size = 100 << 10;     // 100KB
  options.target_file_size_base = 32 << 10;  // 32KB
  options.level0_file_num_compaction_trigger = kNumFilesTrigger;
  // Trigger compaction if size amplification exceeds 110%
  options.compaction_options_universal.max_size_amplification_percent = 110;

  CancelCompactionListener* listener = new CancelCompactionListener();
  options.listeners.emplace_back(listener);

  DestroyAndReopen(options);

  int num_bottom_thread_compaction_scheduled = 0;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:ForwardToBottomPriPool",
      [&](void* /*arg*/) { num_bottom_thread_compaction_scheduled++; });

  int num_compaction_jobs = 0;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "CompactionJob::Run():End",
      [&](void* /*arg*/) { num_compaction_jobs++; });

  listener->code_ = Status::kOk;
  listener->subcode_ = Status::SubCode::kNone;

  Random rnd(301);
  for (int i = 0; i < 1; ++i) {
    for (int num = 0; num < kNumFilesTrigger; num++) {
      int key_idx = 0;
      GenerateNewFile(&rnd, &key_idx, true /* no_wait */);
      // Pass no_wait above because the default would wait for both flush and
      // compaction. We don't want to wait for compaction because the full
      // compaction is intentionally blocked while more files are flushed.
      ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
    }
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  ASSERT_GT(num_bottom_thread_compaction_scheduled, 0);
  ASSERT_EQ(num_compaction_jobs, 1);
  ASSERT_GT(listener->num_compaction_started_, 0);
  ASSERT_EQ(listener->num_compaction_started_,
            listener->num_compaction_ended_);

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

TEST_F(DBTest2, OptimizeForPointLookup) {
  Options options = CurrentOptions();
  Close();
  options.OptimizeForPointLookup(2);
  ASSERT_OK(DB::Open(options, dbname_, &db_));

  ASSERT_OK(Put("foo", "v1"));
  ASSERT_EQ("v1", Get("foo"));
  ASSERT_OK(Flush());
  ASSERT_EQ("v1", Get("foo"));
}

TEST_F(DBTest2, OptimizeForSmallDB) {
  Options options = CurrentOptions();
  Close();
  options.OptimizeForSmallDb();

  // Find the cache object
  ASSERT_TRUE(options.table_factory->IsInstanceOf(
      TableFactory::kBlockBasedTableName()));
  auto table_options =
      options.table_factory->GetOptions<BlockBasedTableOptions>();

  ASSERT_TRUE(table_options != nullptr);
  std::shared_ptr<Cache> cache = table_options->block_cache;

  ASSERT_EQ(0, cache->GetUsage());
  ASSERT_OK(DB::Open(options, dbname_, &db_));
  ASSERT_OK(Put("foo", "v1"));

  // The memtable size is charged to the block cache
  ASSERT_NE(0, cache->GetUsage());

  ASSERT_EQ("v1", Get("foo"));
  ASSERT_OK(Flush());

  size_t prev_size = cache->GetUsage();
  // Remember the block cache size, so that we can verify that
  // it is filled after Get().
  // Use a pinnable slice so that the block stays pinned and
  // cannot be evicted before we check the size.
  PinnableSlice value;
  ASSERT_OK(
      db_->Get(ReadOptions(), db_->DefaultColumnFamily(), "foo", &value));
  ASSERT_GT(cache->GetUsage(), prev_size);
  value.Reset();
}

TEST_F(DBTest2, IterRaceFlush1) {
  ASSERT_OK(Put("foo", "v1"));

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"DBImpl::NewIterator:1", "DBTest2::IterRaceFlush:1"},
       {"DBTest2::IterRaceFlush:2", "DBImpl::NewIterator:2"}});

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  ROCKSDB_NAMESPACE::port::Thread t1([&] {
    TEST_SYNC_POINT("DBTest2::IterRaceFlush:1");
    ASSERT_OK(Put("foo", "v2"));
    ASSERT_OK(Flush());
    TEST_SYNC_POINT("DBTest2::IterRaceFlush:2");
  });

  // The iterator is created after the first Put(), and its snapshot sequence
  // is assigned after the second Put(), so it must see v2.
  {
    std::unique_ptr<Iterator> it(db_->NewIterator(ReadOptions()));
    it->Seek("foo");
    ASSERT_TRUE(it->Valid());
    ASSERT_OK(it->status());
    ASSERT_EQ("foo", it->key().ToString());
    ASSERT_EQ("v2", it->value().ToString());
  }

  t1.join();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

TEST_F(DBTest2, IterRaceFlush2) {
  ASSERT_OK(Put("foo", "v1"));

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"DBImpl::NewIterator:3", "DBTest2::IterRaceFlush2:1"},
       {"DBTest2::IterRaceFlush2:2", "DBImpl::NewIterator:4"}});

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  ROCKSDB_NAMESPACE::port::Thread t1([&] {
    TEST_SYNC_POINT("DBTest2::IterRaceFlush2:1");
    ASSERT_OK(Put("foo", "v2"));
    ASSERT_OK(Flush());
    TEST_SYNC_POINT("DBTest2::IterRaceFlush2:2");
  });

  // The iterator is created after the first Put(), and its snapshot sequence
  // is assigned before the second Put(), so it must see v1.
  {
    std::unique_ptr<Iterator> it(db_->NewIterator(ReadOptions()));
    it->Seek("foo");
    ASSERT_TRUE(it->Valid());
    ASSERT_OK(it->status());
    ASSERT_EQ("foo", it->key().ToString());
    ASSERT_EQ("v1", it->value().ToString());
  }

  t1.join();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

TEST_F(DBTest2, IterRefreshRaceFlush) {
  ASSERT_OK(Put("foo", "v1"));

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"ArenaWrappedDBIter::Refresh:1", "DBTest2::IterRefreshRaceFlush:1"},
       {"DBTest2::IterRefreshRaceFlush:2", "ArenaWrappedDBIter::Refresh:2"}});

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  ROCKSDB_NAMESPACE::port::Thread t1([&] {
    TEST_SYNC_POINT("DBTest2::IterRefreshRaceFlush:1");
    ASSERT_OK(Put("foo", "v2"));
    ASSERT_OK(Flush());
    TEST_SYNC_POINT("DBTest2::IterRefreshRaceFlush:2");
  });

  // The iterator is refreshed after the first Put(), and its sequence number
  // is assigned after the second Put(), so it must see v2.
  {
    std::unique_ptr<Iterator> it(db_->NewIterator(ReadOptions()));
    ASSERT_OK(it->status());
    ASSERT_OK(it->Refresh());
    it->Seek("foo");
    ASSERT_TRUE(it->Valid());
    ASSERT_OK(it->status());
    ASSERT_EQ("foo", it->key().ToString());
    ASSERT_EQ("v2", it->value().ToString());
  }

  t1.join();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

TEST_F(DBTest2, GetRaceFlush1) {
  ASSERT_OK(Put("foo", "v1"));

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"DBImpl::GetImpl:1", "DBTest2::GetRaceFlush:1"},
       {"DBTest2::GetRaceFlush:2", "DBImpl::GetImpl:2"}});

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  ROCKSDB_NAMESPACE::port::Thread t1([&] {
    TEST_SYNC_POINT("DBTest2::GetRaceFlush:1");
    ASSERT_OK(Put("foo", "v2"));
    ASSERT_OK(Flush());
    TEST_SYNC_POINT("DBTest2::GetRaceFlush:2");
  });

  // Get() is issued after the first Put(), so it should see either
  // "v1" or "v2".
  ASSERT_NE("NOT_FOUND", Get("foo"));
  t1.join();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

TEST_F(DBTest2, GetRaceFlush2) {
  ASSERT_OK(Put("foo", "v1"));

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"DBImpl::GetImpl:3", "DBTest2::GetRaceFlush:1"},
       {"DBTest2::GetRaceFlush:2", "DBImpl::GetImpl:4"}});

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  port::Thread t1([&] {
    TEST_SYNC_POINT("DBTest2::GetRaceFlush:1");
    ASSERT_OK(Put("foo", "v2"));
    ASSERT_OK(Flush());
    TEST_SYNC_POINT("DBTest2::GetRaceFlush:2");
  });

  // Get() is issued after the first Put(), so it should see either
  // "v1" or "v2".
  ASSERT_NE("NOT_FOUND", Get("foo"));
  t1.join();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

TEST_F(DBTest2, DirectIO) {
  if (!IsDirectIOSupported()) {
    return;
  }
  Options options = CurrentOptions();
  options.use_direct_reads = options.use_direct_io_for_flush_and_compaction =
      true;
  options.allow_mmap_reads = options.allow_mmap_writes = false;
  DestroyAndReopen(options);

  ASSERT_OK(Put(Key(0), "a"));
  ASSERT_OK(Put(Key(5), "a"));
  ASSERT_OK(Flush());

  ASSERT_OK(Put(Key(10), "a"));
  ASSERT_OK(Put(Key(15), "a"));
  ASSERT_OK(Flush());

  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  Reopen(options);
}

TEST_F(DBTest2, MemtableOnlyIterator) {
  Options options = CurrentOptions();
  CreateAndReopenWithCF({"pikachu"}, options);

  ASSERT_OK(Put(1, "foo", "first"));
  ASSERT_OK(Put(1, "bar", "second"));

  ReadOptions ropt;
  ropt.read_tier = kMemtableTier;
  std::string value;
  Iterator* it = nullptr;

  // Before flushing
  // point lookups
  ASSERT_OK(db_->Get(ropt, handles_[1], "foo", &value));
  ASSERT_EQ("first", value);
  ASSERT_OK(db_->Get(ropt, handles_[1], "bar", &value));
  ASSERT_EQ("second", value);

  // Memtable-only iterator (read_tier=kMemtableTier); data not flushed yet.
  it = db_->NewIterator(ropt, handles_[1]);
  int count = 0;
  for (it->SeekToFirst(); it->Valid(); it->Next()) {
    ASSERT_TRUE(it->Valid());
    count++;
  }
  ASSERT_TRUE(!it->Valid());
  ASSERT_OK(it->status());
  ASSERT_EQ(2, count);
  delete it;

  ASSERT_OK(Flush(1));

  // After flushing
  // point lookups
  ASSERT_OK(db_->Get(ropt, handles_[1], "foo", &value));
  ASSERT_EQ("first", value);
  ASSERT_OK(db_->Get(ropt, handles_[1], "bar", &value));
  ASSERT_EQ("second", value);

  // Nothing should be returned using the memtable-only iterator after
  // flushing.
  it = db_->NewIterator(ropt, handles_[1]);
  ASSERT_OK(it->status());
  count = 0;
  for (it->SeekToFirst(); it->Valid(); it->Next()) {
    ASSERT_TRUE(it->Valid());
    count++;
  }
  ASSERT_TRUE(!it->Valid());
  ASSERT_EQ(0, count);
  ASSERT_OK(it->status());
  delete it;

  // Add a key to the memtable
  ASSERT_OK(Put(1, "foobar", "third"));
  it = db_->NewIterator(ropt, handles_[1]);
  ASSERT_OK(it->status());
  count = 0;
  for (it->SeekToFirst(); it->Valid(); it->Next()) {
    ASSERT_TRUE(it->Valid());
    ASSERT_EQ("foobar", it->key().ToString());
    ASSERT_EQ("third", it->value().ToString());
    count++;
  }
  ASSERT_TRUE(!it->Valid());
  ASSERT_EQ(1, count);
  ASSERT_OK(it->status());
  delete it;
}

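// Verifies that only writes tagged WriteOptions::low_pri go through the
// low-priority rate limiter, and only while there is compaction pressure;
// once the pressure clears, low_pri writes are no longer throttled.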
TEST_F(DBTest2, LowPriWrite) {
  Options options = CurrentOptions();
  // Compaction pressure should build up once L0 accumulates 6 files
  options.level0_file_num_compaction_trigger = 4;
  options.level0_slowdown_writes_trigger = 12;
  options.level0_stop_writes_trigger = 30;
  options.delayed_write_rate = 8 * 1024 * 1024;
  Reopen(options);

  std::atomic<int> rate_limit_count(0);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "GenericRateLimiter::Request:1", [&](void* arg) {
        rate_limit_count.fetch_add(1);
        int64_t* rate_bytes_per_sec = static_cast<int64_t*>(arg);
        ASSERT_EQ(1024 * 1024, *rate_bytes_per_sec);
      });

  // Make a trivial L5 for L0 to compact into. L6 will be large so the debt
  // ratio will not cause compaction pressure.
  Random rnd(301);
  ASSERT_OK(Put("", rnd.RandomString(102400)));
  ASSERT_OK(Flush());
  MoveFilesToLevel(6);
  ASSERT_OK(Put("", ""));
  ASSERT_OK(Flush());
  MoveFilesToLevel(5);

  // Block compaction
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({
      {"DBTest.LowPriWrite:0", "DBImpl::BGWorkCompaction"},
  });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  WriteOptions wo;
  for (int i = 0; i < 6; i++) {
    wo.low_pri = false;
    ASSERT_OK(Put("", "", wo));
    wo.low_pri = true;
    ASSERT_OK(Put("", "", wo));
    ASSERT_OK(Flush());
  }
  ASSERT_EQ(0, rate_limit_count.load());
  wo.low_pri = true;
  ASSERT_OK(Put("", "", wo));
  ASSERT_EQ(1, rate_limit_count.load());
  wo.low_pri = false;
  ASSERT_OK(Put("", "", wo));
  ASSERT_EQ(1, rate_limit_count.load());

  wo.low_pri = true;
  std::string big_value = std::string(1 * 1024 * 1024, 'x');
  ASSERT_OK(Put("", big_value, wo));
  ASSERT_LT(1, rate_limit_count.load());
  // Reset
  rate_limit_count = 0;
  wo.low_pri = false;
  ASSERT_OK(Put("", big_value, wo));
  ASSERT_EQ(0, rate_limit_count.load());

  TEST_SYNC_POINT("DBTest.LowPriWrite:0");
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  wo.low_pri = true;
  ASSERT_OK(Put("", "", wo));
  ASSERT_EQ(0, rate_limit_count.load());
  wo.low_pri = false;
  ASSERT_OK(Put("", "", wo));
  ASSERT_EQ(0, rate_limit_count.load());
}

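// Verifies that compaction reads are charged to the rate limiter (in
// kReadsOnly mode) while user iterator reads are not, with and without
// direct I/O and compaction readahead.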
TEST_F(DBTest2, RateLimitedCompactionReads) {
  // The compaction input is 512KB of data
  const int kNumKeysPerFile = 128;
  const int kBytesPerKey = 1024;
  const int kNumL0Files = 4;

  for (int compaction_readahead_size : {0, 32 << 10}) {
    for (auto use_direct_io : {false, true}) {
      if (use_direct_io && !IsDirectIOSupported()) {
        continue;
      }
      Options options = CurrentOptions();
      options.compaction_readahead_size = compaction_readahead_size;
      options.compression = kNoCompression;
      options.level0_file_num_compaction_trigger = kNumL0Files;
      options.memtable_factory.reset(
          test::NewSpecialSkipListFactory(kNumKeysPerFile));
      // The compaction takes roughly one second, split into 100 x 10ms
      // intervals. Each interval permits 5.12KB, which is smaller than the
      // block size, so this test exercises the code for chunking reads.
      options.rate_limiter.reset(NewGenericRateLimiter(
          static_cast<int64_t>(kNumL0Files * kNumKeysPerFile *
                               kBytesPerKey) /* rate_bytes_per_sec */,
          10 * 1000 /* refill_period_us */, 10 /* fairness */,
          RateLimiter::Mode::kReadsOnly));
      options.use_direct_reads =
          options.use_direct_io_for_flush_and_compaction = use_direct_io;
      BlockBasedTableOptions bbto;
      bbto.block_size = 16384;
      bbto.no_block_cache = true;
      options.table_factory.reset(NewBlockBasedTableFactory(bbto));
      DestroyAndReopen(options);

      // Block the background compaction thread so we can precisely control
      // when compaction starts, excluding the bytes rate-limited earlier by
      // flush-time reads for table verification
      std::shared_ptr<test::SleepingBackgroundTask> sleeping_task(
          new test::SleepingBackgroundTask());
      env_->SetBackgroundThreads(1, Env::LOW);
      env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
                     sleeping_task.get(), Env::Priority::LOW);
      sleeping_task->WaitUntilSleeping();

      for (int i = 0; i < kNumL0Files; ++i) {
        for (int j = 0; j <= kNumKeysPerFile; ++j) {
          ASSERT_OK(Put(Key(j), DummyString(kBytesPerKey)));
        }
        ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
        if (i + 1 < kNumL0Files) {
          ASSERT_EQ(i + 1, NumTableFilesAtLevel(0));
        }
      }

      size_t rate_limited_bytes_start_bytes =
          options.rate_limiter->GetTotalBytesThrough(Env::IO_TOTAL);

      sleeping_task->WakeUp();
      sleeping_task->WaitUntilDone();
      ASSERT_OK(dbfull()->TEST_WaitForCompact());
      ASSERT_EQ(0, NumTableFilesAtLevel(0));

      // Should be slightly above 512KB due to non-data blocks read.
      // Arbitrarily chose 1MB as the upper bound on the total bytes read.
      size_t rate_limited_bytes =
          static_cast<size_t>(
              options.rate_limiter->GetTotalBytesThrough(Env::IO_TOTAL)) -
          rate_limited_bytes_start_bytes;
      // The charges can exist for `IO_LOW` and `IO_USER` priorities.
      size_t rate_limited_bytes_by_pri =
          options.rate_limiter->GetTotalBytesThrough(Env::IO_LOW) +
          options.rate_limiter->GetTotalBytesThrough(Env::IO_USER);
      ASSERT_EQ(rate_limited_bytes,
                static_cast<size_t>(rate_limited_bytes_by_pri));
      // Include the explicit prefetch of the footer in the direct I/O case.
      size_t direct_io_extra = use_direct_io ? 512 * 1024 : 0;
      ASSERT_GE(
          rate_limited_bytes,
          static_cast<size_t>(kNumKeysPerFile * kBytesPerKey * kNumL0Files));
      ASSERT_LT(rate_limited_bytes,
                static_cast<size_t>(2 * kNumKeysPerFile * kBytesPerKey *
                                        kNumL0Files +
                                    direct_io_extra));

      Iterator* iter = db_->NewIterator(ReadOptions());
      ASSERT_OK(iter->status());
      for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
        ASSERT_EQ(iter->value().ToString(), DummyString(kBytesPerKey));
      }
      delete iter;
      // Bytes read for the user iterator shouldn't count against the rate
      // limit.
      rate_limited_bytes_by_pri =
          options.rate_limiter->GetTotalBytesThrough(Env::IO_LOW) +
          options.rate_limiter->GetTotalBytesThrough(Env::IO_USER);
      ASSERT_EQ(rate_limited_bytes,
                static_cast<size_t>(rate_limited_bytes_by_pri));
    }
  }
}

// Make sure the DB can be reopened with a reduced number of levels, given
// that no file is on a level higher than the new num_levels.
TEST_F(DBTest2, ReduceLevel) {
  Options options;
  options.env = env_;
  options.disable_auto_compactions = true;
  options.num_levels = 7;
  Reopen(options);
  ASSERT_OK(Put("foo", "bar"));
  ASSERT_OK(Flush());
  MoveFilesToLevel(6);
  ASSERT_EQ("0,0,0,0,0,0,1", FilesPerLevel());

  CompactRangeOptions compact_options;
  compact_options.change_level = true;
  compact_options.target_level = 1;
  ASSERT_OK(dbfull()->CompactRange(compact_options, nullptr, nullptr));
  ASSERT_EQ("0,1", FilesPerLevel());

  options.num_levels = 3;
  Reopen(options);
  ASSERT_EQ("0,1", FilesPerLevel());
}

// Test that ReadCallback is actually used in both memtables and SST files
TEST_F(DBTest2, ReadCallbackTest) {
  Options options;
  options.disable_auto_compactions = true;
  options.num_levels = 7;
  options.env = env_;
  Reopen(options);
  std::vector<const Snapshot*> snapshots;
  // Try to create a db with multiple layers and a memtable
  const std::string key = "foo";
  const std::string value = "bar";
  // This test assumes that the seq starts with 1 and is increased by 1 after
  // each write batch of size 1. If that behavior changes, the test needs to
  // be updated as well.
  // TODO(myabandeh): update this test to use the seq number that is returned
  // by the DB instead of assuming what seq the DB used.
  int i = 1;
  for (; i < 10; i++) {
    ASSERT_OK(Put(key, value + std::to_string(i)));
    // Take a snapshot to avoid the value being removed during compaction
    auto snapshot = dbfull()->GetSnapshot();
    snapshots.push_back(snapshot);
  }
  ASSERT_OK(Flush());
  for (; i < 20; i++) {
    ASSERT_OK(Put(key, value + std::to_string(i)));
    // Take a snapshot to avoid the value being removed during compaction
    auto snapshot = dbfull()->GetSnapshot();
    snapshots.push_back(snapshot);
  }
  ASSERT_OK(Flush());
  MoveFilesToLevel(6);
  ASSERT_EQ("0,0,0,0,0,0,2", FilesPerLevel());
  for (; i < 30; i++) {
    ASSERT_OK(Put(key, value + std::to_string(i)));
    auto snapshot = dbfull()->GetSnapshot();
    snapshots.push_back(snapshot);
  }
  ASSERT_OK(Flush());
  ASSERT_EQ("1,0,0,0,0,0,2", FilesPerLevel());
  // And also add some values to the memtable
  for (; i < 40; i++) {
    ASSERT_OK(Put(key, value + std::to_string(i)));
    auto snapshot = dbfull()->GetSnapshot();
    snapshots.push_back(snapshot);
  }

  class TestReadCallback : public ReadCallback {
   public:
    explicit TestReadCallback(SequenceNumber snapshot)
        : ReadCallback(snapshot), snapshot_(snapshot) {}
    bool IsVisibleFullCheck(SequenceNumber seq) override {
      return seq <= snapshot_;
    }

   private:
    SequenceNumber snapshot_;
  };

  for (int seq = 1; seq < i; seq++) {
    PinnableSlice pinnable_val;
    ReadOptions roptions;
    TestReadCallback callback(seq);
    bool dont_care = true;
    DBImpl::GetImplOptions get_impl_options;
    get_impl_options.column_family = dbfull()->DefaultColumnFamily();
    get_impl_options.value = &pinnable_val;
    get_impl_options.value_found = &dont_care;
    get_impl_options.callback = &callback;
    Status s = dbfull()->GetImpl(roptions, key, get_impl_options);
    ASSERT_TRUE(s.ok());
    // Assuming that the DB increased the seq by one after each Put, the
    // value and the seq number must be equal since we also increment the
    // value by 1 after each Put.
    ASSERT_EQ(value + std::to_string(seq), pinnable_val.ToString());
  }

  for (auto snapshot : snapshots) {
    dbfull()->ReleaseSnapshot(snapshot);
  }
}

TEST_F(DBTest2, LiveFilesOmitObsoleteFiles) {
  // Regression test for a race condition where an obsolete file was returned
  // to the user as a "live file" but then deleted, all while file deletions
  // were disabled.
  //
  // It happened like this:
  //
  // 1. [flush thread] Log file "x.log" found by FindObsoleteFiles
  // 2. [user thread] DisableFileDeletions, GetSortedWalFiles are called and
  //    the latter returned "x.log"
  // 3. [flush thread] PurgeObsoleteFiles deleted "x.log"
  // 4. [user thread] Reading "x.log" failed
  //
  // Unfortunately the only regression test I can come up with involves sleep.
  // We cannot set SyncPoints to repro since, once the fix is applied, the
  // SyncPoints would cause a deadlock as the repro's sequence of events is
  // now prohibited.
  //
  // Instead, if we sleep for a second between Find and Purge, and ensure the
  // read attempt happens after purge, then the sequence of events will almost
  // certainly happen on the old code.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({
      {"DBImpl::BackgroundCallFlush:FilesFound",
       "DBTest2::LiveFilesOmitObsoleteFiles:FlushTriggered"},
      {"DBImpl::PurgeObsoleteFiles:End",
       "DBTest2::LiveFilesOmitObsoleteFiles:LiveFilesCaptured"},
  });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::PurgeObsoleteFiles:Begin",
      [&](void* /*arg*/) { env_->SleepForMicroseconds(1000000); });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  ASSERT_OK(Put("key", "val"));
  FlushOptions flush_opts;
  flush_opts.wait = false;
  ASSERT_OK(db_->Flush(flush_opts));
  TEST_SYNC_POINT("DBTest2::LiveFilesOmitObsoleteFiles:FlushTriggered");

  ASSERT_OK(db_->DisableFileDeletions());
  VectorWalPtr log_files;
  ASSERT_OK(db_->GetSortedWalFiles(log_files));
  TEST_SYNC_POINT("DBTest2::LiveFilesOmitObsoleteFiles:LiveFilesCaptured");
  for (const auto& log_file : log_files) {
    ASSERT_OK(env_->FileExists(LogFileName(dbname_, log_file->LogNumber())));
  }

  ASSERT_OK(db_->EnableFileDeletions());
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

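// Counts the number of random reads (preads) issued for flush, point
// lookups, and compaction with the block cache disabled, and checks the
// expected counts with and without prefetch support.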
TEST_F(DBTest2, TestNumPread) {
  Options options = CurrentOptions();
  bool prefetch_supported =
      test::IsPrefetchSupported(env_->GetFileSystem(), dbname_);
  // Disable the block cache
  BlockBasedTableOptions table_options;
  table_options.no_block_cache = true;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(options);
  env_->count_random_reads_ = true;
  env_->random_file_open_counter_.store(0);
  ASSERT_OK(Put("bar", "foo"));
  ASSERT_OK(Put("foo", "bar"));
  ASSERT_OK(Flush());
  if (prefetch_supported) {
    // After flush, we'll open the file and read the footer, meta block,
    // property block and index block.
    ASSERT_EQ(4, env_->random_read_counter_.Read());
  } else {
    // With prefetch not supported, we will do a single read into a buffer
    ASSERT_EQ(1, env_->random_read_counter_.Read());
  }
  ASSERT_EQ(1, env_->random_file_open_counter_.load());

  // One pread per normal data block read
  env_->random_file_open_counter_.store(0);
  env_->random_read_counter_.Reset();
  ASSERT_EQ("bar", Get("foo"));
  ASSERT_EQ(1, env_->random_read_counter_.Read());
  // All files are already opened.
  ASSERT_EQ(0, env_->random_file_open_counter_.load());

  env_->random_file_open_counter_.store(0);
  env_->random_read_counter_.Reset();
  ASSERT_OK(Put("bar2", "foo2"));
  ASSERT_OK(Put("foo2", "bar2"));
  ASSERT_OK(Flush());
  if (prefetch_supported) {
    // After flush, we'll open the file and read the footer, meta block,
    // property block and index block.
    ASSERT_EQ(4, env_->random_read_counter_.Read());
  } else {
    // With prefetch not supported, we will do a single read into a buffer
    ASSERT_EQ(1, env_->random_read_counter_.Read());
  }
  ASSERT_EQ(1, env_->random_file_open_counter_.load());

  env_->random_file_open_counter_.store(0);
  env_->random_read_counter_.Reset();
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  if (prefetch_supported) {
    // Compaction needs two input blocks, which requires 2 preads, and
    // generates a new SST file which needs 4 preads (footer, meta block,
    // property block and index block). In total 6.
    ASSERT_EQ(6, env_->random_read_counter_.Read());
  } else {
    // With prefetch off, compaction needs two input blocks,
    // followed by a single buffered read. In total 3.
    ASSERT_EQ(3, env_->random_read_counter_.Read());
  }
  // All compaction input files should have already been opened.
  ASSERT_EQ(1, env_->random_file_open_counter_.load());

  // One pread per normal data block read
  env_->random_file_open_counter_.store(0);
  env_->random_read_counter_.Reset();
  ASSERT_EQ("foo2", Get("bar2"));
  ASSERT_EQ(1, env_->random_read_counter_.Read());
  // SST files are already opened.
  ASSERT_EQ(0, env_->random_file_open_counter_.load());
}

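// Handler used by the trace-and-replay tests below: it tallies latency and
// per-operation counts (writes, gets, multigets, iterator seeks) from the
// TraceRecordResult objects produced during replay.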
class TraceExecutionResultHandler : public TraceRecordResult::Handler {
 public:
  TraceExecutionResultHandler() = default;
  ~TraceExecutionResultHandler() override = default;

  Status Handle(const StatusOnlyTraceExecutionResult& result) override {
    if (result.GetStartTimestamp() > result.GetEndTimestamp()) {
      return Status::InvalidArgument("Invalid timestamps.");
    }
    result.GetStatus().PermitUncheckedError();
    switch (result.GetTraceType()) {
      case kTraceWrite: {
        total_latency_ += result.GetLatency();
        cnt_++;
        writes_++;
        break;
      }
      default:
        return Status::Corruption("Type mismatch.");
    }
    return Status::OK();
  }

  Status Handle(const SingleValueTraceExecutionResult& result) override {
    if (result.GetStartTimestamp() > result.GetEndTimestamp()) {
      return Status::InvalidArgument("Invalid timestamps.");
    }
    result.GetStatus().PermitUncheckedError();
    switch (result.GetTraceType()) {
      case kTraceGet: {
        total_latency_ += result.GetLatency();
        cnt_++;
        gets_++;
        break;
      }
      default:
        return Status::Corruption("Type mismatch.");
    }
    return Status::OK();
  }

  Status Handle(const MultiValuesTraceExecutionResult& result) override {
    if (result.GetStartTimestamp() > result.GetEndTimestamp()) {
      return Status::InvalidArgument("Invalid timestamps.");
    }
    for (const Status& s : result.GetMultiStatus()) {
      s.PermitUncheckedError();
    }
    switch (result.GetTraceType()) {
      case kTraceMultiGet: {
        total_latency_ += result.GetLatency();
        cnt_++;
        multigets_++;
        break;
      }
      default:
        return Status::Corruption("Type mismatch.");
    }
    return Status::OK();
  }

  Status Handle(const IteratorTraceExecutionResult& result) override {
    if (result.GetStartTimestamp() > result.GetEndTimestamp()) {
      return Status::InvalidArgument("Invalid timestamps.");
    }
    result.GetStatus().PermitUncheckedError();
    switch (result.GetTraceType()) {
      case kTraceIteratorSeek:
      case kTraceIteratorSeekForPrev: {
        total_latency_ += result.GetLatency();
        cnt_++;
        seeks_++;
        break;
      }
      default:
        return Status::Corruption("Type mismatch.");
    }
    return Status::OK();
  }

  void Reset() {
    total_latency_ = 0;
    cnt_ = 0;
    writes_ = 0;
    gets_ = 0;
    seeks_ = 0;
    multigets_ = 0;
  }

  double GetAvgLatency() const {
    return cnt_ == 0 ? 0.0 : 1.0 * total_latency_ / cnt_;
  }

  int GetNumWrites() const { return writes_; }
  int GetNumGets() const { return gets_; }
  int GetNumIterSeeks() const { return seeks_; }
  int GetNumMultiGets() const { return multigets_; }

 private:
  std::atomic<uint64_t> total_latency_{0};
  std::atomic<uint32_t> cnt_{0};
  std::atomic<int> writes_{0};
  std::atomic<int> gets_{0};
  std::atomic<int> seeks_{0};
  std::atomic<int> multigets_{0};
};
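
// A minimal usage sketch, mirroring the tests below (not a fixed API
// contract): results coming out of Replayer::Replay() or Replayer::Execute()
// are dispatched to the handler through the visitor interface:
//
//   TraceExecutionResultHandler handler;
//   std::unique_ptr<TraceRecordResult> res;
//   // ... obtain `res` from Replayer::Execute() or a Replay() callback ...
//   if (res != nullptr) {
//     Status s = res->Accept(&handler);  // routes to the matching Handle()
//   }
//   double avg_latency = handler.GetAvgLatency();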
TEST_F(DBTest2, TraceAndReplay) {
  Options options = CurrentOptions();
  options.merge_operator = MergeOperators::CreatePutOperator();
  ReadOptions ro;
  WriteOptions wo;
  TraceOptions trace_opts;
  EnvOptions env_opts;
  CreateAndReopenWithCF({"pikachu"}, options);
  Random rnd(301);
  Iterator* single_iter = nullptr;

  ASSERT_TRUE(db_->EndTrace().IsIOError());

  std::string trace_filename = dbname_ + "/rocksdb.trace";
  std::unique_ptr<TraceWriter> trace_writer;
  ASSERT_OK(NewFileTraceWriter(env_, env_opts, trace_filename, &trace_writer));
  ASSERT_OK(db_->StartTrace(trace_opts, std::move(trace_writer)));

  // 5 Writes
  ASSERT_OK(Put(0, "a", "1"));
  ASSERT_OK(Merge(0, "b", "2"));
  ASSERT_OK(Delete(0, "c"));
  ASSERT_OK(SingleDelete(0, "d"));
  ASSERT_OK(db_->DeleteRange(wo, dbfull()->DefaultColumnFamily(), "e", "f"));

  // 6th Write
  WriteBatch batch;
  ASSERT_OK(batch.Put("f", "11"));
  ASSERT_OK(batch.Merge("g", "12"));
  ASSERT_OK(batch.Delete("h"));
  ASSERT_OK(batch.SingleDelete("i"));
  ASSERT_OK(batch.DeleteRange("j", "k"));
  ASSERT_OK(db_->Write(wo, &batch));

  // 2 Seek(ForPrev)s
  single_iter = db_->NewIterator(ro);
  single_iter->Seek("f");  // Seek 1
  single_iter->SeekForPrev("g");
  ASSERT_OK(single_iter->status());
  delete single_iter;

  // 2 Gets
  ASSERT_EQ("1", Get(0, "a"));
  ASSERT_EQ("12", Get(0, "g"));

  // 7th and 8th Write, 3rd Get
  ASSERT_OK(Put(1, "foo", "bar"));
  ASSERT_OK(Put(1, "rocksdb", "rocks"));
  ASSERT_EQ("NOT_FOUND", Get(1, "leveldb"));

  // Total Write x 8, Get x 3, Seek x 2.
  ASSERT_OK(db_->EndTrace());
  // These should not get into the trace file as it is after EndTrace.
  ASSERT_OK(Put("hello", "world"));
  ASSERT_OK(Merge("foo", "bar"));

  // Open another db, replay, and verify the data
  std::string value;
  std::string dbname2 = test::PerThreadDBPath(env_, "/db_replay");
  ASSERT_OK(DestroyDB(dbname2, options));

  // Using a different name than db2, to pacify infer's use-after-lifetime
  // warnings (http://fbinfer.com).
  DB* db2_init = nullptr;
  options.create_if_missing = true;
  ASSERT_OK(DB::Open(options, dbname2, &db2_init));
  ColumnFamilyHandle* cf;
  ASSERT_OK(
      db2_init->CreateColumnFamily(ColumnFamilyOptions(), "pikachu", &cf));
  delete cf;
  delete db2_init;

  DB* db2 = nullptr;
  std::vector<ColumnFamilyDescriptor> column_families;
  ColumnFamilyOptions cf_options;
  cf_options.merge_operator = MergeOperators::CreatePutOperator();
  column_families.emplace_back("default", cf_options);
  column_families.emplace_back("pikachu", ColumnFamilyOptions());
  std::vector<ColumnFamilyHandle*> handles;
  DBOptions db_opts;
  db_opts.env = env_;
  ASSERT_OK(DB::Open(db_opts, dbname2, column_families, &handles, &db2));

  env_->SleepForMicroseconds(100);
  // Verify that the keys don't already exist
  ASSERT_TRUE(db2->Get(ro, handles[0], "a", &value).IsNotFound());
  ASSERT_TRUE(db2->Get(ro, handles[0], "g", &value).IsNotFound());

  std::unique_ptr<TraceReader> trace_reader;
  ASSERT_OK(NewFileTraceReader(env_, env_opts, trace_filename, &trace_reader));
  std::unique_ptr<Replayer> replayer;
  ASSERT_OK(
      db2->NewDefaultReplayer(handles, std::move(trace_reader), &replayer));

  TraceExecutionResultHandler res_handler;
  std::function<void(Status, std::unique_ptr<TraceRecordResult>&&)> res_cb =
      [&res_handler](Status exec_s, std::unique_ptr<TraceRecordResult>&& res) {
        ASSERT_TRUE(exec_s.ok() || exec_s.IsNotSupported());
        if (res != nullptr) {
          ASSERT_OK(res->Accept(&res_handler));
          res.reset();
        }
      };

  // Unprepared replay should fail with Status::Incomplete()
  ASSERT_TRUE(replayer->Replay(ReplayOptions(), nullptr).IsIncomplete());
  ASSERT_OK(replayer->Prepare());
  // Ok to repeatedly Prepare().
  ASSERT_OK(replayer->Prepare());
  // Replay using 1 thread, 1x speed.
  ASSERT_OK(replayer->Replay(ReplayOptions(1, 1.0), res_cb));
  ASSERT_GE(res_handler.GetAvgLatency(), 0.0);
  ASSERT_EQ(res_handler.GetNumWrites(), 8);
  ASSERT_EQ(res_handler.GetNumGets(), 3);
  ASSERT_EQ(res_handler.GetNumIterSeeks(), 2);
  ASSERT_EQ(res_handler.GetNumMultiGets(), 0);
  res_handler.Reset();

  ASSERT_OK(db2->Get(ro, handles[0], "a", &value));
  ASSERT_EQ("1", value);
  ASSERT_OK(db2->Get(ro, handles[0], "g", &value));
  ASSERT_EQ("12", value);
  ASSERT_TRUE(db2->Get(ro, handles[0], "hello", &value).IsNotFound());
  ASSERT_TRUE(db2->Get(ro, handles[0], "world", &value).IsNotFound());
  ASSERT_OK(db2->Get(ro, handles[1], "foo", &value));
  ASSERT_EQ("bar", value);
  ASSERT_OK(db2->Get(ro, handles[1], "rocksdb", &value));
  ASSERT_EQ("rocks", value);

  // Re-replay should fail with Status::Incomplete() if Prepare() was not
  // called. Currently we don't distinguish between unprepared and trace end.
  ASSERT_TRUE(replayer->Replay(ReplayOptions(), nullptr).IsIncomplete());

  // Re-replay using 2 threads, 2x speed.
  ASSERT_OK(replayer->Prepare());
  ASSERT_OK(replayer->Replay(ReplayOptions(2, 2.0), res_cb));
  ASSERT_GE(res_handler.GetAvgLatency(), 0.0);
  ASSERT_EQ(res_handler.GetNumWrites(), 8);
  ASSERT_EQ(res_handler.GetNumGets(), 3);
  ASSERT_EQ(res_handler.GetNumIterSeeks(), 2);
  ASSERT_EQ(res_handler.GetNumMultiGets(), 0);
  res_handler.Reset();

  // Re-replay using 2 threads, 1/2 speed.
  ASSERT_OK(replayer->Prepare());
  ASSERT_OK(replayer->Replay(ReplayOptions(2, 0.5), res_cb));
  ASSERT_GE(res_handler.GetAvgLatency(), 0.0);
  ASSERT_EQ(res_handler.GetNumWrites(), 8);
  ASSERT_EQ(res_handler.GetNumGets(), 3);
  ASSERT_EQ(res_handler.GetNumIterSeeks(), 2);
  ASSERT_EQ(res_handler.GetNumMultiGets(), 0);
  res_handler.Reset();

  replayer.reset();

  for (auto handle : handles) {
    delete handle;
  }
  delete db2;
  ASSERT_OK(DestroyDB(dbname2, options));
}
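
// Same workload as TraceAndReplay, but the trace is replayed manually with
// Replayer::Next()/Execute() instead of Replay(), and artificially
// constructed TraceRecords (Write/Get/IteratorSeek/MultiGet) are then
// executed directly, including invalid ones that must fail.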
TEST_F(DBTest2, TraceAndManualReplay) {
  Options options = CurrentOptions();
  options.merge_operator = MergeOperators::CreatePutOperator();
  ReadOptions ro;
  WriteOptions wo;
  TraceOptions trace_opts;
  EnvOptions env_opts;
  CreateAndReopenWithCF({"pikachu"}, options);
  Random rnd(301);
  Iterator* single_iter = nullptr;

  ASSERT_TRUE(db_->EndTrace().IsIOError());

  std::string trace_filename = dbname_ + "/rocksdb.trace";
  std::unique_ptr<TraceWriter> trace_writer;
  ASSERT_OK(NewFileTraceWriter(env_, env_opts, trace_filename, &trace_writer));
  ASSERT_OK(db_->StartTrace(trace_opts, std::move(trace_writer)));

  ASSERT_OK(Put(0, "a", "1"));
  ASSERT_OK(Merge(0, "b", "2"));
  ASSERT_OK(Delete(0, "c"));
  ASSERT_OK(SingleDelete(0, "d"));
  ASSERT_OK(db_->DeleteRange(wo, dbfull()->DefaultColumnFamily(), "e", "f"));

  WriteBatch batch;
  ASSERT_OK(batch.Put("f", "11"));
  ASSERT_OK(batch.Merge("g", "12"));
  ASSERT_OK(batch.Delete("h"));
  ASSERT_OK(batch.SingleDelete("i"));
  ASSERT_OK(batch.DeleteRange("j", "k"));
  ASSERT_OK(db_->Write(wo, &batch));

  single_iter = db_->NewIterator(ro);
  single_iter->Seek("f");
  single_iter->SeekForPrev("g");
  ASSERT_OK(single_iter->status());
  delete single_iter;

  // Write some sequenced keys for testing lower/upper bounds of iterator.
  batch.Clear();
  ASSERT_OK(batch.Put("iter-0", "iter-0"));
  ASSERT_OK(batch.Put("iter-1", "iter-1"));
  ASSERT_OK(batch.Put("iter-2", "iter-2"));
  ASSERT_OK(batch.Put("iter-3", "iter-3"));
  ASSERT_OK(batch.Put("iter-4", "iter-4"));
  ASSERT_OK(db_->Write(wo, &batch));

  ReadOptions bounded_ro = ro;
  Slice lower_bound("iter-1");
  Slice upper_bound("iter-3");
  bounded_ro.iterate_lower_bound = &lower_bound;
  bounded_ro.iterate_upper_bound = &upper_bound;
  single_iter = db_->NewIterator(bounded_ro);
  single_iter->Seek("iter-0");
  ASSERT_EQ(single_iter->key().ToString(), "iter-1");
  single_iter->Seek("iter-2");
  ASSERT_EQ(single_iter->key().ToString(), "iter-2");
  single_iter->Seek("iter-4");
  ASSERT_FALSE(single_iter->Valid());
  single_iter->SeekForPrev("iter-0");
  ASSERT_FALSE(single_iter->Valid());
  single_iter->SeekForPrev("iter-2");
  ASSERT_EQ(single_iter->key().ToString(), "iter-2");
  single_iter->SeekForPrev("iter-4");
  ASSERT_EQ(single_iter->key().ToString(), "iter-2");
  ASSERT_OK(single_iter->status());
  delete single_iter;

  ASSERT_EQ("1", Get(0, "a"));
  ASSERT_EQ("12", Get(0, "g"));
  ASSERT_OK(Put(1, "foo", "bar"));
  ASSERT_OK(Put(1, "rocksdb", "rocks"));
  ASSERT_EQ("NOT_FOUND", Get(1, "leveldb"));

  // Same as TraceAndReplay: Write x 8, Get x 3, Seek x 2.
  // Plus 1 WriteBatch for the iterator with lower/upper bounds, and 6
  // Seek(ForPrev)s.
  // Total: Write x 9, Get x 3, Seek x 8.
  ASSERT_OK(db_->EndTrace());
  // These should not get into the trace file as it is after EndTrace.
  ASSERT_OK(Put("hello", "world"));
  ASSERT_OK(Merge("foo", "bar"));

  // Open another db, replay, and verify the data
  std::string value;
  std::string dbname2 = test::PerThreadDBPath(env_, "/db_replay");
  ASSERT_OK(DestroyDB(dbname2, options));

  // Using a different name than db2, to pacify infer's use-after-lifetime
  // warnings (http://fbinfer.com).
  DB* db2_init = nullptr;
  options.create_if_missing = true;
  ASSERT_OK(DB::Open(options, dbname2, &db2_init));
  ColumnFamilyHandle* cf;
  ASSERT_OK(
      db2_init->CreateColumnFamily(ColumnFamilyOptions(), "pikachu", &cf));
  delete cf;
  delete db2_init;

  DB* db2 = nullptr;
  std::vector<ColumnFamilyDescriptor> column_families;
  ColumnFamilyOptions cf_options;
  cf_options.merge_operator = MergeOperators::CreatePutOperator();
  column_families.emplace_back("default", cf_options);
  column_families.emplace_back("pikachu", ColumnFamilyOptions());
  std::vector<ColumnFamilyHandle*> handles;
  DBOptions db_opts;
  db_opts.env = env_;
  ASSERT_OK(DB::Open(db_opts, dbname2, column_families, &handles, &db2));

  env_->SleepForMicroseconds(100);
  // Verify that the keys don't already exist
  ASSERT_TRUE(db2->Get(ro, handles[0], "a", &value).IsNotFound());
  ASSERT_TRUE(db2->Get(ro, handles[0], "g", &value).IsNotFound());

  std::unique_ptr<TraceReader> trace_reader;
  ASSERT_OK(NewFileTraceReader(env_, env_opts, trace_filename, &trace_reader));
  std::unique_ptr<Replayer> replayer;
  ASSERT_OK(
      db2->NewDefaultReplayer(handles, std::move(trace_reader), &replayer));

  TraceExecutionResultHandler res_handler;

  // Manual replay for 2 times. The 2nd checks if the replay can restart.
  std::unique_ptr<TraceRecord> record;
  std::unique_ptr<TraceRecordResult> result;
  for (int i = 0; i < 2; i++) {
    // Next() should fail if unprepared.
    ASSERT_TRUE(replayer->Next(nullptr).IsIncomplete());
    ASSERT_OK(replayer->Prepare());
    Status s = Status::OK();
    // Loop until the trace ends.
    while (s.ok()) {
      s = replayer->Next(&record);
      // Skip unsupported operations.
      if (s.IsNotSupported()) {
        continue;
      }
      if (s.ok()) {
        ASSERT_OK(replayer->Execute(record, &result));
        if (result != nullptr) {
          ASSERT_OK(result->Accept(&res_handler));
          if (record->GetTraceType() == kTraceIteratorSeek ||
              record->GetTraceType() == kTraceIteratorSeekForPrev) {
            IteratorSeekQueryTraceRecord* iter_rec =
                dynamic_cast<IteratorSeekQueryTraceRecord*>(record.get());
            IteratorTraceExecutionResult* iter_res =
                dynamic_cast<IteratorTraceExecutionResult*>(result.get());
            // Check if lower/upper bounds are correctly saved and decoded.
            std::string lower_str = iter_rec->GetLowerBound().ToString();
            std::string upper_str = iter_rec->GetUpperBound().ToString();
            std::string iter_key = iter_res->GetKey().ToString();
            std::string iter_value = iter_res->GetValue().ToString();
            if (!lower_str.empty() && !upper_str.empty()) {
              ASSERT_EQ(lower_str, "iter-1");
              ASSERT_EQ(upper_str, "iter-3");
              if (iter_res->GetValid()) {
                // If the iterator is valid, then
                // lower_bound <= key < upper_bound.
                ASSERT_GE(iter_key, lower_str);
                ASSERT_LT(iter_key, upper_str);
              } else {
                // If the iterator is invalid, then
                // key < lower_bound or key >= upper_bound.
                ASSERT_TRUE(iter_key < lower_str || iter_key >= upper_str);
              }
            }
            // If the iterator is invalid, the key and value should be empty.
            if (!iter_res->GetValid()) {
              ASSERT_TRUE(iter_key.empty());
              ASSERT_TRUE(iter_value.empty());
            }
          }
          result.reset();
        }
      }
    }
    // Status::Incomplete() will be returned when manually reading past the
    // trace end, or when Prepare() was not called.
    ASSERT_TRUE(s.IsIncomplete());
    ASSERT_TRUE(replayer->Next(nullptr).IsIncomplete());
    ASSERT_GE(res_handler.GetAvgLatency(), 0.0);
    ASSERT_EQ(res_handler.GetNumWrites(), 9);
    ASSERT_EQ(res_handler.GetNumGets(), 3);
    ASSERT_EQ(res_handler.GetNumIterSeeks(), 8);
    ASSERT_EQ(res_handler.GetNumMultiGets(), 0);
    res_handler.Reset();
  }

  ASSERT_OK(db2->Get(ro, handles[0], "a", &value));
  ASSERT_EQ("1", value);
  ASSERT_OK(db2->Get(ro, handles[0], "g", &value));
  ASSERT_EQ("12", value);
  ASSERT_TRUE(db2->Get(ro, handles[0], "hello", &value).IsNotFound());
  ASSERT_TRUE(db2->Get(ro, handles[0], "world", &value).IsNotFound());
  ASSERT_OK(db2->Get(ro, handles[1], "foo", &value));
  ASSERT_EQ("bar", value);
  ASSERT_OK(db2->Get(ro, handles[1], "rocksdb", &value));
  ASSERT_EQ("rocks", value);

  // Test execution of artificially created TraceRecords.
  uint64_t fake_ts = 1U;

  // Write
  batch.Clear();
  ASSERT_OK(batch.Put("trace-record-write1", "write1"));
  ASSERT_OK(batch.Put("trace-record-write2", "write2"));
  record.reset(new WriteQueryTraceRecord(batch.Data(), fake_ts++));
  ASSERT_OK(replayer->Execute(record, &result));
  ASSERT_TRUE(result != nullptr);
  ASSERT_OK(result->Accept(&res_handler));  // Write x 1
  ASSERT_OK(db2->Get(ro, handles[0], "trace-record-write1", &value));
  ASSERT_EQ("write1", value);
  ASSERT_OK(db2->Get(ro, handles[0], "trace-record-write2", &value));
  ASSERT_EQ("write2", value);
  ASSERT_GE(res_handler.GetAvgLatency(), 0.0);
  ASSERT_EQ(res_handler.GetNumWrites(), 1);
  ASSERT_EQ(res_handler.GetNumGets(), 0);
  ASSERT_EQ(res_handler.GetNumIterSeeks(), 0);
  ASSERT_EQ(res_handler.GetNumMultiGets(), 0);
  res_handler.Reset();

  // Get related
  // Get an existing key.
  record.reset(new GetQueryTraceRecord(handles[0]->GetID(),
                                       "trace-record-write1", fake_ts++));
  ASSERT_OK(replayer->Execute(record, &result));
  ASSERT_TRUE(result != nullptr);
  ASSERT_OK(result->Accept(&res_handler));  // Get x 1
  // Get a non-existing key; this should still return Status::OK().
  record.reset(new GetQueryTraceRecord(handles[0]->GetID(), "trace-record-get",
                                       fake_ts++));
  ASSERT_OK(replayer->Execute(record, &result));
  ASSERT_TRUE(result != nullptr);
  ASSERT_OK(result->Accept(&res_handler));  // Get x 2
  // Get from an invalid (non-existing) cf_id.
  uint32_t invalid_cf_id = handles[1]->GetID() + 1;
  record.reset(new GetQueryTraceRecord(invalid_cf_id, "whatever", fake_ts++));
  ASSERT_TRUE(replayer->Execute(record, &result).IsCorruption());
  ASSERT_TRUE(result == nullptr);
  ASSERT_GE(res_handler.GetAvgLatency(), 0.0);
  ASSERT_EQ(res_handler.GetNumWrites(), 0);
  ASSERT_EQ(res_handler.GetNumGets(), 2);
  ASSERT_EQ(res_handler.GetNumIterSeeks(), 0);
  ASSERT_EQ(res_handler.GetNumMultiGets(), 0);
  res_handler.Reset();

  // Iteration related
  for (IteratorSeekQueryTraceRecord::SeekType seekType :
       {IteratorSeekQueryTraceRecord::kSeek,
        IteratorSeekQueryTraceRecord::kSeekForPrev}) {
    // Seek to an existing key.
    record.reset(new IteratorSeekQueryTraceRecord(
        seekType, handles[0]->GetID(), "trace-record-write1", fake_ts++));
    ASSERT_OK(replayer->Execute(record, &result));
    ASSERT_TRUE(result != nullptr);
    ASSERT_OK(result->Accept(&res_handler));  // Seek x 1 in one iteration
    // Seek to a non-existing key; this should still return Status::OK().
    record.reset(new IteratorSeekQueryTraceRecord(
        seekType, handles[0]->GetID(), "trace-record-get", fake_ts++));
    ASSERT_OK(replayer->Execute(record, &result));
    ASSERT_TRUE(result != nullptr);
    ASSERT_OK(result->Accept(&res_handler));  // Seek x 2 in one iteration
    // Seek from an invalid cf_id.
    record.reset(new IteratorSeekQueryTraceRecord(seekType, invalid_cf_id,
                                                  "whatever", fake_ts++));
    ASSERT_TRUE(replayer->Execute(record, &result).IsCorruption());
    ASSERT_TRUE(result == nullptr);
  }
  ASSERT_GE(res_handler.GetAvgLatency(), 0.0);
  ASSERT_EQ(res_handler.GetNumWrites(), 0);
  ASSERT_EQ(res_handler.GetNumGets(), 0);
  ASSERT_EQ(res_handler.GetNumIterSeeks(), 4);  // Seek x 2 in two iterations
  ASSERT_EQ(res_handler.GetNumMultiGets(), 0);
  res_handler.Reset();

  // MultiGet related
  // Get existing keys.
  record.reset(new MultiGetQueryTraceRecord(
      std::vector<uint32_t>({handles[0]->GetID(), handles[1]->GetID()}),
      std::vector<std::string>({"a", "foo"}), fake_ts++));
  ASSERT_OK(replayer->Execute(record, &result));
  ASSERT_TRUE(result != nullptr);
  ASSERT_OK(result->Accept(&res_handler));  // MultiGet x 1
  // Get all non-existing keys; this should still return Status::OK().
  record.reset(new MultiGetQueryTraceRecord(
      std::vector<uint32_t>({handles[0]->GetID(), handles[1]->GetID()}),
      std::vector<std::string>({"no1", "no2"}), fake_ts++));
  ASSERT_OK(replayer->Execute(record, &result));
  ASSERT_TRUE(result != nullptr);
  ASSERT_OK(result->Accept(&res_handler));  // MultiGet x 2
  // Get a mix of existing and non-existing keys; this should still return
  // Status::OK().
  record.reset(new MultiGetQueryTraceRecord(
      std::vector<uint32_t>({handles[0]->GetID(), handles[1]->GetID()}),
      std::vector<std::string>({"a", "no2"}), fake_ts++));
  ASSERT_OK(replayer->Execute(record, &result));
  ASSERT_TRUE(result != nullptr);
  MultiValuesTraceExecutionResult* mvr =
      dynamic_cast<MultiValuesTraceExecutionResult*>(result.get());
  ASSERT_TRUE(mvr != nullptr);
  ASSERT_OK(mvr->GetMultiStatus()[0]);
  ASSERT_TRUE(mvr->GetMultiStatus()[1].IsNotFound());
  ASSERT_EQ(mvr->GetValues()[0], "1");
  ASSERT_EQ(mvr->GetValues()[1], "");
  ASSERT_OK(result->Accept(&res_handler));  // MultiGet x 3
  // Get from an invalid (non-existing) cf_id.
  record.reset(new MultiGetQueryTraceRecord(
      std::vector<uint32_t>(
          {handles[0]->GetID(), handles[1]->GetID(), invalid_cf_id}),
      std::vector<std::string>({"a", "foo", "whatever"}), fake_ts++));
  ASSERT_TRUE(replayer->Execute(record, &result).IsCorruption());
  ASSERT_TRUE(result == nullptr);
  // Empty MultiGet
  record.reset(new MultiGetQueryTraceRecord(
      std::vector<uint32_t>(), std::vector<std::string>(), fake_ts++));
  ASSERT_TRUE(replayer->Execute(record, &result).IsInvalidArgument());
  ASSERT_TRUE(result == nullptr);
  // MultiGet size mismatch
  record.reset(new MultiGetQueryTraceRecord(
      std::vector<uint32_t>({handles[0]->GetID(), handles[1]->GetID()}),
      std::vector<std::string>({"a"}), fake_ts++));
  ASSERT_TRUE(replayer->Execute(record, &result).IsInvalidArgument());
  ASSERT_TRUE(result == nullptr);
  ASSERT_GE(res_handler.GetAvgLatency(), 0.0);
  ASSERT_EQ(res_handler.GetNumWrites(), 0);
  ASSERT_EQ(res_handler.GetNumGets(), 0);
  ASSERT_EQ(res_handler.GetNumIterSeeks(), 0);
  ASSERT_EQ(res_handler.GetNumMultiGets(), 3);
  res_handler.Reset();

  replayer.reset();

  for (auto handle : handles) {
    delete handle;
  }
  delete db2;
  ASSERT_OK(DestroyDB(dbname2, options));
}
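
// With max_trace_file_size capped at just 5 bytes, none of the Put records
// fit into the trace, so the replay below is expected to restore nothing.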
TEST_F(DBTest2, TraceWithLimit) {
  Options options = CurrentOptions();
  options.merge_operator = MergeOperators::CreatePutOperator();
  ReadOptions ro;
  WriteOptions wo;
  TraceOptions trace_opts;
  EnvOptions env_opts;
  CreateAndReopenWithCF({"pikachu"}, options);
  Random rnd(301);

  // Test the max trace file size option.
  trace_opts.max_trace_file_size = 5;
  std::string trace_filename = dbname_ + "/rocksdb.trace1";
  std::unique_ptr<TraceWriter> trace_writer;
  ASSERT_OK(NewFileTraceWriter(env_, env_opts, trace_filename, &trace_writer));
  ASSERT_OK(db_->StartTrace(trace_opts, std::move(trace_writer)));
  ASSERT_OK(Put(0, "a", "1"));
  ASSERT_OK(Put(0, "b", "1"));
  ASSERT_OK(Put(0, "c", "1"));
  ASSERT_OK(db_->EndTrace());

  std::string dbname2 = test::PerThreadDBPath(env_, "/db_replay2");
  std::string value;
  ASSERT_OK(DestroyDB(dbname2, options));

  // Using a different name than db2, to pacify infer's use-after-lifetime
  // warnings (http://fbinfer.com).
  DB* db2_init = nullptr;
  options.create_if_missing = true;
  ASSERT_OK(DB::Open(options, dbname2, &db2_init));
  ColumnFamilyHandle* cf;
  ASSERT_OK(
      db2_init->CreateColumnFamily(ColumnFamilyOptions(), "pikachu", &cf));
  delete cf;
  delete db2_init;

  DB* db2 = nullptr;
  std::vector<ColumnFamilyDescriptor> column_families;
  ColumnFamilyOptions cf_options;
  cf_options.merge_operator = MergeOperators::CreatePutOperator();
  column_families.emplace_back("default", cf_options);
  column_families.emplace_back("pikachu", ColumnFamilyOptions());
  std::vector<ColumnFamilyHandle*> handles;
  DBOptions db_opts;
  db_opts.env = env_;
  ASSERT_OK(DB::Open(db_opts, dbname2, column_families, &handles, &db2));

  env_->SleepForMicroseconds(100);
  // Verify that the keys don't already exist
  ASSERT_TRUE(db2->Get(ro, handles[0], "a", &value).IsNotFound());
  ASSERT_TRUE(db2->Get(ro, handles[0], "b", &value).IsNotFound());
  ASSERT_TRUE(db2->Get(ro, handles[0], "c", &value).IsNotFound());

  std::unique_ptr<TraceReader> trace_reader;
  ASSERT_OK(NewFileTraceReader(env_, env_opts, trace_filename, &trace_reader));
  std::unique_ptr<Replayer> replayer;
  ASSERT_OK(
      db2->NewDefaultReplayer(handles, std::move(trace_reader), &replayer));
  ASSERT_OK(replayer->Prepare());
  ASSERT_OK(replayer->Replay(ReplayOptions(), nullptr));
  replayer.reset();

  ASSERT_TRUE(db2->Get(ro, handles[0], "a", &value).IsNotFound());
  ASSERT_TRUE(db2->Get(ro, handles[0], "b", &value).IsNotFound());
  ASSERT_TRUE(db2->Get(ro, handles[0], "c", &value).IsNotFound());

  for (auto handle : handles) {
    delete handle;
  }
  delete db2;
  ASSERT_OK(DestroyDB(dbname2, options));
}
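
// With sampling_frequency = 2, only every other operation is recorded, so of
// the five Puts only "b" and "d" should survive the replay below (which the
// final Get assertions confirm).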
TEST_F(DBTest2, TraceWithSampling) {
  Options options = CurrentOptions();
  ReadOptions ro;
  WriteOptions wo;
  TraceOptions trace_opts;
  EnvOptions env_opts;
  CreateAndReopenWithCF({"pikachu"}, options);
  Random rnd(301);

  // Test the trace file sampling options.
  trace_opts.sampling_frequency = 2;
  std::string trace_filename = dbname_ + "/rocksdb.trace_sampling";
  std::unique_ptr<TraceWriter> trace_writer;
  ASSERT_OK(NewFileTraceWriter(env_, env_opts, trace_filename, &trace_writer));
  ASSERT_OK(db_->StartTrace(trace_opts, std::move(trace_writer)));
  ASSERT_OK(Put(0, "a", "1"));
  ASSERT_OK(Put(0, "b", "2"));
  ASSERT_OK(Put(0, "c", "3"));
  ASSERT_OK(Put(0, "d", "4"));
  ASSERT_OK(Put(0, "e", "5"));
  ASSERT_OK(db_->EndTrace());

  std::string dbname2 = test::PerThreadDBPath(env_, "/db_replay_sampling");
  std::string value;
  ASSERT_OK(DestroyDB(dbname2, options));

  // Using a different name than db2, to pacify infer's use-after-lifetime
  // warnings (http://fbinfer.com).
  DB* db2_init = nullptr;
  options.create_if_missing = true;
  ASSERT_OK(DB::Open(options, dbname2, &db2_init));
  ColumnFamilyHandle* cf;
  ASSERT_OK(
      db2_init->CreateColumnFamily(ColumnFamilyOptions(), "pikachu", &cf));
  delete cf;
  delete db2_init;

  DB* db2 = nullptr;
  std::vector<ColumnFamilyDescriptor> column_families;
  ColumnFamilyOptions cf_options;
  column_families.emplace_back("default", cf_options);
  column_families.emplace_back("pikachu", ColumnFamilyOptions());
  std::vector<ColumnFamilyHandle*> handles;
  DBOptions db_opts;
  db_opts.env = env_;
  ASSERT_OK(DB::Open(db_opts, dbname2, column_families, &handles, &db2));
  env_->SleepForMicroseconds(100);

  ASSERT_TRUE(db2->Get(ro, handles[0], "a", &value).IsNotFound());
  ASSERT_TRUE(db2->Get(ro, handles[0], "b", &value).IsNotFound());
  ASSERT_TRUE(db2->Get(ro, handles[0], "c", &value).IsNotFound());
  ASSERT_TRUE(db2->Get(ro, handles[0], "d", &value).IsNotFound());
  ASSERT_TRUE(db2->Get(ro, handles[0], "e", &value).IsNotFound());

  std::unique_ptr<TraceReader> trace_reader;
  ASSERT_OK(NewFileTraceReader(env_, env_opts, trace_filename, &trace_reader));
  std::unique_ptr<Replayer> replayer;
  ASSERT_OK(
      db2->NewDefaultReplayer(handles, std::move(trace_reader), &replayer));
  ASSERT_OK(replayer->Prepare());
  ASSERT_OK(replayer->Replay(ReplayOptions(), nullptr));
  replayer.reset();

  ASSERT_TRUE(db2->Get(ro, handles[0], "a", &value).IsNotFound());
  ASSERT_FALSE(db2->Get(ro, handles[0], "b", &value).IsNotFound());
  ASSERT_TRUE(db2->Get(ro, handles[0], "c", &value).IsNotFound());
  ASSERT_FALSE(db2->Get(ro, handles[0], "d", &value).IsNotFound());
  ASSERT_TRUE(db2->Get(ro, handles[0], "e", &value).IsNotFound());

  for (auto handle : handles) {
    delete handle;
  }
  delete db2;
  ASSERT_OK(DestroyDB(dbname2, options));
}
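
// Two filter directions are exercised below: kTraceFilterWrite drops all
// write ops from the trace (so the replay restores nothing), and
// kTraceFilterGet drops the read ops (so the second trace holds only the
// 4 writes plus the trace header and footer).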
TEST_F(DBTest2, TraceWithFilter) {
  Options options = CurrentOptions();
  options.merge_operator = MergeOperators::CreatePutOperator();
  ReadOptions ro;
  WriteOptions wo;
  TraceOptions trace_opts;
  EnvOptions env_opts;
  CreateAndReopenWithCF({"pikachu"}, options);
  Random rnd(301);
  Iterator* single_iter = nullptr;

  trace_opts.filter = TraceFilterType::kTraceFilterWrite;

  std::string trace_filename = dbname_ + "/rocksdb.trace";
  std::unique_ptr<TraceWriter> trace_writer;
  ASSERT_OK(NewFileTraceWriter(env_, env_opts, trace_filename, &trace_writer));
  ASSERT_OK(db_->StartTrace(trace_opts, std::move(trace_writer)));

  ASSERT_OK(Put(0, "a", "1"));
  ASSERT_OK(Merge(0, "b", "2"));
  ASSERT_OK(Delete(0, "c"));
  ASSERT_OK(SingleDelete(0, "d"));
  ASSERT_OK(db_->DeleteRange(wo, dbfull()->DefaultColumnFamily(), "e", "f"));

  WriteBatch batch;
  ASSERT_OK(batch.Put("f", "11"));
  ASSERT_OK(batch.Merge("g", "12"));
  ASSERT_OK(batch.Delete("h"));
  ASSERT_OK(batch.SingleDelete("i"));
  ASSERT_OK(batch.DeleteRange("j", "k"));
  ASSERT_OK(db_->Write(wo, &batch));

  single_iter = db_->NewIterator(ro);
  single_iter->Seek("f");
  single_iter->SeekForPrev("g");
  delete single_iter;

  ASSERT_EQ("1", Get(0, "a"));
  ASSERT_EQ("12", Get(0, "g"));

  ASSERT_OK(Put(1, "foo", "bar"));
  ASSERT_OK(Put(1, "rocksdb", "rocks"));
  ASSERT_EQ("NOT_FOUND", Get(1, "leveldb"));

  ASSERT_OK(db_->EndTrace());
  // These should not get into the trace file as it is after EndTrace.
  ASSERT_OK(Put("hello", "world"));
  ASSERT_OK(Merge("foo", "bar"));

  // Open another db, replay, and verify the data
  std::string value;
  std::string dbname2 = test::PerThreadDBPath(env_, "db_replay");
  ASSERT_OK(DestroyDB(dbname2, options));

  // Using a different name than db2, to pacify infer's use-after-lifetime
  // warnings (http://fbinfer.com).
  DB* db2_init = nullptr;
  options.create_if_missing = true;
  ASSERT_OK(DB::Open(options, dbname2, &db2_init));
  ColumnFamilyHandle* cf;
  ASSERT_OK(
      db2_init->CreateColumnFamily(ColumnFamilyOptions(), "pikachu", &cf));
  delete cf;
  delete db2_init;

  DB* db2 = nullptr;
  std::vector<ColumnFamilyDescriptor> column_families;
  ColumnFamilyOptions cf_options;
  cf_options.merge_operator = MergeOperators::CreatePutOperator();
  column_families.emplace_back("default", cf_options);
  column_families.emplace_back("pikachu", ColumnFamilyOptions());
  std::vector<ColumnFamilyHandle*> handles;
  DBOptions db_opts;
  db_opts.env = env_;
  ASSERT_OK(DB::Open(db_opts, dbname2, column_families, &handles, &db2));

  env_->SleepForMicroseconds(100);
  // Verify that the keys don't already exist
  ASSERT_TRUE(db2->Get(ro, handles[0], "a", &value).IsNotFound());
  ASSERT_TRUE(db2->Get(ro, handles[0], "g", &value).IsNotFound());

  std::unique_ptr<TraceReader> trace_reader;
  ASSERT_OK(NewFileTraceReader(env_, env_opts, trace_filename, &trace_reader));
  std::unique_ptr<Replayer> replayer;
  ASSERT_OK(
      db2->NewDefaultReplayer(handles, std::move(trace_reader), &replayer));
  ASSERT_OK(replayer->Prepare());
  ASSERT_OK(replayer->Replay(ReplayOptions(), nullptr));
  replayer.reset();

  // None of the key-values should be present, since the WRITE ops were
  // filtered out of the trace.
  ASSERT_TRUE(db2->Get(ro, handles[0], "a", &value).IsNotFound());
  ASSERT_TRUE(db2->Get(ro, handles[0], "g", &value).IsNotFound());
  ASSERT_TRUE(db2->Get(ro, handles[0], "hello", &value).IsNotFound());
  ASSERT_TRUE(db2->Get(ro, handles[0], "world", &value).IsNotFound());
  ASSERT_TRUE(db2->Get(ro, handles[0], "foo", &value).IsNotFound());
  ASSERT_TRUE(db2->Get(ro, handles[0], "rocksdb", &value).IsNotFound());

  for (auto handle : handles) {
    delete handle;
  }
  delete db2;
  ASSERT_OK(DestroyDB(dbname2, options));

  // Set up a new db.
  std::string dbname3 = test::PerThreadDBPath(env_, "db_not_trace_read");
  ASSERT_OK(DestroyDB(dbname3, options));

  DB* db3_init = nullptr;
  options.create_if_missing = true;
  ColumnFamilyHandle* cf3;
  ASSERT_OK(DB::Open(options, dbname3, &db3_init));
  ASSERT_OK(
      db3_init->CreateColumnFamily(ColumnFamilyOptions(), "pikachu", &cf3));
  delete cf3;
  delete db3_init;

  column_families.clear();
  column_families.emplace_back("default", cf_options);
  column_families.emplace_back("pikachu", ColumnFamilyOptions());
  handles.clear();

  DB* db3 = nullptr;
  ASSERT_OK(DB::Open(db_opts, dbname3, column_families, &handles, &db3));

  env_->SleepForMicroseconds(100);
  // Verify that the keys don't already exist
  ASSERT_TRUE(db3->Get(ro, handles[0], "a", &value).IsNotFound());
  ASSERT_TRUE(db3->Get(ro, handles[0], "g", &value).IsNotFound());

  // The tracer will not record the READ ops.
  trace_opts.filter = TraceFilterType::kTraceFilterGet;
  std::string trace_filename3 = dbname_ + "/rocksdb.trace_3";
  std::unique_ptr<TraceWriter> trace_writer3;
  ASSERT_OK(
      NewFileTraceWriter(env_, env_opts, trace_filename3, &trace_writer3));
  ASSERT_OK(db3->StartTrace(trace_opts, std::move(trace_writer3)));

  ASSERT_OK(db3->Put(wo, handles[0], "a", "1"));
  ASSERT_OK(db3->Merge(wo, handles[0], "b", "2"));
  ASSERT_OK(db3->Delete(wo, handles[0], "c"));
  ASSERT_OK(db3->SingleDelete(wo, handles[0], "d"));

  ASSERT_OK(db3->Get(ro, handles[0], "a", &value));
  ASSERT_EQ(value, "1");
  ASSERT_TRUE(db3->Get(ro, handles[0], "c", &value).IsNotFound());

  ASSERT_OK(db3->EndTrace());

  for (auto handle : handles) {
    delete handle;
  }
  delete db3;
  ASSERT_OK(DestroyDB(dbname3, options));

  std::unique_ptr<TraceReader> trace_reader3;
  ASSERT_OK(
      NewFileTraceReader(env_, env_opts, trace_filename3, &trace_reader3));

  // Count the number of records in the trace file.
  int count = 0;
  std::string data;
  Status s;
  while (true) {
    s = trace_reader3->Read(&data);
    if (!s.ok()) {
      break;
    }
    count += 1;
  }
  // We also need to count the header and footer:
  // 4 WRITE + HEADER + FOOTER = 6.
  ASSERT_EQ(count, 6);
}
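
// Pinning rules for mmap-based reads, as exercised below: a PinnableSlice
// must not pin memory that compaction or table-cache eviction could munmap
// underneath it, so pinning is only allowed in read-only mode with an
// unbounded table cache (max_open_files = -1).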
TEST_F(DBTest2, PinnableSliceAndMmapReads) {
  Options options = CurrentOptions();
  options.env = env_;
  if (!IsMemoryMappedAccessSupported()) {
    ROCKSDB_GTEST_SKIP("Test requires default environment");
    return;
  }
  options.allow_mmap_reads = true;
  options.max_open_files = 100;
  options.compression = kNoCompression;
  Reopen(options);

  ASSERT_OK(Put("foo", "bar"));
  ASSERT_OK(Flush());

  PinnableSlice pinned_value;
  ASSERT_EQ(Get("foo", &pinned_value), Status::OK());
  // It is not safe to pin mmap files as they might disappear by compaction
  ASSERT_FALSE(pinned_value.IsPinned());
  ASSERT_EQ(pinned_value.ToString(), "bar");

  ASSERT_OK(dbfull()->TEST_CompactRange(
      0 /* level */, nullptr /* begin */, nullptr /* end */,
      nullptr /* column_family */, true /* disallow_trivial_move */));

  // Ensure pinned_value doesn't rely on memory munmap'd by the above
  // compaction. It crashes if it does.
  ASSERT_EQ(pinned_value.ToString(), "bar");

  pinned_value.Reset();
  // Unsafe to pin mmap files when they could be kicked out of table cache
  Close();
  ASSERT_OK(ReadOnlyReopen(options));
  ASSERT_EQ(Get("foo", &pinned_value), Status::OK());
  ASSERT_FALSE(pinned_value.IsPinned());
  ASSERT_EQ(pinned_value.ToString(), "bar");

  pinned_value.Reset();
  // In read-only mode with infinite capacity on table cache it should pin the
  // value and avoid the memcpy
  Close();
  options.max_open_files = -1;
  ASSERT_OK(ReadOnlyReopen(options));
  ASSERT_EQ(Get("foo", &pinned_value), Status::OK());
  ASSERT_TRUE(pinned_value.IsPinned());
  ASSERT_EQ(pinned_value.ToString(), "bar");
}

TEST_F(DBTest2, DISABLED_IteratorPinnedMemory) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  BlockBasedTableOptions bbto;
  bbto.no_block_cache = false;
  bbto.cache_index_and_filter_blocks = false;
  bbto.block_cache = NewLRUCache(100000);
  bbto.block_size = 400;  // small block size
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  Reopen(options);

  Random rnd(301);
  std::string v = rnd.RandomString(400);
  // Since v is the size of a block, each key should take a block
  // of 400+ bytes.
  ASSERT_OK(Put("1", v));
  ASSERT_OK(Put("3", v));
  ASSERT_OK(Put("5", v));
  ASSERT_OK(Put("7", v));
  ASSERT_OK(Flush());

  ASSERT_EQ(0, bbto.block_cache->GetPinnedUsage());

  // Verify that iterators don't pin more than one data block in block cache
  // at a time.
  {
    std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions()));
    iter->SeekToFirst();

    for (int i = 0; i < 4; i++) {
      ASSERT_TRUE(iter->Valid());
      // Block cache should contain exactly one block.
      ASSERT_GT(bbto.block_cache->GetPinnedUsage(), 0);
      ASSERT_LT(bbto.block_cache->GetPinnedUsage(), 800);
      iter->Next();
    }
    ASSERT_FALSE(iter->Valid());

    iter->Seek("4");
    ASSERT_TRUE(iter->Valid());
    ASSERT_GT(bbto.block_cache->GetPinnedUsage(), 0);
    ASSERT_LT(bbto.block_cache->GetPinnedUsage(), 800);

    iter->Seek("3");
    ASSERT_TRUE(iter->Valid());
    ASSERT_OK(iter->status());
    ASSERT_GT(bbto.block_cache->GetPinnedUsage(), 0);
    ASSERT_LT(bbto.block_cache->GetPinnedUsage(), 800);
  }
  ASSERT_EQ(0, bbto.block_cache->GetPinnedUsage());

  // Test compaction case
  ASSERT_OK(Put("2", v));
  ASSERT_OK(Put("5", v));
  ASSERT_OK(Put("6", v));
  ASSERT_OK(Put("8", v));
  ASSERT_OK(Flush());

  // Clear existing data in block cache
  bbto.block_cache->SetCapacity(0);
  bbto.block_cache->SetCapacity(100000);

  // Verify that compaction input iterators don't hold more than one data
  // block at a time.
  std::atomic<bool> finished(false);
  std::atomic<int> block_newed(0);
  std::atomic<int> block_destroyed(0);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "Block::Block:0", [&](void* /*arg*/) {
        if (finished) {
          return;
        }
        // Two iterators. At most 2 outstanding blocks.
        EXPECT_GE(block_newed.load(), block_destroyed.load());
        EXPECT_LE(block_newed.load(), block_destroyed.load() + 1);
        block_newed.fetch_add(1);
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "Block::~Block", [&](void* /*arg*/) {
        if (finished) {
          return;
        }
        // Two iterators. At most 2 outstanding blocks.
        EXPECT_GE(block_newed.load(), block_destroyed.load() + 1);
        EXPECT_LE(block_newed.load(), block_destroyed.load() + 2);
        block_destroyed.fetch_add(1);
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "CompactionJob::Run:BeforeVerify",
      [&](void* /*arg*/) { finished = true; });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));

  // Two input files. Each of them has 4 data blocks.
  ASSERT_EQ(8, block_newed.load());
  ASSERT_EQ(8, block_destroyed.load());

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

TEST_F(DBTest2, TestGetColumnFamilyHandleUnlocked) {
  // Setup sync point dependency to reproduce the race condition of
  // DBImpl::GetColumnFamilyHandleUnlocked
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({
      {"TestGetColumnFamilyHandleUnlocked::GetColumnFamilyHandleUnlocked1",
       "TestGetColumnFamilyHandleUnlocked::PreGetColumnFamilyHandleUnlocked2"},
      {"TestGetColumnFamilyHandleUnlocked::GetColumnFamilyHandleUnlocked2",
       "TestGetColumnFamilyHandleUnlocked::ReadColumnFamilyHandle1"},
  });
  SyncPoint::GetInstance()->EnableProcessing();

  CreateColumnFamilies({"test1", "test2"}, Options());
  ASSERT_EQ(handles_.size(), 2);

  DBImpl* dbi = static_cast_with_check<DBImpl>(db_);
  port::Thread user_thread1([&]() {
    auto cfh = dbi->GetColumnFamilyHandleUnlocked(handles_[0]->GetID());
    ASSERT_EQ(cfh->GetID(), handles_[0]->GetID());
    TEST_SYNC_POINT(
        "TestGetColumnFamilyHandleUnlocked::GetColumnFamilyHandleUnlocked1");
    TEST_SYNC_POINT(
        "TestGetColumnFamilyHandleUnlocked::ReadColumnFamilyHandle1");
    ASSERT_EQ(cfh->GetID(), handles_[0]->GetID());
  });

  port::Thread user_thread2([&]() {
    TEST_SYNC_POINT(
        "TestGetColumnFamilyHandleUnlocked::PreGetColumnFamilyHandleUnlocked2");
    auto cfh = dbi->GetColumnFamilyHandleUnlocked(handles_[1]->GetID());
    ASSERT_EQ(cfh->GetID(), handles_[1]->GetID());
    TEST_SYNC_POINT(
        "TestGetColumnFamilyHandleUnlocked::GetColumnFamilyHandleUnlocked2");
    ASSERT_EQ(cfh->GetID(), handles_[1]->GetID());
  });

  user_thread1.join();
  user_thread2.join();

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
}

TEST_F(DBTest2, TestCompactFiles) {
  // Setup sync point dependency to reproduce the race between
  // CompactFiles() and a concurrent IngestExternalFile().
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({
      {"TestCompactFiles::IngestExternalFile1",
       "TestCompactFiles::IngestExternalFile2"},
  });
  SyncPoint::GetInstance()->EnableProcessing();

  Options options;
  options.env = env_;
  options.num_levels = 2;
  options.disable_auto_compactions = true;
  Reopen(options);
  auto* handle = db_->DefaultColumnFamily();
  ASSERT_EQ(db_->NumberLevels(handle), 2);

  ROCKSDB_NAMESPACE::SstFileWriter sst_file_writer{
      ROCKSDB_NAMESPACE::EnvOptions(), options};
  std::string external_file1 = dbname_ + "/test_compact_files1.sst_t";
  std::string external_file2 = dbname_ + "/test_compact_files2.sst_t";
  std::string external_file3 = dbname_ + "/test_compact_files3.sst_t";

  ASSERT_OK(sst_file_writer.Open(external_file1));
  ASSERT_OK(sst_file_writer.Put("1", "1"));
  ASSERT_OK(sst_file_writer.Put("2", "2"));
  ASSERT_OK(sst_file_writer.Finish());

  ASSERT_OK(sst_file_writer.Open(external_file2));
  ASSERT_OK(sst_file_writer.Put("3", "3"));
  ASSERT_OK(sst_file_writer.Put("4", "4"));
  ASSERT_OK(sst_file_writer.Finish());

  ASSERT_OK(sst_file_writer.Open(external_file3));
  ASSERT_OK(sst_file_writer.Put("5", "5"));
  ASSERT_OK(sst_file_writer.Put("6", "6"));
  ASSERT_OK(sst_file_writer.Finish());

  ASSERT_OK(db_->IngestExternalFile(handle, {external_file1, external_file3},
                                    IngestExternalFileOptions()));
  ASSERT_EQ(NumTableFilesAtLevel(1, 0), 2);
  std::vector<std::string> files;
  GetSstFiles(env_, dbname_, &files);
  ASSERT_EQ(files.size(), 2);

  Status user_thread1_status;
  port::Thread user_thread1([&]() {
    user_thread1_status =
        db_->CompactFiles(CompactionOptions(), handle, files, 1);
  });

  Status user_thread2_status;
  port::Thread user_thread2([&]() {
    user_thread2_status = db_->IngestExternalFile(handle, {external_file2},
                                                  IngestExternalFileOptions());
    TEST_SYNC_POINT("TestCompactFiles::IngestExternalFile1");
  });

  user_thread1.join();
  user_thread2.join();

  ASSERT_OK(user_thread1_status);
  ASSERT_OK(user_thread2_status);

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
}
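
// Exercises several ways a CompactFiles() call can be canceled: the canceled
// flag already set before the call, the flag flipped while the job is
// running, and DisableManualCompaction(); plus one case where cancellation
// arrives too late to stop the compaction.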
TEST_F(DBTest2, TestCancelCompactFiles) {
  SyncPoint::GetInstance()->EnableProcessing();
  Options options;
  options.env = env_;
  options.num_levels = 2;
  options.disable_auto_compactions = true;
  Reopen(options);
  auto* handle = db_->DefaultColumnFamily();
  ASSERT_EQ(db_->NumberLevels(handle), 2);

  ROCKSDB_NAMESPACE::SstFileWriter sst_file_writer{
      ROCKSDB_NAMESPACE::EnvOptions(), options};

  // Ingest large SST files.
  std::vector<std::string> external_sst_file_names;
  int key_counter = 0;
  const int num_keys_per_file = 100000;
  const int num_files = 10;
  for (int i = 0; i < num_files; ++i) {
    std::string file_name =
        dbname_ + "/test_compact_files" + std::to_string(i) + ".sst_t";
    external_sst_file_names.push_back(file_name);
    ASSERT_OK(sst_file_writer.Open(file_name));
    for (int j = 0; j < num_keys_per_file; ++j) {
      ASSERT_OK(sst_file_writer.Put(Key(j + num_keys_per_file * key_counter),
                                    std::to_string(j)));
    }
    key_counter += 1;
    ASSERT_OK(sst_file_writer.Finish());
  }
  ASSERT_OK(db_->IngestExternalFile(handle, external_sst_file_names,
                                    IngestExternalFileOptions()));
  ASSERT_EQ(NumTableFilesAtLevel(1, 0), num_files);
  std::vector<std::string> files;
  GetSstFiles(env_, dbname_, &files);
  ASSERT_EQ(files.size(), num_files);

  // Test that 0 compactions happen - canceled is set to true initially.
  CompactionOptions compaction_options;
  std::atomic<bool> canceled(true);
  compaction_options.canceled = &canceled;
  ASSERT_TRUE(db_->CompactFiles(compaction_options, handle, files, 1)
                  .IsManualCompactionPaused());
  ASSERT_EQ(NumTableFilesAtLevel(1, 0), num_files);

  // Test cancellation before the check to cancel compaction happens -
  // compaction should not occur.
  bool disable_compaction = false;
  compaction_options.canceled->store(false, std::memory_order_release);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "TestCancelCompactFiles:SuccessfulCompaction", [&](void* arg) {
        auto paused = static_cast<std::atomic<int>*>(arg);
        if (disable_compaction) {
          db_->DisableManualCompaction();
          ASSERT_EQ(1, paused->load(std::memory_order_acquire));
        } else {
          compaction_options.canceled->store(true, std::memory_order_release);
          ASSERT_EQ(0, paused->load(std::memory_order_acquire));
        }
      });
  ASSERT_TRUE(db_->CompactFiles(compaction_options, handle, files, 1)
                  .IsManualCompactionPaused());
  ASSERT_EQ(NumTableFilesAtLevel(1, 0), num_files);

  // DisableManualCompaction() should successfully cancel compaction.
  disable_compaction = true;
  compaction_options.canceled->store(false, std::memory_order_release);
  ASSERT_TRUE(db_->CompactFiles(compaction_options, handle, files, 1)
                  .IsManualCompactionPaused());
  ASSERT_EQ(NumTableFilesAtLevel(1, 0), num_files);
  // Unlike CompactRange, the value of compaction_options.canceled is
  // unaffected by calling DisableManualCompaction().
  ASSERT_FALSE(compaction_options.canceled->load());

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
  db_->EnableManualCompaction();

  // Test cancellation after the check to cancel compaction - compaction
  // should occur, leaving only 1 file.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "CompactFilesImpl:0", [&](void* /*arg*/) {
        compaction_options.canceled->store(true, std::memory_order_release);
      });
  compaction_options.canceled->store(false, std::memory_order_release);
  ASSERT_OK(db_->CompactFiles(compaction_options, handle, files, 1));
  ASSERT_EQ(NumTableFilesAtLevel(1, 0), 1);

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
}

TEST_F(DBTest2, MultiDBParallelOpenTest) {
  const int kNumDbs = 2;
  Options options = CurrentOptions();
  std::vector<std::string> dbnames;
  for (int i = 0; i < kNumDbs; ++i) {
    dbnames.emplace_back(test::PerThreadDBPath(env_, "db" + std::to_string(i)));
    ASSERT_OK(DestroyDB(dbnames.back(), options));
  }

  // Verify empty DBs can be created in parallel
  std::vector<std::thread> open_threads;
  std::vector<DB*> dbs{static_cast<unsigned int>(kNumDbs), nullptr};
  options.create_if_missing = true;
  for (int i = 0; i < kNumDbs; ++i) {
    open_threads.emplace_back(
        [&](int dbnum) {
          ASSERT_OK(DB::Open(options, dbnames[dbnum], &dbs[dbnum]));
        },
        i);
  }

  // Now add some data and close, so next we can verify non-empty DBs can be
  // recovered in parallel
  for (int i = 0; i < kNumDbs; ++i) {
    open_threads[i].join();
    ASSERT_OK(dbs[i]->Put(WriteOptions(), "xi", "gua"));
    delete dbs[i];
  }

  // Verify non-empty DBs can be recovered in parallel
  open_threads.clear();
  for (int i = 0; i < kNumDbs; ++i) {
    open_threads.emplace_back(
        [&](int dbnum) {
          ASSERT_OK(DB::Open(options, dbnames[dbnum], &dbs[dbnum]));
        },
        i);
  }

  // Wait and cleanup
  for (int i = 0; i < kNumDbs; ++i) {
    open_threads[i].join();
    delete dbs[i];
    ASSERT_OK(DestroyDB(dbnames[i], options));
  }
}

namespace {
class DummyOldStats : public Statistics {
 public:
  const char* Name() const override { return "DummyOldStats"; }
  uint64_t getTickerCount(uint32_t /*ticker_type*/) const override { return 0; }
  void recordTick(uint32_t /* ticker_type */, uint64_t /* count */) override {
    num_rt++;
  }
  void setTickerCount(uint32_t /*ticker_type*/, uint64_t /*count*/) override {}
  uint64_t getAndResetTickerCount(uint32_t /*ticker_type*/) override {
    return 0;
  }
  void measureTime(uint32_t /*histogram_type*/, uint64_t /*count*/) override {
    num_mt++;
  }
  void histogramData(
      uint32_t /*histogram_type*/,
      ROCKSDB_NAMESPACE::HistogramData* const /*data*/) const override {}
  std::string getHistogramString(uint32_t /*type*/) const override {
    return "";
  }
  bool HistEnabledForType(uint32_t /*type*/) const override { return false; }
  std::string ToString() const override { return ""; }

  std::atomic<int> num_rt{0};
  std::atomic<int> num_mt{0};
};
}  // anonymous namespace
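
// DummyOldStats implements only the legacy Statistics virtual interface
// (recordTick()/measureTime()); the test below verifies that such
// implementations still receive callbacks from the DB.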
TEST_F(DBTest2, OldStatsInterface) {
  DummyOldStats* dos = new DummyOldStats();
  std::shared_ptr<Statistics> stats(dos);
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = stats;
  Reopen(options);

  ASSERT_OK(Put("foo", "bar"));
  ASSERT_EQ("bar", Get("foo"));
  ASSERT_OK(Flush());
  ASSERT_EQ("bar", Get("foo"));

  ASSERT_GT(dos->num_rt, 0);
  ASSERT_GT(dos->num_mt, 0);
}

TEST_F(DBTest2, CloseWithUnreleasedSnapshot) {
  const Snapshot* ss = db_->GetSnapshot();

  for (auto h : handles_) {
    db_->DestroyColumnFamilyHandle(h);
  }
  handles_.clear();

  ASSERT_NOK(db_->Close());
  db_->ReleaseSnapshot(ss);
  ASSERT_OK(db_->Close());
  delete db_;
  db_ = nullptr;
}
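
// A capped prefix extractor of length 3 puts "aaa1", "bbb1", and "ccc1" under
// distinct prefixes, so f1's prefix bloom filter can reject the seek to
// "bbb1" even though "bbb1" sorts inside f1's key range.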
TEST_F(DBTest2, PrefixBloomReseek) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.prefix_extractor.reset(NewCappedPrefixTransform(3));
  BlockBasedTableOptions bbto;
  bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
  bbto.whole_key_filtering = false;
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  DestroyAndReopen(options);

  // Construct two L1 files with keys:
  // f1:[aaa1 ccc1] f2:[ddd0]
  ASSERT_OK(Put("aaa1", ""));
  ASSERT_OK(Put("ccc1", ""));
  ASSERT_OK(Flush());
  ASSERT_OK(Put("ddd0", ""));
  ASSERT_OK(Flush());
  CompactRangeOptions cro;
  cro.bottommost_level_compaction = BottommostLevelCompaction::kSkip;
  ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));

  ASSERT_OK(Put("bbb1", ""));

  Iterator* iter = db_->NewIterator(ReadOptions());
  ASSERT_OK(iter->status());

  // Seeking into f1, the iterator checks the bloom filter, which reports the
  // file iterator to be invalid, so the cursor is placed into f2, with the
  // next key being "ddd0".
  iter->Seek("bbb1");
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("bbb1", iter->key().ToString());

  // Reseek to ccc1: the L1 iterator needs to go back to f1 and reseek.
  iter->Seek("ccc1");
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("ccc1", iter->key().ToString());

  delete iter;
}
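
// Same construction as PrefixBloomReseek, but "bbb1" is never inserted, so
// the seek key's prefix is absent from every file; the test then checks the
// iterator contract under both prefix_seek_opt_in_only and both
// prefix_same_as_start settings.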
  4314. TEST_F(DBTest2, PrefixBloomFilteredOut) {
  4315. Options options = CurrentOptions();
  4316. options.create_if_missing = true;
  4317. options.prefix_extractor.reset(NewCappedPrefixTransform(3));
  4318. BlockBasedTableOptions bbto;
  4319. bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
  4320. bbto.whole_key_filtering = false;
  4321. options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  4322. // This test is also the primary test for prefix_seek_opt_in_only
  4323. for (bool opt_in : {false, true}) {
  4324. options.prefix_seek_opt_in_only = opt_in;
  4325. DestroyAndReopen(options);
  4326. // Construct two L1 files with keys:
  4327. // f1:[aaa1 ccc1] f2:[ddd0]
  4328. ASSERT_OK(Put("aaa1", ""));
  4329. ASSERT_OK(Put("ccc1", ""));
  4330. ASSERT_OK(Flush());
  4331. ASSERT_OK(Put("ddd0", ""));
  4332. ASSERT_OK(Flush());
  4333. CompactRangeOptions cro;
  4334. cro.bottommost_level_compaction = BottommostLevelCompaction::kSkip;
  4335. ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
  4336. ReadOptions ropts;
  4337. for (bool same : {false, true}) {
  4338. ropts.prefix_same_as_start = same;
  4339. std::unique_ptr<Iterator> iter(db_->NewIterator(ropts));
  4340. ASSERT_OK(iter->status());
  4341. iter->Seek("bbb1");
  4342. ASSERT_OK(iter->status());
  4343. if (opt_in && !same) {
  4344. // Unbounded total order seek
  4345. ASSERT_TRUE(iter->Valid());
  4346. ASSERT_EQ(iter->key(), "ccc1");
  4347. } else {
        // The seek is filtered out by f1's bloom filter. When same == false,
        // this is just one valid position following the contract; positioning
        // to ccc1 or ddd0 would also be valid. This only validates the
        // behavior of the current implementation; if the underlying
        // implementation changes, the test might fail here.
        ASSERT_FALSE(iter->Valid());
      }
    }
  }
}
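
// Row cache lookups must respect snapshots: a read at a snapshot can only
// hit a cached entry whose value is visible at that snapshot, as the
// hit/miss ticker counts below verify.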
TEST_F(DBTest2, RowCacheSnapshot) {
  Options options = CurrentOptions();
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  options.row_cache = NewLRUCache(8 * 8192);
  DestroyAndReopen(options);

  ASSERT_OK(Put("foo", "bar1"));
  const Snapshot* s1 = db_->GetSnapshot();
  ASSERT_OK(Put("foo", "bar2"));
  ASSERT_OK(Flush());
  ASSERT_OK(Put("foo2", "bar"));
  const Snapshot* s2 = db_->GetSnapshot();
  ASSERT_OK(Put("foo3", "bar"));
  const Snapshot* s3 = db_->GetSnapshot();

  ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 0);
  ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 0);
  ASSERT_EQ(Get("foo"), "bar2");
  ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 0);
  ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 1);
  ASSERT_EQ(Get("foo"), "bar2");
  ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 1);
  ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 1);
  ASSERT_EQ(Get("foo", s1), "bar1");
  ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 1);
  ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 2);
  ASSERT_EQ(Get("foo", s2), "bar2");
  ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 2);
  ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 2);
  ASSERT_EQ(Get("foo", s1), "bar1");
  ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 3);
  ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 2);
  ASSERT_EQ(Get("foo", s3), "bar2");
  ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 4);
  ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 2);

  db_->ReleaseSnapshot(s1);
  db_->ReleaseSnapshot(s2);
  db_->ReleaseSnapshot(s3);
}
// When a DB is reopened with multiple column families, the manifest file is
// written after the first CF is flushed, and written again after each
// subsequent flush. If the DB crashes between the flushes, the flushed CF
// will have advanced past the latest log file, and we then require that log
// file not to be corrupted; otherwise a corruption report is triggered.
// We need to fix the bug and enable the test.
TEST_F(DBTest2, CrashInRecoveryMultipleCF) {
  const std::vector<std::string> sync_points = {
      "DBImpl::RecoverLogFiles:BeforeFlushFinalMemtable",
      "VersionSet::ProcessManifestWrites:BeforeWriteLastVersionEdit:0"};
  for (const auto& test_sync_point : sync_points) {
    Options options = CurrentOptions();
    // First destroy original db to ensure a clean start.
    DestroyAndReopen(options);
    options.create_if_missing = true;
    options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery;
    CreateAndReopenWithCF({"pikachu"}, options);
    ASSERT_OK(Put("foo", "bar"));
    ASSERT_OK(Flush());
    ASSERT_OK(Put(1, "foo", "bar"));
    ASSERT_OK(Flush(1));
    ASSERT_OK(Put("foo", "bar"));
    ASSERT_OK(Put(1, "foo", "bar"));
    // The value is large enough to be divided into two blocks.
    std::string large_value(400, ' ');
    ASSERT_OK(Put("foo1", large_value));
    ASSERT_OK(Put("foo2", large_value));
    Close();

    // Corrupt the log file in the middle, so that it is not corrupted
    // in the tail.
    std::vector<std::string> filenames;
    ASSERT_OK(env_->GetChildren(dbname_, &filenames));
    for (const auto& f : filenames) {
      uint64_t number;
      FileType type;
      if (ParseFileName(f, &number, &type) && type == FileType::kWalFile) {
        std::string fname = dbname_ + "/" + f;
        std::string file_content;
        ASSERT_OK(ReadFileToString(env_, fname, &file_content));
        file_content[400] = 'h';
        file_content[401] = 'a';
        ASSERT_OK(WriteStringToFile(env_, file_content, fname, false));
        break;
      }
    }

    // Reopen and freeze the file system after the first manifest write.
    FaultInjectionTestEnv fit_env(options.env);
    options.env = &fit_env;
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
        test_sync_point,
        [&](void* /*arg*/) { fit_env.SetFilesystemActive(false); });
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
    ASSERT_NOK(TryReopenWithColumnFamilies(
        {kDefaultColumnFamilyName, "pikachu"}, options));
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
    fit_env.SetFilesystemActive(true);
    // If we continue using the failure-injection Env, it will complain about
    // something when renaming the CURRENT file, which is not expected. Need
    // to investigate why.
    options.env = env_;
    ASSERT_OK(TryReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu"},
                                          options));
  }
}
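
// A range tombstone ["a", "f") covers the tail of the first file, so
// Seek("e") must skip past that file and land on "x" in the next one.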
TEST_F(DBTest2, SeekFileRangeDeleteTail) {
  Options options = CurrentOptions();
  options.prefix_extractor.reset(NewCappedPrefixTransform(1));
  options.num_levels = 3;
  DestroyAndReopen(options);

  ASSERT_OK(Put("a", "a"));
  const Snapshot* s1 = db_->GetSnapshot();
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "f"));
  ASSERT_OK(Put("b", "a"));
  ASSERT_OK(Flush());

  ASSERT_OK(Put("x", "a"));
  ASSERT_OK(Put("z", "a"));
  ASSERT_OK(Flush());

  CompactRangeOptions cro;
  cro.change_level = true;
  cro.target_level = 2;
  ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));

  {
    ReadOptions ro;
    ro.total_order_seek = true;
    std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
    ASSERT_OK(iter->status());
    iter->Seek("e");
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("x", iter->key().ToString());
  }
  db_->ReleaseSnapshot(s1);
}
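
// With avoid_unnecessary_blocking_io set, memory pinned by an iterator is
// reclaimed by a background purge job in the HIGH-priority pool rather than
// inline when the iterator is deleted, so usage drops back to the baseline
// only after the purge has had a chance to run.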
TEST_F(DBTest2, BackgroundPurgeTest) {
  Options options = CurrentOptions();
  options.write_buffer_manager =
      std::make_shared<ROCKSDB_NAMESPACE::WriteBufferManager>(1 << 20);
  options.avoid_unnecessary_blocking_io = true;
  DestroyAndReopen(options);

  size_t base_value = options.write_buffer_manager->memory_usage();
  ASSERT_OK(Put("a", "a"));
  Iterator* iter = db_->NewIterator(ReadOptions());
  ASSERT_OK(iter->status());
  ASSERT_OK(Flush());
  size_t value = options.write_buffer_manager->memory_usage();
  ASSERT_GT(value, base_value);

  db_->GetEnv()->SetBackgroundThreads(1, Env::Priority::HIGH);
  test::SleepingBackgroundTask sleeping_task_after;
  db_->GetEnv()->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
                          &sleeping_task_after, Env::Priority::HIGH);
  delete iter;

  Env::Default()->SleepForMicroseconds(100000);
  value = options.write_buffer_manager->memory_usage();
  ASSERT_GT(value, base_value);

  sleeping_task_after.WakeUp();
  sleeping_task_after.WaitUntilDone();

  test::SleepingBackgroundTask sleeping_task_after2;
  db_->GetEnv()->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
                          &sleeping_task_after2, Env::Priority::HIGH);
  sleeping_task_after2.WakeUp();
  sleeping_task_after2.WaitUntilDone();

  value = options.write_buffer_manager->memory_usage();
  ASSERT_EQ(base_value, value);
}
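
// This appears intended as a regression test: a flush that switches the
// memtable runs concurrently with manifest rollover, which the tiny
// max_manifest_file_size forces to happen frequently, and must not race.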
TEST_F(DBTest2, SwitchMemtableRaceWithNewManifest) {
  Options options = CurrentOptions();
  DestroyAndReopen(options);
  options.max_manifest_file_size = 10;
  options.create_if_missing = true;
  CreateAndReopenWithCF({"pikachu"}, options);
  ASSERT_EQ(2, handles_.size());

  ASSERT_OK(Put("foo", "value"));
  const int kL0Files = options.level0_file_num_compaction_trigger;
  for (int i = 0; i < kL0Files; ++i) {
    ASSERT_OK(Put(/*cf=*/1, "a", std::to_string(i)));
    ASSERT_OK(Flush(/*cf=*/1));
  }

  port::Thread thread([&]() { ASSERT_OK(Flush()); });
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  thread.join();
}

TEST_F(DBTest2, SameSmallestInSameLevel) {
  // This test validates fractional cascading logic when several files at one
  // level contain only the same user key.
  Options options = CurrentOptions();
  options.merge_operator = MergeOperators::CreateStringAppendOperator();
  DestroyAndReopen(options);

  ASSERT_OK(Put("key", "1"));
  ASSERT_OK(Put("key", "2"));
  ASSERT_OK(db_->Merge(WriteOptions(), "key", "3"));
  ASSERT_OK(db_->Merge(WriteOptions(), "key", "4"));
  ASSERT_OK(Flush());
  CompactRangeOptions cro;
  cro.change_level = true;
  cro.target_level = 2;
  ASSERT_OK(dbfull()->CompactRange(cro, db_->DefaultColumnFamily(), nullptr,
                                   nullptr));

  ASSERT_OK(db_->Merge(WriteOptions(), "key", "5"));
  ASSERT_OK(Flush());
  ASSERT_OK(db_->Merge(WriteOptions(), "key", "6"));
  ASSERT_OK(Flush());
  ASSERT_OK(db_->Merge(WriteOptions(), "key", "7"));
  ASSERT_OK(Flush());
  ASSERT_OK(db_->Merge(WriteOptions(), "key", "8"));
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ("0,4,1", FilesPerLevel());

  ASSERT_EQ("2,3,4,5,6,7,8", Get("key"));
}
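
// Inject a consistency-check failure via sync point; with
// force_consistency_checks enabled, reopening the DB must fail.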
TEST_F(DBTest2, FileConsistencyCheckInOpen) {
  ASSERT_OK(Put("foo", "bar"));
  ASSERT_OK(Flush());

  SyncPoint::GetInstance()->SetCallBack(
      "VersionBuilder::CheckConsistencyBeforeReturn", [&](void* arg) {
        Status* ret_s = static_cast<Status*>(arg);
        *ret_s = Status::Corruption("fcc");
      });
  SyncPoint::GetInstance()->EnableProcessing();

  Options options = CurrentOptions();
  options.force_consistency_checks = true;
  ASSERT_NOK(TryReopen(options));

  SyncPoint::GetInstance()->DisableProcessing();
}
TEST_F(DBTest2, BlockBasedTablePrefixIndexSeekForPrev) {
  // create a DB with block prefix index
  BlockBasedTableOptions table_options;
  Options options = CurrentOptions();
  table_options.block_size = 300;
  table_options.index_type = BlockBasedTableOptions::kHashSearch;
  table_options.index_shortening =
      BlockBasedTableOptions::IndexShorteningMode::kNoShortening;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  options.prefix_extractor.reset(NewFixedPrefixTransform(1));
  Reopen(options);

  Random rnd(301);
  std::string large_value = rnd.RandomString(500);

  ASSERT_OK(Put("a1", large_value));
  ASSERT_OK(Put("x1", large_value));
  ASSERT_OK(Put("y1", large_value));
  ASSERT_OK(Flush());

  {
    std::unique_ptr<Iterator> iterator(db_->NewIterator(ReadOptions()));
    ASSERT_OK(iterator->status());
    iterator->SeekForPrev("x3");
    ASSERT_TRUE(iterator->Valid());
    ASSERT_EQ("x1", iterator->key().ToString());

    iterator->SeekForPrev("a3");
    ASSERT_TRUE(iterator->Valid());
    ASSERT_EQ("a1", iterator->key().ToString());

    iterator->SeekForPrev("y3");
    ASSERT_TRUE(iterator->Valid());
    ASSERT_EQ("y1", iterator->key().ToString());
    // Query more than one non-existing prefix to cover both the
    // empty-hash-bucket case and the hash-bucket-conflict case.
    iterator->SeekForPrev("b1");
    // Result should be not valid or "a1".
    if (iterator->Valid()) {
      ASSERT_EQ("a1", iterator->key().ToString());
    }

    iterator->SeekForPrev("c1");
    // Result should be not valid or "a1".
    if (iterator->Valid()) {
      ASSERT_EQ("a1", iterator->key().ToString());
    }

    iterator->SeekForPrev("d1");
    // Result should be not valid or "a1".
    if (iterator->Valid()) {
      ASSERT_EQ("a1", iterator->key().ToString());
    }

    iterator->SeekForPrev("y3");
    ASSERT_TRUE(iterator->Valid());
    ASSERT_EQ("y1", iterator->key().ToString());
  }
}
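
// With a zero-capacity table cache, every Get() re-opens the SST file and
// prefetches its partitioned index; randomly injected read failures during
// table open must surface as a non-OK Get() status.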
TEST_F(DBTest2, PartitionedIndexPrefetchFailure) {
  Options options = last_options_;
  options.env = env_;
  options.max_open_files = 20;
  BlockBasedTableOptions bbto;
  bbto.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
  bbto.metadata_block_size = 128;
  bbto.block_size = 128;
  bbto.block_cache = NewLRUCache(16777216);
  bbto.cache_index_and_filter_blocks = true;
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  DestroyAndReopen(options);
  // Force a zero-capacity table cache so every read has to re-open the SST
  // file.
  dbfull()->TEST_table_cache()->SetCapacity(0);
  bbto.block_cache->SetCapacity(0);

  Random rnd(301);
  for (int i = 0; i < 4096; i++) {
    ASSERT_OK(Put(Key(i), rnd.RandomString(32)));
  }
  ASSERT_OK(Flush());
  // Try random failures during table open, 300 times.
  for (int i = 0; i < 300; i++) {
    env_->num_reads_fails_ = 0;
    env_->rand_reads_fail_odd_ = 8;

    std::string value;
    Status s = dbfull()->Get(ReadOptions(), Key(1), &value);
    if (env_->num_reads_fails_ > 0) {
      ASSERT_NOK(s);
    } else {
      ASSERT_OK(s);
    }
  }
  env_->rand_reads_fail_odd_ = 0;
}
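
// Change the prefix extractor (size 2 -> 1) across a reopen. Old filter
// blocks must not produce wrong results; data correctness is always checked,
// and filter-match counters are asserted only where the filter is consulted.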
TEST_F(DBTest2, ChangePrefixExtractor) {
  for (bool use_partitioned_filter : {true, false}) {
    // create a DB with block prefix index
    BlockBasedTableOptions table_options;
    Options options = CurrentOptions();
    options.prefix_seek_opt_in_only = false;  // Use legacy prefix seek

    // Sometimes filter is checked based on upper bound. Assert counters
    // for that case. Otherwise, only check data correctness.
    bool expect_filter_check = !use_partitioned_filter;
    table_options.partition_filters = use_partitioned_filter;
    if (use_partitioned_filter) {
      table_options.index_type =
          BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
    }
    table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
    options.statistics = CreateDBStatistics();

    options.prefix_extractor.reset(NewFixedPrefixTransform(2));
    DestroyAndReopen(options);

    Random rnd(301);

    ASSERT_OK(Put("aa", ""));
    ASSERT_OK(Put("xb", ""));
    ASSERT_OK(Put("xx1", ""));
    ASSERT_OK(Put("xz1", ""));
    ASSERT_OK(Put("zz", ""));
    ASSERT_OK(Flush());
    // After reopening the DB with the prefix size changed from 2 to 1, the
    // prefix extractor won't take effect unless using it cannot change the
    // results, based on the upper bound and seek key.
    options.prefix_extractor.reset(NewFixedPrefixTransform(1));
    Reopen(options);

    {
      std::unique_ptr<Iterator> iterator(db_->NewIterator(ReadOptions()));
      ASSERT_OK(iterator->status());
      iterator->Seek("xa");
      ASSERT_TRUE(iterator->Valid());
      ASSERT_EQ("xb", iterator->key().ToString());
      if (expect_filter_check) {
        EXPECT_EQ(0, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH));
      }

      iterator->Seek("xz");
      ASSERT_TRUE(iterator->Valid());
      ASSERT_EQ("xz1", iterator->key().ToString());
      if (expect_filter_check) {
        EXPECT_EQ(0, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH));
      }
    }

    std::string ub_str = "xg9";
    Slice ub(ub_str);
    ReadOptions ro;
    ro.iterate_upper_bound = &ub;

    {
      std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));
      ASSERT_OK(iterator->status());

      // SeekForPrev() never uses prefix bloom if it is changed.
      iterator->SeekForPrev("xg0");
      ASSERT_TRUE(iterator->Valid());
      ASSERT_EQ("xb", iterator->key().ToString());
      if (expect_filter_check) {
        EXPECT_EQ(0, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH));
      }
    }

    ub_str = "xx9";
    ub = Slice(ub_str);

    {
      std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));
      ASSERT_OK(iterator->status());

      iterator->Seek("x");
      ASSERT_TRUE(iterator->Valid());
      ASSERT_EQ("xb", iterator->key().ToString());
      if (expect_filter_check) {
        EXPECT_EQ(0, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH));
      }

      iterator->Seek("xx0");
      ASSERT_TRUE(iterator->Valid());
      ASSERT_EQ("xx1", iterator->key().ToString());
      if (expect_filter_check) {
        EXPECT_EQ(1, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH));
      }
    }

    CompactRangeOptions compact_range_opts;
    compact_range_opts.bottommost_level_compaction =
        BottommostLevelCompaction::kForce;
    ASSERT_OK(db_->CompactRange(compact_range_opts, nullptr, nullptr));
    ASSERT_OK(db_->CompactRange(compact_range_opts, nullptr, nullptr));
    // Re-execute similar queries after a full compaction
    {
      std::unique_ptr<Iterator> iterator(db_->NewIterator(ReadOptions()));

      iterator->Seek("x");
      ASSERT_TRUE(iterator->Valid());
      ASSERT_EQ("xb", iterator->key().ToString());
      if (expect_filter_check) {
        EXPECT_EQ(1, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH));
      }

      iterator->Seek("xg");
      ASSERT_TRUE(iterator->Valid());
      ASSERT_EQ("xx1", iterator->key().ToString());
      if (expect_filter_check) {
        EXPECT_EQ(1, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH));
      }

      iterator->Seek("xz");
      ASSERT_TRUE(iterator->Valid());
      ASSERT_EQ("xz1", iterator->key().ToString());
      if (expect_filter_check) {
        EXPECT_EQ(1, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH));
      }

      ASSERT_OK(iterator->status());
    }

    {
      std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));

      iterator->SeekForPrev("xx0");
      ASSERT_TRUE(iterator->Valid());
      ASSERT_EQ("xb", iterator->key().ToString());
      if (expect_filter_check) {
        EXPECT_EQ(1, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH));
      }

      iterator->Seek("xx0");
      ASSERT_TRUE(iterator->Valid());
      ASSERT_EQ("xx1", iterator->key().ToString());
      if (expect_filter_check) {
        EXPECT_EQ(1, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH));
      }

      ASSERT_OK(iterator->status());
    }

    ub_str = "xg9";
    ub = Slice(ub_str);
    {
      std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));
      iterator->SeekForPrev("xg0");
      ASSERT_TRUE(iterator->Valid());
      ASSERT_EQ("xb", iterator->key().ToString());
      if (expect_filter_check) {
        EXPECT_EQ(1, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH));
      }
      ASSERT_OK(iterator->status());
    }
  }
}
TEST_F(DBTest2, BlockBasedTablePrefixGetIndexNotFound) {
  // create a DB with block prefix index
  BlockBasedTableOptions table_options;
  Options options = CurrentOptions();
  table_options.block_size = 300;
  table_options.index_type = BlockBasedTableOptions::kHashSearch;
  table_options.index_shortening =
      BlockBasedTableOptions::IndexShorteningMode::kNoShortening;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  options.prefix_extractor.reset(NewFixedPrefixTransform(1));
  options.level0_file_num_compaction_trigger = 8;
  Reopen(options);

  ASSERT_OK(Put("b1", "ok"));
  ASSERT_OK(Flush());
  // Flushing several files so that the chance that the hash bucket is empty
  // for "b" in at least one of the files is high.
  ASSERT_OK(Put("a1", ""));
  ASSERT_OK(Put("c1", ""));
  ASSERT_OK(Flush());

  ASSERT_OK(Put("a2", ""));
  ASSERT_OK(Put("c2", ""));
  ASSERT_OK(Flush());

  ASSERT_OK(Put("a3", ""));
  ASSERT_OK(Put("c3", ""));
  ASSERT_OK(Flush());

  ASSERT_OK(Put("a4", ""));
  ASSERT_OK(Put("c4", ""));
  ASSERT_OK(Flush());

  ASSERT_OK(Put("a5", ""));
  ASSERT_OK(Put("c5", ""));
  ASSERT_OK(Flush());

  ASSERT_EQ("ok", Get("b1"));
}
TEST_F(DBTest2, AutoPrefixMode1) {
  do {
    // create a DB with block prefix index
    Options options = CurrentOptions();
    BlockBasedTableOptions table_options =
        *options.table_factory->GetOptions<BlockBasedTableOptions>();
    table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
    options.prefix_extractor.reset(NewFixedPrefixTransform(1));
    options.statistics = CreateDBStatistics();

    Reopen(options);

    Random rnd(301);
    std::string large_value = rnd.RandomString(500);

    ASSERT_OK(Put("a1", large_value));
    ASSERT_OK(Put("x1", large_value));
    ASSERT_OK(Put("y1", large_value));
    ASSERT_OK(Flush());

    ReadOptions ro;
    ro.total_order_seek = false;
    ro.auto_prefix_mode = true;

    const auto hit_stat = options.num_levels == 1
                              ? LAST_LEVEL_SEEK_FILTER_MATCH
                              : NON_LAST_LEVEL_SEEK_FILTER_MATCH;
    const auto miss_stat = options.num_levels == 1
                               ? LAST_LEVEL_SEEK_FILTERED
                               : NON_LAST_LEVEL_SEEK_FILTERED;

    {
      std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));
      iterator->Seek("b1");
      ASSERT_TRUE(iterator->Valid());
      ASSERT_EQ("x1", iterator->key().ToString());
      EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
      EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat));
      ASSERT_OK(iterator->status());
    }

    Slice ub;
    ro.iterate_upper_bound = &ub;

    ub = "b9";
    {
      std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));
      iterator->Seek("b1");
      ASSERT_FALSE(iterator->Valid());
      EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
      EXPECT_EQ(1, TestGetAndResetTickerCount(options, miss_stat));
      ASSERT_OK(iterator->status());
    }

    ub = "z";
    {
      std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));
      iterator->Seek("b1");
      ASSERT_TRUE(iterator->Valid());
      ASSERT_EQ("x1", iterator->key().ToString());
      EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
      EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat));
      ASSERT_OK(iterator->status());
    }

    ub = "c";
    {
      std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));
      iterator->Seek("b1");
      ASSERT_FALSE(iterator->Valid());
      EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
      EXPECT_EQ(1, TestGetAndResetTickerCount(options, miss_stat));
      ASSERT_OK(iterator->status());
    }

    ub = "c1";
    {
      std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));
      iterator->Seek("b1");
      ASSERT_FALSE(iterator->Valid());
      EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
      EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat));
      ASSERT_OK(iterator->status());
    }

    // The same queries without recreating iterator
    {
      std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));

      ub = "b9";
      iterator->Seek("b1");
      ASSERT_FALSE(iterator->Valid());
      EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
      EXPECT_EQ(1, TestGetAndResetTickerCount(options, miss_stat));
      ASSERT_OK(iterator->status());

      ub = "z";
      iterator->Seek("b1");
      ASSERT_TRUE(iterator->Valid());
      ASSERT_EQ("x1", iterator->key().ToString());
      EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
      EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat));

      ub = "c";
      iterator->Seek("b1");
      ASSERT_FALSE(iterator->Valid());
      EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
      EXPECT_EQ(1, TestGetAndResetTickerCount(options, miss_stat));

      ub = "b9";
      iterator->SeekForPrev("b1");
      ASSERT_TRUE(iterator->Valid());
      ASSERT_EQ("a1", iterator->key().ToString());
      EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
      EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat));

      ub = "zz";
      iterator->SeekToLast();
      ASSERT_TRUE(iterator->Valid());
      ASSERT_EQ("y1", iterator->key().ToString());

      iterator->SeekToFirst();
      ASSERT_TRUE(iterator->Valid());
      ASSERT_EQ("a1", iterator->key().ToString());
    }

    // Similar, now with reverse comparator
    // Technically, we are violating axiom 2 of prefix_extractors, but
    // it should be revised because of major use-cases using
    // ReverseBytewiseComparator with capped/fixed prefix Seek. (FIXME)
    options.comparator = ReverseBytewiseComparator();
    options.prefix_extractor.reset(NewFixedPrefixTransform(1));

    DestroyAndReopen(options);

    ASSERT_OK(Put("a1", large_value));
    ASSERT_OK(Put("x1", large_value));
    ASSERT_OK(Put("y1", large_value));
    ASSERT_OK(Flush());

    {
      std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));

      ub = "b1";
      iterator->Seek("b9");
      ASSERT_FALSE(iterator->Valid());
      EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
      EXPECT_EQ(1, TestGetAndResetTickerCount(options, miss_stat));
      ASSERT_OK(iterator->status());

      ub = "b1";
      iterator->Seek("z");
      ASSERT_TRUE(iterator->Valid());
      ASSERT_EQ("y1", iterator->key().ToString());
      EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
      EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat));

      ub = "b1";
      iterator->Seek("c");
      ASSERT_FALSE(iterator->Valid());
      EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
      EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat));

      ub = "b";
      iterator->Seek("c9");
      ASSERT_FALSE(iterator->Valid());
      // Fails if ReverseBytewiseComparator::IsSameLengthImmediateSuccessor
      // is "correctly" implemented.
      EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
      EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat));

      ub = "a";
      iterator->Seek("b9");
      // Fails if ReverseBytewiseComparator::IsSameLengthImmediateSuccessor
      // is "correctly" implemented.
      ASSERT_TRUE(iterator->Valid());
      ASSERT_EQ("a1", iterator->key().ToString());
      EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
      EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat));

      ub = "b";
      iterator->Seek("a");
      ASSERT_FALSE(iterator->Valid());
      // Fails if ReverseBytewiseComparator::IsSameLengthImmediateSuccessor
      // matches BytewiseComparator::IsSameLengthImmediateSuccessor. The upper
      // bound comparing before the seek key prevents a real bug from
      // surfacing.
      EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
      EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat));

      ub = "b1";
      iterator->SeekForPrev("b9");
      ASSERT_TRUE(iterator->Valid());
      // Fails if ReverseBytewiseComparator::IsSameLengthImmediateSuccessor
      // is "correctly" implemented.
      ASSERT_EQ("x1", iterator->key().ToString());
      EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
      EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat));

      ub = "a";
      iterator->SeekToLast();
      ASSERT_TRUE(iterator->Valid());
      ASSERT_EQ("a1", iterator->key().ToString());

      iterator->SeekToFirst();
      ASSERT_TRUE(iterator->Valid());
      ASSERT_EQ("y1", iterator->key().ToString());
    }

    // Now something a bit different, related to "short" keys that
    // auto_prefix_mode can omit. See "BUG" section of auto_prefix_mode.
    options.comparator = BytewiseComparator();
    for (const auto config : {"fixed:2", "capped:2"}) {
      ASSERT_OK(SliceTransform::CreateFromString(ConfigOptions(), config,
                                                 &options.prefix_extractor));

      // FIXME: kHashSearch, etc. requires all keys be InDomain
      if (StartsWith(config, "fixed") &&
          (table_options.index_type == BlockBasedTableOptions::kHashSearch ||
           StartsWith(options.memtable_factory->Name(), "Hash"))) {
        continue;
      }
      DestroyAndReopen(options);

      const char* a_end_stuff = "a\xffXYZ";
      const char* b_begin_stuff = "b\x00XYZ";

      ASSERT_OK(Put("a", large_value));
      ASSERT_OK(Put("b", large_value));
      ASSERT_OK(Put(Slice(b_begin_stuff, 3), large_value));
      ASSERT_OK(Put("c", large_value));
      ASSERT_OK(Flush());

      // control showing valid optimization with auto_prefix mode
      ub = Slice(a_end_stuff, 4);
      ro.iterate_upper_bound = &ub;

      std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));
      iterator->Seek(Slice(a_end_stuff, 2));
      ASSERT_FALSE(iterator->Valid());
      EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
      EXPECT_EQ(1, TestGetAndResetTickerCount(options, miss_stat));
      ASSERT_OK(iterator->status());

      // test, cannot be validly optimized with auto_prefix_mode
      ub = Slice(b_begin_stuff, 2);
      ro.iterate_upper_bound = &ub;

      iterator->Seek(Slice(a_end_stuff, 2));
      // !!! BUG !!! See "BUG" section of auto_prefix_mode.
      ASSERT_FALSE(iterator->Valid());
      EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
      EXPECT_EQ(1, TestGetAndResetTickerCount(options, miss_stat));
      ASSERT_OK(iterator->status());

      // To prove that is the wrong result, now use total order seek
      ReadOptions tos_ro = ro;
      tos_ro.total_order_seek = true;
      tos_ro.auto_prefix_mode = false;
      iterator.reset(db_->NewIterator(tos_ro));
      iterator->Seek(Slice(a_end_stuff, 2));
      ASSERT_TRUE(iterator->Valid());
      ASSERT_EQ("b", iterator->key().ToString());
      EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
      EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat));
      ASSERT_OK(iterator->status());
    }
  } while (ChangeOptions(kSkipPlainTable));
}
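
// Harness for injecting an IO error immediately before or after the rename
// that installs a new CURRENT file; the DB must remain recoverable afterward.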
class RenameCurrentTest : public DBTestBase,
                          public testing::WithParamInterface<std::string> {
 public:
  RenameCurrentTest()
      : DBTestBase("rename_current_test", /*env_do_fsync=*/true),
        sync_point_(GetParam()) {}

  ~RenameCurrentTest() override = default;

  void SetUp() override {
    env_->no_file_overwrite_.store(true, std::memory_order_release);
  }

  void TearDown() override {
    env_->no_file_overwrite_.store(false, std::memory_order_release);
  }

  void SetupSyncPoints() {
    SyncPoint::GetInstance()->DisableProcessing();
    SyncPoint::GetInstance()->SetCallBack(sync_point_, [&](void* arg) {
      Status* s = static_cast<Status*>(arg);
      assert(s);
      *s = Status::IOError("Injected IO error.");
    });
  }

  const std::string sync_point_;
};

INSTANTIATE_TEST_CASE_P(DistributedFS, RenameCurrentTest,
                        ::testing::Values("SetCurrentFile:BeforeRename",
                                          "SetCurrentFile:AfterRename"));

TEST_P(RenameCurrentTest, Open) {
  Destroy(last_options_);
  Options options = GetDefaultOptions();
  options.create_if_missing = true;
  SetupSyncPoints();
  SyncPoint::GetInstance()->EnableProcessing();
  Status s = TryReopen(options);
  ASSERT_NOK(s);

  SyncPoint::GetInstance()->DisableProcessing();
  Reopen(options);
}

TEST_P(RenameCurrentTest, Flush) {
  Destroy(last_options_);
  Options options = GetDefaultOptions();
  options.max_manifest_file_size = 1;
  options.create_if_missing = true;
  Reopen(options);
  ASSERT_OK(Put("key", "value"));
  SetupSyncPoints();
  SyncPoint::GetInstance()->EnableProcessing();
  ASSERT_NOK(Flush());

  ASSERT_NOK(Put("foo", "value"));

  SyncPoint::GetInstance()->DisableProcessing();
  Reopen(options);
  ASSERT_EQ("value", Get("key"));
  ASSERT_EQ("NOT_FOUND", Get("foo"));
}

TEST_P(RenameCurrentTest, Compaction) {
  Destroy(last_options_);
  Options options = GetDefaultOptions();
  options.max_manifest_file_size = 1;
  options.create_if_missing = true;
  Reopen(options);
  ASSERT_OK(Put("a", "a_value"));
  ASSERT_OK(Put("c", "c_value"));
  ASSERT_OK(Flush());

  ASSERT_OK(Put("b", "b_value"));
  ASSERT_OK(Put("d", "d_value"));
  ASSERT_OK(Flush());

  SetupSyncPoints();
  SyncPoint::GetInstance()->EnableProcessing();
  ASSERT_NOK(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr,
                               /*end=*/nullptr));

  ASSERT_NOK(Put("foo", "value"));

  SyncPoint::GetInstance()->DisableProcessing();
  Reopen(options);
  ASSERT_EQ("NOT_FOUND", Get("foo"));
  ASSERT_EQ("d_value", Get("d"));
}
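
// Verify that WAL, manifest, and other metadata files are opened with the
// configured temperatures, whether those come from the Env's Optimize*Write
// hints or from the metadata_write_temperature / wal_write_temperature
// options.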
TEST_F(DBTest2, VariousFileTemperatures) {
  constexpr size_t kNumberFileTypes = static_cast<size_t>(kBlobFile) + 1U;

  struct MyTestFS : public FileTemperatureTestFS {
    explicit MyTestFS(const std::shared_ptr<FileSystem>& fs)
        : FileTemperatureTestFS(fs) {
      Reset();
    }

    IOStatus NewWritableFile(const std::string& fname, const FileOptions& opts,
                             std::unique_ptr<FSWritableFile>* result,
                             IODebugContext* dbg) override {
      IOStatus ios =
          FileTemperatureTestFS::NewWritableFile(fname, opts, result, dbg);
      if (ios.ok()) {
        uint64_t number;
        FileType type;
        if (ParseFileName(GetFileName(fname), &number, "LOG", &type)) {
          if (type == kTableFile) {
            // Not checked here
          } else if (type == kWalFile) {
            if (opts.temperature != expected_wal_temperature) {
              std::cerr << "Attempt to open " << fname << " with temperature "
                        << temperature_to_string[opts.temperature]
                        << " rather than "
                        << temperature_to_string[expected_wal_temperature]
                        << std::endl;
              assert(false);
            }
          } else if (type == kDescriptorFile) {
            if (opts.temperature != expected_manifest_temperature) {
              std::cerr << "Attempt to open " << fname << " with temperature "
                        << temperature_to_string[opts.temperature]
                        << " rather than "
                        << temperature_to_string[expected_manifest_temperature]
                        << std::endl;
              assert(false);
            }
          } else if (opts.temperature != expected_other_metadata_temperature) {
            std::cerr << "Attempt to open " << fname << " with temperature "
                      << temperature_to_string[opts.temperature]
                      << " rather than "
                      << temperature_to_string
                             [expected_other_metadata_temperature]
                      << std::endl;
            assert(false);
          }
          UpdateCount(type, 1);
        }
      }
      return ios;
    }

    IOStatus RenameFile(const std::string& src, const std::string& dst,
                        const IOOptions& options,
                        IODebugContext* dbg) override {
      IOStatus ios = FileTemperatureTestFS::RenameFile(src, dst, options, dbg);
      if (ios.ok()) {
        uint64_t number;
        FileType src_type;
        FileType dst_type;
        assert(ParseFileName(GetFileName(src), &number, "LOG", &src_type));
        assert(ParseFileName(GetFileName(dst), &number, "LOG", &dst_type));

        UpdateCount(src_type, -1);
        UpdateCount(dst_type, 1);
      }
      return ios;
    }

    void UpdateCount(FileType type, int delta) {
      size_t i = static_cast<size_t>(type);
      assert(i < kNumberFileTypes);
      counts[i].FetchAddRelaxed(delta);
    }

    std::map<FileType, size_t> PopCounts() {
      std::map<FileType, size_t> ret;
      for (size_t i = 0; i < kNumberFileTypes; ++i) {
        int c = counts[i].ExchangeRelaxed(0);
        if (c > 0) {
          ret[static_cast<FileType>(i)] = c;
        }
      }
      return ret;
    }

    FileOptions OptimizeForLogWrite(
        const FileOptions& file_options,
        const DBOptions& /*db_options*/) const override {
      FileOptions opts = file_options;
      if (optimize_wal_temperature != Temperature::kUnknown) {
        opts.temperature = optimize_wal_temperature;
      }
      return opts;
    }

    FileOptions OptimizeForManifestWrite(
        const FileOptions& file_options) const override {
      FileOptions opts = file_options;
      if (optimize_manifest_temperature != Temperature::kUnknown) {
        opts.temperature = optimize_manifest_temperature;
      }
      return opts;
    }

    void Reset() {
      optimize_manifest_temperature = Temperature::kUnknown;
      optimize_wal_temperature = Temperature::kUnknown;
      expected_manifest_temperature = Temperature::kUnknown;
      expected_other_metadata_temperature = Temperature::kUnknown;
      expected_wal_temperature = Temperature::kUnknown;
      for (auto& c : counts) {
        c.StoreRelaxed(0);
      }
    }

    Temperature optimize_manifest_temperature;
    Temperature optimize_wal_temperature;
    Temperature expected_manifest_temperature;
    Temperature expected_other_metadata_temperature;
    Temperature expected_wal_temperature;
    std::array<RelaxedAtomic<int>, kNumberFileTypes> counts;
  };
  // We don't have enough non-unknown temps to confidently distinguish that
  // a specific setting caused a specific outcome, in a single run. Using
  // RandomKnownTemperature() is a reasonable work-around without blowing up
  // test time.
  auto test_fs = std::make_shared<MyTestFS>(env_->GetFileSystem());
  std::unique_ptr<Env> env(new CompositeEnvWrapper(env_, test_fs));
  for (bool use_optimize : {false, true}) {
    std::cerr << "use_optimize: " << std::to_string(use_optimize) << std::endl;
    for (bool use_temp_options : {false, true}) {
      std::cerr << "use_temp_options: " << std::to_string(use_temp_options)
                << std::endl;

      Options options = CurrentOptions();
      // Currently required for last_level_temperature
      options.compaction_style = kCompactionStyleUniversal;
      options.env = env.get();
      test_fs->Reset();
      if (use_optimize) {
        test_fs->optimize_manifest_temperature = RandomKnownTemperature();
        test_fs->expected_manifest_temperature =
            test_fs->optimize_manifest_temperature;
        test_fs->optimize_wal_temperature = RandomKnownTemperature();
        test_fs->expected_wal_temperature = test_fs->optimize_wal_temperature;
      }
      if (use_temp_options) {
        options.metadata_write_temperature = RandomKnownTemperature();
        test_fs->expected_manifest_temperature =
            options.metadata_write_temperature;
        test_fs->expected_other_metadata_temperature =
            options.metadata_write_temperature;
        options.wal_write_temperature = RandomKnownTemperature();
        test_fs->expected_wal_temperature = options.wal_write_temperature;
        options.last_level_temperature = RandomKnownTemperature();
        options.default_write_temperature = RandomKnownTemperature();
      }

      DestroyAndReopen(options);
      Defer closer([&] { Close(); });

      using FTC = std::map<FileType, size_t>;
      // Files on DB startup
      ASSERT_EQ(test_fs->PopCounts(), FTC({{kWalFile, 1},
                                           {kDescriptorFile, 2},
                                           {kCurrentFile, 2},
                                           {kIdentityFile, 1},
                                           {kOptionsFile, 1}}));

      // Temperature count map
      using TCM = std::map<Temperature, size_t>;
      ASSERT_EQ(test_fs->CountCurrentSstFilesByTemp(), TCM({}));

      ASSERT_OK(Put("foo", "1"));
      ASSERT_OK(Put("bar", "1"));
      ASSERT_OK(Flush());
      ASSERT_OK(Put("foo", "2"));
      ASSERT_OK(Put("bar", "2"));
      ASSERT_OK(Flush());
      ASSERT_EQ(test_fs->CountCurrentSstFilesByTemp(),
                TCM({{options.default_write_temperature, 2}}));

      ASSERT_OK(db_->CompactRange({}, nullptr, nullptr));
      ASSERT_OK(dbfull()->TEST_WaitForBackgroundWork());
      ASSERT_EQ(test_fs->CountCurrentSstFilesByTemp(),
                TCM({{options.last_level_temperature, 1}}));

      ASSERT_OK(Put("foo", "3"));
      ASSERT_OK(Put("bar", "3"));
      ASSERT_OK(Flush());

      // Just in memtable/WAL
      ASSERT_OK(Put("dog", "3"));

      {
        TCM expected;
        expected[options.default_write_temperature] += 1;
        expected[options.last_level_temperature] += 1;
        ASSERT_EQ(test_fs->CountCurrentSstFilesByTemp(), expected);
      }

      // New files during operation
      ASSERT_EQ(test_fs->PopCounts(), FTC({{kWalFile, 3}, {kTableFile, 4}}));

      Reopen(options);

      // New files during re-open/recovery
      ASSERT_EQ(test_fs->PopCounts(), FTC({{kWalFile, 1},
                                           {kTableFile, 1},
                                           {kDescriptorFile, 1},
                                           {kCurrentFile, 1},
                                           {kOptionsFile, 1}}));

      Destroy(options);
    }
  }
}
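
// last_level_temperature should apply to bottommost-level SSTs produced by
// compaction, be reported consistently through file metadata, listener
// events, iostats, and tickers, and persist across reopens.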
TEST_F(DBTest2, LastLevelTemperature) {
  class TestListener : public EventListener {
   public:
    void OnFileReadFinish(const FileOperationInfo& info) override {
      UpdateFileTemperature(info);
    }

    void OnFileWriteFinish(const FileOperationInfo& info) override {
      UpdateFileTemperature(info);
    }

    void OnFileFlushFinish(const FileOperationInfo& info) override {
      UpdateFileTemperature(info);
    }

    void OnFileSyncFinish(const FileOperationInfo& info) override {
      UpdateFileTemperature(info);
    }

    void OnFileCloseFinish(const FileOperationInfo& info) override {
      UpdateFileTemperature(info);
    }

    bool ShouldBeNotifiedOnFileIO() override { return true; }

    std::unordered_map<uint64_t, Temperature> file_temperatures;

   private:
    void UpdateFileTemperature(const FileOperationInfo& info) {
      auto filename = GetFileName(info.path);
      uint64_t number;
      FileType type;
      ASSERT_TRUE(ParseFileName(filename, &number, &type));
      if (type == kTableFile) {
        MutexLock l(&mutex_);
        auto ret = file_temperatures.insert({number, info.temperature});
        if (!ret.second) {
          // The temperature of the same file should be the same for all
          // events.
          ASSERT_TRUE(ret.first->second == info.temperature);
        }
      }
    }

    std::string GetFileName(const std::string& fname) {
      auto filename = fname.substr(fname.find_last_of(kFilePathSeparator) + 1);
      // Workaround, for Windows only, where the file path could contain both
      // the Windows FilePathSeparator and '/'.
      filename = filename.substr(filename.find_last_of('/') + 1);
      return filename;
    }

    port::Mutex mutex_;
  };

  const int kNumLevels = 7;
  const int kLastLevel = kNumLevels - 1;
  auto* listener = new TestListener();

  Options options = CurrentOptions();
  options.last_level_temperature = Temperature::kWarm;
  options.level0_file_num_compaction_trigger = 2;
  options.level_compaction_dynamic_level_bytes = true;
  options.num_levels = kNumLevels;
  options.statistics = CreateDBStatistics();
  options.listeners.emplace_back(listener);
  Reopen(options);

  auto size = GetSstSizeHelper(Temperature::kUnknown);
  ASSERT_EQ(size, 0);
  size = GetSstSizeHelper(Temperature::kWarm);
  ASSERT_EQ(size, 0);
  size = GetSstSizeHelper(Temperature::kHot);
  ASSERT_EQ(size, 0);

  ASSERT_OK(Put("foo", "bar"));
  ASSERT_OK(Put("bar", "bar"));
  ASSERT_OK(Flush());
  ASSERT_OK(Put("foo", "bar"));
  ASSERT_OK(Put("bar", "bar"));
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  get_iostats_context()->Reset();
  IOStatsContext* iostats = get_iostats_context();

  ColumnFamilyMetaData metadata;
  db_->GetColumnFamilyMetaData(&metadata);
  ASSERT_EQ(1, metadata.file_count);
  SstFileMetaData meta = metadata.levels[kLastLevel].files[0];
  ASSERT_EQ(Temperature::kWarm, meta.temperature);
  uint64_t number;
  FileType type;
  ASSERT_TRUE(ParseFileName(meta.name, &number, &type));
  ASSERT_EQ(listener->file_temperatures.at(number), meta.temperature);

  size = GetSstSizeHelper(Temperature::kUnknown);
  ASSERT_EQ(size, 0);
  size = GetSstSizeHelper(Temperature::kWarm);
  ASSERT_GT(size, 0);
  ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0);
  ASSERT_EQ(iostats->file_io_stats_by_temperature.warm_file_read_count, 0);
  ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_read_count, 0);
  ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_BYTES), 0);
  ASSERT_GT(options.statistics->getTickerCount(WARM_FILE_READ_BYTES), 0);
  ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_BYTES), 0);

  ASSERT_EQ("bar", Get("foo"));
  ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0);
  ASSERT_EQ(iostats->file_io_stats_by_temperature.warm_file_read_count, 1);
  ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_read_count, 0);
  ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_bytes_read, 0);
  ASSERT_GT(iostats->file_io_stats_by_temperature.warm_file_bytes_read, 0);
  ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_bytes_read, 0);
  ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_BYTES), 0);
  ASSERT_GT(options.statistics->getTickerCount(WARM_FILE_READ_BYTES), 0);
  ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_BYTES), 0);
  ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_COUNT), 0);
  ASSERT_GT(options.statistics->getTickerCount(WARM_FILE_READ_COUNT), 0);
  ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_COUNT), 0);

  // non-bottommost file still has unknown temperature
  ASSERT_OK(Put("foo", "bar"));
  ASSERT_OK(Put("bar", "bar"));
  ASSERT_OK(Flush());
  ASSERT_EQ("bar", Get("bar"));
  ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0);
  ASSERT_EQ(iostats->file_io_stats_by_temperature.warm_file_read_count, 1);
  ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_read_count, 0);
  ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_bytes_read, 0);
  ASSERT_GT(iostats->file_io_stats_by_temperature.warm_file_bytes_read, 0);
  ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_bytes_read, 0);
  ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_BYTES), 0);
  ASSERT_GT(options.statistics->getTickerCount(WARM_FILE_READ_BYTES), 0);
  ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_BYTES), 0);
  ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_COUNT), 0);
  ASSERT_GT(options.statistics->getTickerCount(WARM_FILE_READ_COUNT), 0);
  ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_COUNT), 0);

  db_->GetColumnFamilyMetaData(&metadata);
  ASSERT_EQ(2, metadata.file_count);
  meta = metadata.levels[0].files[0];
  ASSERT_EQ(Temperature::kUnknown, meta.temperature);
  ASSERT_TRUE(ParseFileName(meta.name, &number, &type));
  ASSERT_EQ(listener->file_temperatures.at(number), meta.temperature);

  meta = metadata.levels[kLastLevel].files[0];
  ASSERT_EQ(Temperature::kWarm, meta.temperature);
  ASSERT_TRUE(ParseFileName(meta.name, &number, &type));
  ASSERT_EQ(listener->file_temperatures.at(number), meta.temperature);

  size = GetSstSizeHelper(Temperature::kUnknown);
  ASSERT_GT(size, 0);
  size = GetSstSizeHelper(Temperature::kWarm);
  ASSERT_GT(size, 0);

  // reopen and check the information is persisted
  Reopen(options);
  db_->GetColumnFamilyMetaData(&metadata);
  ASSERT_EQ(2, metadata.file_count);
  meta = metadata.levels[0].files[0];
  ASSERT_EQ(Temperature::kUnknown, meta.temperature);
  ASSERT_TRUE(ParseFileName(meta.name, &number, &type));
  ASSERT_EQ(listener->file_temperatures.at(number), meta.temperature);

  meta = metadata.levels[kLastLevel].files[0];
  ASSERT_EQ(Temperature::kWarm, meta.temperature);
  ASSERT_TRUE(ParseFileName(meta.name, &number, &type));
  ASSERT_EQ(listener->file_temperatures.at(number), meta.temperature);

  size = GetSstSizeHelper(Temperature::kUnknown);
  ASSERT_GT(size, 0);
  size = GetSstSizeHelper(Temperature::kWarm);
  ASSERT_GT(size, 0);
  // check other non-existent temperatures
  size = GetSstSizeHelper(Temperature::kHot);
  ASSERT_EQ(size, 0);
  size = GetSstSizeHelper(Temperature::kCold);
  ASSERT_EQ(size, 0);
  std::string prop;
  ASSERT_TRUE(dbfull()->GetProperty(
      DB::Properties::kLiveSstFilesSizeAtTemperature + std::to_string(22),
      &prop));
  ASSERT_EQ(std::atoi(prop.c_str()), 0);

  Reopen(options);
  db_->GetColumnFamilyMetaData(&metadata);
  ASSERT_EQ(2, metadata.file_count);
  meta = metadata.levels[0].files[0];
  ASSERT_EQ(Temperature::kUnknown, meta.temperature);
  ASSERT_TRUE(ParseFileName(meta.name, &number, &type));
  ASSERT_EQ(listener->file_temperatures.at(number), meta.temperature);

  meta = metadata.levels[kLastLevel].files[0];
  ASSERT_EQ(Temperature::kWarm, meta.temperature);
  ASSERT_TRUE(ParseFileName(meta.name, &number, &type));
  ASSERT_EQ(listener->file_temperatures.at(number), meta.temperature);
}
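
// Like LastLevelTemperature, but under universal compaction, additionally
// covering dynamic updates via SetOptions() and rejection of the invalid
// kLastTemperature setting.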
TEST_F(DBTest2, LastLevelTemperatureUniversal) {
  const int kTriggerNum = 3;
  const int kNumLevels = 5;
  const int kBottommostLevel = kNumLevels - 1;
  Options options = CurrentOptions();
  options.compaction_style = kCompactionStyleUniversal;
  options.level0_file_num_compaction_trigger = kTriggerNum;
  options.num_levels = kNumLevels;
  options.statistics = CreateDBStatistics();
  DestroyAndReopen(options);

  auto size = GetSstSizeHelper(Temperature::kUnknown);
  ASSERT_EQ(size, 0);
  size = GetSstSizeHelper(Temperature::kWarm);
  ASSERT_EQ(size, 0);
  size = GetSstSizeHelper(Temperature::kHot);
  ASSERT_EQ(size, 0);
  get_iostats_context()->Reset();
  IOStatsContext* iostats = get_iostats_context();

  for (int i = 0; i < kTriggerNum; i++) {
    ASSERT_OK(Put("foo", "bar"));
    ASSERT_OK(Put("bar", "bar"));
    ASSERT_OK(Flush());
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  ColumnFamilyMetaData metadata;
  db_->GetColumnFamilyMetaData(&metadata);
  ASSERT_EQ(1, metadata.file_count);
  ASSERT_EQ(Temperature::kUnknown,
            metadata.levels[kBottommostLevel].files[0].temperature);
  size = GetSstSizeHelper(Temperature::kUnknown);
  ASSERT_GT(size, 0);
  size = GetSstSizeHelper(Temperature::kWarm);
  ASSERT_EQ(size, 0);
  ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0);
  ASSERT_EQ(iostats->file_io_stats_by_temperature.warm_file_read_count, 0);
  ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_read_count, 0);
  ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_BYTES), 0);
  ASSERT_EQ(options.statistics->getTickerCount(WARM_FILE_READ_BYTES), 0);
  ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_BYTES), 0);
  ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_COUNT), 0);
  ASSERT_EQ(options.statistics->getTickerCount(WARM_FILE_READ_COUNT), 0);
  ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_COUNT), 0);

  ASSERT_EQ("bar", Get("foo"));
  ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0);
  ASSERT_EQ(iostats->file_io_stats_by_temperature.warm_file_read_count, 0);
  ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_read_count, 0);
  ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_bytes_read, 0);
  ASSERT_EQ(iostats->file_io_stats_by_temperature.warm_file_bytes_read, 0);
  ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_bytes_read, 0);
  ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_BYTES), 0);
  ASSERT_EQ(options.statistics->getTickerCount(WARM_FILE_READ_BYTES), 0);
  ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_BYTES), 0);
  ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_COUNT), 0);
  ASSERT_EQ(options.statistics->getTickerCount(WARM_FILE_READ_COUNT), 0);
  ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_COUNT), 0);

  ASSERT_OK(Put("foo", "bar"));
  ASSERT_OK(Put("bar", "bar"));
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  db_->GetColumnFamilyMetaData(&metadata);
  ASSERT_EQ(2, metadata.file_count);
  ASSERT_EQ(Temperature::kUnknown, metadata.levels[0].files[0].temperature);
  size = GetSstSizeHelper(Temperature::kUnknown);
  ASSERT_GT(size, 0);
  size = GetSstSizeHelper(Temperature::kWarm);
  ASSERT_EQ(size, 0);

  // Update last level temperature
  options.last_level_temperature = Temperature::kWarm;
  Reopen(options);
  db_->GetColumnFamilyMetaData(&metadata);
  // Should not impact existing ones
  ASSERT_EQ(Temperature::kUnknown,
            metadata.levels[kBottommostLevel].files[0].temperature);
  size = GetSstSizeHelper(Temperature::kUnknown);
  ASSERT_GT(size, 0);
  size = GetSstSizeHelper(Temperature::kWarm);
  ASSERT_EQ(size, 0);
  5582. // new generated file should have the new settings
  5583. ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  5584. db_->GetColumnFamilyMetaData(&metadata);
  5585. ASSERT_EQ(1, metadata.file_count);
  5586. ASSERT_EQ(Temperature::kWarm,
  5587. metadata.levels[kBottommostLevel].files[0].temperature);
  5588. size = GetSstSizeHelper(Temperature::kUnknown);
  5589. ASSERT_EQ(size, 0);
  5590. size = GetSstSizeHelper(Temperature::kWarm);
  5591. ASSERT_GT(size, 0);
  5592. ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_BYTES), 0);
  5593. ASSERT_GT(options.statistics->getTickerCount(WARM_FILE_READ_BYTES), 0);
  5594. ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_BYTES), 0);
  5595. ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_COUNT), 0);
  5596. ASSERT_GT(options.statistics->getTickerCount(WARM_FILE_READ_COUNT), 0);
  5597. ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_COUNT), 0);
  5598. // non-bottommost file still has unknown temperature
  5599. ASSERT_OK(Put("foo", "bar"));
  5600. ASSERT_OK(Put("bar", "bar"));
  5601. ASSERT_OK(Flush());
  5602. ASSERT_OK(dbfull()->TEST_WaitForCompact());
  5603. db_->GetColumnFamilyMetaData(&metadata);
  5604. ASSERT_EQ(2, metadata.file_count);
  5605. ASSERT_EQ(Temperature::kUnknown, metadata.levels[0].files[0].temperature);
  5606. size = GetSstSizeHelper(Temperature::kUnknown);
  5607. ASSERT_GT(size, 0);
  5608. size = GetSstSizeHelper(Temperature::kWarm);
  5609. ASSERT_GT(size, 0);
  5610. // check other non-exist temperatures
  5611. size = GetSstSizeHelper(Temperature::kHot);
  5612. ASSERT_EQ(size, 0);
  5613. size = GetSstSizeHelper(Temperature::kCold);
  5614. ASSERT_EQ(size, 0);
  5615. std::string prop;
  5616. ASSERT_TRUE(dbfull()->GetProperty(
  5617. DB::Properties::kLiveSstFilesSizeAtTemperature + std::to_string(22),
  5618. &prop));
  5619. ASSERT_EQ(std::atoi(prop.c_str()), 0);
  5620. // Update last level temperature dynamically with SetOptions
  5621. auto s = db_->SetOptions({{"last_level_temperature", "kCold"}});
  5622. ASSERT_OK(s);
  5623. ASSERT_EQ(db_->GetOptions().last_level_temperature, Temperature::kCold);
  5624. db_->GetColumnFamilyMetaData(&metadata);
  5625. // Should not impact the existing files
  5626. ASSERT_EQ(Temperature::kWarm,
  5627. metadata.levels[kBottommostLevel].files[0].temperature);
  5628. size = GetSstSizeHelper(Temperature::kUnknown);
  5629. ASSERT_GT(size, 0);
  5630. size = GetSstSizeHelper(Temperature::kWarm);
  5631. ASSERT_GT(size, 0);
  5632. size = GetSstSizeHelper(Temperature::kCold);
  5633. ASSERT_EQ(size, 0);
  5634. // new generated files should have the new settings
  5635. ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  5636. db_->GetColumnFamilyMetaData(&metadata);
  5637. ASSERT_EQ(1, metadata.file_count);
  5638. ASSERT_EQ(Temperature::kCold,
  5639. metadata.levels[kBottommostLevel].files[0].temperature);
  5640. size = GetSstSizeHelper(Temperature::kUnknown);
  5641. ASSERT_EQ(size, 0);
  5642. size = GetSstSizeHelper(Temperature::kWarm);
  5643. ASSERT_EQ(size, 0);
  5644. size = GetSstSizeHelper(Temperature::kCold);
  5645. ASSERT_GT(size, 0);
  5646. // kLastTemperature is an invalid temperature
  5647. options.last_level_temperature = Temperature::kLastTemperature;
  5648. s = TryReopen(options);
  5649. ASSERT_TRUE(s.IsIOError());
  5650. }
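// Verify that last-level vs non-last-level read statistics line up with the
// per-temperature read tickers: non-last-level reads should match the
// default/write-time temperature (kHot here) and last-level reads should
// match last_level_temperature (kWarm), covering both the default_temperature
// and default_write_temperature code paths.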
TEST_F(DBTest2, LastLevelStatistics) {
  for (bool write_time_default : {false, true}) {
    SCOPED_TRACE("write time default? " + std::to_string(write_time_default));
    Options options = CurrentOptions();
    options.last_level_temperature = Temperature::kWarm;
    if (write_time_default) {
      options.default_write_temperature = Temperature::kHot;
      ASSERT_EQ(options.default_temperature, Temperature::kUnknown);
    } else {
      options.default_temperature = Temperature::kHot;
      ASSERT_EQ(options.default_write_temperature, Temperature::kUnknown);
    }
    options.level0_file_num_compaction_trigger = 2;
    options.level_compaction_dynamic_level_bytes = true;
    options.statistics = CreateDBStatistics();
    BlockBasedTableOptions bbto;
    bbto.no_block_cache = true;
    options.table_factory.reset(NewBlockBasedTableFactory(bbto));
    DestroyAndReopen(options);

    // generate 1 sst on level 0
    ASSERT_OK(Put("foo1", "bar"));
    ASSERT_OK(Put("bar", "bar"));
    ASSERT_OK(Flush());
    ASSERT_EQ("bar", Get("bar"));

    ASSERT_GT(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_BYTES),
              0);
    ASSERT_GT(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT),
              0);
    ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_BYTES),
              options.statistics->getTickerCount(HOT_FILE_READ_BYTES));
    ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT),
              options.statistics->getTickerCount(HOT_FILE_READ_COUNT));
    ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_BYTES), 0);
    ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_COUNT), 0);

    // 2nd flush to trigger compaction
    ASSERT_OK(Put("foo2", "bar"));
    ASSERT_OK(Put("bar", "bar"));
    ASSERT_OK(Flush());
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
    ASSERT_EQ("bar", Get("bar"));

    ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_BYTES),
              options.statistics->getTickerCount(HOT_FILE_READ_BYTES));
    ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT),
              options.statistics->getTickerCount(HOT_FILE_READ_COUNT));
    ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_BYTES),
              options.statistics->getTickerCount(WARM_FILE_READ_BYTES));
    ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_COUNT),
              options.statistics->getTickerCount(WARM_FILE_READ_COUNT));

    auto pre_bytes =
        options.statistics->getTickerCount(NON_LAST_LEVEL_READ_BYTES);
    auto pre_count =
        options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT);

    // 3rd flush to generate 1 sst on level 0
    ASSERT_OK(Put("foo3", "bar"));
    ASSERT_OK(Put("bar", "bar"));
    ASSERT_OK(Flush());
    ASSERT_EQ("bar", Get("foo1"));
    ASSERT_EQ("bar", Get("foo2"));
    ASSERT_EQ("bar", Get("foo3"));
    ASSERT_EQ("bar", Get("bar"));

    ASSERT_GT(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_BYTES),
              pre_bytes);
    ASSERT_GT(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT),
              pre_count);
    ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_BYTES),
              options.statistics->getTickerCount(HOT_FILE_READ_BYTES));
    ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT),
              options.statistics->getTickerCount(HOT_FILE_READ_COUNT));
    ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_BYTES),
              options.statistics->getTickerCount(WARM_FILE_READ_BYTES));
    ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_COUNT),
              options.statistics->getTickerCount(WARM_FILE_READ_COUNT));
    // Control
    ASSERT_NE(options.statistics->getTickerCount(LAST_LEVEL_READ_COUNT),
              options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT));

    // Making the last level kWarm and the default temperature kCold is not a
    // realistic setting. It is used here only to verify that the default
    // temperature can be reset on reopen, while the last-level temperature
    // stays consistent across DB reopens because those files' temperatures
    // are persisted in the MANIFEST.
    options.default_temperature = Temperature::kCold;
    ASSERT_OK(options.statistics->Reset());
    Reopen(options);
    ASSERT_EQ("bar", Get("foo1"));
    ASSERT_EQ("bar", Get("foo2"));
    ASSERT_EQ("bar", Get("foo3"));
    ASSERT_EQ("bar", Get("bar"));

    if (write_time_default) {
      // Unchanged
      ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_BYTES),
                options.statistics->getTickerCount(HOT_FILE_READ_BYTES));
      ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT),
                options.statistics->getTickerCount(HOT_FILE_READ_COUNT));

      ASSERT_LT(0, options.statistics->getTickerCount(HOT_FILE_READ_BYTES));
      ASSERT_EQ(0, options.statistics->getTickerCount(COLD_FILE_READ_BYTES));
    } else {
      // Changed (in how we map kUnknown)
      ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_BYTES),
                options.statistics->getTickerCount(COLD_FILE_READ_BYTES));
      ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT),
                options.statistics->getTickerCount(COLD_FILE_READ_COUNT));

      ASSERT_EQ(0, options.statistics->getTickerCount(HOT_FILE_READ_BYTES));
      ASSERT_LT(0, options.statistics->getTickerCount(COLD_FILE_READ_BYTES));
    }
    ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_BYTES),
              options.statistics->getTickerCount(WARM_FILE_READ_BYTES));
    ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_COUNT),
              options.statistics->getTickerCount(WARM_FILE_READ_COUNT));
    // Control
    ASSERT_NE(options.statistics->getTickerCount(LAST_LEVEL_READ_COUNT),
              options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT));
  }
}
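// Verify that checkpoint creation passes each source SST's recorded
// temperature down to the FileSystem as a hint when copying files. LinkFile
// is disabled in the custom FS below so the checkpoint must copy (and
// therefore re-open) every SST.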
TEST_F(DBTest2, CheckpointFileTemperature) {
  class NoLinkTestFS : public FileTemperatureTestFS {
    using FileTemperatureTestFS::FileTemperatureTestFS;

    IOStatus LinkFile(const std::string&, const std::string&, const IOOptions&,
                      IODebugContext*) override {
      // Return NotSupported to force the checkpoint to copy the file instead
      // of just linking it
      return IOStatus::NotSupported();
    }
  };
  auto test_fs = std::make_shared<NoLinkTestFS>(env_->GetFileSystem());
  std::unique_ptr<Env> env(new CompositeEnvWrapper(env_, test_fs));
  Options options = CurrentOptions();
  options.last_level_temperature = Temperature::kWarm;
  // Set dynamic_level to true so that compaction compacts the data directly
  // to the last level, which carries the last_level_temperature
  options.level_compaction_dynamic_level_bytes = true;
  options.level0_file_num_compaction_trigger = 2;
  options.env = env.get();
  Reopen(options);

  // generate a bottommost file and a non-bottommost file
  ASSERT_OK(Put("foo", "bar"));
  ASSERT_OK(Put("bar", "bar"));
  ASSERT_OK(Flush());
  ASSERT_OK(Put("foo", "bar"));
  ASSERT_OK(Put("bar", "bar"));
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_OK(Put("foo", "bar"));
  ASSERT_OK(Put("bar", "bar"));
  ASSERT_OK(Flush());
  auto size = GetSstSizeHelper(Temperature::kWarm);
  ASSERT_GT(size, 0);

  std::map<uint64_t, Temperature> temperatures;
  std::vector<LiveFileStorageInfo> infos;
  ASSERT_OK(
      dbfull()->GetLiveFilesStorageInfo(LiveFilesStorageInfoOptions(), &infos));
  for (const auto& info : infos) {
    temperatures.emplace(info.file_number, info.temperature);
  }

  test_fs->PopRequestedSstFileTemperatures();
  Checkpoint* checkpoint;
  ASSERT_OK(Checkpoint::Create(db_, &checkpoint));
  ASSERT_OK(
      checkpoint->CreateCheckpoint(dbname_ + kFilePathSeparator + "tempcp"));

  // Check the src_temperature hints for the source files: 2 sst files, one
  // kWarm and one kUnknown
  std::vector<std::pair<uint64_t, Temperature>> requested_temps;
  test_fs->PopRequestedSstFileTemperatures(&requested_temps);
  // Two requests
  ASSERT_EQ(requested_temps.size(), 2);
  std::set<uint64_t> distinct_requests;
  for (const auto& requested_temp : requested_temps) {
    // Matching manifest temperatures
    ASSERT_EQ(temperatures.at(requested_temp.first), requested_temp.second);
    distinct_requests.insert(requested_temp.first);
  }
  // Each request is for a distinct file
  ASSERT_EQ(distinct_requests.size(), requested_temps.size());

  delete checkpoint;
  Close();
}
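// Verify experimental::UpdateManifestForFilesState(): when the temperatures
// reported by the FileSystem diverge from what the MANIFEST records, running
// the fixup with update_temperatures=true should rewrite the MANIFEST so the
// recorded temperatures match the FileSystem again.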
TEST_F(DBTest2, FileTemperatureManifestFixup) {
  auto test_fs = std::make_shared<FileTemperatureTestFS>(env_->GetFileSystem());
  std::unique_ptr<Env> env(new CompositeEnvWrapper(env_, test_fs));
  Options options = CurrentOptions();
  options.last_level_temperature = Temperature::kWarm;
  // Set dynamic_level to true so that compaction compacts the data directly
  // to the last level, which carries the last_level_temperature
  options.level_compaction_dynamic_level_bytes = true;
  options.level0_file_num_compaction_trigger = 2;
  options.env = env.get();
  std::vector<std::string> cfs = {/*"default",*/ "test1", "test2"};
  CreateAndReopenWithCF(cfs, options);
  // Needed for later re-opens (weird)
  cfs.insert(cfs.begin(), kDefaultColumnFamilyName);

  // Generate a bottommost file in all CFs
  for (int cf = 0; cf < 3; ++cf) {
    ASSERT_OK(Put(cf, "a", "val"));
    ASSERT_OK(Put(cf, "c", "val"));
    ASSERT_OK(Flush(cf));
    ASSERT_OK(Put(cf, "b", "val"));
    ASSERT_OK(Put(cf, "d", "val"));
    ASSERT_OK(Flush(cf));
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  // verify
  ASSERT_GT(GetSstSizeHelper(Temperature::kWarm), 0);
  ASSERT_EQ(GetSstSizeHelper(Temperature::kUnknown), 0);
  ASSERT_EQ(GetSstSizeHelper(Temperature::kCold), 0);
  ASSERT_EQ(GetSstSizeHelper(Temperature::kHot), 0);

  // Generate a non-bottommost file in all CFs
  for (int cf = 0; cf < 3; ++cf) {
    ASSERT_OK(Put(cf, "e", "val"));
    ASSERT_OK(Flush(cf));
  }

  // re-verify
  ASSERT_GT(GetSstSizeHelper(Temperature::kWarm), 0);
  // Not supported: ASSERT_GT(GetSstSizeHelper(Temperature::kUnknown), 0);
  ASSERT_EQ(GetSstSizeHelper(Temperature::kCold), 0);
  ASSERT_EQ(GetSstSizeHelper(Temperature::kHot), 0);

  // Now change the FS temperature on bottommost file(s) to kCold
  std::map<uint64_t, Temperature> current_temps;
  test_fs->CopyCurrentSstFileTemperatures(&current_temps);
  for (auto e : current_temps) {
    if (e.second == Temperature::kWarm) {
      test_fs->OverrideSstFileTemperature(e.first, Temperature::kCold);
    }
  }
  // Metadata not yet updated
  ASSERT_EQ(Get("a"), "val");
  ASSERT_EQ(GetSstSizeHelper(Temperature::kCold), 0);

  // Update with Close and UpdateManifestForFilesState, but first save cf
  // descriptors
  std::vector<ColumnFamilyDescriptor> column_families;
  for (size_t i = 0; i < handles_.size(); ++i) {
    ColumnFamilyDescriptor cfdescriptor;
    handles_[i]->GetDescriptor(&cfdescriptor).PermitUncheckedError();
    column_families.push_back(cfdescriptor);
  }
  Close();
  experimental::UpdateManifestForFilesStateOptions update_opts;
  update_opts.update_temperatures = true;
  ASSERT_OK(experimental::UpdateManifestForFilesState(
      options, dbname_, column_families, update_opts));

  // Re-open and re-verify after the update
  ReopenWithColumnFamilies(cfs, options);
  ASSERT_GT(GetSstSizeHelper(Temperature::kCold), 0);
  // Not supported: ASSERT_GT(GetSstSizeHelper(Temperature::kUnknown), 0);
  ASSERT_EQ(GetSstSizeHelper(Temperature::kWarm), 0);
  ASSERT_EQ(GetSstSizeHelper(Temperature::kHot), 0);

  // Change kUnknown to kHot
  test_fs->CopyCurrentSstFileTemperatures(&current_temps);
  for (auto e : current_temps) {
    if (e.second == Temperature::kUnknown) {
      test_fs->OverrideSstFileTemperature(e.first, Temperature::kHot);
    }
  }

  // Update with Close and UpdateManifestForFilesState
  Close();
  ASSERT_OK(experimental::UpdateManifestForFilesState(
      options, dbname_, column_families, update_opts));

  // Re-open and re-verify after the update
  ReopenWithColumnFamilies(cfs, options);
  ASSERT_GT(GetSstSizeHelper(Temperature::kCold), 0);
  ASSERT_EQ(GetSstSizeHelper(Temperature::kUnknown), 0);
  ASSERT_EQ(GetSstSizeHelper(Temperature::kWarm), 0);
  ASSERT_GT(GetSstSizeHelper(Temperature::kHot), 0);

  Close();
}
// WAL recovery mode is WALRecoveryMode::kPointInTimeRecovery: an I/O error
// (as opposed to a corruption) while reading the WAL should still fail DB
// open rather than silently truncating recovery at the error point.
TEST_F(DBTest2, PointInTimeRecoveryWithIOErrorWhileReadingWal) {
  Options options = CurrentOptions();
  DestroyAndReopen(options);
  ASSERT_OK(Put("foo", "value0"));
  Close();
  SyncPoint::GetInstance()->DisableProcessing();
  SyncPoint::GetInstance()->ClearAllCallBacks();
  bool should_inject_error = false;
  SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::RecoverLogFiles:BeforeReadWal",
      [&](void* /*arg*/) { should_inject_error = true; });
  SyncPoint::GetInstance()->SetCallBack(
      "LogReader::ReadMore:AfterReadFile", [&](void* arg) {
        if (should_inject_error) {
          ASSERT_NE(nullptr, arg);
          *static_cast<Status*>(arg) = Status::IOError("Injected IOError");
        }
      });
  SyncPoint::GetInstance()->EnableProcessing();
  options.avoid_flush_during_recovery = true;
  options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery;
  Status s = TryReopen(options);
  ASSERT_TRUE(s.IsIOError());
}
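// Verify that a WAL sync failure while a column family is being created does
// not corrupt the DB: the failed flush surfaces the error, and a subsequent
// reopen under kPointInTimeRecovery succeeds with all CFs intact.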
TEST_F(DBTest2, PointInTimeRecoveryWithSyncFailureInCFCreation) {
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"DBImpl::BackgroundCallFlush:Start:1",
        "PointInTimeRecoveryWithSyncFailureInCFCreation:1"},
       {"PointInTimeRecoveryWithSyncFailureInCFCreation:2",
        "DBImpl::BackgroundCallFlush:Start:2"}});
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  CreateColumnFamilies({"test1"}, Options());
  ASSERT_OK(Put("foo", "bar"));

  // Create a CF while a flush is in progress: the log is synced, but the
  // closed log file is not synced and ends up corrupted.
  port::Thread flush_thread([&]() { ASSERT_NOK(Flush()); });
  TEST_SYNC_POINT("PointInTimeRecoveryWithSyncFailureInCFCreation:1");
  CreateColumnFamilies({"test2"}, Options());
  env_->corrupt_in_sync_ = true;
  TEST_SYNC_POINT("PointInTimeRecoveryWithSyncFailureInCFCreation:2");
  flush_thread.join();
  env_->corrupt_in_sync_ = false;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();

  // Reopening the DB should not corrupt anything
  Options options = CurrentOptions();
  options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery;
  ReopenWithColumnFamilies({"default", "test1", "test2"}, options);
}
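// L0 files are ordered by epoch_number (assigned in flush/ingestion order)
// rather than by largest_seqno. Ingested files carry sequence number 0, so
// ordering by seqno would place them incorrectly relative to flushed files;
// this test verifies the epoch-based ordering and that compaction output
// inherits the minimum epoch_number of its inputs.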
TEST_F(DBTest2, SortL0FilesByEpochNumber) {
  Options options = CurrentOptions();
  options.num_levels = 1;
  options.compaction_style = kCompactionStyleUniversal;
  DestroyAndReopen(options);

  // Set up L0 files to be sorted by their epoch_number
  ASSERT_OK(Put("key1", "seq1"));

  SstFileWriter sst_file_writer{EnvOptions(), options};
  std::string external_file1 = dbname_ + "/test_files1.sst";
  std::string external_file2 = dbname_ + "/test_files2.sst";
  ASSERT_OK(sst_file_writer.Open(external_file1));
  ASSERT_OK(sst_file_writer.Put("key2", "seq0"));
  ASSERT_OK(sst_file_writer.Finish());
  ASSERT_OK(sst_file_writer.Open(external_file2));
  ASSERT_OK(sst_file_writer.Put("key3", "seq0"));
  ASSERT_OK(sst_file_writer.Finish());

  ASSERT_OK(Put("key4", "seq2"));
  ASSERT_OK(Flush());

  auto* handle = db_->DefaultColumnFamily();
  ASSERT_OK(db_->IngestExternalFile(handle, {external_file1, external_file2},
                                    IngestExternalFileOptions()));

  // Verify that L0 files are sorted by epoch_number in descending order
  // instead of by largest_seqno
  std::vector<FileMetaData*> level0_files = GetLevelFileMetadatas(0 /* level*/);
  ASSERT_EQ(level0_files.size(), 3);
  EXPECT_EQ(level0_files[0]->epoch_number, 3);
  EXPECT_EQ(level0_files[0]->fd.largest_seqno, 0);
  ASSERT_EQ(level0_files[0]->num_entries, 1);
  ASSERT_TRUE(level0_files[0]->largest.user_key() == Slice("key3"));
  EXPECT_EQ(level0_files[1]->epoch_number, 2);
  EXPECT_EQ(level0_files[1]->fd.largest_seqno, 0);
  ASSERT_EQ(level0_files[1]->num_entries, 1);
  ASSERT_TRUE(level0_files[1]->largest.user_key() == Slice("key2"));
  EXPECT_EQ(level0_files[2]->epoch_number, 1);
  EXPECT_EQ(level0_files[2]->fd.largest_seqno, 2);
  ASSERT_EQ(level0_files[2]->num_entries, 2);
  ASSERT_TRUE(level0_files[2]->largest.user_key() == Slice("key4"));
  ASSERT_TRUE(level0_files[2]->smallest.user_key() == Slice("key1"));

  // Verify that a compacted file is assigned the minimum epoch_number
  // among its input files
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  level0_files = GetLevelFileMetadatas(0 /* level*/);
  ASSERT_EQ(level0_files.size(), 1);
  EXPECT_EQ(level0_files[0]->epoch_number, 1);
  ASSERT_EQ(level0_files[0]->num_entries, 4);
  ASSERT_TRUE(level0_files[0]->largest.user_key() == Slice("key4"));
  ASSERT_TRUE(level0_files[0]->smallest.user_key() == Slice("key1"));
}
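// A CompactRange() that only moves a file to a different level (via
// change_level/target_level) behaves like a trivial move, so the file should
// keep its original epoch_number.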
TEST_F(DBTest2, SameEpochNumberAfterCompactRangeChangeLevel) {
  Options options = CurrentOptions();
  options.num_levels = 7;
  options.compaction_style = CompactionStyle::kCompactionStyleLevel;
  options.disable_auto_compactions = true;
  DestroyAndReopen(options);

  // Set up a file in L1 to be moved to L0 in a later step by CompactRange()
  ASSERT_OK(Put("key1", "seq1"));
  ASSERT_OK(Flush());
  MoveFilesToLevel(1, 0);
  std::vector<FileMetaData*> level0_files = GetLevelFileMetadatas(0 /* level*/);
  ASSERT_EQ(level0_files.size(), 0);
  std::vector<FileMetaData*> level1_files = GetLevelFileMetadatas(1 /* level*/);
  ASSERT_EQ(level1_files.size(), 1);
  std::vector<FileMetaData*> level2_files = GetLevelFileMetadatas(2 /* level*/);
  ASSERT_EQ(level2_files.size(), 0);
  ASSERT_EQ(level1_files[0]->epoch_number, 1);

  // Verify that CompactRange() moving the file to L0 keeps the file's
  // epoch_number
  CompactRangeOptions croptions;
  croptions.change_level = true;
  croptions.target_level = 0;
  ASSERT_OK(db_->CompactRange(croptions, nullptr, nullptr));
  level0_files = GetLevelFileMetadatas(0 /* level*/);
  level1_files = GetLevelFileMetadatas(1 /* level*/);
  ASSERT_EQ(level0_files.size(), 1);
  ASSERT_EQ(level1_files.size(), 0);
  EXPECT_EQ(level0_files[0]->epoch_number, 1);
  ASSERT_EQ(level0_files[0]->num_entries, 1);
  ASSERT_TRUE(level0_files[0]->largest.user_key() == Slice("key1"));
}
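// Verify that per-file epoch_numbers and each CF's next epoch number survive
// a DB reopen, for both allow_ingest_behind settings (which shifts all epoch
// numbers by kReservedEpochNumberForFileIngestedBehind).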
TEST_F(DBTest2, RecoverEpochNumber) {
  for (bool allow_ingest_behind : {true, false}) {
    Options options = CurrentOptions();
    options.allow_ingest_behind = allow_ingest_behind;
    options.num_levels = 7;
    options.compaction_style = kCompactionStyleLevel;
    options.disable_auto_compactions = true;
    DestroyAndReopen(options);
    CreateAndReopenWithCF({"cf1"}, options);
    VersionSet* versions = dbfull()->GetVersionSet();
    assert(versions);
    const ColumnFamilyData* default_cf =
        versions->GetColumnFamilySet()->GetDefault();
    const ColumnFamilyData* cf1 =
        versions->GetColumnFamilySet()->GetColumnFamily("cf1");

    // Set up files in the default CF to recover in a later step
    ASSERT_OK(Put("key1", "epoch1"));
    ASSERT_OK(Flush());
    MoveFilesToLevel(1 /* level*/, 0 /* cf*/);
    ASSERT_OK(Put("key2", "epoch2"));
    ASSERT_OK(Flush());

    std::vector<FileMetaData*> level0_files =
        GetLevelFileMetadatas(0 /* level*/);
    ASSERT_EQ(level0_files.size(), 1);
    ASSERT_EQ(level0_files[0]->epoch_number,
              allow_ingest_behind
                  ? 2 + kReservedEpochNumberForFileIngestedBehind
                  : 2);
    ASSERT_EQ(level0_files[0]->num_entries, 1);
    ASSERT_TRUE(level0_files[0]->largest.user_key() == Slice("key2"));

    std::vector<FileMetaData*> level1_files =
        GetLevelFileMetadatas(1 /* level*/);
    ASSERT_EQ(level1_files.size(), 1);
    ASSERT_EQ(level1_files[0]->epoch_number,
              allow_ingest_behind
                  ? 1 + kReservedEpochNumberForFileIngestedBehind
                  : 1);
    ASSERT_EQ(level1_files[0]->num_entries, 1);
    ASSERT_TRUE(level1_files[0]->largest.user_key() == Slice("key1"));

    // Set up files in cf1 to recover in a later step
    ASSERT_OK(Put(1 /* cf */, "cf1_key1", "epoch1"));
    ASSERT_OK(Flush(1 /* cf */));
    std::vector<FileMetaData*> level0_files_cf1 =
        GetLevelFileMetadatas(0 /* level*/, 1 /* cf*/);
    ASSERT_EQ(level0_files_cf1.size(), 1);
    ASSERT_EQ(level0_files_cf1[0]->epoch_number,
              allow_ingest_behind
                  ? 1 + kReservedEpochNumberForFileIngestedBehind
                  : 1);
    ASSERT_EQ(level0_files_cf1[0]->num_entries, 1);
    ASSERT_TRUE(level0_files_cf1[0]->largest.user_key() == Slice("cf1_key1"));

    ASSERT_EQ(default_cf->GetNextEpochNumber(),
              allow_ingest_behind
                  ? 3 + kReservedEpochNumberForFileIngestedBehind
                  : 3);
    ASSERT_EQ(cf1->GetNextEpochNumber(),
              allow_ingest_behind
                  ? 2 + kReservedEpochNumberForFileIngestedBehind
                  : 2);

    // Verify that the epoch_numbers of files in different levels/CFs are
    // persisted and recovered correctly
    ReopenWithColumnFamilies({"default", "cf1"}, options);
    versions = dbfull()->GetVersionSet();
    assert(versions);
    default_cf = versions->GetColumnFamilySet()->GetDefault();
    cf1 = versions->GetColumnFamilySet()->GetColumnFamily("cf1");

    level0_files = GetLevelFileMetadatas(0 /* level*/);
    ASSERT_EQ(level0_files.size(), 1);
    EXPECT_EQ(level0_files[0]->epoch_number,
              allow_ingest_behind
                  ? 2 + kReservedEpochNumberForFileIngestedBehind
                  : 2);
    ASSERT_EQ(level0_files[0]->num_entries, 1);
    ASSERT_TRUE(level0_files[0]->largest.user_key() == Slice("key2"));

    level1_files = GetLevelFileMetadatas(1 /* level*/);
    ASSERT_EQ(level1_files.size(), 1);
    EXPECT_EQ(level1_files[0]->epoch_number,
              allow_ingest_behind
                  ? 1 + kReservedEpochNumberForFileIngestedBehind
                  : 1);
    ASSERT_EQ(level1_files[0]->num_entries, 1);
    ASSERT_TRUE(level1_files[0]->largest.user_key() == Slice("key1"));

    level0_files_cf1 = GetLevelFileMetadatas(0 /* level*/, 1 /* cf*/);
    ASSERT_EQ(level0_files_cf1.size(), 1);
    EXPECT_EQ(level0_files_cf1[0]->epoch_number,
              allow_ingest_behind
                  ? 1 + kReservedEpochNumberForFileIngestedBehind
                  : 1);
    ASSERT_EQ(level0_files_cf1[0]->num_entries, 1);
    ASSERT_TRUE(level0_files_cf1[0]->largest.user_key() == Slice("cf1_key1"));

    // Verify that the next epoch number is recovered correctly
    EXPECT_EQ(default_cf->GetNextEpochNumber(),
              allow_ingest_behind
                  ? 3 + kReservedEpochNumberForFileIngestedBehind
                  : 3);
    EXPECT_EQ(cf1->GetNextEpochNumber(),
              allow_ingest_behind
                  ? 2 + kReservedEpochNumberForFileIngestedBehind
                  : 2);
  }
}
TEST_F(DBTest2, RenameDirectory) {
  Options options = CurrentOptions();
  DestroyAndReopen(options);
  ASSERT_OK(Put("foo", "value0"));
  Close();
  auto old_dbname = dbname_;
  auto new_dbname = dbname_ + "_2";
  EXPECT_OK(env_->RenameFile(dbname_, new_dbname));
  options.create_if_missing = false;
  dbname_ = new_dbname;
  ASSERT_OK(TryReopen(options));
  ASSERT_EQ("value0", Get("foo"));
  Destroy(options);
  dbname_ = old_dbname;
}
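// Verify backward compatibility of SST unique-id verification: files whose
// MANIFEST entry lacks a unique id (simulated by zeroing the id in the
// VersionEdit) are skipped rather than failing verification.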
TEST_F(DBTest2, SstUniqueIdVerifyBackwardCompatible) {
  const int kNumSst = 3;
  const int kLevel0Trigger = 4;
  auto options = CurrentOptions();
  options.level0_file_num_compaction_trigger = kLevel0Trigger;
  options.statistics = CreateDBStatistics();

  // Skip verification for now
  options.verify_sst_unique_id_in_manifest = false;
  Reopen(options);

  std::atomic_int skipped = 0;
  std::atomic_int passed = 0;
  SyncPoint::GetInstance()->SetCallBack(
      "BlockBasedTable::Open::SkippedVerifyUniqueId",
      [&](void* /*arg*/) { skipped++; });
  SyncPoint::GetInstance()->SetCallBack(
      "BlockBasedTable::Open::PassedVerifyUniqueId",
      [&](void* /*arg*/) { passed++; });
  SyncPoint::GetInstance()->EnableProcessing();

  // generate a few SSTs
  for (int i = 0; i < kNumSst; i++) {
    for (int j = 0; j < 100; j++) {
      ASSERT_OK(Put(Key(i * 10 + j), "value"));
    }
    ASSERT_OK(Flush());
  }

  // Verification has been skipped on files so far
  EXPECT_EQ(skipped, kNumSst);
  EXPECT_EQ(passed, 0);

  // Reopen with verification
  options.verify_sst_unique_id_in_manifest = true;
  skipped = 0;
  passed = 0;
  Reopen(options);
  EXPECT_EQ(skipped, 0);
  EXPECT_EQ(passed, kNumSst);

  // Now simulate a missing unique id in the manifest for the next file.
  // NOTE: this only works for loading the manifest from disk, not the
  // in-memory manifest, so we need to re-open below.
  SyncPoint::GetInstance()->SetCallBack(
      "VersionEdit::EncodeTo:UniqueId", [&](void* arg) {
        auto unique_id = static_cast<UniqueId64x2*>(arg);
        // remove the id before writing it to the manifest
        (*unique_id)[0] = 0;
        (*unique_id)[1] = 0;
      });

  // test a compaction-generated SST
  for (int i = kNumSst; i < kLevel0Trigger; i++) {
    for (int j = 0; j < 100; j++) {
      ASSERT_OK(Put(Key(i * 10 + j), "value"));
    }
    ASSERT_OK(Flush());
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ("0,1", FilesPerLevel(0));

  // Reopen (with verification)
  ASSERT_TRUE(options.verify_sst_unique_id_in_manifest);
  skipped = 0;
  passed = 0;
  Reopen(options);
  EXPECT_EQ(skipped, 1);
  EXPECT_EQ(passed, 0);
}
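// Verify that a mismatch between the unique id recorded in the MANIFEST and
// the id derived from the table properties (tampered with here by swapping
// the db_session_id) is reported as Corruption at DB open.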
TEST_F(DBTest2, SstUniqueIdVerify) {
  const int kNumSst = 3;
  const int kLevel0Trigger = 4;
  auto options = CurrentOptions();
  options.level0_file_num_compaction_trigger = kLevel0Trigger;
  // Allow mismatch for now
  options.verify_sst_unique_id_in_manifest = false;
  Reopen(options);

  SyncPoint::GetInstance()->SetCallBack(
      "PropertyBlockBuilder::AddTableProperty:Start", [&](void* props_vs) {
        auto props = static_cast<TableProperties*>(props_vs);
        // update the table property session_id to a different one, which
        // changes the unique ID
        props->db_session_id = DBImpl::GenerateDbSessionId(nullptr);
      });
  SyncPoint::GetInstance()->EnableProcessing();

  // generate a few SSTs
  for (int i = 0; i < kNumSst; i++) {
    for (int j = 0; j < 100; j++) {
      ASSERT_OK(Put(Key(i * 10 + j), "value"));
    }
    ASSERT_OK(Flush());
  }

  // Reopen with verification should report corruption
  options.verify_sst_unique_id_in_manifest = true;
  auto s = TryReopen(options);
  ASSERT_TRUE(s.IsCorruption());

  // Reopen without verification should be fine
  options.verify_sst_unique_id_in_manifest = false;
  Reopen(options);

  // test a compaction-generated SST
  for (int i = kNumSst; i < kLevel0Trigger; i++) {
    for (int j = 0; j < 100; j++) {
      ASSERT_OK(Put(Key(i * 10 + j), "value"));
    }
    ASSERT_OK(Flush());
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ("0,1", FilesPerLevel(0));

  // Reopen with verification should fail
  options.verify_sst_unique_id_in_manifest = true;
  s = TryReopen(options);
  ASSERT_TRUE(s.IsCorruption());
}
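// Same as SstUniqueIdVerify, but with the bad unique ids confined to one
// column family: corruption in any CF should still fail the whole DB open.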
TEST_F(DBTest2, SstUniqueIdVerifyMultiCFs) {
  const int kNumSst = 3;
  const int kLevel0Trigger = 4;
  auto options = CurrentOptions();
  options.level0_file_num_compaction_trigger = kLevel0Trigger;
  // Allow mismatch for now
  options.verify_sst_unique_id_in_manifest = false;
  CreateAndReopenWithCF({"one", "two"}, options);

  // generate good SSTs
  for (int cf_num : {0, 2}) {
    for (int i = 0; i < kNumSst; i++) {
      for (int j = 0; j < 100; j++) {
        ASSERT_OK(Put(cf_num, Key(i * 10 + j), "value"));
      }
      ASSERT_OK(Flush(cf_num));
    }
  }

  // generate SSTs with bad unique id
  SyncPoint::GetInstance()->SetCallBack(
      "PropertyBlockBuilder::AddTableProperty:Start", [&](void* props_vs) {
        auto props = static_cast<TableProperties*>(props_vs);
        // update table property session_id to a different one
        props->db_session_id = DBImpl::GenerateDbSessionId(nullptr);
      });
  SyncPoint::GetInstance()->EnableProcessing();
  for (int i = 0; i < kNumSst; i++) {
    for (int j = 0; j < 100; j++) {
      ASSERT_OK(Put(1, Key(i * 10 + j), "value"));
    }
    ASSERT_OK(Flush(1));
  }

  // Reopen with verification should report corruption
  options.verify_sst_unique_id_in_manifest = true;
  auto s = TryReopenWithColumnFamilies({"default", "one", "two"}, options);
  ASSERT_TRUE(s.IsCorruption());
}
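// Verify that best_efforts_recovery can open a DB whose k-th SST fails
// unique-id verification by rolling the DB back to the state before that
// file, and that a subsequent regular open of the recovered DB succeeds.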
TEST_F(DBTest2, BestEffortsRecoveryWithSstUniqueIdVerification) {
  const auto tamper_with_uniq_id = [&](void* arg) {
    auto props = static_cast<TableProperties*>(arg);
    assert(props);
    // update table property session_id to a different one
    props->db_session_id = DBImpl::GenerateDbSessionId(nullptr);
  };

  const auto assert_db = [&](size_t expected_count,
                             const std::string& expected_v) {
    std::unique_ptr<Iterator> it(db_->NewIterator(ReadOptions()));
    size_t cnt = 0;
    for (it->SeekToFirst(); it->Valid(); it->Next(), ++cnt) {
      ASSERT_EQ(std::to_string(cnt), it->key());
      ASSERT_EQ(expected_v, it->value());
    }
    EXPECT_OK(it->status());
    ASSERT_EQ(expected_count, cnt);
  };

  const int num_l0_compaction_trigger = 8;
  const int num_l0 = num_l0_compaction_trigger - 1;
  Options options = CurrentOptions();
  options.level0_file_num_compaction_trigger = num_l0_compaction_trigger;

  for (int k = 0; k < num_l0; ++k) {
    // Allow mismatch for now
    options.verify_sst_unique_id_in_manifest = false;
    DestroyAndReopen(options);

    constexpr size_t num_keys_per_file = 10;
    for (int i = 0; i < num_l0; ++i) {
      for (size_t j = 0; j < num_keys_per_file; ++j) {
        ASSERT_OK(Put(std::to_string(j), "v" + std::to_string(i)));
      }
      if (i == k) {
        SyncPoint::GetInstance()->DisableProcessing();
        SyncPoint::GetInstance()->SetCallBack(
            "PropertyBlockBuilder::AddTableProperty:Start",
            tamper_with_uniq_id);
        SyncPoint::GetInstance()->EnableProcessing();
      }
      ASSERT_OK(Flush());
    }

    options.verify_sst_unique_id_in_manifest = true;
    Status s = TryReopen(options);
    ASSERT_TRUE(s.IsCorruption());

    options.best_efforts_recovery = true;
    Reopen(options);
    assert_db(k == 0 ? 0 : num_keys_per_file, "v" + std::to_string(k - 1));

    // Reopen with regular recovery
    options.best_efforts_recovery = false;
    Reopen(options);
    assert_db(k == 0 ? 0 : num_keys_per_file, "v" + std::to_string(k - 1));
    SyncPoint::GetInstance()->DisableProcessing();
    SyncPoint::GetInstance()->ClearAllCallBacks();

    for (size_t i = 0; i < num_keys_per_file; ++i) {
      ASSERT_OK(Put(std::to_string(i), "v"));
    }
    ASSERT_OK(Flush());
    Reopen(options);
    for (size_t i = 0; i < num_keys_per_file; ++i) {
      ASSERT_EQ("v", Get(std::to_string(i)));
    }
  }
}
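// Verify that DBImpl::GetLatestSequenceForKey() also returns the user-defined
// timestamp of the latest record, and that with cache_only=true it is served
// entirely from the memtables (no SST reads).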
TEST_F(DBTest2, GetLatestSeqAndTsForKey) {
  Destroy(last_options_);
  Options options = CurrentOptions();
  options.max_write_buffer_size_to_maintain = 64 << 10;
  options.create_if_missing = true;
  options.disable_auto_compactions = true;
  options.comparator = test::BytewiseComparatorWithU64TsWrapper();
  options.statistics = CreateDBStatistics();
  Reopen(options);

  constexpr uint64_t kTsU64Value = 12;

  for (uint64_t key = 0; key < 100; ++key) {
    std::string ts;
    PutFixed64(&ts, kTsU64Value);
    std::string key_str;
    PutFixed64(&key_str, key);
    std::reverse(key_str.begin(), key_str.end());
    ASSERT_OK(db_->Put(WriteOptions(), key_str, ts, "value"));
  }
  ASSERT_OK(Flush());

  constexpr bool cache_only = true;
  constexpr SequenceNumber lower_bound_seq = 0;
  auto* cfhi = static_cast_with_check<ColumnFamilyHandleImpl>(
      dbfull()->DefaultColumnFamily());
  assert(cfhi);
  assert(cfhi->cfd());
  SuperVersion* sv = cfhi->cfd()->GetSuperVersion();
  for (uint64_t key = 0; key < 100; ++key) {
    std::string key_str;
    PutFixed64(&key_str, key);
    std::reverse(key_str.begin(), key_str.end());
    std::string ts;
    SequenceNumber seq = kMaxSequenceNumber;
    bool found_record_for_key = false;
    bool is_blob_index = false;
    const Status s = dbfull()->GetLatestSequenceForKey(
        sv, key_str, cache_only, lower_bound_seq, &seq, &ts,
        &found_record_for_key, &is_blob_index);
    ASSERT_OK(s);
    std::string expected_ts;
    PutFixed64(&expected_ts, kTsU64Value);
    ASSERT_EQ(expected_ts, ts);
    ASSERT_TRUE(found_record_for_key);
    ASSERT_FALSE(is_blob_index);
  }

  // Verify that there were no reads from SST files.
  ASSERT_EQ(0, options.statistics->getTickerCount(GET_HIT_L0));
}
#if defined(ZSTD)
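// Verify that the optional ZSTD frame checksum (compression_opts.checksum)
// catches corruption of the compressed payload, both on the read path and,
// with paranoid_file_checks, during flush.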
TEST_F(DBTest2, ZSTDChecksum) {
  // Verify that corruption during decompression is caught.
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.compression = kZSTD;
  options.compression_opts.max_compressed_bytes_per_kb = 1024;
  options.compression_opts.checksum = true;
  DestroyAndReopen(options);
  Random rnd(33);
  ASSERT_OK(Put(Key(0), rnd.RandomString(4 << 10)));
  SyncPoint::GetInstance()->SetCallBack(
      "BlockBasedTableBuilder::WriteBlock:TamperWithCompressedData",
      [&](void* arg) {
        std::string* output = static_cast<std::string*>(arg);
        // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#zstandard-frames
        // The checksum is the last 4 bytes of the frame; corrupting that part
        // is the most controllable choice for a unit test.
        output->data()[output->size() - 1]++;
      });
  SyncPoint::GetInstance()->EnableProcessing();
  ASSERT_OK(Flush());
  PinnableSlice val;
  Status s = Get(Key(0), &val);
  ASSERT_TRUE(s.IsCorruption());

  // Corruption is caught during flush with paranoid file checks.
  options.paranoid_file_checks = true;
  DestroyAndReopen(options);
  ASSERT_OK(Put(Key(0), rnd.RandomString(4 << 10)));
  s = Flush();
  ASSERT_TRUE(s.IsCorruption());
}
#endif
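// Verify that a kBlockCacheTier read returns Incomplete instead of opening
// the table file when the table is not already in the table cache.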
TEST_F(DBTest2, TableCacheMissDuringReadFromBlockCacheTier) {
  Options options = CurrentOptions();
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  Reopen(options);

  // Give table cache zero capacity to prevent preloading tables. That way,
  // `kBlockCacheTier` reads will fail due to table cache misses.
  dbfull()->TEST_table_cache()->SetCapacity(0);
  ASSERT_OK(Put("foo", "bar"));
  ASSERT_OK(Flush());

  uint64_t orig_num_file_opens = TestGetTickerCount(options, NO_FILE_OPENS);
  ReadOptions non_blocking_opts;
  non_blocking_opts.read_tier = kBlockCacheTier;
  std::string value;
  ASSERT_TRUE(db_->Get(non_blocking_opts, "foo", &value).IsIncomplete());
  ASSERT_EQ(orig_num_file_opens, TestGetTickerCount(options, NO_FILE_OPENS));
}
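// Verify experimental::GetFileChecksumsFromCurrentManifest(): reading the
// current MANIFEST offline (through a read-only FileSystem) should yield the
// same per-file checksums and checksum function names that the live DB
// reported via GetLiveFilesMetaData(), including after a CF drop.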
TEST_F(DBTest2, GetFileChecksumsFromCurrentManifest_CRC32) {
  Options opts = CurrentOptions();
  opts.create_if_missing = true;
  opts.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory();
  opts.level0_file_num_compaction_trigger = 10;

  // Bootstrap the test database.
  DB* db = nullptr;
  std::string dbname = test::PerThreadDBPath("file_chksum");
  ASSERT_OK(DB::Open(opts, dbname, &db));
  WriteOptions wopts;
  FlushOptions fopts;
  fopts.wait = true;
  Random rnd(test::RandomSeed());

  // Write 4 files into the default column family.
  for (int i = 0; i < 4; i++) {
    ASSERT_OK(db->Put(wopts, Key(i), rnd.RandomString(100)));
    ASSERT_OK(db->Flush(fopts));
  }

  // Create a new column family, write 1 file into it and drop it.
  ColumnFamilyHandle* cf;
  ASSERT_OK(
      db->CreateColumnFamily(ColumnFamilyOptions(), "soon_to_be_deleted", &cf));
  ASSERT_OK(db->Put(wopts, cf, "some_key", "some_value"));
  ASSERT_OK(db->Flush(fopts, cf));

  // Dropping the column family should generate a corresponding version edit
  // in the manifest, which we expect to be correctly interpreted by the
  // GetFileChecksumsFromCurrentManifest API after db close.
  ASSERT_OK(db->DropColumnFamily(cf));
  delete cf;
  cf = nullptr;

  // Obtain rich files metadata as the source of truth.
  std::vector<LiveFileMetaData> live_files;
  db->GetLiveFilesMetaData(&live_files);
  ASSERT_OK(db->Close());
  delete db;
  db = nullptr;

  // Process the current MANIFEST file and build internal file checksum
  // mappings.
  std::unique_ptr<FileChecksumList> checksum_list(NewFileChecksumList());
  auto read_only_fs =
      std::make_shared<ReadOnlyFileSystem>(env_->GetFileSystem());
  ASSERT_OK(experimental::GetFileChecksumsFromCurrentManifest(
      read_only_fs.get(), dbname, checksum_list.get()));
  ASSERT_TRUE(checksum_list != nullptr);

  // Retrieve files, related checksums and checksum functions.
  std::vector<uint64_t> file_numbers;
  std::vector<std::string> checksums;
  std::vector<std::string> checksum_func_names;
  ASSERT_OK(checksum_list->GetAllFileChecksums(&file_numbers, &checksums,
                                               &checksum_func_names));

  // Compare results.
  ASSERT_EQ(live_files.size(), checksum_list->size());
  for (size_t i = 0; i < live_files.size(); i++) {
    std::string stored_checksum;
    std::string stored_func_name;
    ASSERT_OK(checksum_list->SearchOneFileChecksum(
        live_files[i].file_number, &stored_checksum, &stored_func_name));
    ASSERT_EQ(live_files[i].file_checksum, stored_checksum);
    ASSERT_EQ(live_files[i].file_checksum_func_name, stored_func_name);
  }
}
}  // namespace ROCKSDB_NAMESPACE

int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  RegisterCustomObjects(argc, argv);
  return RUN_ALL_TESTS();
}