| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
77717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762
27722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762
77727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763
27732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588358935903591359235933594359535963597359835993600360136023603360436053606360736083609361036113612361336143615361636173618361936203621362236233624362536263627362836293630363136323633363436353636363736383639364036413642364336443645364636473648364936503651365236533654365536563657365836593660366136623663366436653666366736683669367036713672367336743675367636773678367936803681368236833684368536863687368836893690369136923693369436953696369736983699370037013702370337043705370637073708370937103711371237133714371537163717371837193720372137223723372437253726372737283729373037313732373337343735373637373738373937403741374237433744374537463747374837493750375137523753375437553756375737583759376037613762376337643765376637673768376937703771377237733774377537763
77737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884388538863887388838893890389138923893389438953896389738983899390039013902390339043905390639073908390939103911391239133914391539163917391839193920392139223923392439253926392739283929393039313932393339343935393639373938393939403941394239433944394539463947394839493950395139523953395439553956395739583959396039613962396339643965396639673968396939703971397239733974397539763977397839793980398139823983398439853986398739883989399039913992399339943995399639973998399940004001400240034004400540064007400840094010401140124013401440154016401740184019402040214022402340244025402640274028402940304031403240334034403540364037403840394040404140424043404440454046404740484049405040514052405340544055405640574058405940604061406240634064406540664067406840694070407140724073407440754076407740784079408040814082408340844085408640874088408940904091409240934094409540964097409840994100410141024103410441054106410741084109411041114112411341144115411641174118411941204121412241234124412541264127412841294130413141324133413441354136413741384139414041414142414341444145414641474148414941504151415241534154415541564157415841594160416141624163416441654166416741684169417041714172417341744175417641774178417941804181418241834184418541864187418841894190419141924193419441954196419741984199420042014202420342044205420642074208420942104211421242134214421542164217421842194220422142224223422442254226422742284229423042314232423342344235423642374238423942404241424242434244424542464247424842494250425142524253425442554256425742584259426042614262426342644265426642674268426942704271427242734274427542764
27742784279428042814282428342844285428642874288428942904291429242934294429542964297429842994300430143024303430443054306430743084309431043114312431343144315431643174318431943204321432243234324432543264327432843294330433143324333433443354336433743384339434043414342434343444345434643474348434943504351435243534354435543564357435843594360436143624363436443654366436743684369437043714372437343744375437643774378437943804381438243834384438543864387438843894390439143924393439443954396439743984399440044014402440344044405440644074408440944104411441244134414441544164417441844194420442144224423442444254426442744284429443044314432443344344435443644374438443944404441444244434444444544464447444844494450445144524453445444554456445744584459446044614462446344644465446644674468446944704471447244734474447544764477447844794480448144824483448444854486448744884489449044914492449344944495449644974498449945004501450245034504450545064507450845094510451145124513451445154516451745184519452045214522452345244525452645274528452945304531453245334534453545364537453845394540454145424543454445454546454745484549455045514552455345544555455645574558455945604561456245634564456545664567456845694570457145724573457445754576457745784579458045814582458345844585458645874588458945904591459245934594459545964597459845994600460146024603460446054606460746084609461046114612461346144615461646174618461946204621462246234624462546264627462846294630463146324633463446354636463746384639464046414642464346444645464646474648464946504651465246534654465546564657465846594660466146624663466446654666466746684669467046714672467346744675467646774678467946804681468246834684468546864687468846894690469146924693469446954696469746984699470047014702470347044705470647074708470947104711471247134714471547164717471847194720472147224723472447254726472747284729473047314732473347344735473647374738473947404741474247434744474547464747474847494750475147524753475447554756475747584759476047614762476347644765476647674768476947704771477247734774477547764
77747784779478047814782478347844785478647874788478947904791479247934794479547964797479847994800480148024803480448054806480748084809481048114812481348144815481648174818481948204821482248234824482548264827482848294830483148324833483448354836483748384839484048414842484348444845484648474848484948504851485248534854485548564857485848594860486148624863486448654866486748684869487048714872487348744875487648774878487948804881488248834884488548864887488848894890489148924893489448954896489748984899490049014902490349044905490649074908490949104911491249134914491549164917491849194920492149224923492449254926492749284929493049314932493349344935493649374938493949404941494249434944494549464947494849494950495149524953495449554956495749584959496049614962496349644965496649674968496949704971497249734974497549764977497849794980498149824983498449854986498749884989499049914992499349944995499649974998499950005001500250035004500550065007500850095010501150125013501450155016501750185019502050215022502350245025502650275028502950305031503250335034503550365037503850395040504150425043504450455046504750485049505050515052505350545055505650575058505950605061506250635064506550665067506850695070507150725073507450755076507750785079508050815082508350845085508650875088508950905091509250935094509550965097509850995100510151025103510451055106510751085109511051115112511351145115511651175118511951205121512251235124512551265127512851295130513151325133513451355136513751385139514051415142514351445145514651475148514951505151515251535154515551565157515851595160516151625163516451655166516751685169517051715172517351745175517651775178517951805181518251835184518551865187518851895190519151925193519451955196519751985199520052015202520352045205520652075208520952105211521252135214521552165217521852195220522152225223522452255226522752285229523052315232523352345235523652375238523952405241524252435244524552465247524852495250525152525253525452555256525752585259526052615262526352645265526652675268526952705271527252735274527552765
27752785279528052815282528352845285528652875288528952905291529252935294529552965297529852995300530153025303530453055306530753085309531053115312531353145315531653175318531953205321532253235324532553265327532853295330533153325333533453355336533753385339534053415342534353445345534653475348534953505351535253535354535553565357535853595360536153625363536453655366536753685369537053715372537353745375537653775378537953805381538253835384538553865387538853895390539153925393539453955396539753985399540054015402540354045405540654075408540954105411541254135414541554165417541854195420542154225423542454255426542754285429543054315432543354345435543654375438543954405441544254435444544554465447544854495450545154525453545454555456545754585459546054615462546354645465546654675468546954705471547254735474547554765477547854795480548154825483548454855486548754885489549054915492549354945495549654975498549955005501550255035504550555065507550855095510551155125513551455155516551755185519552055215522552355245525552655275528552955305531553255335534553555365537553855395540554155425543554455455546554755485549555055515552555355545555555655575558555955605561556255635564556555665567556855695570557155725573557455755576557755785579558055815582558355845585558655875588558955905591559255935594559555965597559855995600560156025603560456055606560756085609561056115612561356145615561656175618561956205621562256235624562556265627562856295630563156325633563456355636563756385639564056415642564356445645564656475648564956505651565256535654565556565657565856595660566156625663566456655666566756685669567056715672567356745675567656775678567956805681568256835684568556865687568856895690569156925693569456955696569756985699570057015702570357045705570657075708570957105711571257135714571557165717571857195720572157225723572457255726572757285729573057315732573357345735573657375738573957405741574257435744574557465747574857495750575157525753575457555756575757585759576057615762576357645765576657675768576957705771577257735774577557765
77757785779578057815782578357845785578657875788578957905791579257935794579557965797579857995800580158025803580458055806580758085809581058115812581358145815581658175818581958205821582258235824582558265827582858295830583158325833583458355836583758385839584058415842584358445845584658475848584958505851585258535854585558565857585858595860586158625863586458655866586758685869587058715872587358745875587658775878587958805881588258835884588558865887588858895890589158925893589458955896589758985899590059015902590359045905590659075908590959105911591259135914591559165917591859195920592159225923592459255926592759285929593059315932593359345935593659375938593959405941594259435944594559465947594859495950595159525953595459555956595759585959596059615962596359645965596659675968596959705971597259735974597559765977597859795980598159825983598459855986598759885989599059915992599359945995599659975998599960006001600260036004600560066007600860096010601160126013601460156016601760186019602060216022602360246025602660276028602960306031603260336034603560366037603860396040604160426043604460456046604760486049605060516052605360546055605660576058605960606061606260636064606560666067606860696070607160726073607460756076607760786079608060816082608360846085608660876088608960906091609260936094609560966097609860996100610161026103610461056106610761086109611061116112611361146115611661176118611961206121612261236124612561266127612861296130613161326133613461356136613761386139614061416142614361446145614661476148614961506151615261536154615561566157615861596160616161626163616461656166616761686169617061716172617361746175617661776178617961806181618261836184618561866187618861896190619161926193619461956196619761986199620062016202620362046205620662076208620962106211621262136214621562166217621862196220622162226223622462256226622762286229623062316232623362346235623662376238623962406241624262436244624562466247624862496250625162526253625462556256625762586259626062616262626362646265626662676268626962706271627262736274627562766
27762786279628062816282628362846285628662876288628962906291629262936294629562966297629862996300630163026303630463056306630763086309631063116312631363146315631663176318631963206321632263236324632563266327632863296330633163326333633463356336633763386339634063416342634363446345634663476348634963506351635263536354635563566357635863596360636163626363636463656366636763686369637063716372637363746375637663776378637963806381638263836384638563866387638863896390639163926393639463956396639763986399640064016402640364046405640664076408640964106411641264136414641564166417641864196420642164226423642464256426642764286429643064316432643364346435643664376438643964406441644264436444644564466447644864496450645164526453645464556456645764586459646064616462646364646465646664676468646964706471647264736474647564766477647864796480648164826483648464856486648764886489649064916492649364946495649664976498649965006501650265036504650565066507650865096510651165126513651465156516651765186519652065216522652365246525652665276528652965306531653265336534653565366537653865396540654165426543654465456546654765486549655065516552655365546555655665576558655965606561656265636564656565666567656865696570657165726573657465756576657765786579658065816582658365846585658665876588658965906591659265936594659565966597659865996600660166026603660466056606660766086609661066116612661366146615661666176618661966206621662266236624662566266627662866296630663166326633663466356636663766386639664066416642664366446645664666476648664966506651665266536654665566566657665866596660666166626663666466656666666766686669667066716672667366746675667666776678667966806681668266836684668566866687668866896690669166926693669466956696669766986699670067016702670367046705670667076708670967106711671267136714671567166717671867196720672167226723672467256726672767286729673067316732673367346735673667376738673967406741674267436744674567466747674867496750675167526753675467556756675767586759676067616762676367646765676667676768676967706771677267736774677567766
77767786779678067816782678367846785678667876788678967906791679267936794679567966797679867996800680168026803680468056806680768086809681068116812681368146815681668176818681968206821682268236824682568266827682868296830683168326833683468356836683768386839684068416842684368446845684668476848684968506851685268536854685568566857685868596860686168626863686468656866686768686869687068716872687368746875687668776878687968806881688268836884688568866887688868896890689168926893689468956896689768986899690069016902690369046905690669076908690969106911691269136914691569166917691869196920692169226923692469256926692769286929693069316932693369346935693669376938693969406941694269436944694569466947694869496950695169526953695469556956695769586959696069616962696369646965696669676968696969706971697269736974697569766977697869796980698169826983698469856986698769886989699069916992699369946995699669976998699970007001700270037004700570067007700870097010701170127013701470157016701770187019702070217022702370247025702670277028702970307031703270337034703570367037703870397040704170427043704470457046704770487049705070517052705370547055705670577058705970607061706270637064706570667067706870697070707170727073707470757076707770787079708070817082708370847085708670877088708970907091709270937094709570967097709870997100710171027103710471057106710771087109711071117112711371147115711671177118711971207121712271237124712571267127712871297130713171327133713471357136713771387139714071417142714371447145714671477148714971507151715271537154715571567157715871597160716171627163716471657166716771687169717071717172717371747175717671777178717971807181718271837184718571867187718871897190719171927193719471957196719771987199720072017202720372047205720672077208720972107211721272137214721572167217721872197220722172227223722472257226722772287229723072317232723372347235723672377238723972407241724272437244724572467247724872497250725172527253725472557256725772587259726072617262726372647265726672677268726972707271727272737274727572767
27772787279728072817282728372847285728672877288728972907291729272937294729572967297729872997300730173027303730473057306730773087309731073117312731373147315731673177318731973207321732273237324732573267327732873297330733173327333733473357336733773387339734073417342734373447345734673477348734973507351735273537354735573567357735873597360736173627363736473657366736773687369737073717372737373747375737673777378737973807381738273837384738573867387738873897390739173927393739473957396739773987399740074017402740374047405740674077408740974107411741274137414741574167417741874197420742174227423742474257426742774287429743074317432743374347435743674377438743974407441744274437444744574467447744874497450745174527453745474557456745774587459746074617462746374647465746674677468746974707471747274737474747574767477747874797480748174827483748474857486748774887489749074917492749374947495749674977498749975007501750275037504750575067507750875097510751175127513751475157516751775187519752075217522752375247525752675277528 |
- // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
- // This source code is licensed under both the GPLv2 (found in the
- // COPYING file in the root directory) and Apache 2.0 License
- // (found in the LICENSE.Apache file in the root directory).
- //
- // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style license that can be
- // found in the LICENSE file. See the AUTHORS file for names of contributors.
- #include <atomic>
- #include <cstdlib>
- #include <functional>
- #include <memory>
- #include "db/db_test_util.h"
- #include "db/read_callback.h"
- #include "db/version_edit.h"
- #include "env/fs_readonly.h"
- #include "options/options_helper.h"
- #include "port/port.h"
- #include "port/stack_trace.h"
- #include "rocksdb/experimental.h"
- #include "rocksdb/iostats_context.h"
- #include "rocksdb/persistent_cache.h"
- #include "rocksdb/trace_record.h"
- #include "rocksdb/trace_record_result.h"
- #include "rocksdb/utilities/replayer.h"
- #include "rocksdb/wal_filter.h"
- #include "test_util/testutil.h"
- #include "util/defer.h"
- #include "util/random.h"
- #include "utilities/fault_injection_env.h"
- namespace ROCKSDB_NAMESPACE {
// Test fixture for this batch of general DB tests.  Each test gets a
// dedicated per-thread database directory named "db_test2".
class DBTest2 : public DBTestBase {
 public:
  // env_do_fsync=true: the test environment performs real fsync calls,
  // exercising more realistic durability behavior than the default.
  DBTest2() : DBTestBase("db_test2", /*env_do_fsync=*/true) {}
};
- TEST_F(DBTest2, OpenForReadOnly) {
- DB* db_ptr = nullptr;
- std::string dbname = test::PerThreadDBPath("db_readonly");
- Options options = CurrentOptions();
- options.create_if_missing = true;
- // OpenForReadOnly should fail but will create <dbname> in the file system
- ASSERT_NOK(DB::OpenForReadOnly(options, dbname, &db_ptr));
- // Since <dbname> is created, we should be able to delete the dir
- // We first get the list files under <dbname>
- // There should not be any subdirectories -- this is not checked here
- std::vector<std::string> files;
- ASSERT_OK(env_->GetChildren(dbname, &files));
- for (auto& f : files) {
- ASSERT_OK(env_->DeleteFile(dbname + "/" + f));
- }
- // <dbname> should be empty now and we should be able to delete it
- ASSERT_OK(env_->DeleteDir(dbname));
- options.create_if_missing = false;
- // OpenForReadOnly should fail since <dbname> was successfully deleted
- ASSERT_NOK(DB::OpenForReadOnly(options, dbname, &db_ptr));
- // With create_if_missing false, there should not be a dir in the file system
- ASSERT_NOK(env_->FileExists(dbname));
- }
- TEST_F(DBTest2, OpenForReadOnlyWithColumnFamilies) {
- DB* db_ptr = nullptr;
- std::string dbname = test::PerThreadDBPath("db_readonly");
- Options options = CurrentOptions();
- options.create_if_missing = true;
- ColumnFamilyOptions cf_options(options);
- std::vector<ColumnFamilyDescriptor> column_families;
- column_families.emplace_back(kDefaultColumnFamilyName, cf_options);
- column_families.emplace_back("goku", cf_options);
- std::vector<ColumnFamilyHandle*> handles;
- // OpenForReadOnly should fail but will create <dbname> in the file system
- ASSERT_NOK(
- DB::OpenForReadOnly(options, dbname, column_families, &handles, &db_ptr));
- // Since <dbname> is created, we should be able to delete the dir
- // We first get the list files under <dbname>
- // There should not be any subdirectories -- this is not checked here
- std::vector<std::string> files;
- ASSERT_OK(env_->GetChildren(dbname, &files));
- for (auto& f : files) {
- ASSERT_OK(env_->DeleteFile(dbname + "/" + f));
- }
- // <dbname> should be empty now and we should be able to delete it
- ASSERT_OK(env_->DeleteDir(dbname));
- options.create_if_missing = false;
- // OpenForReadOnly should fail since <dbname> was successfully deleted
- ASSERT_NOK(
- DB::OpenForReadOnly(options, dbname, column_families, &handles, &db_ptr));
- // With create_if_missing false, there should not be a dir in the file system
- ASSERT_NOK(env_->FileExists(dbname));
- }
// Flush listener that asserts every flushed SST file was written with a
// partitioned index (more than one index partition) whose index entries
// use internal keys (index_key_is_user_key == 0).
class PartitionedIndexTestListener : public EventListener {
 public:
  void OnFlushCompleted(DB* /*db*/, const FlushJobInfo& info) override {
    ASSERT_GT(info.table_properties.index_partitions, 1);
    ASSERT_EQ(info.table_properties.index_key_is_user_key, 0);
  }
};
- TEST_F(DBTest2, PartitionedIndexUserToInternalKey) {
- const int kValueSize = 10500;
- const int kNumEntriesPerFile = 1000;
- const int kNumFiles = 3;
- const int kNumDistinctKeys = 30;
- BlockBasedTableOptions table_options;
- Options options = CurrentOptions();
- options.disable_auto_compactions = true;
- table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch;
- PartitionedIndexTestListener* listener = new PartitionedIndexTestListener();
- options.table_factory.reset(NewBlockBasedTableFactory(table_options));
- options.listeners.emplace_back(listener);
- std::vector<const Snapshot*> snapshots;
- Reopen(options);
- Random rnd(301);
- for (int i = 0; i < kNumFiles; i++) {
- for (int j = 0; j < kNumEntriesPerFile; j++) {
- int key_id = (i * kNumEntriesPerFile + j) % kNumDistinctKeys;
- std::string value = rnd.RandomString(kValueSize);
- ASSERT_OK(Put("keykey_" + std::to_string(key_id), value));
- snapshots.push_back(db_->GetSnapshot());
- }
- ASSERT_OK(Flush());
- }
- for (auto s : snapshots) {
- db_->ReleaseSnapshot(s);
- }
- }
// Fixture parameterized on whether index and filter blocks should go
// through the block cache (GetParam() == true) in the test below.
class PrefixFullBloomWithReverseComparator
    : public DBTestBase,
      public ::testing::WithParamInterface<bool> {
 public:
  PrefixFullBloomWithReverseComparator()
      : DBTestBase("prefix_bloom_reverse", /*env_do_fsync=*/true) {}
  void SetUp() override { if_cache_filter_ = GetParam(); }
  // True => the test enables cache_index_and_filter_blocks with a tiny
  // block cache.
  bool if_cache_filter_;
};
// With the reverse bytewise comparator and a capped(3) prefix extractor,
// a prefix-only bloom filter (whole_key_filtering == false) must not make
// seeks within an existing prefix miss data. When if_cache_filter_ is set,
// index/filter blocks are additionally routed through a capacity-1 LRU
// block cache.
TEST_P(PrefixFullBloomWithReverseComparator,
       PrefixFullBloomWithReverseComparator) {
  Options options = last_options_;
  options.comparator = ReverseBytewiseComparator();
  options.prefix_extractor.reset(NewCappedPrefixTransform(3));
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  BlockBasedTableOptions bbto;
  if (if_cache_filter_) {
    bbto.no_block_cache = false;
    bbto.cache_index_and_filter_blocks = true;
    bbto.block_cache = NewLRUCache(1);
  }
  bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
  bbto.whole_key_filtering = false;
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  DestroyAndReopen(options);

  ASSERT_OK(dbfull()->Put(WriteOptions(), "bar123", "foo"));
  ASSERT_OK(dbfull()->Put(WriteOptions(), "bar234", "foo2"));
  ASSERT_OK(dbfull()->Put(WriteOptions(), "foo123", "foo3"));
  ASSERT_OK(dbfull()->Flush(FlushOptions()));

  // Drop unreferenced cached blocks so subsequent reads exercise the
  // filter/index read path rather than warm cache entries.
  if (bbto.block_cache) {
    bbto.block_cache->EraseUnRefEntries();
  }

  std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions()));
  // In reverse order, seeking "bar345" lands on "bar234", then "bar123".
  iter->Seek("bar345");
  ASSERT_OK(iter->status());
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("bar234", iter->key().ToString());
  ASSERT_EQ("foo2", iter->value().ToString());

  iter->Next();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("bar123", iter->key().ToString());
  ASSERT_EQ("foo", iter->value().ToString());

  iter->Seek("foo234");
  ASSERT_OK(iter->status());
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("foo123", iter->key().ToString());
  ASSERT_EQ("foo3", iter->value().ToString());

  // "bar" sorts after every "bar*" key under the reverse comparator, so
  // the seek runs off the end of the data.
  iter->Seek("bar");
  ASSERT_OK(iter->status());
  ASSERT_TRUE(!iter->Valid());
}
// Run the parameterized test above both with and without caching of
// index/filter blocks (GetParam() in {false, true}).
INSTANTIATE_TEST_CASE_P(PrefixFullBloomWithReverseComparator,
                        PrefixFullBloomWithReverseComparator, testing::Bool());
- TEST_F(DBTest2, IteratorPropertyVersionNumber) {
- ASSERT_OK(Put("", ""));
- Iterator* iter1 = db_->NewIterator(ReadOptions());
- ASSERT_OK(iter1->status());
- std::string prop_value;
- ASSERT_OK(
- iter1->GetProperty("rocksdb.iterator.super-version-number", &prop_value));
- uint64_t version_number1 =
- static_cast<uint64_t>(std::atoi(prop_value.c_str()));
- ASSERT_OK(Put("", ""));
- ASSERT_OK(Flush());
- Iterator* iter2 = db_->NewIterator(ReadOptions());
- ASSERT_OK(iter2->status());
- ASSERT_OK(
- iter2->GetProperty("rocksdb.iterator.super-version-number", &prop_value));
- uint64_t version_number2 =
- static_cast<uint64_t>(std::atoi(prop_value.c_str()));
- ASSERT_GT(version_number2, version_number1);
- ASSERT_OK(Put("", ""));
- Iterator* iter3 = db_->NewIterator(ReadOptions());
- ASSERT_OK(iter3->status());
- ASSERT_OK(
- iter3->GetProperty("rocksdb.iterator.super-version-number", &prop_value));
- uint64_t version_number3 =
- static_cast<uint64_t>(std::atoi(prop_value.c_str()));
- ASSERT_EQ(version_number2, version_number3);
- iter1->SeekToFirst();
- ASSERT_OK(
- iter1->GetProperty("rocksdb.iterator.super-version-number", &prop_value));
- uint64_t version_number1_new =
- static_cast<uint64_t>(std::atoi(prop_value.c_str()));
- ASSERT_EQ(version_number1, version_number1_new);
- delete iter1;
- delete iter2;
- delete iter3;
- }
- TEST_F(DBTest2, CacheIndexAndFilterWithDBRestart) {
- Options options = CurrentOptions();
- options.create_if_missing = true;
- options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
- BlockBasedTableOptions table_options;
- table_options.cache_index_and_filter_blocks = true;
- table_options.filter_policy.reset(NewBloomFilterPolicy(20));
- options.table_factory.reset(NewBlockBasedTableFactory(table_options));
- CreateAndReopenWithCF({"pikachu"}, options);
- ASSERT_OK(Put(1, "a", "begin"));
- ASSERT_OK(Put(1, "z", "end"));
- ASSERT_OK(Flush(1));
- ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options));
- std::string value;
- value = Get(1, "a");
- }
- TEST_F(DBTest2, MaxSuccessiveMergesChangeWithDBRecovery) {
- Options options = CurrentOptions();
- options.create_if_missing = true;
- options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
- options.max_successive_merges = 3;
- options.merge_operator = MergeOperators::CreatePutOperator();
- options.disable_auto_compactions = true;
- DestroyAndReopen(options);
- ASSERT_OK(Put("poi", "Finch"));
- ASSERT_OK(db_->Merge(WriteOptions(), "poi", "Reese"));
- ASSERT_OK(db_->Merge(WriteOptions(), "poi", "Shaw"));
- ASSERT_OK(db_->Merge(WriteOptions(), "poi", "Root"));
- options.max_successive_merges = 2;
- Reopen(options);
- }
// Fixture parameterized on <use_old_interface, cost_cache>:
// use_old_interface_ selects options.db_write_buffer_size over an explicit
// WriteBufferManager; cost_cache_ additionally charges write buffer memory
// to a block cache (see the test body below).
class DBTestSharedWriteBufferAcrossCFs
    : public DBTestBase,
      public testing::WithParamInterface<std::tuple<bool, bool>> {
 public:
  DBTestSharedWriteBufferAcrossCFs()
      : DBTestBase("db_test_shared_write_buffer", /*env_do_fsync=*/true) {}
  void SetUp() override {
    use_old_interface_ = std::get<0>(GetParam());
    cost_cache_ = std::get<1>(GetParam());
  }
  bool use_old_interface_;
  bool cost_cache_;
};
// Verifies that a write buffer limit shared across column families
// (db_write_buffer_size, or an explicit WriteBufferManager optionally
// charged to a block cache) triggers flushes of the CF holding the oldest
// data when the limit is hit, while untouched CFs are left alone. The
// expected SST-file counts after each phase pin down which CF flushed.
TEST_P(DBTestSharedWriteBufferAcrossCFs, SharedWriteBufferAcrossCFs) {
  Options options = CurrentOptions();
  options.arena_block_size = 4096;
  auto flush_listener = std::make_shared<FlushCounterListener>();
  options.listeners.push_back(flush_listener);
  // Don't trip the listener at shutdown.
  options.avoid_flush_during_shutdown = true;
  // Avoid undeterministic value by malloc_usable_size();
  // Force arena block size to 1
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "Arena::Arena:0", [&](void* arg) {
        size_t* block_size = static_cast<size_t*>(arg);
        *block_size = 1;
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "Arena::AllocateNewBlock:0", [&](void* arg) {
        std::pair<size_t*, size_t*>* pair =
            static_cast<std::pair<size_t*, size_t*>*>(arg);
        *std::get<0>(*pair) = *std::get<1>(*pair);
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // The total soft write buffer size is about 105000
  std::shared_ptr<Cache> cache = NewLRUCache(4 * 1024 * 1024, 2);
  ASSERT_LT(cache->GetUsage(), 256 * 1024);

  // Select the shared-limit mechanism according to the test parameters.
  if (use_old_interface_) {
    options.db_write_buffer_size = 120000;  // this is the real limit
  } else if (!cost_cache_) {
    options.write_buffer_manager.reset(new WriteBufferManager(114285));
  } else {
    options.write_buffer_manager.reset(new WriteBufferManager(114285, cache));
  }
  options.write_buffer_size = 500000;  // this is never hit
  CreateAndReopenWithCF({"pikachu", "dobrynia", "nikitich"}, options);

  WriteOptions wo;
  wo.disableWAL = true;

  // Waits until all four CFs have no pending memtable flush, then waits
  // for background work so listener callbacks have completed.
  std::function<void()> wait_flush = [&]() {
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[0]));
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[1]));
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[2]));
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[3]));
    // Ensure background work is fully finished including listener callbacks
    // before accessing listener state.
    ASSERT_OK(dbfull()->TEST_WaitForBackgroundWork());
  };

  // Create some data and flush "default" and "nikitich" so that they
  // are newer CFs created.
  flush_listener->expected_flush_reason = FlushReason::kManualFlush;
  ASSERT_OK(Put(3, Key(1), DummyString(1), wo));
  ASSERT_OK(Flush(3));
  ASSERT_OK(Put(3, Key(1), DummyString(1), wo));
  ASSERT_OK(Put(0, Key(1), DummyString(1), wo));
  ASSERT_OK(Flush(0));
  ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
            static_cast<uint64_t>(1));
  ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
            static_cast<uint64_t>(1));

  // From here on, flushes should only come from the write buffer manager.
  flush_listener->expected_flush_reason = FlushReason::kWriteBufferManager;
  ASSERT_OK(Put(3, Key(1), DummyString(30000), wo));
  if (cost_cache_) {
    ASSERT_GE(cache->GetUsage(), 256 * 1024);
    ASSERT_LE(cache->GetUsage(), 2 * 256 * 1024);
  }
  wait_flush();
  ASSERT_OK(Put(0, Key(1), DummyString(60000), wo));
  if (cost_cache_) {
    ASSERT_GE(cache->GetUsage(), 256 * 1024);
    ASSERT_LE(cache->GetUsage(), 2 * 256 * 1024);
  }
  wait_flush();
  ASSERT_OK(Put(2, Key(1), DummyString(1), wo));
  // No flush should trigger
  wait_flush();
  {
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
              static_cast<uint64_t>(1));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
              static_cast<uint64_t>(0));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
              static_cast<uint64_t>(0));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
              static_cast<uint64_t>(1));
  }

  // Trigger a flush. Flushing "nikitich".
  ASSERT_OK(Put(3, Key(2), DummyString(30000), wo));
  wait_flush();
  ASSERT_OK(Put(0, Key(1), DummyString(1), wo));
  wait_flush();
  {
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
              static_cast<uint64_t>(1));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
              static_cast<uint64_t>(0));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
              static_cast<uint64_t>(0));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
              static_cast<uint64_t>(2));
  }

  // Without hitting the threshold, no flush should trigger.
  ASSERT_OK(Put(2, Key(1), DummyString(30000), wo));
  wait_flush();
  ASSERT_OK(Put(2, Key(1), DummyString(1), wo));
  wait_flush();
  ASSERT_OK(Put(2, Key(1), DummyString(1), wo));
  wait_flush();
  {
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
              static_cast<uint64_t>(1));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
              static_cast<uint64_t>(0));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
              static_cast<uint64_t>(0));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
              static_cast<uint64_t>(2));
  }

  // Hit the write buffer limit again. "default"
  // will have been flushed.
  ASSERT_OK(Put(2, Key(2), DummyString(10000), wo));
  wait_flush();
  ASSERT_OK(Put(3, Key(1), DummyString(1), wo));
  wait_flush();
  ASSERT_OK(Put(0, Key(1), DummyString(1), wo));
  wait_flush();
  ASSERT_OK(Put(0, Key(1), DummyString(1), wo));
  wait_flush();
  ASSERT_OK(Put(0, Key(1), DummyString(1), wo));
  wait_flush();
  {
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
              static_cast<uint64_t>(2));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
              static_cast<uint64_t>(0));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
              static_cast<uint64_t>(0));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
              static_cast<uint64_t>(2));
  }

  // Trigger another flush. This time "dobrynia". "pikachu" should not
  // be flushed, althrough it was never flushed.
  ASSERT_OK(Put(1, Key(1), DummyString(1), wo));
  wait_flush();
  ASSERT_OK(Put(2, Key(1), DummyString(80000), wo));
  wait_flush();
  ASSERT_OK(Put(1, Key(1), DummyString(1), wo));
  wait_flush();
  ASSERT_OK(Put(2, Key(1), DummyString(1), wo));
  wait_flush();
  {
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
              static_cast<uint64_t>(2));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
              static_cast<uint64_t>(0));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
              static_cast<uint64_t>(1));
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
              static_cast<uint64_t>(2));
  }

  if (cost_cache_) {
    // After the DB is closed and the manager released, the dummy entries
    // charged to the cache must be gone.
    ASSERT_GE(cache->GetUsage(), 256 * 1024);
    Close();
    options.write_buffer_manager.reset();
    last_options_.write_buffer_manager.reset();
    ASSERT_LT(cache->GetUsage(), 256 * 1024);
  }
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}
// Run the shared-write-buffer test with its three supported configurations:
// (use_old_interface, cost_cache) = (true,false), (false,false), (false,true).
INSTANTIATE_TEST_CASE_P(DBTestSharedWriteBufferAcrossCFs,
                        DBTestSharedWriteBufferAcrossCFs,
                        ::testing::Values(std::make_tuple(true, false),
                                          std::make_tuple(false, false),
                                          std::make_tuple(false, true)));
- TEST_F(DBTest2, SharedWriteBufferLimitAcrossDB) {
- std::string dbname2 = test::PerThreadDBPath("db_shared_wb_db2");
- Options options = CurrentOptions();
- options.arena_block_size = 4096;
- auto flush_listener = std::make_shared<FlushCounterListener>();
- options.listeners.push_back(flush_listener);
- // Don't trip the listener at shutdown.
- options.avoid_flush_during_shutdown = true;
- // Avoid undeterministic value by malloc_usable_size();
- // Force arena block size to 1
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "Arena::Arena:0", [&](void* arg) {
- size_t* block_size = static_cast<size_t*>(arg);
- *block_size = 1;
- });
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "Arena::AllocateNewBlock:0", [&](void* arg) {
- std::pair<size_t*, size_t*>* pair =
- static_cast<std::pair<size_t*, size_t*>*>(arg);
- *std::get<0>(*pair) = *std::get<1>(*pair);
- });
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
- options.write_buffer_size = 500000; // this is never hit
- // Use a write buffer total size so that the soft limit is about
- // 105000.
- options.write_buffer_manager.reset(new WriteBufferManager(120000));
- CreateAndReopenWithCF({"cf1", "cf2"}, options);
- ASSERT_OK(DestroyDB(dbname2, options));
- DB* db2 = nullptr;
- ASSERT_OK(DB::Open(options, dbname2, &db2));
- WriteOptions wo;
- wo.disableWAL = true;
- std::function<void()> wait_flush = [&]() {
- ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[0]));
- ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[1]));
- ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[2]));
- ASSERT_OK(static_cast<DBImpl*>(db2)->TEST_WaitForFlushMemTable());
- // Ensure background work is fully finished including listener callbacks
- // before accessing listener state.
- ASSERT_OK(dbfull()->TEST_WaitForBackgroundWork());
- ASSERT_OK(
- static_cast_with_check<DBImpl>(db2)->TEST_WaitForBackgroundWork());
- };
- // Trigger a flush on cf2
- flush_listener->expected_flush_reason = FlushReason::kWriteBufferManager;
- ASSERT_OK(Put(2, Key(1), DummyString(70000), wo));
- wait_flush();
- ASSERT_OK(Put(0, Key(1), DummyString(20000), wo));
- wait_flush();
- // Insert to DB2
- ASSERT_OK(db2->Put(wo, Key(2), DummyString(20000)));
- wait_flush();
- ASSERT_OK(Put(2, Key(1), DummyString(1), wo));
- wait_flush();
- ASSERT_OK(static_cast<DBImpl*>(db2)->TEST_WaitForFlushMemTable());
- {
- ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default") +
- GetNumberOfSstFilesForColumnFamily(db_, "cf1") +
- GetNumberOfSstFilesForColumnFamily(db_, "cf2"),
- static_cast<uint64_t>(1));
- ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db2, "default"),
- static_cast<uint64_t>(0));
- }
- // Triggering to flush another CF in DB1
- ASSERT_OK(db2->Put(wo, Key(2), DummyString(70000)));
- wait_flush();
- ASSERT_OK(Put(2, Key(1), DummyString(1), wo));
- wait_flush();
- {
- ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
- static_cast<uint64_t>(1));
- ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "cf1"),
- static_cast<uint64_t>(0));
- ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "cf2"),
- static_cast<uint64_t>(1));
- ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db2, "default"),
- static_cast<uint64_t>(0));
- }
- // Triggering flush in DB2.
- ASSERT_OK(db2->Put(wo, Key(3), DummyString(40000)));
- wait_flush();
- ASSERT_OK(db2->Put(wo, Key(1), DummyString(1)));
- wait_flush();
- ASSERT_OK(static_cast<DBImpl*>(db2)->TEST_WaitForFlushMemTable());
- {
- ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
- static_cast<uint64_t>(1));
- ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "cf1"),
- static_cast<uint64_t>(0));
- ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "cf2"),
- static_cast<uint64_t>(1));
- ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db2, "default"),
- static_cast<uint64_t>(1));
- }
- delete db2;
- ASSERT_OK(DestroyDB(dbname2, options));
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
- }
- TEST_F(DBTest2, TestWriteBufferNoLimitWithCache) {
- Options options = CurrentOptions();
- options.arena_block_size = 4096;
- std::shared_ptr<Cache> cache = NewLRUCache(LRUCacheOptions(
- 10000000 /* capacity */, 1 /* num_shard_bits */,
- false /* strict_capacity_limit */, 0.0 /* high_pri_pool_ratio */,
- nullptr /* memory_allocator */, kDefaultToAdaptiveMutex,
- kDontChargeCacheMetadata));
- options.write_buffer_size = 50000; // this is never hit
- // Use a write buffer total size so that the soft limit is about
- // 105000.
- options.write_buffer_manager.reset(new WriteBufferManager(0, cache));
- Reopen(options);
- ASSERT_OK(Put("foo", "bar"));
- // One dummy entry is 256KB.
- ASSERT_GT(cache->GetUsage(), 128000);
- }
- namespace {
- void ValidateKeyExistence(DB* db, const std::vector<Slice>& keys_must_exist,
- const std::vector<Slice>& keys_must_not_exist) {
- // Ensure that expected keys exist
- std::vector<std::string> values;
- if (keys_must_exist.size() > 0) {
- std::vector<Status> status_list =
- db->MultiGet(ReadOptions(), keys_must_exist, &values);
- for (size_t i = 0; i < keys_must_exist.size(); i++) {
- ASSERT_OK(status_list[i]);
- }
- }
- // Ensure that given keys don't exist
- if (keys_must_not_exist.size() > 0) {
- std::vector<Status> status_list =
- db->MultiGet(ReadOptions(), keys_must_not_exist, &values);
- for (size_t i = 0; i < keys_must_not_exist.size(); i++) {
- ASSERT_TRUE(status_list[i].IsNotFound());
- }
- }
- }
- } // anonymous namespace
// Exercises every WalProcessingOption through a WAL filter during
// recovery: writes three batches, reopens with a filter that applies the
// option under test at one record index, and checks exactly which keys
// survive recovery (twice, to ensure skipped logs are not replayed later).
TEST_F(DBTest2, WalFilterTest) {
  // Filter that returns a chosen WalProcessingOption at one record index
  // and kContinueProcessing everywhere else.
  class TestWalFilter : public WalFilter {
   private:
    // Processing option that is requested to be applied at the given index
    WalFilter::WalProcessingOption wal_processing_option_;
    // Index at which to apply wal_processing_option_
    // At other indexes default wal_processing_option::kContinueProcessing is
    // returned.
    size_t apply_option_at_record_index_;
    // Current record index, incremented with each record encountered.
    size_t current_record_index_;

   public:
    TestWalFilter(WalFilter::WalProcessingOption wal_processing_option,
                  size_t apply_option_for_record_index)
        : wal_processing_option_(wal_processing_option),
          apply_option_at_record_index_(apply_option_for_record_index),
          current_record_index_(0) {}

    WalProcessingOption LogRecord(const WriteBatch& /*batch*/,
                                  WriteBatch* /*new_batch*/,
                                  bool* /*batch_changed*/) const override {
      WalFilter::WalProcessingOption option_to_return;

      if (current_record_index_ == apply_option_at_record_index_) {
        option_to_return = wal_processing_option_;
      } else {
        option_to_return = WalProcessingOption::kContinueProcessing;
      }

      // Filter is passed as a const object for RocksDB to not modify the
      // object, however we modify it for our own purpose here and hence
      // cast the constness away.
      (const_cast<TestWalFilter*>(this)->current_record_index_)++;

      return option_to_return;
    }

    const char* Name() const override { return "TestWalFilter"; }
  };

  // Create 3 batches with two keys each
  std::vector<std::vector<std::string>> batch_keys(3);

  batch_keys[0].push_back("key1");
  batch_keys[0].push_back("key2");
  batch_keys[1].push_back("key3");
  batch_keys[1].push_back("key4");
  batch_keys[2].push_back("key5");
  batch_keys[2].push_back("key6");

  // Test with all WAL processing options
  for (int option = 0;
       option < static_cast<int>(
                    WalFilter::WalProcessingOption::kWalProcessingOptionMax);
       option++) {
    Options options = OptionsForLogIterTest();
    DestroyAndReopen(options);
    CreateAndReopenWithCF({"pikachu"}, options);

    // Write given keys in given batches
    for (size_t i = 0; i < batch_keys.size(); i++) {
      WriteBatch batch;
      for (size_t j = 0; j < batch_keys[i].size(); j++) {
        ASSERT_OK(batch.Put(handles_[0], batch_keys[i][j], DummyString(1024)));
      }
      ASSERT_OK(dbfull()->Write(WriteOptions(), &batch));
    }

    WalFilter::WalProcessingOption wal_processing_option =
        static_cast<WalFilter::WalProcessingOption>(option);

    // Create a test filter that would apply wal_processing_option at the first
    // record
    size_t apply_option_for_record_index = 1;
    TestWalFilter test_wal_filter(wal_processing_option,
                                  apply_option_for_record_index);

    // Reopen database with option to use WAL filter
    options = OptionsForLogIterTest();
    options.wal_filter = &test_wal_filter;
    Status status =
        TryReopenWithColumnFamilies({"default", "pikachu"}, options);
    if (wal_processing_option ==
        WalFilter::WalProcessingOption::kCorruptedRecord) {
      ASSERT_NOK(status);
      // In case of corruption we can turn off paranoid_checks to reopen
      // databse
      options.paranoid_checks = false;
      ReopenWithColumnFamilies({"default", "pikachu"}, options);
    } else {
      ASSERT_OK(status);
    }

    // Compute which keys we expect to be found
    // and which we expect not to be found after recovery.
    std::vector<Slice> keys_must_exist;
    std::vector<Slice> keys_must_not_exist;
    switch (wal_processing_option) {
      case WalFilter::WalProcessingOption::kCorruptedRecord:
      case WalFilter::WalProcessingOption::kContinueProcessing: {
        fprintf(stderr, "Testing with complete WAL processing\n");
        // we expect all records to be processed
        for (size_t i = 0; i < batch_keys.size(); i++) {
          for (size_t j = 0; j < batch_keys[i].size(); j++) {
            keys_must_exist.emplace_back(batch_keys[i][j]);
          }
        }
        break;
      }
      case WalFilter::WalProcessingOption::kIgnoreCurrentRecord: {
        fprintf(stderr,
                "Testing with ignoring record %" ROCKSDB_PRIszt " only\n",
                apply_option_for_record_index);
        // We expect the record with apply_option_for_record_index to be not
        // found.
        for (size_t i = 0; i < batch_keys.size(); i++) {
          for (size_t j = 0; j < batch_keys[i].size(); j++) {
            if (i == apply_option_for_record_index) {
              keys_must_not_exist.emplace_back(batch_keys[i][j]);
            } else {
              keys_must_exist.emplace_back(batch_keys[i][j]);
            }
          }
        }
        break;
      }
      case WalFilter::WalProcessingOption::kStopReplay: {
        fprintf(stderr,
                "Testing with stopping replay from record %" ROCKSDB_PRIszt
                "\n",
                apply_option_for_record_index);
        // We expect records beyond apply_option_for_record_index to be not
        // found.
        for (size_t i = 0; i < batch_keys.size(); i++) {
          for (size_t j = 0; j < batch_keys[i].size(); j++) {
            if (i >= apply_option_for_record_index) {
              keys_must_not_exist.emplace_back(batch_keys[i][j]);
            } else {
              keys_must_exist.emplace_back(batch_keys[i][j]);
            }
          }
        }
        break;
      }
      default:
        FAIL();  // unhandled case
    }

    bool checked_after_reopen = false;

    while (true) {
      // Ensure that expected keys exists
      // and not expected keys don't exist after recovery
      ValidateKeyExistence(db_, keys_must_exist, keys_must_not_exist);

      if (checked_after_reopen) {
        break;
      }

      // reopen database again to make sure previous log(s) are not used
      //(even if they were skipped)
      // reopn database with option to use WAL filter
      options = OptionsForLogIterTest();
      ReopenWithColumnFamilies({"default", "pikachu"}, options);

      checked_after_reopen = true;
    }
  }
}
// Exercises a WAL filter that rewrites batches during recovery: starting
// at a given record index, each replayed batch is replaced with a batch
// containing only its first N keys. Verifies that exactly the truncated
// key set survives recovery, including after a second reopen.
TEST_F(DBTest2, WalFilterTestWithChangeBatch) {
  // Copies at most num_keys_to_add_in_new_batch Puts from the original
  // batch into new_write_batch_.
  class ChangeBatchHandler : public WriteBatch::Handler {
   private:
    // Batch to insert keys in
    WriteBatch* new_write_batch_;
    // Number of keys to add in the new batch
    size_t num_keys_to_add_in_new_batch_;
    // Number of keys added to new batch
    size_t num_keys_added_;

   public:
    ChangeBatchHandler(WriteBatch* new_write_batch,
                       size_t num_keys_to_add_in_new_batch)
        : new_write_batch_(new_write_batch),
          num_keys_to_add_in_new_batch_(num_keys_to_add_in_new_batch),
          num_keys_added_(0) {}
    void Put(const Slice& key, const Slice& value) override {
      if (num_keys_added_ < num_keys_to_add_in_new_batch_) {
        ASSERT_OK(new_write_batch_->Put(key, value));
        ++num_keys_added_;
      }
    }
  };

  // From change_records_from_index_ onward, replaces each replayed batch
  // with a truncated copy built by ChangeBatchHandler.
  class TestWalFilterWithChangeBatch : public WalFilter {
   private:
    // Index at which to start changing records
    size_t change_records_from_index_;
    // Number of keys to add in the new batch
    size_t num_keys_to_add_in_new_batch_;
    // Current record index, incremented with each record encountered.
    size_t current_record_index_;

   public:
    TestWalFilterWithChangeBatch(size_t change_records_from_index,
                                 size_t num_keys_to_add_in_new_batch)
        : change_records_from_index_(change_records_from_index),
          num_keys_to_add_in_new_batch_(num_keys_to_add_in_new_batch),
          current_record_index_(0) {}

    WalProcessingOption LogRecord(const WriteBatch& batch,
                                  WriteBatch* new_batch,
                                  bool* batch_changed) const override {
      if (current_record_index_ >= change_records_from_index_) {
        ChangeBatchHandler handler(new_batch, num_keys_to_add_in_new_batch_);
        Status s = batch.Iterate(&handler);
        if (s.ok()) {
          *batch_changed = true;
        } else {
          assert(false);
        }
      }

      // Filter is passed as a const object for RocksDB to not modify the
      // object, however we modify it for our own purpose here and hence
      // cast the constness away.
      (const_cast<TestWalFilterWithChangeBatch*>(this)
           ->current_record_index_)++;

      return WalProcessingOption::kContinueProcessing;
    }

    const char* Name() const override { return "TestWalFilterWithChangeBatch"; }
  };

  std::vector<std::vector<std::string>> batch_keys(3);

  batch_keys[0].push_back("key1");
  batch_keys[0].push_back("key2");
  batch_keys[1].push_back("key3");
  batch_keys[1].push_back("key4");
  batch_keys[2].push_back("key5");
  batch_keys[2].push_back("key6");

  Options options = OptionsForLogIterTest();
  DestroyAndReopen(options);
  CreateAndReopenWithCF({"pikachu"}, options);

  // Write given keys in given batches
  for (size_t i = 0; i < batch_keys.size(); i++) {
    WriteBatch batch;
    for (size_t j = 0; j < batch_keys[i].size(); j++) {
      ASSERT_OK(batch.Put(handles_[0], batch_keys[i][j], DummyString(1024)));
    }
    ASSERT_OK(dbfull()->Write(WriteOptions(), &batch));
  }

  // Create a test filter that would apply wal_processing_option at the first
  // record
  size_t change_records_from_index = 1;
  size_t num_keys_to_add_in_new_batch = 1;
  TestWalFilterWithChangeBatch test_wal_filter_with_change_batch(
      change_records_from_index, num_keys_to_add_in_new_batch);

  // Reopen database with option to use WAL filter
  options = OptionsForLogIterTest();
  options.wal_filter = &test_wal_filter_with_change_batch;
  ReopenWithColumnFamilies({"default", "pikachu"}, options);

  // Ensure that all keys exist before change_records_from_index_
  // And after that index only single key exists
  // as our filter adds only single key for each batch
  std::vector<Slice> keys_must_exist;
  std::vector<Slice> keys_must_not_exist;

  for (size_t i = 0; i < batch_keys.size(); i++) {
    for (size_t j = 0; j < batch_keys[i].size(); j++) {
      if (i >= change_records_from_index && j >= num_keys_to_add_in_new_batch) {
        keys_must_not_exist.emplace_back(batch_keys[i][j]);
      } else {
        keys_must_exist.emplace_back(batch_keys[i][j]);
      }
    }
  }

  bool checked_after_reopen = false;

  while (true) {
    // Ensure that expected keys exists
    // and not expected keys don't exist after recovery
    ValidateKeyExistence(db_, keys_must_exist, keys_must_not_exist);

    if (checked_after_reopen) {
      break;
    }

    // reopen database again to make sure previous log(s) are not used
    //(even if they were skipped)
    // reopn database with option to use WAL filter
    options = OptionsForLogIterTest();
    ReopenWithColumnFamilies({"default", "pikachu"}, options);

    checked_after_reopen = true;
  }
}
- TEST_F(DBTest2, WalFilterTestWithChangeBatchExtraKeys) {
- class TestWalFilterWithChangeBatchAddExtraKeys : public WalFilter {
- public:
- WalProcessingOption LogRecord(const WriteBatch& batch,
- WriteBatch* new_batch,
- bool* batch_changed) const override {
- *new_batch = batch;
- Status s = new_batch->Put("key_extra", "value_extra");
- if (s.ok()) {
- *batch_changed = true;
- } else {
- assert(false);
- }
- return WalProcessingOption::kContinueProcessing;
- }
- const char* Name() const override {
- return "WalFilterTestWithChangeBatchExtraKeys";
- }
- };
- std::vector<std::vector<std::string>> batch_keys(3);
- batch_keys[0].push_back("key1");
- batch_keys[0].push_back("key2");
- batch_keys[1].push_back("key3");
- batch_keys[1].push_back("key4");
- batch_keys[2].push_back("key5");
- batch_keys[2].push_back("key6");
- Options options = OptionsForLogIterTest();
- DestroyAndReopen(options);
- CreateAndReopenWithCF({"pikachu"}, options);
- // Write given keys in given batches
- for (size_t i = 0; i < batch_keys.size(); i++) {
- WriteBatch batch;
- for (size_t j = 0; j < batch_keys[i].size(); j++) {
- ASSERT_OK(batch.Put(handles_[0], batch_keys[i][j], DummyString(1024)));
- }
- ASSERT_OK(dbfull()->Write(WriteOptions(), &batch));
- }
- // Create a test filter that would add extra keys
- TestWalFilterWithChangeBatchAddExtraKeys test_wal_filter_extra_keys;
- // Reopen database with option to use WAL filter
- options = OptionsForLogIterTest();
- options.wal_filter = &test_wal_filter_extra_keys;
- Status status = TryReopenWithColumnFamilies({"default", "pikachu"}, options);
- ASSERT_TRUE(status.IsNotSupported());
- // Reopen without filter, now reopen should succeed - previous
- // attempt to open must not have altered the db.
- options = OptionsForLogIterTest();
- ReopenWithColumnFamilies({"default", "pikachu"}, options);
- std::vector<Slice> keys_must_exist;
- std::vector<Slice> keys_must_not_exist; // empty vector
- for (size_t i = 0; i < batch_keys.size(); i++) {
- for (size_t j = 0; j < batch_keys[i].size(); j++) {
- keys_must_exist.emplace_back(batch_keys[i][j]);
- }
- }
- ValidateKeyExistence(db_, keys_must_exist, keys_must_not_exist);
- }
- TEST_F(DBTest2, WalFilterTestWithColumnFamilies) {
- class TestWalFilterWithColumnFamilies : public WalFilter {
- private:
- // column_family_id -> log_number map (provided to WALFilter)
- std::map<uint32_t, uint64_t> cf_log_number_map_;
- // column_family_name -> column_family_id map (provided to WALFilter)
- std::map<std::string, uint32_t> cf_name_id_map_;
- // column_family_name -> keys_found_in_wal map
- // We store keys that are applicable to the column_family
- // during recovery (i.e. aren't already flushed to SST file(s))
- // for verification against the keys we expect.
- std::map<uint32_t, std::vector<std::string>> cf_wal_keys_;
- public:
- void ColumnFamilyLogNumberMap(
- const std::map<uint32_t, uint64_t>& cf_lognumber_map,
- const std::map<std::string, uint32_t>& cf_name_id_map) override {
- cf_log_number_map_ = cf_lognumber_map;
- cf_name_id_map_ = cf_name_id_map;
- }
- WalProcessingOption LogRecordFound(unsigned long long log_number,
- const std::string& /*log_file_name*/,
- const WriteBatch& batch,
- WriteBatch* /*new_batch*/,
- bool* /*batch_changed*/) override {
- class LogRecordBatchHandler : public WriteBatch::Handler {
- private:
- const std::map<uint32_t, uint64_t>& cf_log_number_map_;
- std::map<uint32_t, std::vector<std::string>>& cf_wal_keys_;
- unsigned long long log_number_;
- public:
- LogRecordBatchHandler(
- unsigned long long current_log_number,
- const std::map<uint32_t, uint64_t>& cf_log_number_map,
- std::map<uint32_t, std::vector<std::string>>& cf_wal_keys)
- : cf_log_number_map_(cf_log_number_map),
- cf_wal_keys_(cf_wal_keys),
- log_number_(current_log_number) {}
- Status PutCF(uint32_t column_family_id, const Slice& key,
- const Slice& /*value*/) override {
- auto it = cf_log_number_map_.find(column_family_id);
- assert(it != cf_log_number_map_.end());
- unsigned long long log_number_for_cf = it->second;
- // If the current record is applicable for column_family_id
- // (i.e. isn't flushed to SST file(s) for column_family_id)
- // add it to the cf_wal_keys_ map for verification.
- if (log_number_ >= log_number_for_cf) {
- cf_wal_keys_[column_family_id].push_back(
- std::string(key.data(), key.size()));
- }
- return Status::OK();
- }
- } handler(log_number, cf_log_number_map_, cf_wal_keys_);
- Status s = batch.Iterate(&handler);
- if (!s.ok()) {
- // TODO(AR) is this ok?
- return WalProcessingOption::kCorruptedRecord;
- }
- return WalProcessingOption::kContinueProcessing;
- }
- const char* Name() const override {
- return "WalFilterTestWithColumnFamilies";
- }
- const std::map<uint32_t, std::vector<std::string>>& GetColumnFamilyKeys() {
- return cf_wal_keys_;
- }
- const std::map<std::string, uint32_t>& GetColumnFamilyNameIdMap() {
- return cf_name_id_map_;
- }
- };
- std::vector<std::vector<std::string>> batch_keys_pre_flush(3);
- batch_keys_pre_flush[0].push_back("key1");
- batch_keys_pre_flush[0].push_back("key2");
- batch_keys_pre_flush[1].push_back("key3");
- batch_keys_pre_flush[1].push_back("key4");
- batch_keys_pre_flush[2].push_back("key5");
- batch_keys_pre_flush[2].push_back("key6");
- Options options = OptionsForLogIterTest();
- DestroyAndReopen(options);
- CreateAndReopenWithCF({"pikachu"}, options);
- // Write given keys in given batches
- for (size_t i = 0; i < batch_keys_pre_flush.size(); i++) {
- WriteBatch batch;
- for (size_t j = 0; j < batch_keys_pre_flush[i].size(); j++) {
- ASSERT_OK(batch.Put(handles_[0], batch_keys_pre_flush[i][j],
- DummyString(1024)));
- ASSERT_OK(batch.Put(handles_[1], batch_keys_pre_flush[i][j],
- DummyString(1024)));
- }
- ASSERT_OK(dbfull()->Write(WriteOptions(), &batch));
- }
- // Flush default column-family
- ASSERT_OK(db_->Flush(FlushOptions(), handles_[0]));
- // Do some more writes
- std::vector<std::vector<std::string>> batch_keys_post_flush(3);
- batch_keys_post_flush[0].push_back("key7");
- batch_keys_post_flush[0].push_back("key8");
- batch_keys_post_flush[1].push_back("key9");
- batch_keys_post_flush[1].push_back("key10");
- batch_keys_post_flush[2].push_back("key11");
- batch_keys_post_flush[2].push_back("key12");
- // Write given keys in given batches
- for (size_t i = 0; i < batch_keys_post_flush.size(); i++) {
- WriteBatch batch;
- for (size_t j = 0; j < batch_keys_post_flush[i].size(); j++) {
- ASSERT_OK(batch.Put(handles_[0], batch_keys_post_flush[i][j],
- DummyString(1024)));
- ASSERT_OK(batch.Put(handles_[1], batch_keys_post_flush[i][j],
- DummyString(1024)));
- }
- ASSERT_OK(dbfull()->Write(WriteOptions(), &batch));
- }
- // On Recovery we should only find the second batch applicable to default CF
- // But both batches applicable to pikachu CF
- // Create a test filter that would add extra keys
- TestWalFilterWithColumnFamilies test_wal_filter_column_families;
- // Reopen database with option to use WAL filter
- options = OptionsForLogIterTest();
- options.wal_filter = &test_wal_filter_column_families;
- Status status = TryReopenWithColumnFamilies({"default", "pikachu"}, options);
- ASSERT_TRUE(status.ok());
- // verify that handles_[0] only has post_flush keys
- // while handles_[1] has pre and post flush keys
- auto cf_wal_keys = test_wal_filter_column_families.GetColumnFamilyKeys();
- auto name_id_map = test_wal_filter_column_families.GetColumnFamilyNameIdMap();
- size_t index = 0;
- auto keys_cf = cf_wal_keys[name_id_map[kDefaultColumnFamilyName]];
- // default column-family, only post_flush keys are expected
- for (size_t i = 0; i < batch_keys_post_flush.size(); i++) {
- for (size_t j = 0; j < batch_keys_post_flush[i].size(); j++) {
- Slice key_from_the_log(keys_cf[index++]);
- Slice batch_key(batch_keys_post_flush[i][j]);
- ASSERT_EQ(key_from_the_log.compare(batch_key), 0);
- }
- }
- ASSERT_EQ(index, keys_cf.size());
- index = 0;
- keys_cf = cf_wal_keys[name_id_map["pikachu"]];
- // pikachu column-family, all keys are expected
- for (size_t i = 0; i < batch_keys_pre_flush.size(); i++) {
- for (size_t j = 0; j < batch_keys_pre_flush[i].size(); j++) {
- Slice key_from_the_log(keys_cf[index++]);
- Slice batch_key(batch_keys_pre_flush[i][j]);
- ASSERT_EQ(key_from_the_log.compare(batch_key), 0);
- }
- }
- for (size_t i = 0; i < batch_keys_post_flush.size(); i++) {
- for (size_t j = 0; j < batch_keys_post_flush[i].size(); j++) {
- Slice key_from_the_log(keys_cf[index++]);
- Slice batch_key(batch_keys_post_flush[i][j]);
- ASSERT_EQ(key_from_the_log.compare(batch_key), 0);
- }
- }
- ASSERT_EQ(index, keys_cf.size());
- }
- class CompactionStallTestListener : public EventListener {
- public:
- CompactionStallTestListener()
- : compacting_files_cnt_(0), compacted_files_cnt_(0) {}
- void OnCompactionBegin(DB* /*db*/, const CompactionJobInfo& ci) override {
- ASSERT_EQ(ci.cf_name, "default");
- ASSERT_EQ(ci.base_input_level, 0);
- ASSERT_EQ(ci.compaction_reason, CompactionReason::kLevelL0FilesNum);
- compacting_files_cnt_ += ci.input_files.size();
- }
- void OnCompactionCompleted(DB* /*db*/, const CompactionJobInfo& ci) override {
- ASSERT_EQ(ci.cf_name, "default");
- ASSERT_EQ(ci.base_input_level, 0);
- ASSERT_EQ(ci.compaction_reason, CompactionReason::kLevelL0FilesNum);
- compacted_files_cnt_ += ci.input_files.size();
- }
- std::atomic<size_t> compacting_files_cnt_;
- std::atomic<size_t> compacted_files_cnt_;
- };
TEST_F(DBTest2, CompactionStall) {
  // Uses sync points to (a) wait until background compaction work has been
  // scheduled before adding more L0 files, and (b) hold the compaction
  // begin/completed notifications in the section where the DB mutex is
  // unlocked. Afterwards it verifies that all scheduled compactions ran and
  // that the listener saw matching file counts in both callbacks.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"DBImpl::BGWorkCompaction", "DBTest2::CompactionStall:0"},
       {"DBImpl::BGWorkCompaction", "DBTest2::CompactionStall:1"},
       {"DBTest2::CompactionStall:2",
        "DBImpl::NotifyOnCompactionBegin::UnlockMutex"},
       {"DBTest2::CompactionStall:3",
        "DBImpl::NotifyOnCompactionCompleted::UnlockMutex"}});
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  Options options = CurrentOptions();
  options.level0_file_num_compaction_trigger = 4;
  options.max_background_compactions = 40;
  // Ownership passes to options.listeners (shared_ptr) on emplace_back.
  CompactionStallTestListener* listener = new CompactionStallTestListener();
  options.listeners.emplace_back(listener);
  DestroyAndReopen(options);
  // make sure all background compaction jobs can be scheduled
  auto stop_token =
      dbfull()->TEST_write_controler().GetCompactionPressureToken();

  Random rnd(301);

  // 4 Files in L0
  for (int i = 0; i < 4; i++) {
    for (int j = 0; j < 10; j++) {
      ASSERT_OK(Put(rnd.RandomString(10), rnd.RandomString(10)));
    }
    ASSERT_OK(Flush());
  }

  // Wait for compaction to be triggered
  TEST_SYNC_POINT("DBTest2::CompactionStall:0");

  // Clear "DBImpl::BGWorkCompaction" SYNC_POINT since we want to hold it again
  // at DBTest2::CompactionStall::1
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace();

  // Another 6 L0 files to trigger compaction again
  for (int i = 0; i < 6; i++) {
    for (int j = 0; j < 10; j++) {
      ASSERT_OK(Put(rnd.RandomString(10), rnd.RandomString(10)));
    }
    ASSERT_OK(Flush());
  }

  // Wait for another compaction to be triggered
  TEST_SYNC_POINT("DBTest2::CompactionStall:1");

  // Hold NotifyOnCompactionBegin in the unlock mutex section
  TEST_SYNC_POINT("DBTest2::CompactionStall:2");

  // Hold NotifyOnCompactionCompleted in the unlock mutex section
  TEST_SYNC_POINT("DBTest2::CompactionStall:3");

  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  // L0 must have been compacted back below the trigger threshold.
  ASSERT_LT(NumTableFilesAtLevel(0),
            options.level0_file_num_compaction_trigger);
  ASSERT_GT(listener->compacted_files_cnt_.load(),
            10 - options.level0_file_num_compaction_trigger);
  // Begin and Completed callbacks must have observed the same input files.
  ASSERT_EQ(listener->compacting_files_cnt_.load(),
            listener->compacted_files_cnt_.load());

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}
- TEST_F(DBTest2, FirstSnapshotTest) {
- Options options;
- options.write_buffer_size = 100000; // Small write buffer
- options = CurrentOptions(options);
- CreateAndReopenWithCF({"pikachu"}, options);
- // This snapshot will have sequence number 0 what is expected behaviour.
- const Snapshot* s1 = db_->GetSnapshot();
- ASSERT_OK(Put(1, "k1", std::string(100000, 'x'))); // Fill memtable
- ASSERT_OK(Put(1, "k2", std::string(100000, 'y'))); // Trigger flush
- db_->ReleaseSnapshot(s1);
- }
TEST_F(DBTest2, DuplicateSnapshot) {
  // Takes regular snapshots and write-conflict-boundary snapshots, some at
  // duplicate sequence numbers, and checks that SnapshotList::GetAll()
  // reports each sequence number once while still returning the oldest
  // write-conflict snapshot.
  Options options;
  options = CurrentOptions(options);
  std::vector<const Snapshot*> snapshots;
  DBImpl* dbi = static_cast_with_check<DBImpl>(db_);
  SequenceNumber oldest_ww_snap, first_ww_snap;

  ASSERT_OK(Put("k", "v"));  // inc seq
  snapshots.push_back(db_->GetSnapshot());
  snapshots.push_back(db_->GetSnapshot());  // same seq as previous snapshot
  ASSERT_OK(Put("k", "v"));  // inc seq
  snapshots.push_back(db_->GetSnapshot());
  snapshots.push_back(dbi->GetSnapshotForWriteConflictBoundary());
  first_ww_snap = snapshots.back()->GetSequenceNumber();
  ASSERT_OK(Put("k", "v"));  // inc seq
  snapshots.push_back(dbi->GetSnapshotForWriteConflictBoundary());
  snapshots.push_back(db_->GetSnapshot());
  ASSERT_OK(Put("k", "v"));  // inc seq
  snapshots.push_back(db_->GetSnapshot());
  {
    // Accessing the snapshot list requires holding the DB mutex.
    InstrumentedMutexLock l(dbi->mutex());
    auto seqs = dbi->snapshots().GetAll(&oldest_ww_snap);
    ASSERT_EQ(seqs.size(), 4);  // duplicates are not counted
    ASSERT_EQ(oldest_ww_snap, first_ww_snap);
  }

  for (auto s : snapshots) {
    db_->ReleaseSnapshot(s);
  }
}
// Parameterized fixture for pin_l0_filter_and_index_blocks_in_cache tests.
// Param tuple: <0> infinite_max_files_ (max_open_files == -1, so table
// readers are preloaded), <1> disallow_preload_ (shrink max_open_files so
// SST preloading does not happen).
class PinL0IndexAndFilterBlocksTest
    : public DBTestBase,
      public testing::WithParamInterface<std::tuple<bool, bool>> {
 public:
  PinL0IndexAndFilterBlocksTest()
      : DBTestBase("db_pin_l0_index_bloom_test", /*env_do_fsync=*/true) {}

  void SetUp() override {
    infinite_max_files_ = std::get<0>(GetParam());
    disallow_preload_ = std::get<1>(GetParam());
  }

  // Builds a DB with one SST on L1 and one on L0 for CF "pikachu", with
  // cache_index_and_filter_blocks and L0 pinning enabled. The block cache is
  // reset (new LRU cache) between the two files, and unreferenced cache
  // entries are erased at the end so each test starts from a cold cache.
  void CreateTwoLevels(Options* options, bool close_afterwards) {
    if (infinite_max_files_) {
      options->max_open_files = -1;
    }
    options->create_if_missing = true;
    options->statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
    BlockBasedTableOptions table_options;
    table_options.cache_index_and_filter_blocks = true;
    table_options.pin_l0_filter_and_index_blocks_in_cache = true;
    table_options.filter_policy.reset(NewBloomFilterPolicy(20));
    options->table_factory.reset(NewBlockBasedTableFactory(table_options));
    CreateAndReopenWithCF({"pikachu"}, *options);

    ASSERT_OK(Put(1, "a", "begin"));
    ASSERT_OK(Put(1, "z", "end"));
    ASSERT_OK(Flush(1));
    // move this table to L1
    ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]));
    ASSERT_EQ(1, NumTableFilesAtLevel(1, 1));

    // reset block cache
    table_options.block_cache = NewLRUCache(64 * 1024);
    options->table_factory.reset(NewBlockBasedTableFactory(table_options));
    ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, *options));
    // create new table at L0
    ASSERT_OK(Put(1, "a2", "begin2"));
    ASSERT_OK(Put(1, "z2", "end2"));
    ASSERT_OK(Flush(1));

    if (close_afterwards) {
      Close();  // This ensures that there is no ref to block cache entries
    }
    table_options.block_cache->EraseUnRefEntries();
  }

  // Set from GetParam() in SetUp(); see class comment.
  bool infinite_max_files_;
  bool disallow_preload_;
};
TEST_P(PinL0IndexAndFilterBlocksTest,
       IndexAndFilterBlocksOfNewTableAddedToCacheWithPinning) {
  // A freshly flushed L0 table should have its index and filter blocks
  // inserted into the block cache once (one miss each), and because they are
  // pinned, subsequent reads must not change miss or hit counters.
  Options options = CurrentOptions();
  if (infinite_max_files_) {
    options.max_open_files = -1;
  }
  options.create_if_missing = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  BlockBasedTableOptions table_options;
  table_options.cache_index_and_filter_blocks = true;
  table_options.pin_l0_filter_and_index_blocks_in_cache = true;
  table_options.filter_policy.reset(NewBloomFilterPolicy(20));
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  CreateAndReopenWithCF({"pikachu"}, options);

  ASSERT_OK(Put(1, "key", "val"));
  // Create a new table.
  ASSERT_OK(Flush(1));

  // index/filter blocks added to block cache right after table creation.
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));
  // only index/filter were added
  ASSERT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_ADD));
  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_MISS));

  // Miss and hit count should remain the same, they're all pinned.
  ASSERT_TRUE(db_->KeyMayExist(ReadOptions(), handles_[1], "key", nullptr));
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));

  // Miss and hit count should remain the same, they're all pinned.
  std::string value = Get(1, "key");
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));
}
TEST_P(PinL0IndexAndFilterBlocksTest,
       MultiLevelIndexAndFilterBlocksCachedWithPinning) {
  // With one file on L0 (pinned index/filter) and one on L1, reads from L0
  // must not touch the block cache counters, while the first read from L1
  // incurs exactly one filter miss and one index miss.
  Options options = CurrentOptions();
  PinL0IndexAndFilterBlocksTest::CreateTwoLevels(&options, false);
  // get base cache values
  uint64_t fm = TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS);
  uint64_t fh = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT);
  uint64_t im = TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS);
  uint64_t ih = TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT);

  std::string value;
  // this should be read from L0
  // so cache values don't change
  value = Get(1, "a2");
  ASSERT_EQ(fm, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
  ASSERT_EQ(fh, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
  ASSERT_EQ(im, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
  ASSERT_EQ(ih, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));

  // this should be read from L1
  // the file is opened, prefetching results in a cache filter miss
  // the block is loaded and added to the cache,
  // then the get results in a cache hit for L1
  // When we have inifinite max_files, there is still cache miss because we have
  // reset the block cache
  value = Get(1, "a");
  ASSERT_EQ(fm + 1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
  ASSERT_EQ(im + 1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
}
TEST_P(PinL0IndexAndFilterBlocksTest, DisablePrefetchingNonL0IndexAndFilter) {
  // Verifies that index/filter prefetching at DB::Open() happens only for L0
  // files: L1 blocks are loaded lazily on first access. The expected ticker
  // deltas depend on disallow_preload_ (whether table readers are preloaded
  // at open); each branch below documents the expected accounting.
  Options options = CurrentOptions();
  // This ensures that db does not ref anything in the block cache, so
  // EraseUnRefEntries could clear them up.
  bool close_afterwards = true;
  PinL0IndexAndFilterBlocksTest::CreateTwoLevels(&options, close_afterwards);

  // Get base cache values
  uint64_t fm = TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS);
  uint64_t fh = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT);
  uint64_t im = TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS);
  uint64_t ih = TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT);

  if (disallow_preload_) {
    // Now we have two files. We narrow the max open files to allow 3 entries
    // so that preloading SST files won't happen.
    options.max_open_files = 13;
    // RocksDB sanitize max open files to at least 20. Modify it back.
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
        "SanitizeOptions::AfterChangeMaxOpenFiles", [&](void* arg) {
          int* max_open_files = static_cast<int*>(arg);
          *max_open_files = 13;
        });
  }
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // Reopen database. If max_open_files is set as -1, table readers will be
  // preloaded. This will trigger a BlockBasedTable::Open() and prefetch
  // L0 index and filter. Level 1's prefetching is disabled in DB::Open()
  ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options));

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();

  if (!disallow_preload_) {
    // After reopen, cache miss are increased by one because we read (and only
    // read) filter and index on L0
    ASSERT_EQ(fm + 1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
    ASSERT_EQ(fh, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
    ASSERT_EQ(im + 1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
    ASSERT_EQ(ih, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));
  } else {
    // If max_open_files is not -1, we do not preload table readers, so there is
    // no change.
    ASSERT_EQ(fm, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
    ASSERT_EQ(fh, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
    ASSERT_EQ(im, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
    ASSERT_EQ(ih, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));
  }
  std::string value;
  // this should be read from L0
  value = Get(1, "a2");
  // If max_open_files is -1, we have pinned index and filter in Rep, so there
  // will not be changes in index and filter misses or hits. If max_open_files
  // is not -1, Get() will open a TableReader and prefetch index and filter.
  ASSERT_EQ(fm + 1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
  ASSERT_EQ(fh, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
  ASSERT_EQ(im + 1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
  ASSERT_EQ(ih, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));

  // this should be read from L1
  value = Get(1, "a");
  if (!disallow_preload_) {
    // In infinite max files case, there's a cache miss in executing Get()
    // because index and filter are not prefetched before.
    ASSERT_EQ(fm + 2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
    ASSERT_EQ(fh, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
    ASSERT_EQ(im + 2, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
    ASSERT_EQ(ih, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));
  } else {
    // In this case, cache miss will be increased by one in
    // BlockBasedTable::Open() because this is not in DB::Open() code path so we
    // will prefetch L1's index and filter. Cache hit will also be increased by
    // one because Get() will read index and filter from the block cache
    // prefetched in previous Open() call.
    ASSERT_EQ(fm + 2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
    ASSERT_EQ(fh + 1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
    ASSERT_EQ(im + 2, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
    ASSERT_EQ(ih + 1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));
  }
  // Force a full compaction to one single file. There will be a block
  // cache read for both of index and filter. If prefetch doesn't explicitly
  // happen, it will happen when verifying the file.
  Compact(1, "a", "zzzzz");
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  if (!disallow_preload_) {
    ASSERT_EQ(fm + 3, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
    ASSERT_EQ(fh, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
    ASSERT_EQ(im + 3, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
    ASSERT_EQ(ih + 2, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));
  } else {
    ASSERT_EQ(fm + 3, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
    ASSERT_EQ(fh + 1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
    ASSERT_EQ(im + 3, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
    ASSERT_EQ(ih + 3, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));
  }
  // Bloom and index hit will happen when a Get() happens.
  value = Get(1, "a");
  if (!disallow_preload_) {
    ASSERT_EQ(fm + 3, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
    ASSERT_EQ(fh + 1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
    ASSERT_EQ(im + 3, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
    ASSERT_EQ(ih + 3, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));
  } else {
    ASSERT_EQ(fm + 3, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
    ASSERT_EQ(fh + 2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
    ASSERT_EQ(im + 3, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
    ASSERT_EQ(ih + 4, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));
  }
}
// Param tuples are (infinite_max_files_, disallow_preload_):
//   (true,  false) -> max_open_files == -1, table readers preloaded at open
//   (false, false) -> bounded max_open_files, preloading allowed
//   (false, true)  -> bounded max_open_files, SST preloading disabled
INSTANTIATE_TEST_CASE_P(PinL0IndexAndFilterBlocksTest,
                        PinL0IndexAndFilterBlocksTest,
                        ::testing::Values(std::make_tuple(true, false),
                                          std::make_tuple(false, false),
                                          std::make_tuple(false, true)));
TEST_F(DBTest2, MaxCompactionBytesTest) {
  // Verifies that max_compaction_bytes caps how many grandparent-level bytes
  // a single output file may overlap: after a full compaction to 8 L2 files,
  // a small limit (3 x target file size) forces the next L0->L1 compaction
  // to cut its output into 4 L1 files.
  Options options = CurrentOptions();
  options.memtable_factory.reset(test::NewSpecialSkipListFactory(
      DBTestBase::kNumKeysByGenerateNewRandomFile));
  options.compaction_style = kCompactionStyleLevel;
  options.write_buffer_size = 200 << 10;
  options.arena_block_size = 4 << 10;
  options.level0_file_num_compaction_trigger = 4;
  options.num_levels = 4;
  options.compression = kNoCompression;
  options.max_bytes_for_level_base = 450 << 10;
  options.target_file_size_base = 100 << 10;
  // Infinite for full compaction.
  options.max_compaction_bytes = options.target_file_size_base * 100;

  Reopen(options);

  Random rnd(301);

  for (int num = 0; num < 8; num++) {
    GenerateNewRandomFile(&rnd);
  }
  CompactRangeOptions cro;
  cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
  ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
  ASSERT_EQ("0,0,8", FilesPerLevel(0));

  // When compact from Ln -> Ln+1, cut a file if the file overlaps with
  // more than three files in Ln+1.
  options.max_compaction_bytes = options.target_file_size_base * 3;
  Reopen(options);

  GenerateNewRandomFile(&rnd);
  // Add three more small files that overlap with the previous file
  for (int i = 0; i < 3; i++) {
    ASSERT_OK(Put("a", "z"));
    ASSERT_OK(Flush());
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  // Output files to L1 are cut to 4 pieces, according to
  // options.max_compaction_bytes (300K)
  // There are 8 files on L2 (grandparents level), each one is 100K. The first
  // file overlaps with a, b which max_compaction_bytes is less than 300K, the
  // second one overlaps with d, e, which is also less than 300K. Including any
  // extra grandparent file will make the future compaction larger than 300K.
  // L1: [  1  ] [  2 ]  [  3  ] [ 4 ]
  // L2: [a] [b] [c] [d] [e] [f] [g] [h]
  ASSERT_EQ("0,4,8", FilesPerLevel(0));
}
// Sync-point callback for "GetUniqueIdFromFile:FS_IOC_GETVERSION": forces a
// failed (-1) ioctl result to 0 so unique-id generation behaves as if the
// ioctl is unsupported. It then clears recorded traces and re-registers
// itself so the override remains installed for subsequent invocations.
static void UniqueIdCallback(void* arg) {
  int* result = static_cast<int*>(arg);
  if (*result == -1) {
    *result = 0;
  }

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "GetUniqueIdFromFile:FS_IOC_GETVERSION", UniqueIdCallback);
}
// In-memory PersistentCache mock backed by a std::map, used to exercise the
// persistent-cache read path in tests. Thread-safe via a single mutex.
class MockPersistentCache : public PersistentCache {
 public:
  explicit MockPersistentCache(const bool is_compressed, const size_t max_size)
      : is_compressed_(is_compressed), max_size_(max_size) {
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
        "GetUniqueIdFromFile:FS_IOC_GETVERSION", UniqueIdCallback);
  }

  ~MockPersistentCache() override = default;

  PersistentCache::StatsType Stats() override {
    return PersistentCache::StatsType();
  }

  uint64_t NewId() override {
    return last_id_.fetch_add(1, std::memory_order_relaxed);
  }

  // Copies [data, data+size) into the map under page_key.
  // NOTE(review): eviction removes at most one entry per call and is checked
  // before the new entry is accounted, so the cache can exceed max_size_;
  // also, insert() does not overwrite an existing key. Presumably acceptable
  // for a test mock — confirm if reused elsewhere.
  Status Insert(const Slice& page_key, const char* data,
                const size_t size) override {
    MutexLock _(&lock_);

    if (size_ > max_size_) {
      size_ -= data_.begin()->second.size();
      data_.erase(data_.begin());
    }

    data_.insert(std::make_pair(page_key.ToString(), std::string(data, size)));
    size_ += size;
    return Status::OK();
  }

  // Looks up page_key; on hit, allocates a fresh buffer for the caller and
  // returns its size. Returns NotFound on miss.
  Status Lookup(const Slice& page_key, std::unique_ptr<char[]>* data,
                size_t* size) override {
    MutexLock _(&lock_);
    auto it = data_.find(page_key.ToString());
    if (it == data_.end()) {
      return Status::NotFound();
    }

    assert(page_key.ToString() == it->first);
    data->reset(new char[it->second.size()]);
    memcpy(data->get(), it->second.c_str(), it->second.size());
    *size = it->second.size();
    return Status::OK();
  }

  bool IsCompressed() override { return is_compressed_; }

  std::string GetPrintableOptions() const override {
    return "MockPersistentCache";
  }

  port::Mutex lock_;              // guards data_ and size_
  std::map<std::string, std::string> data_;
  const bool is_compressed_ = true;
  size_t size_ = 0;               // total bytes currently stored
  const size_t max_size_ = 10 * 1024;  // 10KiB
  std::atomic<uint64_t> last_id_{1};
};
- #ifdef OS_LINUX
// Make sure that in CPU-time perf context counters, Env::NowCPUNanos()
// is used rather than Env::CPUNanos().
TEST_F(DBTest2, TestPerfContextGetCpuTime) {
  // Uses a mock sleep injected at TableCache::FindTable to inflate wall-clock
  // time only: get_cpu_nanos must stay below the injected amount while
  // find_table_nanos (wall time) must exceed it.
  // force resizing table cache so table handle is not preloaded so that
  // we can measure find_table_nanos during Get().
  dbfull()->TEST_table_cache()->SetCapacity(0);
  ASSERT_OK(Put("foo", "bar"));
  ASSERT_OK(Flush());
  env_->now_cpu_count_.store(0);
  env_->SetMockSleep();

  // NOTE: Presumed unnecessary and removed: resetting mock time in env

  // CPU timing is not enabled with kEnableTimeExceptForMutex
  SetPerfLevel(PerfLevel::kEnableTimeExceptForMutex);
  ASSERT_EQ("bar", Get("foo"));
  ASSERT_EQ(0, get_perf_context()->get_cpu_nanos);
  ASSERT_EQ(0, env_->now_cpu_count_.load());

  constexpr uint64_t kDummyAddonSeconds = uint64_t{1000000};
  constexpr uint64_t kDummyAddonNanos = 1000000000U * kDummyAddonSeconds;

  // Add time to NowNanos() reading.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "TableCache::FindTable:0",
      [&](void* /*arg*/) { env_->MockSleepForSeconds(kDummyAddonSeconds); });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  SetPerfLevel(PerfLevel::kEnableTimeAndCPUTimeExceptForMutex);
  ASSERT_EQ("bar", Get("foo"));
  ASSERT_GT(env_->now_cpu_count_.load(), 2);
  // CPU time must not include the mock (wall-clock) sleep...
  ASSERT_LT(get_perf_context()->get_cpu_nanos, kDummyAddonNanos);
  // ...but wall-clock find_table_nanos must.
  ASSERT_GT(get_perf_context()->find_table_nanos, kDummyAddonNanos);

  SetPerfLevel(PerfLevel::kDisable);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}
TEST_F(DBTest2, TestPerfContextIterCpuTime) {
  // Same scheme as TestPerfContextGetCpuTime, but for the iterator CPU
  // counters (iter_seek/next/prev_cpu_nanos): they must be zero below the
  // CPU-time perf level, and non-zero yet smaller than the injected mock
  // wall-clock sleep once CPU timing is enabled.
  DestroyAndReopen(CurrentOptions());
  // force resizing table cache so table handle is not preloaded so that
  // we can measure find_table_nanos during iteration
  dbfull()->TEST_table_cache()->SetCapacity(0);

  const size_t kNumEntries = 10;
  for (size_t i = 0; i < kNumEntries; ++i) {
    ASSERT_OK(Put("k" + std::to_string(i), "v" + std::to_string(i)));
  }
  ASSERT_OK(Flush());
  for (size_t i = 0; i < kNumEntries; ++i) {
    ASSERT_EQ("v" + std::to_string(i), Get("k" + std::to_string(i)));
  }
  std::string last_key = "k" + std::to_string(kNumEntries - 1);
  std::string last_value = "v" + std::to_string(kNumEntries - 1);
  env_->now_cpu_count_.store(0);
  env_->SetMockSleep();

  // NOTE: Presumed unnecessary and removed: resetting mock time in env

  // CPU timing is not enabled with kEnableTimeExceptForMutex
  SetPerfLevel(PerfLevel::kEnableTimeExceptForMutex);
  Iterator* iter = db_->NewIterator(ReadOptions());
  iter->Seek("k0");
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("v0", iter->value().ToString());
  iter->SeekForPrev(last_key);
  ASSERT_TRUE(iter->Valid());
  iter->SeekToLast();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(last_value, iter->value().ToString());
  iter->SeekToFirst();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("v0", iter->value().ToString());
  ASSERT_EQ(0, get_perf_context()->iter_seek_cpu_nanos);
  iter->Next();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("v1", iter->value().ToString());
  ASSERT_EQ(0, get_perf_context()->iter_next_cpu_nanos);
  iter->Prev();
  ASSERT_TRUE(iter->Valid());
  ASSERT_OK(iter->status());
  ASSERT_EQ("v0", iter->value().ToString());
  ASSERT_EQ(0, get_perf_context()->iter_prev_cpu_nanos);
  ASSERT_EQ(0, env_->now_cpu_count_.load());
  delete iter;

  constexpr uint64_t kDummyAddonSeconds = uint64_t{1000000};
  constexpr uint64_t kDummyAddonNanos = 1000000000U * kDummyAddonSeconds;

  // Add time to NowNanos() reading.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "TableCache::FindTable:0",
      [&](void* /*arg*/) { env_->MockSleepForSeconds(kDummyAddonSeconds); });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  SetPerfLevel(PerfLevel::kEnableTimeAndCPUTimeExceptForMutex);
  iter = db_->NewIterator(ReadOptions());
  iter->Seek("k0");
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("v0", iter->value().ToString());
  iter->SeekForPrev(last_key);
  ASSERT_TRUE(iter->Valid());
  iter->SeekToLast();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(last_value, iter->value().ToString());
  iter->SeekToFirst();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("v0", iter->value().ToString());
  // CPU counters are non-zero but exclude the injected wall-clock sleep.
  ASSERT_GT(get_perf_context()->iter_seek_cpu_nanos, 0);
  ASSERT_LT(get_perf_context()->iter_seek_cpu_nanos, kDummyAddonNanos);
  iter->Next();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("v1", iter->value().ToString());
  ASSERT_GT(get_perf_context()->iter_next_cpu_nanos, 0);
  ASSERT_LT(get_perf_context()->iter_next_cpu_nanos, kDummyAddonNanos);
  iter->Prev();
  ASSERT_TRUE(iter->Valid());
  ASSERT_OK(iter->status());
  ASSERT_EQ("v0", iter->value().ToString());
  ASSERT_GT(get_perf_context()->iter_prev_cpu_nanos, 0);
  ASSERT_LT(get_perf_context()->iter_prev_cpu_nanos, kDummyAddonNanos);
  ASSERT_GE(env_->now_cpu_count_.load(), 12);
  // Wall-clock find_table_nanos does include the injected sleep.
  ASSERT_GT(get_perf_context()->find_table_nanos, kDummyAddonNanos);

  SetPerfLevel(PerfLevel::kDisable);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  delete iter;
}
- #endif // OS_LINUX
- #if !defined OS_SOLARIS
TEST_F(DBTest2, PersistentCache) {
  // Exercises the persistent-cache read path with a MockPersistentCache, over
  // the cross product of {no block cache, 1M block cache} x {compressed,
  // uncompressed}, and expects both persistent-cache hits and misses.
  int num_iter = 80;

  Options options;
  options.write_buffer_size = 64 * 1024;  // small write buffer
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  options = CurrentOptions(options);

  auto bsizes = {/*no block cache*/ 0, /*1M*/ 1 * 1024 * 1024};
  auto types = {/*compressed*/ 1, /*uncompressed*/ 0};
  for (auto bsize : bsizes) {
    for (auto type : types) {
      BlockBasedTableOptions table_options;
      table_options.persistent_cache.reset(
          new MockPersistentCache(type, 10 * 1024));
      // NOTE(review): no_block_cache is set to true here and then block_cache
      // is assigned below when bsize != 0 — verify which setting wins.
      table_options.no_block_cache = true;
      table_options.block_cache = bsize ? NewLRUCache(bsize) : nullptr;
      options.table_factory.reset(NewBlockBasedTableFactory(table_options));

      DestroyAndReopen(options);
      CreateAndReopenWithCF({"pikachu"}, options);
      // default column family doesn't have block cache
      Options no_block_cache_opts;
      no_block_cache_opts.statistics = options.statistics;
      no_block_cache_opts = CurrentOptions(no_block_cache_opts);
      BlockBasedTableOptions table_options_no_bc;
      table_options_no_bc.no_block_cache = true;
      no_block_cache_opts.table_factory.reset(
          NewBlockBasedTableFactory(table_options_no_bc));
      ReopenWithColumnFamilies(
          {"default", "pikachu"},
          std::vector<Options>({no_block_cache_opts, options}));

      Random rnd(301);

      // Write 8MB (80 values, each 100K)
      ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
      std::vector<std::string> values;
      std::string str;
      for (int i = 0; i < num_iter; i++) {
        if (i % 4 == 0) {  // high compression ratio
          str = rnd.RandomString(1000);
        }
        values.push_back(str);
        ASSERT_OK(Put(1, Key(i), values[i]));
      }

      // flush all data from memtable so that reads are from block cache
      ASSERT_OK(Flush(1));

      for (int i = 0; i < num_iter; i++) {
        ASSERT_EQ(Get(1, Key(i)), values[i]);
      }

      auto hit = options.statistics->getTickerCount(PERSISTENT_CACHE_HIT);
      auto miss = options.statistics->getTickerCount(PERSISTENT_CACHE_MISS);

      ASSERT_GT(hit, 0);
      ASSERT_GT(miss, 0);
    }
  }
}
- namespace {
- void CountSyncPoint() {
- TEST_SYNC_POINT_CALLBACK("DBTest2::MarkedPoint", nullptr /* arg */);
- }
- } // anonymous namespace
- TEST_F(DBTest2, SyncPointMarker) {
- std::atomic<int> sync_point_called(0);
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "DBTest2::MarkedPoint",
- [&](void* /*arg*/) { sync_point_called.fetch_add(1); });
- // The first dependency enforces Marker can be loaded before MarkedPoint.
- // The second checks that thread 1's MarkedPoint should be disabled here.
- // Execution order:
- // | Thread 1 | Thread 2 |
- // | | Marker |
- // | MarkedPoint | |
- // | Thread1First | |
- // | | MarkedPoint |
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependencyAndMarkers(
- {{"DBTest2::SyncPointMarker:Thread1First", "DBTest2::MarkedPoint"}},
- {{"DBTest2::SyncPointMarker:Marker", "DBTest2::MarkedPoint"}});
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
- std::function<void()> func1 = [&]() {
- CountSyncPoint();
- TEST_SYNC_POINT("DBTest2::SyncPointMarker:Thread1First");
- };
- std::function<void()> func2 = [&]() {
- TEST_SYNC_POINT("DBTest2::SyncPointMarker:Marker");
- CountSyncPoint();
- };
- auto thread1 = port::Thread(func1);
- auto thread2 = port::Thread(func2);
- thread1.join();
- thread2.join();
- // Callback is only executed once
- ASSERT_EQ(sync_point_called.load(), 1);
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
- }
- size_t GetEncodedEntrySize(size_t key_size, size_t value_size) {
- std::string buffer;
- PutVarint32(&buffer, static_cast<uint32_t>(0));
- PutVarint32(&buffer, static_cast<uint32_t>(key_size));
- PutVarint32(&buffer, static_cast<uint32_t>(value_size));
- return buffer.size() + key_size + value_size;
- }
- TEST_F(DBTest2, ReadAmpBitmap) {
- Options options = CurrentOptions();
- BlockBasedTableOptions bbto;
- uint32_t bytes_per_bit[2] = {1, 16};
- for (size_t k = 0; k < 2; k++) {
- // Disable delta encoding to make it easier to calculate read amplification
- bbto.use_delta_encoding = false;
- // Huge block cache to make it easier to calculate read amplification
- bbto.block_cache = NewLRUCache(1024 * 1024 * 1024);
- bbto.read_amp_bytes_per_bit = bytes_per_bit[k];
- options.table_factory.reset(NewBlockBasedTableFactory(bbto));
- options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
- DestroyAndReopen(options);
- const size_t kNumEntries = 10000;
- Random rnd(301);
- for (size_t i = 0; i < kNumEntries; i++) {
- ASSERT_OK(Put(Key(static_cast<int>(i)), rnd.RandomString(100)));
- }
- ASSERT_OK(Flush());
- Close();
- Reopen(options);
- // Read keys/values randomly and verify that reported read amp error
- // is less than 2%
- uint64_t total_useful_bytes = 0;
- std::set<int> read_keys;
- std::string value;
- for (size_t i = 0; i < kNumEntries * 5; i++) {
- int key_idx = rnd.Next() % kNumEntries;
- std::string key = Key(key_idx);
- ASSERT_OK(db_->Get(ReadOptions(), key, &value));
- if (read_keys.find(key_idx) == read_keys.end()) {
- auto internal_key = InternalKey(key, 0, ValueType::kTypeValue);
- total_useful_bytes +=
- GetEncodedEntrySize(internal_key.size(), value.size());
- read_keys.insert(key_idx);
- }
- double expected_read_amp =
- static_cast<double>(total_useful_bytes) /
- options.statistics->getTickerCount(READ_AMP_TOTAL_READ_BYTES);
- double read_amp =
- static_cast<double>(options.statistics->getTickerCount(
- READ_AMP_ESTIMATE_USEFUL_BYTES)) /
- options.statistics->getTickerCount(READ_AMP_TOTAL_READ_BYTES);
- double error_pct = fabs(expected_read_amp - read_amp) * 100;
- // Error between reported read amp and real read amp should be less than
- // 2%
- EXPECT_LE(error_pct, 2);
- }
- // Make sure we read every thing in the DB (which is smaller than our cache)
- Iterator* iter = db_->NewIterator(ReadOptions());
- for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
- ASSERT_EQ(iter->value().ToString(), Get(iter->key().ToString()));
- }
- ASSERT_OK(iter->status());
- delete iter;
- // Read amp is on average 100% since we read all what we loaded in memory
- if (k == 0) {
- ASSERT_EQ(
- options.statistics->getTickerCount(READ_AMP_ESTIMATE_USEFUL_BYTES),
- options.statistics->getTickerCount(READ_AMP_TOTAL_READ_BYTES));
- } else {
- ASSERT_NEAR(
- options.statistics->getTickerCount(READ_AMP_ESTIMATE_USEFUL_BYTES) *
- 1.0f /
- options.statistics->getTickerCount(READ_AMP_TOTAL_READ_BYTES),
- 1, .01);
- }
- }
- }
- #ifndef OS_SOLARIS // GetUniqueIdFromFile is not implemented
- TEST_F(DBTest2, ReadAmpBitmapLiveInCacheAfterDBClose) {
- {
- const int kIdBufLen = 100;
- char id_buf[kIdBufLen];
- Status s = Status::NotSupported();
- #ifndef OS_WIN
- // You can't open a directory on windows using random access file
- std::unique_ptr<RandomAccessFile> file;
- s = env_->NewRandomAccessFile(dbname_, &file, EnvOptions());
- if (s.ok()) {
- if (file->GetUniqueId(id_buf, kIdBufLen) == 0) {
- // fs holding db directory doesn't support getting a unique file id,
- // this means that running this test will fail because lru_cache will
- // load the blocks again regardless of them being already in the cache
- return;
- }
- }
- #endif
- if (!s.ok()) {
- std::unique_ptr<Directory> dir;
- ASSERT_OK(env_->NewDirectory(dbname_, &dir));
- if (dir->GetUniqueId(id_buf, kIdBufLen) == 0) {
- // fs holding db directory doesn't support getting a unique file id,
- // this means that running this test will fail because lru_cache will
- // load the blocks again regardless of them being already in the cache
- return;
- }
- }
- }
- uint32_t bytes_per_bit[2] = {1, 16};
- for (size_t k = 0; k < 2; k++) {
- std::shared_ptr<Cache> lru_cache = NewLRUCache(1024 * 1024 * 1024);
- std::shared_ptr<Statistics> stats = ROCKSDB_NAMESPACE::CreateDBStatistics();
- Options options = CurrentOptions();
- BlockBasedTableOptions bbto;
- // Disable delta encoding to make it easier to calculate read amplification
- bbto.use_delta_encoding = false;
- // Huge block cache to make it easier to calculate read amplification
- bbto.block_cache = lru_cache;
- bbto.read_amp_bytes_per_bit = bytes_per_bit[k];
- options.table_factory.reset(NewBlockBasedTableFactory(bbto));
- options.statistics = stats;
- DestroyAndReopen(options);
- const int kNumEntries = 10000;
- Random rnd(301);
- for (int i = 0; i < kNumEntries; i++) {
- ASSERT_OK(Put(Key(i), rnd.RandomString(100)));
- }
- ASSERT_OK(Flush());
- Close();
- Reopen(options);
- std::set<int> read_keys;
- std::string value;
- // Iter1: Read half the DB, Read even keys
- // Key(0), Key(2), Key(4), Key(6), Key(8), ...
- for (int i = 0; i < kNumEntries; i += 2) {
- std::string key = Key(i);
- ASSERT_OK(db_->Get(ReadOptions(), key, &value));
- if (read_keys.find(i) == read_keys.end()) {
- auto internal_key = InternalKey(key, 0, ValueType::kTypeValue);
- read_keys.insert(i);
- }
- }
- size_t total_useful_bytes_iter1 =
- options.statistics->getTickerCount(READ_AMP_ESTIMATE_USEFUL_BYTES);
- size_t total_loaded_bytes_iter1 =
- options.statistics->getTickerCount(READ_AMP_TOTAL_READ_BYTES);
- Close();
- std::shared_ptr<Statistics> new_statistics =
- ROCKSDB_NAMESPACE::CreateDBStatistics();
- // Destroy old statistics obj that the blocks in lru_cache are pointing to
- options.statistics.reset();
- // Use the statistics object that we just created
- options.statistics = new_statistics;
- Reopen(options);
- // Iter2: Read half the DB, Read odd keys
- // Key(1), Key(3), Key(5), Key(7), Key(9), ...
- for (int i = 1; i < kNumEntries; i += 2) {
- std::string key = Key(i);
- ASSERT_OK(db_->Get(ReadOptions(), key, &value));
- if (read_keys.find(i) == read_keys.end()) {
- auto internal_key = InternalKey(key, 0, ValueType::kTypeValue);
- read_keys.insert(i);
- }
- }
- size_t total_useful_bytes_iter2 =
- options.statistics->getTickerCount(READ_AMP_ESTIMATE_USEFUL_BYTES);
- size_t total_loaded_bytes_iter2 =
- options.statistics->getTickerCount(READ_AMP_TOTAL_READ_BYTES);
- // Read amp is on average 100% since we read all what we loaded in memory
- if (k == 0) {
- ASSERT_EQ(total_useful_bytes_iter1 + total_useful_bytes_iter2,
- total_loaded_bytes_iter1 + total_loaded_bytes_iter2);
- } else {
- ASSERT_NEAR((total_useful_bytes_iter1 + total_useful_bytes_iter2) * 1.0f /
- (total_loaded_bytes_iter1 + total_loaded_bytes_iter2),
- 1, .01);
- }
- }
- }
- #endif // !OS_SOLARIS
- TEST_F(DBTest2, AutomaticCompactionOverlapManualCompaction) {
- Options options = CurrentOptions();
- options.num_levels = 3;
- options.IncreaseParallelism(20);
- DestroyAndReopen(options);
- ASSERT_OK(Put(Key(0), "a"));
- ASSERT_OK(Put(Key(5), "a"));
- ASSERT_OK(Flush());
- ASSERT_OK(Put(Key(10), "a"));
- ASSERT_OK(Put(Key(15), "a"));
- ASSERT_OK(Flush());
- CompactRangeOptions cro;
- cro.change_level = true;
- cro.target_level = 2;
- ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
- auto get_stat = [](std::string level_str, LevelStatType type,
- std::map<std::string, std::string> props) {
- auto prop_str =
- "compaction." + level_str + "." +
- InternalStats::compaction_level_stats.at(type).property_name.c_str();
- auto prop_item = props.find(prop_str);
- return prop_item == props.end() ? 0 : std::stod(prop_item->second);
- };
- // Trivial move 2 files to L2
- ASSERT_EQ("0,0,2", FilesPerLevel());
- // Also test that the stats GetMapProperty API reporting the same result
- {
- std::map<std::string, std::string> prop;
- ASSERT_TRUE(dbfull()->GetMapProperty("rocksdb.cfstats", &prop));
- ASSERT_EQ(0, get_stat("L0", LevelStatType::NUM_FILES, prop));
- ASSERT_EQ(0, get_stat("L1", LevelStatType::NUM_FILES, prop));
- ASSERT_EQ(2, get_stat("L2", LevelStatType::NUM_FILES, prop));
- ASSERT_EQ(2, get_stat("Sum", LevelStatType::NUM_FILES, prop));
- }
- // While the compaction is running, we will create 2 new files that
- // can fit in L2, these 2 files will be moved to L2 and overlap with
- // the running compaction and break the LSM consistency.
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "CompactionJob::Run():Start", [&](void* /*arg*/) {
- ASSERT_OK(
- dbfull()->SetOptions({{"level0_file_num_compaction_trigger", "2"},
- {"max_bytes_for_level_base", "1"}}));
- ASSERT_OK(Put(Key(6), "a"));
- ASSERT_OK(Put(Key(7), "a"));
- ASSERT_OK(Flush());
- ASSERT_OK(Put(Key(8), "a"));
- ASSERT_OK(Put(Key(9), "a"));
- ASSERT_OK(Flush());
- });
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
- // Run a manual compaction that will compact the 2 files in L2
- // into 1 file in L2
- cro.exclusive_manual_compaction = false;
- cro.bottommost_level_compaction = BottommostLevelCompaction::kForceOptimized;
- ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
- // Test that the stats GetMapProperty API reporting 1 file in L2
- {
- std::map<std::string, std::string> prop;
- ASSERT_TRUE(dbfull()->GetMapProperty("rocksdb.cfstats", &prop));
- ASSERT_EQ(1, get_stat("L2", LevelStatType::NUM_FILES, prop));
- }
- }
- TEST_F(DBTest2, ManualCompactionOverlapManualCompaction) {
- Options options = CurrentOptions();
- options.num_levels = 2;
- options.IncreaseParallelism(20);
- options.disable_auto_compactions = true;
- DestroyAndReopen(options);
- ASSERT_OK(Put(Key(0), "a"));
- ASSERT_OK(Put(Key(5), "a"));
- ASSERT_OK(Flush());
- ASSERT_OK(Put(Key(10), "a"));
- ASSERT_OK(Put(Key(15), "a"));
- ASSERT_OK(Flush());
- ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
- // Trivial move 2 files to L1
- ASSERT_EQ("0,2", FilesPerLevel());
- std::function<void()> bg_manual_compact = [&]() {
- std::string k1 = Key(6);
- std::string k2 = Key(9);
- Slice k1s(k1);
- Slice k2s(k2);
- CompactRangeOptions cro;
- cro.exclusive_manual_compaction = false;
- ASSERT_OK(db_->CompactRange(cro, &k1s, &k2s));
- };
- ROCKSDB_NAMESPACE::port::Thread bg_thread;
- // While the compaction is running, we will create 2 new files that
- // can fit in L1, these 2 files will be moved to L1 and overlap with
- // the running compaction and break the LSM consistency.
- std::atomic<bool> flag(false);
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "CompactionJob::Run():Start", [&](void* /*arg*/) {
- if (flag.exchange(true)) {
- // We want to make sure to call this callback only once
- return;
- }
- ASSERT_OK(Put(Key(6), "a"));
- ASSERT_OK(Put(Key(7), "a"));
- ASSERT_OK(Flush());
- ASSERT_OK(Put(Key(8), "a"));
- ASSERT_OK(Put(Key(9), "a"));
- ASSERT_OK(Flush());
- // Start a non-exclusive manual compaction in a bg thread
- bg_thread = port::Thread(bg_manual_compact);
- // This manual compaction conflict with the other manual compaction
- // so it should wait until the first compaction finish
- env_->SleepForMicroseconds(1000000);
- });
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
- // Run a manual compaction that will compact the 2 files in L1
- // into 1 file in L1
- CompactRangeOptions cro;
- cro.exclusive_manual_compaction = false;
- cro.bottommost_level_compaction = BottommostLevelCompaction::kForceOptimized;
- ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
- bg_thread.join();
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
- }
- TEST_F(DBTest2, PausingManualCompaction1) {
- Options options = CurrentOptions();
- options.disable_auto_compactions = true;
- options.num_levels = 7;
- DestroyAndReopen(options);
- Random rnd(301);
- // Generate a file containing 10 keys.
- for (int i = 0; i < 10; i++) {
- ASSERT_OK(Put(Key(i), rnd.RandomString(50)));
- }
- ASSERT_OK(Flush());
- // Generate another file containing same keys
- for (int i = 0; i < 10; i++) {
- ASSERT_OK(Put(Key(i), rnd.RandomString(50)));
- }
- ASSERT_OK(Flush());
- int manual_compactions_paused = 0;
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "CompactionJob::Run():PausingManualCompaction:1", [&](void* arg) {
- auto canceled = static_cast<std::atomic<bool>*>(arg);
- // CompactRange triggers manual compaction and cancel the compaction
- // by set *canceled as true
- if (canceled != nullptr) {
- canceled->store(true, std::memory_order_release);
- }
- manual_compactions_paused += 1;
- });
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "TestCompactFiles:PausingManualCompaction:3", [&](void* arg) {
- auto paused = static_cast<std::atomic<int>*>(arg);
- // CompactFiles() relies on manual_compactions_paused to
- // determine if this compaction should be paused or not
- ASSERT_EQ(0, paused->load(std::memory_order_acquire));
- paused->fetch_add(1, std::memory_order_release);
- });
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
- std::vector<std::string> files_before_compact, files_after_compact;
- // Remember file name before compaction is triggered
- std::vector<LiveFileMetaData> files_meta;
- dbfull()->GetLiveFilesMetaData(&files_meta);
- for (const auto& file : files_meta) {
- files_before_compact.push_back(file.name);
- }
- // OK, now trigger a manual compaction
- ASSERT_TRUE(dbfull()
- ->CompactRange(CompactRangeOptions(), nullptr, nullptr)
- .IsManualCompactionPaused());
- // Wait for compactions to get scheduled and stopped
- ASSERT_OK(dbfull()->TEST_WaitForCompact());
- // Get file names after compaction is stopped
- files_meta.clear();
- dbfull()->GetLiveFilesMetaData(&files_meta);
- for (const auto& file : files_meta) {
- files_after_compact.push_back(file.name);
- }
- // Like nothing happened
- ASSERT_EQ(files_before_compact, files_after_compact);
- ASSERT_EQ(manual_compactions_paused, 1);
- manual_compactions_paused = 0;
- // Now make sure CompactFiles also not run
- ASSERT_TRUE(dbfull()
- ->CompactFiles(ROCKSDB_NAMESPACE::CompactionOptions(),
- files_before_compact, 0)
- .IsManualCompactionPaused());
- // Wait for manual compaction to get scheduled and finish
- ASSERT_OK(dbfull()->TEST_WaitForCompact());
- files_meta.clear();
- files_after_compact.clear();
- dbfull()->GetLiveFilesMetaData(&files_meta);
- for (const auto& file : files_meta) {
- files_after_compact.push_back(file.name);
- }
- ASSERT_EQ(files_before_compact, files_after_compact);
- // CompactFiles returns at entry point
- ASSERT_EQ(manual_compactions_paused, 0);
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
- }
- // PausingManualCompaction does not affect auto compaction
- TEST_F(DBTest2, PausingManualCompaction2) {
- Options options = CurrentOptions();
- options.level0_file_num_compaction_trigger = 2;
- options.disable_auto_compactions = false;
- DestroyAndReopen(options);
- dbfull()->DisableManualCompaction();
- Random rnd(301);
- for (int i = 0; i < 2; i++) {
- // Generate a file containing 100 keys.
- for (int j = 0; j < 100; j++) {
- ASSERT_OK(Put(Key(j), rnd.RandomString(50)));
- }
- ASSERT_OK(Flush());
- }
- ASSERT_OK(dbfull()->TEST_WaitForCompact());
- std::vector<LiveFileMetaData> files_meta;
- dbfull()->GetLiveFilesMetaData(&files_meta);
- ASSERT_EQ(files_meta.size(), 1);
- }
- TEST_F(DBTest2, PausingManualCompaction3) {
- CompactRangeOptions compact_options;
- Options options = CurrentOptions();
- options.disable_auto_compactions = true;
- options.num_levels = 7;
- Random rnd(301);
- auto generate_files = [&]() {
- for (int i = 0; i < options.num_levels; i++) {
- for (int j = 0; j < options.num_levels - i + 1; j++) {
- for (int k = 0; k < 1000; k++) {
- ASSERT_OK(Put(Key(k + j * 1000), rnd.RandomString(50)));
- }
- ASSERT_OK(Flush());
- }
- for (int l = 1; l < options.num_levels - i; l++) {
- MoveFilesToLevel(l);
- }
- }
- };
- DestroyAndReopen(options);
- generate_files();
- ASSERT_EQ("2,3,4,5,6,7,8", FilesPerLevel());
- int run_manual_compactions = 0;
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "CompactionJob::Run():PausingManualCompaction:1",
- [&](void* /*arg*/) { run_manual_compactions++; });
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
- dbfull()->DisableManualCompaction();
- ASSERT_TRUE(dbfull()
- ->CompactRange(compact_options, nullptr, nullptr)
- .IsManualCompactionPaused());
- ASSERT_OK(dbfull()->TEST_WaitForCompact());
- // As manual compaction disabled, not even reach sync point
- ASSERT_EQ(run_manual_compactions, 0);
- ASSERT_EQ("2,3,4,5,6,7,8", FilesPerLevel());
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearCallBack(
- "CompactionJob::Run():PausingManualCompaction:1");
- dbfull()->EnableManualCompaction();
- ASSERT_OK(dbfull()->CompactRange(compact_options, nullptr, nullptr));
- ASSERT_OK(dbfull()->TEST_WaitForCompact());
- ASSERT_EQ("0,0,0,0,0,0,2", FilesPerLevel());
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
- }
- TEST_F(DBTest2, PausingManualCompaction4) {
- CompactRangeOptions compact_options;
- Options options = CurrentOptions();
- options.disable_auto_compactions = true;
- options.num_levels = 7;
- Random rnd(301);
- auto generate_files = [&]() {
- for (int i = 0; i < options.num_levels; i++) {
- for (int j = 0; j < options.num_levels - i + 1; j++) {
- for (int k = 0; k < 1000; k++) {
- ASSERT_OK(Put(Key(k + j * 1000), rnd.RandomString(50)));
- }
- ASSERT_OK(Flush());
- }
- for (int l = 1; l < options.num_levels - i; l++) {
- MoveFilesToLevel(l);
- }
- }
- };
- DestroyAndReopen(options);
- generate_files();
- ASSERT_EQ("2,3,4,5,6,7,8", FilesPerLevel());
- int run_manual_compactions = 0;
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "CompactionJob::Run():PausingManualCompaction:2", [&](void* arg) {
- auto canceled = static_cast<std::atomic<bool>*>(arg);
- // CompactRange triggers manual compaction and cancel the compaction
- // by set *canceled as true
- if (canceled != nullptr) {
- canceled->store(true, std::memory_order_release);
- }
- run_manual_compactions++;
- });
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "TestCompactFiles:PausingManualCompaction:3", [&](void* arg) {
- auto paused = static_cast<std::atomic<int>*>(arg);
- // CompactFiles() relies on manual_compactions_paused to
- // determine if thie compaction should be paused or not
- ASSERT_EQ(0, paused->load(std::memory_order_acquire));
- paused->fetch_add(1, std::memory_order_release);
- });
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
- ASSERT_TRUE(dbfull()
- ->CompactRange(compact_options, nullptr, nullptr)
- .IsManualCompactionPaused());
- ASSERT_OK(dbfull()->TEST_WaitForCompact());
- ASSERT_EQ(run_manual_compactions, 1);
- ASSERT_EQ("2,3,4,5,6,7,8", FilesPerLevel());
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearCallBack(
- "CompactionJob::Run():PausingManualCompaction:2");
- ASSERT_OK(dbfull()->CompactRange(compact_options, nullptr, nullptr));
- ASSERT_OK(dbfull()->TEST_WaitForCompact());
- ASSERT_EQ("0,0,0,0,0,0,2", FilesPerLevel());
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
- }
- TEST_F(DBTest2, CancelManualCompaction1) {
- CompactRangeOptions compact_options;
- auto canceledPtr =
- std::unique_ptr<std::atomic<bool>>(new std::atomic<bool>{true});
- compact_options.canceled = canceledPtr.get();
- Options options = CurrentOptions();
- options.disable_auto_compactions = true;
- options.num_levels = 7;
- Random rnd(301);
- auto generate_files = [&]() {
- for (int i = 0; i < options.num_levels; i++) {
- for (int j = 0; j < options.num_levels - i + 1; j++) {
- for (int k = 0; k < 1000; k++) {
- ASSERT_OK(Put(Key(k + j * 1000), rnd.RandomString(50)));
- }
- ASSERT_OK(Flush());
- }
- for (int l = 1; l < options.num_levels - i; l++) {
- MoveFilesToLevel(l);
- }
- }
- };
- DestroyAndReopen(options);
- generate_files();
- ASSERT_EQ("2,3,4,5,6,7,8", FilesPerLevel());
- int run_manual_compactions = 0;
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "CompactionJob::Run():PausingManualCompaction:1",
- [&](void* /*arg*/) { run_manual_compactions++; });
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
- // Setup a callback to disable compactions after a couple of levels are
- // compacted
- int compactions_run = 0;
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "DBImpl::RunManualCompaction()::1",
- [&](void* /*arg*/) { ++compactions_run; });
- ASSERT_TRUE(dbfull()
- ->CompactRange(compact_options, nullptr, nullptr)
- .IsManualCompactionPaused());
- ASSERT_OK(dbfull()->TEST_WaitForCompact());
- // Since compactions are disabled, we shouldn't start compacting.
- // E.g. we should call the compaction function exactly one time.
- ASSERT_EQ(compactions_run, 0);
- ASSERT_EQ(run_manual_compactions, 0);
- ASSERT_EQ("2,3,4,5,6,7,8", FilesPerLevel());
- compactions_run = 0;
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearCallBack(
- "DBImpl::RunManualCompaction()::1");
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "DBImpl::RunManualCompaction()::1", [&](void* /*arg*/) {
- ++compactions_run;
- // After 3 compactions disable
- if (compactions_run == 3) {
- compact_options.canceled->store(true, std::memory_order_release);
- }
- });
- compact_options.canceled->store(false, std::memory_order_release);
- ASSERT_TRUE(dbfull()
- ->CompactRange(compact_options, nullptr, nullptr)
- .IsManualCompactionPaused());
- ASSERT_OK(dbfull()->TEST_WaitForCompact());
- ASSERT_EQ(compactions_run, 3);
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearCallBack(
- "DBImpl::RunManualCompaction()::1");
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearCallBack(
- "CompactionJob::Run():PausingManualCompaction:1");
- // Compactions should work again if we re-enable them..
- compact_options.canceled->store(false, std::memory_order_relaxed);
- ASSERT_OK(dbfull()->CompactRange(compact_options, nullptr, nullptr));
- ASSERT_OK(dbfull()->TEST_WaitForCompact());
- ASSERT_EQ("0,0,0,0,0,0,2", FilesPerLevel());
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
- }
- TEST_F(DBTest2, CancelManualCompaction2) {
- CompactRangeOptions compact_options;
- auto canceledPtr =
- std::unique_ptr<std::atomic<bool>>(new std::atomic<bool>{true});
- compact_options.canceled = canceledPtr.get();
- compact_options.max_subcompactions = 1;
- Options options = CurrentOptions();
- options.disable_auto_compactions = true;
- options.num_levels = 7;
- Random rnd(301);
- auto generate_files = [&]() {
- for (int i = 0; i < options.num_levels; i++) {
- for (int j = 0; j < options.num_levels - i + 1; j++) {
- for (int k = 0; k < 1000; k++) {
- ASSERT_OK(Put(Key(k + j * 1000), rnd.RandomString(50)));
- }
- ASSERT_OK(Flush());
- }
- for (int l = 1; l < options.num_levels - i; l++) {
- MoveFilesToLevel(l);
- }
- }
- };
- DestroyAndReopen(options);
- generate_files();
- ASSERT_EQ("2,3,4,5,6,7,8", FilesPerLevel());
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
- int compactions_run = 0;
- std::atomic<int> kv_compactions{0};
- int compactions_stopped_at = 0;
- int kv_compactions_stopped_at = 0;
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "DBImpl::RunManualCompaction()::1", [&](void* /*arg*/) {
- ++compactions_run;
- // After 3 compactions disable
- });
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "CompactionIterator:ProcessKV", [&](void* /*arg*/) {
- int kv_compactions_run =
- kv_compactions.fetch_add(1, std::memory_order_release);
- if (kv_compactions_run == 5) {
- compact_options.canceled->store(true, std::memory_order_release);
- kv_compactions_stopped_at = kv_compactions_run;
- compactions_stopped_at = compactions_run;
- }
- });
- compact_options.canceled->store(false, std::memory_order_release);
- ASSERT_TRUE(dbfull()
- ->CompactRange(compact_options, nullptr, nullptr)
- .IsManualCompactionPaused());
- ASSERT_OK(dbfull()->TEST_WaitForCompact());
- // NOTE: as we set compact_options.max_subcompacitons = 1, and store true to
- // the canceled variable from the single compacting thread (via callback),
- // this value is deterministically kv_compactions_stopped_at + 1.
- ASSERT_EQ(kv_compactions, kv_compactions_stopped_at + 1);
- ASSERT_EQ(compactions_run, compactions_stopped_at);
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearCallBack(
- "CompactionIterator::ProcessKV");
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearCallBack(
- "DBImpl::RunManualCompaction()::1");
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearCallBack(
- "CompactionJob::Run():PausingManualCompaction:1");
- // Compactions should work again if we re-enable them..
- compact_options.canceled->store(false, std::memory_order_relaxed);
- ASSERT_OK(dbfull()->CompactRange(compact_options, nullptr, nullptr));
- ASSERT_OK(dbfull()->TEST_WaitForCompact());
- ASSERT_EQ("0,0,0,0,0,0,2", FilesPerLevel());
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
- }
- class CancelCompactionListener : public EventListener {
- public:
- CancelCompactionListener()
- : num_compaction_started_(0), num_compaction_ended_(0) {}
- void OnCompactionBegin(DB* /*db*/, const CompactionJobInfo& ci) override {
- ASSERT_EQ(ci.cf_name, "default");
- ASSERT_EQ(ci.base_input_level, 0);
- num_compaction_started_++;
- }
- void OnCompactionCompleted(DB* /*db*/, const CompactionJobInfo& ci) override {
- ASSERT_EQ(ci.cf_name, "default");
- ASSERT_EQ(ci.base_input_level, 0);
- ASSERT_EQ(ci.status.code(), code_);
- ASSERT_EQ(ci.status.subcode(), subcode_);
- num_compaction_ended_++;
- }
- std::atomic<size_t> num_compaction_started_;
- std::atomic<size_t> num_compaction_ended_;
- Status::Code code_;
- Status::SubCode subcode_;
- };
- TEST_F(DBTest2, CancelManualCompactionWithListener) {
- CompactRangeOptions compact_options;
- auto canceledPtr =
- std::unique_ptr<std::atomic<bool>>(new std::atomic<bool>{true});
- compact_options.canceled = canceledPtr.get();
- compact_options.max_subcompactions = 1;
- Options options = CurrentOptions();
- options.disable_auto_compactions = true;
- CancelCompactionListener* listener = new CancelCompactionListener();
- options.listeners.emplace_back(listener);
- DestroyAndReopen(options);
- Random rnd(301);
- for (int i = 0; i < 10; i++) {
- for (int j = 0; j < 10; j++) {
- ASSERT_OK(Put(Key(i + j * 10), rnd.RandomString(50)));
- }
- ASSERT_OK(Flush());
- }
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "CompactionIterator:ProcessKV", [&](void* /*arg*/) {
- compact_options.canceled->store(true, std::memory_order_release);
- });
- int running_compaction = 0;
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "CompactionJob::FinishCompactionOutputFile1",
- [&](void* /*arg*/) { running_compaction++; });
- // Case I: 1 Notify begin compaction, 2 Set *canceled as true to disable
- // manual compaction in the callback function, 3 Compaction not run,
- // 4 Notify compaction end.
- listener->code_ = Status::kIncomplete;
- listener->subcode_ = Status::SubCode::kManualCompactionPaused;
- compact_options.canceled->store(false, std::memory_order_release);
- ASSERT_TRUE(dbfull()
- ->CompactRange(compact_options, nullptr, nullptr)
- .IsManualCompactionPaused());
- ASSERT_OK(dbfull()->TEST_WaitForCompact());
- ASSERT_GT(listener->num_compaction_started_, 0);
- ASSERT_EQ(listener->num_compaction_started_, listener->num_compaction_ended_);
- ASSERT_EQ(running_compaction, 0);
- listener->num_compaction_started_ = 0;
- listener->num_compaction_ended_ = 0;
- // Case II: 1 Set *canceled as true in the callback function to disable manual
- // compaction, 2 Notify begin compaction (return without notifying), 3 Notify
- // compaction end (return without notifying).
- ASSERT_TRUE(dbfull()
- ->CompactRange(compact_options, nullptr, nullptr)
- .IsManualCompactionPaused());
- ASSERT_OK(dbfull()->TEST_WaitForCompact());
- ASSERT_EQ(listener->num_compaction_started_, 0);
- ASSERT_EQ(listener->num_compaction_started_, listener->num_compaction_ended_);
- ASSERT_EQ(running_compaction, 0);
- // Case III: 1 Notify begin compaction, 2 Compaction in between
- // 3. Set *canceled as true in the callback function to disable manual
- // compaction, 4 Notify compaction end.
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearCallBack(
- "CompactionIterator:ProcessKV");
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "CompactionJob::Run:BeforeVerify", [&](void* /*arg*/) {
- compact_options.canceled->store(true, std::memory_order_release);
- });
- listener->code_ = Status::kOk;
- listener->subcode_ = Status::SubCode::kNone;
- compact_options.canceled->store(false, std::memory_order_release);
- ASSERT_OK(dbfull()->CompactRange(compact_options, nullptr, nullptr));
- ASSERT_OK(dbfull()->TEST_WaitForCompact());
- ASSERT_GT(listener->num_compaction_started_, 0);
- ASSERT_EQ(listener->num_compaction_started_, listener->num_compaction_ended_);
- // Compaction job will succeed.
- ASSERT_GT(running_compaction, 0);
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
- }
// Universal compaction forwarded to the BOTTOM-priority thread pool must
// still deliver EventListener notifications, with begin/end counts balanced.
TEST_F(DBTest2, CompactionOnBottomPriorityWithListener) {
  int num_levels = 3;
  const int kNumFilesTrigger = 4;
  Options options = CurrentOptions();
  // Leave only one BOTTOM thread so any compaction must run there.
  env_->SetBackgroundThreads(0, Env::Priority::HIGH);
  env_->SetBackgroundThreads(0, Env::Priority::LOW);
  env_->SetBackgroundThreads(1, Env::Priority::BOTTOM);
  options.env = env_;
  options.compaction_style = kCompactionStyleUniversal;
  options.num_levels = num_levels;
  options.write_buffer_size = 100 << 10;     // 100KB
  options.target_file_size_base = 32 << 10;  // 32KB
  options.level0_file_num_compaction_trigger = kNumFilesTrigger;
  // Trigger compaction if size amplification exceeds 110%
  options.compaction_options_universal.max_size_amplification_percent = 110;
  // Listener tracks compaction begin/end notifications (see
  // num_compaction_started_/num_compaction_ended_ below); ownership passes
  // to options.listeners.
  CancelCompactionListener* listener = new CancelCompactionListener();
  options.listeners.emplace_back(listener);
  DestroyAndReopen(options);
  // Count handoffs to the bottom-pri pool and completed compaction jobs.
  int num_bottom_thread_compaction_scheduled = 0;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:ForwardToBottomPriPool",
      [&](void* /*arg*/) { num_bottom_thread_compaction_scheduled++; });
  int num_compaction_jobs = 0;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "CompactionJob::Run():End",
      [&](void* /*arg*/) { num_compaction_jobs++; });
  // Expected status of this run's compactions.
  listener->code_ = Status::kOk;
  listener->subcode_ = Status::SubCode::kNone;
  Random rnd(301);
  for (int i = 0; i < 1; ++i) {
    for (int num = 0; num < kNumFilesTrigger; num++) {
      int key_idx = 0;
      GenerateNewFile(&rnd, &key_idx, true /* no_wait */);
      // use no_wait above because that one waits for flush and compaction. We
      // don't want to wait for compaction because the full compaction is
      // intentionally blocked while more files are flushed.
      ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
    }
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  // Exactly one compaction job ran, and it was forwarded to the bottom pool.
  ASSERT_GT(num_bottom_thread_compaction_scheduled, 0);
  ASSERT_EQ(num_compaction_jobs, 1);
  // The listener saw the compaction, and every begin had a matching end.
  ASSERT_GT(listener->num_compaction_started_, 0);
  ASSERT_EQ(listener->num_compaction_started_, listener->num_compaction_ended_);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}
- TEST_F(DBTest2, OptimizeForPointLookup) {
- Options options = CurrentOptions();
- Close();
- options.OptimizeForPointLookup(2);
- ASSERT_OK(DB::Open(options, dbname_, &db_));
- ASSERT_OK(Put("foo", "v1"));
- ASSERT_EQ("v1", Get("foo"));
- ASSERT_OK(Flush());
- ASSERT_EQ("v1", Get("foo"));
- }
- TEST_F(DBTest2, OptimizeForSmallDB) {
- Options options = CurrentOptions();
- Close();
- options.OptimizeForSmallDb();
- // Find the cache object
- ASSERT_TRUE(options.table_factory->IsInstanceOf(
- TableFactory::kBlockBasedTableName()));
- auto table_options =
- options.table_factory->GetOptions<BlockBasedTableOptions>();
- ASSERT_TRUE(table_options != nullptr);
- std::shared_ptr<Cache> cache = table_options->block_cache;
- ASSERT_EQ(0, cache->GetUsage());
- ASSERT_OK(DB::Open(options, dbname_, &db_));
- ASSERT_OK(Put("foo", "v1"));
- // memtable size is costed to the block cache
- ASSERT_NE(0, cache->GetUsage());
- ASSERT_EQ("v1", Get("foo"));
- ASSERT_OK(Flush());
- size_t prev_size = cache->GetUsage();
- // Remember block cache size, so that we can find that
- // it is filled after Get().
- // Use pinnable slice so that it can ping the block so that
- // when we check the size it is not evicted.
- PinnableSlice value;
- ASSERT_OK(db_->Get(ReadOptions(), db_->DefaultColumnFamily(), "foo", &value));
- ASSERT_GT(cache->GetUsage(), prev_size);
- value.Reset();
- }
- TEST_F(DBTest2, IterRaceFlush1) {
- ASSERT_OK(Put("foo", "v1"));
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
- {{"DBImpl::NewIterator:1", "DBTest2::IterRaceFlush:1"},
- {"DBTest2::IterRaceFlush:2", "DBImpl::NewIterator:2"}});
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
- ROCKSDB_NAMESPACE::port::Thread t1([&] {
- TEST_SYNC_POINT("DBTest2::IterRaceFlush:1");
- ASSERT_OK(Put("foo", "v2"));
- ASSERT_OK(Flush());
- TEST_SYNC_POINT("DBTest2::IterRaceFlush:2");
- });
- // iterator is created after the first Put(), and its snapshot sequence is
- // assigned after second Put(), so it must see v2.
- {
- std::unique_ptr<Iterator> it(db_->NewIterator(ReadOptions()));
- it->Seek("foo");
- ASSERT_TRUE(it->Valid());
- ASSERT_OK(it->status());
- ASSERT_EQ("foo", it->key().ToString());
- ASSERT_EQ("v2", it->value().ToString());
- }
- t1.join();
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
- }
- TEST_F(DBTest2, IterRaceFlush2) {
- ASSERT_OK(Put("foo", "v1"));
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
- {{"DBImpl::NewIterator:3", "DBTest2::IterRaceFlush2:1"},
- {"DBTest2::IterRaceFlush2:2", "DBImpl::NewIterator:4"}});
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
- ROCKSDB_NAMESPACE::port::Thread t1([&] {
- TEST_SYNC_POINT("DBTest2::IterRaceFlush2:1");
- ASSERT_OK(Put("foo", "v2"));
- ASSERT_OK(Flush());
- TEST_SYNC_POINT("DBTest2::IterRaceFlush2:2");
- });
- // iterator is created after the first Put(), and its snapshot sequence is
- // assigned before second Put(), thus it must see v1.
- {
- std::unique_ptr<Iterator> it(db_->NewIterator(ReadOptions()));
- it->Seek("foo");
- ASSERT_TRUE(it->Valid());
- ASSERT_OK(it->status());
- ASSERT_EQ("foo", it->key().ToString());
- ASSERT_EQ("v1", it->value().ToString());
- }
- t1.join();
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
- }
- TEST_F(DBTest2, IterRefreshRaceFlush) {
- ASSERT_OK(Put("foo", "v1"));
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
- {{"ArenaWrappedDBIter::Refresh:1", "DBTest2::IterRefreshRaceFlush:1"},
- {"DBTest2::IterRefreshRaceFlush:2", "ArenaWrappedDBIter::Refresh:2"}});
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
- ROCKSDB_NAMESPACE::port::Thread t1([&] {
- TEST_SYNC_POINT("DBTest2::IterRefreshRaceFlush:1");
- ASSERT_OK(Put("foo", "v2"));
- ASSERT_OK(Flush());
- TEST_SYNC_POINT("DBTest2::IterRefreshRaceFlush:2");
- });
- // iterator is refreshed after the first Put(), and its sequence number is
- // assigned after second Put(), thus it must see v2.
- {
- std::unique_ptr<Iterator> it(db_->NewIterator(ReadOptions()));
- ASSERT_OK(it->status());
- ASSERT_OK(it->Refresh());
- it->Seek("foo");
- ASSERT_TRUE(it->Valid());
- ASSERT_OK(it->status());
- ASSERT_EQ("foo", it->key().ToString());
- ASSERT_EQ("v2", it->value().ToString());
- }
- t1.join();
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
- }
- TEST_F(DBTest2, GetRaceFlush1) {
- ASSERT_OK(Put("foo", "v1"));
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
- {{"DBImpl::GetImpl:1", "DBTest2::GetRaceFlush:1"},
- {"DBTest2::GetRaceFlush:2", "DBImpl::GetImpl:2"}});
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
- ROCKSDB_NAMESPACE::port::Thread t1([&] {
- TEST_SYNC_POINT("DBTest2::GetRaceFlush:1");
- ASSERT_OK(Put("foo", "v2"));
- ASSERT_OK(Flush());
- TEST_SYNC_POINT("DBTest2::GetRaceFlush:2");
- });
- // Get() is issued after the first Put(), so it should see either
- // "v1" or "v2".
- ASSERT_NE("NOT_FOUND", Get("foo"));
- t1.join();
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
- }
- TEST_F(DBTest2, GetRaceFlush2) {
- ASSERT_OK(Put("foo", "v1"));
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
- {{"DBImpl::GetImpl:3", "DBTest2::GetRaceFlush:1"},
- {"DBTest2::GetRaceFlush:2", "DBImpl::GetImpl:4"}});
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
- port::Thread t1([&] {
- TEST_SYNC_POINT("DBTest2::GetRaceFlush:1");
- ASSERT_OK(Put("foo", "v2"));
- ASSERT_OK(Flush());
- TEST_SYNC_POINT("DBTest2::GetRaceFlush:2");
- });
- // Get() is issued after the first Put(), so it should see either
- // "v1" or "v2".
- ASSERT_NE("NOT_FOUND", Get("foo"));
- t1.join();
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
- }
- TEST_F(DBTest2, DirectIO) {
- if (!IsDirectIOSupported()) {
- return;
- }
- Options options = CurrentOptions();
- options.use_direct_reads = options.use_direct_io_for_flush_and_compaction =
- true;
- options.allow_mmap_reads = options.allow_mmap_writes = false;
- DestroyAndReopen(options);
- ASSERT_OK(Put(Key(0), "a"));
- ASSERT_OK(Put(Key(5), "a"));
- ASSERT_OK(Flush());
- ASSERT_OK(Put(Key(10), "a"));
- ASSERT_OK(Put(Key(15), "a"));
- ASSERT_OK(Flush());
- ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
- Reopen(options);
- }
- TEST_F(DBTest2, MemtableOnlyIterator) {
- Options options = CurrentOptions();
- CreateAndReopenWithCF({"pikachu"}, options);
- ASSERT_OK(Put(1, "foo", "first"));
- ASSERT_OK(Put(1, "bar", "second"));
- ReadOptions ropt;
- ropt.read_tier = kMemtableTier;
- std::string value;
- Iterator* it = nullptr;
- // Before flushing
- // point lookups
- ASSERT_OK(db_->Get(ropt, handles_[1], "foo", &value));
- ASSERT_EQ("first", value);
- ASSERT_OK(db_->Get(ropt, handles_[1], "bar", &value));
- ASSERT_EQ("second", value);
- // Memtable-only iterator (read_tier=kMemtableTier); data not flushed yet.
- it = db_->NewIterator(ropt, handles_[1]);
- int count = 0;
- for (it->SeekToFirst(); it->Valid(); it->Next()) {
- ASSERT_TRUE(it->Valid());
- count++;
- }
- ASSERT_TRUE(!it->Valid());
- ASSERT_OK(it->status());
- ASSERT_EQ(2, count);
- delete it;
- ASSERT_OK(Flush(1));
- // After flushing
- // point lookups
- ASSERT_OK(db_->Get(ropt, handles_[1], "foo", &value));
- ASSERT_EQ("first", value);
- ASSERT_OK(db_->Get(ropt, handles_[1], "bar", &value));
- ASSERT_EQ("second", value);
- // nothing should be returned using memtable-only iterator after flushing.
- it = db_->NewIterator(ropt, handles_[1]);
- ASSERT_OK(it->status());
- count = 0;
- for (it->SeekToFirst(); it->Valid(); it->Next()) {
- ASSERT_TRUE(it->Valid());
- count++;
- }
- ASSERT_TRUE(!it->Valid());
- ASSERT_EQ(0, count);
- ASSERT_OK(it->status());
- delete it;
- // Add a key to memtable
- ASSERT_OK(Put(1, "foobar", "third"));
- it = db_->NewIterator(ropt, handles_[1]);
- ASSERT_OK(it->status());
- count = 0;
- for (it->SeekToFirst(); it->Valid(); it->Next()) {
- ASSERT_TRUE(it->Valid());
- ASSERT_EQ("foobar", it->key().ToString());
- ASSERT_EQ("third", it->value().ToString());
- count++;
- }
- ASSERT_TRUE(!it->Valid());
- ASSERT_EQ(1, count);
- ASSERT_OK(it->status());
- delete it;
- }
// WriteOptions::low_pri: low-priority writes are throttled through the
// rate limiter only while compaction pressure exists (L0 files piling up
// behind a deliberately blocked compaction), at the expected 1MB/s rate;
// normal-priority writes are never throttled.
TEST_F(DBTest2, LowPriWrite) {
  Options options = CurrentOptions();
  // Compaction pressure should trigger since 6 files
  options.level0_file_num_compaction_trigger = 4;
  options.level0_slowdown_writes_trigger = 12;
  options.level0_stop_writes_trigger = 30;
  options.delayed_write_rate = 8 * 1024 * 1024;
  Reopen(options);
  // Counts calls into the rate limiter; each request must carry the 1MB/s
  // low-pri rate.
  std::atomic<int> rate_limit_count(0);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "GenericRateLimiter::Request:1", [&](void* arg) {
        rate_limit_count.fetch_add(1);
        int64_t* rate_bytes_per_sec = static_cast<int64_t*>(arg);
        ASSERT_EQ(1024 * 1024, *rate_bytes_per_sec);
      });
  // Make a trivial L5 for L0 to compact into. L6 will be large so debt ratio
  // will not cause compaction pressure.
  Random rnd(301);
  ASSERT_OK(Put("", rnd.RandomString(102400)));
  ASSERT_OK(Flush());
  MoveFilesToLevel(6);
  ASSERT_OK(Put("", ""));
  ASSERT_OK(Flush());
  MoveFilesToLevel(5);
  // Block compaction
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({
      {"DBTest.LowPriWrite:0", "DBImpl::BGWorkCompaction"},
  });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  WriteOptions wo;
  // Accumulate L0 files; no throttling expected yet (assert below).
  for (int i = 0; i < 6; i++) {
    wo.low_pri = false;
    ASSERT_OK(Put("", "", wo));
    wo.low_pri = true;
    ASSERT_OK(Put("", "", wo));
    ASSERT_OK(Flush());
  }
  ASSERT_EQ(0, rate_limit_count.load());
  // With compaction now pending, a low-pri write hits the limiter once...
  wo.low_pri = true;
  ASSERT_OK(Put("", "", wo));
  ASSERT_EQ(1, rate_limit_count.load());
  // ...while a normal-priority write does not.
  wo.low_pri = false;
  ASSERT_OK(Put("", "", wo));
  ASSERT_EQ(1, rate_limit_count.load());
  // A 1MB low-pri write is charged through multiple limiter requests.
  wo.low_pri = true;
  std::string big_value = std::string(1 * 1024 * 1024, 'x');
  ASSERT_OK(Put("", big_value, wo));
  ASSERT_LT(1, rate_limit_count.load());
  // Reset
  rate_limit_count = 0;
  wo.low_pri = false;
  ASSERT_OK(Put("", big_value, wo));
  ASSERT_EQ(0, rate_limit_count.load());
  // Unblock compaction; once it has caught up, low-pri writes are no longer
  // throttled.
  TEST_SYNC_POINT("DBTest.LowPriWrite:0");
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  wo.low_pri = true;
  ASSERT_OK(Put("", "", wo));
  ASSERT_EQ(0, rate_limit_count.load());
  wo.low_pri = false;
  ASSERT_OK(Put("", "", wo));
  ASSERT_EQ(0, rate_limit_count.load());
}
// Compaction reads are charged to a kReadsOnly rate limiter: total charged
// bytes bracket the 512KB of compaction input, are attributed to the IO_LOW
// and IO_USER priorities, and user iterator reads are NOT charged.  Runs
// with buffered and direct I/O, with and without compaction readahead.
TEST_F(DBTest2, RateLimitedCompactionReads) {
  // compaction input has 512KB data
  const int kNumKeysPerFile = 128;
  const int kBytesPerKey = 1024;
  const int kNumL0Files = 4;
  for (int compaction_readahead_size : {0, 32 << 10}) {
    for (auto use_direct_io : {false, true}) {
      if (use_direct_io && !IsDirectIOSupported()) {
        continue;
      }
      Options options = CurrentOptions();
      options.compaction_readahead_size = compaction_readahead_size;
      options.compression = kNoCompression;
      options.level0_file_num_compaction_trigger = kNumL0Files;
      options.memtable_factory.reset(
          test::NewSpecialSkipListFactory(kNumKeysPerFile));
      // takes roughly one second, split into 100 x 10ms intervals. Each
      // interval permits 5.12KB, which is smaller than the block size, so this
      // test exercises the code for chunking reads.
      options.rate_limiter.reset(NewGenericRateLimiter(
          static_cast<int64_t>(kNumL0Files * kNumKeysPerFile *
                               kBytesPerKey) /* rate_bytes_per_sec */,
          10 * 1000 /* refill_period_us */, 10 /* fairness */,
          RateLimiter::Mode::kReadsOnly));
      options.use_direct_reads =
          options.use_direct_io_for_flush_and_compaction = use_direct_io;
      BlockBasedTableOptions bbto;
      bbto.block_size = 16384;
      bbto.no_block_cache = true;
      options.table_factory.reset(NewBlockBasedTableFactory(bbto));
      DestroyAndReopen(options);
      // To precisely control when to start bg compaction for excluding previous
      // rate-limited bytes of flush read for table verification
      std::shared_ptr<test::SleepingBackgroundTask> sleeping_task(
          new test::SleepingBackgroundTask());
      env_->SetBackgroundThreads(1, Env::LOW);
      env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
                     sleeping_task.get(), Env::Priority::LOW);
      sleeping_task->WaitUntilSleeping();
      // Build kNumL0Files L0 files (the last flush will schedule compaction,
      // which stays queued behind the sleeping task).
      for (int i = 0; i < kNumL0Files; ++i) {
        for (int j = 0; j <= kNumKeysPerFile; ++j) {
          ASSERT_OK(Put(Key(j), DummyString(kBytesPerKey)));
        }
        ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
        if (i + 1 < kNumL0Files) {
          ASSERT_EQ(i + 1, NumTableFilesAtLevel(0));
        }
      }
      // Snapshot the limiter before compaction runs so only compaction-time
      // charges are measured below.
      size_t rate_limited_bytes_start_bytes =
          options.rate_limiter->GetTotalBytesThrough(Env::IO_TOTAL);
      sleeping_task->WakeUp();
      sleeping_task->WaitUntilDone();
      ASSERT_OK(dbfull()->TEST_WaitForCompact());
      ASSERT_EQ(0, NumTableFilesAtLevel(0));
      // should be slightly above 512KB due to non-data blocks read. Arbitrarily
      // chose 1MB as the upper bound on the total bytes read.
      size_t rate_limited_bytes =
          static_cast<size_t>(
              options.rate_limiter->GetTotalBytesThrough(Env::IO_TOTAL)) -
          rate_limited_bytes_start_bytes;
      // The charges can exist for `IO_LOW` and `IO_USER` priorities.
      size_t rate_limited_bytes_by_pri =
          options.rate_limiter->GetTotalBytesThrough(Env::IO_LOW) +
          options.rate_limiter->GetTotalBytesThrough(Env::IO_USER);
      ASSERT_EQ(rate_limited_bytes,
                static_cast<size_t>(rate_limited_bytes_by_pri));
      // Include the explicit prefetch of the footer in direct I/O case.
      size_t direct_io_extra = use_direct_io ? 512 * 1024 : 0;
      ASSERT_GE(
          rate_limited_bytes,
          static_cast<size_t>(kNumKeysPerFile * kBytesPerKey * kNumL0Files));
      ASSERT_LT(
          rate_limited_bytes,
          static_cast<size_t>(2 * kNumKeysPerFile * kBytesPerKey * kNumL0Files +
                              direct_io_extra));
      Iterator* iter = db_->NewIterator(ReadOptions());
      ASSERT_OK(iter->status());
      for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
        ASSERT_EQ(iter->value().ToString(), DummyString(kBytesPerKey));
      }
      delete iter;
      // bytes read for user iterator shouldn't count against the rate limit.
      rate_limited_bytes_by_pri =
          options.rate_limiter->GetTotalBytesThrough(Env::IO_LOW) +
          options.rate_limiter->GetTotalBytesThrough(Env::IO_USER);
      ASSERT_EQ(rate_limited_bytes,
                static_cast<size_t>(rate_limited_bytes_by_pri));
    }
  }
}
- // Make sure DB can be reopen with reduced number of levels, given no file
- // is on levels higher than the new num_levels.
- TEST_F(DBTest2, ReduceLevel) {
- Options options;
- options.env = env_;
- options.disable_auto_compactions = true;
- options.num_levels = 7;
- Reopen(options);
- ASSERT_OK(Put("foo", "bar"));
- ASSERT_OK(Flush());
- MoveFilesToLevel(6);
- ASSERT_EQ("0,0,0,0,0,0,1", FilesPerLevel());
- CompactRangeOptions compact_options;
- compact_options.change_level = true;
- compact_options.target_level = 1;
- ASSERT_OK(dbfull()->CompactRange(compact_options, nullptr, nullptr));
- ASSERT_EQ("0,1", FilesPerLevel());
- options.num_levels = 3;
- Reopen(options);
- ASSERT_EQ("0,1", FilesPerLevel());
- }
// Test that ReadCallback is actually used in both memtable and SST tables
TEST_F(DBTest2, ReadCallbackTest) {
  Options options;
  options.disable_auto_compactions = true;
  options.num_levels = 7;
  options.env = env_;
  Reopen(options);
  std::vector<const Snapshot*> snapshots;
  // Try to create a db with multiple layers and a memtable
  const std::string key = "foo";
  const std::string value = "bar";
  // This test assumes that the seq start with 1 and increased by 1 after each
  // write batch of size 1. If that behavior changes, the test needs to be
  // updated as well.
  // TODO(myabandeh): update this test to use the seq number that is returned by
  // the DB instead of assuming what seq the DB used.
  int i = 1;
  // Values for seqs 1..9 go into the first SST file.
  for (; i < 10; i++) {
    ASSERT_OK(Put(key, value + std::to_string(i)));
    // Take a snapshot to avoid the value being removed during compaction
    auto snapshot = dbfull()->GetSnapshot();
    snapshots.push_back(snapshot);
  }
  ASSERT_OK(Flush());
  // Values for seqs 10..19 form a second file; both then move to L6.
  for (; i < 20; i++) {
    ASSERT_OK(Put(key, value + std::to_string(i)));
    // Take a snapshot to avoid the value being removed during compaction
    auto snapshot = dbfull()->GetSnapshot();
    snapshots.push_back(snapshot);
  }
  ASSERT_OK(Flush());
  MoveFilesToLevel(6);
  ASSERT_EQ("0,0,0,0,0,0,2", FilesPerLevel());
  // Values for seqs 20..29 stay in an L0 file.
  for (; i < 30; i++) {
    ASSERT_OK(Put(key, value + std::to_string(i)));
    auto snapshot = dbfull()->GetSnapshot();
    snapshots.push_back(snapshot);
  }
  ASSERT_OK(Flush());
  ASSERT_EQ("1,0,0,0,0,0,2", FilesPerLevel());
  // And also add some values to the memtable
  for (; i < 40; i++) {
    ASSERT_OK(Put(key, value + std::to_string(i)));
    auto snapshot = dbfull()->GetSnapshot();
    snapshots.push_back(snapshot);
  }
  // Callback that limits visibility to sequence numbers <= snapshot.
  class TestReadCallback : public ReadCallback {
   public:
    explicit TestReadCallback(SequenceNumber snapshot)
        : ReadCallback(snapshot), snapshot_(snapshot) {}
    bool IsVisibleFullCheck(SequenceNumber seq) override {
      return seq <= snapshot_;
    }

   private:
    SequenceNumber snapshot_;
  };
  // For every seq, a Get() bounded by that seq must return the matching
  // value -- exercising the callback against memtable, L0 and L6 data.
  for (int seq = 1; seq < i; seq++) {
    PinnableSlice pinnable_val;
    ReadOptions roptions;
    TestReadCallback callback(seq);
    bool dont_care = true;
    DBImpl::GetImplOptions get_impl_options;
    get_impl_options.column_family = dbfull()->DefaultColumnFamily();
    get_impl_options.value = &pinnable_val;
    get_impl_options.value_found = &dont_care;
    get_impl_options.callback = &callback;
    Status s = dbfull()->GetImpl(roptions, key, get_impl_options);
    ASSERT_TRUE(s.ok());
    // Assuming that after each Put the DB increased seq by one, the value and
    // seq number must be equal since we also inc value by 1 after each Put.
    ASSERT_EQ(value + std::to_string(seq), pinnable_val.ToString());
  }
  for (auto snapshot : snapshots) {
    dbfull()->ReleaseSnapshot(snapshot);
  }
}
TEST_F(DBTest2, LiveFilesOmitObsoleteFiles) {
  // Regression test for race condition where an obsolete file is returned to
  // user as a "live file" but then deleted, all while file deletions are
  // disabled.
  //
  // It happened like this:
  //
  // 1. [flush thread] Log file "x.log" found by FindObsoleteFiles
  // 2. [user thread] DisableFileDeletions, GetSortedWalFiles are called and the
  //    latter returned "x.log"
  // 3. [flush thread] PurgeObsoleteFiles deleted "x.log"
  // 4. [user thread] Reading "x.log" failed
  //
  // Unfortunately the only regression test I can come up with involves sleep.
  // We cannot set SyncPoints to repro since, once the fix is applied, the
  // SyncPoints would cause a deadlock as the repro's sequence of events is now
  // prohibited.
  //
  // Instead, if we sleep for a second between Find and Purge, and ensure the
  // read attempt happens after purge, then the sequence of events will almost
  // certainly happen on the old code.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({
      {"DBImpl::BackgroundCallFlush:FilesFound",
       "DBTest2::LiveFilesOmitObsoleteFiles:FlushTriggered"},
      {"DBImpl::PurgeObsoleteFiles:End",
       "DBTest2::LiveFilesOmitObsoleteFiles:LiveFilesCaptured"},
  });
  // Widen the race window: hold purge for one second after obsolete files
  // have been found.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::PurgeObsoleteFiles:Begin",
      [&](void* /*arg*/) { env_->SleepForMicroseconds(1000000); });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // Kick off a non-blocking flush, which makes the current WAL obsolete.
  ASSERT_OK(Put("key", "val"));
  FlushOptions flush_opts;
  flush_opts.wait = false;
  ASSERT_OK(db_->Flush(flush_opts));
  TEST_SYNC_POINT("DBTest2::LiveFilesOmitObsoleteFiles:FlushTriggered");

  ASSERT_OK(db_->DisableFileDeletions());
  VectorWalPtr log_files;
  ASSERT_OK(db_->GetSortedWalFiles(log_files));
  TEST_SYNC_POINT("DBTest2::LiveFilesOmitObsoleteFiles:LiveFilesCaptured");
  // Every WAL reported as "live" must still exist after purge has run.
  for (const auto& log_file : log_files) {
    ASSERT_OK(env_->FileExists(LogFileName(dbname_, log_file->LogNumber())));
  }

  ASSERT_OK(db_->EnableFileDeletions());
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}
// Counts random reads (preads) and table-file opens at each stage -- flush,
// Get, compaction -- to pin down table-reader I/O behavior, with and without
// filesystem prefetch support.  Block cache is disabled so every block
// access is a real read.
TEST_F(DBTest2, TestNumPread) {
  Options options = CurrentOptions();
  bool prefetch_supported =
      test::IsPrefetchSupported(env_->GetFileSystem(), dbname_);
  // disable block cache
  BlockBasedTableOptions table_options;
  table_options.no_block_cache = true;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(options);
  env_->count_random_reads_ = true;
  env_->random_file_open_counter_.store(0);
  ASSERT_OK(Put("bar", "foo"));
  ASSERT_OK(Put("foo", "bar"));
  ASSERT_OK(Flush());
  if (prefetch_supported) {
    // After flush, we'll open the file and read footer, meta block,
    // property block and index block.
    ASSERT_EQ(4, env_->random_read_counter_.Read());
  } else {
    // With prefetch not supported, we will do a single read into a buffer
    ASSERT_EQ(1, env_->random_read_counter_.Read());
  }
  ASSERT_EQ(1, env_->random_file_open_counter_.load());
  // One pread per a normal data block read
  env_->random_file_open_counter_.store(0);
  env_->random_read_counter_.Reset();
  ASSERT_EQ("bar", Get("foo"));
  ASSERT_EQ(1, env_->random_read_counter_.Read());
  // All files are already opened.
  ASSERT_EQ(0, env_->random_file_open_counter_.load());
  // Flush a second file and re-check the per-flush read/open counts.
  env_->random_file_open_counter_.store(0);
  env_->random_read_counter_.Reset();
  ASSERT_OK(Put("bar2", "foo2"));
  ASSERT_OK(Put("foo2", "bar2"));
  ASSERT_OK(Flush());
  if (prefetch_supported) {
    // After flush, we'll open the file and read footer, meta block,
    // property block and index block.
    ASSERT_EQ(4, env_->random_read_counter_.Read());
  } else {
    // With prefetch not supported, we will do a single read into a buffer
    ASSERT_EQ(1, env_->random_read_counter_.Read());
  }
  ASSERT_EQ(1, env_->random_file_open_counter_.load());
  env_->random_file_open_counter_.store(0);
  env_->random_read_counter_.Reset();
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  if (prefetch_supported) {
    // Compaction needs two input blocks, which requires 2 preads, and
    // generate a new SST file which needs 4 preads (footer, meta block,
    // property block and index block). In total 6.
    ASSERT_EQ(6, env_->random_read_counter_.Read());
  } else {
    // With prefetch off, compaction needs two input blocks,
    // followed by a single buffered read. In total 3.
    ASSERT_EQ(3, env_->random_read_counter_.Read());
  }
  // All compaction input files should have already been opened.
  ASSERT_EQ(1, env_->random_file_open_counter_.load());
  // One pread per a normal data block read
  env_->random_file_open_counter_.store(0);
  env_->random_read_counter_.Reset();
  ASSERT_EQ("foo2", Get("bar2"));
  ASSERT_EQ(1, env_->random_read_counter_.Read());
  // SST files are already opened.
  ASSERT_EQ(0, env_->random_file_open_counter_.load());
}
- class TraceExecutionResultHandler : public TraceRecordResult::Handler {
- public:
- TraceExecutionResultHandler() = default;
- ~TraceExecutionResultHandler() override = default;
- Status Handle(const StatusOnlyTraceExecutionResult& result) override {
- if (result.GetStartTimestamp() > result.GetEndTimestamp()) {
- return Status::InvalidArgument("Invalid timestamps.");
- }
- result.GetStatus().PermitUncheckedError();
- switch (result.GetTraceType()) {
- case kTraceWrite: {
- total_latency_ += result.GetLatency();
- cnt_++;
- writes_++;
- break;
- }
- default:
- return Status::Corruption("Type mismatch.");
- }
- return Status::OK();
- }
- Status Handle(const SingleValueTraceExecutionResult& result) override {
- if (result.GetStartTimestamp() > result.GetEndTimestamp()) {
- return Status::InvalidArgument("Invalid timestamps.");
- }
- result.GetStatus().PermitUncheckedError();
- switch (result.GetTraceType()) {
- case kTraceGet: {
- total_latency_ += result.GetLatency();
- cnt_++;
- gets_++;
- break;
- }
- default:
- return Status::Corruption("Type mismatch.");
- }
- return Status::OK();
- }
- Status Handle(const MultiValuesTraceExecutionResult& result) override {
- if (result.GetStartTimestamp() > result.GetEndTimestamp()) {
- return Status::InvalidArgument("Invalid timestamps.");
- }
- for (const Status& s : result.GetMultiStatus()) {
- s.PermitUncheckedError();
- }
- switch (result.GetTraceType()) {
- case kTraceMultiGet: {
- total_latency_ += result.GetLatency();
- cnt_++;
- multigets_++;
- break;
- }
- default:
- return Status::Corruption("Type mismatch.");
- }
- return Status::OK();
- }
- Status Handle(const IteratorTraceExecutionResult& result) override {
- if (result.GetStartTimestamp() > result.GetEndTimestamp()) {
- return Status::InvalidArgument("Invalid timestamps.");
- }
- result.GetStatus().PermitUncheckedError();
- switch (result.GetTraceType()) {
- case kTraceIteratorSeek:
- case kTraceIteratorSeekForPrev: {
- total_latency_ += result.GetLatency();
- cnt_++;
- seeks_++;
- break;
- }
- default:
- return Status::Corruption("Type mismatch.");
- }
- return Status::OK();
- }
- void Reset() {
- total_latency_ = 0;
- cnt_ = 0;
- writes_ = 0;
- gets_ = 0;
- seeks_ = 0;
- multigets_ = 0;
- }
- double GetAvgLatency() const {
- return cnt_ == 0 ? 0.0 : 1.0 * total_latency_ / cnt_;
- }
- int GetNumWrites() const { return writes_; }
- int GetNumGets() const { return gets_; }
- int GetNumIterSeeks() const { return seeks_; }
- int GetNumMultiGets() const { return multigets_; }
- private:
- std::atomic<uint64_t> total_latency_{0};
- std::atomic<uint32_t> cnt_{0};
- std::atomic<int> writes_{0};
- std::atomic<int> gets_{0};
- std::atomic<int> seeks_{0};
- std::atomic<int> multigets_{0};
- };
// End-to-end tracing: record a known mix of operations (8 writes, 3 gets,
// 2 iterator seeks across two column families), then replay the trace into a
// fresh DB -- single- and multi-threaded, at several speeds -- and verify
// both the per-type replay statistics and the replayed data.
TEST_F(DBTest2, TraceAndReplay) {
  Options options = CurrentOptions();
  options.merge_operator = MergeOperators::CreatePutOperator();
  ReadOptions ro;
  WriteOptions wo;
  TraceOptions trace_opts;
  EnvOptions env_opts;
  CreateAndReopenWithCF({"pikachu"}, options);
  Random rnd(301);
  Iterator* single_iter = nullptr;
  // Ending a trace that was never started is an IOError.
  ASSERT_TRUE(db_->EndTrace().IsIOError());
  std::string trace_filename = dbname_ + "/rocksdb.trace";
  std::unique_ptr<TraceWriter> trace_writer;
  ASSERT_OK(NewFileTraceWriter(env_, env_opts, trace_filename, &trace_writer));
  ASSERT_OK(db_->StartTrace(trace_opts, std::move(trace_writer)));
  // 5 Writes
  ASSERT_OK(Put(0, "a", "1"));
  ASSERT_OK(Merge(0, "b", "2"));
  ASSERT_OK(Delete(0, "c"));
  ASSERT_OK(SingleDelete(0, "d"));
  ASSERT_OK(db_->DeleteRange(wo, dbfull()->DefaultColumnFamily(), "e", "f"));
  // 6th Write
  WriteBatch batch;
  ASSERT_OK(batch.Put("f", "11"));
  ASSERT_OK(batch.Merge("g", "12"));
  ASSERT_OK(batch.Delete("h"));
  ASSERT_OK(batch.SingleDelete("i"));
  ASSERT_OK(batch.DeleteRange("j", "k"));
  ASSERT_OK(db_->Write(wo, &batch));
  // 2 Seek(ForPrev)s
  single_iter = db_->NewIterator(ro);
  single_iter->Seek("f");  // Seek 1
  single_iter->SeekForPrev("g");
  ASSERT_OK(single_iter->status());
  delete single_iter;
  // 2 Gets
  ASSERT_EQ("1", Get(0, "a"));
  ASSERT_EQ("12", Get(0, "g"));
  // 7th and 8th Write, 3rd Get
  ASSERT_OK(Put(1, "foo", "bar"));
  ASSERT_OK(Put(1, "rocksdb", "rocks"));
  ASSERT_EQ("NOT_FOUND", Get(1, "leveldb"));
  // Total Write x 8, Get x 3, Seek x 2.
  ASSERT_OK(db_->EndTrace());
  // These should not get into the trace file as it is after EndTrace.
  ASSERT_OK(Put("hello", "world"));
  ASSERT_OK(Merge("foo", "bar"));
  // Open another db, replay, and verify the data
  std::string value;
  std::string dbname2 = test::PerThreadDBPath(env_, "/db_replay");
  ASSERT_OK(DestroyDB(dbname2, options));
  // Using a different name than db2, to pacify infer's use-after-lifetime
  // warnings (http://fbinfer.com).
  DB* db2_init = nullptr;
  options.create_if_missing = true;
  ASSERT_OK(DB::Open(options, dbname2, &db2_init));
  ColumnFamilyHandle* cf;
  ASSERT_OK(
      db2_init->CreateColumnFamily(ColumnFamilyOptions(), "pikachu", &cf));
  delete cf;
  delete db2_init;
  DB* db2 = nullptr;
  std::vector<ColumnFamilyDescriptor> column_families;
  ColumnFamilyOptions cf_options;
  cf_options.merge_operator = MergeOperators::CreatePutOperator();
  column_families.emplace_back("default", cf_options);
  column_families.emplace_back("pikachu", ColumnFamilyOptions());
  std::vector<ColumnFamilyHandle*> handles;
  DBOptions db_opts;
  db_opts.env = env_;
  ASSERT_OK(DB::Open(db_opts, dbname2, column_families, &handles, &db2));
  env_->SleepForMicroseconds(100);
  // Verify that the keys don't already exist
  ASSERT_TRUE(db2->Get(ro, handles[0], "a", &value).IsNotFound());
  ASSERT_TRUE(db2->Get(ro, handles[0], "g", &value).IsNotFound());
  std::unique_ptr<TraceReader> trace_reader;
  ASSERT_OK(NewFileTraceReader(env_, env_opts, trace_filename, &trace_reader));
  std::unique_ptr<Replayer> replayer;
  ASSERT_OK(
      db2->NewDefaultReplayer(handles, std::move(trace_reader), &replayer));
  // Callback collects per-record results into res_handler; NotSupported
  // execution statuses are tolerated.
  TraceExecutionResultHandler res_handler;
  std::function<void(Status, std::unique_ptr<TraceRecordResult>&&)> res_cb =
      [&res_handler](Status exec_s, std::unique_ptr<TraceRecordResult>&& res) {
        ASSERT_TRUE(exec_s.ok() || exec_s.IsNotSupported());
        if (res != nullptr) {
          ASSERT_OK(res->Accept(&res_handler));
          res.reset();
        }
      };
  // Unprepared replay should fail with Status::Incomplete()
  ASSERT_TRUE(replayer->Replay(ReplayOptions(), nullptr).IsIncomplete());
  ASSERT_OK(replayer->Prepare());
  // Ok to repeatedly Prepare().
  ASSERT_OK(replayer->Prepare());
  // Replay using 1 thread, 1x speed.
  ASSERT_OK(replayer->Replay(ReplayOptions(1, 1.0), res_cb));
  ASSERT_GE(res_handler.GetAvgLatency(), 0.0);
  ASSERT_EQ(res_handler.GetNumWrites(), 8);
  ASSERT_EQ(res_handler.GetNumGets(), 3);
  ASSERT_EQ(res_handler.GetNumIterSeeks(), 2);
  ASSERT_EQ(res_handler.GetNumMultiGets(), 0);
  res_handler.Reset();
  // Replayed data must match what was traced into the source DB.
  ASSERT_OK(db2->Get(ro, handles[0], "a", &value));
  ASSERT_EQ("1", value);
  ASSERT_OK(db2->Get(ro, handles[0], "g", &value));
  ASSERT_EQ("12", value);
  // Writes issued after EndTrace() must not have been replayed.
  ASSERT_TRUE(db2->Get(ro, handles[0], "hello", &value).IsNotFound());
  ASSERT_TRUE(db2->Get(ro, handles[0], "world", &value).IsNotFound());
  ASSERT_OK(db2->Get(ro, handles[1], "foo", &value));
  ASSERT_EQ("bar", value);
  ASSERT_OK(db2->Get(ro, handles[1], "rocksdb", &value));
  ASSERT_EQ("rocks", value);
  // Re-replay should fail with Status::Incomplete() if Prepare() was not
  // called. Currently we don't distinguish between unprepared and trace end.
  ASSERT_TRUE(replayer->Replay(ReplayOptions(), nullptr).IsIncomplete());
  // Re-replay using 2 threads, 2x speed.
  ASSERT_OK(replayer->Prepare());
  ASSERT_OK(replayer->Replay(ReplayOptions(2, 2.0), res_cb));
  ASSERT_GE(res_handler.GetAvgLatency(), 0.0);
  ASSERT_EQ(res_handler.GetNumWrites(), 8);
  ASSERT_EQ(res_handler.GetNumGets(), 3);
  ASSERT_EQ(res_handler.GetNumIterSeeks(), 2);
  ASSERT_EQ(res_handler.GetNumMultiGets(), 0);
  res_handler.Reset();
  // Re-replay using 2 threads, 1/2 speed.
  ASSERT_OK(replayer->Prepare());
  ASSERT_OK(replayer->Replay(ReplayOptions(2, 0.5), res_cb));
  ASSERT_GE(res_handler.GetAvgLatency(), 0.0);
  ASSERT_EQ(res_handler.GetNumWrites(), 8);
  ASSERT_EQ(res_handler.GetNumGets(), 3);
  ASSERT_EQ(res_handler.GetNumIterSeeks(), 2);
  ASSERT_EQ(res_handler.GetNumMultiGets(), 0);
  res_handler.Reset();
  replayer.reset();
  for (auto handle : handles) {
    delete handle;
  }
  delete db2;
  ASSERT_OK(DestroyDB(dbname2, options));
}
- TEST_F(DBTest2, TraceAndManualReplay) {
- Options options = CurrentOptions();
- options.merge_operator = MergeOperators::CreatePutOperator();
- ReadOptions ro;
- WriteOptions wo;
- TraceOptions trace_opts;
- EnvOptions env_opts;
- CreateAndReopenWithCF({"pikachu"}, options);
- Random rnd(301);
- Iterator* single_iter = nullptr;
- ASSERT_TRUE(db_->EndTrace().IsIOError());
- std::string trace_filename = dbname_ + "/rocksdb.trace";
- std::unique_ptr<TraceWriter> trace_writer;
- ASSERT_OK(NewFileTraceWriter(env_, env_opts, trace_filename, &trace_writer));
- ASSERT_OK(db_->StartTrace(trace_opts, std::move(trace_writer)));
- ASSERT_OK(Put(0, "a", "1"));
- ASSERT_OK(Merge(0, "b", "2"));
- ASSERT_OK(Delete(0, "c"));
- ASSERT_OK(SingleDelete(0, "d"));
- ASSERT_OK(db_->DeleteRange(wo, dbfull()->DefaultColumnFamily(), "e", "f"));
- WriteBatch batch;
- ASSERT_OK(batch.Put("f", "11"));
- ASSERT_OK(batch.Merge("g", "12"));
- ASSERT_OK(batch.Delete("h"));
- ASSERT_OK(batch.SingleDelete("i"));
- ASSERT_OK(batch.DeleteRange("j", "k"));
- ASSERT_OK(db_->Write(wo, &batch));
- single_iter = db_->NewIterator(ro);
- single_iter->Seek("f");
- single_iter->SeekForPrev("g");
- ASSERT_OK(single_iter->status());
- delete single_iter;
- // Write some sequenced keys for testing lower/upper bounds of iterator.
- batch.Clear();
- ASSERT_OK(batch.Put("iter-0", "iter-0"));
- ASSERT_OK(batch.Put("iter-1", "iter-1"));
- ASSERT_OK(batch.Put("iter-2", "iter-2"));
- ASSERT_OK(batch.Put("iter-3", "iter-3"));
- ASSERT_OK(batch.Put("iter-4", "iter-4"));
- ASSERT_OK(db_->Write(wo, &batch));
- ReadOptions bounded_ro = ro;
- Slice lower_bound("iter-1");
- Slice upper_bound("iter-3");
- bounded_ro.iterate_lower_bound = &lower_bound;
- bounded_ro.iterate_upper_bound = &upper_bound;
- single_iter = db_->NewIterator(bounded_ro);
- single_iter->Seek("iter-0");
- ASSERT_EQ(single_iter->key().ToString(), "iter-1");
- single_iter->Seek("iter-2");
- ASSERT_EQ(single_iter->key().ToString(), "iter-2");
- single_iter->Seek("iter-4");
- ASSERT_FALSE(single_iter->Valid());
- single_iter->SeekForPrev("iter-0");
- ASSERT_FALSE(single_iter->Valid());
- single_iter->SeekForPrev("iter-2");
- ASSERT_EQ(single_iter->key().ToString(), "iter-2");
- single_iter->SeekForPrev("iter-4");
- ASSERT_EQ(single_iter->key().ToString(), "iter-2");
- ASSERT_OK(single_iter->status());
- delete single_iter;
- ASSERT_EQ("1", Get(0, "a"));
- ASSERT_EQ("12", Get(0, "g"));
- ASSERT_OK(Put(1, "foo", "bar"));
- ASSERT_OK(Put(1, "rocksdb", "rocks"));
- ASSERT_EQ("NOT_FOUND", Get(1, "leveldb"));
- // Same as TraceAndReplay, Write x 8, Get x 3, Seek x 2.
- // Plus 1 WriteBatch for iterator with lower/upper bounds, and 6
- // Seek(ForPrev)s.
- // Total Write x 9, Get x 3, Seek x 8
- ASSERT_OK(db_->EndTrace());
- // These should not get into the trace file as it is after EndTrace.
- ASSERT_OK(Put("hello", "world"));
- ASSERT_OK(Merge("foo", "bar"));
- // Open another db, replay, and verify the data
- std::string value;
- std::string dbname2 = test::PerThreadDBPath(env_, "/db_replay");
- ASSERT_OK(DestroyDB(dbname2, options));
- // Using a different name than db2, to pacify infer's use-after-lifetime
- // warnings (http://fbinfer.com).
- DB* db2_init = nullptr;
- options.create_if_missing = true;
- ASSERT_OK(DB::Open(options, dbname2, &db2_init));
- ColumnFamilyHandle* cf;
- ASSERT_OK(
- db2_init->CreateColumnFamily(ColumnFamilyOptions(), "pikachu", &cf));
- delete cf;
- delete db2_init;
- DB* db2 = nullptr;
- std::vector<ColumnFamilyDescriptor> column_families;
- ColumnFamilyOptions cf_options;
- cf_options.merge_operator = MergeOperators::CreatePutOperator();
- column_families.emplace_back("default", cf_options);
- column_families.emplace_back("pikachu", ColumnFamilyOptions());
- std::vector<ColumnFamilyHandle*> handles;
- DBOptions db_opts;
- db_opts.env = env_;
- ASSERT_OK(DB::Open(db_opts, dbname2, column_families, &handles, &db2));
- env_->SleepForMicroseconds(100);
- // Verify that the keys don't already exist
- ASSERT_TRUE(db2->Get(ro, handles[0], "a", &value).IsNotFound());
- ASSERT_TRUE(db2->Get(ro, handles[0], "g", &value).IsNotFound());
- std::unique_ptr<TraceReader> trace_reader;
- ASSERT_OK(NewFileTraceReader(env_, env_opts, trace_filename, &trace_reader));
- std::unique_ptr<Replayer> replayer;
- ASSERT_OK(
- db2->NewDefaultReplayer(handles, std::move(trace_reader), &replayer));
- TraceExecutionResultHandler res_handler;
- // Manual replay for 2 times. The 2nd checks if the replay can restart.
- std::unique_ptr<TraceRecord> record;
- std::unique_ptr<TraceRecordResult> result;
- for (int i = 0; i < 2; i++) {
- // Next should fail if unprepared.
- ASSERT_TRUE(replayer->Next(nullptr).IsIncomplete());
- ASSERT_OK(replayer->Prepare());
- Status s = Status::OK();
- // Looping until trace end.
- while (s.ok()) {
- s = replayer->Next(&record);
- // Skip unsupported operations.
- if (s.IsNotSupported()) {
- continue;
- }
- if (s.ok()) {
- ASSERT_OK(replayer->Execute(record, &result));
- if (result != nullptr) {
- ASSERT_OK(result->Accept(&res_handler));
- if (record->GetTraceType() == kTraceIteratorSeek ||
- record->GetTraceType() == kTraceIteratorSeekForPrev) {
- IteratorSeekQueryTraceRecord* iter_rec =
- dynamic_cast<IteratorSeekQueryTraceRecord*>(record.get());
- IteratorTraceExecutionResult* iter_res =
- dynamic_cast<IteratorTraceExecutionResult*>(result.get());
- // Check if lower/upper bounds are correctly saved and decoded.
- std::string lower_str = iter_rec->GetLowerBound().ToString();
- std::string upper_str = iter_rec->GetUpperBound().ToString();
- std::string iter_key = iter_res->GetKey().ToString();
- std::string iter_value = iter_res->GetValue().ToString();
- if (!lower_str.empty() && !upper_str.empty()) {
- ASSERT_EQ(lower_str, "iter-1");
- ASSERT_EQ(upper_str, "iter-3");
- if (iter_res->GetValid()) {
- // If iterator is valid, then lower_bound <= key < upper_bound.
- ASSERT_GE(iter_key, lower_str);
- ASSERT_LT(iter_key, upper_str);
- } else {
- // If iterator is invalid, then
- // key < lower_bound or key >= upper_bound.
- ASSERT_TRUE(iter_key < lower_str || iter_key >= upper_str);
- }
- }
- // If iterator is invalid, the key and value should be empty.
- if (!iter_res->GetValid()) {
- ASSERT_TRUE(iter_key.empty());
- ASSERT_TRUE(iter_value.empty());
- }
- }
- result.reset();
- }
- }
- }
- // Status::Incomplete() will be returned when manually reading the trace
- // end, or Prepare() was not called.
- ASSERT_TRUE(s.IsIncomplete());
- ASSERT_TRUE(replayer->Next(nullptr).IsIncomplete());
- ASSERT_GE(res_handler.GetAvgLatency(), 0.0);
- ASSERT_EQ(res_handler.GetNumWrites(), 9);
- ASSERT_EQ(res_handler.GetNumGets(), 3);
- ASSERT_EQ(res_handler.GetNumIterSeeks(), 8);
- ASSERT_EQ(res_handler.GetNumMultiGets(), 0);
- res_handler.Reset();
- }
- ASSERT_OK(db2->Get(ro, handles[0], "a", &value));
- ASSERT_EQ("1", value);
- ASSERT_OK(db2->Get(ro, handles[0], "g", &value));
- ASSERT_EQ("12", value);
- ASSERT_TRUE(db2->Get(ro, handles[0], "hello", &value).IsNotFound());
- ASSERT_TRUE(db2->Get(ro, handles[0], "world", &value).IsNotFound());
- ASSERT_OK(db2->Get(ro, handles[1], "foo", &value));
- ASSERT_EQ("bar", value);
- ASSERT_OK(db2->Get(ro, handles[1], "rocksdb", &value));
- ASSERT_EQ("rocks", value);
- // Test execution of artificially created TraceRecords.
- uint64_t fake_ts = 1U;
- // Write
- batch.Clear();
- ASSERT_OK(batch.Put("trace-record-write1", "write1"));
- ASSERT_OK(batch.Put("trace-record-write2", "write2"));
- record.reset(new WriteQueryTraceRecord(batch.Data(), fake_ts++));
- ASSERT_OK(replayer->Execute(record, &result));
- ASSERT_TRUE(result != nullptr);
- ASSERT_OK(result->Accept(&res_handler)); // Write x 1
- ASSERT_OK(db2->Get(ro, handles[0], "trace-record-write1", &value));
- ASSERT_EQ("write1", value);
- ASSERT_OK(db2->Get(ro, handles[0], "trace-record-write2", &value));
- ASSERT_EQ("write2", value);
- ASSERT_GE(res_handler.GetAvgLatency(), 0.0);
- ASSERT_EQ(res_handler.GetNumWrites(), 1);
- ASSERT_EQ(res_handler.GetNumGets(), 0);
- ASSERT_EQ(res_handler.GetNumIterSeeks(), 0);
- ASSERT_EQ(res_handler.GetNumMultiGets(), 0);
- res_handler.Reset();
- // Get related
- // Get an existing key.
- record.reset(new GetQueryTraceRecord(handles[0]->GetID(),
- "trace-record-write1", fake_ts++));
- ASSERT_OK(replayer->Execute(record, &result));
- ASSERT_TRUE(result != nullptr);
- ASSERT_OK(result->Accept(&res_handler)); // Get x 1
- // Get an non-existing key, should still return Status::OK().
- record.reset(new GetQueryTraceRecord(handles[0]->GetID(), "trace-record-get",
- fake_ts++));
- ASSERT_OK(replayer->Execute(record, &result));
- ASSERT_TRUE(result != nullptr);
- ASSERT_OK(result->Accept(&res_handler)); // Get x 2
- // Get from an invalid (non-existing) cf_id.
- uint32_t invalid_cf_id = handles[1]->GetID() + 1;
- record.reset(new GetQueryTraceRecord(invalid_cf_id, "whatever", fake_ts++));
- ASSERT_TRUE(replayer->Execute(record, &result).IsCorruption());
- ASSERT_TRUE(result == nullptr);
- ASSERT_GE(res_handler.GetAvgLatency(), 0.0);
- ASSERT_EQ(res_handler.GetNumWrites(), 0);
- ASSERT_EQ(res_handler.GetNumGets(), 2);
- ASSERT_EQ(res_handler.GetNumIterSeeks(), 0);
- ASSERT_EQ(res_handler.GetNumMultiGets(), 0);
- res_handler.Reset();
- // Iteration related
- for (IteratorSeekQueryTraceRecord::SeekType seekType :
- {IteratorSeekQueryTraceRecord::kSeek,
- IteratorSeekQueryTraceRecord::kSeekForPrev}) {
- // Seek to an existing key.
- record.reset(new IteratorSeekQueryTraceRecord(
- seekType, handles[0]->GetID(), "trace-record-write1", fake_ts++));
- ASSERT_OK(replayer->Execute(record, &result));
- ASSERT_TRUE(result != nullptr);
- ASSERT_OK(result->Accept(&res_handler)); // Seek x 1 in one iteration
- // Seek to an non-existing key, should still return Status::OK().
- record.reset(new IteratorSeekQueryTraceRecord(
- seekType, handles[0]->GetID(), "trace-record-get", fake_ts++));
- ASSERT_OK(replayer->Execute(record, &result));
- ASSERT_TRUE(result != nullptr);
- ASSERT_OK(result->Accept(&res_handler)); // Seek x 2 in one iteration
- // Seek from an invalid cf_id.
- record.reset(new IteratorSeekQueryTraceRecord(seekType, invalid_cf_id,
- "whatever", fake_ts++));
- ASSERT_TRUE(replayer->Execute(record, &result).IsCorruption());
- ASSERT_TRUE(result == nullptr);
- }
- ASSERT_GE(res_handler.GetAvgLatency(), 0.0);
- ASSERT_EQ(res_handler.GetNumWrites(), 0);
- ASSERT_EQ(res_handler.GetNumGets(), 0);
- ASSERT_EQ(res_handler.GetNumIterSeeks(), 4); // Seek x 2 in two iterations
- ASSERT_EQ(res_handler.GetNumMultiGets(), 0);
- res_handler.Reset();
- // MultiGet related
- // Get existing keys.
- record.reset(new MultiGetQueryTraceRecord(
- std::vector<uint32_t>({handles[0]->GetID(), handles[1]->GetID()}),
- std::vector<std::string>({"a", "foo"}), fake_ts++));
- ASSERT_OK(replayer->Execute(record, &result));
- ASSERT_TRUE(result != nullptr);
- ASSERT_OK(result->Accept(&res_handler)); // MultiGet x 1
- // Get all non-existing keys, should still return Status::OK().
- record.reset(new MultiGetQueryTraceRecord(
- std::vector<uint32_t>({handles[0]->GetID(), handles[1]->GetID()}),
- std::vector<std::string>({"no1", "no2"}), fake_ts++));
- ASSERT_OK(replayer->Execute(record, &result));
- ASSERT_TRUE(result != nullptr);
- ASSERT_OK(result->Accept(&res_handler)); // MultiGet x 2
- // Get mixed of existing and non-existing keys, should still return
- // Status::OK().
- record.reset(new MultiGetQueryTraceRecord(
- std::vector<uint32_t>({handles[0]->GetID(), handles[1]->GetID()}),
- std::vector<std::string>({"a", "no2"}), fake_ts++));
- ASSERT_OK(replayer->Execute(record, &result));
- ASSERT_TRUE(result != nullptr);
- MultiValuesTraceExecutionResult* mvr =
- dynamic_cast<MultiValuesTraceExecutionResult*>(result.get());
- ASSERT_TRUE(mvr != nullptr);
- ASSERT_OK(mvr->GetMultiStatus()[0]);
- ASSERT_TRUE(mvr->GetMultiStatus()[1].IsNotFound());
- ASSERT_EQ(mvr->GetValues()[0], "1");
- ASSERT_EQ(mvr->GetValues()[1], "");
- ASSERT_OK(result->Accept(&res_handler)); // MultiGet x 3
- // Get from an invalid (non-existing) cf_id.
- record.reset(new MultiGetQueryTraceRecord(
- std::vector<uint32_t>(
- {handles[0]->GetID(), handles[1]->GetID(), invalid_cf_id}),
- std::vector<std::string>({"a", "foo", "whatever"}), fake_ts++));
- ASSERT_TRUE(replayer->Execute(record, &result).IsCorruption());
- ASSERT_TRUE(result == nullptr);
- // Empty MultiGet
- record.reset(new MultiGetQueryTraceRecord(
- std::vector<uint32_t>(), std::vector<std::string>(), fake_ts++));
- ASSERT_TRUE(replayer->Execute(record, &result).IsInvalidArgument());
- ASSERT_TRUE(result == nullptr);
- // MultiGet size mismatch
- record.reset(new MultiGetQueryTraceRecord(
- std::vector<uint32_t>({handles[0]->GetID(), handles[1]->GetID()}),
- std::vector<std::string>({"a"}), fake_ts++));
- ASSERT_TRUE(replayer->Execute(record, &result).IsInvalidArgument());
- ASSERT_TRUE(result == nullptr);
- ASSERT_GE(res_handler.GetAvgLatency(), 0.0);
- ASSERT_EQ(res_handler.GetNumWrites(), 0);
- ASSERT_EQ(res_handler.GetNumGets(), 0);
- ASSERT_EQ(res_handler.GetNumIterSeeks(), 0);
- ASSERT_EQ(res_handler.GetNumMultiGets(), 3);
- res_handler.Reset();
- replayer.reset();
- for (auto handle : handles) {
- delete handle;
- }
- delete db2;
- ASSERT_OK(DestroyDB(dbname2, options));
- }
- TEST_F(DBTest2, TraceWithLimit) {
- Options options = CurrentOptions();
- options.merge_operator = MergeOperators::CreatePutOperator();
- ReadOptions ro;
- WriteOptions wo;
- TraceOptions trace_opts;
- EnvOptions env_opts;
- CreateAndReopenWithCF({"pikachu"}, options);
- Random rnd(301);
- // test the max trace file size options
- trace_opts.max_trace_file_size = 5;
- std::string trace_filename = dbname_ + "/rocksdb.trace1";
- std::unique_ptr<TraceWriter> trace_writer;
- ASSERT_OK(NewFileTraceWriter(env_, env_opts, trace_filename, &trace_writer));
- ASSERT_OK(db_->StartTrace(trace_opts, std::move(trace_writer)));
- ASSERT_OK(Put(0, "a", "1"));
- ASSERT_OK(Put(0, "b", "1"));
- ASSERT_OK(Put(0, "c", "1"));
- ASSERT_OK(db_->EndTrace());
- std::string dbname2 = test::PerThreadDBPath(env_, "/db_replay2");
- std::string value;
- ASSERT_OK(DestroyDB(dbname2, options));
- // Using a different name than db2, to pacify infer's use-after-lifetime
- // warnings (http://fbinfer.com).
- DB* db2_init = nullptr;
- options.create_if_missing = true;
- ASSERT_OK(DB::Open(options, dbname2, &db2_init));
- ColumnFamilyHandle* cf;
- ASSERT_OK(
- db2_init->CreateColumnFamily(ColumnFamilyOptions(), "pikachu", &cf));
- delete cf;
- delete db2_init;
- DB* db2 = nullptr;
- std::vector<ColumnFamilyDescriptor> column_families;
- ColumnFamilyOptions cf_options;
- cf_options.merge_operator = MergeOperators::CreatePutOperator();
- column_families.emplace_back("default", cf_options);
- column_families.emplace_back("pikachu", ColumnFamilyOptions());
- std::vector<ColumnFamilyHandle*> handles;
- DBOptions db_opts;
- db_opts.env = env_;
- ASSERT_OK(DB::Open(db_opts, dbname2, column_families, &handles, &db2));
- env_->SleepForMicroseconds(100);
- // Verify that the keys don't already exist
- ASSERT_TRUE(db2->Get(ro, handles[0], "a", &value).IsNotFound());
- ASSERT_TRUE(db2->Get(ro, handles[0], "b", &value).IsNotFound());
- ASSERT_TRUE(db2->Get(ro, handles[0], "c", &value).IsNotFound());
- std::unique_ptr<TraceReader> trace_reader;
- ASSERT_OK(NewFileTraceReader(env_, env_opts, trace_filename, &trace_reader));
- std::unique_ptr<Replayer> replayer;
- ASSERT_OK(
- db2->NewDefaultReplayer(handles, std::move(trace_reader), &replayer));
- ASSERT_OK(replayer->Prepare());
- ASSERT_OK(replayer->Replay(ReplayOptions(), nullptr));
- replayer.reset();
- ASSERT_TRUE(db2->Get(ro, handles[0], "a", &value).IsNotFound());
- ASSERT_TRUE(db2->Get(ro, handles[0], "b", &value).IsNotFound());
- ASSERT_TRUE(db2->Get(ro, handles[0], "c", &value).IsNotFound());
- for (auto handle : handles) {
- delete handle;
- }
- delete db2;
- ASSERT_OK(DestroyDB(dbname2, options));
- }
- TEST_F(DBTest2, TraceWithSampling) {
- Options options = CurrentOptions();
- ReadOptions ro;
- WriteOptions wo;
- TraceOptions trace_opts;
- EnvOptions env_opts;
- CreateAndReopenWithCF({"pikachu"}, options);
- Random rnd(301);
- // test the trace file sampling options
- trace_opts.sampling_frequency = 2;
- std::string trace_filename = dbname_ + "/rocksdb.trace_sampling";
- std::unique_ptr<TraceWriter> trace_writer;
- ASSERT_OK(NewFileTraceWriter(env_, env_opts, trace_filename, &trace_writer));
- ASSERT_OK(db_->StartTrace(trace_opts, std::move(trace_writer)));
- ASSERT_OK(Put(0, "a", "1"));
- ASSERT_OK(Put(0, "b", "2"));
- ASSERT_OK(Put(0, "c", "3"));
- ASSERT_OK(Put(0, "d", "4"));
- ASSERT_OK(Put(0, "e", "5"));
- ASSERT_OK(db_->EndTrace());
- std::string dbname2 = test::PerThreadDBPath(env_, "/db_replay_sampling");
- std::string value;
- ASSERT_OK(DestroyDB(dbname2, options));
- // Using a different name than db2, to pacify infer's use-after-lifetime
- // warnings (http://fbinfer.com).
- DB* db2_init = nullptr;
- options.create_if_missing = true;
- ASSERT_OK(DB::Open(options, dbname2, &db2_init));
- ColumnFamilyHandle* cf;
- ASSERT_OK(
- db2_init->CreateColumnFamily(ColumnFamilyOptions(), "pikachu", &cf));
- delete cf;
- delete db2_init;
- DB* db2 = nullptr;
- std::vector<ColumnFamilyDescriptor> column_families;
- ColumnFamilyOptions cf_options;
- column_families.emplace_back("default", cf_options);
- column_families.emplace_back("pikachu", ColumnFamilyOptions());
- std::vector<ColumnFamilyHandle*> handles;
- DBOptions db_opts;
- db_opts.env = env_;
- ASSERT_OK(DB::Open(db_opts, dbname2, column_families, &handles, &db2));
- env_->SleepForMicroseconds(100);
- ASSERT_TRUE(db2->Get(ro, handles[0], "a", &value).IsNotFound());
- ASSERT_TRUE(db2->Get(ro, handles[0], "b", &value).IsNotFound());
- ASSERT_TRUE(db2->Get(ro, handles[0], "c", &value).IsNotFound());
- ASSERT_TRUE(db2->Get(ro, handles[0], "d", &value).IsNotFound());
- ASSERT_TRUE(db2->Get(ro, handles[0], "e", &value).IsNotFound());
- std::unique_ptr<TraceReader> trace_reader;
- ASSERT_OK(NewFileTraceReader(env_, env_opts, trace_filename, &trace_reader));
- std::unique_ptr<Replayer> replayer;
- ASSERT_OK(
- db2->NewDefaultReplayer(handles, std::move(trace_reader), &replayer));
- ASSERT_OK(replayer->Prepare());
- ASSERT_OK(replayer->Replay(ReplayOptions(), nullptr));
- replayer.reset();
- ASSERT_TRUE(db2->Get(ro, handles[0], "a", &value).IsNotFound());
- ASSERT_FALSE(db2->Get(ro, handles[0], "b", &value).IsNotFound());
- ASSERT_TRUE(db2->Get(ro, handles[0], "c", &value).IsNotFound());
- ASSERT_FALSE(db2->Get(ro, handles[0], "d", &value).IsNotFound());
- ASSERT_TRUE(db2->Get(ro, handles[0], "e", &value).IsNotFound());
- for (auto handle : handles) {
- delete handle;
- }
- delete db2;
- ASSERT_OK(DestroyDB(dbname2, options));
- }
- TEST_F(DBTest2, TraceWithFilter) {
- Options options = CurrentOptions();
- options.merge_operator = MergeOperators::CreatePutOperator();
- ReadOptions ro;
- WriteOptions wo;
- TraceOptions trace_opts;
- EnvOptions env_opts;
- CreateAndReopenWithCF({"pikachu"}, options);
- Random rnd(301);
- Iterator* single_iter = nullptr;
- trace_opts.filter = TraceFilterType::kTraceFilterWrite;
- std::string trace_filename = dbname_ + "/rocksdb.trace";
- std::unique_ptr<TraceWriter> trace_writer;
- ASSERT_OK(NewFileTraceWriter(env_, env_opts, trace_filename, &trace_writer));
- ASSERT_OK(db_->StartTrace(trace_opts, std::move(trace_writer)));
- ASSERT_OK(Put(0, "a", "1"));
- ASSERT_OK(Merge(0, "b", "2"));
- ASSERT_OK(Delete(0, "c"));
- ASSERT_OK(SingleDelete(0, "d"));
- ASSERT_OK(db_->DeleteRange(wo, dbfull()->DefaultColumnFamily(), "e", "f"));
- WriteBatch batch;
- ASSERT_OK(batch.Put("f", "11"));
- ASSERT_OK(batch.Merge("g", "12"));
- ASSERT_OK(batch.Delete("h"));
- ASSERT_OK(batch.SingleDelete("i"));
- ASSERT_OK(batch.DeleteRange("j", "k"));
- ASSERT_OK(db_->Write(wo, &batch));
- single_iter = db_->NewIterator(ro);
- single_iter->Seek("f");
- single_iter->SeekForPrev("g");
- delete single_iter;
- ASSERT_EQ("1", Get(0, "a"));
- ASSERT_EQ("12", Get(0, "g"));
- ASSERT_OK(Put(1, "foo", "bar"));
- ASSERT_OK(Put(1, "rocksdb", "rocks"));
- ASSERT_EQ("NOT_FOUND", Get(1, "leveldb"));
- ASSERT_OK(db_->EndTrace());
- // These should not get into the trace file as it is after EndTrace.
- ASSERT_OK(Put("hello", "world"));
- ASSERT_OK(Merge("foo", "bar"));
- // Open another db, replay, and verify the data
- std::string value;
- std::string dbname2 = test::PerThreadDBPath(env_, "db_replay");
- ASSERT_OK(DestroyDB(dbname2, options));
- // Using a different name than db2, to pacify infer's use-after-lifetime
- // warnings (http://fbinfer.com).
- DB* db2_init = nullptr;
- options.create_if_missing = true;
- ASSERT_OK(DB::Open(options, dbname2, &db2_init));
- ColumnFamilyHandle* cf;
- ASSERT_OK(
- db2_init->CreateColumnFamily(ColumnFamilyOptions(), "pikachu", &cf));
- delete cf;
- delete db2_init;
- DB* db2 = nullptr;
- std::vector<ColumnFamilyDescriptor> column_families;
- ColumnFamilyOptions cf_options;
- cf_options.merge_operator = MergeOperators::CreatePutOperator();
- column_families.emplace_back("default", cf_options);
- column_families.emplace_back("pikachu", ColumnFamilyOptions());
- std::vector<ColumnFamilyHandle*> handles;
- DBOptions db_opts;
- db_opts.env = env_;
- ASSERT_OK(DB::Open(db_opts, dbname2, column_families, &handles, &db2));
- env_->SleepForMicroseconds(100);
- // Verify that the keys don't already exist
- ASSERT_TRUE(db2->Get(ro, handles[0], "a", &value).IsNotFound());
- ASSERT_TRUE(db2->Get(ro, handles[0], "g", &value).IsNotFound());
- std::unique_ptr<TraceReader> trace_reader;
- ASSERT_OK(NewFileTraceReader(env_, env_opts, trace_filename, &trace_reader));
- std::unique_ptr<Replayer> replayer;
- ASSERT_OK(
- db2->NewDefaultReplayer(handles, std::move(trace_reader), &replayer));
- ASSERT_OK(replayer->Prepare());
- ASSERT_OK(replayer->Replay(ReplayOptions(), nullptr));
- replayer.reset();
- // All the key-values should not present since we filter out the WRITE ops.
- ASSERT_TRUE(db2->Get(ro, handles[0], "a", &value).IsNotFound());
- ASSERT_TRUE(db2->Get(ro, handles[0], "g", &value).IsNotFound());
- ASSERT_TRUE(db2->Get(ro, handles[0], "hello", &value).IsNotFound());
- ASSERT_TRUE(db2->Get(ro, handles[0], "world", &value).IsNotFound());
- ASSERT_TRUE(db2->Get(ro, handles[0], "foo", &value).IsNotFound());
- ASSERT_TRUE(db2->Get(ro, handles[0], "rocksdb", &value).IsNotFound());
- for (auto handle : handles) {
- delete handle;
- }
- delete db2;
- ASSERT_OK(DestroyDB(dbname2, options));
- // Set up a new db.
- std::string dbname3 = test::PerThreadDBPath(env_, "db_not_trace_read");
- ASSERT_OK(DestroyDB(dbname3, options));
- DB* db3_init = nullptr;
- options.create_if_missing = true;
- ColumnFamilyHandle* cf3;
- ASSERT_OK(DB::Open(options, dbname3, &db3_init));
- ASSERT_OK(
- db3_init->CreateColumnFamily(ColumnFamilyOptions(), "pikachu", &cf3));
- delete cf3;
- delete db3_init;
- column_families.clear();
- column_families.emplace_back("default", cf_options);
- column_families.emplace_back("pikachu", ColumnFamilyOptions());
- handles.clear();
- DB* db3 = nullptr;
- ASSERT_OK(DB::Open(db_opts, dbname3, column_families, &handles, &db3));
- env_->SleepForMicroseconds(100);
- // Verify that the keys don't already exist
- ASSERT_TRUE(db3->Get(ro, handles[0], "a", &value).IsNotFound());
- ASSERT_TRUE(db3->Get(ro, handles[0], "g", &value).IsNotFound());
- // The tracer will not record the READ ops.
- trace_opts.filter = TraceFilterType::kTraceFilterGet;
- std::string trace_filename3 = dbname_ + "/rocksdb.trace_3";
- std::unique_ptr<TraceWriter> trace_writer3;
- ASSERT_OK(
- NewFileTraceWriter(env_, env_opts, trace_filename3, &trace_writer3));
- ASSERT_OK(db3->StartTrace(trace_opts, std::move(trace_writer3)));
- ASSERT_OK(db3->Put(wo, handles[0], "a", "1"));
- ASSERT_OK(db3->Merge(wo, handles[0], "b", "2"));
- ASSERT_OK(db3->Delete(wo, handles[0], "c"));
- ASSERT_OK(db3->SingleDelete(wo, handles[0], "d"));
- ASSERT_OK(db3->Get(ro, handles[0], "a", &value));
- ASSERT_EQ(value, "1");
- ASSERT_TRUE(db3->Get(ro, handles[0], "c", &value).IsNotFound());
- ASSERT_OK(db3->EndTrace());
- for (auto handle : handles) {
- delete handle;
- }
- delete db3;
- ASSERT_OK(DestroyDB(dbname3, options));
- std::unique_ptr<TraceReader> trace_reader3;
- ASSERT_OK(
- NewFileTraceReader(env_, env_opts, trace_filename3, &trace_reader3));
- // Count the number of records in the trace file;
- int count = 0;
- std::string data;
- Status s;
- while (true) {
- s = trace_reader3->Read(&data);
- if (!s.ok()) {
- break;
- }
- count += 1;
- }
- // We also need to count the header and footer
- // 4 WRITE + HEADER + FOOTER = 6
- ASSERT_EQ(count, 6);
- }
- TEST_F(DBTest2, PinnableSliceAndMmapReads) {
- Options options = CurrentOptions();
- options.env = env_;
- if (!IsMemoryMappedAccessSupported()) {
- ROCKSDB_GTEST_SKIP("Test requires default environment");
- return;
- }
- options.allow_mmap_reads = true;
- options.max_open_files = 100;
- options.compression = kNoCompression;
- Reopen(options);
- ASSERT_OK(Put("foo", "bar"));
- ASSERT_OK(Flush());
- PinnableSlice pinned_value;
- ASSERT_EQ(Get("foo", &pinned_value), Status::OK());
- // It is not safe to pin mmap files as they might disappear by compaction
- ASSERT_FALSE(pinned_value.IsPinned());
- ASSERT_EQ(pinned_value.ToString(), "bar");
- ASSERT_OK(dbfull()->TEST_CompactRange(
- 0 /* level */, nullptr /* begin */, nullptr /* end */,
- nullptr /* column_family */, true /* disallow_trivial_move */));
- // Ensure pinned_value doesn't rely on memory munmap'd by the above
- // compaction. It crashes if it does.
- ASSERT_EQ(pinned_value.ToString(), "bar");
- pinned_value.Reset();
- // Unsafe to pin mmap files when they could be kicked out of table cache
- Close();
- ASSERT_OK(ReadOnlyReopen(options));
- ASSERT_EQ(Get("foo", &pinned_value), Status::OK());
- ASSERT_FALSE(pinned_value.IsPinned());
- ASSERT_EQ(pinned_value.ToString(), "bar");
- pinned_value.Reset();
- // In read-only mode with infinite capacity on table cache it should pin the
- // value and avoid the memcpy
- Close();
- options.max_open_files = -1;
- ASSERT_OK(ReadOnlyReopen(options));
- ASSERT_EQ(Get("foo", &pinned_value), Status::OK());
- ASSERT_TRUE(pinned_value.IsPinned());
- ASSERT_EQ(pinned_value.ToString(), "bar");
- }
- TEST_F(DBTest2, DISABLED_IteratorPinnedMemory) {
- Options options = CurrentOptions();
- options.create_if_missing = true;
- options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
- BlockBasedTableOptions bbto;
- bbto.no_block_cache = false;
- bbto.cache_index_and_filter_blocks = false;
- bbto.block_cache = NewLRUCache(100000);
- bbto.block_size = 400; // small block size
- options.table_factory.reset(NewBlockBasedTableFactory(bbto));
- Reopen(options);
- Random rnd(301);
- std::string v = rnd.RandomString(400);
- // Since v is the size of a block, each key should take a block
- // of 400+ bytes.
- ASSERT_OK(Put("1", v));
- ASSERT_OK(Put("3", v));
- ASSERT_OK(Put("5", v));
- ASSERT_OK(Put("7", v));
- ASSERT_OK(Flush());
- ASSERT_EQ(0, bbto.block_cache->GetPinnedUsage());
- // Verify that iterators don't pin more than one data block in block cache
- // at each time.
- {
- std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions()));
- iter->SeekToFirst();
- for (int i = 0; i < 4; i++) {
- ASSERT_TRUE(iter->Valid());
- // Block cache should contain exactly one block.
- ASSERT_GT(bbto.block_cache->GetPinnedUsage(), 0);
- ASSERT_LT(bbto.block_cache->GetPinnedUsage(), 800);
- iter->Next();
- }
- ASSERT_FALSE(iter->Valid());
- iter->Seek("4");
- ASSERT_TRUE(iter->Valid());
- ASSERT_GT(bbto.block_cache->GetPinnedUsage(), 0);
- ASSERT_LT(bbto.block_cache->GetPinnedUsage(), 800);
- iter->Seek("3");
- ASSERT_TRUE(iter->Valid());
- ASSERT_OK(iter->status());
- ASSERT_GT(bbto.block_cache->GetPinnedUsage(), 0);
- ASSERT_LT(bbto.block_cache->GetPinnedUsage(), 800);
- }
- ASSERT_EQ(0, bbto.block_cache->GetPinnedUsage());
- // Test compaction case
- ASSERT_OK(Put("2", v));
- ASSERT_OK(Put("5", v));
- ASSERT_OK(Put("6", v));
- ASSERT_OK(Put("8", v));
- ASSERT_OK(Flush());
- // Clear existing data in block cache
- bbto.block_cache->SetCapacity(0);
- bbto.block_cache->SetCapacity(100000);
- // Verify compaction input iterators don't hold more than one data blocks at
- // one time.
- std::atomic<bool> finished(false);
- std::atomic<int> block_newed(0);
- std::atomic<int> block_destroyed(0);
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "Block::Block:0", [&](void* /*arg*/) {
- if (finished) {
- return;
- }
- // Two iterators. At most 2 outstanding blocks.
- EXPECT_GE(block_newed.load(), block_destroyed.load());
- EXPECT_LE(block_newed.load(), block_destroyed.load() + 1);
- block_newed.fetch_add(1);
- });
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "Block::~Block", [&](void* /*arg*/) {
- if (finished) {
- return;
- }
- // Two iterators. At most 2 outstanding blocks.
- EXPECT_GE(block_newed.load(), block_destroyed.load() + 1);
- EXPECT_LE(block_newed.load(), block_destroyed.load() + 2);
- block_destroyed.fetch_add(1);
- });
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "CompactionJob::Run:BeforeVerify",
- [&](void* /*arg*/) { finished = true; });
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
- ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
- // Two input files. Each of them has 4 data blocks.
- ASSERT_EQ(8, block_newed.load());
- ASSERT_EQ(8, block_destroyed.load());
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
- }
- TEST_F(DBTest2, TestGetColumnFamilyHandleUnlocked) {
- // Two threads call DBImpl::GetColumnFamilyHandleUnlocked() concurrently on
- // different column families; each thread re-checks its handle's ID after the
- // other thread's lookup has run, so a shared/overwritten cached handle would
- // trip the second ASSERT_EQ in each thread.
- // Setup sync point dependency to reproduce the race condition of
- // DBImpl::GetColumnFamilyHandleUnlocked
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({
- {"TestGetColumnFamilyHandleUnlocked::GetColumnFamilyHandleUnlocked1",
- "TestGetColumnFamilyHandleUnlocked::PreGetColumnFamilyHandleUnlocked2"},
- {"TestGetColumnFamilyHandleUnlocked::GetColumnFamilyHandleUnlocked2",
- "TestGetColumnFamilyHandleUnlocked::ReadColumnFamilyHandle1"},
- });
- SyncPoint::GetInstance()->EnableProcessing();
- CreateColumnFamilies({"test1", "test2"}, Options());
- ASSERT_EQ(handles_.size(), 2);
- DBImpl* dbi = static_cast_with_check<DBImpl>(db_);
- port::Thread user_thread1([&]() {
- auto cfh = dbi->GetColumnFamilyHandleUnlocked(handles_[0]->GetID());
- ASSERT_EQ(cfh->GetID(), handles_[0]->GetID());
- TEST_SYNC_POINT(
- "TestGetColumnFamilyHandleUnlocked::GetColumnFamilyHandleUnlocked1");
- TEST_SYNC_POINT(
- "TestGetColumnFamilyHandleUnlocked::ReadColumnFamilyHandle1");
- // Re-read after thread 2's lookup: handle must still refer to CF 0.
- ASSERT_EQ(cfh->GetID(), handles_[0]->GetID());
- });
- port::Thread user_thread2([&]() {
- TEST_SYNC_POINT(
- "TestGetColumnFamilyHandleUnlocked::PreGetColumnFamilyHandleUnlocked2");
- auto cfh = dbi->GetColumnFamilyHandleUnlocked(handles_[1]->GetID());
- ASSERT_EQ(cfh->GetID(), handles_[1]->GetID());
- TEST_SYNC_POINT(
- "TestGetColumnFamilyHandleUnlocked::GetColumnFamilyHandleUnlocked2");
- ASSERT_EQ(cfh->GetID(), handles_[1]->GetID());
- });
- user_thread1.join();
- user_thread2.join();
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
- }
- TEST_F(DBTest2, TestCompactFiles) {
- // Runs CompactFiles() in one thread while a second thread ingests an
- // external file; both operations must succeed.
- // Setup sync point dependency so the ingestion (IngestExternalFile1) is
- // forced to overlap with the concurrent CompactFiles() call.
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({
- {"TestCompactFiles::IngestExternalFile1",
- "TestCompactFiles::IngestExternalFile2"},
- });
- SyncPoint::GetInstance()->EnableProcessing();
- Options options;
- options.env = env_;
- options.num_levels = 2;
- options.disable_auto_compactions = true;
- Reopen(options);
- auto* handle = db_->DefaultColumnFamily();
- ASSERT_EQ(db_->NumberLevels(handle), 2);
- ROCKSDB_NAMESPACE::SstFileWriter sst_file_writer{
- ROCKSDB_NAMESPACE::EnvOptions(), options};
- std::string external_file1 = dbname_ + "/test_compact_files1.sst_t";
- std::string external_file2 = dbname_ + "/test_compact_files2.sst_t";
- std::string external_file3 = dbname_ + "/test_compact_files3.sst_t";
- ASSERT_OK(sst_file_writer.Open(external_file1));
- ASSERT_OK(sst_file_writer.Put("1", "1"));
- ASSERT_OK(sst_file_writer.Put("2", "2"));
- ASSERT_OK(sst_file_writer.Finish());
- ASSERT_OK(sst_file_writer.Open(external_file2));
- ASSERT_OK(sst_file_writer.Put("3", "3"));
- ASSERT_OK(sst_file_writer.Put("4", "4"));
- ASSERT_OK(sst_file_writer.Finish());
- ASSERT_OK(sst_file_writer.Open(external_file3));
- ASSERT_OK(sst_file_writer.Put("5", "5"));
- ASSERT_OK(sst_file_writer.Put("6", "6"));
- ASSERT_OK(sst_file_writer.Finish());
- // Ingest files 1 and 3 first; file 2 is ingested later by thread 2.
- ASSERT_OK(db_->IngestExternalFile(handle, {external_file1, external_file3},
- IngestExternalFileOptions()));
- ASSERT_EQ(NumTableFilesAtLevel(1, 0), 2);
- std::vector<std::string> files;
- GetSstFiles(env_, dbname_, &files);
- ASSERT_EQ(files.size(), 2);
- Status user_thread1_status;
- port::Thread user_thread1([&]() {
- user_thread1_status =
- db_->CompactFiles(CompactionOptions(), handle, files, 1);
- });
- Status user_thread2_status;
- port::Thread user_thread2([&]() {
- user_thread2_status = db_->IngestExternalFile(handle, {external_file2},
- IngestExternalFileOptions());
- TEST_SYNC_POINT("TestCompactFiles::IngestExternalFile1");
- });
- user_thread1.join();
- user_thread2.join();
- ASSERT_OK(user_thread1_status);
- ASSERT_OK(user_thread2_status);
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
- }
- TEST_F(DBTest2, TestCancelCompactFiles) {
- // Exercises the CompactionOptions::canceled flag and
- // DisableManualCompaction() against CompactFiles():
- //  1) canceled == true up front  -> compaction paused, no files compacted
- //  2) canceled set via callback before the cancel check -> still paused
- //  3) DisableManualCompaction() during the run -> paused, and unlike
- //     CompactRange it does NOT flip the canceled flag
- //  4) canceled set only after the cancel check -> compaction completes
- SyncPoint::GetInstance()->EnableProcessing();
- Options options;
- options.env = env_;
- options.num_levels = 2;
- options.disable_auto_compactions = true;
- Reopen(options);
- auto* handle = db_->DefaultColumnFamily();
- ASSERT_EQ(db_->NumberLevels(handle), 2);
- ROCKSDB_NAMESPACE::SstFileWriter sst_file_writer{
- ROCKSDB_NAMESPACE::EnvOptions(), options};
- // ingest large SST files
- std::vector<std::string> external_sst_file_names;
- int key_counter = 0;
- const int num_keys_per_file = 100000;
- const int num_files = 10;
- for (int i = 0; i < num_files; ++i) {
- std::string file_name =
- dbname_ + "/test_compact_files" + std::to_string(i) + ".sst_t";
- external_sst_file_names.push_back(file_name);
- ASSERT_OK(sst_file_writer.Open(file_name));
- for (int j = 0; j < num_keys_per_file; ++j) {
- ASSERT_OK(sst_file_writer.Put(Key(j + num_keys_per_file * key_counter),
- std::to_string(j)));
- }
- key_counter += 1;
- ASSERT_OK(sst_file_writer.Finish());
- }
- ASSERT_OK(db_->IngestExternalFile(handle, external_sst_file_names,
- IngestExternalFileOptions()));
- ASSERT_EQ(NumTableFilesAtLevel(1, 0), num_files);
- std::vector<std::string> files;
- GetSstFiles(env_, dbname_, &files);
- ASSERT_EQ(files.size(), num_files);
- // Test that 0 compactions happen - canceled is set to True initially
- CompactionOptions compaction_options;
- std::atomic<bool> canceled(true);
- compaction_options.canceled = &canceled;
- ASSERT_TRUE(db_->CompactFiles(compaction_options, handle, files, 1)
- .IsManualCompactionPaused());
- ASSERT_EQ(NumTableFilesAtLevel(1, 0), num_files);
- // Test cancellation before the check to cancel compaction happens -
- // compaction should not occur
- bool disable_compaction = false;
- compaction_options.canceled->store(false, std::memory_order_release);
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "TestCancelCompactFiles:SuccessfulCompaction", [&](void* arg) {
- auto paused = static_cast<std::atomic<int>*>(arg);
- if (disable_compaction) {
- db_->DisableManualCompaction();
- ASSERT_EQ(1, paused->load(std::memory_order_acquire));
- } else {
- compaction_options.canceled->store(true, std::memory_order_release);
- ASSERT_EQ(0, paused->load(std::memory_order_acquire));
- }
- });
- ASSERT_TRUE(db_->CompactFiles(compaction_options, handle, files, 1)
- .IsManualCompactionPaused());
- ASSERT_EQ(NumTableFilesAtLevel(1, 0), num_files);
- // DisableManualCompaction() should successfully cancel compaction
- disable_compaction = true;
- compaction_options.canceled->store(false, std::memory_order_release);
- ASSERT_TRUE(db_->CompactFiles(compaction_options, handle, files, 1)
- .IsManualCompactionPaused());
- ASSERT_EQ(NumTableFilesAtLevel(1, 0), num_files);
- // unlike CompactRange, value of compaction_options.canceled will be
- // unaffected by calling DisableManualCompactions()
- ASSERT_FALSE(compaction_options.canceled->load());
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
- db_->EnableManualCompaction();
- // Test cancellation after the check to cancel compaction - compaction should
- // occur, leaving only 1 file
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "CompactFilesImpl:0", [&](void* /*arg*/) {
- compaction_options.canceled->store(true, std::memory_order_release);
- });
- compaction_options.canceled->store(false, std::memory_order_release);
- ASSERT_OK(db_->CompactFiles(compaction_options, handle, files, 1));
- ASSERT_EQ(NumTableFilesAtLevel(1, 0), 1);
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
- }
- TEST_F(DBTest2, MultiDBParallelOpenTest) {
- // Opens multiple DBs from parallel threads, twice: first creating them
- // empty, then re-opening (recovering) them after each got a write.
- const int kNumDbs = 2;
- Options options = CurrentOptions();
- std::vector<std::string> dbnames;
- for (int i = 0; i < kNumDbs; ++i) {
- dbnames.emplace_back(test::PerThreadDBPath(env_, "db" + std::to_string(i)));
- ASSERT_OK(DestroyDB(dbnames.back(), options));
- }
- // Verify empty DBs can be created in parallel
- std::vector<std::thread> open_threads;
- std::vector<DB*> dbs{static_cast<unsigned int>(kNumDbs), nullptr};
- options.create_if_missing = true;
- for (int i = 0; i < kNumDbs; ++i) {
- open_threads.emplace_back(
- [&](int dbnum) {
- ASSERT_OK(DB::Open(options, dbnames[dbnum], &dbs[dbnum]));
- },
- i);
- }
- // Now add some data and close, so next we can verify non-empty DBs can be
- // recovered in parallel
- for (int i = 0; i < kNumDbs; ++i) {
- open_threads[i].join();
- ASSERT_OK(dbs[i]->Put(WriteOptions(), "xi", "gua"));
- delete dbs[i];
- }
- // Verify non-empty DBs can be recovered in parallel
- open_threads.clear();
- for (int i = 0; i < kNumDbs; ++i) {
- open_threads.emplace_back(
- [&](int dbnum) {
- ASSERT_OK(DB::Open(options, dbnames[dbnum], &dbs[dbnum]));
- },
- i);
- }
- // Wait and cleanup
- for (int i = 0; i < kNumDbs; ++i) {
- open_threads[i].join();
- delete dbs[i];
- ASSERT_OK(DestroyDB(dbnames[i], options));
- }
- }
- namespace {
- // Minimal Statistics implementation exercising the legacy interface.
- // It records nothing; it only counts how many times recordTick() and
- // measureTime() are invoked (num_rt / num_mt) so a test can verify that
- // the DB routes stats through an old-style Statistics object.
- class DummyOldStats : public Statistics {
- public:
- const char* Name() const override { return "DummyOldStats"; }
- uint64_t getTickerCount(uint32_t /*ticker_type*/) const override { return 0; }
- void recordTick(uint32_t /* ticker_type */, uint64_t /* count */) override {
- num_rt++;
- }
- void setTickerCount(uint32_t /*ticker_type*/, uint64_t /*count*/) override {}
- uint64_t getAndResetTickerCount(uint32_t /*ticker_type*/) override {
- return 0;
- }
- void measureTime(uint32_t /*histogram_type*/, uint64_t /*count*/) override {
- num_mt++;
- }
- void histogramData(
- uint32_t /*histogram_type*/,
- ROCKSDB_NAMESPACE::HistogramData* const /*data*/) const override {}
- std::string getHistogramString(uint32_t /*type*/) const override {
- return "";
- }
- bool HistEnabledForType(uint32_t /*type*/) const override { return false; }
- std::string ToString() const override { return ""; }
- // Invocation counters; atomic because stats may be recorded from
- // background threads.
- std::atomic<int> num_rt{0};
- std::atomic<int> num_mt{0};
- };
- } // anonymous namespace
- TEST_F(DBTest2, OldStatsInterface) {
- // Verifies that a Statistics object implementing only the legacy virtual
- // interface (DummyOldStats above) still receives ticker and timing calls
- // during normal Put/Get/Flush activity.
- DummyOldStats* dos = new DummyOldStats();
- std::shared_ptr<Statistics> stats(dos);
- Options options = CurrentOptions();
- options.create_if_missing = true;
- options.statistics = stats;
- Reopen(options);
- ASSERT_OK(Put("foo", "bar"));
- ASSERT_EQ("bar", Get("foo"));
- ASSERT_OK(Flush());
- ASSERT_EQ("bar", Get("foo"));
- ASSERT_GT(dos->num_rt, 0);
- ASSERT_GT(dos->num_mt, 0);
- }
- TEST_F(DBTest2, CloseWithUnreleasedSnapshot) {
- // Close() must fail while a snapshot is outstanding, and succeed once the
- // snapshot has been released.
- const Snapshot* ss = db_->GetSnapshot();
- for (auto h : handles_) {
- db_->DestroyColumnFamilyHandle(h);
- }
- handles_.clear();
- ASSERT_NOK(db_->Close());
- db_->ReleaseSnapshot(ss);
- ASSERT_OK(db_->Close());
- delete db_;
- db_ = nullptr;
- }
- TEST_F(DBTest2, PrefixBloomReseek) {
- // After a prefix-bloom miss moves the level iterator to a later file, a
- // subsequent Seek to an earlier prefix must correctly reseek back.
- Options options = CurrentOptions();
- options.create_if_missing = true;
- options.prefix_extractor.reset(NewCappedPrefixTransform(3));
- BlockBasedTableOptions bbto;
- bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
- bbto.whole_key_filtering = false;
- options.table_factory.reset(NewBlockBasedTableFactory(bbto));
- DestroyAndReopen(options);
- // Construct two L1 files with keys:
- // f1:[aaa1 ccc1] f2:[ddd0]
- ASSERT_OK(Put("aaa1", ""));
- ASSERT_OK(Put("ccc1", ""));
- ASSERT_OK(Flush());
- ASSERT_OK(Put("ddd0", ""));
- ASSERT_OK(Flush());
- CompactRangeOptions cro;
- cro.bottommost_level_compaction = BottommostLevelCompaction::kSkip;
- ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
- ASSERT_OK(Put("bbb1", ""));
- Iterator* iter = db_->NewIterator(ReadOptions());
- ASSERT_OK(iter->status());
- // Seeking into f1, the iterator will check the bloom filter, which causes
- // the file iterator to be invalidated, and the cursor will be placed in f2,
- // with the next key to be "ddd0".
- iter->Seek("bbb1");
- ASSERT_TRUE(iter->Valid());
- ASSERT_EQ("bbb1", iter->key().ToString());
- // Reseek ccc1, the L1 iterator needs to go back to f1 and reseek.
- iter->Seek("ccc1");
- ASSERT_TRUE(iter->Valid());
- ASSERT_EQ("ccc1", iter->key().ToString());
- delete iter;
- }
- TEST_F(DBTest2, PrefixBloomFilteredOut) {
- // Checks iterator behavior when a Seek's prefix is rejected by the bloom
- // filter, under both values of prefix_seek_opt_in_only and
- // prefix_same_as_start.
- Options options = CurrentOptions();
- options.create_if_missing = true;
- options.prefix_extractor.reset(NewCappedPrefixTransform(3));
- BlockBasedTableOptions bbto;
- bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
- bbto.whole_key_filtering = false;
- options.table_factory.reset(NewBlockBasedTableFactory(bbto));
- // This test is also the primary test for prefix_seek_opt_in_only
- for (bool opt_in : {false, true}) {
- options.prefix_seek_opt_in_only = opt_in;
- DestroyAndReopen(options);
- // Construct two L1 files with keys:
- // f1:[aaa1 ccc1] f2:[ddd0]
- ASSERT_OK(Put("aaa1", ""));
- ASSERT_OK(Put("ccc1", ""));
- ASSERT_OK(Flush());
- ASSERT_OK(Put("ddd0", ""));
- ASSERT_OK(Flush());
- CompactRangeOptions cro;
- cro.bottommost_level_compaction = BottommostLevelCompaction::kSkip;
- ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
- ReadOptions ropts;
- for (bool same : {false, true}) {
- ropts.prefix_same_as_start = same;
- std::unique_ptr<Iterator> iter(db_->NewIterator(ropts));
- ASSERT_OK(iter->status());
- iter->Seek("bbb1");
- ASSERT_OK(iter->status());
- if (opt_in && !same) {
- // Unbounded total order seek
- ASSERT_TRUE(iter->Valid());
- ASSERT_EQ(iter->key(), "ccc1");
- } else {
- // Bloom filter is filtered out by f1. When same == false, this is just
- // one valid position following the contract. Positioning to ccc1 or ddd0
- // is also valid. This is just to validate the behavior of the current
- // implementation. If underlying implementation changes, the test might
- // fail here.
- ASSERT_FALSE(iter->Valid());
- }
- }
- }
- }
- TEST_F(DBTest2, RowCacheSnapshot) {
- // Row cache vs. snapshots: reads at a snapshot must return the value
- // visible to that snapshot. Hit/miss counters verify when the row cache is
- // consulted vs. bypassed (s1 sees "bar1"; s2/s3 see "bar2" and can share
- // the cached entry).
- Options options = CurrentOptions();
- options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
- options.row_cache = NewLRUCache(8 * 8192);
- DestroyAndReopen(options);
- ASSERT_OK(Put("foo", "bar1"));
- const Snapshot* s1 = db_->GetSnapshot();
- ASSERT_OK(Put("foo", "bar2"));
- ASSERT_OK(Flush());
- ASSERT_OK(Put("foo2", "bar"));
- const Snapshot* s2 = db_->GetSnapshot();
- ASSERT_OK(Put("foo3", "bar"));
- const Snapshot* s3 = db_->GetSnapshot();
- ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 0);
- ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 0);
- ASSERT_EQ(Get("foo"), "bar2");
- ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 0);
- ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 1);
- ASSERT_EQ(Get("foo"), "bar2");
- ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 1);
- ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 1);
- ASSERT_EQ(Get("foo", s1), "bar1");
- ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 1);
- ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 2);
- ASSERT_EQ(Get("foo", s2), "bar2");
- ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 2);
- ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 2);
- ASSERT_EQ(Get("foo", s1), "bar1");
- ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 3);
- ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 2);
- ASSERT_EQ(Get("foo", s3), "bar2");
- ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 4);
- ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 2);
- db_->ReleaseSnapshot(s1);
- db_->ReleaseSnapshot(s2);
- db_->ReleaseSnapshot(s3);
- }
- // When DB is reopened with multiple column families, the manifest file
- // is written after the first CF is flushed, and it is written again
- // after each flush. If the DB crashes between the flushes, the already
- // flushed CF will point past the latest log file, and now we require that
- // log file not to be corrupted, triggering a corruption report.
- // We need to fix the bug and enable the test.
- TEST_F(DBTest2, CrashInRecoveryMultipleCF) {
- const std::vector<std::string> sync_points = {
- "DBImpl::RecoverLogFiles:BeforeFlushFinalMemtable",
- "VersionSet::ProcessManifestWrites:BeforeWriteLastVersionEdit:0"};
- for (const auto& test_sync_point : sync_points) {
- Options options = CurrentOptions();
- // First destroy original db to ensure a clean start.
- DestroyAndReopen(options);
- options.create_if_missing = true;
- options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery;
- CreateAndReopenWithCF({"pikachu"}, options);
- ASSERT_OK(Put("foo", "bar"));
- ASSERT_OK(Flush());
- ASSERT_OK(Put(1, "foo", "bar"));
- ASSERT_OK(Flush(1));
- ASSERT_OK(Put("foo", "bar"));
- ASSERT_OK(Put(1, "foo", "bar"));
- // The value is large enough to be divided to two blocks.
- std::string large_value(400, ' ');
- ASSERT_OK(Put("foo1", large_value));
- ASSERT_OK(Put("foo2", large_value));
- Close();
- // Corrupt the log file in the middle, so that it is not corrupted
- // in the tail.
- std::vector<std::string> filenames;
- ASSERT_OK(env_->GetChildren(dbname_, &filenames));
- for (const auto& f : filenames) {
- uint64_t number;
- FileType type;
- if (ParseFileName(f, &number, &type) && type == FileType::kWalFile) {
- std::string fname = dbname_ + "/" + f;
- std::string file_content;
- ASSERT_OK(ReadFileToString(env_, fname, &file_content));
- file_content[400] = 'h';
- file_content[401] = 'a';
- ASSERT_OK(WriteStringToFile(env_, file_content, fname, false));
- break;
- }
- }
- // Reopen and freeze the file system after the first manifest write.
- FaultInjectionTestEnv fit_env(options.env);
- options.env = &fit_env;
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- test_sync_point,
- [&](void* /*arg*/) { fit_env.SetFilesystemActive(false); });
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
- // Recovery hits the frozen filesystem, so this reopen must fail.
- ASSERT_NOK(TryReopenWithColumnFamilies(
- {kDefaultColumnFamilyName, "pikachu"}, options));
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
- fit_env.SetFilesystemActive(true);
- // If we continue using the failure-injection Env, it will complain
- // about something when renaming the current file, which is not expected.
- // Need to investigate why.
- options.env = env_;
- ASSERT_OK(TryReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu"},
- options));
- }
- }
- TEST_F(DBTest2, SeekFileRangeDeleteTail) {
- // Seek into a file whose tail is fully covered by a range tombstone
- // ("a".."f"): the iterator must skip past it to the next file ("x").
- Options options = CurrentOptions();
- options.prefix_extractor.reset(NewCappedPrefixTransform(1));
- options.num_levels = 3;
- DestroyAndReopen(options);
- ASSERT_OK(Put("a", "a"));
- const Snapshot* s1 = db_->GetSnapshot();
- ASSERT_OK(
- db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "f"));
- ASSERT_OK(Put("b", "a"));
- ASSERT_OK(Flush());
- ASSERT_OK(Put("x", "a"));
- ASSERT_OK(Put("z", "a"));
- ASSERT_OK(Flush());
- CompactRangeOptions cro;
- cro.change_level = true;
- cro.target_level = 2;
- ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
- {
- ReadOptions ro;
- ro.total_order_seek = true;
- std::unique_ptr<Iterator> iter(db_->NewIterator(ro));
- ASSERT_OK(iter->status());
- iter->Seek("e");
- ASSERT_TRUE(iter->Valid());
- ASSERT_EQ("x", iter->key().ToString());
- }
- db_->ReleaseSnapshot(s1);
- }
- TEST_F(DBTest2, BackgroundPurgeTest) {
- // With avoid_unnecessary_blocking_io, deleting an iterator defers memtable
- // cleanup to the background (HIGH-priority) queue. A sleeping task blocks
- // that queue to prove the memory is only released once the queue drains.
- Options options = CurrentOptions();
- options.write_buffer_manager =
- std::make_shared<ROCKSDB_NAMESPACE::WriteBufferManager>(1 << 20);
- options.avoid_unnecessary_blocking_io = true;
- DestroyAndReopen(options);
- size_t base_value = options.write_buffer_manager->memory_usage();
- ASSERT_OK(Put("a", "a"));
- Iterator* iter = db_->NewIterator(ReadOptions());
- ASSERT_OK(iter->status());
- ASSERT_OK(Flush());
- size_t value = options.write_buffer_manager->memory_usage();
- ASSERT_GT(value, base_value);
- db_->GetEnv()->SetBackgroundThreads(1, Env::Priority::HIGH);
- test::SleepingBackgroundTask sleeping_task_after;
- db_->GetEnv()->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
- &sleeping_task_after, Env::Priority::HIGH);
- delete iter;
- // While the purge is queued behind the sleeping task, memory is still held.
- Env::Default()->SleepForMicroseconds(100000);
- value = options.write_buffer_manager->memory_usage();
- ASSERT_GT(value, base_value);
- sleeping_task_after.WakeUp();
- sleeping_task_after.WaitUntilDone();
- // A second sleep/wake cycle ensures the purge itself has completed.
- test::SleepingBackgroundTask sleeping_task_after2;
- db_->GetEnv()->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
- &sleeping_task_after2, Env::Priority::HIGH);
- sleeping_task_after2.WakeUp();
- sleeping_task_after2.WaitUntilDone();
- value = options.write_buffer_manager->memory_usage();
- ASSERT_EQ(base_value, value);
- }
- TEST_F(DBTest2, SwitchMemtableRaceWithNewManifest) {
- // A tiny max_manifest_file_size forces frequent manifest rollover; a flush
- // on one CF runs while the other CF's flushes trigger compaction, checking
- // the memtable-switch / new-manifest race does not deadlock or fail.
- Options options = CurrentOptions();
- DestroyAndReopen(options);
- options.max_manifest_file_size = 10;
- options.create_if_missing = true;
- CreateAndReopenWithCF({"pikachu"}, options);
- ASSERT_EQ(2, handles_.size());
- ASSERT_OK(Put("foo", "value"));
- const int kL0Files = options.level0_file_num_compaction_trigger;
- for (int i = 0; i < kL0Files; ++i) {
- ASSERT_OK(Put(/*cf=*/1, "a", std::to_string(i)));
- ASSERT_OK(Flush(/*cf=*/1));
- }
- port::Thread thread([&]() { ASSERT_OK(Flush()); });
- ASSERT_OK(dbfull()->TEST_WaitForCompact());
- thread.join();
- }
- TEST_F(DBTest2, SameSmallestInSameLevel) {
- // This test validates fractional cascading logic when several files at
- // one level only contain the same user key.
- Options options = CurrentOptions();
- options.merge_operator = MergeOperators::CreateStringAppendOperator();
- DestroyAndReopen(options);
- ASSERT_OK(Put("key", "1"));
- ASSERT_OK(Put("key", "2"));
- ASSERT_OK(db_->Merge(WriteOptions(), "key", "3"));
- ASSERT_OK(db_->Merge(WriteOptions(), "key", "4"));
- ASSERT_OK(Flush());
- CompactRangeOptions cro;
- cro.change_level = true;
- cro.target_level = 2;
- ASSERT_OK(dbfull()->CompactRange(cro, db_->DefaultColumnFamily(), nullptr,
- nullptr));
- // Four more single-merge flushes create four L0 files all keyed "key".
- ASSERT_OK(db_->Merge(WriteOptions(), "key", "5"));
- ASSERT_OK(Flush());
- ASSERT_OK(db_->Merge(WriteOptions(), "key", "6"));
- ASSERT_OK(Flush());
- ASSERT_OK(db_->Merge(WriteOptions(), "key", "7"));
- ASSERT_OK(Flush());
- ASSERT_OK(db_->Merge(WriteOptions(), "key", "8"));
- ASSERT_OK(Flush());
- ASSERT_OK(dbfull()->TEST_WaitForCompact());
- ASSERT_EQ("0,4,1", FilesPerLevel());
- // Merge across all files must see every operand in order.
- ASSERT_EQ("2,3,4,5,6,7,8", Get("key"));
- }
- TEST_F(DBTest2, FileConsistencyCheckInOpen) {
- // Injects a Corruption status into the version-builder consistency check;
- // reopening with force_consistency_checks must then fail.
- ASSERT_OK(Put("foo", "bar"));
- ASSERT_OK(Flush());
- SyncPoint::GetInstance()->SetCallBack(
- "VersionBuilder::CheckConsistencyBeforeReturn", [&](void* arg) {
- Status* ret_s = static_cast<Status*>(arg);
- *ret_s = Status::Corruption("fcc");
- });
- SyncPoint::GetInstance()->EnableProcessing();
- Options options = CurrentOptions();
- options.force_consistency_checks = true;
- ASSERT_NOK(TryReopen(options));
- SyncPoint::GetInstance()->DisableProcessing();
- }
- TEST_F(DBTest2, BlockBasedTablePrefixIndexSeekForPrev) {
- // SeekForPrev() with a hash-search (prefix) index: existing prefixes must
- // land on the right key; non-existing prefixes may legitimately be invalid
- // or land on the previous key.
- // create a DB with block prefix index
- BlockBasedTableOptions table_options;
- Options options = CurrentOptions();
- table_options.block_size = 300;
- table_options.index_type = BlockBasedTableOptions::kHashSearch;
- table_options.index_shortening =
- BlockBasedTableOptions::IndexShorteningMode::kNoShortening;
- options.table_factory.reset(NewBlockBasedTableFactory(table_options));
- options.prefix_extractor.reset(NewFixedPrefixTransform(1));
- Reopen(options);
- Random rnd(301);
- // Large values spread the three keys over multiple 300-byte blocks.
- std::string large_value = rnd.RandomString(500);
- ASSERT_OK(Put("a1", large_value));
- ASSERT_OK(Put("x1", large_value));
- ASSERT_OK(Put("y1", large_value));
- ASSERT_OK(Flush());
- {
- std::unique_ptr<Iterator> iterator(db_->NewIterator(ReadOptions()));
- ASSERT_OK(iterator->status());
- iterator->SeekForPrev("x3");
- ASSERT_TRUE(iterator->Valid());
- ASSERT_EQ("x1", iterator->key().ToString());
- iterator->SeekForPrev("a3");
- ASSERT_TRUE(iterator->Valid());
- ASSERT_EQ("a1", iterator->key().ToString());
- iterator->SeekForPrev("y3");
- ASSERT_TRUE(iterator->Valid());
- ASSERT_EQ("y1", iterator->key().ToString());
- // Query more than one non-existing prefix to cover the case both
- // of empty hash bucket and hash bucket conflict.
- iterator->SeekForPrev("b1");
- // Result should be not valid or "a1".
- if (iterator->Valid()) {
- ASSERT_EQ("a1", iterator->key().ToString());
- }
- iterator->SeekForPrev("c1");
- // Result should be not valid or "a1".
- if (iterator->Valid()) {
- ASSERT_EQ("a1", iterator->key().ToString());
- }
- iterator->SeekForPrev("d1");
- // Result should be not valid or "a1".
- if (iterator->Valid()) {
- ASSERT_EQ("a1", iterator->key().ToString());
- }
- iterator->SeekForPrev("y3");
- ASSERT_TRUE(iterator->Valid());
- ASSERT_EQ("y1", iterator->key().ToString());
- }
- }
- TEST_F(DBTest2, PartitionedIndexPrefetchFailure) {
- // Randomly fails reads during table open (partitioned-index prefetch):
- // Get() must return non-OK iff a read actually failed, and never crash.
- Options options = last_options_;
- options.env = env_;
- options.max_open_files = 20;
- BlockBasedTableOptions bbto;
- bbto.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
- bbto.metadata_block_size = 128;
- bbto.block_size = 128;
- bbto.block_cache = NewLRUCache(16777216);
- bbto.cache_index_and_filter_blocks = true;
- options.table_factory.reset(NewBlockBasedTableFactory(bbto));
- DestroyAndReopen(options);
- // Force no table cache so every read will preload the SST file.
- dbfull()->TEST_table_cache()->SetCapacity(0);
- bbto.block_cache->SetCapacity(0);
- Random rnd(301);
- for (int i = 0; i < 4096; i++) {
- ASSERT_OK(Put(Key(i), rnd.RandomString(32)));
- }
- ASSERT_OK(Flush());
- // Try different random failures in table open for 300 times.
- for (int i = 0; i < 300; i++) {
- env_->num_reads_fails_ = 0;
- env_->rand_reads_fail_odd_ = 8;
- std::string value;
- Status s = dbfull()->Get(ReadOptions(), Key(1), &value);
- if (env_->num_reads_fails_ > 0) {
- ASSERT_NOK(s);
- } else {
- ASSERT_OK(s);
- }
- }
- // Restore the env so later tests are unaffected.
- env_->rand_reads_fail_odd_ = 0;
- }
- TEST_F(DBTest2, ChangePrefixExtractor) {
- // Writes data under a 2-byte prefix extractor, then reopens with a 1-byte
- // one. Queries must stay correct; filter-match counters verify the prefix
- // bloom is only consulted where the extractor change keeps it safe.
- for (bool use_partitioned_filter : {true, false}) {
- // create a DB with block prefix index
- BlockBasedTableOptions table_options;
- Options options = CurrentOptions();
- options.prefix_seek_opt_in_only = false; // Use legacy prefix seek
- // Sometimes filter is checked based on upper bound. Assert counters
- // for that case. Otherwise, only check data correctness.
- bool expect_filter_check = !use_partitioned_filter;
- table_options.partition_filters = use_partitioned_filter;
- if (use_partitioned_filter) {
- table_options.index_type =
- BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
- }
- table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
- options.table_factory.reset(NewBlockBasedTableFactory(table_options));
- options.statistics = CreateDBStatistics();
- options.prefix_extractor.reset(NewFixedPrefixTransform(2));
- DestroyAndReopen(options);
- Random rnd(301);
- ASSERT_OK(Put("aa", ""));
- ASSERT_OK(Put("xb", ""));
- ASSERT_OK(Put("xx1", ""));
- ASSERT_OK(Put("xz1", ""));
- ASSERT_OK(Put("zz", ""));
- ASSERT_OK(Flush());
- // After reopening DB with prefix size 2 => 1, prefix extractor
- // won't take effect unless it won't change results based
- // on upper bound and seek key.
- options.prefix_extractor.reset(NewFixedPrefixTransform(1));
- Reopen(options);
- {
- std::unique_ptr<Iterator> iterator(db_->NewIterator(ReadOptions()));
- ASSERT_OK(iterator->status());
- iterator->Seek("xa");
- ASSERT_TRUE(iterator->Valid());
- ASSERT_EQ("xb", iterator->key().ToString());
- if (expect_filter_check) {
- EXPECT_EQ(0, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH));
- }
- iterator->Seek("xz");
- ASSERT_TRUE(iterator->Valid());
- ASSERT_EQ("xz1", iterator->key().ToString());
- if (expect_filter_check) {
- EXPECT_EQ(0, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH));
- }
- }
- std::string ub_str = "xg9";
- Slice ub(ub_str);
- ReadOptions ro;
- ro.iterate_upper_bound = &ub;
- {
- std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));
- ASSERT_OK(iterator->status());
- // SeekForPrev() never uses prefix bloom if it is changed.
- iterator->SeekForPrev("xg0");
- ASSERT_TRUE(iterator->Valid());
- ASSERT_EQ("xb", iterator->key().ToString());
- if (expect_filter_check) {
- EXPECT_EQ(0, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH));
- }
- }
- ub_str = "xx9";
- ub = Slice(ub_str);
- {
- std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));
- ASSERT_OK(iterator->status());
- iterator->Seek("x");
- ASSERT_TRUE(iterator->Valid());
- ASSERT_EQ("xb", iterator->key().ToString());
- if (expect_filter_check) {
- EXPECT_EQ(0, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH));
- }
- iterator->Seek("xx0");
- ASSERT_TRUE(iterator->Valid());
- ASSERT_EQ("xx1", iterator->key().ToString());
- if (expect_filter_check) {
- EXPECT_EQ(1, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH));
- }
- }
- CompactRangeOptions compact_range_opts;
- compact_range_opts.bottommost_level_compaction =
- BottommostLevelCompaction::kForce;
- ASSERT_OK(db_->CompactRange(compact_range_opts, nullptr, nullptr));
- ASSERT_OK(db_->CompactRange(compact_range_opts, nullptr, nullptr));
- // Re-execute similar queries after a full compaction
- {
- std::unique_ptr<Iterator> iterator(db_->NewIterator(ReadOptions()));
- iterator->Seek("x");
- ASSERT_TRUE(iterator->Valid());
- ASSERT_EQ("xb", iterator->key().ToString());
- if (expect_filter_check) {
- EXPECT_EQ(1, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH));
- }
- iterator->Seek("xg");
- ASSERT_TRUE(iterator->Valid());
- ASSERT_EQ("xx1", iterator->key().ToString());
- if (expect_filter_check) {
- EXPECT_EQ(1, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH));
- }
- iterator->Seek("xz");
- ASSERT_TRUE(iterator->Valid());
- ASSERT_EQ("xz1", iterator->key().ToString());
- if (expect_filter_check) {
- EXPECT_EQ(1, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH));
- }
- ASSERT_OK(iterator->status());
- }
- {
- std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));
- iterator->SeekForPrev("xx0");
- ASSERT_TRUE(iterator->Valid());
- ASSERT_EQ("xb", iterator->key().ToString());
- if (expect_filter_check) {
- EXPECT_EQ(1, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH));
- }
- iterator->Seek("xx0");
- ASSERT_TRUE(iterator->Valid());
- ASSERT_EQ("xx1", iterator->key().ToString());
- if (expect_filter_check) {
- EXPECT_EQ(1, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH));
- }
- ASSERT_OK(iterator->status());
- }
- ub_str = "xg9";
- ub = Slice(ub_str);
- {
- std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));
- iterator->SeekForPrev("xg0");
- ASSERT_TRUE(iterator->Valid());
- ASSERT_EQ("xb", iterator->key().ToString());
- if (expect_filter_check) {
- EXPECT_EQ(1, PopTicker(options, NON_LAST_LEVEL_SEEK_FILTER_MATCH));
- }
- ASSERT_OK(iterator->status());
- }
- }
- }
- TEST_F(DBTest2, BlockBasedTablePrefixGetIndexNotFound) {
- // With a hash-search index, Get() must still find a key even when other
- // SST files have an empty hash bucket for its prefix.
- // create a DB with block prefix index
- BlockBasedTableOptions table_options;
- Options options = CurrentOptions();
- table_options.block_size = 300;
- table_options.index_type = BlockBasedTableOptions::kHashSearch;
- table_options.index_shortening =
- BlockBasedTableOptions::IndexShorteningMode::kNoShortening;
- options.table_factory.reset(NewBlockBasedTableFactory(table_options));
- options.prefix_extractor.reset(NewFixedPrefixTransform(1));
- options.level0_file_num_compaction_trigger = 8;
- Reopen(options);
- ASSERT_OK(Put("b1", "ok"));
- ASSERT_OK(Flush());
- // Flushing several files so that the chance that hash bucket
- // is empty for "b" in at least one of the files is high.
- ASSERT_OK(Put("a1", ""));
- ASSERT_OK(Put("c1", ""));
- ASSERT_OK(Flush());
- ASSERT_OK(Put("a2", ""));
- ASSERT_OK(Put("c2", ""));
- ASSERT_OK(Flush());
- ASSERT_OK(Put("a3", ""));
- ASSERT_OK(Put("c3", ""));
- ASSERT_OK(Flush());
- ASSERT_OK(Put("a4", ""));
- ASSERT_OK(Put("c4", ""));
- ASSERT_OK(Flush());
- ASSERT_OK(Put("a5", ""));
- ASSERT_OK(Put("c5", ""));
- ASSERT_OK(Flush());
- ASSERT_EQ("ok", Get("b1"));
- }
- TEST_F(DBTest2, AutoPrefixMode1) {
- // Verifies auto_prefix_mode: the prefix bloom filter is consulted only when
- // the Seek key and iterate_upper_bound make it safe to do so. Ticker
- // expectations below distinguish "filter consulted and matched" (hit_stat)
- // from "filter consulted and seek filtered out" (miss_stat); 0/0 means the
- // filter was not consulted at all.
- do {
- // create a DB with block prefix index
- Options options = CurrentOptions();
- BlockBasedTableOptions table_options =
- *options.table_factory->GetOptions<BlockBasedTableOptions>();
- table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
- options.table_factory.reset(NewBlockBasedTableFactory(table_options));
- options.prefix_extractor.reset(NewFixedPrefixTransform(1));
- options.statistics = CreateDBStatistics();
- Reopen(options);
- Random rnd(301);
- std::string large_value = rnd.RandomString(500);
- ASSERT_OK(Put("a1", large_value));
- ASSERT_OK(Put("x1", large_value));
- ASSERT_OK(Put("y1", large_value));
- ASSERT_OK(Flush());
- ReadOptions ro;
- ro.total_order_seek = false;
- ro.auto_prefix_mode = true;
- // With num_levels == 1 all data sits in the "last level", so the seek
- // filter activity shows up in the LAST_LEVEL tickers; otherwise in the
- // NON_LAST_LEVEL ones.
- const auto hit_stat = options.num_levels == 1
- ? LAST_LEVEL_SEEK_FILTER_MATCH
- : NON_LAST_LEVEL_SEEK_FILTERED;
- const auto miss_stat = options.num_levels == 1
- ? LAST_LEVEL_SEEK_FILTERED
- : NON_LAST_LEVEL_SEEK_FILTERED;
- {
- std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));
- iterator->Seek("b1");
- ASSERT_TRUE(iterator->Valid());
- ASSERT_EQ("x1", iterator->key().ToString());
- EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
- EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat));
- ASSERT_OK(iterator->status());
- }
- // Upper bound "b9" shares the 1-byte prefix "b" with the seek key, so the
- // prefix filter may be consulted; no "b" keys exist, so the seek is
- // filtered out.
- Slice ub;
- ro.iterate_upper_bound = &ub;
- ub = "b9";
- {
- std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));
- iterator->Seek("b1");
- ASSERT_FALSE(iterator->Valid());
- EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
- EXPECT_EQ(1, TestGetAndResetTickerCount(options, miss_stat));
- ASSERT_OK(iterator->status());
- }
- ub = "z";
- {
- std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));
- iterator->Seek("b1");
- ASSERT_TRUE(iterator->Valid());
- ASSERT_EQ("x1", iterator->key().ToString());
- EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
- EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat));
- ASSERT_OK(iterator->status());
- }
- ub = "c";
- {
- std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));
- iterator->Seek("b1");
- ASSERT_FALSE(iterator->Valid());
- EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
- EXPECT_EQ(1, TestGetAndResetTickerCount(options, miss_stat));
- ASSERT_OK(iterator->status());
- }
- // Upper bound "c1" does not permit the optimization (no filter counts).
- ub = "c1";
- {
- std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));
- iterator->Seek("b1");
- ASSERT_FALSE(iterator->Valid());
- EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
- EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat));
- ASSERT_OK(iterator->status());
- }
- // The same queries without recreating iterator
- {
- std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));
- ub = "b9";
- iterator->Seek("b1");
- ASSERT_FALSE(iterator->Valid());
- EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
- EXPECT_EQ(1, TestGetAndResetTickerCount(options, miss_stat));
- ASSERT_OK(iterator->status());
- ub = "z";
- iterator->Seek("b1");
- ASSERT_TRUE(iterator->Valid());
- ASSERT_EQ("x1", iterator->key().ToString());
- EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
- EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat));
- ub = "c";
- iterator->Seek("b1");
- ASSERT_FALSE(iterator->Valid());
- EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
- EXPECT_EQ(1, TestGetAndResetTickerCount(options, miss_stat));
- ub = "b9";
- iterator->SeekForPrev("b1");
- ASSERT_TRUE(iterator->Valid());
- ASSERT_EQ("a1", iterator->key().ToString());
- EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
- EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat));
- ub = "zz";
- iterator->SeekToLast();
- ASSERT_TRUE(iterator->Valid());
- ASSERT_EQ("y1", iterator->key().ToString());
- iterator->SeekToFirst();
- ASSERT_TRUE(iterator->Valid());
- ASSERT_EQ("a1", iterator->key().ToString());
- }
- // Similar, now with reverse comparator
- // Technically, we are violating axiom 2 of prefix_extractors, but
- // it should be revised because of major use-cases using
- // ReverseBytewiseComparator with capped/fixed prefix Seek. (FIXME)
- options.comparator = ReverseBytewiseComparator();
- options.prefix_extractor.reset(NewFixedPrefixTransform(1));
- DestroyAndReopen(options);
- ASSERT_OK(Put("a1", large_value));
- ASSERT_OK(Put("x1", large_value));
- ASSERT_OK(Put("y1", large_value));
- ASSERT_OK(Flush());
- {
- std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));
- ub = "b1";
- iterator->Seek("b9");
- ASSERT_FALSE(iterator->Valid());
- EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
- EXPECT_EQ(1, TestGetAndResetTickerCount(options, miss_stat));
- ASSERT_OK(iterator->status());
- ub = "b1";
- iterator->Seek("z");
- ASSERT_TRUE(iterator->Valid());
- ASSERT_EQ("y1", iterator->key().ToString());
- EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
- EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat));
- ub = "b1";
- iterator->Seek("c");
- ASSERT_FALSE(iterator->Valid());
- EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
- EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat));
- ub = "b";
- iterator->Seek("c9");
- ASSERT_FALSE(iterator->Valid());
- // Fails if ReverseBytewiseComparator::IsSameLengthImmediateSuccessor
- // is "correctly" implemented.
- EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
- EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat));
- ub = "a";
- iterator->Seek("b9");
- // Fails if ReverseBytewiseComparator::IsSameLengthImmediateSuccessor
- // is "correctly" implemented.
- ASSERT_TRUE(iterator->Valid());
- ASSERT_EQ("a1", iterator->key().ToString());
- EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
- EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat));
- ub = "b";
- iterator->Seek("a");
- ASSERT_FALSE(iterator->Valid());
- // Fails if ReverseBytewiseComparator::IsSameLengthImmediateSuccessor
- // matches BytewiseComparator::IsSameLengthImmediateSuccessor. Upper
- // comparing before seek key prevents a real bug from surfacing.
- EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
- EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat));
- ub = "b1";
- iterator->SeekForPrev("b9");
- ASSERT_TRUE(iterator->Valid());
- // Fails if ReverseBytewiseComparator::IsSameLengthImmediateSuccessor
- // is "correctly" implemented.
- ASSERT_EQ("x1", iterator->key().ToString());
- EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
- EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat));
- ub = "a";
- iterator->SeekToLast();
- ASSERT_TRUE(iterator->Valid());
- ASSERT_EQ("a1", iterator->key().ToString());
- iterator->SeekToFirst();
- ASSERT_TRUE(iterator->Valid());
- ASSERT_EQ("y1", iterator->key().ToString());
- }
- // Now something a bit different, related to "short" keys that
- // auto_prefix_mode can omit. See "BUG" section of auto_prefix_mode.
- options.comparator = BytewiseComparator();
- for (const auto config : {"fixed:2", "capped:2"}) {
- ASSERT_OK(SliceTransform::CreateFromString(ConfigOptions(), config,
- &options.prefix_extractor));
- // FIXME: kHashSearch, etc. requires all keys be InDomain
- if (StartsWith(config, "fixed") &&
- (table_options.index_type == BlockBasedTableOptions::kHashSearch ||
- StartsWith(options.memtable_factory->Name(), "Hash"))) {
- continue;
- }
- DestroyAndReopen(options);
- const char* a_end_stuff = "a\xffXYZ";
- const char* b_begin_stuff = "b\x00XYZ";
- ASSERT_OK(Put("a", large_value));
- ASSERT_OK(Put("b", large_value));
- ASSERT_OK(Put(Slice(b_begin_stuff, 3), large_value));
- ASSERT_OK(Put("c", large_value));
- ASSERT_OK(Flush());
- // control showing valid optimization with auto_prefix mode
- ub = Slice(a_end_stuff, 4);
- ro.iterate_upper_bound = &ub;
- std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));
- iterator->Seek(Slice(a_end_stuff, 2));
- ASSERT_FALSE(iterator->Valid());
- EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
- EXPECT_EQ(1, TestGetAndResetTickerCount(options, miss_stat));
- ASSERT_OK(iterator->status());
- // test, cannot be validly optimized with auto_prefix_mode
- ub = Slice(b_begin_stuff, 2);
- ro.iterate_upper_bound = &ub;
- iterator->Seek(Slice(a_end_stuff, 2));
- // !!! BUG !!! See "BUG" section of auto_prefix_mode.
- ASSERT_FALSE(iterator->Valid());
- EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
- EXPECT_EQ(1, TestGetAndResetTickerCount(options, miss_stat));
- ASSERT_OK(iterator->status());
- // To prove that is the wrong result, now use total order seek
- ReadOptions tos_ro = ro;
- tos_ro.total_order_seek = true;
- tos_ro.auto_prefix_mode = false;
- iterator.reset(db_->NewIterator(tos_ro));
- iterator->Seek(Slice(a_end_stuff, 2));
- ASSERT_TRUE(iterator->Valid());
- ASSERT_EQ("b", iterator->key().ToString());
- EXPECT_EQ(0, TestGetAndResetTickerCount(options, hit_stat));
- EXPECT_EQ(0, TestGetAndResetTickerCount(options, miss_stat));
- ASSERT_OK(iterator->status());
- }
- } while (ChangeOptions(kSkipPlainTable));
- }
- // Value-parameterized fixture that injects an IOError at the sync point named
- // by the test parameter (a point inside SetCurrentFile; see the
- // INSTANTIATE_TEST_CASE_P values) to exercise failure handling around the
- // CURRENT-file update.
- class RenameCurrentTest : public DBTestBase,
- public testing::WithParamInterface<std::string> {
- public:
- RenameCurrentTest()
- : DBTestBase("rename_current_test", /*env_do_fsync=*/true),
- sync_point_(GetParam()) {}
- ~RenameCurrentTest() override = default;
- // Forbid overwriting existing files for the duration of the test so a
- // failed CURRENT update cannot be masked by a later overwrite.
- void SetUp() override {
- env_->no_file_overwrite_.store(true, std::memory_order_release);
- }
- void TearDown() override {
- env_->no_file_overwrite_.store(false, std::memory_order_release);
- }
- // Arms the parameterized sync point to overwrite the operation's Status
- // with an injected IOError. Caller must still EnableProcessing() before
- // the operation under test (and DisableProcessing() afterwards).
- void SetupSyncPoints() {
- SyncPoint::GetInstance()->DisableProcessing();
- SyncPoint::GetInstance()->SetCallBack(sync_point_, [&](void* arg) {
- Status* s = static_cast<Status*>(arg);
- assert(s);
- *s = Status::IOError("Injected IO error.");
- });
- }
- // Name of the sync point at which the IOError is injected.
- const std::string sync_point_;
- };
- // Inject the error both before and after the rename step of SetCurrentFile,
- // covering both failure windows of the CURRENT-file update.
- INSTANTIATE_TEST_CASE_P(DistributedFS, RenameCurrentTest,
- ::testing::Values("SetCurrentFile:BeforeRename",
- "SetCurrentFile:AfterRename"));
- // A failure while establishing the CURRENT file must fail DB open; once the
- // injection is disabled, the DB must open normally.
- TEST_P(RenameCurrentTest, Open) {
- Destroy(last_options_);
- Options options = GetDefaultOptions();
- options.create_if_missing = true;
- SetupSyncPoints();
- SyncPoint::GetInstance()->EnableProcessing();
- Status s = TryReopen(options);
- ASSERT_NOK(s);
- SyncPoint::GetInstance()->DisableProcessing();
- // Without the injected error, open succeeds.
- Reopen(options);
- }
- // max_manifest_file_size == 1 forces the MANIFEST to roll (and CURRENT to be
- // rewritten) during flush; the injected error must fail the flush and leave
- // the DB rejecting further writes, while reopen restores pre-flush data.
- TEST_P(RenameCurrentTest, Flush) {
- Destroy(last_options_);
- Options options = GetDefaultOptions();
- options.max_manifest_file_size = 1;
- options.create_if_missing = true;
- Reopen(options);
- ASSERT_OK(Put("key", "value"));
- SetupSyncPoints();
- SyncPoint::GetInstance()->EnableProcessing();
- ASSERT_NOK(Flush());
- // The failed flush puts the DB in an error state: new writes are rejected.
- ASSERT_NOK(Put("foo", "value"));
- SyncPoint::GetInstance()->DisableProcessing();
- Reopen(options);
- // "key" survives recovery; the rejected "foo" write was never applied.
- ASSERT_EQ("value", Get("key"));
- ASSERT_EQ("NOT_FOUND", Get("foo"));
- }
- // Same as the Flush case but with the MANIFEST roll triggered by a manual
- // full-range compaction: the injected CURRENT-file error must fail the
- // compaction and block further writes, and reopen must preserve the data
- // written before the failure.
- TEST_P(RenameCurrentTest, Compaction) {
- Destroy(last_options_);
- Options options = GetDefaultOptions();
- options.max_manifest_file_size = 1;
- options.create_if_missing = true;
- Reopen(options);
- ASSERT_OK(Put("a", "a_value"));
- ASSERT_OK(Put("c", "c_value"));
- ASSERT_OK(Flush());
- ASSERT_OK(Put("b", "b_value"));
- ASSERT_OK(Put("d", "d_value"));
- ASSERT_OK(Flush());
- SetupSyncPoints();
- SyncPoint::GetInstance()->EnableProcessing();
- ASSERT_NOK(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr,
- /*end=*/nullptr));
- // DB is in error state after the failed compaction.
- ASSERT_NOK(Put("foo", "value"));
- SyncPoint::GetInstance()->DisableProcessing();
- Reopen(options);
- ASSERT_EQ("NOT_FOUND", Get("foo"));
- ASSERT_EQ("d_value", Get("d"));
- }
- TEST_F(DBTest2, VariousFileTemperatures) {
- // Verifies that WAL / MANIFEST / other metadata files are opened with the
- // temperatures requested either via OptimizeFor*Write hooks or via the
- // metadata_write_temperature / wal_write_temperature options, and that SST
- // temperatures follow default_write_temperature / last_level_temperature.
- constexpr size_t kNumberFileTypes = static_cast<size_t>(kBlobFile) + 1U;
- struct MyTestFS : public FileTemperatureTestFS {
- explicit MyTestFS(const std::shared_ptr<FileSystem>& fs)
- : FileTemperatureTestFS(fs) {
- Reset();
- }
- // Asserts that each newly created file carries the expected temperature
- // for its file type, and counts creations per type.
- IOStatus NewWritableFile(const std::string& fname, const FileOptions& opts,
- std::unique_ptr<FSWritableFile>* result,
- IODebugContext* dbg) override {
- IOStatus ios =
- FileTemperatureTestFS::NewWritableFile(fname, opts, result, dbg);
- if (ios.ok()) {
- uint64_t number;
- FileType type;
- if (ParseFileName(GetFileName(fname), &number, "LOG", &type)) {
- if (type == kTableFile) {
- // Not checked here
- } else if (type == kWalFile) {
- if (opts.temperature != expected_wal_temperature) {
- std::cerr << "Attempt to open " << fname << " with temperature "
- << temperature_to_string[opts.temperature]
- << " rather than "
- << temperature_to_string[expected_wal_temperature]
- << std::endl;
- assert(false);
- }
- } else if (type == kDescriptorFile) {
- if (opts.temperature != expected_manifest_temperature) {
- // Fixed: report the manifest expectation (was mistakenly
- // printing expected_wal_temperature).
- std::cerr << "Attempt to open " << fname << " with temperature "
- << temperature_to_string[opts.temperature]
- << " rather than "
- << temperature_to_string[expected_manifest_temperature]
- << std::endl;
- assert(false);
- }
- } else if (opts.temperature != expected_other_metadata_temperature) {
- // Fixed: report the other-metadata expectation (was mistakenly
- // printing expected_wal_temperature).
- std::cerr << "Attempt to open " << fname << " with temperature "
- << temperature_to_string[opts.temperature]
- << " rather than "
- << temperature_to_string[expected_other_metadata_temperature]
- << std::endl;
- assert(false);
- }
- UpdateCount(type, 1);
- }
- }
- return ios;
- }
- // Keeps per-type counts consistent across renames (e.g. temp file ->
- // CURRENT).
- IOStatus RenameFile(const std::string& src, const std::string& dst,
- const IOOptions& options,
- IODebugContext* dbg) override {
- IOStatus ios = FileTemperatureTestFS::RenameFile(src, dst, options, dbg);
- if (ios.ok()) {
- uint64_t number;
- FileType src_type;
- FileType dst_type;
- assert(ParseFileName(GetFileName(src), &number, "LOG", &src_type));
- assert(ParseFileName(GetFileName(dst), &number, "LOG", &dst_type));
- UpdateCount(src_type, -1);
- UpdateCount(dst_type, 1);
- }
- return ios;
- }
- void UpdateCount(FileType type, int delta) {
- size_t i = static_cast<size_t>(type);
- assert(i < kNumberFileTypes);
- counts[i].FetchAddRelaxed(delta);
- }
- // Returns and clears the per-type creation counts, omitting zero entries.
- std::map<FileType, size_t> PopCounts() {
- std::map<FileType, size_t> ret;
- for (size_t i = 0; i < kNumberFileTypes; ++i) {
- int c = counts[i].ExchangeRelaxed(0);
- if (c > 0) {
- ret[static_cast<FileType>(i)] = c;
- }
- }
- return ret;
- }
- FileOptions OptimizeForLogWrite(
- const FileOptions& file_options,
- const DBOptions& /*db_options*/) const override {
- FileOptions opts = file_options;
- if (optimize_wal_temperature != Temperature::kUnknown) {
- opts.temperature = optimize_wal_temperature;
- }
- return opts;
- }
- FileOptions OptimizeForManifestWrite(
- const FileOptions& file_options) const override {
- FileOptions opts = file_options;
- if (optimize_manifest_temperature != Temperature::kUnknown) {
- opts.temperature = optimize_manifest_temperature;
- }
- return opts;
- }
- // Restores all expectations to kUnknown and zeroes the counters.
- void Reset() {
- optimize_manifest_temperature = Temperature::kUnknown;
- optimize_wal_temperature = Temperature::kUnknown;
- expected_manifest_temperature = Temperature::kUnknown;
- expected_other_metadata_temperature = Temperature::kUnknown;
- expected_wal_temperature = Temperature::kUnknown;
- for (auto& c : counts) {
- c.StoreRelaxed(0);
- }
- }
- Temperature optimize_manifest_temperature;
- Temperature optimize_wal_temperature;
- Temperature expected_manifest_temperature;
- Temperature expected_other_metadata_temperature;
- Temperature expected_wal_temperature;
- std::array<RelaxedAtomic<int>, kNumberFileTypes> counts;
- };
- // We don't have enough non-unknown temps to confidently distinguish that
- // a specific setting caused a specific outcome, in a single run. Using
- // RandomKnownTemperature() is a reasonable work-around without blowing up
- // test time.
- auto test_fs = std::make_shared<MyTestFS>(env_->GetFileSystem());
- std::unique_ptr<Env> env(new CompositeEnvWrapper(env_, test_fs));
- for (bool use_optimize : {false, true}) {
- std::cerr << "use_optimize: " << std::to_string(use_optimize) << std::endl;
- for (bool use_temp_options : {false, true}) {
- std::cerr << "use_temp_options: " << std::to_string(use_temp_options)
- << std::endl;
- Options options = CurrentOptions();
- // Currently require for last level temperature
- options.compaction_style = kCompactionStyleUniversal;
- options.env = env.get();
- test_fs->Reset();
- if (use_optimize) {
- test_fs->optimize_manifest_temperature = RandomKnownTemperature();
- test_fs->expected_manifest_temperature =
- test_fs->optimize_manifest_temperature;
- test_fs->optimize_wal_temperature = RandomKnownTemperature();
- test_fs->expected_wal_temperature = test_fs->optimize_wal_temperature;
- }
- if (use_temp_options) {
- // Options-level temperatures take effect for manifest, other
- // metadata, WAL, and SST files.
- options.metadata_write_temperature = RandomKnownTemperature();
- test_fs->expected_manifest_temperature =
- options.metadata_write_temperature;
- test_fs->expected_other_metadata_temperature =
- options.metadata_write_temperature;
- options.wal_write_temperature = RandomKnownTemperature();
- test_fs->expected_wal_temperature = options.wal_write_temperature;
- options.last_level_temperature = RandomKnownTemperature();
- options.default_write_temperature = RandomKnownTemperature();
- }
- DestroyAndReopen(options);
- Defer closer([&] { Close(); });
- using FTC = std::map<FileType, size_t>;
- // Files on DB startup
- ASSERT_EQ(test_fs->PopCounts(), FTC({{kWalFile, 1},
- {kDescriptorFile, 2},
- {kCurrentFile, 2},
- {kIdentityFile, 1},
- {kOptionsFile, 1}}));
- // Temperature count map
- using TCM = std::map<Temperature, size_t>;
- ASSERT_EQ(test_fs->CountCurrentSstFilesByTemp(), TCM({}));
- ASSERT_OK(Put("foo", "1"));
- ASSERT_OK(Put("bar", "1"));
- ASSERT_OK(Flush());
- ASSERT_OK(Put("foo", "2"));
- ASSERT_OK(Put("bar", "2"));
- ASSERT_OK(Flush());
- ASSERT_EQ(test_fs->CountCurrentSstFilesByTemp(),
- TCM({{options.default_write_temperature, 2}}));
- ASSERT_OK(db_->CompactRange({}, nullptr, nullptr));
- ASSERT_OK(dbfull()->TEST_WaitForBackgroundWork());
- ASSERT_EQ(test_fs->CountCurrentSstFilesByTemp(),
- TCM({{options.last_level_temperature, 1}}));
- ASSERT_OK(Put("foo", "3"));
- ASSERT_OK(Put("bar", "3"));
- ASSERT_OK(Flush());
- // Just in memtable/WAL
- ASSERT_OK(Put("dog", "3"));
- {
- TCM expected;
- expected[options.default_write_temperature] += 1;
- expected[options.last_level_temperature] += 1;
- ASSERT_EQ(test_fs->CountCurrentSstFilesByTemp(), expected);
- }
- // New files during operation
- ASSERT_EQ(test_fs->PopCounts(), FTC({{kWalFile, 3}, {kTableFile, 4}}));
- Reopen(options);
- // New files during re-open/recovery
- ASSERT_EQ(test_fs->PopCounts(), FTC({{kWalFile, 1},
- {kTableFile, 1},
- {kDescriptorFile, 1},
- {kCurrentFile, 1},
- {kOptionsFile, 1}}));
- Destroy(options);
- }
- }
- }
- TEST_F(DBTest2, LastLevelTemperature) {
- // Verifies that last_level_temperature is applied to bottommost-level SSTs
- // (and reported consistently via metadata, listener events, size property,
- // iostats, and tickers) while non-bottommost files keep kUnknown, and that
- // the temperature information persists across reopen.
- class TestListener : public EventListener {
- public:
- void OnFileReadFinish(const FileOperationInfo& info) override {
- UpdateFileTemperature(info);
- }
- void OnFileWriteFinish(const FileOperationInfo& info) override {
- UpdateFileTemperature(info);
- }
- void OnFileFlushFinish(const FileOperationInfo& info) override {
- UpdateFileTemperature(info);
- }
- void OnFileSyncFinish(const FileOperationInfo& info) override {
- UpdateFileTemperature(info);
- }
- void OnFileCloseFinish(const FileOperationInfo& info) override {
- UpdateFileTemperature(info);
- }
- bool ShouldBeNotifiedOnFileIO() override { return true; }
- // SST file number -> temperature observed via file operation events.
- std::unordered_map<uint64_t, Temperature> file_temperatures;
- private:
- void UpdateFileTemperature(const FileOperationInfo& info) {
- auto filename = GetFileName(info.path);
- uint64_t number;
- FileType type;
- ASSERT_TRUE(ParseFileName(filename, &number, &type));
- if (type == kTableFile) {
- MutexLock l(&mutex_);
- auto ret = file_temperatures.insert({number, info.temperature});
- if (!ret.second) {
- // the same file temperature should always be the same for all events
- ASSERT_TRUE(ret.first->second == info.temperature);
- }
- }
- }
- std::string GetFileName(const std::string& fname) {
- auto filename = fname.substr(fname.find_last_of(kFilePathSeparator) + 1);
- // workaround only for Windows that the file path could contain both
- // Windows FilePathSeparator and '/'
- filename = filename.substr(filename.find_last_of('/') + 1);
- return filename;
- }
- port::Mutex mutex_;
- };
- const int kNumLevels = 7;
- const int kLastLevel = kNumLevels - 1;
- auto* listener = new TestListener();
- Options options = CurrentOptions();
- options.last_level_temperature = Temperature::kWarm;
- options.level0_file_num_compaction_trigger = 2;
- options.level_compaction_dynamic_level_bytes = true;
- options.num_levels = kNumLevels;
- options.statistics = CreateDBStatistics();
- options.listeners.emplace_back(listener);
- Reopen(options);
- auto size = GetSstSizeHelper(Temperature::kUnknown);
- ASSERT_EQ(size, 0);
- size = GetSstSizeHelper(Temperature::kWarm);
- ASSERT_EQ(size, 0);
- size = GetSstSizeHelper(Temperature::kHot);
- ASSERT_EQ(size, 0);
- ASSERT_OK(Put("foo", "bar"));
- ASSERT_OK(Put("bar", "bar"));
- ASSERT_OK(Flush());
- ASSERT_OK(Put("foo", "bar"));
- ASSERT_OK(Put("bar", "bar"));
- ASSERT_OK(Flush());
- ASSERT_OK(dbfull()->TEST_WaitForCompact());
- get_iostats_context()->Reset();
- IOStatsContext* iostats = get_iostats_context();
- ColumnFamilyMetaData metadata;
- db_->GetColumnFamilyMetaData(&metadata);
- ASSERT_EQ(1, metadata.file_count);
- SstFileMetaData meta = metadata.levels[kLastLevel].files[0];
- ASSERT_EQ(Temperature::kWarm, meta.temperature);
- uint64_t number;
- FileType type;
- ASSERT_TRUE(ParseFileName(meta.name, &number, &type));
- ASSERT_EQ(listener->file_temperatures.at(number), meta.temperature);
- size = GetSstSizeHelper(Temperature::kUnknown);
- ASSERT_EQ(size, 0);
- size = GetSstSizeHelper(Temperature::kWarm);
- ASSERT_GT(size, 0);
- ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0);
- ASSERT_EQ(iostats->file_io_stats_by_temperature.warm_file_read_count, 0);
- // Fixed: was a duplicated hot_file_read_count check; cold was intended.
- ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_read_count, 0);
- ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_BYTES), 0);
- ASSERT_GT(options.statistics->getTickerCount(WARM_FILE_READ_BYTES), 0);
- ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_BYTES), 0);
- ASSERT_EQ("bar", Get("foo"));
- ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0);
- ASSERT_EQ(iostats->file_io_stats_by_temperature.warm_file_read_count, 1);
- // Fixed: was a duplicated hot_file_read_count check; cold was intended.
- ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_read_count, 0);
- ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_bytes_read, 0);
- ASSERT_GT(iostats->file_io_stats_by_temperature.warm_file_bytes_read, 0);
- ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_bytes_read, 0);
- ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_BYTES), 0);
- ASSERT_GT(options.statistics->getTickerCount(WARM_FILE_READ_BYTES), 0);
- ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_BYTES), 0);
- ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_COUNT), 0);
- ASSERT_GT(options.statistics->getTickerCount(WARM_FILE_READ_COUNT), 0);
- ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_COUNT), 0);
- // non-bottommost file still has unknown temperature
- ASSERT_OK(Put("foo", "bar"));
- ASSERT_OK(Put("bar", "bar"));
- ASSERT_OK(Flush());
- ASSERT_EQ("bar", Get("bar"));
- ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0);
- ASSERT_EQ(iostats->file_io_stats_by_temperature.warm_file_read_count, 1);
- // Fixed: was a duplicated hot_file_read_count check; cold was intended.
- ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_read_count, 0);
- ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_bytes_read, 0);
- ASSERT_GT(iostats->file_io_stats_by_temperature.warm_file_bytes_read, 0);
- ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_bytes_read, 0);
- ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_BYTES), 0);
- ASSERT_GT(options.statistics->getTickerCount(WARM_FILE_READ_BYTES), 0);
- ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_BYTES), 0);
- ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_COUNT), 0);
- ASSERT_GT(options.statistics->getTickerCount(WARM_FILE_READ_COUNT), 0);
- ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_COUNT), 0);
- db_->GetColumnFamilyMetaData(&metadata);
- ASSERT_EQ(2, metadata.file_count);
- meta = metadata.levels[0].files[0];
- ASSERT_EQ(Temperature::kUnknown, meta.temperature);
- ASSERT_TRUE(ParseFileName(meta.name, &number, &type));
- ASSERT_EQ(listener->file_temperatures.at(number), meta.temperature);
- meta = metadata.levels[kLastLevel].files[0];
- ASSERT_EQ(Temperature::kWarm, meta.temperature);
- ASSERT_TRUE(ParseFileName(meta.name, &number, &type));
- ASSERT_EQ(listener->file_temperatures.at(number), meta.temperature);
- size = GetSstSizeHelper(Temperature::kUnknown);
- ASSERT_GT(size, 0);
- size = GetSstSizeHelper(Temperature::kWarm);
- ASSERT_GT(size, 0);
- // reopen and check the information is persisted
- Reopen(options);
- db_->GetColumnFamilyMetaData(&metadata);
- ASSERT_EQ(2, metadata.file_count);
- meta = metadata.levels[0].files[0];
- ASSERT_EQ(Temperature::kUnknown, meta.temperature);
- ASSERT_TRUE(ParseFileName(meta.name, &number, &type));
- ASSERT_EQ(listener->file_temperatures.at(number), meta.temperature);
- meta = metadata.levels[kLastLevel].files[0];
- ASSERT_EQ(Temperature::kWarm, meta.temperature);
- ASSERT_TRUE(ParseFileName(meta.name, &number, &type));
- ASSERT_EQ(listener->file_temperatures.at(number), meta.temperature);
- size = GetSstSizeHelper(Temperature::kUnknown);
- ASSERT_GT(size, 0);
- size = GetSstSizeHelper(Temperature::kWarm);
- ASSERT_GT(size, 0);
- // check other non-exist temperatures
- size = GetSstSizeHelper(Temperature::kHot);
- ASSERT_EQ(size, 0);
- size = GetSstSizeHelper(Temperature::kCold);
- ASSERT_EQ(size, 0);
- std::string prop;
- ASSERT_TRUE(dbfull()->GetProperty(
- DB::Properties::kLiveSstFilesSizeAtTemperature + std::to_string(22),
- &prop));
- ASSERT_EQ(std::atoi(prop.c_str()), 0);
- Reopen(options);
- db_->GetColumnFamilyMetaData(&metadata);
- ASSERT_EQ(2, metadata.file_count);
- meta = metadata.levels[0].files[0];
- ASSERT_EQ(Temperature::kUnknown, meta.temperature);
- ASSERT_TRUE(ParseFileName(meta.name, &number, &type));
- ASSERT_EQ(listener->file_temperatures.at(number), meta.temperature);
- meta = metadata.levels[kLastLevel].files[0];
- ASSERT_EQ(Temperature::kWarm, meta.temperature);
- ASSERT_TRUE(ParseFileName(meta.name, &number, &type));
- ASSERT_EQ(listener->file_temperatures.at(number), meta.temperature);
- }
- TEST_F(DBTest2, LastLevelTemperatureUniversal) {
- // Same idea as LastLevelTemperature but for universal compaction: existing
- // files keep their temperature when last_level_temperature changes (via
- // reopen or SetOptions); only newly generated bottommost files pick up the
- // new setting; an invalid temperature fails reopen.
- const int kTriggerNum = 3;
- const int kNumLevels = 5;
- const int kBottommostLevel = kNumLevels - 1;
- Options options = CurrentOptions();
- options.compaction_style = kCompactionStyleUniversal;
- options.level0_file_num_compaction_trigger = kTriggerNum;
- options.num_levels = kNumLevels;
- options.statistics = CreateDBStatistics();
- DestroyAndReopen(options);
- auto size = GetSstSizeHelper(Temperature::kUnknown);
- ASSERT_EQ(size, 0);
- size = GetSstSizeHelper(Temperature::kWarm);
- ASSERT_EQ(size, 0);
- size = GetSstSizeHelper(Temperature::kHot);
- ASSERT_EQ(size, 0);
- get_iostats_context()->Reset();
- IOStatsContext* iostats = get_iostats_context();
- for (int i = 0; i < kTriggerNum; i++) {
- ASSERT_OK(Put("foo", "bar"));
- ASSERT_OK(Put("bar", "bar"));
- ASSERT_OK(Flush());
- }
- ASSERT_OK(dbfull()->TEST_WaitForCompact());
- ColumnFamilyMetaData metadata;
- db_->GetColumnFamilyMetaData(&metadata);
- ASSERT_EQ(1, metadata.file_count);
- // last_level_temperature not set yet: bottommost file stays kUnknown.
- ASSERT_EQ(Temperature::kUnknown,
- metadata.levels[kBottommostLevel].files[0].temperature);
- size = GetSstSizeHelper(Temperature::kUnknown);
- ASSERT_GT(size, 0);
- size = GetSstSizeHelper(Temperature::kWarm);
- ASSERT_EQ(size, 0);
- ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0);
- ASSERT_EQ(iostats->file_io_stats_by_temperature.warm_file_read_count, 0);
- ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_read_count, 0);
- ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_BYTES), 0);
- ASSERT_EQ(options.statistics->getTickerCount(WARM_FILE_READ_BYTES), 0);
- ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_BYTES), 0);
- ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_COUNT), 0);
- ASSERT_EQ(options.statistics->getTickerCount(WARM_FILE_READ_COUNT), 0);
- ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_COUNT), 0);
- ASSERT_EQ("bar", Get("foo"));
- ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_read_count, 0);
- ASSERT_EQ(iostats->file_io_stats_by_temperature.warm_file_read_count, 0);
- // Fixed: was a duplicated hot_file_read_count check; cold was intended.
- ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_read_count, 0);
- ASSERT_EQ(iostats->file_io_stats_by_temperature.hot_file_bytes_read, 0);
- ASSERT_EQ(iostats->file_io_stats_by_temperature.warm_file_bytes_read, 0);
- ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_bytes_read, 0);
- ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_BYTES), 0);
- ASSERT_EQ(options.statistics->getTickerCount(WARM_FILE_READ_BYTES), 0);
- ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_BYTES), 0);
- ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_COUNT), 0);
- ASSERT_EQ(options.statistics->getTickerCount(WARM_FILE_READ_COUNT), 0);
- ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_COUNT), 0);
- ASSERT_OK(Put("foo", "bar"));
- ASSERT_OK(Put("bar", "bar"));
- ASSERT_OK(Flush());
- ASSERT_OK(dbfull()->TEST_WaitForCompact());
- db_->GetColumnFamilyMetaData(&metadata);
- ASSERT_EQ(2, metadata.file_count);
- ASSERT_EQ(Temperature::kUnknown, metadata.levels[0].files[0].temperature);
- size = GetSstSizeHelper(Temperature::kUnknown);
- ASSERT_GT(size, 0);
- size = GetSstSizeHelper(Temperature::kWarm);
- ASSERT_EQ(size, 0);
- // Update last level temperature
- options.last_level_temperature = Temperature::kWarm;
- Reopen(options);
- db_->GetColumnFamilyMetaData(&metadata);
- // Should not impact existing ones
- ASSERT_EQ(Temperature::kUnknown,
- metadata.levels[kBottommostLevel].files[0].temperature);
- size = GetSstSizeHelper(Temperature::kUnknown);
- ASSERT_GT(size, 0);
- size = GetSstSizeHelper(Temperature::kWarm);
- ASSERT_EQ(size, 0);
- // new generated file should have the new settings
- ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
- db_->GetColumnFamilyMetaData(&metadata);
- ASSERT_EQ(1, metadata.file_count);
- ASSERT_EQ(Temperature::kWarm,
- metadata.levels[kBottommostLevel].files[0].temperature);
- size = GetSstSizeHelper(Temperature::kUnknown);
- ASSERT_EQ(size, 0);
- size = GetSstSizeHelper(Temperature::kWarm);
- ASSERT_GT(size, 0);
- ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_BYTES), 0);
- ASSERT_GT(options.statistics->getTickerCount(WARM_FILE_READ_BYTES), 0);
- ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_BYTES), 0);
- ASSERT_EQ(options.statistics->getTickerCount(HOT_FILE_READ_COUNT), 0);
- ASSERT_GT(options.statistics->getTickerCount(WARM_FILE_READ_COUNT), 0);
- ASSERT_EQ(options.statistics->getTickerCount(COLD_FILE_READ_COUNT), 0);
- // non-bottommost file still has unknown temperature
- ASSERT_OK(Put("foo", "bar"));
- ASSERT_OK(Put("bar", "bar"));
- ASSERT_OK(Flush());
- ASSERT_OK(dbfull()->TEST_WaitForCompact());
- db_->GetColumnFamilyMetaData(&metadata);
- ASSERT_EQ(2, metadata.file_count);
- ASSERT_EQ(Temperature::kUnknown, metadata.levels[0].files[0].temperature);
- size = GetSstSizeHelper(Temperature::kUnknown);
- ASSERT_GT(size, 0);
- size = GetSstSizeHelper(Temperature::kWarm);
- ASSERT_GT(size, 0);
- // check other non-exist temperatures
- size = GetSstSizeHelper(Temperature::kHot);
- ASSERT_EQ(size, 0);
- size = GetSstSizeHelper(Temperature::kCold);
- ASSERT_EQ(size, 0);
- std::string prop;
- ASSERT_TRUE(dbfull()->GetProperty(
- DB::Properties::kLiveSstFilesSizeAtTemperature + std::to_string(22),
- &prop));
- ASSERT_EQ(std::atoi(prop.c_str()), 0);
- // Update last level temperature dynamically with SetOptions
- auto s = db_->SetOptions({{"last_level_temperature", "kCold"}});
- ASSERT_OK(s);
- ASSERT_EQ(db_->GetOptions().last_level_temperature, Temperature::kCold);
- db_->GetColumnFamilyMetaData(&metadata);
- // Should not impact the existing files
- ASSERT_EQ(Temperature::kWarm,
- metadata.levels[kBottommostLevel].files[0].temperature);
- size = GetSstSizeHelper(Temperature::kUnknown);
- ASSERT_GT(size, 0);
- size = GetSstSizeHelper(Temperature::kWarm);
- ASSERT_GT(size, 0);
- size = GetSstSizeHelper(Temperature::kCold);
- ASSERT_EQ(size, 0);
- // new generated files should have the new settings
- ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
- db_->GetColumnFamilyMetaData(&metadata);
- ASSERT_EQ(1, metadata.file_count);
- ASSERT_EQ(Temperature::kCold,
- metadata.levels[kBottommostLevel].files[0].temperature);
- size = GetSstSizeHelper(Temperature::kUnknown);
- ASSERT_EQ(size, 0);
- size = GetSstSizeHelper(Temperature::kWarm);
- ASSERT_EQ(size, 0);
- size = GetSstSizeHelper(Temperature::kCold);
- ASSERT_GT(size, 0);
- // kLastTemperature is an invalid temperature
- options.last_level_temperature = Temperature::kLastTemperature;
- s = TryReopen(options);
- ASSERT_TRUE(s.IsIOError());
- }
- TEST_F(DBTest2, LastLevelStatistics) {
- for (bool write_time_default : {false, true}) {
- SCOPED_TRACE("write time default? " + std::to_string(write_time_default));
- Options options = CurrentOptions();
- options.last_level_temperature = Temperature::kWarm;
- if (write_time_default) {
- options.default_write_temperature = Temperature::kHot;
- ASSERT_EQ(options.default_temperature, Temperature::kUnknown);
- } else {
- options.default_temperature = Temperature::kHot;
- ASSERT_EQ(options.default_write_temperature, Temperature::kUnknown);
- }
- options.level0_file_num_compaction_trigger = 2;
- options.level_compaction_dynamic_level_bytes = true;
- options.statistics = CreateDBStatistics();
- BlockBasedTableOptions bbto;
- bbto.no_block_cache = true;
- options.table_factory.reset(NewBlockBasedTableFactory(bbto));
- DestroyAndReopen(options);
- // generate 1 sst on level 0
- ASSERT_OK(Put("foo1", "bar"));
- ASSERT_OK(Put("bar", "bar"));
- ASSERT_OK(Flush());
- ASSERT_EQ("bar", Get("bar"));
- ASSERT_GT(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_BYTES), 0);
- ASSERT_GT(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT), 0);
- ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_BYTES),
- options.statistics->getTickerCount(HOT_FILE_READ_BYTES));
- ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT),
- options.statistics->getTickerCount(HOT_FILE_READ_COUNT));
- ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_BYTES), 0);
- ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_COUNT), 0);
- // 2nd flush to trigger compaction
- ASSERT_OK(Put("foo2", "bar"));
- ASSERT_OK(Put("bar", "bar"));
- ASSERT_OK(Flush());
- ASSERT_OK(dbfull()->TEST_WaitForCompact());
- ASSERT_EQ("bar", Get("bar"));
- ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_BYTES),
- options.statistics->getTickerCount(HOT_FILE_READ_BYTES));
- ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT),
- options.statistics->getTickerCount(HOT_FILE_READ_COUNT));
- ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_BYTES),
- options.statistics->getTickerCount(WARM_FILE_READ_BYTES));
- ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_COUNT),
- options.statistics->getTickerCount(WARM_FILE_READ_COUNT));
- auto pre_bytes =
- options.statistics->getTickerCount(NON_LAST_LEVEL_READ_BYTES);
- auto pre_count =
- options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT);
- // 3rd flush to generate 1 sst on level 0
- ASSERT_OK(Put("foo3", "bar"));
- ASSERT_OK(Put("bar", "bar"));
- ASSERT_OK(Flush());
- ASSERT_EQ("bar", Get("foo1"));
- ASSERT_EQ("bar", Get("foo2"));
- ASSERT_EQ("bar", Get("foo3"));
- ASSERT_EQ("bar", Get("bar"));
- ASSERT_GT(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_BYTES),
- pre_bytes);
- ASSERT_GT(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT),
- pre_count);
- ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_BYTES),
- options.statistics->getTickerCount(HOT_FILE_READ_BYTES));
- ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT),
- options.statistics->getTickerCount(HOT_FILE_READ_COUNT));
- ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_BYTES),
- options.statistics->getTickerCount(WARM_FILE_READ_BYTES));
- ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_COUNT),
- options.statistics->getTickerCount(WARM_FILE_READ_COUNT));
- // Control
- ASSERT_NE(options.statistics->getTickerCount(LAST_LEVEL_READ_COUNT),
- options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT));
- // Not a realistic setting to make last level kWarm and default temp kCold.
- // This is just for testing default temp can be reset on reopen while the
- // last level temp is consistent across DB reopen because those file's temp
- // are persisted in manifest.
- options.default_temperature = Temperature::kCold;
- ASSERT_OK(options.statistics->Reset());
- Reopen(options);
- ASSERT_EQ("bar", Get("foo1"));
- ASSERT_EQ("bar", Get("foo2"));
- ASSERT_EQ("bar", Get("foo3"));
- ASSERT_EQ("bar", Get("bar"));
- if (write_time_default) {
- // Unchanged
- ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_BYTES),
- options.statistics->getTickerCount(HOT_FILE_READ_BYTES));
- ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT),
- options.statistics->getTickerCount(HOT_FILE_READ_COUNT));
- ASSERT_LT(0, options.statistics->getTickerCount(HOT_FILE_READ_BYTES));
- ASSERT_EQ(0, options.statistics->getTickerCount(COLD_FILE_READ_BYTES));
- } else {
- // Changed (in how we map kUnknown)
- ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_BYTES),
- options.statistics->getTickerCount(COLD_FILE_READ_BYTES));
- ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT),
- options.statistics->getTickerCount(COLD_FILE_READ_COUNT));
- ASSERT_EQ(0, options.statistics->getTickerCount(HOT_FILE_READ_BYTES));
- ASSERT_LT(0, options.statistics->getTickerCount(COLD_FILE_READ_BYTES));
- }
- ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_BYTES),
- options.statistics->getTickerCount(WARM_FILE_READ_BYTES));
- ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_COUNT),
- options.statistics->getTickerCount(WARM_FILE_READ_COUNT));
- // Control
- ASSERT_NE(options.statistics->getTickerCount(LAST_LEVEL_READ_COUNT),
- options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT));
- }
- }
- TEST_F(DBTest2, CheckpointFileTemperature) {
- class NoLinkTestFS : public FileTemperatureTestFS {
- using FileTemperatureTestFS::FileTemperatureTestFS;
- IOStatus LinkFile(const std::string&, const std::string&, const IOOptions&,
- IODebugContext*) override {
- // return not supported to force checkpoint copy the file instead of just
- // link
- return IOStatus::NotSupported();
- }
- };
- auto test_fs = std::make_shared<NoLinkTestFS>(env_->GetFileSystem());
- std::unique_ptr<Env> env(new CompositeEnvWrapper(env_, test_fs));
- Options options = CurrentOptions();
- options.last_level_temperature = Temperature::kWarm;
- // set dynamic_level to true so the compaction would compact the data to the
- // last level directly which will have the last_level_temperature
- options.level_compaction_dynamic_level_bytes = true;
- options.level0_file_num_compaction_trigger = 2;
- options.env = env.get();
- Reopen(options);
- // generate a bottommost file and a non-bottommost file
- ASSERT_OK(Put("foo", "bar"));
- ASSERT_OK(Put("bar", "bar"));
- ASSERT_OK(Flush());
- ASSERT_OK(Put("foo", "bar"));
- ASSERT_OK(Put("bar", "bar"));
- ASSERT_OK(Flush());
- ASSERT_OK(dbfull()->TEST_WaitForCompact());
- ASSERT_OK(Put("foo", "bar"));
- ASSERT_OK(Put("bar", "bar"));
- ASSERT_OK(Flush());
- auto size = GetSstSizeHelper(Temperature::kWarm);
- ASSERT_GT(size, 0);
- std::map<uint64_t, Temperature> temperatures;
- std::vector<LiveFileStorageInfo> infos;
- ASSERT_OK(
- dbfull()->GetLiveFilesStorageInfo(LiveFilesStorageInfoOptions(), &infos));
- for (const auto& info : infos) {
- temperatures.emplace(info.file_number, info.temperature);
- }
- test_fs->PopRequestedSstFileTemperatures();
- Checkpoint* checkpoint;
- ASSERT_OK(Checkpoint::Create(db_, &checkpoint));
- ASSERT_OK(
- checkpoint->CreateCheckpoint(dbname_ + kFilePathSeparator + "tempcp"));
- // checking src file src_temperature hints: 2 sst files: 1 sst is kWarm,
- // another is kUnknown
- std::vector<std::pair<uint64_t, Temperature>> requested_temps;
- test_fs->PopRequestedSstFileTemperatures(&requested_temps);
- // Two requests
- ASSERT_EQ(requested_temps.size(), 2);
- std::set<uint64_t> distinct_requests;
- for (const auto& requested_temp : requested_temps) {
- // Matching manifest temperatures
- ASSERT_EQ(temperatures.at(requested_temp.first), requested_temp.second);
- distinct_requests.insert(requested_temp.first);
- }
- // Each request to distinct file
- ASSERT_EQ(distinct_requests.size(), requested_temps.size());
- delete checkpoint;
- Close();
- }
- TEST_F(DBTest2, FileTemperatureManifestFixup) {
- auto test_fs = std::make_shared<FileTemperatureTestFS>(env_->GetFileSystem());
- std::unique_ptr<Env> env(new CompositeEnvWrapper(env_, test_fs));
- Options options = CurrentOptions();
- options.last_level_temperature = Temperature::kWarm;
- // set dynamic_level to true so the compaction would compact the data to the
- // last level directly which will have the last_level_temperature
- options.level_compaction_dynamic_level_bytes = true;
- options.level0_file_num_compaction_trigger = 2;
- options.env = env.get();
- std::vector<std::string> cfs = {/*"default",*/ "test1", "test2"};
- CreateAndReopenWithCF(cfs, options);
- // Needed for later re-opens (weird)
- cfs.insert(cfs.begin(), kDefaultColumnFamilyName);
- // Generate a bottommost file in all CFs
- for (int cf = 0; cf < 3; ++cf) {
- ASSERT_OK(Put(cf, "a", "val"));
- ASSERT_OK(Put(cf, "c", "val"));
- ASSERT_OK(Flush(cf));
- ASSERT_OK(Put(cf, "b", "val"));
- ASSERT_OK(Put(cf, "d", "val"));
- ASSERT_OK(Flush(cf));
- }
- ASSERT_OK(dbfull()->TEST_WaitForCompact());
- // verify
- ASSERT_GT(GetSstSizeHelper(Temperature::kWarm), 0);
- ASSERT_EQ(GetSstSizeHelper(Temperature::kUnknown), 0);
- ASSERT_EQ(GetSstSizeHelper(Temperature::kCold), 0);
- ASSERT_EQ(GetSstSizeHelper(Temperature::kHot), 0);
- // Generate a non-bottommost file in all CFs
- for (int cf = 0; cf < 3; ++cf) {
- ASSERT_OK(Put(cf, "e", "val"));
- ASSERT_OK(Flush(cf));
- }
- // re-verify
- ASSERT_GT(GetSstSizeHelper(Temperature::kWarm), 0);
- // Not supported: ASSERT_GT(GetSstSizeHelper(Temperature::kUnknown), 0);
- ASSERT_EQ(GetSstSizeHelper(Temperature::kCold), 0);
- ASSERT_EQ(GetSstSizeHelper(Temperature::kHot), 0);
- // Now change FS temperature on bottommost file(s) to kCold
- std::map<uint64_t, Temperature> current_temps;
- test_fs->CopyCurrentSstFileTemperatures(¤t_temps);
- for (auto e : current_temps) {
- if (e.second == Temperature::kWarm) {
- test_fs->OverrideSstFileTemperature(e.first, Temperature::kCold);
- }
- }
- // Metadata not yet updated
- ASSERT_EQ(Get("a"), "val");
- ASSERT_EQ(GetSstSizeHelper(Temperature::kCold), 0);
- // Update with Close and UpdateManifestForFilesState, but first save cf
- // descriptors
- std::vector<ColumnFamilyDescriptor> column_families;
- for (size_t i = 0; i < handles_.size(); ++i) {
- ColumnFamilyDescriptor cfdescriptor;
- handles_[i]->GetDescriptor(&cfdescriptor).PermitUncheckedError();
- column_families.push_back(cfdescriptor);
- }
- Close();
- experimental::UpdateManifestForFilesStateOptions update_opts;
- update_opts.update_temperatures = true;
- ASSERT_OK(experimental::UpdateManifestForFilesState(
- options, dbname_, column_families, update_opts));
- // Re-open and re-verify after update
- ReopenWithColumnFamilies(cfs, options);
- ASSERT_GT(GetSstSizeHelper(Temperature::kCold), 0);
- // Not supported: ASSERT_GT(GetSstSizeHelper(Temperature::kUnknown), 0);
- ASSERT_EQ(GetSstSizeHelper(Temperature::kWarm), 0);
- ASSERT_EQ(GetSstSizeHelper(Temperature::kHot), 0);
- // Change kUnknown to kHot
- test_fs->CopyCurrentSstFileTemperatures(¤t_temps);
- for (auto e : current_temps) {
- if (e.second == Temperature::kUnknown) {
- test_fs->OverrideSstFileTemperature(e.first, Temperature::kHot);
- }
- }
- // Update with Close and UpdateManifestForFilesState
- Close();
- ASSERT_OK(experimental::UpdateManifestForFilesState(
- options, dbname_, column_families, update_opts));
- // Re-open and re-verify after update
- ReopenWithColumnFamilies(cfs, options);
- ASSERT_GT(GetSstSizeHelper(Temperature::kCold), 0);
- ASSERT_EQ(GetSstSizeHelper(Temperature::kUnknown), 0);
- ASSERT_EQ(GetSstSizeHelper(Temperature::kWarm), 0);
- ASSERT_GT(GetSstSizeHelper(Temperature::kHot), 0);
- Close();
- }
- // WAL recovery mode is WALRecoveryMode::kPointInTimeRecovery.
- TEST_F(DBTest2, PointInTimeRecoveryWithIOErrorWhileReadingWal) {
- Options options = CurrentOptions();
- DestroyAndReopen(options);
- ASSERT_OK(Put("foo", "value0"));
- Close();
- SyncPoint::GetInstance()->DisableProcessing();
- SyncPoint::GetInstance()->ClearAllCallBacks();
- bool should_inject_error = false;
- SyncPoint::GetInstance()->SetCallBack(
- "DBImpl::RecoverLogFiles:BeforeReadWal",
- [&](void* /*arg*/) { should_inject_error = true; });
- SyncPoint::GetInstance()->SetCallBack(
- "LogReader::ReadMore:AfterReadFile", [&](void* arg) {
- if (should_inject_error) {
- ASSERT_NE(nullptr, arg);
- *static_cast<Status*>(arg) = Status::IOError("Injected IOError");
- }
- });
- SyncPoint::GetInstance()->EnableProcessing();
- options.avoid_flush_during_recovery = true;
- options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery;
- Status s = TryReopen(options);
- ASSERT_TRUE(s.IsIOError());
- }
- TEST_F(DBTest2, PointInTimeRecoveryWithSyncFailureInCFCreation) {
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
- {{"DBImpl::BackgroundCallFlush:Start:1",
- "PointInTimeRecoveryWithSyncFailureInCFCreation:1"},
- {"PointInTimeRecoveryWithSyncFailureInCFCreation:2",
- "DBImpl::BackgroundCallFlush:Start:2"}});
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
- CreateColumnFamilies({"test1"}, Options());
- ASSERT_OK(Put("foo", "bar"));
- // Creating a CF when a flush is going on, log is synced but the
- // closed log file is not synced and corrupted.
- port::Thread flush_thread([&]() { ASSERT_NOK(Flush()); });
- TEST_SYNC_POINT("PointInTimeRecoveryWithSyncFailureInCFCreation:1");
- CreateColumnFamilies({"test2"}, Options());
- env_->corrupt_in_sync_ = true;
- TEST_SYNC_POINT("PointInTimeRecoveryWithSyncFailureInCFCreation:2");
- flush_thread.join();
- env_->corrupt_in_sync_ = false;
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
- // Reopening the DB should not corrupt anything
- Options options = CurrentOptions();
- options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery;
- ReopenWithColumnFamilies({"default", "test1", "test2"}, options);
- }
- TEST_F(DBTest2, SortL0FilesByEpochNumber) {
- Options options = CurrentOptions();
- options.num_levels = 1;
- options.compaction_style = kCompactionStyleUniversal;
- DestroyAndReopen(options);
- // Set up L0 files to be sorted by their epoch_number
- ASSERT_OK(Put("key1", "seq1"));
- SstFileWriter sst_file_writer{EnvOptions(), options};
- std::string external_file1 = dbname_ + "/test_files1.sst";
- std::string external_file2 = dbname_ + "/test_files2.sst";
- ASSERT_OK(sst_file_writer.Open(external_file1));
- ASSERT_OK(sst_file_writer.Put("key2", "seq0"));
- ASSERT_OK(sst_file_writer.Finish());
- ASSERT_OK(sst_file_writer.Open(external_file2));
- ASSERT_OK(sst_file_writer.Put("key3", "seq0"));
- ASSERT_OK(sst_file_writer.Finish());
- ASSERT_OK(Put("key4", "seq2"));
- ASSERT_OK(Flush());
- auto* handle = db_->DefaultColumnFamily();
- ASSERT_OK(db_->IngestExternalFile(handle, {external_file1, external_file2},
- IngestExternalFileOptions()));
- // To verify L0 files are sorted by epoch_number in descending order
- // instead of largest_seqno
- std::vector<FileMetaData*> level0_files = GetLevelFileMetadatas(0 /* level*/);
- ASSERT_EQ(level0_files.size(), 3);
- EXPECT_EQ(level0_files[0]->epoch_number, 3);
- EXPECT_EQ(level0_files[0]->fd.largest_seqno, 0);
- ASSERT_EQ(level0_files[0]->num_entries, 1);
- ASSERT_TRUE(level0_files[0]->largest.user_key() == Slice("key3"));
- EXPECT_EQ(level0_files[1]->epoch_number, 2);
- EXPECT_EQ(level0_files[1]->fd.largest_seqno, 0);
- ASSERT_EQ(level0_files[1]->num_entries, 1);
- ASSERT_TRUE(level0_files[1]->largest.user_key() == Slice("key2"));
- EXPECT_EQ(level0_files[2]->epoch_number, 1);
- EXPECT_EQ(level0_files[2]->fd.largest_seqno, 2);
- ASSERT_EQ(level0_files[2]->num_entries, 2);
- ASSERT_TRUE(level0_files[2]->largest.user_key() == Slice("key4"));
- ASSERT_TRUE(level0_files[2]->smallest.user_key() == Slice("key1"));
- // To verify compacted file is assigned with the minimum epoch_number
- // among input files'
- ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
- level0_files = GetLevelFileMetadatas(0 /* level*/);
- ASSERT_EQ(level0_files.size(), 1);
- EXPECT_EQ(level0_files[0]->epoch_number, 1);
- ASSERT_EQ(level0_files[0]->num_entries, 4);
- ASSERT_TRUE(level0_files[0]->largest.user_key() == Slice("key4"));
- ASSERT_TRUE(level0_files[0]->smallest.user_key() == Slice("key1"));
- }
- TEST_F(DBTest2, SameEpochNumberAfterCompactRangeChangeLevel) {
- Options options = CurrentOptions();
- options.num_levels = 7;
- options.compaction_style = CompactionStyle::kCompactionStyleLevel;
- options.disable_auto_compactions = true;
- DestroyAndReopen(options);
- // Set up the file in L1 to be moved to L0 in later step of CompactRange()
- ASSERT_OK(Put("key1", "seq1"));
- ASSERT_OK(Flush());
- MoveFilesToLevel(1, 0);
- std::vector<FileMetaData*> level0_files = GetLevelFileMetadatas(0 /* level*/);
- ASSERT_EQ(level0_files.size(), 0);
- std::vector<FileMetaData*> level1_files = GetLevelFileMetadatas(1 /* level*/);
- ASSERT_EQ(level1_files.size(), 1);
- std::vector<FileMetaData*> level2_files = GetLevelFileMetadatas(2 /* level*/);
- ASSERT_EQ(level2_files.size(), 0);
- ASSERT_EQ(level1_files[0]->epoch_number, 1);
- // To verify CompactRange() moving file to L0 still keeps the file's
- // epoch_number
- CompactRangeOptions croptions;
- croptions.change_level = true;
- croptions.target_level = 0;
- ASSERT_OK(db_->CompactRange(croptions, nullptr, nullptr));
- level0_files = GetLevelFileMetadatas(0 /* level*/);
- level1_files = GetLevelFileMetadatas(1 /* level*/);
- ASSERT_EQ(level0_files.size(), 1);
- ASSERT_EQ(level1_files.size(), 0);
- EXPECT_EQ(level0_files[0]->epoch_number, 1);
- ASSERT_EQ(level0_files[0]->num_entries, 1);
- ASSERT_TRUE(level0_files[0]->largest.user_key() == Slice("key1"));
- }
- TEST_F(DBTest2, RecoverEpochNumber) {
- for (bool allow_ingest_behind : {true, false}) {
- Options options = CurrentOptions();
- options.allow_ingest_behind = allow_ingest_behind;
- options.num_levels = 7;
- options.compaction_style = kCompactionStyleLevel;
- options.disable_auto_compactions = true;
- DestroyAndReopen(options);
- CreateAndReopenWithCF({"cf1"}, options);
- VersionSet* versions = dbfull()->GetVersionSet();
- assert(versions);
- const ColumnFamilyData* default_cf =
- versions->GetColumnFamilySet()->GetDefault();
- const ColumnFamilyData* cf1 =
- versions->GetColumnFamilySet()->GetColumnFamily("cf1");
- // Set up files in default CF to recover in later step
- ASSERT_OK(Put("key1", "epoch1"));
- ASSERT_OK(Flush());
- MoveFilesToLevel(1 /* level*/, 0 /* cf*/);
- ASSERT_OK(Put("key2", "epoch2"));
- ASSERT_OK(Flush());
- std::vector<FileMetaData*> level0_files =
- GetLevelFileMetadatas(0 /* level*/);
- ASSERT_EQ(level0_files.size(), 1);
- ASSERT_EQ(level0_files[0]->epoch_number,
- allow_ingest_behind
- ? 2 + kReservedEpochNumberForFileIngestedBehind
- : 2);
- ASSERT_EQ(level0_files[0]->num_entries, 1);
- ASSERT_TRUE(level0_files[0]->largest.user_key() == Slice("key2"));
- std::vector<FileMetaData*> level1_files =
- GetLevelFileMetadatas(1 /* level*/);
- ASSERT_EQ(level1_files.size(), 1);
- ASSERT_EQ(level1_files[0]->epoch_number,
- allow_ingest_behind
- ? 1 + kReservedEpochNumberForFileIngestedBehind
- : 1);
- ASSERT_EQ(level1_files[0]->num_entries, 1);
- ASSERT_TRUE(level1_files[0]->largest.user_key() == Slice("key1"));
- // Set up files in cf1 to recover in later step
- ASSERT_OK(Put(1 /* cf */, "cf1_key1", "epoch1"));
- ASSERT_OK(Flush(1 /* cf */));
- std::vector<FileMetaData*> level0_files_cf1 =
- GetLevelFileMetadatas(0 /* level*/, 1 /* cf*/);
- ASSERT_EQ(level0_files_cf1.size(), 1);
- ASSERT_EQ(level0_files_cf1[0]->epoch_number,
- allow_ingest_behind
- ? 1 + kReservedEpochNumberForFileIngestedBehind
- : 1);
- ASSERT_EQ(level0_files_cf1[0]->num_entries, 1);
- ASSERT_TRUE(level0_files_cf1[0]->largest.user_key() == Slice("cf1_key1"));
- ASSERT_EQ(default_cf->GetNextEpochNumber(),
- allow_ingest_behind
- ? 3 + kReservedEpochNumberForFileIngestedBehind
- : 3);
- ASSERT_EQ(cf1->GetNextEpochNumber(),
- allow_ingest_behind
- ? 2 + kReservedEpochNumberForFileIngestedBehind
- : 2);
- // To verify epoch_number of files of different levels/CFs are
- // persisted and recovered correctly
- ReopenWithColumnFamilies({"default", "cf1"}, options);
- versions = dbfull()->GetVersionSet();
- assert(versions);
- default_cf = versions->GetColumnFamilySet()->GetDefault();
- cf1 = versions->GetColumnFamilySet()->GetColumnFamily("cf1");
- level0_files = GetLevelFileMetadatas(0 /* level*/);
- ASSERT_EQ(level0_files.size(), 1);
- EXPECT_EQ(level0_files[0]->epoch_number,
- allow_ingest_behind
- ? 2 + kReservedEpochNumberForFileIngestedBehind
- : 2);
- ASSERT_EQ(level0_files[0]->num_entries, 1);
- ASSERT_TRUE(level0_files[0]->largest.user_key() == Slice("key2"));
- level1_files = GetLevelFileMetadatas(1 /* level*/);
- ASSERT_EQ(level1_files.size(), 1);
- EXPECT_EQ(level1_files[0]->epoch_number,
- allow_ingest_behind
- ? 1 + kReservedEpochNumberForFileIngestedBehind
- : 1);
- ASSERT_EQ(level1_files[0]->num_entries, 1);
- ASSERT_TRUE(level1_files[0]->largest.user_key() == Slice("key1"));
- level0_files_cf1 = GetLevelFileMetadatas(0 /* level*/, 1 /* cf*/);
- ASSERT_EQ(level0_files_cf1.size(), 1);
- EXPECT_EQ(level0_files_cf1[0]->epoch_number,
- allow_ingest_behind
- ? 1 + kReservedEpochNumberForFileIngestedBehind
- : 1);
- ASSERT_EQ(level0_files_cf1[0]->num_entries, 1);
- ASSERT_TRUE(level0_files_cf1[0]->largest.user_key() == Slice("cf1_key1"));
- // To verify next epoch number is recovered correctly
- EXPECT_EQ(default_cf->GetNextEpochNumber(),
- allow_ingest_behind
- ? 3 + kReservedEpochNumberForFileIngestedBehind
- : 3);
- EXPECT_EQ(cf1->GetNextEpochNumber(),
- allow_ingest_behind
- ? 2 + kReservedEpochNumberForFileIngestedBehind
- : 2);
- }
- }
- TEST_F(DBTest2, RenameDirectory) {
- Options options = CurrentOptions();
- DestroyAndReopen(options);
- ASSERT_OK(Put("foo", "value0"));
- Close();
- auto old_dbname = dbname_;
- auto new_dbname = dbname_ + "_2";
- EXPECT_OK(env_->RenameFile(dbname_, new_dbname));
- options.create_if_missing = false;
- dbname_ = new_dbname;
- ASSERT_OK(TryReopen(options));
- ASSERT_EQ("value0", Get("foo"));
- Destroy(options);
- dbname_ = old_dbname;
- }
- TEST_F(DBTest2, SstUniqueIdVerifyBackwardCompatible) {
- const int kNumSst = 3;
- const int kLevel0Trigger = 4;
- auto options = CurrentOptions();
- options.level0_file_num_compaction_trigger = kLevel0Trigger;
- options.statistics = CreateDBStatistics();
- // Skip for now
- options.verify_sst_unique_id_in_manifest = false;
- Reopen(options);
- std::atomic_int skipped = 0;
- std::atomic_int passed = 0;
- SyncPoint::GetInstance()->SetCallBack(
- "BlockBasedTable::Open::SkippedVerifyUniqueId",
- [&](void* /*arg*/) { skipped++; });
- SyncPoint::GetInstance()->SetCallBack(
- "BlockBasedTable::Open::PassedVerifyUniqueId",
- [&](void* /*arg*/) { passed++; });
- SyncPoint::GetInstance()->EnableProcessing();
- // generate a few SSTs
- for (int i = 0; i < kNumSst; i++) {
- for (int j = 0; j < 100; j++) {
- ASSERT_OK(Put(Key(i * 10 + j), "value"));
- }
- ASSERT_OK(Flush());
- }
- // Verification has been skipped on files so far
- EXPECT_EQ(skipped, kNumSst);
- EXPECT_EQ(passed, 0);
- // Reopen with verification
- options.verify_sst_unique_id_in_manifest = true;
- skipped = 0;
- passed = 0;
- Reopen(options);
- EXPECT_EQ(skipped, 0);
- EXPECT_EQ(passed, kNumSst);
- // Now simulate no unique id in manifest for next file
- // NOTE: this only works for loading manifest from disk,
- // not in-memory manifest, so we need to re-open below.
- SyncPoint::GetInstance()->SetCallBack(
- "VersionEdit::EncodeTo:UniqueId", [&](void* arg) {
- auto unique_id = static_cast<UniqueId64x2*>(arg);
- // remove id before writing it to manifest
- (*unique_id)[0] = 0;
- (*unique_id)[1] = 0;
- });
- // test compaction generated Sst
- for (int i = kNumSst; i < kLevel0Trigger; i++) {
- for (int j = 0; j < 100; j++) {
- ASSERT_OK(Put(Key(i * 10 + j), "value"));
- }
- ASSERT_OK(Flush());
- }
- ASSERT_OK(dbfull()->TEST_WaitForCompact());
- ASSERT_EQ("0,1", FilesPerLevel(0));
- // Reopen (with verification)
- ASSERT_TRUE(options.verify_sst_unique_id_in_manifest);
- skipped = 0;
- passed = 0;
- Reopen(options);
- EXPECT_EQ(skipped, 1);
- EXPECT_EQ(passed, 0);
- }
- TEST_F(DBTest2, SstUniqueIdVerify) {
- const int kNumSst = 3;
- const int kLevel0Trigger = 4;
- auto options = CurrentOptions();
- options.level0_file_num_compaction_trigger = kLevel0Trigger;
- // Allow mismatch for now
- options.verify_sst_unique_id_in_manifest = false;
- Reopen(options);
- SyncPoint::GetInstance()->SetCallBack(
- "PropertyBlockBuilder::AddTableProperty:Start", [&](void* props_vs) {
- auto props = static_cast<TableProperties*>(props_vs);
- // update table property session_id to a different one, which
- // changes unique ID
- props->db_session_id = DBImpl::GenerateDbSessionId(nullptr);
- });
- SyncPoint::GetInstance()->EnableProcessing();
- // generate a few SSTs
- for (int i = 0; i < kNumSst; i++) {
- for (int j = 0; j < 100; j++) {
- ASSERT_OK(Put(Key(i * 10 + j), "value"));
- }
- ASSERT_OK(Flush());
- }
- // Reopen with verification should report corruption
- options.verify_sst_unique_id_in_manifest = true;
- auto s = TryReopen(options);
- ASSERT_TRUE(s.IsCorruption());
- // Reopen without verification should be fine
- options.verify_sst_unique_id_in_manifest = false;
- Reopen(options);
- // test compaction generated Sst
- for (int i = kNumSst; i < kLevel0Trigger; i++) {
- for (int j = 0; j < 100; j++) {
- ASSERT_OK(Put(Key(i * 10 + j), "value"));
- }
- ASSERT_OK(Flush());
- }
- ASSERT_OK(dbfull()->TEST_WaitForCompact());
- ASSERT_EQ("0,1", FilesPerLevel(0));
- // Reopen with verification should fail
- options.verify_sst_unique_id_in_manifest = true;
- s = TryReopen(options);
- ASSERT_TRUE(s.IsCorruption());
- }
- TEST_F(DBTest2, SstUniqueIdVerifyMultiCFs) {
- const int kNumSst = 3;
- const int kLevel0Trigger = 4;
- auto options = CurrentOptions();
- options.level0_file_num_compaction_trigger = kLevel0Trigger;
- // Allow mismatch for now
- options.verify_sst_unique_id_in_manifest = false;
- CreateAndReopenWithCF({"one", "two"}, options);
- // generate good SSTs
- for (int cf_num : {0, 2}) {
- for (int i = 0; i < kNumSst; i++) {
- for (int j = 0; j < 100; j++) {
- ASSERT_OK(Put(cf_num, Key(i * 10 + j), "value"));
- }
- ASSERT_OK(Flush(cf_num));
- }
- }
- // generate SSTs with bad unique id
- SyncPoint::GetInstance()->SetCallBack(
- "PropertyBlockBuilder::AddTableProperty:Start", [&](void* props_vs) {
- auto props = static_cast<TableProperties*>(props_vs);
- // update table property session_id to a different one
- props->db_session_id = DBImpl::GenerateDbSessionId(nullptr);
- });
- SyncPoint::GetInstance()->EnableProcessing();
- for (int i = 0; i < kNumSst; i++) {
- for (int j = 0; j < 100; j++) {
- ASSERT_OK(Put(1, Key(i * 10 + j), "value"));
- }
- ASSERT_OK(Flush(1));
- }
- // Reopen with verification should report corruption
- options.verify_sst_unique_id_in_manifest = true;
- auto s = TryReopenWithColumnFamilies({"default", "one", "two"}, options);
- ASSERT_TRUE(s.IsCorruption());
- }
- TEST_F(DBTest2, BestEffortsRecoveryWithSstUniqueIdVerification) {
- const auto tamper_with_uniq_id = [&](void* arg) {
- auto props = static_cast<TableProperties*>(arg);
- assert(props);
- // update table property session_id to a different one
- props->db_session_id = DBImpl::GenerateDbSessionId(nullptr);
- };
- const auto assert_db = [&](size_t expected_count,
- const std::string& expected_v) {
- std::unique_ptr<Iterator> it(db_->NewIterator(ReadOptions()));
- size_t cnt = 0;
- for (it->SeekToFirst(); it->Valid(); it->Next(), ++cnt) {
- ASSERT_EQ(std::to_string(cnt), it->key());
- ASSERT_EQ(expected_v, it->value());
- }
- EXPECT_OK(it->status());
- ASSERT_EQ(expected_count, cnt);
- };
- const int num_l0_compaction_trigger = 8;
- const int num_l0 = num_l0_compaction_trigger - 1;
- Options options = CurrentOptions();
- options.level0_file_num_compaction_trigger = num_l0_compaction_trigger;
- for (int k = 0; k < num_l0; ++k) {
- // Allow mismatch for now
- options.verify_sst_unique_id_in_manifest = false;
- DestroyAndReopen(options);
- constexpr size_t num_keys_per_file = 10;
- for (int i = 0; i < num_l0; ++i) {
- for (size_t j = 0; j < num_keys_per_file; ++j) {
- ASSERT_OK(Put(std::to_string(j), "v" + std::to_string(i)));
- }
- if (i == k) {
- SyncPoint::GetInstance()->DisableProcessing();
- SyncPoint::GetInstance()->SetCallBack(
- "PropertyBlockBuilder::AddTableProperty:Start",
- tamper_with_uniq_id);
- SyncPoint::GetInstance()->EnableProcessing();
- }
- ASSERT_OK(Flush());
- }
- options.verify_sst_unique_id_in_manifest = true;
- Status s = TryReopen(options);
- ASSERT_TRUE(s.IsCorruption());
- options.best_efforts_recovery = true;
- Reopen(options);
- assert_db(k == 0 ? 0 : num_keys_per_file, "v" + std::to_string(k - 1));
- // Reopen with regular recovery
- options.best_efforts_recovery = false;
- Reopen(options);
- assert_db(k == 0 ? 0 : num_keys_per_file, "v" + std::to_string(k - 1));
- SyncPoint::GetInstance()->DisableProcessing();
- SyncPoint::GetInstance()->ClearAllCallBacks();
- for (size_t i = 0; i < num_keys_per_file; ++i) {
- ASSERT_OK(Put(std::to_string(i), "v"));
- }
- ASSERT_OK(Flush());
- Reopen(options);
- {
- for (size_t i = 0; i < num_keys_per_file; ++i) {
- ASSERT_EQ("v", Get(std::to_string(i)));
- }
- }
- }
- }
- TEST_F(DBTest2, GetLatestSeqAndTsForKey) {
- Destroy(last_options_);
- Options options = CurrentOptions();
- options.max_write_buffer_size_to_maintain = 64 << 10;
- options.create_if_missing = true;
- options.disable_auto_compactions = true;
- options.comparator = test::BytewiseComparatorWithU64TsWrapper();
- options.statistics = CreateDBStatistics();
- Reopen(options);
- constexpr uint64_t kTsU64Value = 12;
- for (uint64_t key = 0; key < 100; ++key) {
- std::string ts;
- PutFixed64(&ts, kTsU64Value);
- std::string key_str;
- PutFixed64(&key_str, key);
- std::reverse(key_str.begin(), key_str.end());
- ASSERT_OK(db_->Put(WriteOptions(), key_str, ts, "value"));
- }
- ASSERT_OK(Flush());
- constexpr bool cache_only = true;
- constexpr SequenceNumber lower_bound_seq = 0;
- auto* cfhi = static_cast_with_check<ColumnFamilyHandleImpl>(
- dbfull()->DefaultColumnFamily());
- assert(cfhi);
- assert(cfhi->cfd());
- SuperVersion* sv = cfhi->cfd()->GetSuperVersion();
- for (uint64_t key = 0; key < 100; ++key) {
- std::string key_str;
- PutFixed64(&key_str, key);
- std::reverse(key_str.begin(), key_str.end());
- std::string ts;
- SequenceNumber seq = kMaxSequenceNumber;
- bool found_record_for_key = false;
- bool is_blob_index = false;
- const Status s = dbfull()->GetLatestSequenceForKey(
- sv, key_str, cache_only, lower_bound_seq, &seq, &ts,
- &found_record_for_key, &is_blob_index);
- ASSERT_OK(s);
- std::string expected_ts;
- PutFixed64(&expected_ts, kTsU64Value);
- ASSERT_EQ(expected_ts, ts);
- ASSERT_TRUE(found_record_for_key);
- ASSERT_FALSE(is_blob_index);
- }
- // Verify that no read to SST files.
- ASSERT_EQ(0, options.statistics->getTickerCount(GET_HIT_L0));
- }
- #if defined(ZSTD)
- TEST_F(DBTest2, ZSTDChecksum) {
- // Verify that corruption during decompression is caught.
- Options options = CurrentOptions();
- options.create_if_missing = true;
- options.compression = kZSTD;
- options.compression_opts.max_compressed_bytes_per_kb = 1024;
- options.compression_opts.checksum = true;
- DestroyAndReopen(options);
- Random rnd(33);
- ASSERT_OK(Put(Key(0), rnd.RandomString(4 << 10)));
- SyncPoint::GetInstance()->SetCallBack(
- "BlockBasedTableBuilder::WriteBlock:TamperWithCompressedData",
- [&](void* arg) {
- std::string* output = static_cast<std::string*>(arg);
- // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#zstandard-frames
- // Checksum is the last 4 bytes, corrupting that part in unit test is
- // more controllable.
- output->data()[output->size() - 1]++;
- });
- SyncPoint::GetInstance()->EnableProcessing();
- ASSERT_OK(Flush());
- PinnableSlice val;
- Status s = Get(Key(0), &val);
- ASSERT_TRUE(s.IsCorruption());
- // Corruption caught during flush.
- options.paranoid_file_checks = true;
- DestroyAndReopen(options);
- ASSERT_OK(Put(Key(0), rnd.RandomString(4 << 10)));
- s = Flush();
- ASSERT_TRUE(s.IsCorruption());
- }
- #endif
- TEST_F(DBTest2, TableCacheMissDuringReadFromBlockCacheTier) {
- Options options = CurrentOptions();
- options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
- Reopen(options);
- // Give table cache zero capacity to prevent preloading tables. That way,
- // `kBlockCacheTier` reads will fail due to table cache misses.
- dbfull()->TEST_table_cache()->SetCapacity(0);
- ASSERT_OK(Put("foo", "bar"));
- ASSERT_OK(Flush());
- uint64_t orig_num_file_opens = TestGetTickerCount(options, NO_FILE_OPENS);
- ReadOptions non_blocking_opts;
- non_blocking_opts.read_tier = kBlockCacheTier;
- std::string value;
- ASSERT_TRUE(db_->Get(non_blocking_opts, "foo", &value).IsIncomplete());
- ASSERT_EQ(orig_num_file_opens, TestGetTickerCount(options, NO_FILE_OPENS));
- }
TEST_F(DBTest2, GetFileChecksumsFromCurrentManifest_CRC32) {
  // End-to-end check that experimental::GetFileChecksumsFromCurrentManifest,
  // reading only the MANIFEST of a closed DB, reports the same per-file
  // CRC32C checksums that the live DB exposed via GetLiveFilesMetaData(),
  // and that files of a dropped column family are excluded.
  Options opts = CurrentOptions();
  opts.create_if_missing = true;
  opts.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory();
  // High trigger keeps the four L0 files from being compacted away, so the
  // flushed files remain live for the comparison below.
  opts.level0_file_num_compaction_trigger = 10;

  // Bootstrap the test database.
  DB* db = nullptr;
  std::string dbname = test::PerThreadDBPath("file_chksum");
  ASSERT_OK(DB::Open(opts, dbname, &db));

  WriteOptions wopts;
  FlushOptions fopts;
  fopts.wait = true;
  Random rnd(test::RandomSeed());

  // Write 4 files into the default column family.
  for (int i = 0; i < 4; i++) {
    ASSERT_OK(db->Put(wopts, Key(i), rnd.RandomString(100)));
    ASSERT_OK(db->Flush(fopts));
  }

  // Create a new column family, write 1 file into it and drop it.
  ColumnFamilyHandle* cf;
  ASSERT_OK(
      db->CreateColumnFamily(ColumnFamilyOptions(), "soon_to_be_deleted", &cf));
  ASSERT_OK(db->Put(wopts, cf, "some_key", "some_value"));
  ASSERT_OK(db->Flush(fopts, cf));

  // Drop column family should generate corresponding version edit
  // in manifest, which we expect to be correctly interpreted by
  // GetFileChecksumsFromCurrentManifest API after db close.
  ASSERT_OK(db->DropColumnFamily(cf));
  delete cf;
  cf = nullptr;

  // Obtain rich files metadata for source of truth.
  std::vector<LiveFileMetaData> live_files;
  db->GetLiveFilesMetaData(&live_files);
  ASSERT_OK(db->Close());
  delete db;
  db = nullptr;

  // Process current MANIFEST file and build internal file checksum mappings.
  // A read-only filesystem proves the API does not need write access.
  std::unique_ptr<FileChecksumList> checksum_list(NewFileChecksumList());
  auto read_only_fs =
      std::make_shared<ReadOnlyFileSystem>(env_->GetFileSystem());
  ASSERT_OK(experimental::GetFileChecksumsFromCurrentManifest(
      read_only_fs.get(), dbname, checksum_list.get()));
  ASSERT_TRUE(checksum_list != nullptr);

  // Retrieve files, related checksums and checksum functions.
  std::vector<uint64_t> file_numbers;
  std::vector<std::string> checksums;
  std::vector<std::string> checksum_func_names;
  ASSERT_OK(checksum_list->GetAllFileChecksums(&file_numbers, &checksums,
                                               &checksum_func_names));

  // Compare results: every live file must be present with a matching
  // checksum and checksum-function name, and nothing extra (size check).
  ASSERT_EQ(live_files.size(), checksum_list->size());
  for (size_t i = 0; i < live_files.size(); i++) {
    std::string stored_checksum;
    std::string stored_func_name;
    ASSERT_OK(checksum_list->SearchOneFileChecksum(
        live_files[i].file_number, &stored_checksum, &stored_func_name));
    ASSERT_EQ(live_files[i].file_checksum, stored_checksum);
    ASSERT_EQ(live_files[i].file_checksum_func_name, stored_func_name);
  }
}
- } // namespace ROCKSDB_NAMESPACE
int main(int argc, char** argv) {
  // Install the stack-trace handler before anything else so a crash in any
  // test produces a symbolized backtrace.
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  // Project-provided hook that consumes remaining command-line args;
  // presumably registers custom test object factories — defined elsewhere.
  RegisterCustomObjects(argc, argv);
  return RUN_ALL_TESTS();
}
|