db_test.cc 221 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  7. // Use of this source code is governed by a BSD-style license that can be
  8. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  9. // Introduction of SyncPoint effectively disabled building and running this test
  10. // in Release build.
  11. // which is a pity, it is a good test
  12. #include <fcntl.h>
  13. #include <algorithm>
  14. #include <set>
  15. #include <thread>
  16. #include <unordered_set>
  17. #include <utility>
  18. #ifndef OS_WIN
  19. #include <unistd.h>
  20. #endif
  21. #ifdef OS_SOLARIS
  22. #include <alloca.h>
  23. #endif
  24. #include "cache/lru_cache.h"
  25. #include "db/blob_index.h"
  26. #include "db/db_impl/db_impl.h"
  27. #include "db/db_test_util.h"
  28. #include "db/dbformat.h"
  29. #include "db/job_context.h"
  30. #include "db/version_set.h"
  31. #include "db/write_batch_internal.h"
  32. #include "env/mock_env.h"
  33. #include "file/filename.h"
  34. #include "memtable/hash_linklist_rep.h"
  35. #include "monitoring/thread_status_util.h"
  36. #include "port/port.h"
  37. #include "port/stack_trace.h"
  38. #include "rocksdb/cache.h"
  39. #include "rocksdb/compaction_filter.h"
  40. #include "rocksdb/convenience.h"
  41. #include "rocksdb/db.h"
  42. #include "rocksdb/env.h"
  43. #include "rocksdb/experimental.h"
  44. #include "rocksdb/filter_policy.h"
  45. #include "rocksdb/options.h"
  46. #include "rocksdb/perf_context.h"
  47. #include "rocksdb/slice.h"
  48. #include "rocksdb/slice_transform.h"
  49. #include "rocksdb/snapshot.h"
  50. #include "rocksdb/table.h"
  51. #include "rocksdb/table_properties.h"
  52. #include "rocksdb/thread_status.h"
  53. #include "rocksdb/utilities/checkpoint.h"
  54. #include "rocksdb/utilities/optimistic_transaction_db.h"
  55. #include "rocksdb/utilities/write_batch_with_index.h"
  56. #include "table/block_based/block_based_table_factory.h"
  57. #include "table/mock_table.h"
  58. #include "table/plain/plain_table_factory.h"
  59. #include "table/scoped_arena_iterator.h"
  60. #include "test_util/sync_point.h"
  61. #include "test_util/testharness.h"
  62. #include "test_util/testutil.h"
  63. #include "util/compression.h"
  64. #include "util/mutexlock.h"
  65. #include "util/rate_limiter.h"
  66. #include "util/string_util.h"
  67. #include "utilities/merge_operators.h"
  68. namespace ROCKSDB_NAMESPACE {
// General-purpose DB test fixture. DBTestBase creates and manages a
// per-test database under the "/db_test" path prefix and tears it down
// after each test.
class DBTest : public DBTestBase {
 public:
  DBTest() : DBTestBase("/db_test") {}
};
  73. class DBTestWithParam
  74. : public DBTest,
  75. public testing::WithParamInterface<std::tuple<uint32_t, bool>> {
  76. public:
  77. DBTestWithParam() {
  78. max_subcompactions_ = std::get<0>(GetParam());
  79. exclusive_manual_compaction_ = std::get<1>(GetParam());
  80. }
  81. // Required if inheriting from testing::WithParamInterface<>
  82. static void SetUpTestCase() {}
  83. static void TearDownTestCase() {}
  84. uint32_t max_subcompactions_;
  85. bool exclusive_manual_compaction_;
  86. };
  87. TEST_F(DBTest, MockEnvTest) {
  88. std::unique_ptr<MockEnv> env{new MockEnv(Env::Default())};
  89. Options options;
  90. options.create_if_missing = true;
  91. options.env = env.get();
  92. DB* db;
  93. const Slice keys[] = {Slice("aaa"), Slice("bbb"), Slice("ccc")};
  94. const Slice vals[] = {Slice("foo"), Slice("bar"), Slice("baz")};
  95. ASSERT_OK(DB::Open(options, "/dir/db", &db));
  96. for (size_t i = 0; i < 3; ++i) {
  97. ASSERT_OK(db->Put(WriteOptions(), keys[i], vals[i]));
  98. }
  99. for (size_t i = 0; i < 3; ++i) {
  100. std::string res;
  101. ASSERT_OK(db->Get(ReadOptions(), keys[i], &res));
  102. ASSERT_TRUE(res == vals[i]);
  103. }
  104. Iterator* iterator = db->NewIterator(ReadOptions());
  105. iterator->SeekToFirst();
  106. for (size_t i = 0; i < 3; ++i) {
  107. ASSERT_TRUE(iterator->Valid());
  108. ASSERT_TRUE(keys[i] == iterator->key());
  109. ASSERT_TRUE(vals[i] == iterator->value());
  110. iterator->Next();
  111. }
  112. ASSERT_TRUE(!iterator->Valid());
  113. delete iterator;
  114. // TEST_FlushMemTable() is not supported in ROCKSDB_LITE
  115. #ifndef ROCKSDB_LITE
  116. DBImpl* dbi = reinterpret_cast<DBImpl*>(db);
  117. ASSERT_OK(dbi->TEST_FlushMemTable());
  118. for (size_t i = 0; i < 3; ++i) {
  119. std::string res;
  120. ASSERT_OK(db->Get(ReadOptions(), keys[i], &res));
  121. ASSERT_TRUE(res == vals[i]);
  122. }
  123. #endif // ROCKSDB_LITE
  124. delete db;
  125. }
  126. // NewMemEnv returns nullptr in ROCKSDB_LITE since class InMemoryEnv isn't
  127. // defined.
  128. #ifndef ROCKSDB_LITE
  129. TEST_F(DBTest, MemEnvTest) {
  130. std::unique_ptr<Env> env{NewMemEnv(Env::Default())};
  131. Options options;
  132. options.create_if_missing = true;
  133. options.env = env.get();
  134. DB* db;
  135. const Slice keys[] = {Slice("aaa"), Slice("bbb"), Slice("ccc")};
  136. const Slice vals[] = {Slice("foo"), Slice("bar"), Slice("baz")};
  137. ASSERT_OK(DB::Open(options, "/dir/db", &db));
  138. for (size_t i = 0; i < 3; ++i) {
  139. ASSERT_OK(db->Put(WriteOptions(), keys[i], vals[i]));
  140. }
  141. for (size_t i = 0; i < 3; ++i) {
  142. std::string res;
  143. ASSERT_OK(db->Get(ReadOptions(), keys[i], &res));
  144. ASSERT_TRUE(res == vals[i]);
  145. }
  146. Iterator* iterator = db->NewIterator(ReadOptions());
  147. iterator->SeekToFirst();
  148. for (size_t i = 0; i < 3; ++i) {
  149. ASSERT_TRUE(iterator->Valid());
  150. ASSERT_TRUE(keys[i] == iterator->key());
  151. ASSERT_TRUE(vals[i] == iterator->value());
  152. iterator->Next();
  153. }
  154. ASSERT_TRUE(!iterator->Valid());
  155. delete iterator;
  156. DBImpl* dbi = reinterpret_cast<DBImpl*>(db);
  157. ASSERT_OK(dbi->TEST_FlushMemTable());
  158. for (size_t i = 0; i < 3; ++i) {
  159. std::string res;
  160. ASSERT_OK(db->Get(ReadOptions(), keys[i], &res));
  161. ASSERT_TRUE(res == vals[i]);
  162. }
  163. delete db;
  164. options.create_if_missing = false;
  165. ASSERT_OK(DB::Open(options, "/dir/db", &db));
  166. for (size_t i = 0; i < 3; ++i) {
  167. std::string res;
  168. ASSERT_OK(db->Get(ReadOptions(), keys[i], &res));
  169. ASSERT_TRUE(res == vals[i]);
  170. }
  171. delete db;
  172. }
  173. #endif // ROCKSDB_LITE
// Writing an empty WriteBatch with sync=true (WAL enabled) must succeed,
// must not corrupt the log, and must leave previously written data
// readable across a re-open.
TEST_F(DBTest, WriteEmptyBatch) {
  Options options = CurrentOptions();
  options.env = env_;
  options.write_buffer_size = 100000;
  CreateAndReopenWithCF({"pikachu"}, options);

  ASSERT_OK(Put(1, "foo", "bar"));
  WriteOptions wo;
  wo.sync = true;
  wo.disableWAL = false;
  WriteBatch empty_batch;
  ASSERT_OK(dbfull()->Write(wo, &empty_batch));

  // make sure we can re-open it.
  ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options));
  ASSERT_EQ("bar", Get(1, "foo"));
}
  189. TEST_F(DBTest, SkipDelay) {
  190. Options options = CurrentOptions();
  191. options.env = env_;
  192. options.write_buffer_size = 100000;
  193. CreateAndReopenWithCF({"pikachu"}, options);
  194. for (bool sync : {true, false}) {
  195. for (bool disableWAL : {true, false}) {
  196. if (sync && disableWAL) {
  197. // sync and disableWAL is incompatible.
  198. continue;
  199. }
  200. // Use a small number to ensure a large delay that is still effective
  201. // when we do Put
  202. // TODO(myabandeh): this is time dependent and could potentially make
  203. // the test flaky
  204. auto token = dbfull()->TEST_write_controler().GetDelayToken(1);
  205. std::atomic<int> sleep_count(0);
  206. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
  207. "DBImpl::DelayWrite:Sleep",
  208. [&](void* /*arg*/) { sleep_count.fetch_add(1); });
  209. std::atomic<int> wait_count(0);
  210. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
  211. "DBImpl::DelayWrite:Wait",
  212. [&](void* /*arg*/) { wait_count.fetch_add(1); });
  213. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  214. WriteOptions wo;
  215. wo.sync = sync;
  216. wo.disableWAL = disableWAL;
  217. wo.no_slowdown = true;
  218. dbfull()->Put(wo, "foo", "bar");
  219. // We need the 2nd write to trigger delay. This is because delay is
  220. // estimated based on the last write size which is 0 for the first write.
  221. ASSERT_NOK(dbfull()->Put(wo, "foo2", "bar2"));
  222. ASSERT_GE(sleep_count.load(), 0);
  223. ASSERT_GE(wait_count.load(), 0);
  224. token.reset();
  225. token = dbfull()->TEST_write_controler().GetDelayToken(1000000000);
  226. wo.no_slowdown = false;
  227. ASSERT_OK(dbfull()->Put(wo, "foo3", "bar3"));
  228. ASSERT_GE(sleep_count.load(), 1);
  229. token.reset();
  230. }
  231. }
  232. }
  233. TEST_F(DBTest, MixedSlowdownOptions) {
  234. Options options = CurrentOptions();
  235. options.env = env_;
  236. options.write_buffer_size = 100000;
  237. CreateAndReopenWithCF({"pikachu"}, options);
  238. std::vector<port::Thread> threads;
  239. std::atomic<int> thread_num(0);
  240. std::function<void()> write_slowdown_func = [&]() {
  241. int a = thread_num.fetch_add(1);
  242. std::string key = "foo" + std::to_string(a);
  243. WriteOptions wo;
  244. wo.no_slowdown = false;
  245. ASSERT_OK(dbfull()->Put(wo, key, "bar"));
  246. };
  247. std::function<void()> write_no_slowdown_func = [&]() {
  248. int a = thread_num.fetch_add(1);
  249. std::string key = "foo" + std::to_string(a);
  250. WriteOptions wo;
  251. wo.no_slowdown = true;
  252. ASSERT_NOK(dbfull()->Put(wo, key, "bar"));
  253. };
  254. // Use a small number to ensure a large delay that is still effective
  255. // when we do Put
  256. // TODO(myabandeh): this is time dependent and could potentially make
  257. // the test flaky
  258. auto token = dbfull()->TEST_write_controler().GetDelayToken(1);
  259. std::atomic<int> sleep_count(0);
  260. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
  261. "DBImpl::DelayWrite:BeginWriteStallDone", [&](void* /*arg*/) {
  262. sleep_count.fetch_add(1);
  263. if (threads.empty()) {
  264. for (int i = 0; i < 2; ++i) {
  265. threads.emplace_back(write_slowdown_func);
  266. }
  267. for (int i = 0; i < 2; ++i) {
  268. threads.emplace_back(write_no_slowdown_func);
  269. }
  270. }
  271. });
  272. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  273. WriteOptions wo;
  274. wo.sync = false;
  275. wo.disableWAL = false;
  276. wo.no_slowdown = false;
  277. dbfull()->Put(wo, "foo", "bar");
  278. // We need the 2nd write to trigger delay. This is because delay is
  279. // estimated based on the last write size which is 0 for the first write.
  280. ASSERT_OK(dbfull()->Put(wo, "foo2", "bar2"));
  281. token.reset();
  282. for (auto& t : threads) {
  283. t.join();
  284. }
  285. ASSERT_GE(sleep_count.load(), 1);
  286. wo.no_slowdown = true;
  287. ASSERT_OK(dbfull()->Put(wo, "foo3", "bar"));
  288. }
  289. TEST_F(DBTest, MixedSlowdownOptionsInQueue) {
  290. Options options = CurrentOptions();
  291. options.env = env_;
  292. options.write_buffer_size = 100000;
  293. CreateAndReopenWithCF({"pikachu"}, options);
  294. std::vector<port::Thread> threads;
  295. std::atomic<int> thread_num(0);
  296. std::function<void()> write_no_slowdown_func = [&]() {
  297. int a = thread_num.fetch_add(1);
  298. std::string key = "foo" + std::to_string(a);
  299. WriteOptions wo;
  300. wo.no_slowdown = true;
  301. ASSERT_NOK(dbfull()->Put(wo, key, "bar"));
  302. };
  303. // Use a small number to ensure a large delay that is still effective
  304. // when we do Put
  305. // TODO(myabandeh): this is time dependent and could potentially make
  306. // the test flaky
  307. auto token = dbfull()->TEST_write_controler().GetDelayToken(1);
  308. std::atomic<int> sleep_count(0);
  309. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
  310. "DBImpl::DelayWrite:Sleep", [&](void* /*arg*/) {
  311. sleep_count.fetch_add(1);
  312. if (threads.empty()) {
  313. for (int i = 0; i < 2; ++i) {
  314. threads.emplace_back(write_no_slowdown_func);
  315. }
  316. // Sleep for 2s to allow the threads to insert themselves into the
  317. // write queue
  318. env_->SleepForMicroseconds(3000000ULL);
  319. }
  320. });
  321. std::atomic<int> wait_count(0);
  322. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
  323. "DBImpl::DelayWrite:Wait",
  324. [&](void* /*arg*/) { wait_count.fetch_add(1); });
  325. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  326. WriteOptions wo;
  327. wo.sync = false;
  328. wo.disableWAL = false;
  329. wo.no_slowdown = false;
  330. dbfull()->Put(wo, "foo", "bar");
  331. // We need the 2nd write to trigger delay. This is because delay is
  332. // estimated based on the last write size which is 0 for the first write.
  333. ASSERT_OK(dbfull()->Put(wo, "foo2", "bar2"));
  334. token.reset();
  335. for (auto& t : threads) {
  336. t.join();
  337. }
  338. ASSERT_EQ(sleep_count.load(), 1);
  339. ASSERT_GE(wait_count.load(), 0);
  340. }
  341. TEST_F(DBTest, MixedSlowdownOptionsStop) {
  342. Options options = CurrentOptions();
  343. options.env = env_;
  344. options.write_buffer_size = 100000;
  345. CreateAndReopenWithCF({"pikachu"}, options);
  346. std::vector<port::Thread> threads;
  347. std::atomic<int> thread_num(0);
  348. std::function<void()> write_slowdown_func = [&]() {
  349. int a = thread_num.fetch_add(1);
  350. std::string key = "foo" + std::to_string(a);
  351. WriteOptions wo;
  352. wo.no_slowdown = false;
  353. ASSERT_OK(dbfull()->Put(wo, key, "bar"));
  354. };
  355. std::function<void()> write_no_slowdown_func = [&]() {
  356. int a = thread_num.fetch_add(1);
  357. std::string key = "foo" + std::to_string(a);
  358. WriteOptions wo;
  359. wo.no_slowdown = true;
  360. ASSERT_NOK(dbfull()->Put(wo, key, "bar"));
  361. };
  362. std::function<void()> wakeup_writer = [&]() {
  363. dbfull()->mutex_.Lock();
  364. dbfull()->bg_cv_.SignalAll();
  365. dbfull()->mutex_.Unlock();
  366. };
  367. // Use a small number to ensure a large delay that is still effective
  368. // when we do Put
  369. // TODO(myabandeh): this is time dependent and could potentially make
  370. // the test flaky
  371. auto token = dbfull()->TEST_write_controler().GetStopToken();
  372. std::atomic<int> wait_count(0);
  373. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
  374. "DBImpl::DelayWrite:Wait", [&](void* /*arg*/) {
  375. wait_count.fetch_add(1);
  376. if (threads.empty()) {
  377. for (int i = 0; i < 2; ++i) {
  378. threads.emplace_back(write_slowdown_func);
  379. }
  380. for (int i = 0; i < 2; ++i) {
  381. threads.emplace_back(write_no_slowdown_func);
  382. }
  383. // Sleep for 2s to allow the threads to insert themselves into the
  384. // write queue
  385. env_->SleepForMicroseconds(3000000ULL);
  386. }
  387. token.reset();
  388. threads.emplace_back(wakeup_writer);
  389. });
  390. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  391. WriteOptions wo;
  392. wo.sync = false;
  393. wo.disableWAL = false;
  394. wo.no_slowdown = false;
  395. dbfull()->Put(wo, "foo", "bar");
  396. // We need the 2nd write to trigger delay. This is because delay is
  397. // estimated based on the last write size which is 0 for the first write.
  398. ASSERT_OK(dbfull()->Put(wo, "foo2", "bar2"));
  399. token.reset();
  400. for (auto& t : threads) {
  401. t.join();
  402. }
  403. ASSERT_GE(wait_count.load(), 1);
  404. wo.no_slowdown = true;
  405. ASSERT_OK(dbfull()->Put(wo, "foo3", "bar"));
  406. }
  407. #ifndef ROCKSDB_LITE
  408. TEST_F(DBTest, LevelLimitReopen) {
  409. Options options = CurrentOptions();
  410. CreateAndReopenWithCF({"pikachu"}, options);
  411. const std::string value(1024 * 1024, ' ');
  412. int i = 0;
  413. while (NumTableFilesAtLevel(2, 1) == 0) {
  414. ASSERT_OK(Put(1, Key(i++), value));
  415. }
  416. options.num_levels = 1;
  417. options.max_bytes_for_level_multiplier_additional.resize(1, 1);
  418. Status s = TryReopenWithColumnFamilies({"default", "pikachu"}, options);
  419. ASSERT_EQ(s.IsInvalidArgument(), true);
  420. ASSERT_EQ(s.ToString(),
  421. "Invalid argument: db has more levels than options.num_levels");
  422. options.num_levels = 10;
  423. options.max_bytes_for_level_multiplier_additional.resize(10, 1);
  424. ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options));
  425. }
  426. #endif // ROCKSDB_LITE
// SingleDelete must hide a previously Put value while leaving unrelated
// keys ("foo2") readable.
TEST_F(DBTest, PutSingleDeleteGet) {
  do {
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
    ASSERT_OK(Put(1, "foo", "v1"));
    ASSERT_EQ("v1", Get(1, "foo"));
    ASSERT_OK(Put(1, "foo2", "v2"));
    ASSERT_EQ("v2", Get(1, "foo2"));
    ASSERT_OK(SingleDelete(1, "foo"));
    ASSERT_EQ("NOT_FOUND", Get(1, "foo"));
    // Skip FIFO and universal compaction because they do not apply to the
    // test case. Skip MergePut because single delete does not get removed
    // when it encounters a merge.
  } while (ChangeOptions(kSkipFIFOCompaction | kSkipUniversalCompaction |
                         kSkipMergePut));
}
  442. TEST_F(DBTest, ReadFromPersistedTier) {
  443. do {
  444. Random rnd(301);
  445. Options options = CurrentOptions();
  446. for (int disableWAL = 0; disableWAL <= 1; ++disableWAL) {
  447. CreateAndReopenWithCF({"pikachu"}, options);
  448. WriteOptions wopt;
  449. wopt.disableWAL = (disableWAL == 1);
  450. // 1st round: put but not flush
  451. ASSERT_OK(db_->Put(wopt, handles_[1], "foo", "first"));
  452. ASSERT_OK(db_->Put(wopt, handles_[1], "bar", "one"));
  453. ASSERT_EQ("first", Get(1, "foo"));
  454. ASSERT_EQ("one", Get(1, "bar"));
  455. // Read directly from persited data.
  456. ReadOptions ropt;
  457. ropt.read_tier = kPersistedTier;
  458. std::string value;
  459. if (wopt.disableWAL) {
  460. // as data has not yet being flushed, we expect not found.
  461. ASSERT_TRUE(db_->Get(ropt, handles_[1], "foo", &value).IsNotFound());
  462. ASSERT_TRUE(db_->Get(ropt, handles_[1], "bar", &value).IsNotFound());
  463. } else {
  464. ASSERT_OK(db_->Get(ropt, handles_[1], "foo", &value));
  465. ASSERT_OK(db_->Get(ropt, handles_[1], "bar", &value));
  466. }
  467. // Multiget
  468. std::vector<ColumnFamilyHandle*> multiget_cfs;
  469. multiget_cfs.push_back(handles_[1]);
  470. multiget_cfs.push_back(handles_[1]);
  471. std::vector<Slice> multiget_keys;
  472. multiget_keys.push_back("foo");
  473. multiget_keys.push_back("bar");
  474. std::vector<std::string> multiget_values;
  475. auto statuses =
  476. db_->MultiGet(ropt, multiget_cfs, multiget_keys, &multiget_values);
  477. if (wopt.disableWAL) {
  478. ASSERT_TRUE(statuses[0].IsNotFound());
  479. ASSERT_TRUE(statuses[1].IsNotFound());
  480. } else {
  481. ASSERT_OK(statuses[0]);
  482. ASSERT_OK(statuses[1]);
  483. }
  484. // 2nd round: flush and put a new value in memtable.
  485. ASSERT_OK(Flush(1));
  486. ASSERT_OK(db_->Put(wopt, handles_[1], "rocksdb", "hello"));
  487. // once the data has been flushed, we are able to get the
  488. // data when kPersistedTier is used.
  489. ASSERT_TRUE(db_->Get(ropt, handles_[1], "foo", &value).ok());
  490. ASSERT_EQ(value, "first");
  491. ASSERT_TRUE(db_->Get(ropt, handles_[1], "bar", &value).ok());
  492. ASSERT_EQ(value, "one");
  493. if (wopt.disableWAL) {
  494. ASSERT_TRUE(
  495. db_->Get(ropt, handles_[1], "rocksdb", &value).IsNotFound());
  496. } else {
  497. ASSERT_OK(db_->Get(ropt, handles_[1], "rocksdb", &value));
  498. ASSERT_EQ(value, "hello");
  499. }
  500. // Expect same result in multiget
  501. multiget_cfs.push_back(handles_[1]);
  502. multiget_keys.push_back("rocksdb");
  503. statuses =
  504. db_->MultiGet(ropt, multiget_cfs, multiget_keys, &multiget_values);
  505. ASSERT_TRUE(statuses[0].ok());
  506. ASSERT_EQ("first", multiget_values[0]);
  507. ASSERT_TRUE(statuses[1].ok());
  508. ASSERT_EQ("one", multiget_values[1]);
  509. if (wopt.disableWAL) {
  510. ASSERT_TRUE(statuses[2].IsNotFound());
  511. } else {
  512. ASSERT_OK(statuses[2]);
  513. }
  514. // 3rd round: delete and flush
  515. ASSERT_OK(db_->Delete(wopt, handles_[1], "foo"));
  516. Flush(1);
  517. ASSERT_OK(db_->Delete(wopt, handles_[1], "bar"));
  518. ASSERT_TRUE(db_->Get(ropt, handles_[1], "foo", &value).IsNotFound());
  519. if (wopt.disableWAL) {
  520. // Still expect finding the value as its delete has not yet being
  521. // flushed.
  522. ASSERT_TRUE(db_->Get(ropt, handles_[1], "bar", &value).ok());
  523. ASSERT_EQ(value, "one");
  524. } else {
  525. ASSERT_TRUE(db_->Get(ropt, handles_[1], "bar", &value).IsNotFound());
  526. }
  527. ASSERT_TRUE(db_->Get(ropt, handles_[1], "rocksdb", &value).ok());
  528. ASSERT_EQ(value, "hello");
  529. statuses =
  530. db_->MultiGet(ropt, multiget_cfs, multiget_keys, &multiget_values);
  531. ASSERT_TRUE(statuses[0].IsNotFound());
  532. if (wopt.disableWAL) {
  533. ASSERT_TRUE(statuses[1].ok());
  534. ASSERT_EQ("one", multiget_values[1]);
  535. } else {
  536. ASSERT_TRUE(statuses[1].IsNotFound());
  537. }
  538. ASSERT_TRUE(statuses[2].ok());
  539. ASSERT_EQ("hello", multiget_values[2]);
  540. if (wopt.disableWAL == 0) {
  541. DestroyAndReopen(options);
  542. }
  543. }
  544. } while (ChangeOptions());
  545. }
  546. TEST_F(DBTest, SingleDeleteFlush) {
  547. // Test to check whether flushing preserves a single delete hidden
  548. // behind a put.
  549. do {
  550. Random rnd(301);
  551. Options options = CurrentOptions();
  552. options.disable_auto_compactions = true;
  553. CreateAndReopenWithCF({"pikachu"}, options);
  554. // Put values on second level (so that they will not be in the same
  555. // compaction as the other operations.
  556. Put(1, "foo", "first");
  557. Put(1, "bar", "one");
  558. ASSERT_OK(Flush(1));
  559. MoveFilesToLevel(2, 1);
  560. // (Single) delete hidden by a put
  561. SingleDelete(1, "foo");
  562. Put(1, "foo", "second");
  563. Delete(1, "bar");
  564. Put(1, "bar", "two");
  565. ASSERT_OK(Flush(1));
  566. SingleDelete(1, "foo");
  567. Delete(1, "bar");
  568. ASSERT_OK(Flush(1));
  569. dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr,
  570. nullptr);
  571. ASSERT_EQ("NOT_FOUND", Get(1, "bar"));
  572. ASSERT_EQ("NOT_FOUND", Get(1, "foo"));
  573. // Skip FIFO and universal compaction beccause they do not apply to the test
  574. // case. Skip MergePut because single delete does not get removed when it
  575. // encounters a merge.
  576. } while (ChangeOptions(kSkipFIFOCompaction | kSkipUniversalCompaction |
  577. kSkipMergePut));
  578. }
  579. TEST_F(DBTest, SingleDeletePutFlush) {
  580. // Single deletes that encounter the matching put in a flush should get
  581. // removed.
  582. do {
  583. Random rnd(301);
  584. Options options = CurrentOptions();
  585. options.disable_auto_compactions = true;
  586. CreateAndReopenWithCF({"pikachu"}, options);
  587. Put(1, "foo", Slice());
  588. Put(1, "a", Slice());
  589. SingleDelete(1, "a");
  590. ASSERT_OK(Flush(1));
  591. ASSERT_EQ("[ ]", AllEntriesFor("a", 1));
  592. // Skip FIFO and universal compaction beccause they do not apply to the test
  593. // case. Skip MergePut because single delete does not get removed when it
  594. // encounters a merge.
  595. } while (ChangeOptions(kSkipFIFOCompaction | kSkipUniversalCompaction |
  596. kSkipMergePut));
  597. }
  598. // Disable because not all platform can run it.
  599. // It requires more than 9GB memory to run it, With single allocation
  600. // of more than 3GB.
  601. TEST_F(DBTest, DISABLED_SanitizeVeryVeryLargeValue) {
  602. const size_t kValueSize = 4 * size_t{1024 * 1024 * 1024}; // 4GB value
  603. std::string raw(kValueSize, 'v');
  604. Options options = CurrentOptions();
  605. options.env = env_;
  606. options.merge_operator = MergeOperators::CreatePutOperator();
  607. options.write_buffer_size = 100000; // Small write buffer
  608. options.paranoid_checks = true;
  609. DestroyAndReopen(options);
  610. ASSERT_OK(Put("boo", "v1"));
  611. ASSERT_TRUE(Put("foo", raw).IsInvalidArgument());
  612. ASSERT_TRUE(Merge("foo", raw).IsInvalidArgument());
  613. WriteBatch wb;
  614. ASSERT_TRUE(wb.Put("foo", raw).IsInvalidArgument());
  615. ASSERT_TRUE(wb.Merge("foo", raw).IsInvalidArgument());
  616. Slice value_slice = raw;
  617. Slice key_slice = "foo";
  618. SliceParts sp_key(&key_slice, 1);
  619. SliceParts sp_value(&value_slice, 1);
  620. ASSERT_TRUE(wb.Put(sp_key, sp_value).IsInvalidArgument());
  621. ASSERT_TRUE(wb.Merge(sp_key, sp_value).IsInvalidArgument());
  622. }
  623. // Disable because not all platform can run it.
  624. // It requires more than 9GB memory to run it, With single allocation
  625. // of more than 3GB.
  626. TEST_F(DBTest, DISABLED_VeryLargeValue) {
  627. const size_t kValueSize = 3221225472u; // 3GB value
  628. const size_t kKeySize = 8388608u; // 8MB key
  629. std::string raw(kValueSize, 'v');
  630. std::string key1(kKeySize, 'c');
  631. std::string key2(kKeySize, 'd');
  632. Options options = CurrentOptions();
  633. options.env = env_;
  634. options.write_buffer_size = 100000; // Small write buffer
  635. options.paranoid_checks = true;
  636. DestroyAndReopen(options);
  637. ASSERT_OK(Put("boo", "v1"));
  638. ASSERT_OK(Put("foo", "v1"));
  639. ASSERT_OK(Put(key1, raw));
  640. raw[0] = 'w';
  641. ASSERT_OK(Put(key2, raw));
  642. dbfull()->TEST_WaitForFlushMemTable();
  643. #ifndef ROCKSDB_LITE
  644. ASSERT_EQ(1, NumTableFilesAtLevel(0));
  645. #endif // !ROCKSDB_LITE
  646. std::string value;
  647. Status s = db_->Get(ReadOptions(), key1, &value);
  648. ASSERT_OK(s);
  649. ASSERT_EQ(kValueSize, value.size());
  650. ASSERT_EQ('v', value[0]);
  651. s = db_->Get(ReadOptions(), key2, &value);
  652. ASSERT_OK(s);
  653. ASSERT_EQ(kValueSize, value.size());
  654. ASSERT_EQ('w', value[0]);
  655. // Compact all files.
  656. Flush();
  657. db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
  658. // Check DB is not in read-only state.
  659. ASSERT_OK(Put("boo", "v1"));
  660. s = db_->Get(ReadOptions(), key1, &value);
  661. ASSERT_OK(s);
  662. ASSERT_EQ(kValueSize, value.size());
  663. ASSERT_EQ('v', value[0]);
  664. s = db_->Get(ReadOptions(), key2, &value);
  665. ASSERT_OK(s);
  666. ASSERT_EQ(kValueSize, value.size());
  667. ASSERT_EQ('w', value[0]);
  668. }
  669. TEST_F(DBTest, GetFromImmutableLayer) {
  670. do {
  671. Options options = CurrentOptions();
  672. options.env = env_;
  673. CreateAndReopenWithCF({"pikachu"}, options);
  674. ASSERT_OK(Put(1, "foo", "v1"));
  675. ASSERT_EQ("v1", Get(1, "foo"));
  676. // Block sync calls
  677. env_->delay_sstable_sync_.store(true, std::memory_order_release);
  678. Put(1, "k1", std::string(100000, 'x')); // Fill memtable
  679. Put(1, "k2", std::string(100000, 'y')); // Trigger flush
  680. ASSERT_EQ("v1", Get(1, "foo"));
  681. ASSERT_EQ("NOT_FOUND", Get(0, "foo"));
  682. // Release sync calls
  683. env_->delay_sstable_sync_.store(false, std::memory_order_release);
  684. } while (ChangeOptions());
  685. }
// Level-0 files must be searched newest-first: the later file containing
// "foo" => v2 wins even though the earlier file sorts first by smallest
// key.
TEST_F(DBTest, GetLevel0Ordering) {
  do {
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
    // Check that we process level-0 files in correct order. The code
    // below generates two level-0 files where the earlier one comes
    // before the later one in the level-0 file list since the earlier
    // one has a smaller "smallest" key.
    ASSERT_OK(Put(1, "bar", "b"));
    ASSERT_OK(Put(1, "foo", "v1"));
    ASSERT_OK(Flush(1));
    ASSERT_OK(Put(1, "foo", "v2"));
    ASSERT_OK(Flush(1));
    ASSERT_EQ("v2", Get(1, "foo"));
  } while (ChangeOptions());
}
// Opening with the level-0 triggers in reverse of the expected ordering
// (stop=1 < slowdown=2 < compaction=3) must still succeed -- presumably
// the values are sanitized at open; confirm against SanitizeOptions if
// this behavior changes.
TEST_F(DBTest, WrongLevel0Config) {
  Options options = CurrentOptions();
  Close();
  ASSERT_OK(DestroyDB(dbname_, options));
  options.level0_stop_writes_trigger = 1;
  options.level0_slowdown_writes_trigger = 2;
  options.level0_file_num_compaction_trigger = 3;
  ASSERT_OK(DB::Open(options, dbname_, &db_));
}
  710. #ifndef ROCKSDB_LITE
// A newer value for a key must win over an older value that a manual
// compaction pushed down -- both while the new value is only in the
// memtable and after it is flushed.
TEST_F(DBTest, GetOrderedByLevels) {
  do {
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
    ASSERT_OK(Put(1, "foo", "v1"));
    Compact(1, "a", "z");
    ASSERT_EQ("v1", Get(1, "foo"));
    ASSERT_OK(Put(1, "foo", "v2"));
    ASSERT_EQ("v2", Get(1, "foo"));
    ASSERT_OK(Flush(1));
    ASSERT_EQ("v2", Get(1, "foo"));
  } while (ChangeOptions());
}
// With multiple files in a non-level-0 level, Get() must search the file
// whose key range covers the lookup key.
TEST_F(DBTest, GetPicksCorrectFile) {
  do {
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
    // Arrange to have multiple files in a non-level-0 level.
    ASSERT_OK(Put(1, "a", "va"));
    Compact(1, "a", "b");
    ASSERT_OK(Put(1, "x", "vx"));
    Compact(1, "x", "y");
    ASSERT_OK(Put(1, "f", "vf"));
    Compact(1, "f", "g");
    ASSERT_EQ("va", Get(1, "a"));
    ASSERT_EQ("vf", Get(1, "f"));
    ASSERT_EQ("vx", Get(1, "x"));
  } while (ChangeOptions());
}
  738. TEST_F(DBTest, GetEncountersEmptyLevel) {
  739. do {
  740. Options options = CurrentOptions();
  741. CreateAndReopenWithCF({"pikachu"}, options);
  742. // Arrange for the following to happen:
  743. // * sstable A in level 0
  744. // * nothing in level 1
  745. // * sstable B in level 2
  746. // Then do enough Get() calls to arrange for an automatic compaction
  747. // of sstable A. A bug would cause the compaction to be marked as
  748. // occurring at level 1 (instead of the correct level 0).
  749. // Step 1: First place sstables in levels 0 and 2
  750. Put(1, "a", "begin");
  751. Put(1, "z", "end");
  752. ASSERT_OK(Flush(1));
  753. dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]);
  754. dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]);
  755. Put(1, "a", "begin");
  756. Put(1, "z", "end");
  757. ASSERT_OK(Flush(1));
  758. ASSERT_GT(NumTableFilesAtLevel(0, 1), 0);
  759. ASSERT_GT(NumTableFilesAtLevel(2, 1), 0);
  760. // Step 2: clear level 1 if necessary.
  761. dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]);
  762. ASSERT_EQ(NumTableFilesAtLevel(0, 1), 1);
  763. ASSERT_EQ(NumTableFilesAtLevel(1, 1), 0);
  764. ASSERT_EQ(NumTableFilesAtLevel(2, 1), 1);
  765. // Step 3: read a bunch of times
  766. for (int i = 0; i < 1000; i++) {
  767. ASSERT_EQ("NOT_FOUND", Get(1, "missing"));
  768. }
  769. // Step 4: Wait for compaction to finish
  770. dbfull()->TEST_WaitForCompact();
  771. ASSERT_EQ(NumTableFilesAtLevel(0, 1), 1); // XXX
  772. } while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction));
  773. }
  774. #endif // ROCKSDB_LITE
// With up to 4 write buffers and min_write_buffer_number_to_merge = 3,
// data spread across multiple (possibly un-merged) memtables must stay
// readable across explicit flushes.
TEST_F(DBTest, FlushMultipleMemtable) {
  do {
    Options options = CurrentOptions();
    WriteOptions writeOpt = WriteOptions();
    writeOpt.disableWAL = true;
    options.max_write_buffer_number = 4;
    options.min_write_buffer_number_to_merge = 3;
    options.max_write_buffer_size_to_maintain = -1;
    CreateAndReopenWithCF({"pikachu"}, options);
    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v1"));
    ASSERT_OK(Flush(1));
    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v1"));
    ASSERT_EQ("v1", Get(1, "foo"));
    ASSERT_EQ("v1", Get(1, "bar"));
    ASSERT_OK(Flush(1));
  } while (ChangeCompactOptions());
}
  792. #ifndef ROCKSDB_LITE
// Ten writer threads (five per column family) each fill roughly two
// memtables; with max_write_buffer_number = 2 the flush scheduler must
// keep up, leaving each CF with between 1 and 10 SST files.
TEST_F(DBTest, FlushSchedule) {
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  // Make the level-0 triggers effectively unreachable so write stalls do
  // not interfere with the flush scheduling being tested.
  options.level0_stop_writes_trigger = 1 << 10;
  options.level0_slowdown_writes_trigger = 1 << 10;
  options.min_write_buffer_number_to_merge = 1;
  // NOTE(review): this reads write_buffer_size *before* it is shrunk to
  // 120KB below, so the maintained size is based on the prior (default)
  // value -- confirm this ordering is intentional.
  options.max_write_buffer_size_to_maintain =
      static_cast<int64_t>(options.write_buffer_size);
  options.max_write_buffer_number = 2;
  options.write_buffer_size = 120 * 1024;
  CreateAndReopenWithCF({"pikachu"}, options);
  std::vector<port::Thread> threads;
  std::atomic<int> thread_num(0);
  // each column family will have 5 thread, each thread generating 2 memtables.
  // each column family should end up with 10 table files
  std::function<void()> fill_memtable_func = [&]() {
    int a = thread_num.fetch_add(1);
    Random rnd(a);
    WriteOptions wo;
    // this should fill up 2 memtables
    for (int k = 0; k < 5000; ++k) {
      // Thread-index parity picks the target column family.
      ASSERT_OK(db_->Put(wo, handles_[a & 1], RandomString(&rnd, 13), ""));
    }
  };
  for (int i = 0; i < 10; ++i) {
    threads.emplace_back(fill_memtable_func);
  }
  for (auto& t : threads) {
    t.join();
  }
  auto default_tables = GetNumberOfSstFilesForColumnFamily(db_, "default");
  auto pikachu_tables = GetNumberOfSstFilesForColumnFamily(db_, "pikachu");
  ASSERT_LE(default_tables, static_cast<uint64_t>(10));
  ASSERT_GT(default_tables, static_cast<uint64_t>(0));
  ASSERT_LE(pikachu_tables, static_cast<uint64_t>(10));
  ASSERT_GT(pikachu_tables, static_cast<uint64_t>(0));
}
  830. #endif // ROCKSDB_LITE
  831. namespace {
  832. class KeepFilter : public CompactionFilter {
  833. public:
  834. bool Filter(int /*level*/, const Slice& /*key*/, const Slice& /*value*/,
  835. std::string* /*new_value*/,
  836. bool* /*value_changed*/) const override {
  837. return false;
  838. }
  839. const char* Name() const override { return "KeepFilter"; }
  840. };
  841. class KeepFilterFactory : public CompactionFilterFactory {
  842. public:
  843. explicit KeepFilterFactory(bool check_context = false)
  844. : check_context_(check_context) {}
  845. std::unique_ptr<CompactionFilter> CreateCompactionFilter(
  846. const CompactionFilter::Context& context) override {
  847. if (check_context_) {
  848. EXPECT_EQ(expect_full_compaction_.load(), context.is_full_compaction);
  849. EXPECT_EQ(expect_manual_compaction_.load(), context.is_manual_compaction);
  850. }
  851. return std::unique_ptr<CompactionFilter>(new KeepFilter());
  852. }
  853. const char* Name() const override { return "KeepFilterFactory"; }
  854. bool check_context_;
  855. std::atomic_bool expect_full_compaction_;
  856. std::atomic_bool expect_manual_compaction_;
  857. };
  858. class DelayFilter : public CompactionFilter {
  859. public:
  860. explicit DelayFilter(DBTestBase* d) : db_test(d) {}
  861. bool Filter(int /*level*/, const Slice& /*key*/, const Slice& /*value*/,
  862. std::string* /*new_value*/,
  863. bool* /*value_changed*/) const override {
  864. db_test->env_->addon_time_.fetch_add(1000);
  865. return true;
  866. }
  867. const char* Name() const override { return "DelayFilter"; }
  868. private:
  869. DBTestBase* db_test;
  870. };
  871. class DelayFilterFactory : public CompactionFilterFactory {
  872. public:
  873. explicit DelayFilterFactory(DBTestBase* d) : db_test(d) {}
  874. std::unique_ptr<CompactionFilter> CreateCompactionFilter(
  875. const CompactionFilter::Context& /*context*/) override {
  876. return std::unique_ptr<CompactionFilter>(new DelayFilter(db_test));
  877. }
  878. const char* Name() const override { return "DelayFilterFactory"; }
  879. private:
  880. DBTestBase* db_test;
  881. };
  882. } // namespace
  883. #ifndef ROCKSDB_LITE
// Build a random string of length `len` that compresses well, by
// forwarding to test::CompressibleString with 0.8 as the compressibility
// argument.
static std::string CompressibleString(Random* rnd, int len) {
  std::string r;
  test::CompressibleString(rnd, 0.8, len, &r);
  return r;
}
  889. #endif // ROCKSDB_LITE
// Opening a DB configured with five entries in options.db_paths must be
// rejected with Status::NotSupported.
TEST_F(DBTest, FailMoreDbPaths) {
  Options options = CurrentOptions();
  options.db_paths.emplace_back(dbname_, 10000000);
  options.db_paths.emplace_back(dbname_ + "_2", 1000000);
  options.db_paths.emplace_back(dbname_ + "_3", 1000000);
  options.db_paths.emplace_back(dbname_ + "_4", 1000000);
  options.db_paths.emplace_back(dbname_ + "_5", 1000000);
  ASSERT_TRUE(TryReopen(options).IsNotSupported());
}
  899. void CheckColumnFamilyMeta(
  900. const ColumnFamilyMetaData& cf_meta,
  901. const std::vector<std::vector<FileMetaData>>& files_by_level,
  902. uint64_t start_time, uint64_t end_time) {
  903. ASSERT_EQ(cf_meta.name, kDefaultColumnFamilyName);
  904. ASSERT_EQ(cf_meta.levels.size(), files_by_level.size());
  905. uint64_t cf_size = 0;
  906. size_t file_count = 0;
  907. for (size_t i = 0; i < cf_meta.levels.size(); ++i) {
  908. const auto& level_meta_from_cf = cf_meta.levels[i];
  909. const auto& level_meta_from_files = files_by_level[i];
  910. ASSERT_EQ(level_meta_from_cf.level, i);
  911. ASSERT_EQ(level_meta_from_cf.files.size(), level_meta_from_files.size());
  912. file_count += level_meta_from_cf.files.size();
  913. uint64_t level_size = 0;
  914. for (size_t j = 0; j < level_meta_from_cf.files.size(); ++j) {
  915. const auto& file_meta_from_cf = level_meta_from_cf.files[j];
  916. const auto& file_meta_from_files = level_meta_from_files[j];
  917. level_size += file_meta_from_cf.size;
  918. ASSERT_EQ(file_meta_from_cf.file_number,
  919. file_meta_from_files.fd.GetNumber());
  920. ASSERT_EQ(file_meta_from_cf.file_number,
  921. TableFileNameToNumber(file_meta_from_cf.name));
  922. ASSERT_EQ(file_meta_from_cf.size, file_meta_from_files.fd.file_size);
  923. ASSERT_EQ(file_meta_from_cf.smallest_seqno,
  924. file_meta_from_files.fd.smallest_seqno);
  925. ASSERT_EQ(file_meta_from_cf.largest_seqno,
  926. file_meta_from_files.fd.largest_seqno);
  927. ASSERT_EQ(file_meta_from_cf.smallestkey,
  928. file_meta_from_files.smallest.user_key().ToString());
  929. ASSERT_EQ(file_meta_from_cf.largestkey,
  930. file_meta_from_files.largest.user_key().ToString());
  931. ASSERT_EQ(file_meta_from_cf.oldest_blob_file_number,
  932. file_meta_from_files.oldest_blob_file_number);
  933. ASSERT_EQ(file_meta_from_cf.oldest_ancester_time,
  934. file_meta_from_files.oldest_ancester_time);
  935. ASSERT_EQ(file_meta_from_cf.file_creation_time,
  936. file_meta_from_files.file_creation_time);
  937. ASSERT_GE(file_meta_from_cf.file_creation_time, start_time);
  938. ASSERT_LE(file_meta_from_cf.file_creation_time, end_time);
  939. ASSERT_GE(file_meta_from_cf.oldest_ancester_time, start_time);
  940. ASSERT_LE(file_meta_from_cf.oldest_ancester_time, end_time);
  941. }
  942. ASSERT_EQ(level_meta_from_cf.size, level_size);
  943. cf_size += level_size;
  944. }
  945. ASSERT_EQ(cf_meta.file_count, file_count);
  946. ASSERT_EQ(cf_meta.size, cf_size);
  947. }
  948. void CheckLiveFilesMeta(
  949. const std::vector<LiveFileMetaData>& live_file_meta,
  950. const std::vector<std::vector<FileMetaData>>& files_by_level) {
  951. size_t total_file_count = 0;
  952. for (const auto& f : files_by_level) {
  953. total_file_count += f.size();
  954. }
  955. ASSERT_EQ(live_file_meta.size(), total_file_count);
  956. int level = 0;
  957. int i = 0;
  958. for (const auto& meta : live_file_meta) {
  959. if (level != meta.level) {
  960. level = meta.level;
  961. i = 0;
  962. }
  963. ASSERT_LT(i, files_by_level[level].size());
  964. const auto& expected_meta = files_by_level[level][i];
  965. ASSERT_EQ(meta.column_family_name, kDefaultColumnFamilyName);
  966. ASSERT_EQ(meta.file_number, expected_meta.fd.GetNumber());
  967. ASSERT_EQ(meta.file_number, TableFileNameToNumber(meta.name));
  968. ASSERT_EQ(meta.size, expected_meta.fd.file_size);
  969. ASSERT_EQ(meta.smallest_seqno, expected_meta.fd.smallest_seqno);
  970. ASSERT_EQ(meta.largest_seqno, expected_meta.fd.largest_seqno);
  971. ASSERT_EQ(meta.smallestkey, expected_meta.smallest.user_key().ToString());
  972. ASSERT_EQ(meta.largestkey, expected_meta.largest.user_key().ToString());
  973. ASSERT_EQ(meta.oldest_blob_file_number,
  974. expected_meta.oldest_blob_file_number);
  975. ++i;
  976. }
  977. }
  978. #ifndef ROCKSDB_LITE
  979. TEST_F(DBTest, MetaDataTest) {
  980. Options options = CurrentOptions();
  981. options.create_if_missing = true;
  982. options.disable_auto_compactions = true;
  983. int64_t temp_time = 0;
  984. options.env->GetCurrentTime(&temp_time);
  985. uint64_t start_time = static_cast<uint64_t>(temp_time);
  986. DestroyAndReopen(options);
  987. Random rnd(301);
  988. int key_index = 0;
  989. for (int i = 0; i < 100; ++i) {
  990. // Add a single blob reference to each file
  991. std::string blob_index;
  992. BlobIndex::EncodeBlob(&blob_index, /* blob_file_number */ i + 1000,
  993. /* offset */ 1234, /* size */ 5678, kNoCompression);
  994. WriteBatch batch;
  995. ASSERT_OK(WriteBatchInternal::PutBlobIndex(&batch, 0, Key(key_index),
  996. blob_index));
  997. ASSERT_OK(dbfull()->Write(WriteOptions(), &batch));
  998. ++key_index;
  999. // Fill up the rest of the file with random values.
  1000. GenerateNewFile(&rnd, &key_index, /* nowait */ true);
  1001. Flush();
  1002. }
  1003. std::vector<std::vector<FileMetaData>> files_by_level;
  1004. dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &files_by_level);
  1005. options.env->GetCurrentTime(&temp_time);
  1006. uint64_t end_time = static_cast<uint64_t>(temp_time);
  1007. ColumnFamilyMetaData cf_meta;
  1008. db_->GetColumnFamilyMetaData(&cf_meta);
  1009. CheckColumnFamilyMeta(cf_meta, files_by_level, start_time, end_time);
  1010. std::vector<LiveFileMetaData> live_file_meta;
  1011. db_->GetLiveFilesMetaData(&live_file_meta);
  1012. CheckLiveFilesMeta(live_file_meta, files_by_level);
  1013. }
  1014. namespace {
  1015. void MinLevelHelper(DBTest* self, Options& options) {
  1016. Random rnd(301);
  1017. for (int num = 0; num < options.level0_file_num_compaction_trigger - 1;
  1018. num++) {
  1019. std::vector<std::string> values;
  1020. // Write 120KB (12 values, each 10K)
  1021. for (int i = 0; i < 12; i++) {
  1022. values.push_back(DBTestBase::RandomString(&rnd, 10000));
  1023. ASSERT_OK(self->Put(DBTestBase::Key(i), values[i]));
  1024. }
  1025. self->dbfull()->TEST_WaitForFlushMemTable();
  1026. ASSERT_EQ(self->NumTableFilesAtLevel(0), num + 1);
  1027. }
  1028. // generate one more file in level-0, and should trigger level-0 compaction
  1029. std::vector<std::string> values;
  1030. for (int i = 0; i < 12; i++) {
  1031. values.push_back(DBTestBase::RandomString(&rnd, 10000));
  1032. ASSERT_OK(self->Put(DBTestBase::Key(i), values[i]));
  1033. }
  1034. self->dbfull()->TEST_WaitForCompact();
  1035. ASSERT_EQ(self->NumTableFilesAtLevel(0), 0);
  1036. ASSERT_EQ(self->NumTableFilesAtLevel(1), 1);
  1037. }
  1038. // returns false if the calling-Test should be skipped
  1039. bool MinLevelToCompress(CompressionType& type, Options& options, int wbits,
  1040. int lev, int strategy) {
  1041. fprintf(stderr,
  1042. "Test with compression options : window_bits = %d, level = %d, "
  1043. "strategy = %d}\n",
  1044. wbits, lev, strategy);
  1045. options.write_buffer_size = 100 << 10; // 100KB
  1046. options.arena_block_size = 4096;
  1047. options.num_levels = 3;
  1048. options.level0_file_num_compaction_trigger = 3;
  1049. options.create_if_missing = true;
  1050. if (Snappy_Supported()) {
  1051. type = kSnappyCompression;
  1052. fprintf(stderr, "using snappy\n");
  1053. } else if (Zlib_Supported()) {
  1054. type = kZlibCompression;
  1055. fprintf(stderr, "using zlib\n");
  1056. } else if (BZip2_Supported()) {
  1057. type = kBZip2Compression;
  1058. fprintf(stderr, "using bzip2\n");
  1059. } else if (LZ4_Supported()) {
  1060. type = kLZ4Compression;
  1061. fprintf(stderr, "using lz4\n");
  1062. } else if (XPRESS_Supported()) {
  1063. type = kXpressCompression;
  1064. fprintf(stderr, "using xpress\n");
  1065. } else if (ZSTD_Supported()) {
  1066. type = kZSTD;
  1067. fprintf(stderr, "using ZSTD\n");
  1068. } else {
  1069. fprintf(stderr, "skipping test, compression disabled\n");
  1070. return false;
  1071. }
  1072. options.compression_per_level.resize(options.num_levels);
  1073. // do not compress L0
  1074. for (int i = 0; i < 1; i++) {
  1075. options.compression_per_level[i] = kNoCompression;
  1076. }
  1077. for (int i = 1; i < options.num_levels; i++) {
  1078. options.compression_per_level[i] = type;
  1079. }
  1080. return true;
  1081. }
  1082. } // namespace
// Runs MinLevelHelper twice: first with only L0 uncompressed (as configured
// by MinLevelToCompress), then with L0 and L1 uncompressed, to exercise
// different min-level-to-compress boundaries. The wbits/lev/strategy
// arguments are only logged by the helper.
TEST_F(DBTest, MinLevelToCompress1) {
  Options options = CurrentOptions();
  CompressionType type = kSnappyCompression;
  // Skip if no compression library is compiled in.
  if (!MinLevelToCompress(type, options, -14, -1, 0)) {
    return;
  }
  Reopen(options);
  MinLevelHelper(this, options);

  // do not compress L0 and L1
  for (int i = 0; i < 2; i++) {
    options.compression_per_level[i] = kNoCompression;
  }
  for (int i = 2; i < options.num_levels; i++) {
    options.compression_per_level[i] = type;
  }
  DestroyAndReopen(options);
  MinLevelHelper(this, options);
}
// Same as MinLevelToCompress1 but with a different (positive) window_bits
// value passed to the helper; the helper only logs it, so this mainly
// re-runs the two compression-per-level configurations.
TEST_F(DBTest, MinLevelToCompress2) {
  Options options = CurrentOptions();
  CompressionType type = kSnappyCompression;
  // Skip if no compression library is compiled in.
  if (!MinLevelToCompress(type, options, 15, -1, 0)) {
    return;
  }
  Reopen(options);
  MinLevelHelper(this, options);

  // do not compress L0 and L1
  for (int i = 0; i < 2; i++) {
    options.compression_per_level[i] = kNoCompression;
  }
  for (int i = 2; i < options.num_levels; i++) {
    options.compression_per_level[i] = type;
  }
  DestroyAndReopen(options);
  MinLevelHelper(this, options);
}
  1119. // This test may fail because of a legit case that multiple L0 files
  1120. // are trivial moved to L1.
  1121. TEST_F(DBTest, DISABLED_RepeatedWritesToSameKey) {
  1122. do {
  1123. Options options = CurrentOptions();
  1124. options.env = env_;
  1125. options.write_buffer_size = 100000; // Small write buffer
  1126. CreateAndReopenWithCF({"pikachu"}, options);
  1127. // We must have at most one file per level except for level-0,
  1128. // which may have up to kL0_StopWritesTrigger files.
  1129. const int kMaxFiles =
  1130. options.num_levels + options.level0_stop_writes_trigger;
  1131. Random rnd(301);
  1132. std::string value =
  1133. RandomString(&rnd, static_cast<int>(2 * options.write_buffer_size));
  1134. for (int i = 0; i < 5 * kMaxFiles; i++) {
  1135. ASSERT_OK(Put(1, "key", value));
  1136. ASSERT_LE(TotalTableFiles(1), kMaxFiles);
  1137. }
  1138. } while (ChangeCompactOptions());
  1139. }
  1140. #endif // ROCKSDB_LITE
  1141. TEST_F(DBTest, SparseMerge) {
  1142. do {
  1143. Options options = CurrentOptions();
  1144. options.compression = kNoCompression;
  1145. CreateAndReopenWithCF({"pikachu"}, options);
  1146. FillLevels("A", "Z", 1);
  1147. // Suppose there is:
  1148. // small amount of data with prefix A
  1149. // large amount of data with prefix B
  1150. // small amount of data with prefix C
  1151. // and that recent updates have made small changes to all three prefixes.
  1152. // Check that we do not do a compaction that merges all of B in one shot.
  1153. const std::string value(1000, 'x');
  1154. Put(1, "A", "va");
  1155. // Write approximately 100MB of "B" values
  1156. for (int i = 0; i < 100000; i++) {
  1157. char key[100];
  1158. snprintf(key, sizeof(key), "B%010d", i);
  1159. Put(1, key, value);
  1160. }
  1161. Put(1, "C", "vc");
  1162. ASSERT_OK(Flush(1));
  1163. dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]);
  1164. // Make sparse update
  1165. Put(1, "A", "va2");
  1166. Put(1, "B100", "bvalue2");
  1167. Put(1, "C", "vc2");
  1168. ASSERT_OK(Flush(1));
  1169. // Compactions should not cause us to create a situation where
  1170. // a file overlaps too much data at the next level.
  1171. ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(handles_[1]),
  1172. 20 * 1048576);
  1173. dbfull()->TEST_CompactRange(0, nullptr, nullptr);
  1174. ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(handles_[1]),
  1175. 20 * 1048576);
  1176. dbfull()->TEST_CompactRange(1, nullptr, nullptr);
  1177. ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(handles_[1]),
  1178. 20 * 1048576);
  1179. } while (ChangeCompactOptions());
  1180. }
  1181. #ifndef ROCKSDB_LITE
  1182. static bool Between(uint64_t val, uint64_t low, uint64_t high) {
  1183. bool result = (val >= low) && (val <= high);
  1184. if (!result) {
  1185. fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n",
  1186. (unsigned long long)(val), (unsigned long long)(low),
  1187. (unsigned long long)(high));
  1188. }
  1189. return result;
  1190. }
  1191. TEST_F(DBTest, ApproximateSizesMemTable) {
  1192. Options options = CurrentOptions();
  1193. options.write_buffer_size = 100000000; // Large write buffer
  1194. options.compression = kNoCompression;
  1195. options.create_if_missing = true;
  1196. DestroyAndReopen(options);
  1197. auto default_cf = db_->DefaultColumnFamily();
  1198. const int N = 128;
  1199. Random rnd(301);
  1200. for (int i = 0; i < N; i++) {
  1201. ASSERT_OK(Put(Key(i), RandomString(&rnd, 1024)));
  1202. }
  1203. uint64_t size;
  1204. std::string start = Key(50);
  1205. std::string end = Key(60);
  1206. Range r(start, end);
  1207. SizeApproximationOptions size_approx_options;
  1208. size_approx_options.include_memtabtles = true;
  1209. size_approx_options.include_files = true;
  1210. db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size);
  1211. ASSERT_GT(size, 6000);
  1212. ASSERT_LT(size, 204800);
  1213. // Zero if not including mem table
  1214. db_->GetApproximateSizes(&r, 1, &size);
  1215. ASSERT_EQ(size, 0);
  1216. start = Key(500);
  1217. end = Key(600);
  1218. r = Range(start, end);
  1219. db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size);
  1220. ASSERT_EQ(size, 0);
  1221. for (int i = 0; i < N; i++) {
  1222. ASSERT_OK(Put(Key(1000 + i), RandomString(&rnd, 1024)));
  1223. }
  1224. start = Key(500);
  1225. end = Key(600);
  1226. r = Range(start, end);
  1227. db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size);
  1228. ASSERT_EQ(size, 0);
  1229. start = Key(100);
  1230. end = Key(1020);
  1231. r = Range(start, end);
  1232. db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size);
  1233. ASSERT_GT(size, 6000);
  1234. options.max_write_buffer_number = 8;
  1235. options.min_write_buffer_number_to_merge = 5;
  1236. options.write_buffer_size = 1024 * N; // Not very large
  1237. DestroyAndReopen(options);
  1238. default_cf = db_->DefaultColumnFamily();
  1239. int keys[N * 3];
  1240. for (int i = 0; i < N; i++) {
  1241. keys[i * 3] = i * 5;
  1242. keys[i * 3 + 1] = i * 5 + 1;
  1243. keys[i * 3 + 2] = i * 5 + 2;
  1244. }
  1245. std::random_shuffle(std::begin(keys), std::end(keys));
  1246. for (int i = 0; i < N * 3; i++) {
  1247. ASSERT_OK(Put(Key(keys[i] + 1000), RandomString(&rnd, 1024)));
  1248. }
  1249. start = Key(100);
  1250. end = Key(300);
  1251. r = Range(start, end);
  1252. db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size);
  1253. ASSERT_EQ(size, 0);
  1254. start = Key(1050);
  1255. end = Key(1080);
  1256. r = Range(start, end);
  1257. db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size);
  1258. ASSERT_GT(size, 6000);
  1259. start = Key(2100);
  1260. end = Key(2300);
  1261. r = Range(start, end);
  1262. db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size);
  1263. ASSERT_EQ(size, 0);
  1264. start = Key(1050);
  1265. end = Key(1080);
  1266. r = Range(start, end);
  1267. uint64_t size_with_mt, size_without_mt;
  1268. db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1,
  1269. &size_with_mt);
  1270. ASSERT_GT(size_with_mt, 6000);
  1271. db_->GetApproximateSizes(&r, 1, &size_without_mt);
  1272. ASSERT_EQ(size_without_mt, 0);
  1273. Flush();
  1274. for (int i = 0; i < N; i++) {
  1275. ASSERT_OK(Put(Key(i + 1000), RandomString(&rnd, 1024)));
  1276. }
  1277. start = Key(1050);
  1278. end = Key(1080);
  1279. r = Range(start, end);
  1280. db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1,
  1281. &size_with_mt);
  1282. db_->GetApproximateSizes(&r, 1, &size_without_mt);
  1283. ASSERT_GT(size_with_mt, size_without_mt);
  1284. ASSERT_GT(size_without_mt, 6000);
  1285. // Check that include_memtabtles flag works as expected
  1286. size_approx_options.include_memtabtles = false;
  1287. db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size);
  1288. ASSERT_EQ(size, size_without_mt);
  1289. // Check that files_size_error_margin works as expected, when the heuristic
  1290. // conditions are not met
  1291. start = Key(1);
  1292. end = Key(1000 + N - 2);
  1293. r = Range(start, end);
  1294. size_approx_options.files_size_error_margin = -1.0; // disabled
  1295. db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size);
  1296. uint64_t size2;
  1297. size_approx_options.files_size_error_margin = 0.5; // enabled, but not used
  1298. db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size2);
  1299. ASSERT_EQ(size, size2);
  1300. }
  1301. TEST_F(DBTest, ApproximateSizesFilesWithErrorMargin) {
  1302. Options options = CurrentOptions();
  1303. options.write_buffer_size = 1024 * 1024;
  1304. options.compression = kNoCompression;
  1305. options.create_if_missing = true;
  1306. options.target_file_size_base = 1024 * 1024;
  1307. DestroyAndReopen(options);
  1308. const auto default_cf = db_->DefaultColumnFamily();
  1309. const int N = 64000;
  1310. Random rnd(301);
  1311. for (int i = 0; i < N; i++) {
  1312. ASSERT_OK(Put(Key(i), RandomString(&rnd, 1024)));
  1313. }
  1314. // Flush everything to files
  1315. Flush();
  1316. // Compact the entire key space into the next level
  1317. db_->CompactRange(CompactRangeOptions(), default_cf, nullptr, nullptr);
  1318. // Write more keys
  1319. for (int i = N; i < (N + N / 4); i++) {
  1320. ASSERT_OK(Put(Key(i), RandomString(&rnd, 1024)));
  1321. }
  1322. // Flush everything to files again
  1323. Flush();
  1324. // Wait for compaction to finish
  1325. ASSERT_OK(dbfull()->TEST_WaitForCompact());
  1326. const std::string start = Key(0);
  1327. const std::string end = Key(2 * N);
  1328. const Range r(start, end);
  1329. SizeApproximationOptions size_approx_options;
  1330. size_approx_options.include_memtabtles = false;
  1331. size_approx_options.include_files = true;
  1332. size_approx_options.files_size_error_margin = -1.0; // disabled
  1333. // Get the precise size without any approximation heuristic
  1334. uint64_t size;
  1335. db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size);
  1336. ASSERT_NE(size, 0);
  1337. // Get the size with an approximation heuristic
  1338. uint64_t size2;
  1339. const double error_margin = 0.2;
  1340. size_approx_options.files_size_error_margin = error_margin;
  1341. db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size2);
  1342. ASSERT_LT(size2, size * (1 + error_margin));
  1343. ASSERT_GT(size2, size * (1 - error_margin));
  1344. }
  1345. TEST_F(DBTest, GetApproximateMemTableStats) {
  1346. Options options = CurrentOptions();
  1347. options.write_buffer_size = 100000000;
  1348. options.compression = kNoCompression;
  1349. options.create_if_missing = true;
  1350. DestroyAndReopen(options);
  1351. const int N = 128;
  1352. Random rnd(301);
  1353. for (int i = 0; i < N; i++) {
  1354. ASSERT_OK(Put(Key(i), RandomString(&rnd, 1024)));
  1355. }
  1356. uint64_t count;
  1357. uint64_t size;
  1358. std::string start = Key(50);
  1359. std::string end = Key(60);
  1360. Range r(start, end);
  1361. db_->GetApproximateMemTableStats(r, &count, &size);
  1362. ASSERT_GT(count, 0);
  1363. ASSERT_LE(count, N);
  1364. ASSERT_GT(size, 6000);
  1365. ASSERT_LT(size, 204800);
  1366. start = Key(500);
  1367. end = Key(600);
  1368. r = Range(start, end);
  1369. db_->GetApproximateMemTableStats(r, &count, &size);
  1370. ASSERT_EQ(count, 0);
  1371. ASSERT_EQ(size, 0);
  1372. Flush();
  1373. start = Key(50);
  1374. end = Key(60);
  1375. r = Range(start, end);
  1376. db_->GetApproximateMemTableStats(r, &count, &size);
  1377. ASSERT_EQ(count, 0);
  1378. ASSERT_EQ(size, 0);
  1379. for (int i = 0; i < N; i++) {
  1380. ASSERT_OK(Put(Key(1000 + i), RandomString(&rnd, 1024)));
  1381. }
  1382. start = Key(100);
  1383. end = Key(1020);
  1384. r = Range(start, end);
  1385. db_->GetApproximateMemTableStats(r, &count, &size);
  1386. ASSERT_GT(count, 20);
  1387. ASSERT_GT(size, 6000);
  1388. }
// Writes 80 x 100KB values, then checks that Size() (GetApproximateSizes)
// grows roughly linearly with the key range, that estimates survive DB
// reopen, and that incremental compactions do not perturb them. Bounds use
// S1 (exact value size) and S2 (value size + metadata slack).
TEST_F(DBTest, ApproximateSizes) {
  do {
    Options options = CurrentOptions();
    options.write_buffer_size = 100000000;  // Large write buffer
    options.compression = kNoCompression;
    options.create_if_missing = true;
    DestroyAndReopen(options);
    CreateAndReopenWithCF({"pikachu"}, options);

    // Empty DB: every range has approximate size 0, before and after reopen.
    ASSERT_TRUE(Between(Size("", "xyz", 1), 0, 0));
    ReopenWithColumnFamilies({"default", "pikachu"}, options);
    ASSERT_TRUE(Between(Size("", "xyz", 1), 0, 0));

    // Write 8MB (80 values, each 100K)
    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
    const int N = 80;
    static const int S1 = 100000;
    static const int S2 = 105000;  // Allow some expansion from metadata
    Random rnd(301);
    for (int i = 0; i < N; i++) {
      ASSERT_OK(Put(1, Key(i), RandomString(&rnd, S1)));
    }

    // 0 because GetApproximateSizes() does not account for memtable space
    ASSERT_TRUE(Between(Size("", Key(50), 1), 0, 0));

    // Check sizes across recovery by reopening a few times
    for (int run = 0; run < 3; run++) {
      ReopenWithColumnFamilies({"default", "pikachu"}, options);
      // Compact the keyspace in 10-key slices, re-checking all estimates
      // after each partial compaction.
      for (int compact_start = 0; compact_start < N; compact_start += 10) {
        for (int i = 0; i < N; i += 10) {
          // Prefix ranges must scale with i; ".suffix" pushes the bound past
          // key i, adding one more value to the expected size.
          ASSERT_TRUE(Between(Size("", Key(i), 1), S1 * i, S2 * i));
          ASSERT_TRUE(Between(Size("", Key(i) + ".suffix", 1), S1 * (i + 1),
                              S2 * (i + 1)));
          ASSERT_TRUE(Between(Size(Key(i), Key(i + 10), 1), S1 * 10, S2 * 10));
        }
        ASSERT_TRUE(Between(Size("", Key(50), 1), S1 * 50, S2 * 50));
        ASSERT_TRUE(
            Between(Size("", Key(50) + ".suffix", 1), S1 * 50, S2 * 50));

        std::string cstart_str = Key(compact_start);
        std::string cend_str = Key(compact_start + 9);
        Slice cstart = cstart_str;
        Slice cend = cend_str;
        dbfull()->TEST_CompactRange(0, &cstart, &cend, handles_[1]);
      }

      // After a full pass of slice compactions, everything is in level 1.
      ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
      ASSERT_GT(NumTableFilesAtLevel(1, 1), 0);
    }
    // ApproximateOffsetOf() is not yet implemented in plain table format.
  } while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction |
                         kSkipPlainTable | kSkipHashIndex));
}
// Checks that approximate sizes track cumulative value sizes correctly when
// small (10K), large (100K/300K) and duplicated values are interleaved, and
// that the estimates survive reopen and level-0 compaction.
TEST_F(DBTest, ApproximateSizes_MixOfSmallAndLarge) {
  do {
    Options options = CurrentOptions();
    options.compression = kNoCompression;
    CreateAndReopenWithCF({"pikachu"}, options);
    Random rnd(301);
    // big1 is written twice (keys 2 and 4) to exercise repeated large values.
    std::string big1 = RandomString(&rnd, 100000);
    ASSERT_OK(Put(1, Key(0), RandomString(&rnd, 10000)));
    ASSERT_OK(Put(1, Key(1), RandomString(&rnd, 10000)));
    ASSERT_OK(Put(1, Key(2), big1));
    ASSERT_OK(Put(1, Key(3), RandomString(&rnd, 10000)));
    ASSERT_OK(Put(1, Key(4), big1));
    ASSERT_OK(Put(1, Key(5), RandomString(&rnd, 10000)));
    ASSERT_OK(Put(1, Key(6), RandomString(&rnd, 300000)));
    ASSERT_OK(Put(1, Key(7), RandomString(&rnd, 10000)));

    // Check sizes across recovery by reopening a few times
    for (int run = 0; run < 3; run++) {
      ReopenWithColumnFamilies({"default", "pikachu"}, options);
      // Each bound below is the running sum of the value sizes written
      // above, with ~1000 bytes of slack for metadata.
      ASSERT_TRUE(Between(Size("", Key(0), 1), 0, 0));
      ASSERT_TRUE(Between(Size("", Key(1), 1), 10000, 11000));
      ASSERT_TRUE(Between(Size("", Key(2), 1), 20000, 21000));
      ASSERT_TRUE(Between(Size("", Key(3), 1), 120000, 121000));
      ASSERT_TRUE(Between(Size("", Key(4), 1), 130000, 131000));
      ASSERT_TRUE(Between(Size("", Key(5), 1), 230000, 231000));
      ASSERT_TRUE(Between(Size("", Key(6), 1), 240000, 241000));
      ASSERT_TRUE(Between(Size("", Key(7), 1), 540000, 541000));
      ASSERT_TRUE(Between(Size("", Key(8), 1), 550000, 560000));

      ASSERT_TRUE(Between(Size(Key(3), Key(5), 1), 110000, 111000));

      dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]);
    }
    // ApproximateOffsetOf() is not yet implemented in plain table format.
  } while (ChangeOptions(kSkipPlainTable));
}
  1470. #endif // ROCKSDB_LITE
  1471. #ifndef ROCKSDB_LITE
// Exercises snapshot lifecycle across two column families: reads at each
// snapshot see the values current when it was taken, and the oldest-snapshot
// time/sequence trackers update as snapshots are created and released.
TEST_F(DBTest, Snapshot) {
  anon::OptionsOverride options_override;
  options_override.skip_policy = kSkipNoSnapshot;
  do {
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions(options_override));
    Put(0, "foo", "0v1");
    Put(1, "foo", "1v1");

    // s1 captures the "v1" state of both column families.
    const Snapshot* s1 = db_->GetSnapshot();
    ASSERT_EQ(1U, GetNumSnapshots());
    uint64_t time_snap1 = GetTimeOldestSnapshots();
    ASSERT_GT(time_snap1, 0U);
    ASSERT_EQ(GetSequenceOldestSnapshots(), s1->GetSequenceNumber());
    Put(0, "foo", "0v2");
    Put(1, "foo", "1v2");

    // Advance the mock clock so s2 gets a distinct timestamp.
    env_->addon_time_.fetch_add(1);

    // s2 captures the "v2" state; s1 is still the oldest snapshot.
    const Snapshot* s2 = db_->GetSnapshot();
    ASSERT_EQ(2U, GetNumSnapshots());
    ASSERT_EQ(time_snap1, GetTimeOldestSnapshots());
    ASSERT_EQ(GetSequenceOldestSnapshots(), s1->GetSequenceNumber());
    Put(0, "foo", "0v3");
    Put(1, "foo", "1v3");

    {
      // s3 is scoped: released automatically at the end of this block.
      ManagedSnapshot s3(db_);
      ASSERT_EQ(3U, GetNumSnapshots());
      ASSERT_EQ(time_snap1, GetTimeOldestSnapshots());
      ASSERT_EQ(GetSequenceOldestSnapshots(), s1->GetSequenceNumber());

      Put(0, "foo", "0v4");
      Put(1, "foo", "1v4");
      // Each snapshot sees exactly the version current when it was taken.
      ASSERT_EQ("0v1", Get(0, "foo", s1));
      ASSERT_EQ("1v1", Get(1, "foo", s1));
      ASSERT_EQ("0v2", Get(0, "foo", s2));
      ASSERT_EQ("1v2", Get(1, "foo", s2));
      ASSERT_EQ("0v3", Get(0, "foo", s3.snapshot()));
      ASSERT_EQ("1v3", Get(1, "foo", s3.snapshot()));
      ASSERT_EQ("0v4", Get(0, "foo"));
      ASSERT_EQ("1v4", Get(1, "foo"));
    }

    // s3 released by ManagedSnapshot's destructor.
    ASSERT_EQ(2U, GetNumSnapshots());
    ASSERT_EQ(time_snap1, GetTimeOldestSnapshots());
    ASSERT_EQ(GetSequenceOldestSnapshots(), s1->GetSequenceNumber());
    ASSERT_EQ("0v1", Get(0, "foo", s1));
    ASSERT_EQ("1v1", Get(1, "foo", s1));
    ASSERT_EQ("0v2", Get(0, "foo", s2));
    ASSERT_EQ("1v2", Get(1, "foo", s2));
    ASSERT_EQ("0v4", Get(0, "foo"));
    ASSERT_EQ("1v4", Get(1, "foo"));

    // After releasing s1, s2 becomes the oldest snapshot.
    db_->ReleaseSnapshot(s1);
    ASSERT_EQ("0v2", Get(0, "foo", s2));
    ASSERT_EQ("1v2", Get(1, "foo", s2));
    ASSERT_EQ("0v4", Get(0, "foo"));
    ASSERT_EQ("1v4", Get(1, "foo"));
    ASSERT_EQ(1U, GetNumSnapshots());
    ASSERT_LT(time_snap1, GetTimeOldestSnapshots());
    ASSERT_EQ(GetSequenceOldestSnapshots(), s2->GetSequenceNumber());

    // No snapshots left: trackers reset.
    db_->ReleaseSnapshot(s2);
    ASSERT_EQ(0U, GetNumSnapshots());
    ASSERT_EQ(GetSequenceOldestSnapshots(), 0);
    ASSERT_EQ("0v4", Get(0, "foo"));
    ASSERT_EQ("1v4", Get(1, "foo"));
  } while (ChangeOptions());
}
// Verifies that a large value pinned only by a snapshot is physically
// removed by compaction after the snapshot is released, shrinking the
// approximate size of the covering range.
TEST_F(DBTest, HiddenValuesAreRemoved) {
  anon::OptionsOverride options_override;
  options_override.skip_policy = kSkipNoSnapshot;
  do {
    Options options = CurrentOptions(options_override);
    CreateAndReopenWithCF({"pikachu"}, options);
    Random rnd(301);
    FillLevels("a", "z", 1);

    std::string big = RandomString(&rnd, 50000);
    Put(1, "foo", big);
    Put(1, "pastfoo", "v");
    // The snapshot pins the 50KB value of "foo".
    const Snapshot* snapshot = db_->GetSnapshot();
    Put(1, "foo", "tiny");
    Put(1, "pastfoo2", "v2");  // Advance sequence number one more

    ASSERT_OK(Flush(1));
    ASSERT_GT(NumTableFilesAtLevel(0, 1), 0);

    // While the snapshot is live, the big value is still readable and still
    // occupies space on disk.
    ASSERT_EQ(big, Get(1, "foo", snapshot));
    ASSERT_TRUE(Between(Size("", "pastfoo", 1), 50000, 60000));
    db_->ReleaseSnapshot(snapshot);
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ tiny, " + big + " ]");
    Slice x("x");
    // Compacting level 0 drops the now-unpinned big value.
    dbfull()->TEST_CompactRange(0, nullptr, &x, handles_[1]);
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ tiny ]");
    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
    ASSERT_GE(NumTableFilesAtLevel(1, 1), 1);
    dbfull()->TEST_CompactRange(1, nullptr, &x, handles_[1]);
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ tiny ]");

    // The range no longer contains the 50KB value.
    ASSERT_TRUE(Between(Size("", "pastfoo", 1), 0, 1000));
    // ApproximateOffsetOf() is not yet implemented in plain table format,
    // which is used by Size().
  } while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction |
                         kSkipPlainTable));
}
  1566. #endif // ROCKSDB_LITE
  1567. TEST_F(DBTest, UnremovableSingleDelete) {
  1568. // If we compact:
  1569. //
  1570. // Put(A, v1) Snapshot SingleDelete(A) Put(A, v2)
  1571. //
  1572. // We do not want to end up with:
  1573. //
  1574. // Put(A, v1) Snapshot Put(A, v2)
  1575. //
  1576. // Because a subsequent SingleDelete(A) would delete the Put(A, v2)
  1577. // but not Put(A, v1), so Get(A) would return v1.
  1578. anon::OptionsOverride options_override;
  1579. options_override.skip_policy = kSkipNoSnapshot;
  1580. do {
  1581. Options options = CurrentOptions(options_override);
  1582. options.disable_auto_compactions = true;
  1583. CreateAndReopenWithCF({"pikachu"}, options);
  1584. Put(1, "foo", "first");
  1585. const Snapshot* snapshot = db_->GetSnapshot();
  1586. SingleDelete(1, "foo");
  1587. Put(1, "foo", "second");
  1588. ASSERT_OK(Flush(1));
  1589. ASSERT_EQ("first", Get(1, "foo", snapshot));
  1590. ASSERT_EQ("second", Get(1, "foo"));
  1591. dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr,
  1592. nullptr);
  1593. ASSERT_EQ("[ second, SDEL, first ]", AllEntriesFor("foo", 1));
  1594. SingleDelete(1, "foo");
  1595. ASSERT_EQ("first", Get(1, "foo", snapshot));
  1596. ASSERT_EQ("NOT_FOUND", Get(1, "foo"));
  1597. dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr,
  1598. nullptr);
  1599. ASSERT_EQ("first", Get(1, "foo", snapshot));
  1600. ASSERT_EQ("NOT_FOUND", Get(1, "foo"));
  1601. db_->ReleaseSnapshot(snapshot);
  1602. // Skip FIFO and universal compaction beccause they do not apply to the test
  1603. // case. Skip MergePut because single delete does not get removed when it
  1604. // encounters a merge.
  1605. } while (ChangeOptions(kSkipFIFOCompaction | kSkipUniversalCompaction |
  1606. kSkipMergePut));
  1607. }
  1608. #ifndef ROCKSDB_LITE
// Checks that a deletion marker shadowed by a newer Put of the same key is
// dropped by compaction, while the old value in the last level survives
// until its level is actually compacted.
TEST_F(DBTest, DeletionMarkers1) {
  Options options = CurrentOptions();
  CreateAndReopenWithCF({"pikachu"}, options);
  Put(1, "foo", "v1");
  ASSERT_OK(Flush(1));
  const int last = 2;
  MoveFilesToLevel(last, 1);
  // foo => v1 is now in last level
  ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1);

  // Place a table at level last-1 to prevent merging with preceding mutation
  Put(1, "a", "begin");
  Put(1, "z", "end");
  Flush(1);
  MoveFilesToLevel(last - 1, 1);
  ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1);
  ASSERT_EQ(NumTableFilesAtLevel(last - 1, 1), 1);

  Delete(1, "foo");
  Put(1, "foo", "v2");
  ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, DEL, v1 ]");
  ASSERT_OK(Flush(1));  // Moves to level last-2
  // The DEL was already dropped during the flush (v2 hides it).
  ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, v1 ]");
  Slice z("z");
  dbfull()->TEST_CompactRange(last - 2, nullptr, &z, handles_[1]);
  // DEL eliminated, but v1 remains because we aren't compacting that level
  // (DEL can be eliminated because v2 hides v1).
  ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, v1 ]");
  dbfull()->TEST_CompactRange(last - 1, nullptr, nullptr, handles_[1]);
  // Merging last-1 w/ last, so we are the base level for "foo", so
  // DEL is removed. (as is v1).
  ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2 ]");
}
// Like DeletionMarkers1, but the key is only deleted (no newer Put), so the
// deletion marker must be kept until the level holding the shadowed value
// is merged away.
TEST_F(DBTest, DeletionMarkers2) {
  Options options = CurrentOptions();
  CreateAndReopenWithCF({"pikachu"}, options);
  Put(1, "foo", "v1");
  ASSERT_OK(Flush(1));
  const int last = 2;
  MoveFilesToLevel(last, 1);
  // foo => v1 is now in last level
  ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1);

  // Place a table at level last-1 to prevent merging with preceding mutation
  Put(1, "a", "begin");
  Put(1, "z", "end");
  Flush(1);
  MoveFilesToLevel(last - 1, 1);
  ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1);
  ASSERT_EQ(NumTableFilesAtLevel(last - 1, 1), 1);

  Delete(1, "foo");
  ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v1 ]");
  ASSERT_OK(Flush(1));  // Moves to level last-2
  ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v1 ]");
  dbfull()->TEST_CompactRange(last - 2, nullptr, nullptr, handles_[1]);
  // DEL kept: "last" file overlaps
  ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v1 ]");
  dbfull()->TEST_CompactRange(last - 1, nullptr, nullptr, handles_[1]);
  // Merging last-1 w/ last, so we are the base level for "foo", so
  // DEL is removed. (as is v1).
  ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]");
}
// Regression test: a memtable flush must detect overlap with existing
// level-0 files so a deletion is not pushed past the data it deletes.
TEST_F(DBTest, OverlapInLevel0) {
  do {
    Options options = CurrentOptions();
    CreateAndReopenWithCF({"pikachu"}, options);

    // Fill levels 1 and 2 to disable the pushing of new memtables to levels >
    // 0.
    ASSERT_OK(Put(1, "100", "v100"));
    ASSERT_OK(Put(1, "999", "v999"));
    Flush(1);
    MoveFilesToLevel(2, 1);
    ASSERT_OK(Delete(1, "100"));
    ASSERT_OK(Delete(1, "999"));
    Flush(1);
    MoveFilesToLevel(1, 1);
    ASSERT_EQ("0,1,1", FilesPerLevel(1));

    // Make files spanning the following ranges in level-0:
    //  files[0]  200 .. 900
    //  files[1]  300 .. 500
    // Note that files are sorted by smallest key.
    ASSERT_OK(Put(1, "300", "v300"));
    ASSERT_OK(Put(1, "500", "v500"));
    Flush(1);
    ASSERT_OK(Put(1, "200", "v200"));
    ASSERT_OK(Put(1, "600", "v600"));
    ASSERT_OK(Put(1, "900", "v900"));
    Flush(1);
    ASSERT_EQ("2,1,1", FilesPerLevel(1));

    // Compact away the placeholder files we created initially
    dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]);
    dbfull()->TEST_CompactRange(2, nullptr, nullptr, handles_[1]);
    ASSERT_EQ("2", FilesPerLevel(1));

    // Do a memtable compaction.  Before bug-fix, the compaction would
    // not detect the overlap with level-0 files and would incorrectly place
    // the deletion in a deeper level.
    ASSERT_OK(Delete(1, "600"));
    Flush(1);
    ASSERT_EQ("3", FilesPerLevel(1));
    // The deletion must be visible: "600" is gone.
    ASSERT_EQ("NOT_FOUND", Get(1, "600"));
  } while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction));
}
  1708. #endif // ROCKSDB_LITE
// Reopening a column family with a comparator whose Name() differs from the
// one the CF was created with must fail, and the error must say why.
TEST_F(DBTest, ComparatorCheck) {
  // Behaves exactly like the bytewise comparator but reports a different
  // Name(), which is what the comparator-compatibility check keys off.
  class NewComparator : public Comparator {
   public:
    const char* Name() const override { return "rocksdb.NewComparator"; }
    int Compare(const Slice& a, const Slice& b) const override {
      return BytewiseComparator()->Compare(a, b);
    }
    void FindShortestSeparator(std::string* s, const Slice& l) const override {
      BytewiseComparator()->FindShortestSeparator(s, l);
    }
    void FindShortSuccessor(std::string* key) const override {
      BytewiseComparator()->FindShortSuccessor(key);
    }
  };
  Options new_options, options;
  NewComparator cmp;
  do {
    options = CurrentOptions();
    CreateAndReopenWithCF({"pikachu"}, options);
    new_options = CurrentOptions();
    new_options.comparator = &cmp;
    // only the non-default column family has non-matching comparator
    Status s = TryReopenWithColumnFamilies(
        {"default", "pikachu"}, std::vector<Options>({options, new_options}));
    ASSERT_TRUE(!s.ok());
    // The failure message must mention the comparator mismatch.
    ASSERT_TRUE(s.ToString().find("comparator") != std::string::npos)
        << s.ToString();
  } while (ChangeCompactOptions());
}
// Exercises a user-defined comparator through flushes and compactions.
// Keys look like "[<number>]" and compare numerically, so different
// spellings of the same number (e.g. "[20]" and "[0x14]") are one key.
TEST_F(DBTest, CustomComparator) {
  class NumberComparator : public Comparator {
   public:
    const char* Name() const override { return "test.NumberComparator"; }
    int Compare(const Slice& a, const Slice& b) const override {
      return ToNumber(a) - ToNumber(b);
    }
    // Separator/successor shortening is skipped; these only validate that
    // every key the DB hands us is well-formed.
    void FindShortestSeparator(std::string* s, const Slice& l) const override {
      ToNumber(*s);  // Check format
      ToNumber(l);   // Check format
    }
    void FindShortSuccessor(std::string* key) const override {
      ToNumber(*key);  // Check format
    }

   private:
    static int ToNumber(const Slice& x) {
      // Check that there are no extra characters.
      EXPECT_TRUE(x.size() >= 2 && x[0] == '[' && x[x.size() - 1] == ']')
          << EscapeString(x);
      int val;
      char ignored;
      // %i accepts decimal, hex ("0x..") and octal; a return of exactly 1
      // (the trailing %c unmatched) proves nothing follows the "]".
      EXPECT_TRUE(sscanf(x.ToString().c_str(), "[%i]%c", &val, &ignored) == 1)
          << EscapeString(x);
      return val;
    }
  };
  Options new_options;
  NumberComparator cmp;
  do {
    new_options = CurrentOptions();
    new_options.create_if_missing = true;
    new_options.comparator = &cmp;
    new_options.write_buffer_size = 4096;  // Compact more often
    new_options.arena_block_size = 4096;
    new_options = CurrentOptions(new_options);
    DestroyAndReopen(new_options);
    CreateAndReopenWithCF({"pikachu"}, new_options);
    ASSERT_OK(Put(1, "[10]", "ten"));
    ASSERT_OK(Put(1, "[0x14]", "twenty"));
    for (int i = 0; i < 2; i++) {
      // Same lookups pass both before (i==0) and after (i==1) compaction.
      ASSERT_EQ("ten", Get(1, "[10]"));
      ASSERT_EQ("ten", Get(1, "[0xa]"));
      ASSERT_EQ("twenty", Get(1, "[20]"));
      ASSERT_EQ("twenty", Get(1, "[0x14]"));
      ASSERT_EQ("NOT_FOUND", Get(1, "[15]"));
      ASSERT_EQ("NOT_FOUND", Get(1, "[0xf]"));
      Compact(1, "[0]", "[9999]");
    }
    // Write enough keys to trigger flushes under the tiny write buffer,
    // running lots of data through the format-checking callbacks above.
    for (int run = 0; run < 2; run++) {
      for (int i = 0; i < 1000; i++) {
        char buf[100];
        snprintf(buf, sizeof(buf), "[%d]", i * 10);
        ASSERT_OK(Put(1, buf, buf));
      }
      Compact(1, "[0]", "[1000000]");
    }
  } while (ChangeCompactOptions());
}
// Verifies the four combinations of create_if_missing / error_if_exists
// against a missing and an existing database directory.
TEST_F(DBTest, DBOpen_Options) {
  Options options = CurrentOptions();
  std::string dbname = test::PerThreadDBPath("db_options_test");
  ASSERT_OK(DestroyDB(dbname, options));

  // Does not exist, and create_if_missing == false: error
  DB* db = nullptr;
  options.create_if_missing = false;
  Status s = DB::Open(options, dbname, &db);
  ASSERT_TRUE(strstr(s.ToString().c_str(), "does not exist") != nullptr);
  ASSERT_TRUE(db == nullptr);

  // Does not exist, and create_if_missing == true: OK
  options.create_if_missing = true;
  s = DB::Open(options, dbname, &db);
  ASSERT_OK(s);
  ASSERT_TRUE(db != nullptr);

  delete db;
  db = nullptr;

  // Does exist, and error_if_exists == true: error
  options.create_if_missing = false;
  options.error_if_exists = true;
  s = DB::Open(options, dbname, &db);
  ASSERT_TRUE(strstr(s.ToString().c_str(), "exists") != nullptr);
  ASSERT_TRUE(db == nullptr);

  // Does exist, and error_if_exists == false: OK
  options.create_if_missing = true;
  options.error_if_exists = false;
  s = DB::Open(options, dbname, &db);
  ASSERT_OK(s);
  ASSERT_TRUE(db != nullptr);

  delete db;
  db = nullptr;
}
// Reopening with fewer levels than the DB already uses must be rejected:
// a file was moved to level 3, then we reopen with num_levels = 2.
TEST_F(DBTest, DBOpen_Change_NumLevels) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  DestroyAndReopen(options);
  ASSERT_TRUE(db_ != nullptr);
  CreateAndReopenWithCF({"pikachu"}, options);

  ASSERT_OK(Put(1, "a", "123"));
  ASSERT_OK(Put(1, "b", "234"));
  Flush(1);
  MoveFilesToLevel(3, 1);  // data now lives beyond the new num_levels
  Close();

  options.create_if_missing = false;
  options.num_levels = 2;
  Status s = TryReopenWithColumnFamilies({"default", "pikachu"}, options);
  ASSERT_TRUE(strstr(s.ToString().c_str(), "Invalid argument") != nullptr);
  ASSERT_TRUE(db_ == nullptr);
}
// DestroyDB on a database must also destroy its meta databases,
// recursively (meta database of the meta database, etc.).
TEST_F(DBTest, DestroyDBMetaDatabase) {
  std::string dbname = test::PerThreadDBPath("db_meta");
  ASSERT_OK(env_->CreateDirIfMissing(dbname));
  std::string metadbname = MetaDatabaseName(dbname, 0);
  ASSERT_OK(env_->CreateDirIfMissing(metadbname));
  std::string metametadbname = MetaDatabaseName(metadbname, 0);
  ASSERT_OK(env_->CreateDirIfMissing(metametadbname));

  // Destroy previous versions if they exist. Using the long way.
  Options options = CurrentOptions();
  ASSERT_OK(DestroyDB(metametadbname, options));
  ASSERT_OK(DestroyDB(metadbname, options));
  ASSERT_OK(DestroyDB(dbname, options));

  // Setup databases
  DB* db = nullptr;
  ASSERT_OK(DB::Open(options, dbname, &db));
  delete db;
  db = nullptr;
  ASSERT_OK(DB::Open(options, metadbname, &db));
  delete db;
  db = nullptr;
  ASSERT_OK(DB::Open(options, metametadbname, &db));
  delete db;
  db = nullptr;

  // Delete databases
  ASSERT_OK(DestroyDB(dbname, options));

  // Check if deletion worked: none of the three should open with
  // create_if_missing disabled.
  options.create_if_missing = false;
  ASSERT_TRUE(!(DB::Open(options, dbname, &db)).ok());
  ASSERT_TRUE(!(DB::Open(options, metadbname, &db)).ok());
  ASSERT_TRUE(!(DB::Open(options, metametadbname, &db)).ok());
}
  1876. #ifndef ROCKSDB_LITE
  1877. TEST_F(DBTest, SnapshotFiles) {
  1878. do {
  1879. Options options = CurrentOptions();
  1880. options.write_buffer_size = 100000000; // Large write buffer
  1881. CreateAndReopenWithCF({"pikachu"}, options);
  1882. Random rnd(301);
  1883. // Write 8MB (80 values, each 100K)
  1884. ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
  1885. std::vector<std::string> values;
  1886. for (int i = 0; i < 80; i++) {
  1887. values.push_back(RandomString(&rnd, 100000));
  1888. ASSERT_OK(Put((i < 40), Key(i), values[i]));
  1889. }
  1890. // assert that nothing makes it to disk yet.
  1891. ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
  1892. // get a file snapshot
  1893. uint64_t manifest_number = 0;
  1894. uint64_t manifest_size = 0;
  1895. std::vector<std::string> files;
  1896. dbfull()->DisableFileDeletions();
  1897. dbfull()->GetLiveFiles(files, &manifest_size);
  1898. // CURRENT, MANIFEST, OPTIONS, *.sst files (one for each CF)
  1899. ASSERT_EQ(files.size(), 5U);
  1900. uint64_t number = 0;
  1901. FileType type;
  1902. // copy these files to a new snapshot directory
  1903. std::string snapdir = dbname_ + ".snapdir/";
  1904. ASSERT_OK(env_->CreateDirIfMissing(snapdir));
  1905. for (size_t i = 0; i < files.size(); i++) {
  1906. // our clients require that GetLiveFiles returns
  1907. // files with "/" as first character!
  1908. ASSERT_EQ(files[i][0], '/');
  1909. std::string src = dbname_ + files[i];
  1910. std::string dest = snapdir + files[i];
  1911. uint64_t size;
  1912. ASSERT_OK(env_->GetFileSize(src, &size));
  1913. // record the number and the size of the
  1914. // latest manifest file
  1915. if (ParseFileName(files[i].substr(1), &number, &type)) {
  1916. if (type == kDescriptorFile) {
  1917. if (number > manifest_number) {
  1918. manifest_number = number;
  1919. ASSERT_GE(size, manifest_size);
  1920. size = manifest_size; // copy only valid MANIFEST data
  1921. }
  1922. }
  1923. }
  1924. CopyFile(src, dest, size);
  1925. }
  1926. // release file snapshot
  1927. dbfull()->DisableFileDeletions();
  1928. // overwrite one key, this key should not appear in the snapshot
  1929. std::vector<std::string> extras;
  1930. for (unsigned int i = 0; i < 1; i++) {
  1931. extras.push_back(RandomString(&rnd, 100000));
  1932. ASSERT_OK(Put(0, Key(i), extras[i]));
  1933. }
  1934. // verify that data in the snapshot are correct
  1935. std::vector<ColumnFamilyDescriptor> column_families;
  1936. column_families.emplace_back("default", ColumnFamilyOptions());
  1937. column_families.emplace_back("pikachu", ColumnFamilyOptions());
  1938. std::vector<ColumnFamilyHandle*> cf_handles;
  1939. DB* snapdb;
  1940. DBOptions opts;
  1941. opts.env = env_;
  1942. opts.create_if_missing = false;
  1943. Status stat =
  1944. DB::Open(opts, snapdir, column_families, &cf_handles, &snapdb);
  1945. ASSERT_OK(stat);
  1946. ReadOptions roptions;
  1947. std::string val;
  1948. for (unsigned int i = 0; i < 80; i++) {
  1949. stat = snapdb->Get(roptions, cf_handles[i < 40], Key(i), &val);
  1950. ASSERT_EQ(values[i].compare(val), 0);
  1951. }
  1952. for (auto cfh : cf_handles) {
  1953. delete cfh;
  1954. }
  1955. delete snapdb;
  1956. // look at the new live files after we added an 'extra' key
  1957. // and after we took the first snapshot.
  1958. uint64_t new_manifest_number = 0;
  1959. uint64_t new_manifest_size = 0;
  1960. std::vector<std::string> newfiles;
  1961. dbfull()->DisableFileDeletions();
  1962. dbfull()->GetLiveFiles(newfiles, &new_manifest_size);
  1963. // find the new manifest file. assert that this manifest file is
  1964. // the same one as in the previous snapshot. But its size should be
  1965. // larger because we added an extra key after taking the
  1966. // previous shapshot.
  1967. for (size_t i = 0; i < newfiles.size(); i++) {
  1968. std::string src = dbname_ + "/" + newfiles[i];
  1969. // record the lognumber and the size of the
  1970. // latest manifest file
  1971. if (ParseFileName(newfiles[i].substr(1), &number, &type)) {
  1972. if (type == kDescriptorFile) {
  1973. if (number > new_manifest_number) {
  1974. uint64_t size;
  1975. new_manifest_number = number;
  1976. ASSERT_OK(env_->GetFileSize(src, &size));
  1977. ASSERT_GE(size, new_manifest_size);
  1978. }
  1979. }
  1980. }
  1981. }
  1982. ASSERT_EQ(manifest_number, new_manifest_number);
  1983. ASSERT_GT(new_manifest_size, manifest_size);
  1984. // release file snapshot
  1985. dbfull()->DisableFileDeletions();
  1986. } while (ChangeCompactOptions());
  1987. }
  1988. TEST_F(DBTest, ReadonlyDBGetLiveManifestSize) {
  1989. do {
  1990. Options options = CurrentOptions();
  1991. options.level0_file_num_compaction_trigger = 2;
  1992. DestroyAndReopen(options);
  1993. ASSERT_OK(Put("foo", "bar"));
  1994. ASSERT_OK(Flush());
  1995. ASSERT_OK(Put("foo", "bar"));
  1996. ASSERT_OK(Flush());
  1997. ASSERT_OK(dbfull()->TEST_WaitForCompact());
  1998. Close();
  1999. ASSERT_OK(ReadOnlyReopen(options));
  2000. uint64_t manifest_size = 0;
  2001. std::vector<std::string> files;
  2002. dbfull()->GetLiveFiles(files, &manifest_size);
  2003. for (const std::string& f : files) {
  2004. uint64_t number = 0;
  2005. FileType type;
  2006. if (ParseFileName(f.substr(1), &number, &type)) {
  2007. if (type == kDescriptorFile) {
  2008. uint64_t size_on_disk;
  2009. env_->GetFileSize(dbname_ + "/" + f, &size_on_disk);
  2010. ASSERT_EQ(manifest_size, size_on_disk);
  2011. break;
  2012. }
  2013. }
  2014. }
  2015. Close();
  2016. } while (ChangeCompactOptions());
  2017. }
  2018. #endif
// keep_log_file_num must cap the number of retained info LOG files, both
// when logs live in the DB dir (mode 0) and in a separate db_log_dir
// (mode 1).
TEST_F(DBTest, PurgeInfoLogs) {
  Options options = CurrentOptions();
  options.keep_log_file_num = 5;
  options.create_if_missing = true;
  for (int mode = 0; mode <= 1; mode++) {
    if (mode == 1) {
      options.db_log_dir = dbname_ + "_logs";
      env_->CreateDirIfMissing(options.db_log_dir);
    } else {
      options.db_log_dir = "";
    }
    // Each reopen creates a fresh info log; 8 reopens exceed the cap of 5.
    for (int i = 0; i < 8; i++) {
      Reopen(options);
    }

    std::vector<std::string> files;
    env_->GetChildren(options.db_log_dir.empty() ? dbname_ : options.db_log_dir,
                      &files);
    int info_log_count = 0;
    for (std::string file : files) {
      if (file.find("LOG") != std::string::npos) {
        info_log_count++;
      }
    }
    ASSERT_EQ(5, info_log_count);

    Destroy(options);
    // For mode 0, this tests that DestroyDB() deleted all the LOG files
    // under the DB dir. For mode 1, no info log file should ever have been
    // put under the DB dir in the first place (they went to db_log_dir).
    std::vector<std::string> db_files;
    env_->GetChildren(dbname_, &db_files);
    for (std::string file : db_files) {
      ASSERT_TRUE(file.find("LOG") == std::string::npos);
    }

    if (mode == 1) {
      // Cleaning up
      env_->GetChildren(options.db_log_dir, &files);
      for (std::string file : files) {
        env_->DeleteFile(options.db_log_dir + "/" + file);
      }
      env_->DeleteDir(options.db_log_dir);
    }
  }
}
  2061. #ifndef ROCKSDB_LITE
  2062. // Multi-threaded test:
  2063. namespace {
// Knobs for the multi-threaded reader/writer stress test below.
static const int kColumnFamilies = 10;
static const int kNumThreads = 10;
static const int kTestSeconds = 10;
static const int kNumKeys = 1000;

// State shared by the test driver and all worker threads.
struct MTState {
  DBTest* test;
  std::atomic<bool> stop;  // set by the driver to stop all workers
  std::atomic<int> counter[kNumThreads];       // per-thread op progress
  std::atomic<bool> thread_done[kNumThreads];  // set by each worker on exit
};

// Per-thread arguments handed to MTThreadBody.
struct MTThread {
  MTState* state;
  int id;
  bool multiget_batched;  // use the batched MultiGet API on the read path
};
// Worker body: until MTState::stop is set, each iteration either writes one
// random key into every column family via a single atomic batch, or reads
// that key from every column family and verifies the batch writes were
// atomic (all CFs carry the same unique_id).
static void MTThreadBody(void* arg) {
  MTThread* t = reinterpret_cast<MTThread*>(arg);
  int id = t->id;
  DB* db = t->state->test->db_;
  int counter = 0;
  fprintf(stderr, "... starting thread %d\n", id);
  Random rnd(1000 + id);
  char valbuf[1500];
  while (t->state->stop.load(std::memory_order_acquire) == false) {
    // Publish our progress so readers can bound the counters they observe.
    t->state->counter[id].store(counter, std::memory_order_release);

    int key = rnd.Uniform(kNumKeys);
    char keybuf[20];
    snprintf(keybuf, sizeof(keybuf), "%016d", key);

    if (rnd.OneIn(2)) {
      // Write values of the form <key, my id, counter, cf, unique_id>.
      // into each of the CFs
      // We add some padding for force compactions.
      int unique_id = rnd.Uniform(1000000);

      // Half of the time directly use WriteBatch. Half of the time use
      // WriteBatchWithIndex.
      if (rnd.OneIn(2)) {
        WriteBatch batch;
        for (int cf = 0; cf < kColumnFamilies; ++cf) {
          snprintf(valbuf, sizeof(valbuf), "%d.%d.%d.%d.%-1000d", key, id,
                   static_cast<int>(counter), cf, unique_id);
          batch.Put(t->state->test->handles_[cf], Slice(keybuf), Slice(valbuf));
        }
        ASSERT_OK(db->Write(WriteOptions(), &batch));
      } else {
        WriteBatchWithIndex batch(db->GetOptions().comparator);
        for (int cf = 0; cf < kColumnFamilies; ++cf) {
          snprintf(valbuf, sizeof(valbuf), "%d.%d.%d.%d.%-1000d", key, id,
                   static_cast<int>(counter), cf, unique_id);
          batch.Put(t->state->test->handles_[cf], Slice(keybuf), Slice(valbuf));
        }
        ASSERT_OK(db->Write(WriteOptions(), batch.GetWriteBatch()));
      }
    } else {
      // Read a value and verify that it matches the pattern written above
      // and that writes to all column families were atomic (unique_id is the
      // same)
      std::vector<Slice> keys(kColumnFamilies, Slice(keybuf));
      std::vector<std::string> values;
      std::vector<Status> statuses;
      if (!t->multiget_batched) {
        statuses = db->MultiGet(ReadOptions(), t->state->test->handles_, keys,
                                &values);
      } else {
        // Batched API path: one MultiGet call per CF, all under a single
        // snapshot so the per-CF reads see one consistent write.
        std::vector<PinnableSlice> pin_values(keys.size());
        statuses.resize(keys.size());
        const Snapshot* snapshot = db->GetSnapshot();
        ReadOptions ro;
        ro.snapshot = snapshot;
        for (int cf = 0; cf < kColumnFamilies; ++cf) {
          db->MultiGet(ro, t->state->test->handles_[cf], 1, &keys[cf],
                       &pin_values[cf], &statuses[cf]);
        }
        db->ReleaseSnapshot(snapshot);
        values.resize(keys.size());
        for (int cf = 0; cf < kColumnFamilies; ++cf) {
          if (statuses[cf].ok()) {
            values[cf].assign(pin_values[cf].data(), pin_values[cf].size());
          }
        }
      }
      Status s = statuses[0];
      // all statuses have to be the same
      for (size_t i = 1; i < statuses.size(); ++i) {
        // they are either both ok or both not-found
        ASSERT_TRUE((s.ok() && statuses[i].ok()) ||
                    (s.IsNotFound() && statuses[i].IsNotFound()));
      }
      if (s.IsNotFound()) {
        // Key has not yet been written
      } else {
        // Check that the writer thread counter is >= the counter in the value
        ASSERT_OK(s);
        int unique_id = -1;
        for (int i = 0; i < kColumnFamilies; ++i) {
          int k, w, c, cf, u;
          ASSERT_EQ(5, sscanf(values[i].c_str(), "%d.%d.%d.%d.%d", &k, &w, &c,
                              &cf, &u))
              << values[i];
          ASSERT_EQ(k, key);
          ASSERT_GE(w, 0);
          ASSERT_LT(w, kNumThreads);
          ASSERT_LE(c, t->state->counter[w].load(std::memory_order_acquire));
          ASSERT_EQ(cf, i);
          if (i == 0) {
            unique_id = u;
          } else {
            // this checks that updates across column families happened
            // atomically -- all unique ids are the same
            ASSERT_EQ(u, unique_id);
          }
        }
      }
    }
    counter++;
  }
  t->state->thread_done[id].store(true, std::memory_order_release);
  fprintf(stderr, "... stopping thread %d after %d ops\n", id, int(counter));
}
  2182. } // namespace
  2183. class MultiThreadedDBTest
  2184. : public DBTest,
  2185. public ::testing::WithParamInterface<std::tuple<int, bool>> {
  2186. public:
  2187. void SetUp() override {
  2188. std::tie(option_config_, multiget_batched_) = GetParam();
  2189. }
  2190. static std::vector<int> GenerateOptionConfigs() {
  2191. std::vector<int> optionConfigs;
  2192. for (int optionConfig = kDefault; optionConfig < kEnd; ++optionConfig) {
  2193. optionConfigs.push_back(optionConfig);
  2194. }
  2195. return optionConfigs;
  2196. }
  2197. bool multiget_batched_;
  2198. };
// Drives kNumThreads MTThreadBody workers against kColumnFamilies column
// families for kTestSeconds, then stops them and waits for completion.
// The correctness assertions live inside MTThreadBody.
TEST_P(MultiThreadedDBTest, MultiThreaded) {
  if (option_config_ == kPipelinedWrite) return;
  anon::OptionsOverride options_override;
  options_override.skip_policy = kSkipNoSnapshot;
  Options options = CurrentOptions(options_override);
  std::vector<std::string> cfs;
  for (int i = 1; i < kColumnFamilies; ++i) {
    cfs.push_back(ToString(i));
  }
  Reopen(options);
  CreateAndReopenWithCF(cfs, options);
  // Initialize state
  MTState mt;
  mt.test = this;
  mt.stop.store(false, std::memory_order_release);
  for (int id = 0; id < kNumThreads; id++) {
    mt.counter[id].store(0, std::memory_order_release);
    mt.thread_done[id].store(false, std::memory_order_release);
  }

  // Start threads
  MTThread thread[kNumThreads];
  for (int id = 0; id < kNumThreads; id++) {
    thread[id].state = &mt;
    thread[id].id = id;
    thread[id].multiget_batched = multiget_batched_;
    env_->StartThread(MTThreadBody, &thread[id]);
  }

  // Let them run for a while
  env_->SleepForMicroseconds(kTestSeconds * 1000000);

  // Stop the threads and wait for them to finish
  mt.stop.store(true, std::memory_order_release);
  for (int id = 0; id < kNumThreads; id++) {
    // Poll each worker's done flag; workers set it on their way out.
    while (mt.thread_done[id].load(std::memory_order_acquire) == false) {
      env_->SleepForMicroseconds(100000);
    }
  }
}
// Instantiate MultiThreaded for every (option config) x (batched MultiGet
// on/off) combination.
INSTANTIATE_TEST_CASE_P(
    MultiThreaded, MultiThreadedDBTest,
    ::testing::Combine(
        ::testing::ValuesIn(MultiThreadedDBTest::GenerateOptionConfigs()),
        ::testing::Bool()));
  2241. #endif // ROCKSDB_LITE
  2242. // Group commit test:
  2243. #if !defined(TRAVIS) && !defined(OS_WIN)
  2244. // Disable this test temporarily on Travis and appveyor as it fails
  2245. // intermittently. Github issue: #4151
  2246. namespace {
// Knobs and per-thread state for the group-commit test.
static const int kGCNumThreads = 4;
static const int kGCNumKeys = 1000;

struct GCThread {
  DB* db;
  int id;
  std::atomic<bool> done;  // set by the thread after its last Put
};
  2254. static void GCThreadBody(void* arg) {
  2255. GCThread* t = reinterpret_cast<GCThread*>(arg);
  2256. int id = t->id;
  2257. DB* db = t->db;
  2258. WriteOptions wo;
  2259. for (int i = 0; i < kGCNumKeys; ++i) {
  2260. std::string kv(ToString(i + id * kGCNumKeys));
  2261. ASSERT_OK(db->Put(wo, kv, kv));
  2262. }
  2263. t->done = true;
  2264. }
  2265. } // namespace
// Verifies write group commit: concurrent writers get batched by a leader
// (WRITE_DONE_BY_OTHER > 0) and every written key is durable and ordered.
TEST_F(DBTest, GroupCommitTest) {
  do {
    Options options = CurrentOptions();
    options.env = env_;
    options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
    Reopen(options);

    // Sync points force followers to queue up behind the leader so writes
    // actually form a group instead of trickling through one by one.
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
        {{"WriteThread::JoinBatchGroup:BeganWaiting",
          "DBImpl::WriteImpl:BeforeLeaderEnters"},
         {"WriteThread::AwaitState:BlockingWaiting",
          "WriteThread::EnterAsBatchGroupLeader:End"}});
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

    // Start threads
    GCThread thread[kGCNumThreads];
    for (int id = 0; id < kGCNumThreads; id++) {
      thread[id].id = id;
      thread[id].db = db_;
      thread[id].done = false;
      env_->StartThread(GCThreadBody, &thread[id]);
    }
    env_->WaitForJoin();
    // At least one write must have been committed by another thread's leader.
    ASSERT_GT(TestGetTickerCount(options, WRITE_DONE_BY_OTHER), 0);

    std::vector<std::string> expected_db;
    for (int i = 0; i < kGCNumThreads * kGCNumKeys; ++i) {
      expected_db.push_back(ToString(i));
    }
    // Keys compare bytewise, so sort the expected list lexicographically.
    std::sort(expected_db.begin(), expected_db.end());

    // Full iteration must see exactly the expected keys, in order.
    Iterator* itr = db_->NewIterator(ReadOptions());
    itr->SeekToFirst();
    for (auto x : expected_db) {
      ASSERT_TRUE(itr->Valid());
      ASSERT_EQ(itr->key().ToString(), x);
      ASSERT_EQ(itr->value().ToString(), x);
      itr->Next();
    }
    ASSERT_TRUE(!itr->Valid());
    delete itr;

    HistogramData hist_data;
    options.statistics->histogramData(DB_WRITE, &hist_data);
    ASSERT_GT(hist_data.average, 0.0);
  } while (ChangeOptions(kSkipNoSeekToLast));
}
  2308. #endif // TRAVIS
  2309. namespace {
  2310. typedef std::map<std::string, std::string> KVMap;
  2311. }
  2312. class ModelDB : public DB {
  2313. public:
  // A snapshot of the model is simply a full copy of the map at the time
  // GetSnapshot() was called.
  class ModelSnapshot : public Snapshot {
   public:
    KVMap map_;

    SequenceNumber GetSequenceNumber() const override {
      // no need to call this
      assert(false);
      return 0;
    }
  };
  explicit ModelDB(const Options& options) : options_(options) {}

  // All single-key mutations funnel through Write() via a one-entry
  // WriteBatch, so the Handler inside Write() is the only code that
  // touches map_.
  using DB::Put;
  Status Put(const WriteOptions& o, ColumnFamilyHandle* cf, const Slice& k,
             const Slice& v) override {
    WriteBatch batch;
    batch.Put(cf, k, v);
    return Write(o, &batch);
  }
  using DB::Close;
  Status Close() override { return Status::OK(); }
  using DB::Delete;
  Status Delete(const WriteOptions& o, ColumnFamilyHandle* cf,
                const Slice& key) override {
    WriteBatch batch;
    batch.Delete(cf, key);
    return Write(o, &batch);
  }
  using DB::SingleDelete;
  Status SingleDelete(const WriteOptions& o, ColumnFamilyHandle* cf,
                      const Slice& key) override {
    WriteBatch batch;
    batch.SingleDelete(cf, key);
    return Write(o, &batch);
  }
  using DB::Merge;
  Status Merge(const WriteOptions& o, ColumnFamilyHandle* cf, const Slice& k,
               const Slice& v) override {
    WriteBatch batch;
    batch.Merge(cf, k, v);
    return Write(o, &batch);
  }
  // Point lookups are not modeled; tests read the model back via iterators.
  using DB::Get;
  Status Get(const ReadOptions& /*options*/, ColumnFamilyHandle* /*cf*/,
             const Slice& key, PinnableSlice* /*value*/) override {
    return Status::NotSupported(key);
  }

  using DB::GetMergeOperands;
  virtual Status GetMergeOperands(
      const ReadOptions& /*options*/, ColumnFamilyHandle* /*column_family*/,
      const Slice& key, PinnableSlice* /*slice*/,
      GetMergeOperandsOptions* /*merge_operands_options*/,
      int* /*number_of_operands*/) override {
    return Status::NotSupported(key);
  }

  using DB::MultiGet;
  std::vector<Status> MultiGet(
      const ReadOptions& /*options*/,
      const std::vector<ColumnFamilyHandle*>& /*column_family*/,
      const std::vector<Slice>& keys,
      std::vector<std::string>* /*values*/) override {
    // One NotSupported status per requested key.
    std::vector<Status> s(keys.size(),
                          Status::NotSupported("Not implemented."));
    return s;
  }
#ifndef ROCKSDB_LITE
  // File ingest / CF import / checksum verification are irrelevant to the
  // in-memory model; they all report NotSupported.
  using DB::IngestExternalFile;
  Status IngestExternalFile(
      ColumnFamilyHandle* /*column_family*/,
      const std::vector<std::string>& /*external_files*/,
      const IngestExternalFileOptions& /*options*/) override {
    return Status::NotSupported("Not implemented.");
  }

  using DB::IngestExternalFiles;
  Status IngestExternalFiles(
      const std::vector<IngestExternalFileArg>& /*args*/) override {
    return Status::NotSupported("Not implemented");
  }

  using DB::CreateColumnFamilyWithImport;
  virtual Status CreateColumnFamilyWithImport(
      const ColumnFamilyOptions& /*options*/,
      const std::string& /*column_family_name*/,
      const ImportColumnFamilyOptions& /*import_options*/,
      const ExportImportFilesMetaData& /*metadata*/,
      ColumnFamilyHandle** /*handle*/) override {
    return Status::NotSupported("Not implemented.");
  }

  using DB::VerifyChecksum;
  Status VerifyChecksum(const ReadOptions&) override {
    return Status::NotSupported("Not implemented.");
  }

  // Table-properties queries trivially succeed with empty output.
  using DB::GetPropertiesOfAllTables;
  Status GetPropertiesOfAllTables(
      ColumnFamilyHandle* /*column_family*/,
      TablePropertiesCollection* /*props*/) override {
    return Status();
  }

  Status GetPropertiesOfTablesInRange(
      ColumnFamilyHandle* /*column_family*/, const Range* /*range*/,
      std::size_t /*n*/, TablePropertiesCollection* /*props*/) override {
    return Status();
  }
#endif  // ROCKSDB_LITE
  // Conservative stub: always reports that the key may exist, and never
  // produces a value.
  using DB::KeyMayExist;
  bool KeyMayExist(const ReadOptions& /*options*/,
                   ColumnFamilyHandle* /*column_family*/, const Slice& /*key*/,
                   std::string* /*value*/,
                   bool* value_found = nullptr) override {
    if (value_found != nullptr) {
      *value_found = false;
    }
    return true;  // Not Supported directly
  }
  using DB::NewIterator;
  Iterator* NewIterator(const ReadOptions& options,
                        ColumnFamilyHandle* /*column_family*/) override {
    if (options.snapshot == nullptr) {
      // No snapshot: iterate over a private copy of the current map.
      // The ModelIter owns (and deletes) that copy ("true").
      KVMap* saved = new KVMap;
      *saved = map_;
      return new ModelIter(saved, true);
    } else {
      // Snapshot read: iterate the snapshot's map without owning it.
      const KVMap* snapshot_state =
          &(reinterpret_cast<const ModelSnapshot*>(options.snapshot)->map_);
      return new ModelIter(snapshot_state, false);
    }
  }
  Status NewIterators(const ReadOptions& /*options*/,
                      const std::vector<ColumnFamilyHandle*>& /*column_family*/,
                      std::vector<Iterator*>* /*iterators*/) override {
    return Status::NotSupported("Not supported yet");
  }

  // A snapshot is a deep copy of the whole map; the caller must hand it
  // back to ReleaseSnapshot() to free it.
  const Snapshot* GetSnapshot() override {
    ModelSnapshot* snapshot = new ModelSnapshot;
    snapshot->map_ = map_;
    return snapshot;
  }

  void ReleaseSnapshot(const Snapshot* snapshot) override {
    delete reinterpret_cast<const ModelSnapshot*>(snapshot);
  }
  // Applies every operation in the batch to map_ by replaying the batch
  // through a WriteBatch::Handler. This is the single mutation point of
  // the model.
  Status Write(const WriteOptions& /*options*/, WriteBatch* batch) override {
    class Handler : public WriteBatch::Handler {
     public:
      KVMap* map_;
      void Put(const Slice& key, const Slice& value) override {
        (*map_)[key.ToString()] = value.ToString();
      }
      void Merge(const Slice& /*key*/, const Slice& /*value*/) override {
        // ignore merge for now
        // (*map_)[key.ToString()] = value.ToString();
      }
      void Delete(const Slice& key) override { map_->erase(key.ToString()); }
    };
    Handler handler;
    handler.map_ = &map_;
    return batch->Iterate(&handler);
  }
  // DB properties are not modeled; every query reports "unknown property".
  using DB::GetProperty;
  bool GetProperty(ColumnFamilyHandle* /*column_family*/,
                   const Slice& /*property*/, std::string* /*value*/) override {
    return false;
  }
  using DB::GetIntProperty;
  bool GetIntProperty(ColumnFamilyHandle* /*column_family*/,
                      const Slice& /*property*/, uint64_t* /*value*/) override {
    return false;
  }
  using DB::GetMapProperty;
  bool GetMapProperty(ColumnFamilyHandle* /*column_family*/,
                      const Slice& /*property*/,
                      std::map<std::string, std::string>* /*value*/) override {
    return false;
  }
  using DB::GetAggregatedIntProperty;
  bool GetAggregatedIntProperty(const Slice& /*property*/,
                                uint64_t* /*value*/) override {
    return false;
  }
  // Size estimates are meaningless for the in-memory model: report zeros.
  using DB::GetApproximateSizes;
  Status GetApproximateSizes(const SizeApproximationOptions& /*options*/,
                             ColumnFamilyHandle* /*column_family*/,
                             const Range* /*range*/, int n,
                             uint64_t* sizes) override {
    for (int i = 0; i < n; i++) {
      sizes[i] = 0;
    }
    return Status::OK();
  }
  using DB::GetApproximateMemTableStats;
  void GetApproximateMemTableStats(ColumnFamilyHandle* /*column_family*/,
                                   const Range& /*range*/,
                                   uint64_t* const count,
                                   uint64_t* const size) override {
    *count = 0;
    *size = 0;
  }
  // Compaction- and background-work-control entry points: the model DB has
  // no compaction machinery, so all of these report NotSupported.
  using DB::CompactRange;
  Status CompactRange(const CompactRangeOptions& /*options*/,
                      ColumnFamilyHandle* /*column_family*/,
                      const Slice* /*start*/, const Slice* /*end*/) override {
    return Status::NotSupported("Not supported operation.");
  }
  Status SetDBOptions(
      const std::unordered_map<std::string, std::string>& /*new_options*/)
      override {
    return Status::NotSupported("Not supported operation.");
  }
  using DB::CompactFiles;
  Status CompactFiles(
      const CompactionOptions& /*compact_options*/,
      ColumnFamilyHandle* /*column_family*/,
      const std::vector<std::string>& /*input_file_names*/,
      const int /*output_level*/, const int /*output_path_id*/ = -1,
      std::vector<std::string>* const /*output_file_names*/ = nullptr,
      CompactionJobInfo* /*compaction_job_info*/ = nullptr) override {
    return Status::NotSupported("Not supported operation.");
  }
  Status PauseBackgroundWork() override {
    return Status::NotSupported("Not supported operation.");
  }
  Status ContinueBackgroundWork() override {
    return Status::NotSupported("Not supported operation.");
  }
  Status EnableAutoCompaction(
      const std::vector<ColumnFamilyHandle*>& /*column_family_handles*/)
      override {
    return Status::NotSupported("Not supported operation.");
  }
  // Manual-compaction toggles are no-ops; level queries return fixed values
  // since the model effectively keeps everything in one "level".
  void EnableManualCompaction() override { return; }
  void DisableManualCompaction() override { return; }
  using DB::NumberLevels;
  int NumberLevels(ColumnFamilyHandle* /*column_family*/) override { return 1; }
  using DB::MaxMemCompactionLevel;
  int MaxMemCompactionLevel(ColumnFamilyHandle* /*column_family*/) override {
    return 1;
  }
  using DB::Level0StopWriteTrigger;
  int Level0StopWriteTrigger(ColumnFamilyHandle* /*column_family*/) override {
    return -1;  // never stop writes
  }
  const std::string& GetName() const override { return name_; }
  // NOTE(review): returns nullptr rather than a usable Env; tests using the
  // model never dereference it -- confirm before relying on it elsewhere.
  Env* GetEnv() const override { return nullptr; }
  using DB::GetOptions;
  // Returns the Options captured at construction (same copy for every CF).
  Options GetOptions(ColumnFamilyHandle* /*column_family*/) const override {
    return options_;
  }
  using DB::GetDBOptions;
  DBOptions GetDBOptions() const override { return options_; }
  // Flush is a no-op: the model's "memtable" is the map itself.
  using DB::Flush;
  Status Flush(const ROCKSDB_NAMESPACE::FlushOptions& /*options*/,
               ColumnFamilyHandle* /*column_family*/) override {
    Status ret;  // default-constructed Status reports OK
    return ret;
  }
  Status Flush(
      const ROCKSDB_NAMESPACE::FlushOptions& /*options*/,
      const std::vector<ColumnFamilyHandle*>& /*column_families*/) override {
    return Status::OK();
  }
  Status SyncWAL() override { return Status::OK(); }
#ifndef ROCKSDB_LITE
  // File/WAL management stubs: the model keeps nothing on disk, so these
  // trivially succeed, except where a meaningful answer is impossible
  // (creation time, update log) and NotSupported is returned instead.
  Status DisableFileDeletions() override { return Status::OK(); }
  Status EnableFileDeletions(bool /*force*/) override { return Status::OK(); }
  Status GetLiveFiles(std::vector<std::string>&, uint64_t* /*size*/,
                      bool /*flush_memtable*/ = true) override {
    return Status::OK();
  }
  Status GetSortedWalFiles(VectorLogPtr& /*files*/) override {
    return Status::OK();
  }
  Status GetCurrentWalFile(
      std::unique_ptr<LogFile>* /*current_log_file*/) override {
    return Status::OK();
  }
  virtual Status GetCreationTimeOfOldestFile(
      uint64_t* /*creation_time*/) override {
    return Status::NotSupported();
  }
  Status DeleteFile(std::string /*name*/) override { return Status::OK(); }
  Status GetUpdatesSince(
      ROCKSDB_NAMESPACE::SequenceNumber,
      std::unique_ptr<ROCKSDB_NAMESPACE::TransactionLogIterator>*,
      const TransactionLogIterator::ReadOptions& /*read_options*/ =
          TransactionLogIterator::ReadOptions()) override {
    return Status::NotSupported("Not supported in Model DB");
  }
  void GetColumnFamilyMetaData(ColumnFamilyHandle* /*column_family*/,
                               ColumnFamilyMetaData* /*metadata*/) override {}
#endif  // ROCKSDB_LITE
  Status GetDbIdentity(std::string& /*identity*/) const override {
    return Status::OK();  // identity string is left untouched
  }
  // The model does not track sequence numbers; always report 0.
  SequenceNumber GetLatestSequenceNumber() const override { return 0; }
  bool SetPreserveDeletesSequenceNumber(SequenceNumber /*seqnum*/) override {
    return true;
  }
  ColumnFamilyHandle* DefaultColumnFamily() const override { return nullptr; }
  2608. private:
  // Iterator over a KVMap.  When `owned` is true the iterator deletes the
  // map on destruction (used for snapshot copies handed out by the model).
  class ModelIter : public Iterator {
   public:
    ModelIter(const KVMap* map, bool owned)
        : map_(map), owned_(owned), iter_(map_->end()) {}
    ~ModelIter() override {
      if (owned_) delete map_;
    }
    bool Valid() const override { return iter_ != map_->end(); }
    void SeekToFirst() override { iter_ = map_->begin(); }
    void SeekToLast() override {
      if (map_->empty()) {
        iter_ = map_->end();
      } else {
        // Position on the largest key; find() converts the reverse
        // iterator's key back into a forward iterator.
        iter_ = map_->find(map_->rbegin()->first);
      }
    }
    void Seek(const Slice& k) override {
      iter_ = map_->lower_bound(k.ToString());
    }
    void SeekForPrev(const Slice& k) override {
      // First entry strictly greater than k, then step back to the last
      // entry <= k (or end() if there is none).
      iter_ = map_->upper_bound(k.ToString());
      Prev();
    }
    void Next() override { ++iter_; }
    void Prev() override {
      // Stepping before begin() invalidates the iterator instead of
      // underflowing the std::map iterator (which would be UB).
      if (iter_ == map_->begin()) {
        iter_ = map_->end();
        return;
      }
      --iter_;
    }
    Slice key() const override { return iter_->first; }
    Slice value() const override { return iter_->second; }
    Status status() const override { return Status::OK(); }

   private:
    const KVMap* const map_;
    const bool owned_;  // Do we own map_
    KVMap::const_iterator iter_;
  };
  2648. const Options options_;
  2649. KVMap map_;
  2650. std::string name_ = "";
  2651. };
  2652. #ifndef ROCKSDB_VALGRIND_RUN
  2653. static std::string RandomKey(Random* rnd, int minimum = 0) {
  2654. int len;
  2655. do {
  2656. len = (rnd->OneIn(3)
  2657. ? 1 // Short sometimes to encourage collisions
  2658. : (rnd->OneIn(100) ? rnd->Skewed(10) : rnd->Uniform(10)));
  2659. } while (len < minimum);
  2660. return test::RandomKey(rnd, len);
  2661. }
  2662. static bool CompareIterators(int step, DB* model, DB* db,
  2663. const Snapshot* model_snap,
  2664. const Snapshot* db_snap) {
  2665. ReadOptions options;
  2666. options.snapshot = model_snap;
  2667. Iterator* miter = model->NewIterator(options);
  2668. options.snapshot = db_snap;
  2669. Iterator* dbiter = db->NewIterator(options);
  2670. bool ok = true;
  2671. int count = 0;
  2672. for (miter->SeekToFirst(), dbiter->SeekToFirst();
  2673. ok && miter->Valid() && dbiter->Valid(); miter->Next(), dbiter->Next()) {
  2674. count++;
  2675. if (miter->key().compare(dbiter->key()) != 0) {
  2676. fprintf(stderr, "step %d: Key mismatch: '%s' vs. '%s'\n", step,
  2677. EscapeString(miter->key()).c_str(),
  2678. EscapeString(dbiter->key()).c_str());
  2679. ok = false;
  2680. break;
  2681. }
  2682. if (miter->value().compare(dbiter->value()) != 0) {
  2683. fprintf(stderr, "step %d: Value mismatch for key '%s': '%s' vs. '%s'\n",
  2684. step, EscapeString(miter->key()).c_str(),
  2685. EscapeString(miter->value()).c_str(),
  2686. EscapeString(miter->value()).c_str());
  2687. ok = false;
  2688. }
  2689. }
  2690. if (ok) {
  2691. if (miter->Valid() != dbiter->Valid()) {
  2692. fprintf(stderr, "step %d: Mismatch at end of iterators: %d vs. %d\n",
  2693. step, miter->Valid(), dbiter->Valid());
  2694. ok = false;
  2695. }
  2696. }
  2697. delete miter;
  2698. delete dbiter;
  2699. return ok;
  2700. }
  2701. class DBTestRandomized : public DBTest,
  2702. public ::testing::WithParamInterface<int> {
  2703. public:
  2704. void SetUp() override { option_config_ = GetParam(); }
  2705. static std::vector<int> GenerateOptionConfigs() {
  2706. std::vector<int> option_configs;
  2707. // skip cuckoo hash as it does not support snapshot.
  2708. for (int option_config = kDefault; option_config < kEnd; ++option_config) {
  2709. if (!ShouldSkipOptions(option_config,
  2710. kSkipDeletesFilterFirst | kSkipNoSeekToLast)) {
  2711. option_configs.push_back(option_config);
  2712. }
  2713. }
  2714. option_configs.push_back(kBlockBasedTableWithIndexRestartInterval);
  2715. return option_configs;
  2716. }
  2717. };
  2718. INSTANTIATE_TEST_CASE_P(
  2719. DBTestRandomized, DBTestRandomized,
  2720. ::testing::ValuesIn(DBTestRandomized::GenerateOptionConfigs()));
// Differential test: applies the same random stream of Puts, Deletes, and
// multi-operation WriteBatches to both a ModelDB (in-memory map) and a real
// DB, periodically comparing full iteration results -- with and without
// snapshots, and across a Reopen().
TEST_P(DBTestRandomized, Randomized) {
  anon::OptionsOverride options_override;
  options_override.skip_policy = kSkipNoSnapshot;
  Options options = CurrentOptions(options_override);
  DestroyAndReopen(options);

  Random rnd(test::RandomSeed() + GetParam());
  ModelDB model(options);
  const int N = 10000;
  const Snapshot* model_snap = nullptr;
  const Snapshot* db_snap = nullptr;
  std::string k, v;
  for (int step = 0; step < N; step++) {
    // TODO(sanjay): Test Get() works
    int p = rnd.Uniform(100);
    int minimum = 0;
    // Prefix-based configs need keys of length >= 1 so the prefix
    // extractor has something to work with.
    if (option_config_ == kHashSkipList || option_config_ == kHashLinkList ||
        option_config_ == kPlainTableFirstBytePrefix ||
        option_config_ == kBlockBasedTableWithWholeKeyHashIndex ||
        option_config_ == kBlockBasedTableWithPrefixHashIndex) {
      minimum = 1;
    }
    if (p < 45) {  // Put
      k = RandomKey(&rnd, minimum);
      v = RandomString(&rnd,
                       rnd.OneIn(20) ? 100 + rnd.Uniform(100) : rnd.Uniform(8));
      ASSERT_OK(model.Put(WriteOptions(), k, v));
      ASSERT_OK(db_->Put(WriteOptions(), k, v));
    } else if (p < 90) {  // Delete
      k = RandomKey(&rnd, minimum);
      ASSERT_OK(model.Delete(WriteOptions(), k));
      ASSERT_OK(db_->Delete(WriteOptions(), k));
    } else {  // Multi-element batch
      WriteBatch b;
      const int num = rnd.Uniform(8);
      for (int i = 0; i < num; i++) {
        if (i == 0 || !rnd.OneIn(10)) {
          k = RandomKey(&rnd, minimum);
        } else {
          // Periodically re-use the same key from the previous iter, so
          // we have multiple entries in the write batch for the same key
        }
        if (rnd.OneIn(2)) {
          v = RandomString(&rnd, rnd.Uniform(10));
          b.Put(k, v);
        } else {
          b.Delete(k);
        }
      }
      ASSERT_OK(model.Write(WriteOptions(), &b));
      ASSERT_OK(db_->Write(WriteOptions(), &b));
    }

    if ((step % 100) == 0) {
      // For DB instances that use the hash index + block-based table, the
      // iterator will be invalid right when seeking a non-existent key,
      // rather than return a key that is close to it.
      if (option_config_ != kBlockBasedTableWithWholeKeyHashIndex &&
          option_config_ != kBlockBasedTableWithPrefixHashIndex) {
        ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr));
        ASSERT_TRUE(CompareIterators(step, &model, db_, model_snap, db_snap));
      }

      // Save a snapshot from each DB this time that we'll use next
      // time we compare things, to make sure the current state is
      // preserved with the snapshot
      if (model_snap != nullptr) model.ReleaseSnapshot(model_snap);
      if (db_snap != nullptr) db_->ReleaseSnapshot(db_snap);

      Reopen(options);
      ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr));

      model_snap = model.GetSnapshot();
      db_snap = db_->GetSnapshot();
    }
  }
  if (model_snap != nullptr) model.ReleaseSnapshot(model_snap);
  if (db_snap != nullptr) db_->ReleaseSnapshot(db_snap);
}
  2795. #endif // ROCKSDB_VALGRIND_RUN
  2796. TEST_F(DBTest, BlockBasedTablePrefixIndexTest) {
  2797. // create a DB with block prefix index
  2798. BlockBasedTableOptions table_options;
  2799. Options options = CurrentOptions();
  2800. table_options.index_type = BlockBasedTableOptions::kHashSearch;
  2801. options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  2802. options.prefix_extractor.reset(NewFixedPrefixTransform(1));
  2803. Reopen(options);
  2804. ASSERT_OK(Put("k1", "v1"));
  2805. Flush();
  2806. ASSERT_OK(Put("k2", "v2"));
  2807. // Reopen it without prefix extractor, make sure everything still works.
  2808. // RocksDB should just fall back to the binary index.
  2809. table_options.index_type = BlockBasedTableOptions::kBinarySearch;
  2810. options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  2811. options.prefix_extractor.reset();
  2812. Reopen(options);
  2813. ASSERT_EQ("v1", Get("k1"));
  2814. ASSERT_EQ("v2", Get("k2"));
  2815. }
// Verifies that with a hash (prefix) index, a total-order seek works, and
// that a subsequent prefix seek still works after the table has been
// evicted and reloaded through a deliberately tiny table cache.
TEST_F(DBTest, BlockBasedTablePrefixIndexTotalOrderSeek) {
  // create a DB with block prefix index
  BlockBasedTableOptions table_options;
  Options options = CurrentOptions();
  options.max_open_files = 10;
  table_options.index_type = BlockBasedTableOptions::kHashSearch;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  options.prefix_extractor.reset(NewFixedPrefixTransform(1));

  // RocksDB sanitize max open files to at least 20. Modify it back.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "SanitizeOptions::AfterChangeMaxOpenFiles", [&](void* arg) {
        int* max_open_files = static_cast<int*>(arg);
        *max_open_files = 11;
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  Reopen(options);
  ASSERT_OK(Put("k1", "v1"));
  Flush();
  CompactRangeOptions cro;
  cro.change_level = true;
  cro.target_level = 1;
  ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));

  // Force evict tables
  dbfull()->TEST_table_cache()->SetCapacity(0);
  // Make table cache to keep one entry.
  dbfull()->TEST_table_cache()->SetCapacity(1);

  ReadOptions read_options;
  read_options.total_order_seek = true;
  {
    std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
    iter->Seek("k1");
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("k1", iter->key().ToString());
  }

  // After total order seek, prefix index should still be used.
  read_options.total_order_seek = false;
  {
    std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
    iter->Seek("k1");
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("k1", iter->key().ToString());
  }
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}
// Verifies that tables written with different per-table checksum types
// (CRC32c, xxHash) are all readable regardless of the checksum type
// currently configured -- the checksum type is recorded per table file.
TEST_F(DBTest, ChecksumTest) {
  BlockBasedTableOptions table_options;
  Options options = CurrentOptions();

  table_options.checksum = kCRC32c;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(options);
  ASSERT_OK(Put("a", "b"));
  ASSERT_OK(Put("c", "d"));
  ASSERT_OK(Flush());  // table with crc checksum

  table_options.checksum = kxxHash;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(options);
  ASSERT_OK(Put("e", "f"));
  ASSERT_OK(Put("g", "h"));
  ASSERT_OK(Flush());  // table with xxhash checksum

  table_options.checksum = kCRC32c;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(options);
  ASSERT_EQ("b", Get("a"));
  ASSERT_EQ("d", Get("c"));
  ASSERT_EQ("f", Get("e"));
  ASSERT_EQ("h", Get("g"));

  // NOTE(review): this block repeats the kCRC32c configuration verbatim;
  // presumably it was meant to re-read under a different checksum type
  // (e.g. kxxHash) -- confirm against upstream history.
  table_options.checksum = kCRC32c;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(options);
  ASSERT_EQ("b", Get("a"));
  ASSERT_EQ("d", Get("c"));
  ASSERT_EQ("f", Get("e"));
  ASSERT_EQ("h", Get("g"));
}
  2890. #ifndef ROCKSDB_LITE
// FIFO compaction drops the oldest files once total size exceeds
// max_table_files_size.  Writes ~6 x 110KB of data against a 500KB limit
// and checks that only 5 files survive and that the oldest keys are gone.
TEST_P(DBTestWithParam, FIFOCompactionTest) {
  for (int iter = 0; iter < 2; ++iter) {
    // first iteration -- auto compaction
    // second iteration -- manual compaction
    Options options;
    options.compaction_style = kCompactionStyleFIFO;
    options.write_buffer_size = 100 << 10;  // 100KB
    options.arena_block_size = 4096;
    options.compaction_options_fifo.max_table_files_size = 500 << 10;  // 500KB
    options.compression = kNoCompression;
    options.create_if_missing = true;
    options.max_subcompactions = max_subcompactions_;
    if (iter == 1) {
      options.disable_auto_compactions = true;
    }
    options = CurrentOptions(options);
    DestroyAndReopen(options);

    Random rnd(301);
    for (int i = 0; i < 6; ++i) {
      for (int j = 0; j < 110; ++j) {
        ASSERT_OK(Put(ToString(i * 100 + j), RandomString(&rnd, 980)));
      }
      // flush should happen here
      ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
    }
    if (iter == 0) {
      ASSERT_OK(dbfull()->TEST_WaitForCompact());
    } else {
      CompactRangeOptions cro;
      cro.exclusive_manual_compaction = exclusive_manual_compaction_;
      ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
    }
    // only 5 files should survive
    ASSERT_EQ(NumTableFilesAtLevel(0), 5);
    for (int i = 0; i < 50; ++i) {
      // these keys should be deleted in previous compaction
      ASSERT_EQ("NOT_FOUND", Get(ToString(i)));
    }
  }
}
// FIFO with allow_compaction: small L0 files get merged (intra-L0) once
// level0_file_num_compaction_trigger files accumulate, while the overall
// max_table_files_size bound is still enforced.
TEST_F(DBTest, FIFOCompactionTestWithCompaction) {
  Options options;
  options.compaction_style = kCompactionStyleFIFO;
  options.write_buffer_size = 20 << 10;  // 20K
  options.arena_block_size = 4096;
  options.compaction_options_fifo.max_table_files_size = 1500 << 10;  // 1MB
  options.compaction_options_fifo.allow_compaction = true;
  options.level0_file_num_compaction_trigger = 6;
  options.compression = kNoCompression;
  options.create_if_missing = true;
  options = CurrentOptions(options);
  DestroyAndReopen(options);

  Random rnd(301);
  for (int i = 0; i < 60; i++) {
    // Generate and flush a file about 20KB.
    for (int j = 0; j < 20; j++) {
      ASSERT_OK(Put(ToString(i * 20 + j), RandomString(&rnd, 980)));
    }
    Flush();
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
  }
  // It should be compacted to 10 files.
  ASSERT_EQ(NumTableFilesAtLevel(0), 10);

  for (int i = 0; i < 60; i++) {
    // Generate and flush a file about 20KB.
    for (int j = 0; j < 20; j++) {
      ASSERT_OK(Put(ToString(i * 20 + j + 2000), RandomString(&rnd, 980)));
    }
    Flush();
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
  }

  // It should be compacted to no more than 20 files.
  ASSERT_GT(NumTableFilesAtLevel(0), 10);
  ASSERT_LT(NumTableFilesAtLevel(0), 18);
  // Size limit is still guaranteed.
  ASSERT_LE(SizeAtLevel(0),
            options.compaction_options_fifo.max_table_files_size);
}
// FIFO intra-L0 compaction must preserve puts and honor deletes: after the
// first round the merged file still contains the empty-value keys; after
// the second round (deletes) the keys are gone.
TEST_F(DBTest, FIFOCompactionStyleWithCompactionAndDelete) {
  Options options;
  options.compaction_style = kCompactionStyleFIFO;
  options.write_buffer_size = 20 << 10;  // 20K
  options.arena_block_size = 4096;
  options.compaction_options_fifo.max_table_files_size = 1500 << 10;  // 1MB
  options.compaction_options_fifo.allow_compaction = true;
  options.level0_file_num_compaction_trigger = 3;
  options.compression = kNoCompression;
  options.create_if_missing = true;
  options = CurrentOptions(options);
  DestroyAndReopen(options);

  Random rnd(301);
  for (int i = 0; i < 3; i++) {
    // Each file contains a different key which will be dropped later.
    ASSERT_OK(Put("a" + ToString(i), RandomString(&rnd, 500)));
    ASSERT_OK(Put("key" + ToString(i), ""));
    ASSERT_OK(Put("z" + ToString(i), RandomString(&rnd, 500)));
    Flush();
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
  }
  ASSERT_EQ(NumTableFilesAtLevel(0), 1);
  for (int i = 0; i < 3; i++) {
    ASSERT_EQ("", Get("key" + ToString(i)));
  }
  for (int i = 0; i < 3; i++) {
    // Each file contains a different key which will be dropped later.
    ASSERT_OK(Put("a" + ToString(i), RandomString(&rnd, 500)));
    ASSERT_OK(Delete("key" + ToString(i)));
    ASSERT_OK(Put("z" + ToString(i), RandomString(&rnd, 500)));
    Flush();
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
  }
  ASSERT_EQ(NumTableFilesAtLevel(0), 2);
  for (int i = 0; i < 3; i++) {
    ASSERT_EQ("NOT_FOUND", Get("key" + ToString(i)));
  }
}
// Check that FIFO-with-TTL opens successfully regardless of max_open_files
// (both != -1 and -1 are accepted; the restriction was lifted).
TEST_F(DBTest, FIFOCompactionWithTTLAndMaxOpenFilesTest) {
  Options options;
  options.compaction_style = kCompactionStyleFIFO;
  options.create_if_missing = true;
  options.ttl = 600;  // seconds

  // TTL is now supported with max_open_files != -1.
  options.max_open_files = 100;
  options = CurrentOptions(options);
  ASSERT_OK(TryReopen(options));

  options.max_open_files = -1;
  ASSERT_OK(TryReopen(options));
}
// Check that FIFO-with-TTL is supported only with BlockBasedTableFactory;
// plain-table and adaptive-table factories must be rejected at open time.
TEST_F(DBTest, FIFOCompactionWithTTLAndVariousTableFormatsTest) {
  Options options;
  options.compaction_style = kCompactionStyleFIFO;
  options.create_if_missing = true;
  options.ttl = 600;  // seconds

  options = CurrentOptions(options);
  options.table_factory.reset(NewBlockBasedTableFactory());
  ASSERT_OK(TryReopen(options));

  Destroy(options);
  options.table_factory.reset(NewPlainTableFactory());
  ASSERT_TRUE(TryReopen(options).IsNotSupported());

  Destroy(options);
  options.table_factory.reset(NewAdaptiveTableFactory());
  ASSERT_TRUE(TryReopen(options).IsNotSupported());
}
// Exercises FIFO compaction's TTL behavior across five scenarios, using the
// mock env's addon_time_ to simulate the passage of wall-clock time:
// manual compaction, automatic compaction, size-based fallback, TTL with
// intra-L0 compaction, and large TTL where only the size limit applies.
TEST_F(DBTest, FIFOCompactionWithTTLTest) {
  Options options;
  options.compaction_style = kCompactionStyleFIFO;
  options.write_buffer_size = 10 << 10;  // 10KB
  options.arena_block_size = 4096;
  options.compression = kNoCompression;
  options.create_if_missing = true;
  env_->time_elapse_only_sleep_ = false;
  options.env = env_;

  // Test to make sure that all files with expired ttl are deleted on next
  // manual compaction.
  {
    env_->addon_time_.store(0);
    options.compaction_options_fifo.max_table_files_size = 150 << 10;  // 150KB
    options.compaction_options_fifo.allow_compaction = false;
    options.ttl = 1 * 60 * 60;  // 1 hour
    options = CurrentOptions(options);
    DestroyAndReopen(options);

    Random rnd(301);
    for (int i = 0; i < 10; i++) {
      // Generate and flush a file about 10KB.
      for (int j = 0; j < 10; j++) {
        ASSERT_OK(Put(ToString(i * 20 + j), RandomString(&rnd, 980)));
      }
      Flush();
      ASSERT_OK(dbfull()->TEST_WaitForCompact());
    }
    ASSERT_EQ(NumTableFilesAtLevel(0), 10);

    // Sleep for 2 hours -- which is much greater than TTL.
    // Note: Couldn't use SleepForMicroseconds because it takes an int instead
    // of uint64_t. Hence used addon_time_ directly.
    // env_->SleepForMicroseconds(2 * 60 * 60 * 1000 * 1000);
    env_->addon_time_.fetch_add(2 * 60 * 60);

    // Since no flushes and compactions have run, the db should still be in
    // the same state even after considerable time has passed.
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
    ASSERT_EQ(NumTableFilesAtLevel(0), 10);

    dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
    ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  }

  // Test to make sure that all files with expired ttl are deleted on next
  // automatic compaction.
  {
    options.compaction_options_fifo.max_table_files_size = 150 << 10;  // 150KB
    options.compaction_options_fifo.allow_compaction = false;
    options.ttl = 1 * 60 * 60;  // 1 hour
    options = CurrentOptions(options);
    DestroyAndReopen(options);

    Random rnd(301);
    for (int i = 0; i < 10; i++) {
      // Generate and flush a file about 10KB.
      for (int j = 0; j < 10; j++) {
        ASSERT_OK(Put(ToString(i * 20 + j), RandomString(&rnd, 980)));
      }
      Flush();
      ASSERT_OK(dbfull()->TEST_WaitForCompact());
    }
    ASSERT_EQ(NumTableFilesAtLevel(0), 10);

    // Sleep for 2 hours -- which is much greater than TTL.
    env_->addon_time_.fetch_add(2 * 60 * 60);
    // Just to make sure that we are in the same state even after sleeping.
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
    ASSERT_EQ(NumTableFilesAtLevel(0), 10);

    // Create 1 more file to trigger TTL compaction. The old files are dropped.
    for (int i = 0; i < 1; i++) {
      for (int j = 0; j < 10; j++) {
        ASSERT_OK(Put(ToString(i * 20 + j), RandomString(&rnd, 980)));
      }
      Flush();
    }

    ASSERT_OK(dbfull()->TEST_WaitForCompact());
    // Only the new 10 files remain.
    ASSERT_EQ(NumTableFilesAtLevel(0), 1);
    ASSERT_LE(SizeAtLevel(0),
              options.compaction_options_fifo.max_table_files_size);
  }

  // Test that shows the fall back to size-based FIFO compaction if TTL-based
  // deletion doesn't move the total size to be less than max_table_files_size.
  {
    options.write_buffer_size = 10 << 10;                              // 10KB
    options.compaction_options_fifo.max_table_files_size = 150 << 10;  // 150KB
    options.compaction_options_fifo.allow_compaction = false;
    options.ttl = 1 * 60 * 60;  // 1 hour
    options = CurrentOptions(options);
    DestroyAndReopen(options);

    Random rnd(301);
    for (int i = 0; i < 3; i++) {
      // Generate and flush a file about 10KB.
      for (int j = 0; j < 10; j++) {
        ASSERT_OK(Put(ToString(i * 20 + j), RandomString(&rnd, 980)));
      }
      Flush();
      ASSERT_OK(dbfull()->TEST_WaitForCompact());
    }
    ASSERT_EQ(NumTableFilesAtLevel(0), 3);

    // Sleep for 2 hours -- which is much greater than TTL.
    env_->addon_time_.fetch_add(2 * 60 * 60);
    // Just to make sure that we are in the same state even after sleeping.
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
    ASSERT_EQ(NumTableFilesAtLevel(0), 3);

    for (int i = 0; i < 5; i++) {
      for (int j = 0; j < 140; j++) {
        ASSERT_OK(Put(ToString(i * 20 + j), RandomString(&rnd, 980)));
      }
      Flush();
      ASSERT_OK(dbfull()->TEST_WaitForCompact());
    }
    // Size limit is still guaranteed.
    ASSERT_LE(SizeAtLevel(0),
              options.compaction_options_fifo.max_table_files_size);
  }

  // Test with TTL + Intra-L0 compactions.
  {
    options.compaction_options_fifo.max_table_files_size = 150 << 10;  // 150KB
    options.compaction_options_fifo.allow_compaction = true;
    options.ttl = 1 * 60 * 60;  // 1 hour
    options.level0_file_num_compaction_trigger = 6;
    options = CurrentOptions(options);
    DestroyAndReopen(options);

    Random rnd(301);
    for (int i = 0; i < 10; i++) {
      // Generate and flush a file about 10KB.
      for (int j = 0; j < 10; j++) {
        ASSERT_OK(Put(ToString(i * 20 + j), RandomString(&rnd, 980)));
      }
      Flush();
      ASSERT_OK(dbfull()->TEST_WaitForCompact());
    }
    // With Intra-L0 compaction, out of 10 files, 6 files will be compacted to 1
    // (due to level0_file_num_compaction_trigger = 6).
    // So total files = 1 + remaining 4 = 5.
    ASSERT_EQ(NumTableFilesAtLevel(0), 5);

    // Sleep for 2 hours -- which is much greater than TTL.
    env_->addon_time_.fetch_add(2 * 60 * 60);
    // Just to make sure that we are in the same state even after sleeping.
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
    ASSERT_EQ(NumTableFilesAtLevel(0), 5);

    // Create 10 more files. The old 5 files are dropped as their ttl expired.
    for (int i = 0; i < 10; i++) {
      for (int j = 0; j < 10; j++) {
        ASSERT_OK(Put(ToString(i * 20 + j), RandomString(&rnd, 980)));
      }
      Flush();
      ASSERT_OK(dbfull()->TEST_WaitForCompact());
    }
    ASSERT_EQ(NumTableFilesAtLevel(0), 5);
    ASSERT_LE(SizeAtLevel(0),
              options.compaction_options_fifo.max_table_files_size);
  }

  // Test with large TTL + Intra-L0 compactions.
  // Files dropped based on size, as ttl doesn't kick in.
  {
    options.write_buffer_size = 20 << 10;                               // 20K
    options.compaction_options_fifo.max_table_files_size = 1500 << 10;  // 1.5MB
    options.compaction_options_fifo.allow_compaction = true;
    options.ttl = 1 * 60 * 60;  // 1 hour
    options.level0_file_num_compaction_trigger = 6;
    options = CurrentOptions(options);
    DestroyAndReopen(options);

    Random rnd(301);
    for (int i = 0; i < 60; i++) {
      // Generate and flush a file about 20KB.
      for (int j = 0; j < 20; j++) {
        ASSERT_OK(Put(ToString(i * 20 + j), RandomString(&rnd, 980)));
      }
      Flush();
      ASSERT_OK(dbfull()->TEST_WaitForCompact());
    }
    // It should be compacted to 10 files.
    ASSERT_EQ(NumTableFilesAtLevel(0), 10);

    for (int i = 0; i < 60; i++) {
      // Generate and flush a file about 20KB.
      for (int j = 0; j < 20; j++) {
        ASSERT_OK(Put(ToString(i * 20 + j + 2000), RandomString(&rnd, 980)));
      }
      Flush();
      ASSERT_OK(dbfull()->TEST_WaitForCompact());
    }

    // It should be compacted to no more than 20 files.
    ASSERT_GT(NumTableFilesAtLevel(0), 10);
    ASSERT_LT(NumTableFilesAtLevel(0), 18);
    // Size limit is still guaranteed.
    ASSERT_LE(SizeAtLevel(0),
              options.compaction_options_fifo.max_table_files_size);
  }
}
  3222. #endif // ROCKSDB_LITE
  3223. #ifndef ROCKSDB_LITE
  3224. /*
  3225. * This test is not reliable enough as it heavily depends on disk behavior.
  3226. * Disable as it is flaky.
  3227. */
TEST_F(DBTest, DISABLED_RateLimitingTest) {
  // Measures the unthrottled write rate, then reopens the DB twice with a
  // GenericRateLimiter set to 0.7x and 0.5x of that rate, and checks that
  // the observed write rate and the limiter drain count match expectations.
  Options options = CurrentOptions();
  options.write_buffer_size = 1 << 20;  // 1MB
  options.level0_file_num_compaction_trigger = 2;
  options.target_file_size_base = 1 << 20;     // 1MB
  options.max_bytes_for_level_base = 4 << 20;  // 4MB
  options.max_bytes_for_level_multiplier = 4;
  options.compression = kNoCompression;
  options.create_if_missing = true;
  options.env = env_;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  options.IncreaseParallelism(4);
  DestroyAndReopen(options);
  WriteOptions wo;
  wo.disableWAL = true;
  // # no rate limiting
  Random rnd(301);
  uint64_t start = env_->NowMicros();
  // Write ~96M data
  for (int64_t i = 0; i < (96 << 10); ++i) {
    ASSERT_OK(
        Put(RandomString(&rnd, 32), RandomString(&rnd, (1 << 10) + 1), wo));
  }
  uint64_t elapsed = env_->NowMicros() - start;
  // Raw (unthrottled) bytes/second, used as the baseline for later passes.
  double raw_rate = env_->bytes_written_ * 1000000.0 / elapsed;
  uint64_t rate_limiter_drains =
      TestGetTickerCount(options, NUMBER_RATE_LIMITER_DRAINS);
  // No limiter is configured yet, so no drain events should be recorded.
  ASSERT_EQ(0, rate_limiter_drains);
  Close();
  // # rate limiting with 0.7 x threshold
  options.rate_limiter.reset(
      NewGenericRateLimiter(static_cast<int64_t>(0.7 * raw_rate)));
  env_->bytes_written_ = 0;
  DestroyAndReopen(options);
  start = env_->NowMicros();
  // Write ~96M data
  for (int64_t i = 0; i < (96 << 10); ++i) {
    ASSERT_OK(
        Put(RandomString(&rnd, 32), RandomString(&rnd, (1 << 10) + 1), wo));
  }
  // The statistics object is shared across reopens, so compute the delta
  // against the previous reading.
  rate_limiter_drains =
      TestGetTickerCount(options, NUMBER_RATE_LIMITER_DRAINS) -
      rate_limiter_drains;
  elapsed = env_->NowMicros() - start;
  Close();
  // Every byte written to disk must have passed through the limiter.
  ASSERT_EQ(options.rate_limiter->GetTotalBytesThrough(), env_->bytes_written_);
  // Most intervals should've been drained (interval time is 100ms, elapsed is
  // micros)
  ASSERT_GT(rate_limiter_drains, 0);
  ASSERT_LE(rate_limiter_drains, elapsed / 100000 + 1);
  double ratio = env_->bytes_written_ * 1000000 / elapsed / raw_rate;
  fprintf(stderr, "write rate ratio = %.2lf, expected 0.7\n", ratio);
  ASSERT_TRUE(ratio < 0.8);
  // # rate limiting with half of the raw_rate
  options.rate_limiter.reset(
      NewGenericRateLimiter(static_cast<int64_t>(raw_rate / 2)));
  env_->bytes_written_ = 0;
  DestroyAndReopen(options);
  start = env_->NowMicros();
  // Write ~96M data
  for (int64_t i = 0; i < (96 << 10); ++i) {
    ASSERT_OK(
        Put(RandomString(&rnd, 32), RandomString(&rnd, (1 << 10) + 1), wo));
  }
  elapsed = env_->NowMicros() - start;
  rate_limiter_drains =
      TestGetTickerCount(options, NUMBER_RATE_LIMITER_DRAINS) -
      rate_limiter_drains;
  Close();
  ASSERT_EQ(options.rate_limiter->GetTotalBytesThrough(), env_->bytes_written_);
  // Most intervals should've been drained (interval time is 100ms, elapsed is
  // micros)
  ASSERT_GT(rate_limiter_drains, elapsed / 100000 / 2);
  ASSERT_LE(rate_limiter_drains, elapsed / 100000 + 1);
  ratio = env_->bytes_written_ * 1000000 / elapsed / raw_rate;
  fprintf(stderr, "write rate ratio = %.2lf, expected 0.5\n", ratio);
  ASSERT_LT(ratio, 0.6);
}
  3306. TEST_F(DBTest, TableOptionsSanitizeTest) {
  3307. Options options = CurrentOptions();
  3308. options.create_if_missing = true;
  3309. DestroyAndReopen(options);
  3310. ASSERT_EQ(db_->GetOptions().allow_mmap_reads, false);
  3311. options.table_factory.reset(new PlainTableFactory());
  3312. options.prefix_extractor.reset(NewNoopTransform());
  3313. Destroy(options);
  3314. ASSERT_TRUE(!TryReopen(options).IsNotSupported());
  3315. // Test for check of prefix_extractor when hash index is used for
  3316. // block-based table
  3317. BlockBasedTableOptions to;
  3318. to.index_type = BlockBasedTableOptions::kHashSearch;
  3319. options = CurrentOptions();
  3320. options.create_if_missing = true;
  3321. options.table_factory.reset(NewBlockBasedTableFactory(to));
  3322. ASSERT_TRUE(TryReopen(options).IsInvalidArgument());
  3323. options.prefix_extractor.reset(NewFixedPrefixTransform(1));
  3324. ASSERT_OK(TryReopen(options));
  3325. }
  3326. TEST_F(DBTest, ConcurrentMemtableNotSupported) {
  3327. Options options = CurrentOptions();
  3328. options.allow_concurrent_memtable_write = true;
  3329. options.soft_pending_compaction_bytes_limit = 0;
  3330. options.hard_pending_compaction_bytes_limit = 100;
  3331. options.create_if_missing = true;
  3332. DestroyDB(dbname_, options);
  3333. options.memtable_factory.reset(NewHashLinkListRepFactory(4, 0, 3, true, 4));
  3334. ASSERT_NOK(TryReopen(options));
  3335. options.memtable_factory.reset(new SkipListFactory);
  3336. ASSERT_OK(TryReopen(options));
  3337. ColumnFamilyOptions cf_options(options);
  3338. cf_options.memtable_factory.reset(
  3339. NewHashLinkListRepFactory(4, 0, 3, true, 4));
  3340. ColumnFamilyHandle* handle;
  3341. ASSERT_NOK(db_->CreateColumnFamily(cf_options, "name", &handle));
  3342. }
  3343. #endif // ROCKSDB_LITE
TEST_F(DBTest, SanitizeNumThreads) {
  // Verifies thread pool sizing: attempt 0 opens with explicit background
  // thread counts (3 compactions, 2 flushes); attempt 1 relies on option
  // sanitization. In both cases the resulting pool sizes are observed via
  // the queue lengths left by blocking sleeper tasks.
  for (int attempt = 0; attempt < 2; attempt++) {
    const size_t kTotalTasks = 8;
    test::SleepingBackgroundTask sleeping_tasks[kTotalTasks];
    Options options = CurrentOptions();
    if (attempt == 0) {
      options.max_background_compactions = 3;
      options.max_background_flushes = 2;
    }
    options.create_if_missing = true;
    DestroyAndReopen(options);
    for (size_t i = 0; i < kTotalTasks; i++) {
      // Insert 4 tasks to the low priority queue and 4 tasks to the high
      // priority queue.
      env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
                     &sleeping_tasks[i],
                     (i < 4) ? Env::Priority::LOW : Env::Priority::HIGH);
    }
    // Wait up to 10s for the tasks to be picked up by the pool threads.
    for (int i = 0; i < 10000; i++) {
      if (options.env->GetThreadPoolQueueLen(Env::Priority::LOW) <= 1 &&
          options.env->GetThreadPoolQueueLen(Env::Priority::HIGH) <= 2) {
        break;
      }
      env_->SleepForMicroseconds(1000);
    }
    // pool size 3, total task 4. Queue size should be 1.
    ASSERT_EQ(1U, options.env->GetThreadPoolQueueLen(Env::Priority::LOW));
    // pool size 2, total task 4. Queue size should be 2.
    ASSERT_EQ(2U, options.env->GetThreadPoolQueueLen(Env::Priority::HIGH));
    for (size_t i = 0; i < kTotalTasks; i++) {
      sleeping_tasks[i].WakeUp();
      sleeping_tasks[i].WaitUntilDone();
    }
    // The DB must remain usable (write, read, flush) after the pools drain.
    ASSERT_OK(Put("abc", "def"));
    ASSERT_EQ("def", Get("abc"));
    Flush();
    ASSERT_EQ("def", Get("abc"));
  }
}
TEST_F(DBTest, WriteSingleThreadEntry) {
  // Starts a write via TEST_BeginWrite() while holding the DB mutex, then
  // launches a concurrent Put and Flush. They should make progress only once
  // the pending write is ended; joining both threads proves no deadlock.
  std::vector<port::Thread> threads;
  dbfull()->TEST_LockMutex();
  auto w = dbfull()->TEST_BeginWrite();
  threads.emplace_back([&] { Put("a", "b"); });  // concurrent writer
  env_->SleepForMicroseconds(10000);
  threads.emplace_back([&] { Flush(); });  // concurrent flush
  env_->SleepForMicroseconds(10000);
  // Release the mutex so the background threads can reach the write path,
  // then re-acquire it to end the pending write (TEST_EndWrite is expected
  // to be called under the mutex).
  dbfull()->TEST_UnlockMutex();
  dbfull()->TEST_LockMutex();
  dbfull()->TEST_EndWrite(w);
  dbfull()->TEST_UnlockMutex();
  for (auto& t : threads) {
    t.join();
  }
}
TEST_F(DBTest, ConcurrentFlushWAL) {
  // Races FlushWAL() against concurrent writers (and, when two_write_queues
  // is set, against writes issued directly through WriteImpl), for every
  // combination of two_write_queues x manual_wal_flush. Afterwards the DB is
  // reopened and the first `cnt` keys are read back, proving WAL recovery
  // was not corrupted by the concurrent flushes.
  const size_t cnt = 100;
  Options options;
  WriteOptions wopt;
  ReadOptions ropt;
  for (bool two_write_queues : {false, true}) {
    for (bool manual_wal_flush : {false, true}) {
      options.two_write_queues = two_write_queues;
      options.manual_wal_flush = manual_wal_flush;
      options.create_if_missing = true;
      DestroyAndReopen(options);
      std::vector<port::Thread> threads;
      // Writer using the normal Put path.
      threads.emplace_back([&] {
        for (size_t i = 0; i < cnt; i++) {
          auto istr = ToString(i);
          db_->Put(wopt, db_->DefaultColumnFamily(), "a" + istr, "b" + istr);
        }
      });
      if (two_write_queues) {
        // Second writer entering through WriteImpl directly — presumably to
        // exercise the second write queue; its keys are not verified below.
        threads.emplace_back([&] {
          for (size_t i = cnt; i < 2 * cnt; i++) {
            auto istr = ToString(i);
            WriteBatch batch;
            batch.Put("a" + istr, "b" + istr);
            dbfull()->WriteImpl(wopt, &batch, nullptr, nullptr, 0, true);
          }
        });
      }
      // Concurrent WAL flusher.
      threads.emplace_back([&] {
        for (size_t i = 0; i < cnt * 100; i++) {  // FlushWAL is faster than Put
          db_->FlushWAL(false);
        }
      });
      for (auto& t : threads) {
        t.join();
      }
      options.create_if_missing = false;
      // Recover from the wal and make sure that it is not corrupted
      Reopen(options);
      for (size_t i = 0; i < cnt; i++) {
        PinnableSlice pval;
        auto istr = ToString(i);
        ASSERT_OK(
            db_->Get(ropt, db_->DefaultColumnFamily(), "a" + istr, &pval));
        ASSERT_TRUE(pval == ("b" + istr));
      }
    }
  }
}
  3448. #ifndef ROCKSDB_LITE
  3449. TEST_F(DBTest, DynamicMemtableOptions) {
  3450. const uint64_t k64KB = 1 << 16;
  3451. const uint64_t k128KB = 1 << 17;
  3452. const uint64_t k5KB = 5 * 1024;
  3453. Options options;
  3454. options.env = env_;
  3455. options.create_if_missing = true;
  3456. options.compression = kNoCompression;
  3457. options.max_background_compactions = 1;
  3458. options.write_buffer_size = k64KB;
  3459. options.arena_block_size = 16 * 1024;
  3460. options.max_write_buffer_number = 2;
  3461. // Don't trigger compact/slowdown/stop
  3462. options.level0_file_num_compaction_trigger = 1024;
  3463. options.level0_slowdown_writes_trigger = 1024;
  3464. options.level0_stop_writes_trigger = 1024;
  3465. DestroyAndReopen(options);
  3466. auto gen_l0_kb = [this](int size) {
  3467. const int kNumPutsBeforeWaitForFlush = 64;
  3468. Random rnd(301);
  3469. for (int i = 0; i < size; i++) {
  3470. ASSERT_OK(Put(Key(i), RandomString(&rnd, 1024)));
  3471. // The following condition prevents a race condition between flush jobs
  3472. // acquiring work and this thread filling up multiple memtables. Without
  3473. // this, the flush might produce less files than expected because
  3474. // multiple memtables are flushed into a single L0 file. This race
  3475. // condition affects assertion (A).
  3476. if (i % kNumPutsBeforeWaitForFlush == kNumPutsBeforeWaitForFlush - 1) {
  3477. dbfull()->TEST_WaitForFlushMemTable();
  3478. }
  3479. }
  3480. dbfull()->TEST_WaitForFlushMemTable();
  3481. };
  3482. // Test write_buffer_size
  3483. gen_l0_kb(64);
  3484. ASSERT_EQ(NumTableFilesAtLevel(0), 1);
  3485. ASSERT_LT(SizeAtLevel(0), k64KB + k5KB);
  3486. ASSERT_GT(SizeAtLevel(0), k64KB - k5KB * 2);
  3487. // Clean up L0
  3488. dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
  3489. ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  3490. // Increase buffer size
  3491. ASSERT_OK(dbfull()->SetOptions({
  3492. {"write_buffer_size", "131072"},
  3493. }));
  3494. // The existing memtable inflated 64KB->128KB when we invoked SetOptions().
  3495. // Write 192KB, we should have a 128KB L0 file and a memtable with 64KB data.
  3496. gen_l0_kb(192);
  3497. ASSERT_EQ(NumTableFilesAtLevel(0), 1); // (A)
  3498. ASSERT_LT(SizeAtLevel(0), k128KB + 2 * k5KB);
  3499. ASSERT_GT(SizeAtLevel(0), k128KB - 4 * k5KB);
  3500. // Decrease buffer size below current usage
  3501. ASSERT_OK(dbfull()->SetOptions({
  3502. {"write_buffer_size", "65536"},
  3503. }));
  3504. // The existing memtable became eligible for flush when we reduced its
  3505. // capacity to 64KB. Two keys need to be added to trigger flush: first causes
  3506. // memtable to be marked full, second schedules the flush. Then we should have
  3507. // a 128KB L0 file, a 64KB L0 file, and a memtable with just one key.
  3508. gen_l0_kb(2);
  3509. ASSERT_EQ(NumTableFilesAtLevel(0), 2);
  3510. ASSERT_LT(SizeAtLevel(0), k128KB + k64KB + 2 * k5KB);
  3511. ASSERT_GT(SizeAtLevel(0), k128KB + k64KB - 4 * k5KB);
  3512. // Test max_write_buffer_number
  3513. // Block compaction thread, which will also block the flushes because
  3514. // max_background_flushes == 0, so flushes are getting executed by the
  3515. // compaction thread
  3516. env_->SetBackgroundThreads(1, Env::LOW);
  3517. test::SleepingBackgroundTask sleeping_task_low;
  3518. env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
  3519. Env::Priority::LOW);
  3520. // Start from scratch and disable compaction/flush. Flush can only happen
  3521. // during compaction but trigger is pretty high
  3522. options.disable_auto_compactions = true;
  3523. DestroyAndReopen(options);
  3524. env_->SetBackgroundThreads(0, Env::HIGH);
  3525. // Put until writes are stopped, bounded by 256 puts. We should see stop at
  3526. // ~128KB
  3527. int count = 0;
  3528. Random rnd(301);
  3529. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
  3530. "DBImpl::DelayWrite:Wait",
  3531. [&](void* /*arg*/) { sleeping_task_low.WakeUp(); });
  3532. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  3533. while (!sleeping_task_low.WokenUp() && count < 256) {
  3534. ASSERT_OK(Put(Key(count), RandomString(&rnd, 1024), WriteOptions()));
  3535. count++;
  3536. }
  3537. ASSERT_GT(static_cast<double>(count), 128 * 0.8);
  3538. ASSERT_LT(static_cast<double>(count), 128 * 1.2);
  3539. sleeping_task_low.WaitUntilDone();
  3540. // Increase
  3541. ASSERT_OK(dbfull()->SetOptions({
  3542. {"max_write_buffer_number", "8"},
  3543. }));
  3544. // Clean up memtable and L0
  3545. dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
  3546. sleeping_task_low.Reset();
  3547. env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
  3548. Env::Priority::LOW);
  3549. count = 0;
  3550. while (!sleeping_task_low.WokenUp() && count < 1024) {
  3551. ASSERT_OK(Put(Key(count), RandomString(&rnd, 1024), WriteOptions()));
  3552. count++;
  3553. }
  3554. // Windows fails this test. Will tune in the future and figure out
  3555. // approp number
  3556. #ifndef OS_WIN
  3557. ASSERT_GT(static_cast<double>(count), 512 * 0.8);
  3558. ASSERT_LT(static_cast<double>(count), 512 * 1.2);
  3559. #endif
  3560. sleeping_task_low.WaitUntilDone();
  3561. // Decrease
  3562. ASSERT_OK(dbfull()->SetOptions({
  3563. {"max_write_buffer_number", "4"},
  3564. }));
  3565. // Clean up memtable and L0
  3566. dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
  3567. sleeping_task_low.Reset();
  3568. env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
  3569. Env::Priority::LOW);
  3570. count = 0;
  3571. while (!sleeping_task_low.WokenUp() && count < 1024) {
  3572. ASSERT_OK(Put(Key(count), RandomString(&rnd, 1024), WriteOptions()));
  3573. count++;
  3574. }
  3575. // Windows fails this test. Will tune in the future and figure out
  3576. // approp number
  3577. #ifndef OS_WIN
  3578. ASSERT_GT(static_cast<double>(count), 256 * 0.8);
  3579. ASSERT_LT(static_cast<double>(count), 266 * 1.2);
  3580. #endif
  3581. sleeping_task_low.WaitUntilDone();
  3582. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  3583. }
  3584. #endif // ROCKSDB_LITE
  3585. #ifdef ROCKSDB_USING_THREAD_STATUS
  3586. namespace {
  3587. void VerifyOperationCount(Env* env, ThreadStatus::OperationType op_type,
  3588. int expected_count) {
  3589. int op_count = 0;
  3590. std::vector<ThreadStatus> thread_list;
  3591. ASSERT_OK(env->GetThreadList(&thread_list));
  3592. for (auto thread : thread_list) {
  3593. if (thread.operation_type == op_type) {
  3594. op_count++;
  3595. }
  3596. }
  3597. ASSERT_EQ(op_count, expected_count);
  3598. }
  3599. } // namespace
TEST_F(DBTest, GetThreadStatus) {
  // Verifies that GetThreadList() reports the configured number of HIGH,
  // LOW and BOTTOM priority pool threads, and that the thread status
  // updater's column family info map tracks CF creation, drop and Close().
  Options options;
  options.env = env_;
  options.enable_thread_tracking = true;
  TryReopen(options);
  std::vector<ThreadStatus> thread_list;
  Status s = env_->GetThreadList(&thread_list);
  for (int i = 0; i < 2; ++i) {
    // repeat the test with different numbers of high / low priority threads
    const int kTestCount = 3;
    const unsigned int kHighPriCounts[kTestCount] = {3, 2, 5};
    const unsigned int kLowPriCounts[kTestCount] = {10, 15, 3};
    const unsigned int kBottomPriCounts[kTestCount] = {2, 1, 4};
    for (int test = 0; test < kTestCount; ++test) {
      // Change the number of threads in high / low priority pool.
      env_->SetBackgroundThreads(kHighPriCounts[test], Env::HIGH);
      env_->SetBackgroundThreads(kLowPriCounts[test], Env::LOW);
      env_->SetBackgroundThreads(kBottomPriCounts[test], Env::BOTTOM);
      // Wait to ensure that all threads have been registered.
      unsigned int thread_type_counts[ThreadStatus::NUM_THREAD_TYPES];
      // TODO(ajkr): it'd be better if SetBackgroundThreads returned only after
      // all threads have been registered.
      // Try up to 60 seconds.
      for (int num_try = 0; num_try < 60000; num_try++) {
        env_->SleepForMicroseconds(1000);
        thread_list.clear();
        s = env_->GetThreadList(&thread_list);
        ASSERT_OK(s);
        memset(thread_type_counts, 0, sizeof(thread_type_counts));
        // Tally the registered threads by type.
        for (auto thread : thread_list) {
          ASSERT_LT(thread.thread_type, ThreadStatus::NUM_THREAD_TYPES);
          thread_type_counts[thread.thread_type]++;
        }
        // Stop polling once all three pools report the expected size.
        if (thread_type_counts[ThreadStatus::HIGH_PRIORITY] ==
                kHighPriCounts[test] &&
            thread_type_counts[ThreadStatus::LOW_PRIORITY] ==
                kLowPriCounts[test] &&
            thread_type_counts[ThreadStatus::BOTTOM_PRIORITY] ==
                kBottomPriCounts[test]) {
          break;
        }
      }
      // Verify the number of high-priority threads
      ASSERT_EQ(thread_type_counts[ThreadStatus::HIGH_PRIORITY],
                kHighPriCounts[test]);
      // Verify the number of low-priority threads
      ASSERT_EQ(thread_type_counts[ThreadStatus::LOW_PRIORITY],
                kLowPriCounts[test]);
      // Verify the number of bottom-priority threads
      ASSERT_EQ(thread_type_counts[ThreadStatus::BOTTOM_PRIORITY],
                kBottomPriCounts[test]);
    }
    if (i == 0) {
      // repeat the test with multiple column families
      CreateAndReopenWithCF({"pikachu", "about-to-remove"}, options);
      env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(handles_,
                                                                     true);
    }
  }
  // Drop one column family and check the info map stays consistent with the
  // remaining handles, both while open and after Close().
  db_->DropColumnFamily(handles_[2]);
  delete handles_[2];
  handles_.erase(handles_.begin() + 2);
  env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(handles_,
                                                                 true);
  Close();
  env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(handles_,
                                                                 true);
}
TEST_F(DBTest, DisableThreadStatus) {
  // With thread tracking disabled, creating column families must not
  // register any column family info with the thread status updater.
  Options options;
  options.env = env_;
  options.enable_thread_tracking = false;
  TryReopen(options);
  CreateAndReopenWithCF({"pikachu", "about-to-remove"}, options);
  // Verify that none of the column family info exists
  env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(handles_,
                                                                 false);
}
TEST_F(DBTest, ThreadStatusFlush) {
  // Verifies a running flush is visible both through GetThreadList()
  // (OP_FLUSH count) and through the kNumRunningFlushes DB property, using
  // sync points to hold the flush job open while the checks run.
  Options options;
  options.env = env_;
  options.write_buffer_size = 100000;  // Small write buffer
  options.enable_thread_tracking = true;
  options = CurrentOptions(options);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({
      {"FlushJob::FlushJob()", "DBTest::ThreadStatusFlush:1"},
      {"DBTest::ThreadStatusFlush:2", "FlushJob::WriteLevel0Table"},
  });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  CreateAndReopenWithCF({"pikachu"}, options);
  // No flush is running before any writes.
  VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 0);
  ASSERT_OK(Put(1, "foo", "v1"));
  ASSERT_EQ("v1", Get(1, "foo"));
  VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 0);
  uint64_t num_running_flushes = 0;
  db_->GetIntProperty(DB::Properties::kNumRunningFlushes, &num_running_flushes);
  ASSERT_EQ(num_running_flushes, 0);
  Put(1, "k1", std::string(100000, 'x'));  // Fill memtable
  Put(1, "k2", std::string(100000, 'y'));  // Trigger flush
  // The first sync point is to make sure there's one flush job
  // running when we perform VerifyOperationCount().
  TEST_SYNC_POINT("DBTest::ThreadStatusFlush:1");
  VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 1);
  db_->GetIntProperty(DB::Properties::kNumRunningFlushes, &num_running_flushes);
  ASSERT_EQ(num_running_flushes, 1);
  // This second sync point is to ensure the flush job will not
  // be completed until we already perform VerifyOperationCount().
  TEST_SYNC_POINT("DBTest::ThreadStatusFlush:2");
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}
TEST_P(DBTestWithParam, ThreadStatusSingleCompaction) {
  // Runs a single L0->L1 compaction and checks it is (or, with tracking
  // disabled on the second pass, is not) reported by GetThreadList(), and
  // that kNumRunningCompactions reports it in both cases.
  const int kTestKeySize = 16;
  const int kTestValueSize = 984;
  const int kEntrySize = kTestKeySize + kTestValueSize;
  const int kEntriesPerBuffer = 100;
  Options options;
  options.create_if_missing = true;
  options.write_buffer_size = kEntrySize * kEntriesPerBuffer;
  options.compaction_style = kCompactionStyleLevel;
  options.target_file_size_base = options.write_buffer_size;
  options.max_bytes_for_level_base = options.target_file_size_base * 2;
  options.max_bytes_for_level_multiplier = 2;
  options.compression = kNoCompression;
  options = CurrentOptions(options);
  options.env = env_;
  options.enable_thread_tracking = true;
  const int kNumL0Files = 4;
  options.level0_file_num_compaction_trigger = kNumL0Files;
  options.max_subcompactions = max_subcompactions_;
  // Hold back the background compaction until the Put phase is done, then
  // keep the compaction job alive while thread status is inspected.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({
      {"DBTest::ThreadStatusSingleCompaction:0", "DBImpl::BGWorkCompaction"},
      {"CompactionJob::Run():Start", "DBTest::ThreadStatusSingleCompaction:1"},
      {"DBTest::ThreadStatusSingleCompaction:2", "CompactionJob::Run():End"},
  });
  for (int tests = 0; tests < 2; ++tests) {
    DestroyAndReopen(options);
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace();
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
    Random rnd(301);
    // The Put Phase.
    for (int file = 0; file < kNumL0Files; ++file) {
      for (int key = 0; key < kEntriesPerBuffer; ++key) {
        ASSERT_OK(Put(ToString(key + file * kEntriesPerBuffer),
                      RandomString(&rnd, kTestValueSize)));
      }
      Flush();
    }
    // This makes sure a compaction won't be scheduled until
    // we have done with the above Put Phase.
    uint64_t num_running_compactions = 0;
    db_->GetIntProperty(DB::Properties::kNumRunningCompactions,
                        &num_running_compactions);
    ASSERT_EQ(num_running_compactions, 0);
    TEST_SYNC_POINT("DBTest::ThreadStatusSingleCompaction:0");
    ASSERT_GE(NumTableFilesAtLevel(0),
              options.level0_file_num_compaction_trigger);
    // This makes sure at least one compaction is running.
    TEST_SYNC_POINT("DBTest::ThreadStatusSingleCompaction:1");
    if (options.enable_thread_tracking) {
      // expecting one single L0 to L1 compaction
      VerifyOperationCount(env_, ThreadStatus::OP_COMPACTION, 1);
    } else {
      // If thread tracking is not enabled, compaction count should be 0.
      VerifyOperationCount(env_, ThreadStatus::OP_COMPACTION, 0);
    }
    // The DB property reports the compaction regardless of thread tracking.
    db_->GetIntProperty(DB::Properties::kNumRunningCompactions,
                        &num_running_compactions);
    ASSERT_EQ(num_running_compactions, 1);
    // TODO(yhchiang): adding assert to verify each compaction stage.
    TEST_SYNC_POINT("DBTest::ThreadStatusSingleCompaction:2");
    // repeat the test with disabling thread tracking.
    options.enable_thread_tracking = false;
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  }
}
TEST_P(DBTestWithParam, PreShutdownManualCompaction) {
  // Exercises manual compaction over various key ranges, then verifies that
  // after CancelAllBackgroundWork() a manual CompactRange leaves the file
  // layout untouched. Repeated with 7 levels (iter 0) and 3 levels (iter 1).
  Options options = CurrentOptions();
  options.max_subcompactions = max_subcompactions_;
  CreateAndReopenWithCF({"pikachu"}, options);
  // iter - 0 with 7 levels
  // iter - 1 with 3 levels
  for (int iter = 0; iter < 2; ++iter) {
    MakeTables(3, "p", "q", 1);
    ASSERT_EQ("1,1,1", FilesPerLevel(1));
    // Compaction range falls before files
    Compact(1, "", "c");
    ASSERT_EQ("1,1,1", FilesPerLevel(1));
    // Compaction range falls after files
    Compact(1, "r", "z");
    ASSERT_EQ("1,1,1", FilesPerLevel(1));
    // Compaction range overlaps files
    Compact(1, "p1", "p9");
    ASSERT_EQ("0,0,1", FilesPerLevel(1));
    // Populate a different range
    MakeTables(3, "c", "e", 1);
    ASSERT_EQ("1,1,2", FilesPerLevel(1));
    // Compact just the new range
    Compact(1, "b", "f");
    ASSERT_EQ("0,0,2", FilesPerLevel(1));
    // Compact all
    MakeTables(1, "a", "z", 1);
    ASSERT_EQ("1,0,2", FilesPerLevel(1));
    // After background work is cancelled, manual compaction must be a no-op.
    CancelAllBackgroundWork(db_);
    db_->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr);
    ASSERT_EQ("1,0,2", FilesPerLevel(1));
    if (iter == 0) {
      // Rebuild the DB with 3 levels for the second iteration.
      options = CurrentOptions();
      options.num_levels = 3;
      options.create_if_missing = true;
      DestroyAndReopen(options);
      CreateAndReopenWithCF({"pikachu"}, options);
    }
  }
}
  3814. TEST_F(DBTest, PreShutdownFlush) {
  3815. Options options = CurrentOptions();
  3816. CreateAndReopenWithCF({"pikachu"}, options);
  3817. ASSERT_OK(Put(1, "key", "value"));
  3818. CancelAllBackgroundWork(db_);
  3819. Status s =
  3820. db_->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr);
  3821. ASSERT_TRUE(s.IsShutdownInProgress());
  3822. }
TEST_P(DBTestWithParam, PreShutdownMultipleCompaction) {
  // Keeps the DB busy with flushes and several parallel compactions, calls
  // CancelAllBackgroundWork() while they are in flight, and verifies that
  // no compaction remains running afterwards.
  const int kTestKeySize = 16;
  const int kTestValueSize = 984;
  const int kEntrySize = kTestKeySize + kTestValueSize;
  const int kEntriesPerBuffer = 40;
  const int kNumL0Files = 4;
  const int kHighPriCount = 3;
  const int kLowPriCount = 5;
  env_->SetBackgroundThreads(kHighPriCount, Env::HIGH);
  env_->SetBackgroundThreads(kLowPriCount, Env::LOW);
  Options options;
  options.create_if_missing = true;
  options.write_buffer_size = kEntrySize * kEntriesPerBuffer;
  options.compaction_style = kCompactionStyleLevel;
  options.target_file_size_base = options.write_buffer_size;
  options.max_bytes_for_level_base =
      options.target_file_size_base * kNumL0Files;
  options.compression = kNoCompression;
  options = CurrentOptions(options);
  options.env = env_;
  options.enable_thread_tracking = true;
  options.level0_file_num_compaction_trigger = kNumL0Files;
  options.max_bytes_for_level_multiplier = 2;
  options.max_background_compactions = kLowPriCount;
  options.level0_stop_writes_trigger = 1 << 10;
  options.level0_slowdown_writes_trigger = 1 << 10;
  options.max_subcompactions = max_subcompactions_;
  TryReopen(options);
  Random rnd(301);
  std::vector<ThreadStatus> thread_list;
  // Delay both flush and compaction
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"FlushJob::FlushJob()", "CompactionJob::Run():Start"},
       {"CompactionJob::Run():Start",
        "DBTest::PreShutdownMultipleCompaction:Preshutdown"},
       {"CompactionJob::Run():Start",
        "DBTest::PreShutdownMultipleCompaction:VerifyCompaction"},
       {"DBTest::PreShutdownMultipleCompaction:Preshutdown",
        "CompactionJob::Run():End"},
       {"CompactionJob::Run():End",
        "DBTest::PreShutdownMultipleCompaction:VerifyPreshutdown"}});
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  // Make rocksdb busy
  int key = 0;
  // check how many threads are doing compaction using GetThreadList
  int operation_count[ThreadStatus::NUM_OP_TYPES] = {0};
  for (int file = 0; file < 16 * kNumL0Files; ++file) {
    for (int k = 0; k < kEntriesPerBuffer; ++k) {
      ASSERT_OK(Put(ToString(key++), RandomString(&rnd, kTestValueSize)));
    }
    Status s = env_->GetThreadList(&thread_list);
    // Accumulate observed operation types across polls.
    for (auto thread : thread_list) {
      operation_count[thread.operation_type]++;
    }
    // Speed up the test: stop writing once enough concurrent flush and
    // compaction activity has been observed.
    if (operation_count[ThreadStatus::OP_FLUSH] > 1 &&
        operation_count[ThreadStatus::OP_COMPACTION] >
            0.6 * options.max_background_compactions) {
      break;
    }
    // Safety valve on the final iteration: release the compactions blocked
    // on the Preshutdown sync point so the test cannot hang.
    if (file == 15 * kNumL0Files) {
      TEST_SYNC_POINT("DBTest::PreShutdownMultipleCompaction:Preshutdown");
    }
  }
  TEST_SYNC_POINT("DBTest::PreShutdownMultipleCompaction:Preshutdown");
  ASSERT_GE(operation_count[ThreadStatus::OP_COMPACTION], 1);
  CancelAllBackgroundWork(db_);
  TEST_SYNC_POINT("DBTest::PreShutdownMultipleCompaction:VerifyPreshutdown");
  dbfull()->TEST_WaitForCompact();
  // Record the number of compactions at a time.
  for (int i = 0; i < ThreadStatus::NUM_OP_TYPES; ++i) {
    operation_count[i] = 0;
  }
  Status s = env_->GetThreadList(&thread_list);
  for (auto thread : thread_list) {
    operation_count[thread.operation_type]++;
  }
  // After shutdown no compaction may still be running.
  ASSERT_EQ(operation_count[ThreadStatus::OP_COMPACTION], 0);
}
TEST_P(DBTestWithParam, PreShutdownCompactionMiddle) {
  // Like PreShutdownMultipleCompaction, but CancelAllBackgroundWork() is
  // issued while compactions are mid-run (between the Inprogress and End
  // sync points); afterwards no compaction may still be running.
  const int kTestKeySize = 16;
  const int kTestValueSize = 984;
  const int kEntrySize = kTestKeySize + kTestValueSize;
  const int kEntriesPerBuffer = 40;
  const int kNumL0Files = 4;
  const int kHighPriCount = 3;
  const int kLowPriCount = 5;
  env_->SetBackgroundThreads(kHighPriCount, Env::HIGH);
  env_->SetBackgroundThreads(kLowPriCount, Env::LOW);
  Options options;
  options.create_if_missing = true;
  options.write_buffer_size = kEntrySize * kEntriesPerBuffer;
  options.compaction_style = kCompactionStyleLevel;
  options.target_file_size_base = options.write_buffer_size;
  options.max_bytes_for_level_base =
      options.target_file_size_base * kNumL0Files;
  options.compression = kNoCompression;
  options = CurrentOptions(options);
  options.env = env_;
  options.enable_thread_tracking = true;
  options.level0_file_num_compaction_trigger = kNumL0Files;
  options.max_bytes_for_level_multiplier = 2;
  options.max_background_compactions = kLowPriCount;
  options.level0_stop_writes_trigger = 1 << 10;
  options.level0_slowdown_writes_trigger = 1 << 10;
  options.max_subcompactions = max_subcompactions_;
  TryReopen(options);
  Random rnd(301);
  std::vector<ThreadStatus> thread_list;
  // Delay both flush and compaction
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"DBTest::PreShutdownCompactionMiddle:Preshutdown",
        "CompactionJob::Run():Inprogress"},
       {"CompactionJob::Run():Start",
        "DBTest::PreShutdownCompactionMiddle:VerifyCompaction"},
       {"CompactionJob::Run():Inprogress", "CompactionJob::Run():End"},
       {"CompactionJob::Run():End",
        "DBTest::PreShutdownCompactionMiddle:VerifyPreshutdown"}});
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  // Make rocksdb busy
  int key = 0;
  // check how many threads are doing compaction using GetThreadList
  int operation_count[ThreadStatus::NUM_OP_TYPES] = {0};
  for (int file = 0; file < 16 * kNumL0Files; ++file) {
    for (int k = 0; k < kEntriesPerBuffer; ++k) {
      ASSERT_OK(Put(ToString(key++), RandomString(&rnd, kTestValueSize)));
    }
    Status s = env_->GetThreadList(&thread_list);
    // Accumulate observed operation types across polls.
    for (auto thread : thread_list) {
      operation_count[thread.operation_type]++;
    }
    // Speed up the test: stop writing once enough concurrent flush and
    // compaction activity has been observed.
    if (operation_count[ThreadStatus::OP_FLUSH] > 1 &&
        operation_count[ThreadStatus::OP_COMPACTION] >
            0.6 * options.max_background_compactions) {
      break;
    }
    // Safety valve on the final iteration: release the threads blocked on
    // the VerifyCompaction sync point so the test cannot hang.
    if (file == 15 * kNumL0Files) {
      TEST_SYNC_POINT("DBTest::PreShutdownCompactionMiddle:VerifyCompaction");
    }
  }
  ASSERT_GE(operation_count[ThreadStatus::OP_COMPACTION], 1);
  // Cancel background work while compactions are still before Inprogress.
  CancelAllBackgroundWork(db_);
  TEST_SYNC_POINT("DBTest::PreShutdownCompactionMiddle:Preshutdown");
  TEST_SYNC_POINT("DBTest::PreShutdownCompactionMiddle:VerifyPreshutdown");
  dbfull()->TEST_WaitForCompact();
  // Record the number of compactions at a time.
  for (int i = 0; i < ThreadStatus::NUM_OP_TYPES; ++i) {
    operation_count[i] = 0;
  }
  Status s = env_->GetThreadList(&thread_list);
  for (auto thread : thread_list) {
    operation_count[thread.operation_type]++;
  }
  // After shutdown no compaction may still be running.
  ASSERT_EQ(operation_count[ThreadStatus::OP_COMPACTION], 0);
}
  3979. #endif // ROCKSDB_USING_THREAD_STATUS
  3980. #ifndef ROCKSDB_LITE
  3981. TEST_F(DBTest, FlushOnDestroy) {
  3982. WriteOptions wo;
  3983. wo.disableWAL = true;
  3984. ASSERT_OK(Put("foo", "v1", wo));
  3985. CancelAllBackgroundWork(db_);
  3986. }
  3987. TEST_F(DBTest, DynamicLevelCompressionPerLevel) {
  3988. if (!Snappy_Supported()) {
  3989. return;
  3990. }
  3991. const int kNKeys = 120;
  3992. int keys[kNKeys];
  3993. for (int i = 0; i < kNKeys; i++) {
  3994. keys[i] = i;
  3995. }
  3996. std::random_shuffle(std::begin(keys), std::end(keys));
  3997. Random rnd(301);
  3998. Options options;
  3999. options.create_if_missing = true;
  4000. options.db_write_buffer_size = 20480;
  4001. options.write_buffer_size = 20480;
  4002. options.max_write_buffer_number = 2;
  4003. options.level0_file_num_compaction_trigger = 2;
  4004. options.level0_slowdown_writes_trigger = 2;
  4005. options.level0_stop_writes_trigger = 2;
  4006. options.target_file_size_base = 20480;
  4007. options.level_compaction_dynamic_level_bytes = true;
  4008. options.max_bytes_for_level_base = 102400;
  4009. options.max_bytes_for_level_multiplier = 4;
  4010. options.max_background_compactions = 1;
  4011. options.num_levels = 5;
  4012. options.compression_per_level.resize(3);
  4013. options.compression_per_level[0] = kNoCompression;
  4014. options.compression_per_level[1] = kNoCompression;
  4015. options.compression_per_level[2] = kSnappyCompression;
  4016. OnFileDeletionListener* listener = new OnFileDeletionListener();
  4017. options.listeners.emplace_back(listener);
  4018. DestroyAndReopen(options);
  4019. // Insert more than 80K. L4 should be base level. Neither L0 nor L4 should
  4020. // be compressed, so total data size should be more than 80K.
  4021. for (int i = 0; i < 20; i++) {
  4022. ASSERT_OK(Put(Key(keys[i]), CompressibleString(&rnd, 4000)));
  4023. }
  4024. Flush();
  4025. dbfull()->TEST_WaitForCompact();
  4026. ASSERT_EQ(NumTableFilesAtLevel(1), 0);
  4027. ASSERT_EQ(NumTableFilesAtLevel(2), 0);
  4028. ASSERT_EQ(NumTableFilesAtLevel(3), 0);
  4029. // Assuming each files' metadata is at least 50 bytes/
  4030. ASSERT_GT(SizeAtLevel(0) + SizeAtLevel(4), 20U * 4000U + 50U * 4);
  4031. // Insert 400KB. Some data will be compressed
  4032. for (int i = 21; i < 120; i++) {
  4033. ASSERT_OK(Put(Key(keys[i]), CompressibleString(&rnd, 4000)));
  4034. }
  4035. Flush();
  4036. dbfull()->TEST_WaitForCompact();
  4037. ASSERT_EQ(NumTableFilesAtLevel(1), 0);
  4038. ASSERT_EQ(NumTableFilesAtLevel(2), 0);
  4039. ASSERT_LT(SizeAtLevel(0) + SizeAtLevel(3) + SizeAtLevel(4),
  4040. 120U * 4000U + 50U * 24);
  4041. // Make sure data in files in L3 is not compacted by removing all files
  4042. // in L4 and calculate number of rows
  4043. ASSERT_OK(dbfull()->SetOptions({
  4044. {"disable_auto_compactions", "true"},
  4045. }));
  4046. ColumnFamilyMetaData cf_meta;
  4047. db_->GetColumnFamilyMetaData(&cf_meta);
  4048. for (auto file : cf_meta.levels[4].files) {
  4049. listener->SetExpectedFileName(dbname_ + file.name);
  4050. ASSERT_OK(dbfull()->DeleteFile(file.name));
  4051. }
  4052. listener->VerifyMatchedCount(cf_meta.levels[4].files.size());
  4053. int num_keys = 0;
  4054. std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions()));
  4055. for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
  4056. num_keys++;
  4057. }
  4058. ASSERT_OK(iter->status());
  4059. ASSERT_GT(SizeAtLevel(0) + SizeAtLevel(3), num_keys * 4000U + num_keys * 10U);
  4060. }
  4061. TEST_F(DBTest, DynamicLevelCompressionPerLevel2) {
  4062. if (!Snappy_Supported() || !LZ4_Supported() || !Zlib_Supported()) {
  4063. return;
  4064. }
  4065. const int kNKeys = 500;
  4066. int keys[kNKeys];
  4067. for (int i = 0; i < kNKeys; i++) {
  4068. keys[i] = i;
  4069. }
  4070. std::random_shuffle(std::begin(keys), std::end(keys));
  4071. Random rnd(301);
  4072. Options options;
  4073. options.create_if_missing = true;
  4074. options.db_write_buffer_size = 6000000;
  4075. options.write_buffer_size = 600000;
  4076. options.max_write_buffer_number = 2;
  4077. options.level0_file_num_compaction_trigger = 2;
  4078. options.level0_slowdown_writes_trigger = 2;
  4079. options.level0_stop_writes_trigger = 2;
  4080. options.soft_pending_compaction_bytes_limit = 1024 * 1024;
  4081. options.target_file_size_base = 20;
  4082. options.level_compaction_dynamic_level_bytes = true;
  4083. options.max_bytes_for_level_base = 200;
  4084. options.max_bytes_for_level_multiplier = 8;
  4085. options.max_background_compactions = 1;
  4086. options.num_levels = 5;
  4087. std::shared_ptr<mock::MockTableFactory> mtf(new mock::MockTableFactory);
  4088. options.table_factory = mtf;
  4089. options.compression_per_level.resize(3);
  4090. options.compression_per_level[0] = kNoCompression;
  4091. options.compression_per_level[1] = kLZ4Compression;
  4092. options.compression_per_level[2] = kZlibCompression;
  4093. DestroyAndReopen(options);
  4094. // When base level is L4, L4 is LZ4.
  4095. std::atomic<int> num_zlib(0);
  4096. std::atomic<int> num_lz4(0);
  4097. std::atomic<int> num_no(0);
  4098. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
  4099. "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) {
  4100. Compaction* compaction = reinterpret_cast<Compaction*>(arg);
  4101. if (compaction->output_level() == 4) {
  4102. ASSERT_TRUE(compaction->output_compression() == kLZ4Compression);
  4103. num_lz4.fetch_add(1);
  4104. }
  4105. });
  4106. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
  4107. "FlushJob::WriteLevel0Table:output_compression", [&](void* arg) {
  4108. auto* compression = reinterpret_cast<CompressionType*>(arg);
  4109. ASSERT_TRUE(*compression == kNoCompression);
  4110. num_no.fetch_add(1);
  4111. });
  4112. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  4113. for (int i = 0; i < 100; i++) {
  4114. std::string value = RandomString(&rnd, 200);
  4115. ASSERT_OK(Put(Key(keys[i]), value));
  4116. if (i % 25 == 24) {
  4117. Flush();
  4118. dbfull()->TEST_WaitForCompact();
  4119. }
  4120. }
  4121. Flush();
  4122. dbfull()->TEST_WaitForFlushMemTable();
  4123. dbfull()->TEST_WaitForCompact();
  4124. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  4125. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
  4126. ASSERT_EQ(NumTableFilesAtLevel(1), 0);
  4127. ASSERT_EQ(NumTableFilesAtLevel(2), 0);
  4128. ASSERT_EQ(NumTableFilesAtLevel(3), 0);
  4129. ASSERT_GT(NumTableFilesAtLevel(4), 0);
  4130. ASSERT_GT(num_no.load(), 2);
  4131. ASSERT_GT(num_lz4.load(), 0);
  4132. int prev_num_files_l4 = NumTableFilesAtLevel(4);
  4133. // After base level turn L4->L3, L3 becomes LZ4 and L4 becomes Zlib
  4134. num_lz4.store(0);
  4135. num_no.store(0);
  4136. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
  4137. "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) {
  4138. Compaction* compaction = reinterpret_cast<Compaction*>(arg);
  4139. if (compaction->output_level() == 4 && compaction->start_level() == 3) {
  4140. ASSERT_TRUE(compaction->output_compression() == kZlibCompression);
  4141. num_zlib.fetch_add(1);
  4142. } else {
  4143. ASSERT_TRUE(compaction->output_compression() == kLZ4Compression);
  4144. num_lz4.fetch_add(1);
  4145. }
  4146. });
  4147. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
  4148. "FlushJob::WriteLevel0Table:output_compression", [&](void* arg) {
  4149. auto* compression = reinterpret_cast<CompressionType*>(arg);
  4150. ASSERT_TRUE(*compression == kNoCompression);
  4151. num_no.fetch_add(1);
  4152. });
  4153. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  4154. for (int i = 101; i < 500; i++) {
  4155. std::string value = RandomString(&rnd, 200);
  4156. ASSERT_OK(Put(Key(keys[i]), value));
  4157. if (i % 100 == 99) {
  4158. Flush();
  4159. dbfull()->TEST_WaitForCompact();
  4160. }
  4161. }
  4162. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
  4163. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  4164. ASSERT_EQ(NumTableFilesAtLevel(1), 0);
  4165. ASSERT_EQ(NumTableFilesAtLevel(2), 0);
  4166. ASSERT_GT(NumTableFilesAtLevel(3), 0);
  4167. ASSERT_GT(NumTableFilesAtLevel(4), prev_num_files_l4);
  4168. ASSERT_GT(num_no.load(), 2);
  4169. ASSERT_GT(num_lz4.load(), 0);
  4170. ASSERT_GT(num_zlib.load(), 0);
  4171. }
// Exercises DB::SetOptions() for level-compaction tuning knobs --
// level0_file_num_compaction_trigger, target_file_size_base,
// max_bytes_for_level_base/multiplier, level0_stop_writes_trigger and
// disable_auto_compactions -- verifying each change takes effect
// dynamically without reopening the DB. The exact ordering of flushes,
// sleeping background tasks and write-controller checks is significant;
// do not reorder statements.
TEST_F(DBTest, DynamicCompactionOptions) {
  // minimum write buffer size is enforced at 64KB
  const uint64_t k32KB = 1 << 15;
  const uint64_t k64KB = 1 << 16;
  const uint64_t k128KB = 1 << 17;
  const uint64_t k1MB = 1 << 20;
  const uint64_t k4KB = 1 << 12;
  Options options;
  options.env = env_;
  options.create_if_missing = true;
  options.compression = kNoCompression;
  options.soft_pending_compaction_bytes_limit = 1024 * 1024;
  options.write_buffer_size = k64KB;
  options.arena_block_size = 4 * k4KB;
  options.max_write_buffer_number = 2;
  // Compaction related options
  options.level0_file_num_compaction_trigger = 3;
  options.level0_slowdown_writes_trigger = 4;
  options.level0_stop_writes_trigger = 8;
  options.target_file_size_base = k64KB;
  options.max_compaction_bytes = options.target_file_size_base * 10;
  options.target_file_size_multiplier = 1;
  options.max_bytes_for_level_base = k128KB;
  options.max_bytes_for_level_multiplier = 4;
  // Block flush thread and disable compaction thread
  env_->SetBackgroundThreads(1, Env::LOW);
  env_->SetBackgroundThreads(1, Env::HIGH);
  DestroyAndReopen(options);

  // Writes `size` KB of random data starting at key `start`, stepping by
  // `stride`, then waits for the resulting memtable flush.
  auto gen_l0_kb = [this](int start, int size, int stride) {
    Random rnd(301);
    for (int i = 0; i < size; i++) {
      ASSERT_OK(Put(Key(start + stride * i), RandomString(&rnd, 1024)));
    }
    dbfull()->TEST_WaitForFlushMemTable();
  };

  // Write 3 files that have the same key range.
  // Since level0_file_num_compaction_trigger is 3, compaction should be
  // triggered. The compaction should result in one L1 file
  gen_l0_kb(0, 64, 1);
  ASSERT_EQ(NumTableFilesAtLevel(0), 1);
  gen_l0_kb(0, 64, 1);
  ASSERT_EQ(NumTableFilesAtLevel(0), 2);
  gen_l0_kb(0, 64, 1);
  dbfull()->TEST_WaitForCompact();
  ASSERT_EQ("0,1", FilesPerLevel());
  std::vector<LiveFileMetaData> metadata;
  db_->GetLiveFilesMetaData(&metadata);
  ASSERT_EQ(1U, metadata.size());
  // The single L1 file should be close to target_file_size_base (64KB).
  ASSERT_LE(metadata[0].size, k64KB + k4KB);
  ASSERT_GE(metadata[0].size, k64KB - k4KB);

  // Test compaction trigger and target_file_size_base
  // Reduce compaction trigger to 2, and reduce L1 file size to 32KB.
  // Writing to 64KB L0 files should trigger a compaction. Since these
  // 2 L0 files have the same key range, compaction merge them and should
  // result in 2 32KB L1 files.
  ASSERT_OK(dbfull()->SetOptions({{"level0_file_num_compaction_trigger", "2"},
                                  {"target_file_size_base", ToString(k32KB)}}));

  gen_l0_kb(0, 64, 1);
  ASSERT_EQ("1,1", FilesPerLevel());
  gen_l0_kb(0, 64, 1);
  dbfull()->TEST_WaitForCompact();
  ASSERT_EQ("0,2", FilesPerLevel());
  metadata.clear();
  db_->GetLiveFilesMetaData(&metadata);
  ASSERT_EQ(2U, metadata.size());
  // Both L1 files should now be close to the new 32KB target.
  ASSERT_LE(metadata[0].size, k32KB + k4KB);
  ASSERT_GE(metadata[0].size, k32KB - k4KB);
  ASSERT_LE(metadata[1].size, k32KB + k4KB);
  ASSERT_GE(metadata[1].size, k32KB - k4KB);

  // Test max_bytes_for_level_base
  // Increase level base size to 256KB and write enough data that will
  // fill L1 and L2. L1 size should be around 256KB while L2 size should be
  // around 256KB x 4.
  ASSERT_OK(
      dbfull()->SetOptions({{"max_bytes_for_level_base", ToString(k1MB)}}));

  // writing 96 x 64KB => 6 * 1024KB
  // (L1 + L2) = (1 + 4) * 1024KB
  for (int i = 0; i < 96; ++i) {
    gen_l0_kb(i, 64, 96);
  }
  dbfull()->TEST_WaitForCompact();
  ASSERT_GT(SizeAtLevel(1), k1MB / 2);
  ASSERT_LT(SizeAtLevel(1), k1MB + k1MB / 2);

  // Within (0.5, 1.5) of 4MB.
  ASSERT_GT(SizeAtLevel(2), 2 * k1MB);
  ASSERT_LT(SizeAtLevel(2), 6 * k1MB);

  // Test max_bytes_for_level_multiplier and
  // max_bytes_for_level_base. Now, reduce both mulitplier and level base,
  // After filling enough data that can fit in L1 - L3, we should see L1 size
  // reduces to 128KB from 256KB which was asserted previously. Same for L2.
  ASSERT_OK(
      dbfull()->SetOptions({{"max_bytes_for_level_multiplier", "2"},
                            {"max_bytes_for_level_base", ToString(k128KB)}}));

  // writing 20 x 64KB = 10 x 128KB
  // (L1 + L2 + L3) = (1 + 2 + 4) * 128KB
  for (int i = 0; i < 20; ++i) {
    gen_l0_kb(i, 64, 32);
  }
  dbfull()->TEST_WaitForCompact();
  uint64_t total_size = SizeAtLevel(1) + SizeAtLevel(2) + SizeAtLevel(3);
  ASSERT_TRUE(total_size < k128KB * 7 * 1.5);

  // Test level0_stop_writes_trigger.
  // Clean up memtable and L0. Block compaction threads. If continue to write
  // and flush memtables. We should see put stop after 8 memtable flushes
  // since level0_stop_writes_trigger = 8
  dbfull()->TEST_FlushMemTable(true, true);
  dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
  // Block compaction
  test::SleepingBackgroundTask sleeping_task_low;
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::LOW);
  sleeping_task_low.WaitUntilSleeping();
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  int count = 0;
  Random rnd(301);
  WriteOptions wo;
  while (count < 64) {
    ASSERT_OK(Put(Key(count), RandomString(&rnd, 1024), wo));
    dbfull()->TEST_FlushMemTable(true, true);
    count++;
    // Stop the loop as soon as the write controller reports a stop.
    if (dbfull()->TEST_write_controler().IsStopped()) {
      sleeping_task_low.WakeUp();
      break;
    }
  }
  // Stop trigger = 8
  ASSERT_EQ(count, 8);
  // Unblock
  sleeping_task_low.WaitUntilDone();

  // Now reduce level0_stop_writes_trigger to 6. Clear up memtables and L0.
  // Block compaction thread again. Perform the put and memtable flushes
  // until we see the stop after 6 memtable flushes.
  ASSERT_OK(dbfull()->SetOptions({{"level0_stop_writes_trigger", "6"}}));
  dbfull()->TEST_FlushMemTable(true);
  dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);

  // Block compaction again
  sleeping_task_low.Reset();
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::LOW);
  sleeping_task_low.WaitUntilSleeping();
  count = 0;
  while (count < 64) {
    ASSERT_OK(Put(Key(count), RandomString(&rnd, 1024), wo));
    dbfull()->TEST_FlushMemTable(true, true);
    count++;
    if (dbfull()->TEST_write_controler().IsStopped()) {
      sleeping_task_low.WakeUp();
      break;
    }
  }
  ASSERT_EQ(count, 6);
  // Unblock
  sleeping_task_low.WaitUntilDone();

  // Test disable_auto_compactions
  // Compaction thread is unblocked but auto compaction is disabled. Write
  // 4 L0 files and compaction should be triggered. If auto compaction is
  // disabled, then TEST_WaitForCompact will be waiting for nothing. Number of
  // L0 files do not change after the call.
  ASSERT_OK(dbfull()->SetOptions({{"disable_auto_compactions", "true"}}));
  dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);

  for (int i = 0; i < 4; ++i) {
    ASSERT_OK(Put(Key(i), RandomString(&rnd, 1024)));
    // Wait for compaction so that put won't stop
    dbfull()->TEST_FlushMemTable(true);
  }
  dbfull()->TEST_WaitForCompact();
  ASSERT_EQ(NumTableFilesAtLevel(0), 4);

  // Enable auto compaction and perform the same test, # of L0 files should be
  // reduced after compaction.
  ASSERT_OK(dbfull()->SetOptions({{"disable_auto_compactions", "false"}}));
  dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);

  for (int i = 0; i < 4; ++i) {
    ASSERT_OK(Put(Key(i), RandomString(&rnd, 1024)));
    // Wait for compaction so that put won't stop
    dbfull()->TEST_FlushMemTable(true);
  }
  dbfull()->TEST_WaitForCompact();
  ASSERT_LT(NumTableFilesAtLevel(0), 4);
}
  4354. // Test dynamic FIFO compaction options.
  4355. // This test covers just option parsing and makes sure that the options are
  4356. // correctly assigned. Also look at DBOptionsTest.SetFIFOCompactionOptions
// test which makes sure that the FIFO compaction functionality is working
  4358. // as expected on dynamically changing the options.
  4359. // Even more FIFOCompactionTests are at DBTest.FIFOCompaction* .
  4360. TEST_F(DBTest, DynamicFIFOCompactionOptions) {
  4361. Options options;
  4362. options.ttl = 0;
  4363. options.create_if_missing = true;
  4364. DestroyAndReopen(options);
  4365. // Initial defaults
  4366. ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size,
  4367. 1024 * 1024 * 1024);
  4368. ASSERT_EQ(dbfull()->GetOptions().ttl, 0);
  4369. ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction,
  4370. false);
  4371. ASSERT_OK(dbfull()->SetOptions(
  4372. {{"compaction_options_fifo", "{max_table_files_size=23;}"}}));
  4373. ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size,
  4374. 23);
  4375. ASSERT_EQ(dbfull()->GetOptions().ttl, 0);
  4376. ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction,
  4377. false);
  4378. ASSERT_OK(dbfull()->SetOptions({{"ttl", "97"}}));
  4379. ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size,
  4380. 23);
  4381. ASSERT_EQ(dbfull()->GetOptions().ttl, 97);
  4382. ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction,
  4383. false);
  4384. ASSERT_OK(dbfull()->SetOptions({{"ttl", "203"}}));
  4385. ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size,
  4386. 23);
  4387. ASSERT_EQ(dbfull()->GetOptions().ttl, 203);
  4388. ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction,
  4389. false);
  4390. ASSERT_OK(dbfull()->SetOptions(
  4391. {{"compaction_options_fifo", "{allow_compaction=true;}"}}));
  4392. ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size,
  4393. 23);
  4394. ASSERT_EQ(dbfull()->GetOptions().ttl, 203);
  4395. ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction,
  4396. true);
  4397. ASSERT_OK(dbfull()->SetOptions(
  4398. {{"compaction_options_fifo", "{max_table_files_size=31;}"}}));
  4399. ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size,
  4400. 31);
  4401. ASSERT_EQ(dbfull()->GetOptions().ttl, 203);
  4402. ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction,
  4403. true);
  4404. ASSERT_OK(dbfull()->SetOptions(
  4405. {{"compaction_options_fifo",
  4406. "{max_table_files_size=51;allow_compaction=true;}"}}));
  4407. ASSERT_OK(dbfull()->SetOptions({{"ttl", "49"}}));
  4408. ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size,
  4409. 51);
  4410. ASSERT_EQ(dbfull()->GetOptions().ttl, 49);
  4411. ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction,
  4412. true);
  4413. }
  4414. TEST_F(DBTest, DynamicUniversalCompactionOptions) {
  4415. Options options;
  4416. options.create_if_missing = true;
  4417. DestroyAndReopen(options);
  4418. // Initial defaults
  4419. ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.size_ratio, 1U);
  4420. ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.min_merge_width,
  4421. 2u);
  4422. ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.max_merge_width,
  4423. UINT_MAX);
  4424. ASSERT_EQ(dbfull()
  4425. ->GetOptions()
  4426. .compaction_options_universal.max_size_amplification_percent,
  4427. 200u);
  4428. ASSERT_EQ(dbfull()
  4429. ->GetOptions()
  4430. .compaction_options_universal.compression_size_percent,
  4431. -1);
  4432. ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.stop_style,
  4433. kCompactionStopStyleTotalSize);
  4434. ASSERT_EQ(
  4435. dbfull()->GetOptions().compaction_options_universal.allow_trivial_move,
  4436. false);
  4437. ASSERT_OK(dbfull()->SetOptions(
  4438. {{"compaction_options_universal", "{size_ratio=7;}"}}));
  4439. ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.size_ratio, 7u);
  4440. ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.min_merge_width,
  4441. 2u);
  4442. ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.max_merge_width,
  4443. UINT_MAX);
  4444. ASSERT_EQ(dbfull()
  4445. ->GetOptions()
  4446. .compaction_options_universal.max_size_amplification_percent,
  4447. 200u);
  4448. ASSERT_EQ(dbfull()
  4449. ->GetOptions()
  4450. .compaction_options_universal.compression_size_percent,
  4451. -1);
  4452. ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.stop_style,
  4453. kCompactionStopStyleTotalSize);
  4454. ASSERT_EQ(
  4455. dbfull()->GetOptions().compaction_options_universal.allow_trivial_move,
  4456. false);
  4457. ASSERT_OK(dbfull()->SetOptions(
  4458. {{"compaction_options_universal", "{min_merge_width=11;}"}}));
  4459. ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.size_ratio, 7u);
  4460. ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.min_merge_width,
  4461. 11u);
  4462. ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.max_merge_width,
  4463. UINT_MAX);
  4464. ASSERT_EQ(dbfull()
  4465. ->GetOptions()
  4466. .compaction_options_universal.max_size_amplification_percent,
  4467. 200u);
  4468. ASSERT_EQ(dbfull()
  4469. ->GetOptions()
  4470. .compaction_options_universal.compression_size_percent,
  4471. -1);
  4472. ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.stop_style,
  4473. kCompactionStopStyleTotalSize);
  4474. ASSERT_EQ(
  4475. dbfull()->GetOptions().compaction_options_universal.allow_trivial_move,
  4476. false);
  4477. }
  4478. #endif // ROCKSDB_LITE
  4479. TEST_F(DBTest, FileCreationRandomFailure) {
  4480. Options options;
  4481. options.env = env_;
  4482. options.create_if_missing = true;
  4483. options.write_buffer_size = 100000; // Small write buffer
  4484. options.target_file_size_base = 200000;
  4485. options.max_bytes_for_level_base = 1000000;
  4486. options.max_bytes_for_level_multiplier = 2;
  4487. DestroyAndReopen(options);
  4488. Random rnd(301);
  4489. const int kCDTKeysPerBuffer = 4;
  4490. const int kTestSize = kCDTKeysPerBuffer * 4096;
  4491. const int kTotalIteration = 100;
  4492. // the second half of the test involves in random failure
  4493. // of file creation.
  4494. const int kRandomFailureTest = kTotalIteration / 2;
  4495. std::vector<std::string> values;
  4496. for (int i = 0; i < kTestSize; ++i) {
  4497. values.push_back("NOT_FOUND");
  4498. }
  4499. for (int j = 0; j < kTotalIteration; ++j) {
  4500. if (j == kRandomFailureTest) {
  4501. env_->non_writeable_rate_.store(90);
  4502. }
  4503. for (int k = 0; k < kTestSize; ++k) {
  4504. // here we expect some of the Put fails.
  4505. std::string value = RandomString(&rnd, 100);
  4506. Status s = Put(Key(k), Slice(value));
  4507. if (s.ok()) {
  4508. // update the latest successful put
  4509. values[k] = value;
  4510. }
  4511. // But everything before we simulate the failure-test should succeed.
  4512. if (j < kRandomFailureTest) {
  4513. ASSERT_OK(s);
  4514. }
  4515. }
  4516. }
  4517. // If rocksdb does not do the correct job, internal assert will fail here.
  4518. dbfull()->TEST_WaitForFlushMemTable();
  4519. dbfull()->TEST_WaitForCompact();
  4520. // verify we have the latest successful update
  4521. for (int k = 0; k < kTestSize; ++k) {
  4522. auto v = Get(Key(k));
  4523. ASSERT_EQ(v, values[k]);
  4524. }
  4525. // reopen and reverify we have the latest successful update
  4526. env_->non_writeable_rate_.store(0);
  4527. Reopen(options);
  4528. for (int k = 0; k < kTestSize; ++k) {
  4529. auto v = Get(Key(k));
  4530. ASSERT_EQ(v, values[k]);
  4531. }
  4532. }
  4533. #ifndef ROCKSDB_LITE
  4534. TEST_F(DBTest, DynamicMiscOptions) {
  4535. // Test max_sequential_skip_in_iterations
  4536. Options options;
  4537. options.env = env_;
  4538. options.create_if_missing = true;
  4539. options.max_sequential_skip_in_iterations = 16;
  4540. options.compression = kNoCompression;
  4541. options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  4542. DestroyAndReopen(options);
  4543. auto assert_reseek_count = [this, &options](int key_start, int num_reseek) {
  4544. int key0 = key_start;
  4545. int key1 = key_start + 1;
  4546. int key2 = key_start + 2;
  4547. Random rnd(301);
  4548. ASSERT_OK(Put(Key(key0), RandomString(&rnd, 8)));
  4549. for (int i = 0; i < 10; ++i) {
  4550. ASSERT_OK(Put(Key(key1), RandomString(&rnd, 8)));
  4551. }
  4552. ASSERT_OK(Put(Key(key2), RandomString(&rnd, 8)));
  4553. std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions()));
  4554. iter->Seek(Key(key1));
  4555. ASSERT_TRUE(iter->Valid());
  4556. ASSERT_EQ(iter->key().compare(Key(key1)), 0);
  4557. iter->Next();
  4558. ASSERT_TRUE(iter->Valid());
  4559. ASSERT_EQ(iter->key().compare(Key(key2)), 0);
  4560. ASSERT_EQ(num_reseek,
  4561. TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION));
  4562. };
  4563. // No reseek
  4564. assert_reseek_count(100, 0);
  4565. ASSERT_OK(dbfull()->SetOptions({{"max_sequential_skip_in_iterations", "4"}}));
  4566. // Clear memtable and make new option effective
  4567. dbfull()->TEST_FlushMemTable(true);
  4568. // Trigger reseek
  4569. assert_reseek_count(200, 1);
  4570. ASSERT_OK(
  4571. dbfull()->SetOptions({{"max_sequential_skip_in_iterations", "16"}}));
  4572. // Clear memtable and make new option effective
  4573. dbfull()->TEST_FlushMemTable(true);
  4574. // No reseek
  4575. assert_reseek_count(300, 1);
  4576. MutableCFOptions mutable_cf_options;
  4577. CreateAndReopenWithCF({"pikachu"}, options);
  4578. // Test soft_pending_compaction_bytes_limit,
  4579. // hard_pending_compaction_bytes_limit
  4580. ASSERT_OK(dbfull()->SetOptions(
  4581. handles_[1], {{"soft_pending_compaction_bytes_limit", "200"},
  4582. {"hard_pending_compaction_bytes_limit", "300"}}));
  4583. ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_[1],
  4584. &mutable_cf_options));
  4585. ASSERT_EQ(200, mutable_cf_options.soft_pending_compaction_bytes_limit);
  4586. ASSERT_EQ(300, mutable_cf_options.hard_pending_compaction_bytes_limit);
  4587. // Test report_bg_io_stats
  4588. ASSERT_OK(
  4589. dbfull()->SetOptions(handles_[1], {{"report_bg_io_stats", "true"}}));
  4590. // sanity check
  4591. ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_[1],
  4592. &mutable_cf_options));
  4593. ASSERT_TRUE(mutable_cf_options.report_bg_io_stats);
  4594. // Test compression
  4595. // sanity check
  4596. ASSERT_OK(dbfull()->SetOptions({{"compression", "kNoCompression"}}));
  4597. ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_[0],
  4598. &mutable_cf_options));
  4599. ASSERT_EQ(CompressionType::kNoCompression, mutable_cf_options.compression);
  4600. if (Snappy_Supported()) {
  4601. ASSERT_OK(dbfull()->SetOptions({{"compression", "kSnappyCompression"}}));
  4602. ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_[0],
  4603. &mutable_cf_options));
  4604. ASSERT_EQ(CompressionType::kSnappyCompression,
  4605. mutable_cf_options.compression);
  4606. }
  4607. // Test paranoid_file_checks already done in db_block_cache_test
  4608. ASSERT_OK(
  4609. dbfull()->SetOptions(handles_[1], {{"paranoid_file_checks", "true"}}));
  4610. ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_[1],
  4611. &mutable_cf_options));
  4612. ASSERT_TRUE(mutable_cf_options.report_bg_io_stats);
  4613. }
  4614. #endif // ROCKSDB_LITE
  4615. TEST_F(DBTest, L0L1L2AndUpHitCounter) {
  4616. Options options = CurrentOptions();
  4617. options.write_buffer_size = 32 * 1024;
  4618. options.target_file_size_base = 32 * 1024;
  4619. options.level0_file_num_compaction_trigger = 2;
  4620. options.level0_slowdown_writes_trigger = 2;
  4621. options.level0_stop_writes_trigger = 4;
  4622. options.max_bytes_for_level_base = 64 * 1024;
  4623. options.max_write_buffer_number = 2;
  4624. options.max_background_compactions = 8;
  4625. options.max_background_flushes = 8;
  4626. options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  4627. CreateAndReopenWithCF({"mypikachu"}, options);
  4628. int numkeys = 20000;
  4629. for (int i = 0; i < numkeys; i++) {
  4630. ASSERT_OK(Put(1, Key(i), "val"));
  4631. }
  4632. ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L0));
  4633. ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L1));
  4634. ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L2_AND_UP));
  4635. ASSERT_OK(Flush(1));
  4636. dbfull()->TEST_WaitForCompact();
  4637. for (int i = 0; i < numkeys; i++) {
  4638. ASSERT_EQ(Get(1, Key(i)), "val");
  4639. }
  4640. ASSERT_GT(TestGetTickerCount(options, GET_HIT_L0), 100);
  4641. ASSERT_GT(TestGetTickerCount(options, GET_HIT_L1), 100);
  4642. ASSERT_GT(TestGetTickerCount(options, GET_HIT_L2_AND_UP), 100);
  4643. ASSERT_EQ(numkeys, TestGetTickerCount(options, GET_HIT_L0) +
  4644. TestGetTickerCount(options, GET_HIT_L1) +
  4645. TestGetTickerCount(options, GET_HIT_L2_AND_UP));
  4646. }
TEST_F(DBTest, EncodeDecompressedBlockSizeTest) {
  // Writes a DB with one block-based-table format_version, then reopens and
  // reads it back with the other version, for every compression type that is
  // compiled in.  Verifies cross-version compatibility of compressed blocks.
  // iter 0 -- zlib
  // iter 1 -- bzip2
  // iter 2 -- lz4
  // iter 3 -- lz4HC
  // iter 4 -- xpress
  CompressionType compressions[] = {kZlibCompression, kBZip2Compression,
                                    kLZ4Compression, kLZ4HCCompression,
                                    kXpressCompression};
  for (auto comp : compressions) {
    if (!CompressionTypeSupported(comp)) {
      // Skip compression libraries not available in this build.
      continue;
    }
    // first_table_version 1 -- generate with table_version == 1, read with
    // table_version == 2
    // first_table_version 2 -- generate with table_version == 2, read with
    // table_version == 1
    for (int first_table_version = 1; first_table_version <= 2;
         ++first_table_version) {
      BlockBasedTableOptions table_options;
      table_options.format_version = first_table_version;
      table_options.filter_policy.reset(NewBloomFilterPolicy(10));
      Options options = CurrentOptions();
      options.table_factory.reset(NewBlockBasedTableFactory(table_options));
      options.create_if_missing = true;
      options.compression = comp;
      DestroyAndReopen(options);

      int kNumKeysWritten = 1000;

      Random rnd(301);
      for (int i = 0; i < kNumKeysWritten; ++i) {
        // compressible string
        ASSERT_OK(Put(Key(i), RandomString(&rnd, 128) + std::string(128, 'a')));
      }

      // Reopen with the *other* format version; every value must survive.
      table_options.format_version = first_table_version == 1 ? 2 : 1;
      options.table_factory.reset(NewBlockBasedTableFactory(table_options));
      Reopen(options);
      for (int i = 0; i < kNumKeysWritten; ++i) {
        auto r = Get(Key(i));
        // Only the deterministic suffix is checked; the 128-byte random
        // prefix is not reproducible here.
        ASSERT_EQ(r.substr(128), std::string(128, 'a'));
      }
    }
  }
}
TEST_F(DBTest, CloseSpeedup) {
  // Verifies that Close() does not wait for pending background work: with
  // both background thread pools blocked by sleeping tasks, no SST file can
  // be produced, yet Close() must still complete (the test would otherwise
  // hang on the WakeUp() calls below).
  Options options = CurrentOptions();
  options.compaction_style = kCompactionStyleLevel;
  options.write_buffer_size = 110 << 10;  // 110KB
  options.arena_block_size = 4 << 10;
  options.level0_file_num_compaction_trigger = 2;
  options.num_levels = 4;
  options.max_bytes_for_level_base = 400 * 1024;
  options.max_write_buffer_number = 16;

  // Block background threads
  env_->SetBackgroundThreads(1, Env::LOW);
  env_->SetBackgroundThreads(1, Env::HIGH);
  test::SleepingBackgroundTask sleeping_task_low;
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::LOW);
  test::SleepingBackgroundTask sleeping_task_high;
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
                 &sleeping_task_high, Env::Priority::HIGH);

  // Start from an empty DB directory.
  std::vector<std::string> filenames;
  env_->GetChildren(dbname_, &filenames);
  // Delete archival files.
  for (size_t i = 0; i < filenames.size(); ++i) {
    env_->DeleteFile(dbname_ + "/" + filenames[i]);
  }
  env_->DeleteDir(dbname_);
  DestroyAndReopen(options);

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  env_->SetBackgroundThreads(1, Env::LOW);
  env_->SetBackgroundThreads(1, Env::HIGH);
  Random rnd(301);
  int key_idx = 0;

  // First three 110KB files are not going to level 2
  // After that, (100K, 200K)
  for (int num = 0; num < 5; num++) {
    GenerateNewFile(&rnd, &key_idx, true);
  }

  // Flushes are blocked on the HIGH-priority sleeping task, so no SST file
  // should have been written yet.
  ASSERT_EQ(0, GetSstFileCount(dbname_));

  Close();
  ASSERT_EQ(0, GetSstFileCount(dbname_));

  // Unblock background threads
  sleeping_task_high.WakeUp();
  sleeping_task_high.WaitUntilDone();
  sleeping_task_low.WakeUp();
  sleeping_task_low.WaitUntilDone();

  Destroy(options);
}
  4736. class DelayedMergeOperator : public MergeOperator {
  4737. private:
  4738. DBTest* db_test_;
  4739. public:
  4740. explicit DelayedMergeOperator(DBTest* d) : db_test_(d) {}
  4741. bool FullMergeV2(const MergeOperationInput& /*merge_in*/,
  4742. MergeOperationOutput* merge_out) const override {
  4743. db_test_->env_->addon_time_.fetch_add(1000);
  4744. merge_out->new_value = "";
  4745. return true;
  4746. }
  4747. const char* Name() const override { return "DelayedMergeOperator"; }
  4748. };
  4749. TEST_F(DBTest, MergeTestTime) {
  4750. std::string one, two, three;
  4751. PutFixed64(&one, 1);
  4752. PutFixed64(&two, 2);
  4753. PutFixed64(&three, 3);
  4754. // Enable time profiling
  4755. SetPerfLevel(kEnableTime);
  4756. this->env_->addon_time_.store(0);
  4757. this->env_->time_elapse_only_sleep_ = true;
  4758. this->env_->no_slowdown_ = true;
  4759. Options options = CurrentOptions();
  4760. options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  4761. options.merge_operator.reset(new DelayedMergeOperator(this));
  4762. DestroyAndReopen(options);
  4763. ASSERT_EQ(TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME), 0);
  4764. db_->Put(WriteOptions(), "foo", one);
  4765. ASSERT_OK(Flush());
  4766. ASSERT_OK(db_->Merge(WriteOptions(), "foo", two));
  4767. ASSERT_OK(Flush());
  4768. ASSERT_OK(db_->Merge(WriteOptions(), "foo", three));
  4769. ASSERT_OK(Flush());
  4770. ReadOptions opt;
  4771. opt.verify_checksums = true;
  4772. opt.snapshot = nullptr;
  4773. std::string result;
  4774. db_->Get(opt, "foo", &result);
  4775. ASSERT_EQ(1000000, TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME));
  4776. ReadOptions read_options;
  4777. std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
  4778. int count = 0;
  4779. for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
  4780. ASSERT_OK(iter->status());
  4781. ++count;
  4782. }
  4783. ASSERT_EQ(1, count);
  4784. ASSERT_EQ(2000000, TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME));
  4785. #ifdef ROCKSDB_USING_THREAD_STATUS
  4786. ASSERT_GT(TestGetTickerCount(options, FLUSH_WRITE_BYTES), 0);
  4787. #endif // ROCKSDB_USING_THREAD_STATUS
  4788. this->env_->time_elapse_only_sleep_ = false;
  4789. }
  4790. #ifndef ROCKSDB_LITE
TEST_P(DBTestWithParam, MergeCompactionTimeTest) {
  // Verifies that merge-operator time spent during (universal) compaction is
  // charged to the MERGE_OPERATION_TOTAL_TIME ticker.
  SetPerfLevel(kEnableTime);
  Options options = CurrentOptions();
  options.compaction_filter_factory = std::make_shared<KeepFilterFactory>();
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  options.merge_operator.reset(new DelayedMergeOperator(this));
  options.compaction_style = kCompactionStyleUniversal;
  options.max_subcompactions = max_subcompactions_;
  DestroyAndReopen(options);

  // Many single-key merge files force compactions that must run the
  // (artificially slow) merge operator.
  for (int i = 0; i < 1000; i++) {
    ASSERT_OK(db_->Merge(WriteOptions(), "foo", "TEST"));
    ASSERT_OK(Flush());
  }
  dbfull()->TEST_WaitForFlushMemTable();
  dbfull()->TEST_WaitForCompact();

  ASSERT_NE(TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME), 0);
}
  4808. TEST_P(DBTestWithParam, FilterCompactionTimeTest) {
  4809. Options options = CurrentOptions();
  4810. options.compaction_filter_factory =
  4811. std::make_shared<DelayFilterFactory>(this);
  4812. options.disable_auto_compactions = true;
  4813. options.create_if_missing = true;
  4814. options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  4815. options.statistics->set_stats_level(kExceptTimeForMutex);
  4816. options.max_subcompactions = max_subcompactions_;
  4817. DestroyAndReopen(options);
  4818. // put some data
  4819. for (int table = 0; table < 4; ++table) {
  4820. for (int i = 0; i < 10 + table; ++i) {
  4821. Put(ToString(table * 100 + i), "val");
  4822. }
  4823. Flush();
  4824. }
  4825. CompactRangeOptions cro;
  4826. cro.exclusive_manual_compaction = exclusive_manual_compaction_;
  4827. ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
  4828. ASSERT_EQ(0U, CountLiveFiles());
  4829. Reopen(options);
  4830. Iterator* itr = db_->NewIterator(ReadOptions());
  4831. itr->SeekToFirst();
  4832. ASSERT_NE(TestGetTickerCount(options, FILTER_OPERATION_TOTAL_TIME), 0);
  4833. delete itr;
  4834. }
  4835. #endif // ROCKSDB_LITE
  4836. TEST_F(DBTest, TestLogCleanup) {
  4837. Options options = CurrentOptions();
  4838. options.write_buffer_size = 64 * 1024; // very small
  4839. // only two memtables allowed ==> only two log files
  4840. options.max_write_buffer_number = 2;
  4841. Reopen(options);
  4842. for (int i = 0; i < 100000; ++i) {
  4843. Put(Key(i), "val");
  4844. // only 2 memtables will be alive, so logs_to_free needs to always be below
  4845. // 2
  4846. ASSERT_LT(dbfull()->TEST_LogsToFreeSize(), static_cast<size_t>(3));
  4847. }
  4848. }
  4849. #ifndef ROCKSDB_LITE
TEST_F(DBTest, EmptyCompactedDB) {
  // An (empty) DB reopened in read-only mode must reject writes with
  // Status::NotSupported rather than crashing or silently succeeding.
  Options options = CurrentOptions();
  options.max_open_files = -1;
  Close();
  ASSERT_OK(ReadOnlyReopen(options));
  Status s = Put("new", "value");
  ASSERT_TRUE(s.IsNotSupported());
  Close();
}
  4859. #endif // ROCKSDB_LITE
  4860. #ifndef ROCKSDB_LITE
TEST_F(DBTest, SuggestCompactRangeTest) {
  // Exercises experimental::SuggestCompactRange(): suggested ranges should
  // eventually be compacted by background work, and compactions triggered
  // this way are reported to the compaction filter as manual compactions.
  class CompactionFilterFactoryGetContext : public CompactionFilterFactory {
   public:
    std::unique_ptr<CompactionFilter> CreateCompactionFilter(
        const CompactionFilter::Context& context) override {
      // Record the context so the test can inspect is_manual_compaction;
      // returning an empty filter leaves the data untouched.
      saved_context = context;
      std::unique_ptr<CompactionFilter> empty_filter;
      return empty_filter;
    }
    const char* Name() const override {
      return "CompactionFilterFactoryGetContext";
    }
    static bool IsManual(CompactionFilterFactory* compaction_filter_factory) {
      return reinterpret_cast<CompactionFilterFactoryGetContext*>(
                 compaction_filter_factory)
          ->saved_context.is_manual_compaction;
    }
    CompactionFilter::Context saved_context;
  };

  Options options = CurrentOptions();
  options.memtable_factory.reset(
      new SpecialSkipListFactory(DBTestBase::kNumKeysByGenerateNewRandomFile));
  options.compaction_style = kCompactionStyleLevel;
  options.compaction_filter_factory.reset(
      new CompactionFilterFactoryGetContext());
  options.write_buffer_size = 200 << 10;
  options.arena_block_size = 4 << 10;
  options.level0_file_num_compaction_trigger = 4;
  options.num_levels = 4;
  options.compression = kNoCompression;
  options.max_bytes_for_level_base = 450 << 10;
  options.target_file_size_base = 98 << 10;
  options.max_compaction_bytes = static_cast<uint64_t>(1) << 60;  // inf

  Reopen(options);

  Random rnd(301);

  for (int num = 0; num < 3; num++) {
    GenerateNewRandomFile(&rnd);
  }

  GenerateNewRandomFile(&rnd);
  ASSERT_EQ("0,4", FilesPerLevel(0));
  // No manual compaction has been suggested yet.
  ASSERT_TRUE(!CompactionFilterFactoryGetContext::IsManual(
      options.compaction_filter_factory.get()));

  // Keep generating files; the expected FilesPerLevel strings track the
  // deterministic LSM shape as automatic compactions proceed.
  GenerateNewRandomFile(&rnd);
  ASSERT_EQ("1,4", FilesPerLevel(0));

  GenerateNewRandomFile(&rnd);
  ASSERT_EQ("2,4", FilesPerLevel(0));

  GenerateNewRandomFile(&rnd);
  ASSERT_EQ("3,4", FilesPerLevel(0));

  GenerateNewRandomFile(&rnd);
  ASSERT_EQ("0,4,4", FilesPerLevel(0));

  GenerateNewRandomFile(&rnd);
  ASSERT_EQ("1,4,4", FilesPerLevel(0));

  GenerateNewRandomFile(&rnd);
  ASSERT_EQ("2,4,4", FilesPerLevel(0));

  GenerateNewRandomFile(&rnd);
  ASSERT_EQ("3,4,4", FilesPerLevel(0));

  GenerateNewRandomFile(&rnd);
  ASSERT_EQ("0,4,8", FilesPerLevel(0));

  GenerateNewRandomFile(&rnd);
  ASSERT_EQ("1,4,8", FilesPerLevel(0));

  // compact it three times
  for (int i = 0; i < 3; ++i) {
    ASSERT_OK(experimental::SuggestCompactRange(db_, nullptr, nullptr));
    dbfull()->TEST_WaitForCompact();
  }

  // All files are compacted
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  ASSERT_EQ(0, NumTableFilesAtLevel(1));

  GenerateNewRandomFile(&rnd);
  ASSERT_EQ(1, NumTableFilesAtLevel(0));

  // nonoverlapping with the file on level 0
  Slice start("a"), end("b");
  ASSERT_OK(experimental::SuggestCompactRange(db_, &start, &end));
  dbfull()->TEST_WaitForCompact();

  // should not compact the level 0 file
  ASSERT_EQ(1, NumTableFilesAtLevel(0));

  start = Slice("j");
  end = Slice("m");
  ASSERT_OK(experimental::SuggestCompactRange(db_, &start, &end));
  dbfull()->TEST_WaitForCompact();
  // The suggested compaction ran through the filter factory and must have
  // been flagged as manual.
  ASSERT_TRUE(CompactionFilterFactoryGetContext::IsManual(
      options.compaction_filter_factory.get()));

  // now it should compact the level 0 file
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  ASSERT_EQ(1, NumTableFilesAtLevel(1));
}
TEST_F(DBTest, PromoteL0) {
  // experimental::PromoteL0() should trivially move non-overlapping L0 files
  // to the requested target level without rewriting data.
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.write_buffer_size = 10 * 1024 * 1024;
  DestroyAndReopen(options);

  // non overlapping ranges
  std::vector<std::pair<int32_t, int32_t>> ranges = {
      {81, 160}, {0, 80}, {161, 240}, {241, 320}};

  int32_t value_size = 10 * 1024;  // 10 KB

  Random rnd(301);
  std::map<int32_t, std::string> values;
  // One flushed file per range => one L0 file per range.
  for (const auto& range : ranges) {
    for (int32_t j = range.first; j < range.second; j++) {
      values[j] = RandomString(&rnd, value_size);
      ASSERT_OK(Put(Key(j), values[j]));
    }
    ASSERT_OK(Flush());
  }

  int32_t level0_files = NumTableFilesAtLevel(0, 0);
  ASSERT_EQ(level0_files, ranges.size());
  ASSERT_EQ(NumTableFilesAtLevel(1, 0), 0);  // No files in L1

  // Promote L0 level to L2.
  ASSERT_OK(experimental::PromoteL0(db_, db_->DefaultColumnFamily(), 2));
  // We expect that all the files were trivially moved from L0 to L2
  ASSERT_EQ(NumTableFilesAtLevel(0, 0), 0);
  ASSERT_EQ(NumTableFilesAtLevel(2, 0), level0_files);

  // All values must still be readable after the move.
  for (const auto& kv : values) {
    ASSERT_EQ(Get(Key(kv.first)), kv.second);
  }
}
TEST_F(DBTest, PromoteL0Failure) {
  // experimental::PromoteL0() must fail with InvalidArgument when L0 files
  // overlap each other, or when the target range below L0 is non-empty.
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.write_buffer_size = 10 * 1024 * 1024;
  DestroyAndReopen(options);

  // Produce two L0 files with overlapping ranges.
  ASSERT_OK(Put(Key(0), ""));
  ASSERT_OK(Put(Key(3), ""));
  ASSERT_OK(Flush());
  ASSERT_OK(Put(Key(1), ""));
  ASSERT_OK(Flush());

  Status status;
  // Fails because L0 has overlapping files.
  status = experimental::PromoteL0(db_, db_->DefaultColumnFamily());
  ASSERT_TRUE(status.IsInvalidArgument());

  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));

  // Now there is a file in L1.
  ASSERT_GE(NumTableFilesAtLevel(1, 0), 1);

  ASSERT_OK(Put(Key(5), ""));
  ASSERT_OK(Flush());
  // Fails because L1 is non-empty.
  status = experimental::PromoteL0(db_, db_->DefaultColumnFamily());
  ASSERT_TRUE(status.IsInvalidArgument());
}
  5001. // Github issue #596
  5002. TEST_F(DBTest, CompactRangeWithEmptyBottomLevel) {
  5003. const int kNumLevels = 2;
  5004. const int kNumL0Files = 2;
  5005. Options options = CurrentOptions();
  5006. options.disable_auto_compactions = true;
  5007. options.num_levels = kNumLevels;
  5008. DestroyAndReopen(options);
  5009. Random rnd(301);
  5010. for (int i = 0; i < kNumL0Files; ++i) {
  5011. ASSERT_OK(Put(Key(0), RandomString(&rnd, 1024)));
  5012. Flush();
  5013. }
  5014. ASSERT_EQ(NumTableFilesAtLevel(0), kNumL0Files);
  5015. ASSERT_EQ(NumTableFilesAtLevel(1), 0);
  5016. ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  5017. ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  5018. ASSERT_EQ(NumTableFilesAtLevel(1), kNumL0Files);
  5019. }
  5020. #endif // ROCKSDB_LITE
TEST_F(DBTest, AutomaticConflictsWithManualCompaction) {
  // While an exclusive manual compaction is in flight, automatically
  // scheduled compactions must detect the conflict (observed through the
  // "MaybeScheduleFlushOrCompaction:Conflict" callback) instead of running
  // concurrently.
  const int kNumL0Files = 50;
  Options options = CurrentOptions();
  options.level0_file_num_compaction_trigger = 4;
  // never slowdown / stop
  options.level0_slowdown_writes_trigger = 999999;
  options.level0_stop_writes_trigger = 999999;
  options.max_background_compactions = 10;
  DestroyAndReopen(options);

  // schedule automatic compactions after the manual one starts, but before it
  // finishes to ensure conflict.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"DBImpl::BackgroundCompaction:Start",
        "DBTest::AutomaticConflictsWithManualCompaction:PrePuts"},
       {"DBTest::AutomaticConflictsWithManualCompaction:PostPuts",
        "DBImpl::BackgroundCompaction:NonTrivial:AfterRun"}});
  std::atomic<int> callback_count(0);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::MaybeScheduleFlushOrCompaction:Conflict",
      [&](void* /*arg*/) { callback_count.fetch_add(1); });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  Random rnd(301);
  for (int i = 0; i < 2; ++i) {
    // put two keys to ensure no trivial move
    for (int j = 0; j < 2; ++j) {
      ASSERT_OK(Put(Key(j), RandomString(&rnd, 1024)));
    }
    ASSERT_OK(Flush());
  }
  port::Thread manual_compaction_thread([this]() {
    CompactRangeOptions croptions;
    croptions.exclusive_manual_compaction = true;
    ASSERT_OK(db_->CompactRange(croptions, nullptr, nullptr));
  });

  // The sync-point dependency above guarantees the manual compaction has
  // started before these puts generate more L0 files.
  TEST_SYNC_POINT("DBTest::AutomaticConflictsWithManualCompaction:PrePuts");
  for (int i = 0; i < kNumL0Files; ++i) {
    // put two keys to ensure no trivial move
    for (int j = 0; j < 2; ++j) {
      ASSERT_OK(Put(Key(j), RandomString(&rnd, 1024)));
    }
    ASSERT_OK(Flush());
  }
  TEST_SYNC_POINT("DBTest::AutomaticConflictsWithManualCompaction:PostPuts");

  // At least one automatic compaction must have hit the conflict path.
  ASSERT_GE(callback_count.load(), 1);
  for (int i = 0; i < 2; ++i) {
    ASSERT_NE("NOT_FOUND", Get(Key(i)));
  }
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  manual_compaction_thread.join();
  dbfull()->TEST_WaitForCompact();
}
  5072. #ifndef ROCKSDB_LITE
TEST_F(DBTest, CompactFilesShouldTriggerAutoCompaction) {
  // While a CompactFiles() call holds the single background-compaction slot,
  // enough L0 files are created to exceed the compaction trigger; once
  // CompactFiles finishes, automatic compaction must run and bring the L0
  // file count back at or below the trigger.
  Options options = CurrentOptions();
  options.max_background_compactions = 1;
  options.level0_file_num_compaction_trigger = 4;
  options.level0_slowdown_writes_trigger = 36;
  options.level0_stop_writes_trigger = 36;
  DestroyAndReopen(options);

  // generate files for manual compaction
  Random rnd(301);
  for (int i = 0; i < 2; ++i) {
    // put two keys to ensure no trivial move
    for (int j = 0; j < 2; ++j) {
      ASSERT_OK(Put(Key(j), RandomString(&rnd, 1024)));
    }
    ASSERT_OK(Flush());
  }

  ROCKSDB_NAMESPACE::ColumnFamilyMetaData cf_meta_data;
  db_->GetColumnFamilyMetaData(db_->DefaultColumnFamily(), &cf_meta_data);

  std::vector<std::string> input_files;
  input_files.push_back(cf_meta_data.levels[0].files[0].name);

  // Hold CompactFiles between its :0 and :1 sync points while the main
  // thread produces more L0 files.
  SyncPoint::GetInstance()->LoadDependency({
      {"CompactFilesImpl:0",
       "DBTest::CompactFilesShouldTriggerAutoCompaction:Begin"},
      {"DBTest::CompactFilesShouldTriggerAutoCompaction:End",
       "CompactFilesImpl:1"},
  });

  SyncPoint::GetInstance()->EnableProcessing();

  port::Thread manual_compaction_thread([&]() {
    auto s = db_->CompactFiles(CompactionOptions(),
                               db_->DefaultColumnFamily(), input_files, 0);
    // NOTE(review): status `s` is not asserted -- presumably because
    // CompactFiles may race with the concurrent writes; confirm before
    // tightening.
  });

  TEST_SYNC_POINT(
      "DBTest::CompactFilesShouldTriggerAutoCompaction:Begin");
  // generate enough files to trigger compaction
  for (int i = 0; i < 20; ++i) {
    for (int j = 0; j < 2; ++j) {
      ASSERT_OK(Put(Key(j), RandomString(&rnd, 1024)));
    }
    ASSERT_OK(Flush());
  }
  db_->GetColumnFamilyMetaData(db_->DefaultColumnFamily(), &cf_meta_data);
  ASSERT_GT(cf_meta_data.levels[0].files.size(),
            options.level0_file_num_compaction_trigger);
  TEST_SYNC_POINT(
      "DBTest::CompactFilesShouldTriggerAutoCompaction:End");

  manual_compaction_thread.join();
  dbfull()->TEST_WaitForCompact();

  db_->GetColumnFamilyMetaData(db_->DefaultColumnFamily(), &cf_meta_data);
  ASSERT_LE(cf_meta_data.levels[0].files.size(),
            options.level0_file_num_compaction_trigger);
}
  5124. #endif // ROCKSDB_LITE
  5125. // Github issue #595
  5126. // Large write batch with column families
  5127. TEST_F(DBTest, LargeBatchWithColumnFamilies) {
  5128. Options options = CurrentOptions();
  5129. options.env = env_;
  5130. options.write_buffer_size = 100000; // Small write buffer
  5131. CreateAndReopenWithCF({"pikachu"}, options);
  5132. int64_t j = 0;
  5133. for (int i = 0; i < 5; i++) {
  5134. for (int pass = 1; pass <= 3; pass++) {
  5135. WriteBatch batch;
  5136. size_t write_size = 1024 * 1024 * (5 + i);
  5137. fprintf(stderr, "prepare: %" ROCKSDB_PRIszt " MB, pass:%d\n",
  5138. (write_size / 1024 / 1024), pass);
  5139. for (;;) {
  5140. std::string data(3000, j++ % 127 + 20);
  5141. data += ToString(j);
  5142. batch.Put(handles_[0], Slice(data), Slice(data));
  5143. if (batch.GetDataSize() > write_size) {
  5144. break;
  5145. }
  5146. }
  5147. fprintf(stderr, "write: %" ROCKSDB_PRIszt " MB\n",
  5148. (batch.GetDataSize() / 1024 / 1024));
  5149. ASSERT_OK(dbfull()->Write(WriteOptions(), &batch));
  5150. fprintf(stderr, "done\n");
  5151. }
  5152. }
  5153. // make sure we can re-open it.
  5154. ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options));
  5155. }
// Make sure that Flushes can proceed in parallel with CompactRange()
TEST_F(DBTest, FlushesInParallelWithCompactRange) {
  // iter == 0 -- leveled
  // iter == 1 -- leveled, but throw in a flush between two levels compacting
  // iter == 2 -- universal
  for (int iter = 0; iter < 3; ++iter) {
    Options options = CurrentOptions();
    if (iter < 2) {
      options.compaction_style = kCompactionStyleLevel;
    } else {
      options.compaction_style = kCompactionStyleUniversal;
    }
    options.write_buffer_size = 110 << 10;
    options.level0_file_num_compaction_trigger = 4;
    options.num_levels = 4;
    options.compression = kNoCompression;
    options.max_bytes_for_level_base = 450 << 10;
    options.target_file_size_base = 98 << 10;
    options.max_write_buffer_number = 2;

    DestroyAndReopen(options);

    Random rnd(301);
    for (int num = 0; num < 14; num++) {
      GenerateNewRandomFile(&rnd);
    }

    // Pin the compaction open between sync points :1 and :2 so the flushes
    // below must run while it is still in progress.  iter 1 holds it inside
    // RunManualCompaction between two level compactions; the others hold a
    // CompactionJob between Run() start and end.
    if (iter == 1) {
      ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
          {{"DBImpl::RunManualCompaction()::1",
            "DBTest::FlushesInParallelWithCompactRange:1"},
           {"DBTest::FlushesInParallelWithCompactRange:2",
            "DBImpl::RunManualCompaction()::2"}});
    } else {
      ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
          {{"CompactionJob::Run():Start",
            "DBTest::FlushesInParallelWithCompactRange:1"},
           {"DBTest::FlushesInParallelWithCompactRange:2",
            "CompactionJob::Run():End"}});
    }
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

    std::vector<port::Thread> threads;
    threads.emplace_back([&]() { Compact("a", "z"); });

    TEST_SYNC_POINT("DBTest::FlushesInParallelWithCompactRange:1");

    // this has to start a flush. if flushes are blocked, this will try to
    // create
    // 3 memtables, and that will fail because max_write_buffer_number is 2
    for (int num = 0; num < 3; num++) {
      GenerateNewRandomFile(&rnd, /* nowait */ true);
    }

    TEST_SYNC_POINT("DBTest::FlushesInParallelWithCompactRange:2");

    for (auto& t : threads) {
      t.join();
    }
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  }
}
TEST_F(DBTest, DelayedWriteRate) {
  // With compactions blocked and the L0 slowdown trigger exceeded, writes
  // enter the delayed-write path.  This test estimates how long the write
  // controller should have slept at the decaying delayed_write_rate and
  // checks the mock clock advanced within 0.5x..2x of that estimate.
  const int kEntriesPerMemTable = 100;
  const int kTotalFlushes = 12;

  Options options = CurrentOptions();
  env_->SetBackgroundThreads(1, Env::LOW);
  options.env = env_;
  env_->no_slowdown_ = true;
  options.write_buffer_size = 100000000;
  options.max_write_buffer_number = 256;
  options.max_background_compactions = 1;
  options.level0_file_num_compaction_trigger = 3;
  options.level0_slowdown_writes_trigger = 3;
  options.level0_stop_writes_trigger = 999999;
  options.delayed_write_rate = 20000000;  // Start with 200MB/s
  options.memtable_factory.reset(
      new SpecialSkipListFactory(kEntriesPerMemTable));

  CreateAndReopenWithCF({"pikachu"}, options);

  // Block compactions
  test::SleepingBackgroundTask sleeping_task_low;
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::LOW);

  // Reach the L0 slowdown trigger (3 files) so delays kick in.
  for (int i = 0; i < 3; i++) {
    Put(Key(i), std::string(10000, 'x'));
    Flush();
  }

  // These writes will be slowed down to 1KB/s
  uint64_t estimated_sleep_time = 0;
  Random rnd(301);
  Put("", "");
  uint64_t cur_rate = options.delayed_write_rate;
  for (int i = 0; i < kTotalFlushes; i++) {
    uint64_t size_memtable = 0;
    for (int j = 0; j < kEntriesPerMemTable; j++) {
      auto rand_num = rnd.Uniform(20);
      // Spread the size range to more.
      size_t entry_size = rand_num * rand_num * rand_num;
      WriteOptions wo;
      Put(Key(i), std::string(entry_size, 'x'), wo);
      // +18 approximates per-entry memtable overhead (key + metadata).
      size_memtable += entry_size + 18;
      // Occasionally sleep a while
      if (rnd.Uniform(20) == 6) {
        env_->SleepForMicroseconds(2666);
      }
    }
    dbfull()->TEST_WaitForFlushMemTable();
    estimated_sleep_time += size_memtable * 1000000u / cur_rate;
    // Slow down twice. One for memtable switch and one for flush finishes.
    cur_rate = static_cast<uint64_t>(static_cast<double>(cur_rate) *
                                     kIncSlowdownRatio * kIncSlowdownRatio);
  }
  // Estimate the total sleep time fall into the rough range.
  ASSERT_GT(env_->addon_time_.load(),
            static_cast<int64_t>(estimated_sleep_time / 2));
  ASSERT_LT(env_->addon_time_.load(),
            static_cast<int64_t>(estimated_sleep_time * 2));

  env_->no_slowdown_ = false;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  sleeping_task_low.WakeUp();
  sleeping_task_low.WaitUntilDone();
}
TEST_F(DBTest, HardLimit) {
  // Verifies that exceeding hard_pending_compaction_bytes_limit (with
  // compaction blocked) forces writers into DBImpl::DelayWrite:Wait: the
  // first five files stay under the limit (no waits), the next five push
  // past it and must trigger at least one wait.
  Options options = CurrentOptions();
  options.env = env_;
  env_->SetBackgroundThreads(1, Env::LOW);
  options.max_write_buffer_number = 256;
  options.write_buffer_size = 110 << 10;  // 110KB
  options.arena_block_size = 4 * 1024;
  options.level0_file_num_compaction_trigger = 4;
  options.level0_slowdown_writes_trigger = 999999;
  options.level0_stop_writes_trigger = 999999;
  options.hard_pending_compaction_bytes_limit = 800 << 10;
  options.max_bytes_for_level_base = 10000000000u;
  options.max_background_compactions = 1;
  options.memtable_factory.reset(
      new SpecialSkipListFactory(KNumKeysByGenerateNewFile - 1));

  env_->SetBackgroundThreads(1, Env::LOW);
  // Keep the single compaction slot occupied so pending compaction bytes
  // can only grow.
  test::SleepingBackgroundTask sleeping_task_low;
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::LOW);

  CreateAndReopenWithCF({"pikachu"}, options);

  std::atomic<int> callback_count(0);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::DelayWrite:Wait", [&](void* /*arg*/) {
        callback_count.fetch_add(1);
        // Release compaction so the stall can clear and the test can finish.
        sleeping_task_low.WakeUp();
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  Random rnd(301);
  int key_idx = 0;
  for (int num = 0; num < 5; num++) {
    GenerateNewFile(&rnd, &key_idx, true);
    dbfull()->TEST_WaitForFlushMemTable();
  }

  ASSERT_EQ(0, callback_count.load());

  for (int num = 0; num < 5; num++) {
    GenerateNewFile(&rnd, &key_idx, true);
    dbfull()->TEST_WaitForFlushMemTable();
  }
  ASSERT_GE(callback_count.load(), 1);

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  sleeping_task_low.WaitUntilDone();
}
  5312. #if !defined(ROCKSDB_LITE) && !defined(ROCKSDB_DISABLE_STALL_NOTIFICATION)
  5313. class WriteStallListener : public EventListener {
  5314. public:
  5315. WriteStallListener() : condition_(WriteStallCondition::kNormal) {}
  5316. void OnStallConditionsChanged(const WriteStallInfo& info) override {
  5317. MutexLock l(&mutex_);
  5318. condition_ = info.condition.cur;
  5319. }
  5320. bool CheckCondition(WriteStallCondition expected) {
  5321. MutexLock l(&mutex_);
  5322. return expected == condition_;
  5323. }
  5324. private:
  5325. port::Mutex mutex_;
  5326. WriteStallCondition condition_;
  5327. };
  5328. TEST_F(DBTest, SoftLimit) {
  5329. Options options = CurrentOptions();
  5330. options.env = env_;
  5331. options.write_buffer_size = 100000; // Small write buffer
  5332. options.max_write_buffer_number = 256;
  5333. options.level0_file_num_compaction_trigger = 1;
  5334. options.level0_slowdown_writes_trigger = 3;
  5335. options.level0_stop_writes_trigger = 999999;
  5336. options.delayed_write_rate = 20000; // About 200KB/s limited rate
  5337. options.soft_pending_compaction_bytes_limit = 160000;
  5338. options.target_file_size_base = 99999999; // All into one file
  5339. options.max_bytes_for_level_base = 50000;
  5340. options.max_bytes_for_level_multiplier = 10;
  5341. options.max_background_compactions = 1;
  5342. options.compression = kNoCompression;
  5343. WriteStallListener* listener = new WriteStallListener();
  5344. options.listeners.emplace_back(listener);
  5345. // FlushMemtable with opt.wait=true does not wait for
  5346. // `OnStallConditionsChanged` being called. The event listener is triggered
  5347. // on `JobContext::Clean`, which happens after flush result is installed.
  5348. // We use sync point to create a custom WaitForFlush that waits for
  5349. // context cleanup.
  5350. port::Mutex flush_mutex;
  5351. port::CondVar flush_cv(&flush_mutex);
  5352. bool flush_finished = false;
  5353. auto InstallFlushCallback = [&]() {
  5354. {
  5355. MutexLock l(&flush_mutex);
  5356. flush_finished = false;
  5357. }
  5358. SyncPoint::GetInstance()->SetCallBack(
  5359. "DBImpl::BackgroundCallFlush:ContextCleanedUp", [&](void*) {
  5360. {
  5361. MutexLock l(&flush_mutex);
  5362. flush_finished = true;
  5363. }
  5364. flush_cv.SignalAll();
  5365. });
  5366. };
  5367. auto WaitForFlush = [&]() {
  5368. {
  5369. MutexLock l(&flush_mutex);
  5370. while (!flush_finished) {
  5371. flush_cv.Wait();
  5372. }
  5373. }
  5374. SyncPoint::GetInstance()->ClearCallBack(
  5375. "DBImpl::BackgroundCallFlush:ContextCleanedUp");
  5376. };
  5377. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  5378. Reopen(options);
  5379. // Generating 360KB in Level 3
  5380. for (int i = 0; i < 72; i++) {
  5381. Put(Key(i), std::string(5000, 'x'));
  5382. if (i % 10 == 0) {
  5383. dbfull()->TEST_FlushMemTable(true, true);
  5384. }
  5385. }
  5386. dbfull()->TEST_WaitForCompact();
  5387. MoveFilesToLevel(3);
  5388. // Generating 360KB in Level 2
  5389. for (int i = 0; i < 72; i++) {
  5390. Put(Key(i), std::string(5000, 'x'));
  5391. if (i % 10 == 0) {
  5392. dbfull()->TEST_FlushMemTable(true, true);
  5393. }
  5394. }
  5395. dbfull()->TEST_WaitForCompact();
  5396. MoveFilesToLevel(2);
  5397. Put(Key(0), "");
  5398. test::SleepingBackgroundTask sleeping_task_low;
  5399. // Block compactions
  5400. env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
  5401. Env::Priority::LOW);
  5402. sleeping_task_low.WaitUntilSleeping();
  5403. // Create 3 L0 files, making score of L0 to be 3.
  5404. for (int i = 0; i < 3; i++) {
  5405. Put(Key(i), std::string(5000, 'x'));
  5406. Put(Key(100 - i), std::string(5000, 'x'));
  5407. // Flush the file. File size is around 30KB.
  5408. InstallFlushCallback();
  5409. dbfull()->TEST_FlushMemTable(true, true);
  5410. WaitForFlush();
  5411. }
  5412. ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
  5413. ASSERT_TRUE(listener->CheckCondition(WriteStallCondition::kDelayed));
  5414. sleeping_task_low.WakeUp();
  5415. sleeping_task_low.WaitUntilDone();
  5416. sleeping_task_low.Reset();
  5417. dbfull()->TEST_WaitForCompact();
  5418. // Now there is one L1 file but doesn't trigger soft_rate_limit
  5419. // The L1 file size is around 30KB.
  5420. ASSERT_EQ(NumTableFilesAtLevel(1), 1);
  5421. ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
  5422. ASSERT_TRUE(listener->CheckCondition(WriteStallCondition::kNormal));
  5423. // Only allow one compactin going through.
  5424. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
  5425. "BackgroundCallCompaction:0", [&](void* /*arg*/) {
  5426. // Schedule a sleeping task.
  5427. sleeping_task_low.Reset();
  5428. env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
  5429. &sleeping_task_low, Env::Priority::LOW);
  5430. });
  5431. env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
  5432. Env::Priority::LOW);
  5433. sleeping_task_low.WaitUntilSleeping();
  5434. // Create 3 L0 files, making score of L0 to be 3
  5435. for (int i = 0; i < 3; i++) {
  5436. Put(Key(10 + i), std::string(5000, 'x'));
  5437. Put(Key(90 - i), std::string(5000, 'x'));
  5438. // Flush the file. File size is around 30KB.
  5439. InstallFlushCallback();
  5440. dbfull()->TEST_FlushMemTable(true, true);
  5441. WaitForFlush();
  5442. }
  5443. // Wake up sleep task to enable compaction to run and waits
  5444. // for it to go to sleep state again to make sure one compaction
  5445. // goes through.
  5446. sleeping_task_low.WakeUp();
  5447. sleeping_task_low.WaitUntilSleeping();
  5448. // Now there is one L1 file (around 60KB) which exceeds 50KB base by 10KB
  5449. // Given level multiplier 10, estimated pending compaction is around 100KB
  5450. // doesn't trigger soft_pending_compaction_bytes_limit
  5451. ASSERT_EQ(NumTableFilesAtLevel(1), 1);
  5452. ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
  5453. ASSERT_TRUE(listener->CheckCondition(WriteStallCondition::kNormal));
  5454. // Create 3 L0 files, making score of L0 to be 3, higher than L0.
  5455. for (int i = 0; i < 3; i++) {
  5456. Put(Key(20 + i), std::string(5000, 'x'));
  5457. Put(Key(80 - i), std::string(5000, 'x'));
  5458. // Flush the file. File size is around 30KB.
  5459. InstallFlushCallback();
  5460. dbfull()->TEST_FlushMemTable(true, true);
  5461. WaitForFlush();
  5462. }
  5463. // Wake up sleep task to enable compaction to run and waits
  5464. // for it to go to sleep state again to make sure one compaction
  5465. // goes through.
  5466. sleeping_task_low.WakeUp();
  5467. sleeping_task_low.WaitUntilSleeping();
  5468. // Now there is one L1 file (around 90KB) which exceeds 50KB base by 40KB
  5469. // L2 size is 360KB, so the estimated level fanout 4, estimated pending
  5470. // compaction is around 200KB
  5471. // triggerring soft_pending_compaction_bytes_limit
  5472. ASSERT_EQ(NumTableFilesAtLevel(1), 1);
  5473. ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
  5474. ASSERT_TRUE(listener->CheckCondition(WriteStallCondition::kDelayed));
  5475. sleeping_task_low.WakeUp();
  5476. sleeping_task_low.WaitUntilSleeping();
  5477. ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
  5478. ASSERT_TRUE(listener->CheckCondition(WriteStallCondition::kNormal));
  5479. // shrink level base so L2 will hit soft limit easier.
  5480. ASSERT_OK(dbfull()->SetOptions({
  5481. {"max_bytes_for_level_base", "5000"},
  5482. }));
  5483. Put("", "");
  5484. Flush();
  5485. ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
  5486. ASSERT_TRUE(listener->CheckCondition(WriteStallCondition::kDelayed));
  5487. sleeping_task_low.WaitUntilSleeping();
  5488. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  5489. sleeping_task_low.WakeUp();
  5490. sleeping_task_low.WaitUntilDone();
  5491. }
  5492. TEST_F(DBTest, LastWriteBufferDelay) {
  5493. Options options = CurrentOptions();
  5494. options.env = env_;
  5495. options.write_buffer_size = 100000;
  5496. options.max_write_buffer_number = 4;
  5497. options.delayed_write_rate = 20000;
  5498. options.compression = kNoCompression;
  5499. options.disable_auto_compactions = true;
  5500. int kNumKeysPerMemtable = 3;
  5501. options.memtable_factory.reset(
  5502. new SpecialSkipListFactory(kNumKeysPerMemtable));
  5503. Reopen(options);
  5504. test::SleepingBackgroundTask sleeping_task;
  5505. // Block flushes
  5506. env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task,
  5507. Env::Priority::HIGH);
  5508. sleeping_task.WaitUntilSleeping();
  5509. // Create 3 L0 files, making score of L0 to be 3.
  5510. for (int i = 0; i < 3; i++) {
  5511. // Fill one mem table
  5512. for (int j = 0; j < kNumKeysPerMemtable; j++) {
  5513. Put(Key(j), "");
  5514. }
  5515. ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
  5516. }
  5517. // Inserting a new entry would create a new mem table, triggering slow down.
  5518. Put(Key(0), "");
  5519. ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
  5520. sleeping_task.WakeUp();
  5521. sleeping_task.WaitUntilDone();
  5522. }
  5523. #endif // !defined(ROCKSDB_LITE) && !defined(ROCKSDB_DISABLE_STALL_NOTIFICATION)
  5524. TEST_F(DBTest, FailWhenCompressionNotSupportedTest) {
  5525. CompressionType compressions[] = {kZlibCompression, kBZip2Compression,
  5526. kLZ4Compression, kLZ4HCCompression,
  5527. kXpressCompression};
  5528. for (auto comp : compressions) {
  5529. if (!CompressionTypeSupported(comp)) {
  5530. // not supported, we should fail the Open()
  5531. Options options = CurrentOptions();
  5532. options.compression = comp;
  5533. ASSERT_TRUE(!TryReopen(options).ok());
  5534. // Try if CreateColumnFamily also fails
  5535. options.compression = kNoCompression;
  5536. ASSERT_OK(TryReopen(options));
  5537. ColumnFamilyOptions cf_options(options);
  5538. cf_options.compression = comp;
  5539. ColumnFamilyHandle* handle;
  5540. ASSERT_TRUE(!db_->CreateColumnFamily(cf_options, "name", &handle).ok());
  5541. }
  5542. }
  5543. }
  5544. TEST_F(DBTest, CreateColumnFamilyShouldFailOnIncompatibleOptions) {
  5545. Options options = CurrentOptions();
  5546. options.max_open_files = 100;
  5547. Reopen(options);
  5548. ColumnFamilyOptions cf_options(options);
  5549. // ttl is now supported when max_open_files is -1.
  5550. cf_options.ttl = 3600;
  5551. ColumnFamilyHandle* handle;
  5552. ASSERT_OK(db_->CreateColumnFamily(cf_options, "pikachu", &handle));
  5553. delete handle;
  5554. }
  5555. #ifndef ROCKSDB_LITE
  5556. TEST_F(DBTest, RowCache) {
  5557. Options options = CurrentOptions();
  5558. options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  5559. options.row_cache = NewLRUCache(8192);
  5560. DestroyAndReopen(options);
  5561. ASSERT_OK(Put("foo", "bar"));
  5562. ASSERT_OK(Flush());
  5563. ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 0);
  5564. ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 0);
  5565. ASSERT_EQ(Get("foo"), "bar");
  5566. ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 0);
  5567. ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 1);
  5568. ASSERT_EQ(Get("foo"), "bar");
  5569. ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 1);
  5570. ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 1);
  5571. }
  5572. TEST_F(DBTest, PinnableSliceAndRowCache) {
  5573. Options options = CurrentOptions();
  5574. options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  5575. options.row_cache = NewLRUCache(8192);
  5576. DestroyAndReopen(options);
  5577. ASSERT_OK(Put("foo", "bar"));
  5578. ASSERT_OK(Flush());
  5579. ASSERT_EQ(Get("foo"), "bar");
  5580. ASSERT_EQ(
  5581. reinterpret_cast<LRUCache*>(options.row_cache.get())->TEST_GetLRUSize(),
  5582. 1);
  5583. {
  5584. PinnableSlice pin_slice;
  5585. ASSERT_EQ(Get("foo", &pin_slice), Status::OK());
  5586. ASSERT_EQ(pin_slice.ToString(), "bar");
  5587. // Entry is already in cache, lookup will remove the element from lru
  5588. ASSERT_EQ(
  5589. reinterpret_cast<LRUCache*>(options.row_cache.get())->TEST_GetLRUSize(),
  5590. 0);
  5591. }
  5592. // After PinnableSlice destruction element is added back in LRU
  5593. ASSERT_EQ(
  5594. reinterpret_cast<LRUCache*>(options.row_cache.get())->TEST_GetLRUSize(),
  5595. 1);
  5596. }
  5597. #endif // ROCKSDB_LITE
// After dropping the only column family with data in the oldest WAL, that WAL
// must be released: subsequent writes to a surviving CF should roll over to a
// strictly newer log file.
TEST_F(DBTest, DeletingOldWalAfterDrop) {
  // Ordering: flushes may not start until Test:AllowFlushes fires, and the
  // test waits at Test:WaitForFlush until a background flush has completed.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"Test:AllowFlushes", "DBImpl::BGWorkFlush"},
       {"DBImpl::BGWorkFlush:done", "Test:WaitForFlush"}});
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  Options options = CurrentOptions();
  // Small WAL budget forces a log rollover quickly; all compaction/stall
  // triggers are pushed out of reach so only WAL behavior is exercised.
  options.max_total_wal_size = 8192;
  options.compression = kNoCompression;
  options.write_buffer_size = 1 << 20;
  options.level0_file_num_compaction_trigger = (1 << 30);
  options.level0_slowdown_writes_trigger = (1 << 30);
  options.level0_stop_writes_trigger = (1 << 30);
  options.disable_auto_compactions = true;
  DestroyAndReopen(options);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  CreateColumnFamilies({"cf1", "cf2"}, options);
  // Two 8KB writes exceed max_total_wal_size, scheduling a flush of cf1's
  // memtable so the oldest WAL can be retired.
  ASSERT_OK(Put(0, "key1", DummyString(8192)));
  ASSERT_OK(Put(0, "key2", DummyString(8192)));
  // the oldest wal should now be getting_flushed
  ASSERT_OK(db_->DropColumnFamily(handles_[0]));
  // all flushes should now do nothing because their CF is dropped
  TEST_SYNC_POINT("Test:AllowFlushes");
  TEST_SYNC_POINT("Test:WaitForFlush");
  uint64_t lognum1 = dbfull()->TEST_LogfileNumber();
  ASSERT_OK(Put(1, "key3", DummyString(8192)));
  ASSERT_OK(Put(1, "key4", DummyString(8192)));
  // new wal should have been created
  uint64_t lognum2 = dbfull()->TEST_LogfileNumber();
  EXPECT_GT(lognum2, lognum1);
}
  5629. TEST_F(DBTest, UnsupportedManualSync) {
  5630. DestroyAndReopen(CurrentOptions());
  5631. env_->is_wal_sync_thread_safe_.store(false);
  5632. Status s = db_->SyncWAL();
  5633. ASSERT_TRUE(s.IsNotSupported());
  5634. }
// Run every DBTestWithParam test over the cross product of {1, 4} and
// {false, true}. NOTE(review): the fixture is defined elsewhere in this file;
// the first parameter appears to be a subcompaction/thread count and the
// second a boolean mode flag — confirm against the DBTestWithParam
// declaration.
INSTANTIATE_TEST_CASE_P(DBTestWithParam, DBTestWithParam,
                        ::testing::Combine(::testing::Values(1, 4),
                                           ::testing::Bool()));
  5638. TEST_F(DBTest, PauseBackgroundWorkTest) {
  5639. Options options = CurrentOptions();
  5640. options.write_buffer_size = 100000; // Small write buffer
  5641. Reopen(options);
  5642. std::vector<port::Thread> threads;
  5643. std::atomic<bool> done(false);
  5644. db_->PauseBackgroundWork();
  5645. threads.emplace_back([&]() {
  5646. Random rnd(301);
  5647. for (int i = 0; i < 10000; ++i) {
  5648. Put(RandomString(&rnd, 10), RandomString(&rnd, 10));
  5649. }
  5650. done.store(true);
  5651. });
  5652. env_->SleepForMicroseconds(200000);
  5653. // make sure the thread is not done
  5654. ASSERT_FALSE(done.load());
  5655. db_->ContinueBackgroundWork();
  5656. for (auto& t : threads) {
  5657. t.join();
  5658. }
  5659. // now it's done
  5660. ASSERT_TRUE(done.load());
  5661. }
// Keep spawning short-living threads that create an iterator and quit.
// Meanwhile in another thread keep flushing memtables.
// This used to cause a deadlock.
TEST_F(DBTest, ThreadLocalPtrDeadlock) {
  std::atomic<int> flushes_done{0};
  std::atomic<int> threads_destroyed{0};
  // All threads stop once the flusher has completed more than 10 flushes.
  auto done = [&] {
    return flushes_done.load() > 10;
  };

  // Writer/flusher: one Put + explicit Flush per iteration.
  port::Thread flushing_thread([&] {
    for (int i = 0; !done(); ++i) {
      ASSERT_OK(db_->Put(WriteOptions(), Slice("hi"),
                         Slice(std::to_string(i).c_str())));
      ASSERT_OK(db_->Flush(FlushOptions()));
      int cnt = ++flushes_done;
      fprintf(stderr, "Flushed %d times\n", cnt);
    }
  });

  // Ten threads, each repeatedly spawning (and joining) a short-lived thread
  // that creates and destroys an iterator. This churns through many distinct
  // thread ids concurrently with flushes — per the test name, presumably to
  // exercise ThreadLocalPtr cleanup paths that previously deadlocked.
  std::vector<port::Thread> thread_spawning_threads(10);
  for (auto& t: thread_spawning_threads) {
    t = port::Thread([&] {
      while (!done()) {
        {
          port::Thread tmp_thread([&] {
            auto it = db_->NewIterator(ReadOptions());
            delete it;
          });
          tmp_thread.join();
        }
        ++threads_destroyed;
      }
    });
  }

  for (auto& t: thread_spawning_threads) {
    t.join();
  }
  flushing_thread.join();
  fprintf(stderr, "Done. Flushed %d times, destroyed %d threads\n",
          flushes_done.load(), threads_destroyed.load());
}
  5702. TEST_F(DBTest, LargeBlockSizeTest) {
  5703. Options options = CurrentOptions();
  5704. CreateAndReopenWithCF({"pikachu"}, options);
  5705. ASSERT_OK(Put(0, "foo", "bar"));
  5706. BlockBasedTableOptions table_options;
  5707. table_options.block_size = 8LL * 1024 * 1024 * 1024LL;
  5708. options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  5709. ASSERT_NOK(TryReopenWithColumnFamilies({"default", "pikachu"}, options));
  5710. }
  5711. #ifndef ROCKSDB_LITE
// Exercises DB::GetCreationTimeOfOldestFile():
//   1. one file stamped 0 and one stamped time_1  -> API returns 0;
//   2. files stamped time_1 and time_2 (time_1 < time_2) -> returns time_1;
//   3. with max_open_files != -1 the API is NotSupported.
// File creation times are injected via sync-point callbacks at table build.
TEST_F(DBTest, CreationTimeOfOldestFile) {
  const int kNumKeysPerFile = 32;
  const int kNumLevelFiles = 2;
  const int kValueSize = 100;

  Options options = CurrentOptions();
  options.max_open_files = -1;
  // Use the mock clock's addon time so timestamps are deterministic.
  env_->time_elapse_only_sleep_ = false;
  options.env = env_;

  env_->addon_time_.store(0);
  DestroyAndReopen(options);

  // Shared mutable state read by the AddTableProperty callback below:
  // which scenario we are in, and which of the two files is being built.
  bool set_file_creation_time_to_zero = true;
  int idx = 0;

  int64_t time_1 = 0;
  env_->GetCurrentTime(&time_1);
  const uint64_t uint_time_1 = static_cast<uint64_t>(time_1);

  // Add 50 hours
  env_->addon_time_.fetch_add(50 * 60 * 60);

  int64_t time_2 = 0;
  env_->GetCurrentTime(&time_2);
  const uint64_t uint_time_2 = static_cast<uint64_t>(time_2);

  // Override file_creation_time as each table's properties are written.
  // Scenario 1 (flag true): first file -> 0, second -> time_1 (idx resets so
  // the second DB run reuses the callback). Scenario 2 (flag false): first
  // file -> time_1, second -> time_2.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "PropertyBlockBuilder::AddTableProperty:Start", [&](void* arg) {
        TableProperties* props = reinterpret_cast<TableProperties*>(arg);
        if (set_file_creation_time_to_zero) {
          if (idx == 0) {
            props->file_creation_time = 0;
            idx++;
          } else if (idx == 1) {
            props->file_creation_time = uint_time_1;
            idx = 0;
          }
        } else {
          if (idx == 0) {
            props->file_creation_time = uint_time_1;
            idx++;
          } else if (idx == 1) {
            props->file_creation_time = uint_time_2;
          }
        }
      });

  // Set file creation time in manifest all to 0 so the API must read the
  // value from the table properties, not the manifest.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "FileMetaData::FileMetaData", [&](void* arg) {
        FileMetaData* meta = static_cast<FileMetaData*>(arg);
        meta->file_creation_time = 0;
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  Random rnd(301);
  // Write kNumLevelFiles files of kNumKeysPerFile keys each.
  for (int i = 0; i < kNumLevelFiles; ++i) {
    for (int j = 0; j < kNumKeysPerFile; ++j) {
      ASSERT_OK(
          Put(Key(i * kNumKeysPerFile + j), RandomString(&rnd, kValueSize)));
    }
    Flush();
  }

  // At this point there should be 2 files, one with file_creation_time = 0 and
  // the other non-zero. GetCreationTimeOfOldestFile API should return 0.
  uint64_t creation_time;
  Status s1 = dbfull()->GetCreationTimeOfOldestFile(&creation_time);
  ASSERT_EQ(0, creation_time);
  ASSERT_EQ(s1, Status::OK());

  // Testing with non-zero file creation time.
  set_file_creation_time_to_zero = false;
  options = CurrentOptions();
  options.max_open_files = -1;
  env_->time_elapse_only_sleep_ = false;
  options.env = env_;
  env_->addon_time_.store(0);
  DestroyAndReopen(options);

  for (int i = 0; i < kNumLevelFiles; ++i) {
    for (int j = 0; j < kNumKeysPerFile; ++j) {
      ASSERT_OK(
          Put(Key(i * kNumKeysPerFile + j), RandomString(&rnd, kValueSize)));
    }
    Flush();
  }

  // At this point there should be 2 files with non-zero file creation time.
  // GetCreationTimeOfOldestFile API should return non-zero value.
  uint64_t ctime;
  Status s2 = dbfull()->GetCreationTimeOfOldestFile(&ctime);
  ASSERT_EQ(uint_time_1, ctime);
  ASSERT_EQ(s2, Status::OK());

  // Testing with max_open_files != -1
  options = CurrentOptions();
  options.max_open_files = 10;
  DestroyAndReopen(options);
  Status s3 = dbfull()->GetCreationTimeOfOldestFile(&ctime);
  ASSERT_EQ(s3, Status::NotSupported());

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}
  5802. #endif
  5803. } // namespace ROCKSDB_NAMESPACE
  5804. #ifdef ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS
  5805. extern "C" {
  5806. void RegisterCustomObjects(int argc, char** argv);
  5807. }
  5808. #else
  5809. void RegisterCustomObjects(int /*argc*/, char** /*argv*/) {}
  5810. #endif // !ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS
int main(int argc, char** argv) {
  // Print a stack trace on fatal signals to make test crashes debuggable.
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  // Hook for builds that link extra custom-object registrations (see above).
  RegisterCustomObjects(argc, argv);
  return RUN_ALL_TESTS();
}