hypothesis_test.py 103 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
7277827792780278127822783278427852786278727882789279027912792279327942795
  1. import numpy as np
  2. import copy
  3. import time
  4. from functools import partial, reduce
  5. from future.utils import viewitems, viewkeys
  6. from hypothesis import assume, given, settings, HealthCheck
  7. import hypothesis.strategies as st
  8. import unittest
  9. import threading
  10. from caffe2.python import core, workspace, tt_core, dyndep
  11. import caffe2.python.hypothesis_test_util as hu
  12. from caffe2.proto import caffe2_pb2
  13. dyndep.InitOpsLibrary('@/caffe2/caffe2/fb/optimizers:sgd_simd_ops')
  if workspace.has_gpu_support:
      # NOTE: During GPU stress tests, the number of workers exceeds the number
      #       of GPUs which results in flakiness from GPU contention. As a
      #       result, deadlines are not enforced on CUDA runs.
      _hypothesis_settings = settings

      def settings(**kwargs):
          """Replacement for the imported ``settings`` when GPU support is present.

          A caller-supplied ``deadline`` is overridden with ``None``; in that
          case ``max_examples`` also defaults to 50 unless given explicitly.
          Returns a decorator that applies the original ``settings`` built
          from the adjusted keyword arguments.
          """
          # NOTE(review): indentation was lost in the reviewed copy of this
          # file; ``max_examples`` is assumed to be defaulted only when a
          # deadline was passed -- confirm against the canonical source.
          deadline_requested = 'deadline' in kwargs
          if deadline_requested:
              kwargs.update(deadline=None)
              kwargs.setdefault('max_examples', 50)

          def apply(test_fn):
              decorate = _hypothesis_settings(**kwargs)
              return decorate(test_fn)
          return apply
  def sigmoid(x):
      """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``.

      Uses ``np.exp``, so array inputs are handled element-wise.
      """
      denominator = 1.0 + np.exp(-x)
      return 1.0 / denominator
  @st.composite
  def _tensor_and_prefix(draw, dtype, elements, min_dim=1, max_dim=4, **kwargs):
      """Draw two arrays whose shapes share their trailing dimensions.

      Two dimension lists are drawn (each between ``min_dim`` and ``max_dim``
      entries, sizes coming from ``hu.dims(**kwargs)``). Examples are kept
      only when their combined length is below ``max_dim``. The first array
      has shape ``dims + extra`` and the second has shape ``extra``.
      """
      def draw_dim_list():
          return draw(
              st.lists(hu.dims(**kwargs), min_size=min_dim, max_size=max_dim))

      leading_dims = draw_dim_list()
      trailing_dims = draw_dim_list()
      total_rank = len(leading_dims) + len(trailing_dims)
      assume(total_rank < max_dim)
      full_tensor = draw(
          hu.arrays(leading_dims + trailing_dims, dtype, elements))
      small_tensor = draw(hu.arrays(trailing_dims, dtype, elements))
      return (full_tensor, small_tensor)
  def _tensor_and_indices(min_dim=1, max_dim=4, dtype=np.float32,
                          elements=None, **kwargs):
      """Strategy yielding ``(original_dim, data, indices)`` triples.

      ``data`` is an array whose shape is a drawn list of dimensions,
      ``original_dim`` is an integer in [2, 10], and ``indices`` is an int64
      array sized by the first data dimension with values in
      ``[0, original_dim - 1]``.
      """
      def build_triple(pair):
          data_shape, original = pair
          index_values = st.integers(min_value=0, max_value=original - 1)
          return st.tuples(
              st.just(original),  # original dimension
              hu.arrays(data_shape, dtype, elements),  # data tensor
              hu.arrays(data_shape[0], dtype=np.int64, elements=index_values),
          )

      shape_strategy = st.lists(
          hu.dims(**kwargs), min_size=min_dim, max_size=max_dim)
      original_strategy = st.integers(min_value=2, max_value=10)
      return st.tuples(shape_strategy, original_strategy).flatmap(build_triple)
  # Maps NumPy scalar types to the matching ``core.DataType`` enum value.
  # ``np.bool`` was only an alias of the builtin ``bool`` and was removed in
  # NumPy 1.24, so the boolean entry is registered under the builtin ``bool``
  # (what the alias resolved to) and under ``np.bool_``.
  _NUMPY_TYPE_TO_ENUM = {
      np.float32: core.DataType.FLOAT,
      np.int32: core.DataType.INT32,
      bool: core.DataType.BOOL,
      np.bool_: core.DataType.BOOL,
      np.uint8: core.DataType.UINT8,
      np.int8: core.DataType.INT8,
      np.uint16: core.DataType.UINT16,
      np.int16: core.DataType.INT16,
      np.int64: core.DataType.INT64,
      np.float64: core.DataType.DOUBLE,
  }
  def _dtypes(dtypes=None):
      """Return a strategy sampling from ``dtypes``.

      A falsy argument (``None`` or empty) selects int32, int64 and float32.
      """
      return st.sampled_from(dtypes or [np.int32, np.int64, np.float32])
  def _test_binary(name, ref, filter_=None, gcs=hu.gcs,
                   test_gradient=False, allow_inplace=False, dtypes=_dtypes):
      """Build a hypothesis test for the two-input operator ``name``.

      The returned function takes the test case as ``self``. It runs the
      device check, optionally a gradient check on input 0 (float32 inputs
      only), and then compares against ``ref``. ``filter_`` is forwarded to
      ``hu.elements_of_type``; ``allow_inplace`` additionally lets the output
      be written to ``X1`` or ``X2``.
      """
      output_names = ('Y', 'X1', 'X2') if allow_inplace else ('Y',)

      def tensor_pair(dtype):
          return hu.tensors(
              n=2, dtype=dtype,
              elements=hu.elements_of_type(dtype, filter_=filter_))

      @given(
          inputs=dtypes().flatmap(tensor_pair),
          out=st.sampled_from(output_names),
          **gcs)
      @settings(max_examples=20, deadline=None)
      def test_binary(self, inputs, out, gc, dc):
          X1, X2 = inputs
          op = core.CreateOperator(name, ["X1", "X2"], [out])
          self.assertDeviceChecks(dc, op, [X1, X2], [0])
          # We only do gradient check with float32 types.
          wants_gradient = test_gradient and X1.dtype == np.float32
          if wants_gradient:
              self.assertGradientChecks(gc, op, [X1, X2], 0, [0])
          self.assertReferenceChecks(gc, op, [X1, X2], ref)

      return test_binary
  def _test_binary_broadcast(name, ref, filter_=None,
                             gcs=hu.gcs, allow_inplace=False, dtypes=_dtypes):
      """Build a hypothesis test for operator ``name`` run with ``broadcast=1``.

      Inputs come from ``_tensor_and_prefix``. The returned function runs
      the device check and a reference check in which the first output of
      ``ref`` is cast to the dtype of the first input. With
      ``allow_inplace`` the output may be written back to ``X1``.
      """
      in_place_strategy = st.booleans() if allow_inplace else st.just(False)

      def broadcast_pair(dtype):
          return _tensor_and_prefix(
              dtype=dtype,
              elements=hu.elements_of_type(dtype, filter_=filter_))

      @given(
          inputs=dtypes().flatmap(broadcast_pair),
          in_place=in_place_strategy,
          **gcs)
      @settings(max_examples=3, deadline=100)
      def test_binary_broadcast(self, inputs, in_place, gc, dc):
          X1, X2 = inputs
          output_name = "X1" if in_place else "Y"
          op = core.CreateOperator(
              name, ["X1", "X2"], [output_name], broadcast=1)
          self.assertDeviceChecks(dc, op, [X1, X2], [0])

          def cast_ref(x, y):
              first_output = ref(x, y)[0]
              return (np.array(first_output, dtype=x.dtype), )

          # gradient not implemented yet
          # self.assertGradientChecks(gc, op, [X1, X2], 0, [0])
          self.assertReferenceChecks(gc, op, [X1, X2], cast_ref)

      return test_binary_broadcast
  101. class TestOperators(hu.HypothesisTestCase):
  def test_comparison_ops(self):
      """Run the plain and broadcast binary checks for LT, LE, GT and GE.

      Both checks are built with the ``hu.gcs_cpu_only`` device options.
      """
      comparisons = {
          "LT": lambda lhs, rhs: [lhs < rhs],
          "LE": lambda lhs, rhs: [lhs <= rhs],
          "GT": lambda lhs, rhs: [lhs > rhs],
          "GE": lambda lhs, rhs: [lhs >= rhs],
      }
      for op_name, reference in viewitems(comparisons):
          plain_check = _test_binary(
              op_name, reference, gcs=hu.gcs_cpu_only)
          plain_check(self)
          broadcast_check = _test_binary_broadcast(
              op_name, reference, gcs=hu.gcs_cpu_only)
          broadcast_check(self)
  @given(inputs=hu.tensors(n=2), in_place=st.booleans(), **hu.gcs)
  @settings(deadline=10000)
  def test_sum(self, inputs, in_place, gc, dc):
      """Device and gradient checks for ``Sum`` on two tensors.

      When ``in_place`` is set the result is written back to ``X1``.
      """
      X1, X2 = inputs
      output_name = "X1" if in_place else "Y"
      op = core.CreateOperator("Sum", ["X1", "X2"], [output_name])
      self.assertDeviceChecks(dc, op, [X1, X2], [0])
      self.assertGradientChecks(gc, op, [X1, X2], 0, [0])
  @given(inputs=hu.tensors(n=2, min_dim=2, max_dim=2), **hu.gcs_cpu_only)
  @settings(deadline=10000)
  def test_row_mul(self, inputs, gc, dc):
      """Check ``RowMul`` against a row-by-row NumPy reference.

      The second operand is the first column of the second drawn matrix.
      The reference scales row ``i`` of the first operand by element ``i``
      of that vector. Device, gradient (both inputs) and reference checks
      are run.
      """
      X1, second_matrix = inputs
      X2 = second_matrix[:, 0]
      op = core.CreateOperator("RowMul", ["X1", "X2"], ["Y"])

      def ref(x, y):
          scaled = np.zeros(shape=x.shape, dtype=x.dtype)
          for row in range(y.size):
              scaled[row, ] = x[row, ] * y[row]
          return [scaled]

      self.assertDeviceChecks(dc, op, [X1, X2], [0])
      for input_index in (0, 1):
          self.assertGradientChecks(gc, op, [X1, X2], input_index, [0])
      self.assertReferenceChecks(gc, op, [X1, X2], ref)
  @given(inputs=hu.tensors(n=2), **hu.gcs_cpu_only)
  @settings(deadline=10000)
  def test_max(self, inputs, gc, dc):
      """Device, gradient and reference checks for element-wise ``Max``.

      Entries of ``X1`` lying within 0.05 of ``X2`` are pushed away first,
      so the gradient checker does not step across the ``X1 == X2`` kink.
      """
      X1, X2 = inputs
      op = core.CreateOperator("Max", ["X1", "X2"], ["Y"])
      # Make X1 and X2 far from each other, since X1=X2 is not differentiable
      # and the step size of gradient checker is 0.05
      slightly_below = np.logical_and(X1 >= X2 - 0.05, X1 <= X2)
      X1[slightly_below] -= 0.05
      # This mask is computed after the shift above, as in the original order.
      slightly_above = np.logical_and(X1 <= X2 + 0.05, X1 >= X2)
      X1[slightly_above] += 0.05
      self.assertDeviceChecks(dc, op, [X1, X2], [0])
      for input_index in (0, 1):
          self.assertGradientChecks(gc, op, [X1, X2], input_index, [0])

      def elementwise_max(X, Y):
          return [np.maximum(X, Y)]

      self.assertReferenceChecks(gc, op, [X1, X2], elementwise_max)
  def test_add(self):
      """Run the plain (with gradient) and broadcast checks for ``Add``.

      Non-float elements are restricted to magnitudes below ``2**30 - 1``.
      """
      def not_overflow(x):
          # Floats are always accepted; other values are magnitude-limited.
          return isinstance(x, float) or abs(x) < (1 << 30) - 1

      def ref(x, y):
          return (x + y, )

      plain_check = _test_binary(
          "Add", ref, filter_=not_overflow, test_gradient=True)
      plain_check(self)
      broadcast_check = _test_binary_broadcast(
          "Add", ref, filter_=not_overflow)
      broadcast_check(self)
  def test_sub(self):
      """Run the plain and broadcast checks for ``Sub``.

      The gradient check is requested for the plain variant.
      """
      def ref(x, y):
          return (x - y, )

      plain_check = _test_binary("Sub", ref, test_gradient=True)
      plain_check(self)
      broadcast_check = _test_binary_broadcast("Sub", ref)
      broadcast_check(self)
  def test_mul(self):
      """Run the plain (with gradient) and broadcast checks for ``Mul``.

      Non-float elements are restricted to magnitudes below ``2**15 - 1``.
      """
      def not_overflow(x):
          # Floats are always accepted; other values are magnitude-limited.
          return isinstance(x, float) or abs(x) < (1 << 15) - 1

      def ref(x, y):
          return (x * y, )

      plain_check = _test_binary(
          "Mul", ref, filter_=not_overflow, test_gradient=True)
      plain_check(self)
      broadcast_check = _test_binary_broadcast(
          "Mul", ref, filter_=not_overflow)
      broadcast_check(self)
  def test_div(self):
      """Run the ``Div`` checks on float32/float64 inputs.

      Elements are kept away from zero (``abs(x) > 1e-2``). The gradient
      check is requested only for the variant built with
      ``hu.gcs_cpu_only``. The default-device variant and the broadcast
      variant run without it.
      """
      def ref(x, y):
          return (x / y, )

      def non_zero(x):
          return abs(x) > 1e-2

      def div_dtypes():
          return st.sampled_from([np.float32, np.float64])

      cpu_gradient_check = _test_binary(
          "Div", ref, filter_=non_zero, test_gradient=True,
          dtypes=div_dtypes, gcs=hu.gcs_cpu_only)
      cpu_gradient_check(self)

      default_device_check = _test_binary(
          "Div", ref, filter_=non_zero, test_gradient=False,
          dtypes=div_dtypes)
      default_device_check(self)

      broadcast_check = _test_binary_broadcast(
          "Div", ref, filter_=non_zero, dtypes=div_dtypes)
      broadcast_check(self)
  @given(X=hu.tensor(), in_place=st.booleans(), **hu.gcs)
  @settings(deadline=1000)
  def test_negative(self, X, in_place, gc, dc):
      """Device and gradient checks for ``Negative``, optionally in place."""
      output_name = "X" if in_place else "Y"
      op = core.CreateOperator("Negative", ["X"], [output_name])
      self.assertDeviceChecks(dc, op, [X], [0])
      self.assertGradientChecks(gc, op, [X], 0, [0])
  @given(X=hu.tensor(), **hu.gcs)
  @settings(deadline=1000)
  def test_tanh(self, X, gc, dc):
      """Device and gradient checks for the ``Tanh`` operator."""
      tanh_op = core.CreateOperator("Tanh", "X", "Y")
      tensors = [X]
      self.assertDeviceChecks(dc, tanh_op, tensors, [0])
      self.assertGradientChecks(gc, tanh_op, tensors, 0, [0])
  @given(X=hu.tensor(), **hu.gcs)
  @settings(deadline=10000)
  def test_averaged_loss(self, X, gc, dc):
      """Device and gradient checks for the ``AveragedLoss`` operator."""
      loss_op = core.CreateOperator("AveragedLoss", ["X"], ["loss"])
      tensors = [X]
      self.assertDeviceChecks(dc, loss_op, tensors, [0])
      self.assertGradientChecks(gc, loss_op, tensors, 0, [0])
  @given(X=hu.tensor(), inplace=st.booleans(), **hu.gcs)
  @settings(deadline=10000)
  def test_softsign(self, X, inplace, gc, dc):
      """Check ``Softsign`` against ``x / (1 + |x|)``.

      The gradient check is expected to raise when the operator runs in
      place, and to pass otherwise.
      """
      output_name = "X" if inplace else "Y"
      op = core.CreateOperator("Softsign", ["X"], [output_name])

      def softsign(X):
          return (X / (1 + np.abs(X)),)

      self.assertDeviceChecks(dc, op, [X], [0])
      self.assertReferenceChecks(gc, op, [X], softsign)
      if not inplace:
          self.assertGradientChecks(gc, op, [X], 0, [0])
      else:
          with self.assertRaises(Exception):
              self.assertGradientChecks(gc, op, [X], 0, [0])
  @given(
      device_options=st.lists(
          min_size=2,
          max_size=4,
          elements=st.sampled_from(hu.expanded_device_options)),
      set_seed=st.booleans())
  @settings(deadline=10000)
  def test_random_seed_behaviour(self, device_options, set_seed):
      """Check that ``XavierFill`` output repeats only when a seed is set.

      Each device option fills a 2-element blob in a fresh workspace. With
      ``random_seed`` set to 1000 on every option all results must equal
      the first one; without it every later result must differ from it.
      """
      # Assume we are always operating on CUDA or CPU, since RNG is
      # inconsistent between CPU and GPU.
      device_options = copy.deepcopy(device_options)
      device_types = set(do.device_type for do in device_options)
      assume(len(device_types) == 1)
      if set_seed:
          for option in device_options:
              option.random_seed = 1000

      def run(do):
          # Reset each time because 'Y' may already exist in the workspace
          # on a different device
          workspace.ResetWorkspace()
          ws = workspace.C.Workspace()
          fill_op = core.CreateOperator(
              "XavierFill", [], ["Y"],
              device_option=do,
              shape=[2])
          ws.run(fill_op)
          return ws.blobs["Y"].fetch()

      results = [run(do) for do in device_options]
      baseline = results[0]
      for other in results[1:]:
          if not set_seed:
              with self.assertRaises(AssertionError):
                  np.testing.assert_array_equal(baseline, other)
          else:
              np.testing.assert_array_equal(baseline, other)
  @given(axis=st.integers(min_value=1, max_value=4),
         num_output=st.integers(min_value=4, max_value=8),
         engine=st.sampled_from(["", "PACKED"]),
         **hu.gcs)
  @settings(deadline=10000)
  def test_fully_connected_axis(self, axis, num_output, engine, gc, dc):
      """Check ``FC`` with an explicit ``axis`` on a fixed 5-D input.

      The weight has ``num_output`` rows and one column per element of the
      input dimensions from ``axis`` onward. The output shape must be the
      leading ``axis`` input dimensions followed by ``num_output``. Device
      checks and gradient checks for every input are then run.
      """
      np.random.seed(1)
      X = np.random.randn(1, 2, 3, 2, 1).astype(np.float32)
      input_shape = list(X.shape)

      # Product of the dimensions that FC flattens into the inner size.
      K = reduce(lambda acc, dim: acc * dim, input_shape[axis:], 1)
      N = num_output
      W = np.random.randn(N, K).astype(np.float32)
      b = np.random.randn(N).astype(np.float32)

      op = core.CreateOperator(
          "FC",
          ["X", "W", "b"],
          ["Y"],
          engine=engine,
          axis=axis)
      for blob_name, value in (("X", X), ("W", W), ("b", b)):
          self.ws.create_blob(blob_name).feed(value)
      self.ws.run(op)
      Y = self.ws.blobs["Y"].fetch()
      self.assertEqual(list(Y.shape), input_shape[:axis] + [N])

      inputs = [X, W, b]
      self.assertDeviceChecks(dc, op, inputs, [0])
      for input_index in range(len(inputs)):
          self.assertGradientChecks(gc, op, inputs, input_index, [0])
  @unittest.skipIf(not workspace.has_gpu_support,
                   "Skipping test due to no gpu present.")
  @settings(deadline=None)
  @given(hidden_size=st.integers(min_value=1, max_value=3),
         num_layers=st.integers(min_value=1, max_value=3),
         bidirectional=st.booleans(),
         rnn_mode=st.sampled_from(["lstm"]),  # TODO: "gru"
         input_mode=st.sampled_from(["linear"]),
         dropout=hu.floats(min_value=1.0, max_value=1.0),
         T=st.integers(min_value=2, max_value=6),
         N=st.integers(min_value=1, max_value=4),
         D=st.integers(min_value=1, max_value=4))
  def test_recurrent(self, hidden_size, num_layers, bidirectional, rnn_mode,
                     input_mode, dropout, T, N, D):
      """Gradient-check the ``Recurrent`` operator on a GPU device.

      The flat weight blob is sized from the per-layer weight and bias
      counts (times 4, times the number of directions). Gradients are
      checked for every input in LSTM mode, and for all but the cell input
      otherwise.
      """
      on_hip = workspace.has_hip_support
      # there's a bug in miopen for N=1 which would be resolved in the next
      # release.
      if on_hip:
          assume(N > 1)

      # Random seed, this one happens to pass
      seed = 1234
      np.random.seed(seed)

      # set device option
      if on_hip:
          device_option, engine = hu.hip_do, 'MIOPEN'
      else:
          device_option, engine = hu.gpu_do, 'CUDNN'

      num_directions = 2 if bidirectional else 1
      recurrent_weight_size = hidden_size * hidden_size
      input_weight_size = hidden_size * D
      # Upper layers take the hidden output of each direction as input.
      upper_layer_input_weight_size = hidden_size * hidden_size
      if bidirectional:
          upper_layer_input_weight_size *= 2
      input_bias_size = hidden_size
      recurrent_bias_size = hidden_size

      first_layer_sz = (input_weight_size + recurrent_weight_size +
                        input_bias_size + recurrent_bias_size)
      upper_layer_sz = (upper_layer_input_weight_size +
                        recurrent_weight_size + input_bias_size +
                        recurrent_bias_size)
      total_sz = 4 * (first_layer_sz + (num_layers - 1) * upper_layer_sz)
      total_sz *= num_directions

      W = np.random.rand(total_sz).astype(np.float32)
      self.ws.create_blob("WEIGHT").feed(W, device_option=device_option)

      op = core.CreateOperator(
          "Recurrent",
          ["INPUT", "HIDDEN_INPUT", "CELL_INPUT", "WEIGHT"],
          ["OUTPUT", "HIDDEN_OUTPUT", "CELL_OUTPUT",
           "RNN_SCRATCH", "DROPOUT_STATES"],
          hidden_size=hidden_size,
          bidirectional=bidirectional,
          rnn_mode=rnn_mode,
          dropout=dropout,
          input_mode=input_mode,
          num_layers=num_layers,
          seed=seed,
          engine=engine)

      X = np.random.randn(T, N, D).astype(np.float32)
      self.ws.create_blob("INPUT").feed(X, device_option=device_option)
      W = self.ws.blobs["WEIGHT"].fetch()

      is_lstm = rnn_mode == "lstm"
      state_shape = (num_layers, N, hidden_size * num_directions)
      H = np.random.randn(*state_shape).astype(np.float32)
      if is_lstm:
          C = np.random.randn(*state_shape).astype(np.float32)
      else:
          C = np.empty((1,)).astype(np.float32)  # unused in GRU

      inputs = [X, H, C, W]
      if is_lstm:
          input_idxs = list(range(len(inputs)))
      else:
          input_idxs = [0, 1, 3]  # ignore C

      for input_idx in input_idxs:
          self.assertGradientChecks(
              device_option, op, inputs, input_idx, [0],
              stepsize=0.01, threshold=0.01)
  @given(ndim=st.integers(1, 4),
         axis=st.integers(0, 3),
         add_axis=st.integers(0, 1),
         num_inputs=st.integers(2, 4), **hu.gcs)
  @settings(deadline=None, max_examples=50)
  def test_depth_concat(self, ndim, axis, add_axis, num_inputs, gc, dc):
      """Check ``Concat`` along ``axis``, with or without ``add_axis``.

      Without ``add_axis`` every input gets a distinct size along ``axis``.
      With it all inputs share one shape and the reference inserts the new
      axis itself. Device, gradient (every input) and reference checks are
      run. The reference also returns the per-input size along ``axis``.
      """
      assume(axis < ndim)
      input_names = ['X0', 'X1', 'X2', 'X3'][:num_inputs]
      shape = [2, 3, 5, 7][:ndim]
      individual_dims = [1, 2, 3, 4, 5][:num_inputs]
      vary_axis = add_axis == 0

      inputs = []
      for unique_dim in individual_dims:
          if vary_axis:
              # Sets a unique dim and create the input.
              shape[axis] = unique_dim
          inputs.append(np.random.randn(*shape).astype(np.float32))

      op = core.CreateOperator("Concat", input_names, ["Y", "Y_dims"],
                               axis=axis, add_axis=add_axis)
      self.assertDeviceChecks(dc, op, inputs, [0])
      for input_index in range(num_inputs):
          self.assertGradientChecks(gc, op, inputs, input_index, [0])

      # Reference
      def depth_concat(*inputs):
          tensors = list(inputs)
          if add_axis:
              tensors = [np.expand_dims(t, axis) for t in tensors]
          input_dims = np.array([np.shape(t)[axis] for t in tensors])
          return [np.concatenate(tensors, axis=axis), input_dims]

      self.assertReferenceChecks(gc, op, inputs, depth_concat)
  @given(num_inputs=st.integers(2, 4),
         order=st.sampled_from([("NCHW", 1), ("NHWC", 3)]),
         **hu.gcs)
  @settings(deadline=10000)
  def test_depth_concat_with_order(self, num_inputs, order, gc, dc):
      """Check ``Concat`` when the axis is implied by the ``order`` argument.

      ``order`` is an ``(order_name, axis)`` pair. Each input gets a
      distinct size along that axis, and the reference concatenates along
      it.
      """
      order_name, concat_axis = order
      input_names = ['X0', 'X1', 'X2', 'X3'][:num_inputs]
      shape = [2, 3, 5, 7]
      individual_dims = [1, 2, 3, 4][:num_inputs]

      inputs = []
      for unique_dim in individual_dims:
          # Sets a unique dim and create the input.
          shape[concat_axis] = unique_dim
          inputs.append(np.random.rand(*shape).astype(np.float32))

      op = core.CreateOperator("Concat", input_names, ["Y", "Y_dims"],
                               order=order_name)
      self.assertDeviceChecks(dc, op, inputs, [0])
      for input_index in range(num_inputs):
          self.assertGradientChecks(gc, op, inputs, input_index, [0])

      # Reference
      def depth_concat_with_order(*inputs):
          tensors = list(inputs)
          input_dims = np.array(
              [np.shape(t)[concat_axis] for t in tensors])
          return [np.concatenate(tensors, axis=concat_axis), input_dims]

      self.assertReferenceChecks(gc, op, inputs, depth_concat_with_order)
  @given(X=hu.arrays(dims=[5, 2],
                     elements=hu.floats(
                         min_value=1.0,
                         max_value=10.0)
                     ),
         **hu.gcs_cpu_only)
  @settings(deadline=1000)
  def test_last_n_windows(self, X, gc, dc):
      """Run ``LastNWindowCollector`` twice and compare with a ring buffer.

      The collector keeps 7 rows. The expected output replays the input
      rows twice, writing each one to slot ``i % 7``.
      """
      workspace.FeedBlob('input', X)
      workspace.FeedBlob('next', np.array(0, dtype=np.int32))
      workspace.CreateBlob('output')

      collect_net = core.Net('collect_net')
      collect_net.LastNWindowCollector(
          ['output', 'next', 'input'],
          ['output', 'next'],
          num_to_collect=7,
      )
      collect_step = core.execution_step(
          'collect_data', [collect_net], num_iter=2)
      plan = core.Plan('collect_data')
      plan.AddStep(collect_step)
      workspace.RunPlan(plan)

      output = workspace.FetchBlob('output')
      inputs = workspace.FetchBlob('input')
      num_rows = inputs.shape[0]
      new_output = np.zeros([7, inputs.shape[1]])
      for step in range(num_rows * 2):
          new_output[step % 7] = inputs[step % num_rows]
      np.testing.assert_almost_equal(output, new_output, decimal=5)
  # ``np.bool_`` is used instead of the ``np.bool`` alias, which was removed
  # in NumPy 1.24; ``astype`` gives the same boolean array for either.
  @given(dtype=st.sampled_from([np.float32, np.float64, np.int32, np.bool_]))
  @settings(deadline=1000)
  def test_print(self, dtype):
      """Run the ``Print`` operator on a small blob of each sampled dtype.

      The blob is a random permutation of 0..5 cast to ``dtype``. The test
      only requires that the operator runs without raising.
      """
      data = np.random.permutation(6).astype(dtype)
      self.ws.create_blob("data").feed(data)
      op = core.CreateOperator("Print", "data", [])
      self.ws.run(op)
  @given(inputs=hu.tensors(n=2),
         in_place=st.booleans(),
         momentum=hu.floats(min_value=0.1, max_value=0.9),
         nesterov=st.booleans(),
         lr=hu.floats(min_value=0.1, max_value=0.9),
         **hu.gcs)
  @settings(deadline=10000)
  def test_momentum_sgd(
          self, inputs, in_place, momentum, nesterov, lr, gc, dc):
      """Check ``MomentumSGD`` (plain and Nesterov) against a reference.

      The learning rate is fed as a one-element float32 array. With
      ``in_place`` the outputs overwrite ``grad`` and ``m``.
      """
      grad, m = inputs
      lr = np.asarray([lr], dtype=np.float32)
      if in_place:
          output_names = ["grad", "m"]
      else:
          output_names = ["grad_o", "m_o"]
      op = core.CreateOperator(
          "MomentumSGD",
          ["grad", "m", "lr"],
          output_names,
          momentum=momentum,
          nesterov=int(nesterov),
          device_option=gc)
      self.assertDeviceChecks(
          dc, op, [grad, m, lr], [0])

      # Reference
      def momentum_sgd(grad, m, lr):
          rate = lr[0]
          if nesterov:
              m_new = momentum * m + rate * grad
              return ((1 + momentum) * m_new - momentum * m, m_new)
          adjusted_gradient = rate * grad + momentum * m
          return (adjusted_gradient, adjusted_gradient)

      self.assertReferenceChecks(gc, op, [grad, m, lr], momentum_sgd)
  @given(inputs=hu.tensors(n=3),
         in_place=st.booleans(),
         decay=hu.floats(min_value=0.1, max_value=0.9),
         momentum=hu.floats(min_value=0.1, max_value=0.9),
         lr=hu.floats(min_value=0.1, max_value=0.9),
         epsilon=hu.floats(min_value=1e-5, max_value=1e-2),
         **hu.gcs)
  @settings(deadline=10000)
  def test_rmsprop_sgd(self, inputs, in_place, decay, momentum, lr, epsilon,
                       gc, dc):
      """Check ``RmsProp`` against a NumPy reference.

      The mean-square input is made strictly positive (``|ms| + 0.01``)
      and the learning rate is fed as a one-element float32 array. With
      ``in_place`` the outputs overwrite the inputs.
      """
      grad, ms, mom = inputs
      ms = np.abs(ms) + 0.01
      lr = np.asarray([lr], dtype=np.float32)
      if in_place:
          output_names = ["grad", "ms", "mom"]
      else:
          output_names = ["grad_o", "ms_o", "mom_o"]
      op = core.CreateOperator(
          "RmsProp",
          ["grad", "ms", "mom", "lr"],
          output_names,
          momentum=momentum, decay=decay, epsilon=epsilon, device_option=gc)
      self.assertDeviceChecks(dc, op, [grad, ms, mom, lr], [0])

      def rmsprop(grad, ms, mom, lr):
          rate = lr[0]
          ms_o = ms + (1. - decay) * (np.square(grad) - ms)
          mom_o = momentum * mom + rate * grad / np.sqrt(epsilon + ms_o)
          # The adjusted gradient is the updated momentum itself.
          return (mom_o, ms_o, mom_o)

      self.assertReferenceChecks(gc, op, [grad, ms, mom, lr], rmsprop)
  509. # Reference
  510. @staticmethod
  511. def _dense_ftrl(alpha, beta, lambda1, lambda2, w, nz, g):
  512. if isinstance(alpha, np.ndarray):
  513. alpha = np.asscalar(alpha)
  514. n = np.take(nz, 0, axis=-1)
  515. z = np.take(nz, 1, axis=-1)
  516. # python port of Sigrid's implementation
  517. g2 = g * g
  518. sigma = (np.sqrt(n + g2) - np.sqrt(n)) / alpha
  519. z += g - sigma * w
  520. n += g2
  521. w = (np.sign(z) * lambda1 - z) / (
  522. (beta + np.sqrt(n)) / alpha + lambda2)
  523. w[np.abs(z) <= lambda1] = 0
  524. return (w, np.stack([n, z], axis=-1))
  @given(inputs=hu.tensors(n=4),
         in_place=st.booleans(),
         alpha=hu.floats(min_value=0.01, max_value=0.1),
         beta=hu.floats(min_value=0.1, max_value=0.9),
         lambda1=hu.floats(min_value=0.001, max_value=0.1),
         lambda2=hu.floats(min_value=0.001, max_value=0.1),
         engine=st.sampled_from([None, "SIMD"]),
         **hu.gcs_cpu_only)
  @settings(deadline=1000)
  def test_ftrl_sgd(self, inputs, in_place, alpha, beta, lambda1, lambda2,
                    engine, gc, dc):
      """Check the ``Ftrl`` operator against ``_dense_ftrl``.

      ``n`` is made non-negative and stacked with ``z`` along a new last
      axis to form the ``nz`` input. With ``in_place`` the outputs
      overwrite ``var`` and ``nz``.
      """
      var, n, z, grad = inputs
      nz = np.stack([np.abs(n), z], axis=-1)
      if in_place:
          output_names = ["var", "nz"]
      else:
          output_names = ["var_o", "nz_o"]
      op = core.CreateOperator(
          "Ftrl",
          ["var", "nz", "grad"],
          output_names,
          alpha=alpha, beta=beta, lambda1=lambda1, lambda2=lambda2,
          engine=engine,
          device_option=gc)
      op_inputs = [var, nz, grad]
      self.assertDeviceChecks(dc, op, op_inputs, [0])
      reference = partial(self._dense_ftrl, alpha, beta, lambda1, lambda2)
      self.assertReferenceChecks(gc, op, op_inputs, reference)
  552. # Reference
  553. @staticmethod
  554. def _dense_gftrl(alpha, beta, lambda1, lambda2, w, nz, g):
  555. if isinstance(alpha, np.ndarray):
  556. alpha = np.asscalar(alpha)
  557. old_shape = g.shape
  558. n = np.take(nz, 0, axis=-1)
  559. z = np.take(nz, 1, axis=-1)
  560. output_dim = g.shape[0]
  561. w = w.reshape(output_dim, -1)
  562. g = g.reshape(output_dim, -1)
  563. n = n.reshape(output_dim, -1)
  564. z = z.reshape(output_dim, -1)
  565. input_dim = g.shape[1]
  566. g2 = g * g
  567. sigma = (np.sqrt(n + g2) - np.sqrt(n)) / alpha
  568. z += g - sigma * w
  569. n += g2
  570. z_norms = np.linalg.norm(z, 2, axis=0)
  571. z_norms = z_norms + 1e-6
  572. w = z * ((lambda1 * np.sqrt(output_dim)) / z_norms - 1) / \
  573. ((beta + np.sqrt(n)) / alpha + lambda2)
  574. for i in range(input_dim):
  575. if z_norms[i] <= lambda1 * np.sqrt(output_dim):
  576. w[:, i] = 0
  577. w = w.reshape(old_shape)
  578. n = n.reshape(old_shape)
  579. z = z.reshape(old_shape)
  580. return (w, np.stack([n, z], axis=-1))
  581. @given(inputs=hu.tensors(n=4),
  582. in_place=st.booleans(),
  583. alpha=hu.floats(min_value=0.01, max_value=0.1),
  584. beta=hu.floats(min_value=0.1, max_value=0.9),
  585. lambda1=hu.floats(min_value=0.001, max_value=0.1),
  586. lambda2=hu.floats(min_value=0.001, max_value=0.1),
  587. engine=st.sampled_from([None, "SIMD"]),
  588. **hu.gcs_cpu_only)
  589. @settings(deadline=10000)
  590. def test_gftrl_sgd(self, inputs, in_place, alpha, beta, lambda1, lambda2,
  591. engine, gc, dc):
  592. var, n, z, grad = inputs
  593. n = np.abs(n)
  594. nz = np.stack([n, z], axis=-1)
  595. op = core.CreateOperator(
  596. "GFtrl",
  597. ["var", "nz", "grad"],
  598. ["var" if in_place else "var_o",
  599. "nz" if in_place else "nz_o"],
  600. alpha=alpha, beta=beta, lambda1=lambda1, lambda2=lambda2,
  601. engine=engine,
  602. device_option=gc)
  603. self.assertDeviceChecks(
  604. dc, op, [var, nz, grad], [0])
  605. self.assertReferenceChecks(
  606. gc, op, [var, nz, grad],
  607. partial(self._dense_gftrl, alpha, beta, lambda1, lambda2))
  608. @given(inputs=hu.tensors(n=4),
  609. alpha=hu.floats(min_value=0.01, max_value=0.1),
  610. beta=hu.floats(min_value=0.1, max_value=0.9),
  611. lambda1=hu.floats(min_value=0.001, max_value=0.1),
  612. lambda2=hu.floats(min_value=0.001, max_value=0.1),
  613. engine=st.sampled_from([None, "SIMD"]),
  614. **hu.gcs_cpu_only)
  615. @settings(deadline=10000)
  616. def test_sparse_ftrl_sgd(self, inputs, alpha, beta, lambda1, lambda2,
  617. engine, gc, dc):
  618. var, n, z, grad = inputs
  619. # generate fake subset manually because hypothesis is too complicated :)
  620. indices = np.arange(var.shape[0])
  621. indices = indices[indices % 2 == 0]
  622. grad = grad[indices]
  623. n = np.abs(n)
  624. nz = np.stack([n, z], axis=-1)
  625. op = core.CreateOperator(
  626. "SparseFtrl",
  627. ["var", "nz", "indices", "grad"],
  628. ["var", "nz"],
  629. alpha=alpha, beta=beta, lambda1=lambda1, lambda2=lambda2,
  630. engine=engine,
  631. device_option=gc)
  632. self.assertDeviceChecks(
  633. dc, op, [var, nz, indices, grad], [0])
  634. # Reference
  635. def ftrl(w, nz, i, g):
  636. sw, snz = self._dense_ftrl(alpha, beta, lambda1, lambda2,
  637. w[i], nz[i], g)
  638. w[i] = sw
  639. nz[i] = snz
  640. return (w, nz)
  641. self.assertReferenceChecks(gc, op, [var, nz, indices, grad], ftrl)
  642. # Reference
  643. @staticmethod
  644. def _dense_ftrl_send_alpha_by_input(beta, lambda1, lambda2, w, nz, g, alpha):
  645. return TestOperators._dense_ftrl(alpha, beta, lambda1, lambda2, w, nz,
  646. g)
  647. @given(inputs=hu.tensors(n=4),
  648. in_place=st.booleans(),
  649. alpha=hu.floats(min_value=0.01, max_value=0.1),
  650. beta=hu.floats(min_value=0.1, max_value=0.9),
  651. lambda1=hu.floats(min_value=0.001, max_value=0.1),
  652. lambda2=hu.floats(min_value=0.001, max_value=0.1),
  653. engine=st.sampled_from([None, "SIMD"]),
  654. **hu.gcs_cpu_only)
  655. @settings(deadline=10000)
  656. def test_ftrl_sgd_send_alpha_by_input(self, inputs, in_place, alpha, beta,
  657. lambda1, lambda2, engine, gc, dc):
  658. var, n, z, grad = inputs
  659. n = np.abs(n)
  660. nz = np.stack([n, z], axis=-1)
  661. alpha = np.array(alpha).astype(np.float32)
  662. op = core.CreateOperator(
  663. "Ftrl",
  664. ["var", "nz", "grad", "alpha"],
  665. ["var" if in_place else "var_o",
  666. "nz" if in_place else "nz_o"],
  667. beta=beta, lambda1=lambda1, lambda2=lambda2,
  668. engine=engine,
  669. device_option=gc)
  670. self.assertDeviceChecks(
  671. dc, op, [var, nz, grad, alpha], [0])
  672. self.assertReferenceChecks(
  673. gc, op, [var, nz, grad, alpha],
  674. partial(self._dense_ftrl_send_alpha_by_input, beta, lambda1, lambda2))
  675. @given(inputs=hu.tensors(n=4),
  676. alpha=hu.floats(min_value=0.01, max_value=0.1),
  677. beta=hu.floats(min_value=0.1, max_value=0.9),
  678. lambda1=hu.floats(min_value=0.001, max_value=0.1),
  679. lambda2=hu.floats(min_value=0.001, max_value=0.1),
  680. engine=st.sampled_from([None, "SIMD"]),
  681. **hu.gcs_cpu_only)
  682. @settings(deadline=10000)
  683. def test_sparse_ftrl_sgd_send_alpha_by_input(self, inputs, alpha, beta,
  684. lambda1, lambda2, engine, gc,
  685. dc):
  686. var, n, z, grad = inputs
  687. # generate fake subset manually because hypothesis is too complicated :)
  688. indices = np.arange(var.shape[0])
  689. indices = indices[indices % 2 == 0]
  690. grad = grad[indices]
  691. n = np.abs(n)
  692. nz = np.stack([n, z], axis=-1)
  693. alpha = np.array(alpha).astype(np.float32)
  694. op = core.CreateOperator(
  695. "SparseFtrl",
  696. ["var", "nz", "indices", "grad", "alpha"],
  697. ["var", "nz"],
  698. beta=beta, lambda1=lambda1, lambda2=lambda2,
  699. engine=engine,
  700. device_option=gc)
  701. self.assertDeviceChecks(
  702. dc, op, [var, nz, indices, grad, alpha], [0])
  703. # Reference
  704. def ftrl(w, nz, i, g, alpha):
  705. sw, snz = self._dense_ftrl_send_alpha_by_input(beta, lambda1,
  706. lambda2, w[i], nz[i],
  707. g, alpha)
  708. w[i] = sw
  709. nz[i] = snz
  710. return (w, nz)
  711. self.assertReferenceChecks(gc, op, [var, nz, indices, grad, alpha],
  712. ftrl)
  713. @given(input=hu.tensor(max_value=20,
  714. max_dim=1,
  715. dtype=np.int32,
  716. elements=st.integers(min_value=0, max_value=10)),
  717. with_remapping=st.booleans(),
  718. **hu.gcs_no_hip)
  719. @settings(deadline=10000)
  720. def test_unique(self, input, with_remapping, gc, dc):
  721. op = core.CreateOperator(
  722. "Unique",
  723. ["input"],
  724. ["unique"] + (["remapping"] if with_remapping else []),
  725. device_option=gc)
  726. self.assertDeviceChecks(dc, op, [input], [0])
  727. # Validator
  728. def unique_valid(input, unique, remapping=None):
  729. self.assertEqual(unique.size, len(set(input)))
  730. self.assertEqual(sorted(unique), sorted(set(input)))
  731. if with_remapping:
  732. self.assertEqual(remapping.shape, input.shape)
  733. remapped = [unique[remapping[i]] for i in range(len(input))]
  734. np.testing.assert_array_equal(remapped, input)
  735. self.assertValidationChecks(gc, op, [input], unique_valid)
  736. @given(prediction=hu.arrays(dims=[10, 3],
  737. elements=hu.floats(allow_nan=False,
  738. allow_infinity=False,
  739. min_value=0,
  740. max_value=1)),
  741. labels=hu.arrays(dims=[10],
  742. dtype=np.int32,
  743. elements=st.integers(min_value=0,
  744. max_value=3 - 1)),
  745. top_k=st.integers(min_value=1, max_value=3),
  746. **hu.gcs)
  747. @settings(deadline=1000)
  748. def test_accuracy(self, prediction, labels, top_k, gc, dc):
  749. if(top_k > 1):
  750. gc = hu.cpu_do
  751. op = core.CreateOperator(
  752. "Accuracy",
  753. ["prediction", "labels"],
  754. ["accuracy"],
  755. top_k=top_k,
  756. device_option=gc
  757. )
  758. def op_ref(prediction, labels, top_k):
  759. N = prediction.shape[0]
  760. correct = 0
  761. for i in range(0, len(prediction)):
  762. pred_sorted = sorted(
  763. ([item, j] for j, item in enumerate(prediction[i])),
  764. key=lambda x: x[0],
  765. reverse=True
  766. )
  767. max_ids = [x[1] for x in pred_sorted[0:top_k]]
  768. for m in max_ids:
  769. if m == labels[i]:
  770. correct += 1
  771. accuracy = correct / N
  772. return (accuracy,)
  773. self.assertReferenceChecks(
  774. device_option=gc,
  775. op=op,
  776. inputs=[prediction, labels, top_k],
  777. reference=op_ref)
  778. @given(target_probabilities=hu.arrays(
  779. dims=[10], elements=hu.floats(allow_nan=False,
  780. allow_infinity=False,
  781. min_value=0.01,
  782. max_value=1)),
  783. **hu.gcs)
  784. @settings(deadline=1000)
  785. def test_perplexity(self, target_probabilities, gc, dc):
  786. op = core.CreateOperator(
  787. "Perplexity",
  788. ["target_probabilities"],
  789. ["perplexity"]
  790. )
  791. def op_ref(target_probabilities):
  792. N = target_probabilities.shape[0]
  793. perplexities = np.power(target_probabilities, -1.0 / N)
  794. perplexity = reduce(lambda x, y: x * y, perplexities)
  795. return (perplexity,)
  796. self.assertReferenceChecks(
  797. device_option=gc,
  798. op=op,
  799. inputs=[target_probabilities],
  800. reference=op_ref)
  801. @given(lengths=st.lists(st.integers(min_value=0, max_value=10),
  802. min_size=0,
  803. max_size=10),
  804. **hu.gcs_cpu_only)
  805. @settings(deadline=10000)
  806. def test_lengths_to_segment_ids(self, lengths, gc, dc):
  807. op = core.CreateOperator(
  808. "LengthsToSegmentIds",
  809. ["lengths"],
  810. ["segment_ids"])
  811. def op_ref(lengths):
  812. sids = []
  813. for i, l in enumerate(lengths):
  814. sids.extend(l * [i])
  815. return (np.array(sids, dtype=np.int32), )
  816. self.assertReferenceChecks(
  817. device_option=gc,
  818. op=op,
  819. inputs=[np.array(lengths, dtype=np.int32)],
  820. reference=op_ref)
  821. @given(lengths=st.lists(st.integers(min_value=0, max_value=10),
  822. min_size=0,
  823. max_size=10),
  824. **hu.gcs_cpu_only)
  825. @settings(deadline=10000)
  826. def test_lengths_range_fill(self, lengths, gc, dc):
  827. op = core.CreateOperator(
  828. "LengthsRangeFill",
  829. ["lengths"],
  830. ["increasing_seq"])
  831. def op_ref(lengths):
  832. sids = []
  833. for _, l in enumerate(lengths):
  834. sids.extend(list(range(l)))
  835. return (np.array(sids, dtype=np.int32), )
  836. self.assertReferenceChecks(
  837. device_option=gc,
  838. op=op,
  839. inputs=[np.array(lengths, dtype=np.int32)],
  840. reference=op_ref)
  841. @given(**hu.gcs_cpu_only)
  842. @settings(deadline=10000)
  843. def test_segment_ids_to_ranges(self, gc, dc):
  844. lengths = [4, 6, 3, 2, 0, 4]
  845. op = core.CreateOperator(
  846. "SegmentIdsToRanges",
  847. ["segment_ids"],
  848. ["ranges"])
  849. def op_ref(segment_ids):
  850. ranges = [np.array([0, 0], dtype=np.int32)]
  851. prev = 0
  852. for i, sid in enumerate(segment_ids):
  853. while sid != prev:
  854. prev += 1
  855. ranges.append(np.array([i, 0], dtype=np.int32))
  856. ranges[-1][1] += 1
  857. return (np.array(ranges, dtype=np.int32), )
  858. def lengths_to_segment_ids(lengths):
  859. sids = []
  860. for i, l in enumerate(lengths):
  861. sids.extend(l * [i])
  862. return (np.array(sids, dtype=np.int32), )
  863. self.assertReferenceChecks(
  864. device_option=gc,
  865. op=op,
  866. inputs=np.array(lengths_to_segment_ids(lengths), dtype=np.int32),
  867. reference=op_ref)
  868. @given(lengths=st.lists(st.integers(min_value=0, max_value=10),
  869. min_size=0,
  870. max_size=10),
  871. **hu.gcs_cpu_only)
  872. @settings(deadline=10000)
  873. def test_lengths_to_ranges(self, lengths, gc, dc):
  874. op = core.CreateOperator(
  875. "LengthsToRanges",
  876. ["lengths"],
  877. ["ranges"])
  878. def op_ref(x):
  879. if not x.size:
  880. return (x.reshape((0, 2)), )
  881. return (np.column_stack((np.concatenate(([0], np.cumsum(x)[:-1])),
  882. x)), )
  883. self.assertReferenceChecks(
  884. device_option=gc,
  885. op=op,
  886. inputs=[np.array(lengths, dtype=np.int32)],
  887. reference=op_ref)
  888. @given(
  889. lengths=st.lists(
  890. st.integers(min_value=0, max_value=10), min_size=0, max_size=10
  891. ),
  892. include_last_offset=st.booleans(),
  893. **hu.gcs_cpu_only
  894. )
  895. @settings(deadline=None)
  896. def test_lengths_to_offsets(self, lengths, include_last_offset, gc, dc):
  897. op = core.CreateOperator(
  898. "LengthsToOffsets",
  899. ["lengths"],
  900. ["ranges"],
  901. include_last_offset=include_last_offset,
  902. )
  903. def op_ref(x):
  904. if not x.size:
  905. arr = [x.reshape(0)]
  906. else:
  907. arr = [np.concatenate(([0], np.cumsum(x)[:-1]))]
  908. if include_last_offset:
  909. arr[0] = np.concatenate((arr[0], np.array([np.sum(x)])))
  910. return tuple(arr)
  911. self.assertReferenceChecks(
  912. device_option=gc,
  913. op=op,
  914. inputs=[np.array(lengths, dtype=np.int32)],
  915. reference=op_ref,
  916. )
  917. @given(prediction=hu.arrays(dims=[10, 3],
  918. elements=hu.floats(allow_nan=False,
  919. allow_infinity=False,
  920. min_value=0,
  921. max_value=1)),
  922. labels=hu.arrays(dims=[10],
  923. dtype=np.int32,
  924. elements=st.integers(min_value=0,
  925. max_value=3 - 1)),
  926. **hu.gcs)
  927. @settings(deadline=10000)
  928. def test_multi_class_accuracy(self, prediction, labels, gc, dc):
  929. op = core.CreateOperator(
  930. "MultiClassAccuracy",
  931. ["prediction", "labels"],
  932. ["accuracies", "amounts"]
  933. )
  934. def op_ref(prediction, labels):
  935. N = prediction.shape[0]
  936. D = prediction.shape[1]
  937. accuracies = np.empty(D, dtype=float)
  938. accuracies.fill(0)
  939. amounts = np.empty(D, dtype=int)
  940. amounts.fill(0)
  941. max_ids = np.argmax(prediction, axis=1)
  942. for i in range(0, N):
  943. max_id = max_ids[i]
  944. label_id = labels[i]
  945. if max_id == label_id:
  946. accuracies[label_id] += 1
  947. amounts[label_id] += 1
  948. for i in range(0, D):
  949. amount = amounts[i]
  950. if amount:
  951. accuracies[i] /= amount
  952. return (accuracies, amounts,)
  953. self.assertReferenceChecks(
  954. device_option=gc,
  955. op=op,
  956. inputs=[prediction, labels],
  957. reference=op_ref)
  958. @given(lengths=st.lists(st.integers(min_value=0, max_value=10),
  959. min_size=0,
  960. max_size=10),
  961. **hu.gcs_cpu_only)
  962. @settings(deadline=10000)
  963. def test_segment_ids_to_lengths(self, lengths, gc, dc):
  964. op = core.CreateOperator(
  965. "SegmentIdsToLengths",
  966. ["segment_ids"],
  967. ["lengths"])
  968. def lengths_to_ids(lengths):
  969. sids = []
  970. for i, l in enumerate(lengths):
  971. sids.extend(l * [i])
  972. return sids
  973. segment_ids = lengths_to_ids(lengths)
  974. def ids_to_lengths(ids):
  975. ids_length = len(ids)
  976. if ids_length == 0:
  977. return (np.array([], dtype=np.int32),)
  978. lengths = []
  979. # segment id starts with 0
  980. prev_id = -1
  981. tmp_length = 0
  982. for idx in range(ids_length):
  983. cur_id = ids[idx]
  984. if cur_id != prev_id:
  985. if idx != 0:
  986. lengths.append(tmp_length)
  987. while prev_id + 1 != cur_id:
  988. lengths.append(0)
  989. prev_id += 1
  990. prev_id = cur_id
  991. tmp_length = 0
  992. tmp_length += 1
  993. lengths.append(tmp_length)
  994. return (np.array(lengths, dtype=np.int32),)
  995. self.assertReferenceChecks(
  996. device_option=gc,
  997. op=op,
  998. inputs=[np.array(segment_ids, dtype=np.int32)],
  999. reference=ids_to_lengths)
  1000. @given(lengths=st.lists(st.integers(min_value=1, max_value=10),
  1001. min_size=0,
  1002. max_size=10),
  1003. power=st.sampled_from([0.5, 1.0, 1.5, 2.0]),
  1004. **hu.gcs_cpu_only)
  1005. @settings(deadline=10000)
  1006. def test_lengths_to_weights(self, lengths, power, gc, dc):
  1007. op = core.CreateOperator(
  1008. "LengthsToWeights",
  1009. ["lengths"],
  1010. ["weights"],
  1011. power=power)
  1012. def lengths_to_weights(lengths):
  1013. weighted_length = []
  1014. for l in lengths:
  1015. weighted_length.extend(l * [1 / pow(l, power)])
  1016. return (np.array(weighted_length, dtype=float),)
  1017. self.assertReferenceChecks(
  1018. device_option=gc,
  1019. op=op,
  1020. inputs=[np.array(lengths, dtype=np.int32)],
  1021. reference=lengths_to_weights)
  1022. @given(input_tensor=hu.arrays(
  1023. dims=[10], elements=hu.floats(allow_nan=False,
  1024. allow_infinity=False)),
  1025. **hu.gcs)
  1026. @settings(deadline=10000)
  1027. def test_abs(self, input_tensor, gc, dc):
  1028. op = core.CreateOperator(
  1029. "Abs",
  1030. ["input"],
  1031. ["output"]
  1032. )
  1033. def abs_ref(input_tensor):
  1034. return (np.abs(input_tensor),)
  1035. self.assertReferenceChecks(
  1036. device_option=gc,
  1037. op=op,
  1038. inputs=[input_tensor],
  1039. reference=abs_ref)
  1040. @given(input_tensor=hu.arrays(
  1041. dims=[10], elements=hu.floats(min_value=-10,
  1042. max_value=10)),
  1043. **hu.gcs)
  1044. @settings(deadline=10000)
  1045. def test_cos(self, input_tensor, gc, dc):
  1046. op = core.CreateOperator(
  1047. "Cos",
  1048. ["input"],
  1049. ["output"]
  1050. )
  1051. def cos_ref(input_tensor):
  1052. return (np.cos(input_tensor),)
  1053. self.assertReferenceChecks(
  1054. device_option=gc,
  1055. op=op,
  1056. inputs=[input_tensor],
  1057. reference=cos_ref)
  1058. @given(input_tensor=hu.arrays(
  1059. dims=[10], elements=hu.floats(min_value=-10,
  1060. max_value=10)),
  1061. **hu.gcs)
  1062. @settings(deadline=1000)
  1063. def test_sin(self, input_tensor, gc, dc):
  1064. op = core.CreateOperator(
  1065. "Sin",
  1066. ["input"],
  1067. ["output"]
  1068. )
  1069. def sin_ref(input_tensor):
  1070. return (np.sin(input_tensor),)
  1071. self.assertReferenceChecks(
  1072. device_option=gc,
  1073. op=op,
  1074. inputs=[input_tensor],
  1075. reference=sin_ref)
  1076. @given(input_tensor=hu.arrays(
  1077. dims=[10], elements=hu.floats(allow_nan=False,
  1078. allow_infinity=False)),
  1079. **hu.gcs)
  1080. @settings(deadline=10000)
  1081. def test_exp(self, input_tensor, gc, dc):
  1082. op = core.CreateOperator(
  1083. "Exp",
  1084. ["input"],
  1085. ["output"]
  1086. )
  1087. def exp_ref(input_tensor):
  1088. return (np.exp(input_tensor),)
  1089. self.assertReferenceChecks(
  1090. device_option=gc,
  1091. op=op,
  1092. inputs=[input_tensor],
  1093. reference=exp_ref)
  1094. @given(input_tensor=hu.arrays(
  1095. dims=[10], elements=hu.floats(min_value=1,
  1096. max_value=10000)),
  1097. **hu.gcs_cpu_only)
  1098. @settings(deadline=10000)
  1099. def test_log(self, input_tensor, gc, dc):
  1100. op = core.CreateOperator(
  1101. "Log",
  1102. ["input"],
  1103. ["output"]
  1104. )
  1105. def log_ref(input_tensor):
  1106. return (np.log(input_tensor),)
  1107. self.assertReferenceChecks(
  1108. device_option=gc,
  1109. op=op,
  1110. inputs=[input_tensor],
  1111. reference=log_ref)
  1112. self.assertGradientChecks(gc, op, [input_tensor], 0, [0])
  1113. def test_blobs_dequeue_timeout(self):
  1114. op = core.CreateOperator(
  1115. "CreateBlobsQueue",
  1116. [],
  1117. ["queue"],
  1118. capacity=5,
  1119. num_blobs=1)
  1120. self.ws.run(op)
  1121. t = time.time()
  1122. op = core.CreateOperator(
  1123. "DequeueBlobs",
  1124. ["queue"],
  1125. ["out"],
  1126. timeout_secs=0.2)
  1127. self.assertRaises(RuntimeError, lambda: self.ws.run(op))
  1128. t = time.time() - t
  1129. self.assertGreater(t, 0.19)
    @given(num_threads=st.integers(1, 10),  # noqa
           num_elements=st.integers(1, 100),
           capacity=st.integers(1, 5),
           num_blobs=st.integers(1, 3),
           do=st.sampled_from(hu.device_options))
    @settings(deadline=10000)
    def test_blobs_queue_threading(self, num_threads, num_elements,
                                   capacity, num_blobs, do):
        """
        - Construct matrices of size N x D
        - Start K threads
        - Push all N rows into the queue of capacity C
        - Pull all N rows out of the queue.
        - Verify that the output matrices are permutations of the rows of the
          original matrices.

        Here N is num_elements, D is fixed at 5, K is num_threads and C is
        capacity; num_blobs matrices are pushed through the queue together.
        """
        import threading
        import queue
        op = core.CreateOperator(
            "CreateBlobsQueue",
            [],
            ["queue"],
            capacity=capacity,
            num_blobs=num_blobs,
            device_option=do)
        self.ws.run(op)

        xs = [np.random.randn(num_elements, 5).astype(np.float32)
              for _ in range(num_blobs)]
        # Python-side work queue: one entry per row index, holding that row
        # from every matrix.
        q = queue.Queue()
        for i in range(num_elements):
            q.put([x[i] for x in xs])

        def enqueue(t):
            # Worker body for thread t: feed rows into the thread's own
            # "x_<i>_<t>" blobs and enqueue them until the work queue is empty.
            while True:
                feed_blobs = ["x_{}_{}".format(i, t) for i in range(num_blobs)]
                op = core.CreateOperator(
                    "EnqueueBlobs",
                    ["queue"] + feed_blobs,
                    feed_blobs,
                    device_option=do)
                try:
                    elems = q.get_nowait()
                    for elem, feed_blob in zip(elems, feed_blobs):
                        self.ws.create_blob(feed_blob).feed(
                            elem, device_option=do)
                    self.ws.run(op)
                except queue.Empty:
                    return

        # Create all blobs before racing on multiple threads
        # (blob creation is not threadsafe)
        for t in range(num_threads):
            for i in range(num_blobs):
                self.ws.create_blob("x_{}_{}".format(i, t))

        threads = [threading.Thread(target=enqueue, args=(t,))
                   for t in range(num_threads)]
        for thread in threads:
            thread.start()

        # Dequeue on the main thread, one "y_<i>_<n>" blob set per element.
        for n in range(num_elements):
            dequeue_blobs = ["y_{}_{}".format(i, n) for i in range(num_blobs)]
            op = core.CreateOperator(
                "DequeueBlobs",
                ["queue"],
                dequeue_blobs,
                device_option=do)
            self.ws.run(op)
        for thread in threads:
            thread.join()
        op = core.CreateOperator("CloseBlobsQueue", ["queue"], [])
        self.ws.run(op)
        # Reassemble one output matrix per blob from the dequeued rows.
        ys = [np.vstack([self.ws.blobs["y_{}_{}".format(i, n)].fetch()
                         for n in range(num_elements)])
              for i in range(num_blobs)]
        for i in range(num_blobs):
            self.assertEqual(ys[i].shape, xs[i].shape)
            for j in range(num_elements):
                # Verify that the rows of the returned blob are a
                # permutation. The order may be different due to
                # different threads racing.
                self.assertTrue(
                    any(np.array_equal(xs[i][j], ys[i][k])
                        for k in range(num_elements)))
    @given(num_producers=st.integers(1, 10),
           num_consumers=st.integers(1, 10),
           capacity=st.integers(1, 5),
           num_blobs=st.integers(1, 3),
           do=st.sampled_from(hu.device_options))
    @settings(deadline=None, max_examples=50)
    def test_safe_blobs_queue(self, num_producers, num_consumers,
                              capacity, num_blobs, do):
        """Run concurrent producers and consumers over one blobs queue.

        Producer i runs its enqueue net (i + 1) * 10 times; the test passes
        when the consumers' counters add up to the total number of producer
        iterations.
        """
        init_net = core.Net('init_net')
        queue = init_net.CreateBlobsQueue(
            [], 1, capacity=capacity, num_blobs=num_blobs)
        producer_steps = []
        # Expected total number of enqueue iterations across all producers.
        truth = 0
        for i in range(num_producers):
            name = 'producer_%d' % i
            net = core.Net(name)
            blobs = [net.ConstantFill([], 1, value=1.0, run_once=False)
                     for times in range(num_blobs)]
            status = net.NextName()
            net.SafeEnqueueBlobs([queue] + blobs, blobs + [status])
            count = (i + 1) * 10
            step = core.execution_step(name, net, num_iter=count)
            truth += count
            producer_steps.append(step)
        producer_exit_net = core.Net('producer_exit_net')
        producer_exit_net.CloseBlobsQueue([queue], 0)
        # All producers run concurrently; the queue is closed once the
        # 'producers' sub-step has finished.
        producer_step = core.execution_step('producer', [
            core.execution_step(
                'producers', producer_steps, concurrent_substeps=True),
            core.execution_step('producer_exit', producer_exit_net)]
        )
        consumer_steps = []
        counters = []
        const_1 = init_net.ConstantFill([], 1, value=1.0)
        for i in range(num_consumers):
            name = 'consumer_%d' % i
            net1 = core.Net(name)
            # The dequeue yields num_blobs data blobs plus a status blob.
            blobs = net1.SafeDequeueBlobs([queue], num_blobs + 1)
            status = blobs[-1]
            net2 = core.Net(name + '_counter')
            counter = init_net.ConstantFill([], 1, value=0.0)
            counters.append(counter)
            # net2 adds one to this consumer's counter each time it runs.
            net2.Add([counter, const_1], counter)
            # NOTE(review): presumably the step stops once the status blob
            # reports that the queue is closed; confirm against
            # core.execution_step.
            consumer_steps.append(core.execution_step(
                name, [net1, net2], should_stop_blob=status))
        consumer_step = core.execution_step(
            'consumer', consumer_steps, concurrent_substeps=True)
        init_step = core.execution_step('init', init_net)
        worker_step = core.execution_step(
            'worker', [consumer_step, producer_step], concurrent_substeps=True)
        plan = core.Plan('test')
        plan.AddStep(init_step)
        plan.AddStep(worker_step)
        self.ws.run(plan)
        # Sum every consumer's counter and compare it with the producer total.
        v = 0
        for counter in counters:
            v += self.ws.blobs[str(counter)].fetch().tolist()
        self.assertEqual(v, truth)
    @given(num_queues=st.integers(1, 5),
           num_iter=st.integers(5, 10),
           capacity=st.integers(1, 5),
           num_blobs=st.integers(1, 3))
    @settings(deadline=None, max_examples=50)
    def test_weighted_sample_blobs_queue(
        self, num_queues, num_iter, capacity, num_blobs
    ):
        """Run WeightedSampleDequeueBlobs against several producer queues.

        Each of the num_queues producers runs its enqueue net num_iter times
        and then closes its queue. A single consumer records the dequeue
        status blob on every run, and the number of recorded entries must lie
        in [num_iter + 1, num_iter * num_queues + 1].
        """
        # Create BlobsQueue for each input queue
        print("num_queues", num_queues)
        init_net = core.Net('init_net')
        queues = [
            init_net.CreateBlobsQueue(
                [], 1, capacity=capacity, num_blobs=num_blobs
            ) for _ in range(num_queues)
        ]

        # Create multiple producer nets and one producer exit net per queue
        producer_steps = []
        producer_exit_nets = []
        for i in range(num_queues):
            name = 'producer_%d' % i
            net = core.Net(name)
            blobs = [net.ConstantFill([], 1, value=1.0, run_once=False)
                     for _ in range(num_blobs)]
            status = net.NextName()
            net.SafeEnqueueBlobs([queues[i]] + blobs, blobs + [status])

            exit_net = core.Net('producer_exit_%d' % i)
            exit_net.CloseBlobsQueue(queues[i], 0)
            producer_exit_nets.append(exit_net)

            # Run the enqueue net num_iter times, then close this queue.
            step = core.execution_step(
                name, [
                    core.execution_step(
                        'producer_%d' % i, [net], num_iter=num_iter
                    ),
                    core.execution_step('producer_exit_%d' % i, [exit_net]),
                ]
            )
            producer_steps.append(step)

        producer_step = core.execution_step(
            'producer', [
                core.execution_step(
                    'producers',
                    producer_steps,
                    concurrent_substeps=True,
                ),
            ]
        )

        # Collects the inputs handed to the Python op on every consumer run.
        status_lst = []

        def append(ins, outs):
            status_lst.append(ins)

        # Create one consumer dequeue net and one consumer exit step
        consumer_net = core.Net('weight_sample_dequeue_net')
        # Random table index drawn from [-1, num_blobs).
        table_idx_blob = np.random.randint(low=-1, high=num_blobs, size=1)
        blobs = consumer_net.WeightedSampleDequeueBlobs(
            queues,
            num_blobs + 1,
            weights=np.random.uniform(low=0.0, high=1.0, size=(num_queues,)),
            table_idx_blob=table_idx_blob[0],
        )
        status = blobs[-1]
        consumer_net.Python(append)(status)
        consumer_step = core.execution_step(
            'consumer',
            [
                core.execution_step(
                    'consumer', [consumer_net], should_stop_blob=status
                ),
                core.execution_step('producer_exit', producer_exit_nets)
            ]
        )

        init_step = core.execution_step('init', init_net)
        worker_step = core.execution_step(
            'worker', [producer_step, consumer_step], concurrent_substeps=True)

        plan = core.Plan('test')
        plan.AddStep(init_step)
        plan.AddStep(worker_step)

        self.ws.run(plan)
        # NOTE(review): the "+ 1" presumably accounts for the final dequeue
        # that reports the stop status; confirm against the operator.
        assert len(status_lst) >= num_iter + 1
        assert len(status_lst) <= num_iter * num_queues + 1
  1347. @given(
  1348. data=hu.tensor(),
  1349. **hu.gcs_cpu_only)
  1350. @settings(deadline=10000)
  1351. def test_squeeze_expand_dims(self, data, gc, dc):
  1352. dims = [0, 0]
  1353. if len(data.shape) > 2:
  1354. dims.append(2)
  1355. op = core.CreateOperator(
  1356. "ExpandDims",
  1357. ["data"],
  1358. ["expanded"],
  1359. dims=dims)
  1360. def expand_dims_ref(data, *args, **kw):
  1361. inc_dims = list(set(dims))
  1362. inc_dims.sort()
  1363. r = data
  1364. for dim in inc_dims:
  1365. r = np.expand_dims(r, axis=dim)
  1366. return (r, )
  1367. def squeeze_ref(data, *args, **kw):
  1368. dec_dims = list(set(dims))
  1369. dec_dims.sort(reverse=True)
  1370. r = data
  1371. for dim in dec_dims:
  1372. r = np.squeeze(r, axis=dim)
  1373. return (r, )
  1374. self.assertReferenceChecks(
  1375. device_option=gc,
  1376. op=op,
  1377. inputs=[data],
  1378. reference=expand_dims_ref,
  1379. output_to_grad='expanded',
  1380. grad_reference=squeeze_ref)
  1381. @given(**hu.gcs_cpu_only)
  1382. @settings(deadline=10000)
  1383. def test_tt_layer(self, gc, dc):
  1384. seed = 1234
  1385. np.random.seed(seed)
  1386. inp_sizes = [2, 2, 2, 2]
  1387. out_sizes = [2, 2, 2, 2]
  1388. tt_ranks = [1, 3, 3, 3, 1]
  1389. op = core.CreateOperator(
  1390. "TT",
  1391. ["X", "b", "cores"],
  1392. ["Y"],
  1393. inp_sizes=inp_sizes,
  1394. out_sizes=out_sizes,
  1395. tt_ranks=tt_ranks,
  1396. )
  1397. X = np.expand_dims(
  1398. np.random.rand(16).astype(np.float32), axis=0)
  1399. b = np.array([0] * 16).astype(np.float32)
  1400. cores = tt_core.init_tt_cores(inp_sizes, out_sizes, tt_ranks)
  1401. self.ws.create_blob("X").feed(X)
  1402. self.ws.create_blob("b").feed(b)
  1403. self.ws.create_blob("cores").feed(cores)
  1404. self.ws.run(op)
  1405. Y = self.ws.blobs[("Y")].fetch()
  1406. Y = Y.reshape([16])
  1407. golden = np.array([-9.51763490e-07, -1.28442286e-06,
  1408. -2.86281141e-07, 2.28865644e-07,
  1409. -1.96180017e-06, -1.78920531e-06,
  1410. 9.31094666e-07, -2.04273989e-07,
  1411. 1.70017107e-06, 1.64845711e-06,
  1412. -1.06099132e-06, -4.69111137e-07,
  1413. 6.57552358e-08, -1.28942040e-08,
  1414. -2.29114004e-07, -1.04262714e-06])
  1415. # This golden array is dependent on the specified inp_sizes, out_sizes,
  1416. # tt_ranks, and seed. Changing these will cause the test to fail.
  1417. self.assertAlmostEqual(np.linalg.norm(golden - Y), 0, delta=1e-10)
  @given(**hu.gcs_cpu_only)
  def test_tt_sls_layer(self, gc, dc):
      # Runs TTSparseLengthsSum on all-ones cores: five indices are pooled
      # into two segments (lengths 3 and 2) and the [2, 8] output is compared
      # against a golden matrix with zero tolerance.
      np.random.seed(1234)
      vocab_factors = [10, 10, 10]
      width_factors = [2, 2, 2]
      op = core.CreateOperator(
          "TTSparseLengthsSum",
          ["core0", "core1", "core2", "index", "lengths"],
          ["Y", "core0_output", "core1_output", "indices"],
          factor_i=vocab_factors,
          factor_j=width_factors,
          ranks=[1, 16, 16, 1],
          emb_size=8,
      )

      # Blobs are fed in the same order as the operator's input list.
      feeds = (
          ("core0", np.ones([10, 1, 2, 16]).astype(np.float32)),
          ("core1", np.ones([10, 16, 2, 16]).astype(np.float32)),
          ("core2", np.ones([10, 16, 2, 1]).astype(np.float32)),
          ("index", np.array([0, 1, 2, 1, 4], np.int64)),
          ("lengths", np.array([3, 2], np.int32)),
      )
      for blob_name, blob_value in feeds:
          self.ws.create_blob(blob_name).feed(blob_value)

      self.ws.run(op)
      result = self.ws.blobs["Y"].fetch()
      self.assertEqual(list(result.shape), [2, 8])

      expected = np.array([[768] * 8, [512] * 8])
      self.assertAlmostEqual(np.linalg.norm(expected - result), 0, delta=0)
  @given(**hu.gcs_cpu_only)
  def test_tt_sls_gradientop(self, gc, dc):
      # Smoke test for TTSparseLengthsSumGradient: all-ones inputs are fed
      # and each produced core gradient must have the shape of its core.
      op = core.CreateOperator(
          "TTSparseLengthsSumGradient",
          ["core0", "core1", "core2", "lengths",
           "core0_out", "core1_out", "indices", "dY"],
          ["dCore0", "dCore1", "dCore2"],
      )

      def ones(*dims):
          return np.ones(list(dims)).astype(np.float32)

      core_blobs = [
          ones(10, 1, 4, 16),
          ones(10, 16, 4, 16),
          ones(10, 16, 4, 1),
      ]
      row_indices = np.array(
          [[0, 0, 0], [1, 0, 0], [2, 0, 0], [1, 0, 0], [4, 0, 0]], np.int64)
      feeds = [
          ("core0", core_blobs[0]),
          ("core1", core_blobs[1]),
          ("core2", core_blobs[2]),
          ("lengths", np.array([3, 2], np.int32)),
          ("core0_out", ones(5, 4, 16)),
          ("core1_out", ones(5, 16, 16)),
          ("indices", row_indices),
          ("dY", ones(2, 64)),
      ]
      for blob_name, blob_value in feeds:
          self.ws.create_blob(blob_name).feed(blob_value)
      self.ws.run(op)

      # Fetch every gradient first, then compare shapes core by core.
      grads = [self.ws.blobs[grad_name].fetch()
               for grad_name in ("dCore0", "dCore1", "dCore2")]
      for grad, core_blob in zip(grads, core_blobs):
          self.assertEqual(list(grad.shape), list(core_blob.shape))
  @given(**hu.gcs_cpu_only)
  def test_tt_sls_gradientop1(self, gc, dc):
      # Same shape check as test_tt_sls_gradientop, but with cores whose
      # leading dimensions differ (101, 102, 153) and a [2, 16] dY.
      op = core.CreateOperator(
          "TTSparseLengthsSumGradient",
          ["core0", "core1", "core2", "lengths",
           "core0_out", "core1_out", "indices", "dY"],
          ["dCore0", "dCore1", "dCore2"],
      )

      def ones(*dims):
          return np.ones(list(dims)).astype(np.float32)

      core_blobs = [
          ones(101, 1, 2, 16),
          ones(102, 16, 2, 16),
          ones(153, 16, 4, 1),
      ]
      row_indices = np.array(
          [[0, 0, 0], [1, 0, 0], [2, 0, 0], [1, 0, 0], [4, 0, 0]], np.int64)
      feeds = [
          ("core0", core_blobs[0]),
          ("core1", core_blobs[1]),
          ("core2", core_blobs[2]),
          ("lengths", np.array([3, 2], np.int32)),
          ("core0_out", ones(5, 2, 16)),
          ("core1_out", ones(5, 4, 16)),
          ("indices", row_indices),
          ("dY", ones(2, 16)),
      ]
      for blob_name, blob_value in feeds:
          self.ws.create_blob(blob_name).feed(blob_value)
      self.ws.run(op)

      # Fetch every gradient first, then compare shapes core by core.
      grads = [self.ws.blobs[grad_name].fetch()
               for grad_name in ("dCore0", "dCore1", "dCore2")]
      for grad, core_blob in zip(grads, core_blobs):
          self.assertEqual(list(grad.shape), list(core_blob.shape))
  @given(**hu.gcs_cpu_only)
  @settings(deadline=10000)
  def test_tt_sls(self, gc, dc):
      # Gradient check of TTSparseLengthsSum for input index 0 (core0) and
      # output index 0, using a lengths vector that contains empty segments.
      vocab_factors = [10, 10, 10]
      width_factors = [2, 2, 2]
      op = core.CreateOperator(
          "TTSparseLengthsSum",
          ["core0", "core1", "core2", "index", "lengths"],
          ["Y", "core0_output", "core1_output", "indices"],
          factor_i=vocab_factors,
          factor_j=width_factors,
          ranks=[1, 16, 16, 1],
          emb_size=8,
      )
      op_inputs = [
          np.ones([10, 1, 2, 16]).astype(np.float32),
          np.ones([10, 16, 2, 16]).astype(np.float32),
          np.ones([10, 16, 2, 1]).astype(np.float32),
          np.array([0, 1, 2, 1, 4], np.int64),
          np.array([0, 3, 0, 0, 2, 0, 0], np.int32),
      ]
      self.assertGradientChecks(gc, op, op_inputs, 0, [0])
  @given(**hu.gcs_cpu_only)
  def test_tt_sls_repro(self, gc, dc):
      # Runs TTSparseLengthsSum with large vocabulary factors (125, 160, 200)
      # and indices up to 4000000 - 1; the [7, 64] output must equal the
      # golden matrix exactly, including all-zero rows for empty segments.
      vocab_factors = [125, 160, 200]
      width_factors = [4, 4, 4]
      op = core.CreateOperator(
          "TTSparseLengthsSum",
          ["core0", "core1", "core2", "index", "lengths"],
          ["Y", "core0_output", "core1_output", "indices"],
          factor_i=vocab_factors,
          factor_j=width_factors,
          ranks=[1, 16, 16, 1],
          emb_size=64,
      )

      last_row = 4000000 - 1
      feeds = (
          ("core0", np.ones([125, 1, 4, 16]).astype(np.float32)),
          ("core1", np.ones([160, 16, 4, 16]).astype(np.float32)),
          ("core2", np.ones([200, 16, 4, 1]).astype(np.float32)),
          ("index",
           np.array([0, last_row, 20000, 1000000, last_row], np.int64)),
          ("lengths", np.array([0, 3, 0, 0, 2, 0, 0], np.int32)),
      )
      for blob_name, blob_value in feeds:
          self.ws.create_blob(blob_name).feed(blob_value)

      self.ws.run(op)
      result = self.ws.blobs["Y"].fetch()
      self.assertEqual(list(result.shape), [7, 64])

      # One constant per output row, repeated across the 64 columns.
      row_values = (0, 768, 0, 0, 512, 0, 0)
      expected = np.array([[value] * 64 for value in row_values])
      self.assertAlmostEqual(np.linalg.norm(expected - result), 0, delta=0)
  @given(**hu.gcs_cpu_only)
  def test_tt_sls_gradientop2(self, gc, dc):
      # Shape check for TTSparseLengthsSumGradient when the lengths vector
      # contains empty segments (seven segments, two of them non-empty).
      op = core.CreateOperator(
          "TTSparseLengthsSumGradient",
          ["core0", "core1", "core2", "lengths",
           "core0_out", "core1_out", "indices", "dY"],
          ["dCore0", "dCore1", "dCore2"],
      )

      def ones(*dims):
          return np.ones(list(dims)).astype(np.float32)

      core_blobs = [
          ones(101, 1, 2, 16),
          ones(102, 16, 2, 16),
          ones(153, 16, 4, 1),
      ]
      row_indices = np.array(
          [[0, 0, 0], [1, 0, 0], [2, 0, 0], [1, 0, 0], [4, 0, 0]], np.int64)
      feeds = [
          ("core0", core_blobs[0]),
          ("core1", core_blobs[1]),
          ("core2", core_blobs[2]),
          ("lengths", np.array([0, 3, 0, 0, 2, 0, 0], np.int32)),
          ("core0_out", ones(5, 2, 16)),
          ("core1_out", ones(5, 4, 16)),
          ("indices", row_indices),
          ("dY", ones(7, 16)),
      ]
      for blob_name, blob_value in feeds:
          self.ws.create_blob(blob_name).feed(blob_value)
      self.ws.run(op)

      # Fetch every gradient first, then compare shapes core by core.
      grads = [self.ws.blobs[grad_name].fetch()
               for grad_name in ("dCore0", "dCore1", "dCore2")]
      for grad, core_blob in zip(grads, core_blobs):
          self.assertEqual(list(grad.shape), list(core_blob.shape))
  @given(num_workers=st.integers(1, 10),
         net_type=st.sampled_from(
             ["simple", "dag"] +
             (["async_dag"] if workspace.has_gpu_support else [])),
         **hu.gcs)
  @settings(deadline=10000)
  def test_dag_net_forking(self, net_type, num_workers, gc, dc):
      # Builds a small binary tree of FC layers, runs it several times with
      # identically seeded inputs under the requested net type / worker
      # count, and requires the input gradients to be identical across runs.
      from caffe2.python import brew
      from caffe2.python.model_helper import ModelHelper

      model = ModelHelper(name="test_model")
      batch = 10
      width = 2
      depth = 2
      iters = 5
      np.random.seed(1701)

      def add_fc(src, dst):
          # Keyword arguments are evaluated left to right, so the weight
          # constant is drawn from the RNG before the bias constant.
          brew.fc(
              model,
              src, dst,
              dim_in=width, dim_out=width,
              weight_init=('ConstantFill', dict(value=np.random.randn())),
              bias_init=('ConstantFill', dict(value=np.random.randn())))

      # Build a binary tree of FC layers, summing at each node.
      for level in reversed(range(depth)):
          for node in range(2 ** level):
              left, right = 2 * node, 2 * node + 1
              bottom_1 = "{}_{}".format(level + 1, left)
              bottom_2 = "{}_{}".format(level + 1, right)
              mid_1 = "{}_{}_m".format(level + 1, left)
              mid_2 = "{}_{}_m".format(level + 1, right)
              top = "{}_{}".format(level, node)
              add_fc(bottom_1, mid_1)
              add_fc(bottom_2, mid_2)
              model.net.Sum([mid_1, mid_2], top)

      model.net.SquaredL2Distance(["0_0", "label"], "xent")
      model.net.AveragedLoss("xent", "loss")
      input_to_grad = model.AddGradientOperators(["loss"])

      model.Proto().device_option.CopyFrom(gc)
      model.param_init_net.Proto().device_option.CopyFrom(gc)
      model.Proto().type = net_type
      model.Proto().num_workers = num_workers

      self.ws.run(model.param_init_net)
      print(str(model.Proto()))

      def run():
          import numpy as np
          # Re-seed so every invocation feeds exactly the same data.
          np.random.seed(1701)
          leaf_names = ["{}_{}".format(depth, j) for j in range(2 ** depth)]
          # NOTE(review): "label" is re-fed once per leaf blob, which
          # consumes RNG draws the golden value below depends on - confirm
          # this nesting against the upstream file.
          for leaf_name in leaf_names:
              self.ws.create_blob(leaf_name).feed(
                  np.random.randn(batch, width).astype(np.float32),
                  device_option=gc)
              self.ws.create_blob("label").feed(
                  np.random.randn(batch, width).astype(np.float32),
                  device_option=gc)
          self.ws.run(model.net)
          return [
              self.ws.blobs[str(input_to_grad[leaf_name])].fetch()
              for leaf_name in leaf_names]

      outputs = [run() for _ in range(iters)]
      baseline = outputs[0]
      for output in outputs[1:]:
          np.testing.assert_array_equal(baseline, output)
          self.assertAlmostEqual(np.sum(np.square(output)), 91.81752,
                                 delta=1e-2)
  @given(input=hu.tensor(min_dim=2, max_dim=6),
         slice_dim=st.integers(),
         a=st.integers(),
         b=st.integers(),
         is_empty=st.booleans(),
         **hu.gcs_cpu_only)
  @settings(deadline=None, max_examples=50)
  def test_slice(self, input, slice_dim, a, b, is_empty, gc, dc):
      # Checks the Slice operator against NumPy basic slicing along one
      # axis, then gradient-checks it for input index 0.
      #
      # slice_dim, a and b are arbitrary integers; they are wrapped into
      # valid ranges below. With is_empty set, the input is replaced by an
      # int32 tensor with a leading zero-sized dimension and the sliced
      # axis is shifted by one to compensate.
      slice_dim = slice_dim % len(input.shape)
      if is_empty:
          input = np.random.rand(*([0] + list(input.shape))).astype(np.int32)
          slice_dim += 1

      a = a % input.shape[slice_dim]
      b = b % input.shape[slice_dim] + 1

      # Start 0 / end -1 on every other axis selects the whole axis.
      start_vec = np.zeros(len(input.shape), dtype=np.int32)
      end_vec = np.ones(len(input.shape), dtype=np.int32) * -1
      start_vec[slice_dim] = min(a, b)
      end_vec[slice_dim] = max(a, b)

      op = core.CreateOperator(
          "Slice",
          ["input", "start", "end"],
          ["output"])

      def slice_ref(x, s, e):
          if len(s.shape) == 0:
              return x
          # Index with a tuple of slices: indexing with a *list* of slices
          # is deprecated in NumPy and rejected by newer releases.
          slc = tuple(
              slice(si, None if ei == -1 else ei) for si, ei in zip(s, e))
          return (x[slc], )

      self.assertReferenceChecks(gc, op, [input, start_vec, end_vec],
                                 slice_ref)
      self.assertGradientChecks(gc, op, [input, start_vec, end_vec], 0, [0])
  @given(data=hu.tensor(), **hu.gcs_cpu_only)
  @settings(deadline=10000)
  def test_shape(self, data, gc, dc):
      # The Shape operator must report the same extents as ndarray.shape.
      def shape_ref(x):
          return (x.shape, )

      op = core.CreateOperator("Shape", ["data"], ["shape"])
      self.assertReferenceChecks(gc, op, [data], shape_ref)
  @given(data=hu.tensor(), **hu.gcs_cpu_only)
  @settings(deadline=10000)
  def test_shape_with_axes(self, data, gc, dc):
      # Shape with an explicit `axes` argument must return the extent of
      # each requested axis, in request order (axes may repeat).
      def shape_ref(x, y):
          extents = []
          for axis in y:
              extents.append(x.shape[axis])
          return (extents,)

      # Ten random axis ids drawn from the valid range for this tensor.
      axes = np.random.randint(len(data.shape), size=10).tolist()
      op = core.CreateOperator("Shape", ["data"], ["shape"], axes=axes)
      self.assertReferenceChecks(gc, op, [data, axes], shape_ref)
  @given(x=hu.tensor(), y=hu.tensor(), **hu.gcs_cpu_only)
  @settings(deadline=1000)
  def test_has_elements(self, x, y, gc, dc):
      # HasElements is checked against "any input has non-zero length";
      # IsEmpty is checked against "the input has zero length".
      def any_non_empty(x, y):
          return (len(x) > 0 or len(y) > 0, )

      def is_empty(x):
          return (len(x) == 0, )

      op = core.CreateOperator("HasElements", ["x", "y"], ["has_elements"])
      self.assertReferenceChecks(gc, op, [x, y], any_non_empty)

      op = core.CreateOperator("IsEmpty", ["x"], ["is_empty"])
      self.assertReferenceChecks(gc, op, [x], is_empty)
  @given(initial_iters=st.integers(0, 100),
         max_iters=st.integers(0, 100))
  @settings(deadline=10000)
  def test_should_stop_as_criteria_net_execution_step(
          self, initial_iters, max_iters):
      # An execution step made of [criteria_net, net] with a should-stop
      # blob: the criteria net sets "stop" to (iter >= num_iters) and the
      # body net advances "iter". The final counter must equal
      # max(initial_iters, max_iters).
      body_net = core.Net("net")
      body_net.Iter(["iter"], ["iter"])

      counter_init = np.asarray([initial_iters]).astype(np.int64)
      self.ws.create_blob("iter").feed(counter_init)
      limit = np.asarray([max_iters]).astype(np.int64)
      self.ws.create_blob("num_iters").feed(limit)

      criteria_net = core.Net("criteria")
      criteria_net.GE(["iter", "num_iters"], ["stop"])
      criteria_net.Proto().external_output.extend(["stop"])

      step = core.execution_step(
          'step', [criteria_net, body_net],
          should_stop_blob=core.BlobReference("stop"))
      plan = core.Plan('plan')
      plan.AddStep(step)
      self.ws.run(plan)

      final_iters = self.ws.blobs["iter"].fetch()
      self.assertEqual(final_iters.dtype, np.int64)
      self.assertEqual(final_iters[0], max(initial_iters, max_iters))
  def test_disabled_execution_step(self):
      # Three sub-steps are run for three iterations. Sub-step 2 is built
      # as "disabled" (its criterion net writes 1 into its should-stop
      # blob) and is expected to leave its output at 0.0; the other two
      # are expected to end with 1.0.
      def make_nets(i, disabled):
          stop_blob = 'should_stop_{}'.format(i)
          out_blob = 'output_{}'.format(i)

          def flag_net(net_name, flag_value):
              # Net that writes bool(flag_value) into the should-stop blob
              # and exposes that blob as an external output.
              flag = core.Net(net_name)
              filled = flag.ConstantFill(
                  [],
                  shape=[1],
                  value=flag_value
              )
              flag.Cast([filled], [stop_blob], to='bool')
              flag.Proto().external_output.extend([stop_blob])
              return flag

          # init content and stop signal
          init = core.Net("init_{}".format(i))
          init.ConstantFill(
              [],
              [out_blob],
              shape=[1],
              value=0.0
          )
          init.Cast([out_blob], [stop_blob], to='bool')

          # decide if disabled or not
          criterion = flag_net(
              "criterion_{}".format(i), 1.0 if disabled else 0.0)

          # the body net is just to turn a 0 blob to 1
          body = core.Net("net_{}".format(i))
          body.ConstantFill(
              [],
              [out_blob],
              shape=[1],
              value=1.0
          )

          # always end the loop
          ender = flag_net("ender_{}".format(i), 1.0)

          return [init, criterion, body, ender]

      nets = [make_nets(1, False),
              make_nets(2, True),
              make_nets(3, False)]
      # step_3 deliberately has no should-stop blob.
      steps = [
          core.execution_step(
              'step_1', nets[0],
              should_stop_blob=core.BlobReference('should_stop_1')),
          core.execution_step(
              'step_2', nets[1],
              should_stop_blob=core.BlobReference('should_stop_2')),
          core.execution_step('step_3', nets[2]),
      ]
      expected = [1.0, 0.0, 1.0]

      plan = core.Plan('plan')
      plan.AddStep(core.execution_step('all_steps', steps, num_iter=3))
      self.ws.run(plan)

      # Output blobs are numbered from 1, one per sub-step.
      for position, want in enumerate(expected, start=1):
          got = self.ws.blobs['output_{}'.format(position)].fetch()[0]
          self.assertEqual(got, want)
  @given(initial_iters=st.integers(0, 100),
         num_iters=st.integers(0, 100))
  @settings(deadline=10000)
  def test_iter_count_with_execution_step(self, initial_iters, num_iters):
      # Running an Iter net for num_iters iterations of an execution step
      # must advance the int64 counter from initial_iters by num_iters.
      counter_net = core.Net("net")
      counter_net.Iter(["iter"], ["iter"])
      start_value = np.asarray([initial_iters]).astype(np.int64)
      self.ws.create_blob("iter").feed(start_value)

      step = core.ExecutionStep("step", [counter_net])
      step.SetIter(num_iters)

      plan = core.Plan("plan")
      plan.AddStep(step)
      self.ws.run(plan)

      final_iters = self.ws.blobs["iter"].fetch()
      self.assertEqual(final_iters.dtype, np.int64)
      self.assertEqual(final_iters[0], initial_iters + num_iters)
  @given(initial_iters=st.integers(0, 100),
         num_iters=st.integers(0, 100),
         num_nets=st.integers(0, 5))
  @settings(deadline=None, max_examples=50)
  def test_atomic_iter_with_concurrent_steps(self, initial_iters, num_iters,
                                             num_nets):
      # num_nets AtomicIter nets share one mutex and one "iter" counter and
      # run as concurrent sub-steps for num_iters iterations. The counter
      # must grow by num_iters * num_nets, and the exported stat registry
      # must report the same count under the atomic-iter key.
      init_net = core.Net("init_net")
      iter_mutex = init_net.CreateMutex([], ["iter_mutex"])
      self.ws.create_blob("iter").feed(
          np.asarray([initial_iters]).astype(np.int64))

      concurrent_steps = core.ExecutionStep("concurrent_steps",
                                            num_iter=num_iters)
      for net_id in range(num_nets):
          worker_net = core.Net("net_{}".format(net_id))
          worker_net.AtomicIter([iter_mutex, "iter"], ["iter"])
          concurrent_steps.AddSubstep(
              core.ExecutionStep("step", [worker_net]))
      concurrent_steps.SetConcurrentSubsteps(True)

      plan = core.Plan("plan")
      plan.AddStep(concurrent_steps)

      stats_net = core.Net("stats_net")
      stats_net.StatRegistryExport([], ["stats_key", "stats_val", "stats_ts"])

      for runnable in (init_net, plan, stats_net):
          self.ws.run(runnable)

      final_iters = self.ws.blobs["iter"].fetch()
      total_increments = num_iters * num_nets
      self.assertEqual(final_iters.dtype, np.int64)
      self.assertEqual(final_iters[0], initial_iters + total_increments)

      # The stat is only checked when at least one increment happened.
      if total_increments > 0:
          stats_key = self.ws.blobs["stats_key"].fetch()
          atomic_iter_key = b'atomic_iter/stats/iter/num_iter'
          self.assertIn(atomic_iter_key, stats_key)
          stat_val = self.ws.blobs["stats_val"].fetch()
          key_position = list(stats_key).index(atomic_iter_key)
          self.assertEqual(total_increments, stat_val[key_position])
  @given(a=hu.tensor(),
         src=st.sampled_from(list(viewkeys(_NUMPY_TYPE_TO_ENUM))),
         dst=st.sampled_from(list(viewkeys(_NUMPY_TYPE_TO_ENUM))),
         use_name=st.booleans(),
         **hu.gcs)
  @settings(deadline=1000)
  def test_cast(self, a, src, dst, use_name, gc, dc):
      # Cast from every supported source dtype to every supported target
      # dtype, naming the target either by enum value or by its lower-cased
      # enum name, and compare against ndarray.astype.
      a = a.astype(src)

      # Converting a float that lies outside the integral target's range is
      # undefined behaviour, so such inputs are clipped to the target range.
      float_types = [np.float32, np.float64]
      if src in float_types:
          if dst not in float_types and dst is not np.bool:
              limits = np.iinfo(dst)
              a = np.clip(a, limits.min, limits.max)

      def ref(data):
          return [data.astype(dst)]

      target = _NUMPY_TYPE_TO_ENUM[dst]
      if use_name:
          target = caffe2_pb2.TensorProto.DataType.Name(target).lower()

      op = core.CreateOperator('Cast', ["X"], ["Y"], to=target)
      self.assertDeviceChecks(dc, op, [a], [0])
      out, = self.assertReferenceChecks(gc, op, [a], ref)
      self.assertEqual(dst, out.dtype)
  @given(a=hu.tensor(),
         eps=hu.floats(min_value=1e-4, max_value=1e-2),
         a_grad=hu.tensor(elements=hu.floats(min_value=0.01, max_value=0.99)),
         eps_grad=hu.floats(min_value=1e-4, max_value=1e-3),
         **hu.gcs)
  @settings(deadline=10000)
  def test_logit(self, a, eps, a_grad, eps_grad, gc, dc):
      def ref(data):
          clipped = np.clip(data, eps, 1.0 - eps)
          return (np.log(clipped / (1 - clipped)), )

      # The forward pass is tested over the full input range to keep the
      # original coverage. The gradient is tested on a reduced range only:
      # the logit curve rises so sharply near 0 and 1 that the numerical
      # error grows dramatically there and the check would fail, hence the
      # (0.01, 0.99) inputs. Very occasionally the test may still fail from
      # accumulated random error; narrowing the range to (0.02, 0.98)
      # would improve stability.
      forward_op = core.CreateOperator('Logit', ["X"], ["Y"], eps=eps)
      self.assertDeviceChecks(dc, forward_op, [a], [0])
      self.assertReferenceChecks(gc, forward_op, [a], ref)

      grad_op = core.CreateOperator('Logit', ["X"], ["Y"], eps=eps_grad)
      self.assertGradientChecks(gc, grad_op, [a_grad], 0, [0],
                                threshold=0.04, stepsize=2e-3)
  @given(a=hu.tensor(elements=hu.floats(allow_nan=True)),
         value=hu.floats(min_value=-10, max_value=10),
         **hu.gcs)
  @settings(deadline=1000)
  def test_replace_nan(self, a, value, gc, dc):
      # ReplaceNaN must substitute `value` for every NaN entry and leave
      # all other entries untouched.
      def ref(data):
          nan_mask = np.isnan(data)
          patched = np.copy(data)
          patched[nan_mask] = value
          return (patched, )

      op = core.CreateOperator('ReplaceNaN', ["X"], ["Y"], value=value)
      self.assertDeviceChecks(dc, op, [a], [0])
      self.assertReferenceChecks(gc, op, [a], ref)
  @given(data=_dtypes(dtypes=[np.int32, np.int64, np.float32, np.bool]).
         flatmap(lambda dtype: hu.tensor(
             min_dim=1, dtype=dtype, elements=hu.elements_of_type(dtype))),
         has_input=st.booleans(),
         has_extra_shape=st.booleans(),
         extra_shape=st.lists(
             min_size=1, max_size=5, elements=st.integers(1, 5)),
         **hu.gcs)
  @settings(deadline=10000)
  def test_constant_fill(self, data, has_input, has_extra_shape, extra_shape,
                         gc, dc):
      # ConstantFill is exercised in three configurations: shaped like an
      # input tensor, shaped like an input tensor plus extra trailing
      # dimensions, and with an explicit `shape` argument and no input.
      # The fill value is the first element of `data`.
      dtype = data.dtype.type
      # in opt mode, np.bool is converted into np.bool_
      if data.dtype == np.dtype(np.bool):
          dtype = np.bool

      value = data.item(0)
      gt_shape = data.shape
      inputs = [data]
      enum_type = _NUMPY_TYPE_TO_ENUM[dtype]

      if has_input and has_extra_shape:
          op = core.CreateOperator('ConstantFill', ["X"], ["Y"],
                                   dtype=enum_type,
                                   extra_shape=extra_shape,
                                   value=value)
          # The expected output shape gains the extra trailing dimensions.
          gt_shape += tuple(extra_shape)
      elif has_input:
          op = core.CreateOperator('ConstantFill', ["X"], ["Y"],
                                   dtype=enum_type,
                                   value=value)
      else:
          op = core.CreateOperator('ConstantFill', [], ["Y"],
                                   dtype=enum_type,
                                   value=value,
                                   shape=list(gt_shape))
          inputs = []

      def ref(inputs=None):
          # The reference ignores its argument; only shape/value/dtype
          # captured above matter.
          return [np.full(shape=gt_shape, fill_value=value, dtype=dtype)]

      self.assertDeviceChecks(dc, op, inputs, [0])
      out, = self.assertReferenceChecks(gc, op, inputs, ref)
      self.assertEqual(dtype, out.dtype)
  @given(data=_dtypes(dtypes=[np.int32, np.int64, np.float32, np.bool]).
         flatmap(lambda dtype: hu.tensor(
             min_dim=1, dtype=dtype, elements=hu.elements_of_type(dtype))),
         **hu.gcs)
  @settings(deadline=1000)
  def test_constant_fill_from_tensor(self, data, gc, dc):
      # ConstantFill with two inputs: "X" supplies the output shape and the
      # one-element tensor "V" supplies the fill value.
      dtype = data.dtype.type
      if data.dtype == np.dtype(np.bool):
          dtype = np.bool

      fill_tensor = np.array([data.item(0)], dtype=dtype)
      op_inputs = [data, fill_tensor]
      op = core.CreateOperator(
          'ConstantFill',
          ["X", "V"],
          ["Y"],
          dtype=_NUMPY_TYPE_TO_ENUM[dtype],
      )

      def ref(x, v):
          # Built from the captured `data` / `fill_tensor`, not from x / v.
          filled = np.full(
              shape=data.shape, fill_value=fill_tensor[0], dtype=dtype)
          return [filled]

      self.assertDeviceChecks(dc, op, op_inputs, [0])
      out, = self.assertReferenceChecks(gc, op, op_inputs, ref)
      self.assertEqual(dtype, out.dtype)
  @given(t=st.integers(1, 5),
         n=st.integers(1, 5),
         d=st.integers(1, 5))
  @settings(deadline=10000)
  def test_elman_recurrent_network(self, t, n, d):
      # Builds an Elman-style recurrent step (FC on the previous hidden
      # state, plus the input, through a sigmoid), wraps it together with
      # its backward pass in a RecurrentNetwork operator, and checks the
      # outputs and gradients against a NumPy reference on the CPU.
      from caffe2.python import model_helper, brew
      np.random.seed(1701)

      # ---- forward step net ----
      step_net = model_helper.ModelHelper(name="Elman")
      step_proto = step_net.Proto()
      # TODO: name scope external inputs and outputs
      step_proto.external_input.extend(
          ["input_t", "seq_lengths", "timestep",
           "hidden_t_prev", "gates_t_w", "gates_t_b"])
      step_proto.type = "simple"
      step_proto.external_output.extend(["hidden_t", "gates_t"])
      brew.fc(step_net,
              "hidden_t_prev", "gates_t", dim_in=d, dim_out=d, axis=2)
      step_net.net.Sum(["gates_t", "input_t"], ["gates_t"])
      step_net.net.Sigmoid(["gates_t"], ["hidden_t"])

      # Initialize params for step net in the parent net
      for init_op in step_net.param_init_net.Proto().op:
          workspace.RunOperatorOnce(init_op)

      # ---- backward step net ----
      backward_ops, backward_mapping = core.GradientRegistry.GetBackwardPass(
          step_net.Proto().op, {"hidden_t": "hidden_t_grad"})
      backward_mapping = {
          str(blob): str(grad) for blob, grad in viewitems(backward_mapping)
      }
      backward_step_net = core.Net("ElmanBackward")
      del backward_step_net.Proto().op[:]
      backward_step_net.Proto().op.extend(backward_ops)
      assert backward_mapping["input_t"] == "gates_t_grad"

      # Each link is (blob inside the step net, external blob, offset).
      links = [
          ("hidden_t_prev", "hidden", 0),
          ("hidden_t", "hidden", 1),
          ("input_t", "input", 0),
      ]
      link_internal, link_external, link_offset = zip(*links)
      backward_links = [
          ("hidden_t_prev_grad", "hidden_grad", 0),
          ("hidden_t_grad", "hidden_grad", 1),
          ("gates_t_grad", "input_grad", 0),
      ]
      backward_link_internal, backward_link_external, backward_link_offset = \
          zip(*backward_links)

      backward_inputs = backward_step_net.Proto().external_input
      backward_inputs.extend(["hidden_t_grad"])
      backward_inputs.extend(step_net.Proto().external_input)
      backward_inputs.extend(step_net.Proto().external_output)

      # ---- RecurrentNetwork operator ----
      inputs = ["input", "seq_lengths", "gates_t_w", "gates_t_b", "hidden_input"]
      recurrent_inputs = ["hidden_input"]
      op = core.CreateOperator(
          "RecurrentNetwork",
          inputs,
          ["output", "hidden", "hidden_output", "step_workspaces"],
          alias_src=["hidden", "hidden"],
          alias_dst=["output", "hidden_output"],
          alias_offset=[1, -1],
          recurrent_states=["hidden"],
          initial_recurrent_state_ids=[
              inputs.index(name) for name in recurrent_inputs
          ],
          link_internal=link_internal,
          link_external=link_external,
          link_offset=link_offset,
          backward_link_internal=backward_link_internal,
          backward_link_external=backward_link_external,
          backward_link_offset=backward_link_offset,
          param=[inputs.index(p) for p in step_net.params],
          step_net=step_net.Proto(),
          backward_step_net=backward_step_net.Proto(),
          outputs_with_grads=[0],
      )

      # Feed order matters: it fixes the order of draws from the seeded RNG.
      workspace.FeedBlob(
          "input", np.random.randn(t, n, d).astype(np.float32))
      workspace.FeedBlob(
          "hidden_input", np.random.randn(1, n, d).astype(np.float32))
      workspace.FeedBlob(
          "seq_lengths", np.random.randint(0, t, size=(n,)).astype(np.int32))

      def reference(input, seq_lengths, gates_w, gates_b, hidden_input):
          # NumPy recurrence; seq_lengths and gates_b are not used here.
          T, N, D = input.shape[0], input.shape[1], input.shape[2]
          hidden = np.zeros(shape=(T + 1, N, D))
          assert hidden.shape[0] == T + 1
          assert hidden.shape[1] == N
          assert hidden.shape[2] == D

          hidden[0, :, :] = hidden_input
          for step in range(T):
              previous = hidden[step].reshape(1, N, D)
              gates = np.dot(previous, gates_w.T).reshape(1, N, D)
              gates = gates + input[step].reshape(1, N, D)
              hidden[step + 1] = sigmoid(gates)
          return hidden[1:], hidden, hidden[-1].reshape(1, N, D)

      def fetch_inputs():
          # Blobs are re-fetched from the global workspace for every check.
          return [workspace.FetchBlob(name)
                  for name in ["input", "seq_lengths", "gates_t_w",
                               "gates_t_b", "hidden_input"]]

      self.assertReferenceChecks(
          hu.cpu_do,
          op,
          fetch_inputs(),
          reference,
          outputs_to_check=[0, 1, 2])

      # Gradient-check input indices 0, 2 and 3.
      for param in [0, 2, 3]:
          self.assertGradientChecks(
              hu.cpu_do,
              op,
              fetch_inputs(),
              param,
              [0])
  @settings(suppress_health_check=[HealthCheck.filter_too_much], deadline=10000)
  @given(n=st.integers(1, 5),
         c=st.integers(1, 5),
         h=st.integers(1, 5),
         w=st.integers(1, 5),
         pad=st.integers(0, 2),
         block_size=st.integers(2, 3),
         **hu.gcs)
  def test_space_to_batch(self, n, c, h, w, pad, block_size, gc, dc):
      # Device and gradient checks for SpaceToBatch on random (n, c, h, w)
      # inputs. Examples whose padded height or width is not a multiple of
      # the block size are discarded.
      for extent in (h, w):
          assume((extent + 2 * pad) % block_size == 0)

      X = np.random.randn(n, c, h, w).astype(np.float32)
      op = core.CreateOperator(
          "SpaceToBatch", ["X"], ["Y"], pad=pad, block_size=block_size)
      self.assertDeviceChecks(dc, op, [X], [0])
      self.assertGradientChecks(gc, op, [X], 0, [0])
  @settings(suppress_health_check=[HealthCheck.filter_too_much], deadline=10000)
  @given(n=st.integers(1, 5),
         c=st.integers(1, 5),
         h=st.integers(1, 5),
         w=st.integers(1, 5),
         pad=st.integers(0, 2),
         block_size=st.integers(2, 3),
         **hu.gcs)
  def test_batch_to_space(self, n, c, h, w, pad, block_size, gc, dc):
      # Device and gradient checks for BatchToSpace. The input has
      # n * block_size**2 batches and spatial extents equal to the padded
      # height/width divided by the block size; examples where that
      # division is not exact are discarded.
      for extent in (h, w):
          assume((extent + 2 * pad) % block_size == 0)

      batches = n * block_size * block_size
      block_rows = (h + 2 * pad) // block_size
      block_cols = (w + 2 * pad) // block_size
      X = np.random.randn(
          batches, c, block_rows, block_cols).astype(np.float32)

      op = core.CreateOperator(
          "BatchToSpace", ["X"], ["Y"], pad=pad, block_size=block_size)
      self.assertDeviceChecks(dc, op, [X], [0])
      self.assertGradientChecks(gc, op, [X], 0, [0])
  @given(X=hu.tensor(),
         in_place=st.booleans(),
         scale=hu.floats(min_value=-2.0, max_value=2.0),
         **hu.gcs)
  @settings(deadline=10000)
  def test_scale(self, X, in_place, scale, gc, dc):
      # Device and gradient checks for Scale, writing either to a separate
      # output blob or back into the input blob.
      output_name = "X" if in_place else "Y"
      op = core.CreateOperator("Scale", ["X"], [output_name], scale=scale)
      self.assertDeviceChecks(dc, op, [X], [0])
      self.assertGradientChecks(gc, op, [X], 0, [0])
  @given(s=st.text())
  def test_string_serde(self, s):
      # A byte string fed into blob "a" must survive a serialize /
      # deserialize round trip into blob "b". Non-ASCII characters are
      # dropped from the generated text first.
      payload = s.encode('ascii', 'ignore')
      self.ws.create_blob("a").feed(payload)
      serialized = self.ws.blobs["a"].serialize("a")
      self.ws.create_blob("b").deserialize(serialized)
      for blob_name in ("a", "b"):
          self.assertEqual(payload, self.ws.blobs[blob_name].fetch())
  @given(pad=st.integers(0, 3),
         size=st.integers(1, 10),
         input_channels=st.integers(1, 5),
         batch_size=st.integers(1, 5),
         order=st.sampled_from(["NCHW", "NHWC"]),
         mode=st.sampled_from(["constant", "reflect", "edge"]),
         **hu.gcs)
  @settings(deadline=None, max_examples=50)
  def test_same_pad_image(self, pad, size, input_channels, batch_size, order,
                          mode, gc, dc):
      # PadImage with one symmetric `pad` applied to both spatial axes,
      # compared against np.pad in both NCHW and NHWC layouts. Examples
      # with size <= pad are discarded.
      assume(size > pad)

      op = core.CreateOperator(
          "PadImage",
          ["X"],
          ["Y"],
          pad=pad,
          mode=mode,
          order=order,
      )

      # Only the two spatial axes are padded; their position depends on
      # the layout.
      no_pad = (0, 0)
      spatial_pad = (pad, pad)
      if order == "NHWC":
          input_shape = (batch_size, size, size, input_channels)
          pad_width = (no_pad, spatial_pad, spatial_pad, no_pad)
      else:
          input_shape = (batch_size, input_channels, size, size)
          pad_width = (no_pad, no_pad, spatial_pad, spatial_pad)
      X = np.random.rand(*input_shape).astype(np.float32) - 0.5

      def numpy_pad_ref(x):
          return (np.pad(x, pad_width, mode),)

      self.assertReferenceChecks(gc, op, [X], numpy_pad_ref)
      self.assertDeviceChecks(dc, op, [X], [0])
      self.assertGradientChecks(gc, op, [X], 0, [0])
  @given(pad_t=st.integers(0, 3),
         pad_l=st.integers(0, 3),
         pad_b=st.integers(0, 3),
         pad_r=st.integers(0, 3),
         size=st.integers(1, 10),
         input_channels=st.integers(1, 5),
         batch_size=st.integers(1, 5),
         order=st.sampled_from(["NCHW", "NHWC"]),
         mode=st.sampled_from(["constant", "reflect", "edge"]),
         **hu.gcs)
  @settings(deadline=None, max_examples=50)
  def test_pad_image(self, pad_t, pad_l, pad_b, pad_r, size, input_channels,
                     batch_size, order, mode, gc, dc):
      # PadImage with independent top/left/bottom/right padding, compared
      # against np.pad in both NCHW and NHWC layouts. Examples whose size
      # does not exceed the largest pad are discarded.
      assume(size > max(pad_b, pad_r, pad_t, pad_l))

      op = core.CreateOperator(
          "PadImage",
          ["X"],
          ["Y"],
          pad_t=pad_t,
          pad_l=pad_l,
          pad_b=pad_b,
          pad_r=pad_r,
          mode=mode,
          order=order,
      )

      # (top, bottom) pads the first spatial axis and (left, right) the
      # second; the spatial axes' position depends on the layout.
      no_pad = (0, 0)
      first_axis_pad = (pad_t, pad_b)
      second_axis_pad = (pad_l, pad_r)
      if order == "NHWC":
          input_shape = (batch_size, size, size, input_channels)
          pad_width = (no_pad, first_axis_pad, second_axis_pad, no_pad)
      else:
          input_shape = (batch_size, input_channels, size, size)
          pad_width = (no_pad, no_pad, first_axis_pad, second_axis_pad)
      X = np.random.rand(*input_shape).astype(np.float32) - 0.5

      def numpy_pad_ref(x):
          return (np.pad(x, pad_width, mode),)

      self.assertReferenceChecks(gc, op, [X], numpy_pad_ref)
      self.assertDeviceChecks(dc, op, [X], [0])
      self.assertGradientChecks(gc, op, [X], 0, [0])
  @given(size=st.integers(7, 10),
         input_channels=st.integers(1, 10),
         batch_size=st.integers(1, 3),
         order=st.sampled_from(["NCHW", "NHWC"]),
         epsilon=hu.floats(min_value=1e-4, max_value=1e-2),
         **hu.gcs_cpu_only)
  @settings(deadline=10000)
  def test_instance_norm(self, size, input_channels, batch_size, order,
                         epsilon, gc, dc):
      """Check InstanceNorm against a NumPy reference in both layouts.

      After the reference check, the operator is run 100 times in a fresh
      workspace and the input blobs are verified to be unchanged.
      """
      op = core.CreateOperator(
          "InstanceNorm",
          ["X", "scale", "bias"],
          ["Y"],
          order=order,
          epsilon=epsilon,
      )

      # Fixed seed: the inputs are the same for every generated example
      # with the same dimensions.
      np.random.seed(1701)
      scale = np.random.rand(input_channels).astype(np.float32) + 0.5
      bias = np.random.rand(input_channels).astype(np.float32) - 0.5
      X = np.random.rand(
          batch_size, input_channels, size, size).astype(np.float32) - 0.5
      if order == "NHWC":
          # NCHW -> NHWC view of the same data.
          X = X.transpose(0, 2, 3, 1)

      def ref_nchw(x, scale, bias):
          # One row per (sample, channel) pair; normalize each row.
          flat = x.reshape(batch_size * input_channels, size * size)
          normed = flat - flat.mean(1, keepdims=True)
          normed /= np.sqrt(flat.var(1, keepdims=True) + epsilon)
          normed = normed.reshape(batch_size, input_channels, size, size)
          per_channel = (1, input_channels, 1, 1)
          normed = normed * scale.reshape(per_channel)
          normed = normed + bias.reshape(per_channel)
          return (normed, )

      def ref_nhwc(x, scale, bias):
          # Convert to NCHW, reuse the NCHW reference, convert back.
          as_nchw = x.transpose(0, 3, 1, 2)
          result = ref_nchw(as_nchw, scale, bias)[0]
          return (result.transpose(0, 2, 3, 1), )

      reference = ref_nchw if order == "NCHW" else ref_nhwc
      self.assertReferenceChecks(gc, op, [X, scale, bias], reference)
      # TODO(jiayq): when there are backward and GPU implementations, enable
      # these two.
      # self.assertDeviceChecks(dc, op, [X, scale, bias], [0])
      # self.assertGradientChecks(gc, op, [X, scale, bias], 0, [0])

      # Repeated runs must leave the input blobs untouched.
      ws = workspace.C.Workspace()
      feeds = [("X", X), ("scale", scale), ("bias", bias)]
      for blob_name, value in feeds:
          ws.create_blob(blob_name).feed(value)
      for _ in range(100):
          ws.run(op)
      for blob_name, value in feeds:
          np.testing.assert_array_equal(ws.blobs[blob_name].fetch(), value)
  @given(inp=_dtypes().flatmap(lambda dt: _tensor_and_indices(
      elements=hu.elements_of_type(dt), dtype=dt)),
         **hu.gcs)
  @settings(deadline=10000)
  def test_sparse_to_dense(self, inp, gc, dc):
      """Check SparseToDense with and without the shape-providing input D.

      Both operator variants are compared with a NumPy scatter-add
      reference. The gradient with respect to X is checked for float32
      inputs only.
      """
      first_dim, X, I = inp
      on_gpu = gc.device_type in {caffe2_pb2.CUDA, caffe2_pb2.HIP}

      if on_gpu and X.dtype != np.dtype('float32'):
          # Cuda only support 32 bit float
          print("Bailout {}".format(X.dtype))
          return
      if on_gpu:
          # Cuda version only support int32
          I = I.astype(np.int32)

      if X.dtype in (np.dtype('int64'), np.dtype('int32')):
          # Keep individual values and their total below the int32 maximum.
          int32_max = np.iinfo('int32').max
          magnitudes = np.abs(X.ravel())
          assume((magnitudes.max() < int32_max).all())
          assume(magnitudes.astype(np.int64).sum() < int32_max)

      # values don't matter
      D = np.zeros((first_dim,) + X.shape[1:]).astype(X.dtype)
      op = core.CreateOperator("SparseToDense", ["I", "X", "D"], ["Y"])
      op_noshapeinfer = core.CreateOperator(
          "SparseToDense", ["I", "X"], ["Y"])

      def scatter_add(dense, indices, values):
          # Accumulate values[k] into row indices[k]; repeated indices add up.
          for k, row in enumerate(indices):
              dense[row] += values[k]
          return [dense]

      def sparse_to_dense(I, X, D):
          return scatter_add(np.zeros(D.shape, dtype=X.dtype), I, X)

      def sparse_to_dense_noshapeinfer(I, X):
          # Without D, the leading dimension is the largest index plus one.
          out_shape = (np.max(I) + 1,) + X.shape[1:]
          return scatter_add(np.zeros(out_shape, dtype=X.dtype), I, X)

      self.assertReferenceChecks(gc, op, [I, X, D], sparse_to_dense)
      self.assertReferenceChecks(
          gc, op_noshapeinfer, [I, X], sparse_to_dense_noshapeinfer)
      if X.dtype == np.float32:
          self.assertGradientChecks(gc, op, [I, X, D], 1, [0])
  @given(inputs=hu.tensors(n=2, min_dim=2, max_dim=2), **hu.gcs_cpu_only)
  @settings(deadline=10000)
  def test_dot_product(self, inputs, gc, dc):
      """Check DotProduct on two 2-D tensors against a row-wise NumPy sum.

      Runs the reference and device checks, then the gradient check with
      respect to each of the two inputs.
      """
      X, Y = inputs
      op = core.CreateOperator("DotProduct", ["X", "Y"], 'out')

      def rowwise_dot(lhs, rhs):
          return (np.sum(lhs * rhs, axis=1), )

      operands = [X, Y]
      self.assertReferenceChecks(gc, op, operands, rowwise_dot)
      self.assertDeviceChecks(dc, op, operands, [0])
      for input_index in (0, 1):
          self.assertGradientChecks(gc, op, operands, input_index, [0])
  @given(N=st.integers(min_value=2, max_value=10),
         M=st.integers(min_value=2, max_value=10),
         K=st.integers(min_value=2, max_value=10),
         pad_value=hu.floats(min_value=0.1, max_value=1.0),
         **hu.gcs_cpu_only)
  @settings(deadline=10000)
  def test_dot_product_with_padding(self, N, M, K, pad_value, gc, dc):
      """Check DotProductWithPadding on inputs of widths M and K.

      The reference extends the narrower input to the wider one's width
      with `pad_value` and then takes the row-wise dot product.
      """
      X = np.random.rand(N, M).astype(np.float32) - 0.5
      Y = np.random.rand(N, K).astype(np.float32) - 0.5
      op = core.CreateOperator("DotProductWithPadding", ["X", "Y"], 'out',
                               pad_value=pad_value)

      def padded_dot(X, Y):
          # When the widths are equal, Y is copied into the buffer.
          narrow, wide = (X, Y) if M < K else (Y, X)
          padded = np.ones((N, max(M, K))).astype(np.float32) * pad_value
          padded[:, :narrow.shape[1]] = narrow
          return (np.sum(padded * wide, axis=1), )

      operands = [X, Y]
      self.assertReferenceChecks(gc, op, operands, padded_dot)
      self.assertDeviceChecks(dc, op, operands, [0])
      for input_index in (0, 1):
          self.assertGradientChecks(gc, op, operands, input_index, [0])
  @given(N=st.integers(min_value=2, max_value=10),
         M=st.integers(min_value=2, max_value=10),
         pad_value=hu.floats(min_value=0.1, max_value=1.0),
         **hu.gcs_cpu_only)
  @settings(deadline=10000)
  def test_dot_product_with_rep_padding(self, N, M, pad_value, gc, dc):
      """Check DotProductWithPadding with `replicate=True`.

      X has width M and Y has width K = 2 * M. The reference tiles the
      narrower input along the column axis until it matches the wider one
      and then takes the row-wise dot product.
      """
      K = 2 * M
      X = np.random.rand(N, M).astype(np.float32) - 0.5
      Y = np.random.rand(N, K).astype(np.float32) - 0.5
      op = core.CreateOperator("DotProductWithPadding", ["X", "Y"], 'out',
                               replicate=True,
                               pad_value=pad_value)

      def dotproduct(X, Y):
          # np.tile(a, (1, n)) repeats the columns n times, the same result
          # as numpy.matlib.repmat(a, 1, n) without importing the deprecated
          # numpy.matlib module on every reference call.
          if M < K:
              Z = np.tile(X, (1, K // M))
              return (np.sum(Z * Y, axis=1), )
          else:
              # Not reached while K = 2 * M; kept so the reference still
              # covers the case where Y is the narrower input.
              Z = np.tile(Y, (1, M // K))
              return (np.sum(Z * X, axis=1), )

      self.assertReferenceChecks(gc, op, [X, Y], dotproduct)
      self.assertDeviceChecks(dc, op, [X, Y], [0])
      self.assertGradientChecks(gc, op, [X, Y], 0, [0])
      self.assertGradientChecks(gc, op, [X, Y], 1, [0])
  @given(N=st.integers(min_value=2, max_value=10),
         M=st.integers(min_value=2, max_value=10), **hu.gcs_cpu_only)
  @settings(deadline=10000)
  def test_ensure_dense(self, N, M, gc, dc):
      """Check that EnsureDense passes its input through unchanged.

      Exercised twice with a fresh random input each time: first writing
      in place (output blob "X"), then to a separate output blob "out".
      """
      for output_name in ("X", "out"):
          X = np.random.rand(N, M).astype(np.float32) - 0.5
          op = core.CreateOperator("EnsureDense", ["X"], output_name)
          self.assertReferenceChecks(gc, op, [X], lambda x: [x])
          self.assertDeviceChecks(dc, op, [X], [0])
  @given(N=st.integers(min_value=10, max_value=100),
         M=st.integers(min_value=2, max_value=10),
         num_buckets=st.integers(min_value=1, max_value=5),
         **hu.gcs_cpu_only)
  @settings(deadline=10000)
  def test_accumulate_histogram_op(self, N, M, num_buckets, gc, dc):
      """Check AccumulateHistogram against a NumPy bucket count.

      The reference uses `num_buckets + 2` bins: bin 0 for values below
      `lower_bound`, the last bin for values at or above `upper_bound`, and
      equal-width bins in between. It returns the same counts for both
      outputs.
      """
      X = np.random.rand(N, M).astype(np.float32)
      lower_bound, upper_bound = 0.1, 0.9
      op = core.CreateOperator("AccumulateHistogram", ["X"],
                               ['cur_hist', 'acc_hist'],
                               lower_bound=lower_bound,
                               upper_bound=upper_bound,
                               num_buckets=num_buckets)

      def histogram(X):
          counts = np.zeros((num_buckets + 2, ), dtype=np.int32)
          bucket_width = (upper_bound - lower_bound) / num_buckets

          # Bucket index per element; the zero initialisation already puts
          # values below lower_bound into bin 0.
          bucket_of = np.zeros((N, M), dtype=np.int32)
          bucket_of[X >= upper_bound] = num_buckets + 1
          in_range = (X >= lower_bound) & (X < upper_bound)
          bucket_of[in_range] = (
              (X[in_range] - lower_bound) / bucket_width + 1
          ).astype(np.int32)

          # Unbuffered add, so repeated bucket indices are each counted.
          np.add.at(counts, bucket_of.ravel(), 1)
          return [counts, counts]

      self.assertDeviceChecks(dc, op, [X], [0, 1])
      self.assertReferenceChecks(gc, op, [X], histogram)
  @settings(max_examples=1, deadline=None)
  @given(
      queue_capacity=st.integers(2, 2),
      time_sleep=st.integers(5, 10),
      num_blobs_to_equeue=st.integers(1, 1),
      num_blobs_to_dequeue=st.integers(2, 2),
  )
  def test_safe_dequeue_blob__raises_exception_when_hang(
      self,
      queue_capacity,
      time_sleep,
      num_blobs_to_equeue,
      num_blobs_to_dequeue,
  ):
      r"""
      Tests SafeDequeueBlobsOp being cancellable.

      Creates a blobs queue whose `num_blobs` is smaller than the number of
      blobs SafeDequeueBlobs is asked to dequeue, which is intended to make
      the net hang when it runs. A helper thread sleeps for `time_sleep`
      seconds and then cancels the running net; the test passes if `run()`
      raises an exception.
      """

      def _net_instance_cancel(net_instance):
          # Give the net time to start before cancelling it.
          time.sleep(time_sleep)
          net_instance.cancel()

      init_net = core.Net("init_net")
      init_net.Proto().type = "async_scheduling"

      queue = init_net.CreateBlobsQueue(
          [],
          "queue_name",
          capacity=queue_capacity,
          num_blobs=num_blobs_to_equeue,
      )

      ws = workspace.Workspace()
      ws.create_net(init_net).run()

      net = core.Net("net")
      net.Proto().type = "async_scheduling"
      net.SafeDequeueBlobs([queue], num_blobs_to_dequeue)

      net_instance = ws.create_net(net)

      t = threading.Thread(target=_net_instance_cancel, args=(net_instance,))
      t.start()

      try:
          with self.assertRaises(Exception):
              net_instance.run()
      finally:
          # Always wait for the canceller, even when the assertion fails,
          # so the thread does not outlive this test.
          t.join()
  # Run the test suite when this file is executed as a script.
  if __name__ == "__main__":
      unittest.main()