layers_test.py 91 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516
  1. import hypothesis.strategies as st
  2. import numpy as np
  3. import numpy.testing as npt
  4. from hypothesis import given, settings
  5. import caffe2.python.hypothesis_test_util as hu
  6. from caffe2.python import (
  7. layer_model_instantiator,
  8. core,
  9. schema,
  10. workspace,
  11. )
  12. from caffe2.python.layers.layers import (
  13. AccessedFeatures,
  14. almost_equal_schemas,
  15. get_key,
  16. IdList,
  17. IdScoreList,
  18. InstantiationContext,
  19. is_request_only_scalar,
  20. set_request_only,
  21. )
  22. from caffe2.python.layers.tags import Tags
  23. from caffe2.python.layer_test_util import (
  24. LayersTestCase,
  25. OpSpec,
  26. )
  27. import logging
  28. logger = logging.getLogger(__name__)
  29. class TestLayers(LayersTestCase):
  def testSparseDropoutWithReplacement(self):
      """Run SparseDropoutWithReplacement over a ten-example IdList.

      After the train net runs, the output ids and lengths must equal the
      inputs.  After the eval and predict nets run, the blobs named
      ``values_auto_0`` / ``lengths_auto_0`` must hold -1 for every id and
      the unchanged lengths.
      """
      id_list = schema.NewRecord(self.model.net, IdList)
      self.model.output_schema = schema.Struct()

      # Ten examples of length one, holding the ids 1..10.
      lengths_blob, values_blob = id_list.field_blobs()[:2]
      expected_lengths = np.ones(10).astype(np.int32)
      input_ids = np.arange(1, 11).astype(np.int64)
      workspace.FeedBlob(lengths_blob, expected_lengths)
      workspace.FeedBlob(values_blob, input_ids)

      # NOTE(review): the positional 0.0 / 0.5 / 1.0 / -1 look like the
      # train / eval / predict dropout ratios and the replacement id --
      # confirm against the layer's signature.
      dropped = self.model.SparseDropoutWithReplacement(
          id_list, 0.0, 0.5, 1.0, -1, output_names_or_num=1
      )
      self.assertEqual(schema.List(schema.Scalar(np.int64,)), dropped)

      init_net, training_net = self.get_training_nets()
      evaluation_net = self.get_eval_net()
      prediction_net = self.get_predict_net()

      # Training pass: ids and lengths come out exactly as they went in.
      workspace.RunNetOnce(init_net)
      workspace.RunNetOnce(training_net)
      trained_ids = workspace.FetchBlob(dropped.items())
      trained_lengths = workspace.FetchBlob(dropped.lengths())
      self.assertBlobsEqual(trained_ids, input_ids)
      self.assertBlobsEqual(trained_lengths, expected_lengths)

      # Eval then predict pass: every id is replaced, lengths are kept.
      workspace.RunNetOnce(evaluation_net)
      workspace.RunNetOnce(prediction_net)
      replaced_ids = workspace.FetchBlob("values_auto_0")
      replaced_lengths = workspace.FetchBlob("lengths_auto_0")
      all_replaced = np.full(10, -1).astype(np.int64)
      self.assertBlobsEqual(replaced_ids, all_replaced)
      self.assertBlobsEqual(replaced_lengths, expected_lengths)
  def testAddLoss(self):
      """Verify how ``add_loss`` names and stores losses on the model.

      Covers the default name (``unnamed``), an explicit name, the
      ``_auto_N`` suffixes that appear when ``'addLoss'`` is reused, and
      losses given as a Struct or a NamedTuple.

      Membership is checked with ``assertIn`` instead of bare ``assert`` so
      the checks are not stripped under ``python -O`` and a failure reports
      the values involved.
      """
      input_record_LR = self.new_record(
          schema.Struct(
              ('label', schema.Scalar((np.float64, (1, )))),
              ('logit', schema.Scalar((np.float32, (2, )))),
              ('weight', schema.Scalar((np.float64, (1, ))))
          )
      )
      loss_LR = self.model.BatchLRLoss(input_record_LR)

      # No name given: the loss is stored under 'unnamed'.
      self.model.add_loss(loss_LR)
      self.assertIn('unnamed', self.model.loss)
      self.assertEqual(
          schema.Scalar((np.float32, tuple())), self.model.loss.unnamed
      )
      self.assertEqual(loss_LR, self.model.loss.unnamed)

      # First use of an explicit name keeps the name as given.
      self.model.add_loss(loss_LR, 'addLoss')
      self.assertIn('addLoss', self.model.loss)
      self.assertEqual(
          schema.Scalar((np.float32, tuple())), self.model.loss.addLoss
      )
      self.assertEqual(loss_LR, self.model.loss.addLoss)

      # Reusing the name with a Scalar loss yields 'addLoss_auto_0'.
      self.model.add_loss(
          schema.Scalar(
              dtype=np.float32, blob=core.BlobReference('loss_blob_1')
          ), 'addLoss'
      )
      self.assertIn('addLoss_auto_0', self.model.loss)
      self.assertEqual(
          schema.Scalar((np.float32, tuple())), self.model.loss.addLoss_auto_0
      )
      self.assertIn(
          core.BlobReference('loss_blob_1'), self.model.loss.field_blobs()
      )

      # Reusing the name with a Struct loss yields 'addLoss_auto_1'.
      self.model.add_loss(
          schema.Struct(
              (
                  'structName', schema.Scalar(
                      dtype=np.float32,
                      blob=core.BlobReference('loss_blob_2')
                  )
              )
          ), 'addLoss'
      )
      self.assertIn('addLoss_auto_1', self.model.loss)
      self.assertEqual(
          schema.Struct(('structName', schema.Scalar((np.float32, tuple())))),
          self.model.loss.addLoss_auto_1
      )
      self.assertIn(
          core.BlobReference('loss_blob_2'), self.model.loss.field_blobs()
      )

      # Reusing the name with a NamedTuple loss yields 'addLoss_auto_2',
      # whose fields are named '<prefix>_<index>'.
      loss_in_tuple_0 = schema.Scalar(
          dtype=np.float32, blob=core.BlobReference('loss_blob_in_tuple_0')
      )
      loss_in_tuple_1 = schema.Scalar(
          dtype=np.float32, blob=core.BlobReference('loss_blob_in_tuple_1')
      )
      loss_tuple = schema.NamedTuple(
          'loss_in_tuple', loss_in_tuple_0, loss_in_tuple_1
      )
      self.model.add_loss(loss_tuple, 'addLoss')
      self.assertIn('addLoss_auto_2', self.model.loss)
      self.assertEqual(
          schema.Struct(
              ('loss_in_tuple_0', schema.Scalar((np.float32, tuple()))),
              ('loss_in_tuple_1', schema.Scalar((np.float32, tuple())))
          ), self.model.loss.addLoss_auto_2
      )
      self.assertIn(
          core.BlobReference('loss_blob_in_tuple_0'),
          self.model.loss.field_blobs()
      )
      self.assertIn(
          core.BlobReference('loss_blob_in_tuple_1'),
          self.model.loss.field_blobs()
      )
  def testFilterMetricSchema(self):
      """Check metric registration and ``filter_metrics_schema``.

      Metric names containing ``:`` are expected to appear as nested
      structs in ``metrics_schema``; filtering with a set of names must
      leave only the listed fields.
      """
      for metric_name in ("a:b", "a:c", "d"):
          self.model.add_metric_field(metric_name, schema.Scalar())

      all_metrics = schema.Struct(
          ("a", schema.Struct(
              ("b", schema.Scalar()),
              ("c", schema.Scalar()),
          )),
          ("d", schema.Scalar()),
      )
      self.assertEqual(self.model.metrics_schema, all_metrics)

      # Drop "a:c"; "a:b" and "d" must survive.
      self.model.filter_metrics_schema({"a:b", "d"})
      kept_metrics = schema.Struct(
          ("a", schema.Struct(
              ("b", schema.Scalar()),
          )),
          ("d", schema.Scalar()),
      )
      self.assertEqual(self.model.metrics_schema, kept_metrics)
  def testAddOutputSchema(self):
      """Check that ``add_output_schema`` appends fields and rejects reuse.

      Two fields are added one after another and the resulting
      ``output_schema`` is compared after each step; adding a field under
      an existing name must raise ``AssertionError``.
      """
      # First field: an empty struct.
      self.model.add_output_schema('struct', schema.Struct())
      after_first = schema.Struct(('struct', schema.Struct()))
      self.assertEqual(self.model.output_schema, after_first)

      # Second field: a float64 scalar appended after the struct.
      self.model.add_output_schema('scalar', schema.Scalar(np.float64))
      after_second = schema.Struct(
          ('struct', schema.Struct()),
          ('scalar', schema.Scalar(np.float64)),
      )
      self.assertEqual(self.model.output_schema, after_second)

      # Re-adding 'scalar' must be refused.
      with self.assertRaises(AssertionError):
          self.model.add_output_schema('scalar', schema.Struct())
  def _test_net(self, net, ops_list):
      """Assert that ``net`` contains the given ops, then run it once.

      Args:
          net: the network to check and run.
          ops_list: operation specifications expected in ``net``.

      Returns:
          Whatever ``assertNetContainOps`` returned for ``net``.
      """
      matched_ops = self.assertNetContainOps(net, ops_list)
      workspace.RunNetOnce(net)
      return matched_ops
  def testFCWithoutBias(self):
      """Check the schema and generated ops of the FCWithoutBias layer.

      The init net must contain a ``UniformFill`` op, and both the train and
      predict nets must contain a ``MatMul`` of the float features with that
      op's first output.
      """
      out_dims = 2
      float_features = self.model.input_feature_schema.float_features
      fc_out = self.model.FCWithoutBias(float_features, out_dims)
      self.model.output_schema = fc_out
      self.assertEqual(schema.Scalar((np.float32, (out_dims, ))), fc_out)

      init_net, training_net = self.get_training_nets()
      fill_ops = self.assertNetContainOps(
          init_net, [OpSpec("UniformFill", None, None)]
      )

      # The weight consumed by MatMul is the first output of the fill op.
      weight_blob = fill_ops[0].output[0]
      expected_matmul = OpSpec(
          "MatMul",
          [float_features(), weight_blob],
          fc_out.field_blobs(),
      )
      self.assertNetContainOps(training_net, [expected_matmul])

      prediction_net = self.get_predict_net()
      self.assertNetContainOps(prediction_net, [expected_matmul])
  def testFCWithBootstrap(self):
      """Check the outputs and op sequence of the FCWithBootstrap layer.

      With ``num_bootstrap=2`` the layer must expose four output fields
      (indices and preds per bootstrap iteration, in that order), and the
      train / predict nets must contain exactly the expected op types.

      ``unittest`` assertions are used instead of bare ``assert`` so the
      checks are not stripped under ``python -O`` and a failure shows the
      mismatching values.
      """
      output_dims = 1
      fc_with_bootstrap = self.model.FCWithBootstrap(
          self.model.input_feature_schema.float_features,
          output_dims=output_dims,
          num_bootstrap=2,
          max_fc_size=-1
      )
      self.model.output_schema = fc_with_bootstrap
      self.assertEqual(len(fc_with_bootstrap), 4)

      # must be in this order
      expected_blob_names = [
          "fc_with_bootstrap/bootstrap_iteration_0/indices",
          "fc_with_bootstrap/bootstrap_iteration_0/preds",
          "fc_with_bootstrap/bootstrap_iteration_1/indices",
          "fc_with_bootstrap/bootstrap_iteration_1/preds",
      ]
      for idx, blob_name in enumerate(expected_blob_names):
          self.assertEqual(
              core.BlobReference(blob_name),
              fc_with_bootstrap[idx].field_blobs()[0],
          )

      # The init net is not inspected here, only the train net.
      _, train_net = self.get_training_nets()
      predict_net = layer_model_instantiator.generate_predict_net(self.model)

      train_proto = train_net.Proto()
      eval_proto = predict_net.Proto()

      master_train_ops = [
          "Shape",
          "GivenTensorInt64Fill",
          "Gather",
          "GivenTensorIntFill",
          "GivenTensorIntFill",
          "Cast",
          "Sub",
          "UniformIntFill",
          "Gather",
          "FC",
          "UniformIntFill",
          "Gather",
          "FC",
      ]
      # Same as the train list, minus the per-iteration Gather ops.
      master_eval_ops = [
          "Shape",
          "GivenTensorInt64Fill",
          "Gather",
          "GivenTensorIntFill",
          "GivenTensorIntFill",
          "Cast",
          "Sub",
          "UniformIntFill",
          "FC",
          "UniformIntFill",
          "FC",
      ]

      self.assertEqual(
          list(train_proto.external_input), list(eval_proto.external_input)
      )
      self.assertEqual(list(train_proto.external_output), [])

      # make sure all the ops are present and unchanged for train_net and
      # eval_net; comparing whole lists checks both length and order.
      self.assertEqual([op.type for op in train_proto.op], master_train_ops)
      self.assertEqual([op.type for op in eval_proto.op], master_eval_ops)
  def testFCwithAxis2(self):
      """Check the output schema of an FC layer applied with ``axis=2``.

      The input is a ``(max_length, input_dim)`` float32 scalar; the output
      must be ``(max_length, output_dim)``.  The training nets are then
      generated without further inspection.
      """
      input_dim, output_dim, max_length = 10, 30, 20
      sequence_schema = schema.Scalar((np.float32, (max_length, input_dim)))
      input_record = self.new_record(
          schema.Struct(('history_sequence', sequence_schema))
      )

      fc_out = self.model.FC(
          input_record.history_sequence, output_dim, axis=2
      )
      self.model.output_schema = fc_out

      expected_schema = schema.Scalar((np.float32, (max_length, output_dim)))
      self.assertEqual(expected_schema, fc_out)

      # Only checks that net generation succeeds.
      _init_net, _train_net = self.get_training_nets()
  def testFCTransposed(self):
      """Check the output schema of an FC layer with ``transposed=True``.

      Uses ``axis=2`` on a ``(max_length, input_dim)`` float32 input and
      expects a ``(max_length, output_dim)`` output.  The training nets are
      then generated without further inspection.
      """
      input_dim, output_dim, max_length = 10, 30, 20
      sequence_schema = schema.Scalar((np.float32, (max_length, input_dim)))
      input_record = self.new_record(
          schema.Struct(('history_sequence', sequence_schema))
      )

      fc_transposed_out = self.model.FC(
          input_record.history_sequence,
          output_dim,
          axis=2,
          transposed=True,
      )
      self.model.output_schema = fc_transposed_out

      expected_schema = schema.Scalar((np.float32, (max_length, output_dim)))
      self.assertEqual(expected_schema, fc_transposed_out)

      # Only checks that net generation succeeds.
      _init_net, _train_net = self.get_training_nets()
  def testFCTransposedWithMaxFCSize(self):
      """Check a transposed FC layer when ``max_fc_size`` is set.

      ``max_fc_size`` is set to half of ``input_dim * output_dim``; the
      output schema must still be ``(max_length, output_dim)``.  The
      training nets are then generated without further inspection.
      """
      input_dim, output_dim, max_length = 10, 30, 20
      sequence_schema = schema.Scalar((np.float32, (max_length, input_dim)))
      input_record = self.new_record(
          schema.Struct(('history_sequence', sequence_schema))
      )

      half_weight_size = input_dim * output_dim // 2
      fc_transposed_out = self.model.FC(
          input_record.history_sequence,
          output_dim,
          max_fc_size=half_weight_size,
          axis=2,
          transposed=True,
      )
      self.model.output_schema = fc_transposed_out

      expected_schema = schema.Scalar((np.float32, (max_length, output_dim)))
      self.assertEqual(expected_schema, fc_transposed_out)

      # Only checks that net generation succeeds.
      _init_net, _train_net = self.get_training_nets()
  def testSparseLookupSumPoolingWithEviction(self):
      """Check that an evicted embedding row is reset by the train net.

      A one-row embedding table is initialised, overwritten with different
      values, and the train net is run with row 0 listed as evicted.  The
      table must then equal its freshly initialised contents.

      The final comparison is element-wise.  The previous form compared
      ``a.all()`` with ``b.all()`` -- two booleans -- which passes for any
      pair of all-non-zero arrays and so never verified the reset.
      """
      # Create test embedding table of 1 row
      record = schema.NewRecord(self.model.net, schema.Struct(
          ('sparse', schema.Struct(
              ('sparse_feature_0', schema.ListWithEvicted(
                  schema.Scalar(np.int64,
                                metadata=schema.Metadata(categorical_limit=1)),)),)),
      ))
      embedding_dim = 8

      lengths_blob = record.sparse.sparse_feature_0.lengths.get()
      values_blob = record.sparse.sparse_feature_0.items.get()
      evicted_values_blob = record.sparse.sparse_feature_0._evicted_values.get()

      lengths = np.array([1]).astype(np.int32)
      values = np.array([0]).astype(np.int64)
      # Need to reset row 0
      evicted_values = np.array([0]).astype(np.int64)
      workspace.FeedBlob(lengths_blob, lengths)
      workspace.FeedBlob(values_blob, values)
      workspace.FeedBlob(evicted_values_blob, evicted_values)

      embedding_after_pooling = self.model.SparseLookup(
          record.sparse.sparse_feature_0, [embedding_dim], 'Sum',
          weight_init=("ConstantFill", {"value": 1.0}))

      self.model.output_schema = schema.Struct()
      self.assertEqual(
          schema.Scalar((np.float32, (embedding_dim, ))),
          embedding_after_pooling
      )

      train_init_net, train_net = self.get_training_nets()
      workspace.RunNetOnce(train_init_net)
      embedding_after_init = workspace.FetchBlob("sparse_lookup/w")

      # Change row 0's value before reset
      new_values = np.array([[2, 2, 2, 2, 2, 2, 2, 2]]).astype(np.float32)
      workspace.FeedBlob("sparse_lookup/w", new_values)
      workspace.RunNetOnce(train_net.Proto())
      embedding_after_training = workspace.FetchBlob("sparse_lookup/w")

      # Verify row 0 is back at its initial value after the reset.
      npt.assert_array_equal(embedding_after_training, embedding_after_init)
  def testSparseLookupSumPooling(self):
      """Check SparseLookup with 'Sum' pooling on an id list.

      The init net must contain ``UniformFill`` and ``ConstantFill`` ops;
      the train and predict nets must contain a ``SparseLengthsSum`` that
      reads the first init op's output together with the feature's items
      and lengths.
      """
      id_scalar = schema.Scalar(
          np.int64, metadata=schema.Metadata(categorical_limit=1000)
      )
      record = schema.NewRecord(self.model.net, schema.Struct(
          ('sparse', schema.Struct(
              ('sparse_feature_0', schema.List(id_scalar)),
          )),
      ))
      feature = record.sparse.sparse_feature_0

      embedding_dim = 64
      pooled = self.model.SparseLookup(feature, [embedding_dim], 'Sum')
      self.model.output_schema = schema.Struct()
      self.assertEqual(
          schema.Scalar((np.float32, (embedding_dim, ))), pooled
      )

      init_net, training_net = self.get_training_nets()
      matched_init_ops = self.assertNetContainOps(
          init_net,
          [
              OpSpec("UniformFill", None, None),
              OpSpec("ConstantFill", None, None),
          ],
      )

      # The embedding table is the first output of the first init op.
      table_blob = matched_init_ops[0].output[0]
      expected_lookup = OpSpec(
          'SparseLengthsSum',
          [table_blob, feature.items(), feature.lengths()],
          [pooled()],
      )
      self.assertNetContainOps(training_net, [expected_lookup])

      prediction_net = self.get_predict_net()
      self.assertNetContainOps(prediction_net, [expected_lookup])
  @given(
      use_hashing=st.booleans(),
      modulo=st.integers(min_value=100, max_value=200),
      use_divide_mod=st.booleans(),
      divisor=st.integers(min_value=10, max_value=20),
  )
  def testSparseFeatureHashIdList(self, use_hashing, modulo, use_divide_mod, divisor):
      """Check SparseFeatureHash on an id list for generated settings.

      The output's categorical limit must equal ``modulo`` and the train
      net must hold three ops when divide-mod is active, two otherwise.
      """
      id_scalar = schema.Scalar(
          np.int64, metadata=schema.Metadata(categorical_limit=60000)
      )
      record = schema.NewRecord(self.model.net, schema.List(id_scalar))

      # Divide-mod is only exercised when hashing is switched off.
      if use_hashing:
          use_divide_mod = False

      hashed = self.model.SparseFeatureHash(
          record,
          modulo=modulo,
          use_hashing=use_hashing,
          use_divide_mod=use_divide_mod,
          divisor=divisor,
      )
      self.model.output_schema = hashed

      self.assertEqual(len(self.model.layers), 1)
      self.assertEqual(hashed._items.metadata.categorical_limit, modulo)

      _init_net, training_net = self.get_training_nets()
      expected_op_count = 3 if use_divide_mod else 2
      self.assertEqual(len(training_net.Proto().op), expected_op_count)
  @given(
      use_hashing=st.booleans(),
      modulo=st.integers(min_value=100, max_value=200),
  )
  def testSparseFeatureHashIdScoreList(self, use_hashing, modulo):
      """Check SparseFeatureHash on an id-score list (int64 keys -> float32).

      Exactly one layer must be created and the categorical limit of the
      output keys must equal ``modulo``.  The training nets are then
      generated without further inspection.
      """
      key_scalar = schema.Scalar(
          np.int64, metadata=schema.Metadata(categorical_limit=60000)
      )
      record = schema.NewRecord(
          self.model.net, schema.Map(key_scalar, np.float32)
      )

      hashed = self.model.SparseFeatureHash(
          record, modulo=modulo, use_hashing=use_hashing
      )
      self.model.output_schema = hashed

      self.assertEqual(len(self.model.layers), 1)
      self.assertEqual(hashed._items.keys.metadata.categorical_limit, modulo)

      # Only checks that net generation succeeds.
      _init_net, _train_net = self.get_training_nets()
  def testSparseLookupIncorrectPositionWeightedOnIdList(self):
      '''
      SparseLookup currently assumes an id_score_list input when the
      'PositionWeighted' reducer is used, so passing a plain id list must
      raise AssertionError.
      '''
      id_scalar = schema.Scalar(
          np.int64, metadata=schema.Metadata(categorical_limit=1000)
      )
      record = schema.NewRecord(self.model.net, schema.Struct(
          ('sparse', schema.Struct(
              ('sparse_feature_0', schema.List(id_scalar)),
          )),
      ))

      embedding_dim = 64
      with self.assertRaises(AssertionError):
          self.model.SparseLookup(
              record.sparse.sparse_feature_0,
              [embedding_dim],
              'PositionWeighted',
          )
  def testSparseLookupPositionWeightedOnIdList(self):
      """Check 'PositionWeighted' SparseLookup on a converted id list.

      The id list is first turned into an id-score list whose scores come
      from a PositionWeighted layer; the init, train and predict nets are
      then checked for the expected op types.
      """
      id_scalar = schema.Scalar(
          np.int64, metadata=schema.Metadata(categorical_limit=1000)
      )
      record = schema.NewRecord(self.model.net, schema.Struct(
          ('sparse', schema.Struct(
              ('sparse_feature_0', schema.List(id_scalar)),
          )),
      ))

      # convert id_list to id_score_list with PositionWeighted layer
      id_list = record.sparse.sparse_feature_0
      pos_w_layer = self.model.PositionWeighted(id_list)
      id_score_list = schema.Map(
          keys=get_key(id_list),
          values=pos_w_layer.position_weights,
          lengths_blob=id_list.lengths,
      )

      embedding_dim = 64
      pooled = self.model.SparseLookup(
          id_score_list, [embedding_dim], 'PositionWeighted'
      )
      self.model.output_schema = schema.Struct()
      self.assertEqual(
          schema.Scalar((np.float32, (embedding_dim, ))), pooled
      )

      init_net, training_net = self.get_training_nets()
      self.assertNetContainOps(
          init_net,
          [
              OpSpec("ConstantFill", None, None),  # position_weights/pos_w
              OpSpec("UniformFill", None, None),
              OpSpec("ConstantFill", None, None),
          ],
      )

      # Train and predict nets are expected to share the same op types.
      lookup_op_types = (
          "LengthsRangeFill",
          "Gather",
          "SparseLengthsWeightedSum",
      )
      self.assertNetContainOps(
          training_net,
          [OpSpec(op_type, None, None) for op_type in lookup_op_types],
      )

      prediction_net = self.get_predict_net()
      self.assertNetContainOps(
          prediction_net,
          [OpSpec(op_type, None, None) for op_type in lookup_op_types],
      )
  def testSparseLookupPositionWeightedOnIdScoreList(self):
      """Check 'PositionWeighted' SparseLookup on an id-score list.

      The init net must contain ``UniformFill`` and ``ConstantFill`` ops;
      the train and predict nets must contain a ``SparseLengthsWeightedSum``
      reading the first init op's output plus the feature's values, keys
      and lengths (in that order).
      """
      key_scalar = schema.Scalar(
          np.int64,
          metadata=schema.Metadata(categorical_limit=1000),
      )
      record = schema.NewRecord(self.model.net, schema.Struct(
          ('sparse', schema.Struct(
              ('id_score_list_0', schema.Map(key_scalar, np.float32)),
          )),
      ))
      feature = record.sparse.id_score_list_0

      embedding_dim = 64
      pooled = self.model.SparseLookup(
          feature, [embedding_dim], 'PositionWeighted'
      )
      self.model.output_schema = schema.Struct()
      self.assertEqual(
          schema.Scalar((np.float32, (embedding_dim, ))), pooled
      )

      init_net, training_net = self.get_training_nets()
      matched_init_ops = self.assertNetContainOps(
          init_net,
          [
              OpSpec("UniformFill", None, None),
              OpSpec("ConstantFill", None, None),
          ],
      )

      # The embedding table is the first output of the first init op.
      table_blob = matched_init_ops[0].output[0]
      expected_lookup = OpSpec(
          'SparseLengthsWeightedSum',
          [table_blob, feature.values(), feature.keys(), feature.lengths()],
          [pooled()],
      )
      self.assertNetContainOps(training_net, [expected_lookup])

      prediction_net = self.get_predict_net()
      self.assertNetContainOps(prediction_net, [expected_lookup])
  def testSparseLookupIncorrectRecencyWeightedOnIdList(self):
      '''
      SparseLookup currently assumes an id_score_list input when the
      'RecencyWeighted' reducer is used, so passing a plain id list must
      raise AssertionError.
      '''
      id_scalar = schema.Scalar(
          np.int64, metadata=schema.Metadata(categorical_limit=1000)
      )
      record = schema.NewRecord(self.model.net, schema.Struct(
          ('sparse', schema.Struct(
              ('sparse_feature_0', schema.List(id_scalar)),
          )),
      ))

      embedding_dim = 64
      with self.assertRaises(AssertionError):
          self.model.SparseLookup(
              record.sparse.sparse_feature_0,
              [embedding_dim],
              'RecencyWeighted',
          )
  def testSparseLookupRecencyWeightedOnIdScoreList(self):
      """Check 'RecencyWeighted' SparseLookup on an id-score list.

      The init net must contain ``UniformFill`` and ``ConstantFill`` ops;
      the train and predict nets must contain a ``SparseLengthsWeightedSum``
      reading the first init op's output plus the feature's values, keys
      and lengths (in that order).
      """
      key_scalar = schema.Scalar(
          np.int64,
          metadata=schema.Metadata(categorical_limit=1000),
      )
      record = schema.NewRecord(self.model.net, schema.Struct(
          ('sparse', schema.Struct(
              ('id_score_list_0', schema.Map(key_scalar, np.float32)),
          )),
      ))
      feature = record.sparse.id_score_list_0

      embedding_dim = 64
      pooled = self.model.SparseLookup(
          feature, [embedding_dim], 'RecencyWeighted'
      )
      self.model.output_schema = schema.Struct()
      self.assertEqual(
          schema.Scalar((np.float32, (embedding_dim, ))), pooled
      )

      init_net, training_net = self.get_training_nets()
      matched_init_ops = self.assertNetContainOps(
          init_net,
          [
              OpSpec("UniformFill", None, None),
              OpSpec("ConstantFill", None, None),
          ],
      )

      # The embedding table is the first output of the first init op.
      table_blob = matched_init_ops[0].output[0]
      expected_lookup = OpSpec(
          'SparseLengthsWeightedSum',
          [table_blob, feature.values(), feature.keys(), feature.lengths()],
          [pooled()],
      )
      self.assertNetContainOps(training_net, [expected_lookup])

      prediction_net = self.get_predict_net()
      self.assertNetContainOps(prediction_net, [expected_lookup])
  def testPairwiseSimilarityWithAllEmbeddings(self):
      """Check PairwiseSimilarity when given a single 'all_embeddings' field.

      For an ``(N, embedding_dim)`` input the output schema must be a
      float32 scalar of ``N * N`` elements; the init net is checked against
      an empty op list and the train net for ``BatchMatMul`` and
      ``Flatten``.
      """
      embedding_dim = 64
      N = 5
      pair_count = N * N

      embeddings = schema.Scalar((np.float32, (N, embedding_dim)))
      record = schema.NewRecord(
          self.model.net,
          schema.Struct(('all_embeddings', embeddings)),
      )

      similarity = self.model.PairwiseSimilarity(record, pair_count)
      self.assertEqual(
          schema.Scalar((np.float32, (pair_count, ))), similarity
      )

      init_net, training_net = self.get_training_nets()
      self.assertNetContainOps(init_net, [])
      self.assertNetContainOps(
          training_net,
          [
              OpSpec("BatchMatMul", None, None),
              OpSpec("Flatten", None, None),
          ],
      )
  def testPairwiseSimilarityWithXandYEmbeddings(self):
      """PairwiseSimilarity over separate 'x_embeddings' and 'y_embeddings'.

      Expects an output of 5 * 6 floats, an empty training init net, and a
      training net containing BatchMatMul followed by Flatten.
      """
      embedding_dim = 64
      output_dim = 5 * 6

      x_type = (np.float32, (5, embedding_dim))
      y_type = (np.float32, (6, embedding_dim))
      record = schema.NewRecord(self.model.net, schema.Struct(
          ('x_embeddings', schema.Scalar(x_type)),
          ('y_embeddings', schema.Scalar(y_type)),
      ))
      similarity = self.model.PairwiseSimilarity(record, output_dim)

      expected_output = schema.Scalar((np.float32, (output_dim, )))
      self.assertEqual(expected_output, similarity)

      train_init_net, train_net = self.get_training_nets()
      self.assertNetContainOps(train_init_net, [])
      expected_train_ops = [
          OpSpec("BatchMatMul", None, None),
          OpSpec("Flatten", None, None),
      ]
      self.assertNetContainOps(train_net, expected_train_ops)
  def testPairwiseSimilarityWithXandYEmbeddingsAndGather(self):
      """PairwiseSimilarity with x/y embeddings plus an 'indices_to_gather' field.

      The gather indices are registered as a global constant; the training
      net is expected to contain BatchMatMul, Flatten and BatchGather.
      """
      embedding_dim = 64
      output_idx = [1, 3, 5]
      num_outputs = len(output_idx)

      constant_name = str(
          self.model.net.NextScopedBlob('pairwise_dot_product_gather'))
      output_idx_blob = self.model.add_global_constant(
          constant_name,
          output_idx,
          dtype=np.int32,
      )
      indices_to_gather = schema.Scalar(
          (np.int32, num_outputs),
          output_idx_blob,
      )

      record = schema.NewRecord(self.model.net, schema.Struct(
          ('x_embeddings', schema.Scalar((np.float32, (5, embedding_dim)))),
          ('y_embeddings', schema.Scalar((np.float32, (6, embedding_dim)))),
          ('indices_to_gather', indices_to_gather),
      ))
      similarity = self.model.PairwiseSimilarity(record, num_outputs)

      # This assert is not necessary,
      # output size is passed into PairwiseSimilarity
      expected_output = schema.Scalar((np.float32, (num_outputs, )))
      self.assertEqual(expected_output, similarity)

      train_init_net, train_net = self.get_training_nets()
      self.assertNetContainOps(train_init_net, [])
      expected_train_ops = [
          OpSpec("BatchMatMul", None, None),
          OpSpec("Flatten", None, None),
          OpSpec("BatchGather", None, None),
      ]
      self.assertNetContainOps(train_net, expected_train_ops)
  def testPairwiseSimilarityIncorrectInput(self):
      """PairwiseSimilarity must raise AssertionError on unsupported inputs.

      Two inputs are tried in turn: a struct holding only 'x_embeddings',
      and a struct whose 'all_embeddings' field is a List instead of a Scalar.
      """
      embedding_dim = 64
      invalid_schemas = (
          schema.Struct(
              ('x_embeddings', schema.Scalar(
                  (np.float32, (5, embedding_dim)))),
          ),
          schema.Struct(
              ('all_embeddings', schema.List(np.float32))
          ),
      )
      for invalid_schema in invalid_schemas:
          record = schema.NewRecord(self.model.net, invalid_schema)
          with self.assertRaises(AssertionError):
              self.model.PairwiseSimilarity(record, 25)
  def testConcat(self):
      """Check Concat output shapes with and without an added axis."""
      embedding_dim = 64
      field_type = (np.float32, (embedding_dim, ))
      input_record = self.new_record(schema.Struct(
          ('input1', schema.Scalar(field_type)),
          ('input2', schema.Scalar(field_type)),
          ('input3', schema.Scalar(field_type)),
      ))

      flat_output = self.model.Concat(input_record)
      flat_shape = (len(input_record.fields) * embedding_dim, )
      self.assertEqual(
          schema.Scalar((np.float32, flat_shape)),
          flat_output
      )

      # Note that in Concat layer we assume first dimension is batch.
      # so input is B * embedding_dim
      # add_axis=1 make it B * 1 * embedding_dim
      # concat on axis=1 make it B * N * embedding_dim
      stacked_output = self.model.Concat(input_record, axis=1, add_axis=1)
      stacked_shape = (len(input_record.fields), embedding_dim)
      self.assertEqual(
          schema.Scalar((np.float32, stacked_shape)),
          stacked_output
      )
  def testSamplingTrain(self):
      """Check the ops produced by a SamplingTrain layer wrapping an FC layer.

      Training net: two Gather ops (one per init blob, both indexed by
      `indices`), an FC over the gathered blobs, then Log of the sampling
      probability fed into a Sub on the FC output.
      Predict net: a single FC that reads the init blobs directly.
      """
      output_dims = 1000

      indices = self.new_record(schema.Scalar((np.int32, (10,))))
      sampling_prob = self.new_record(schema.Scalar((np.float32, (10, ))))
      float_features = self.model.input_feature_schema.float_features

      sampling_input = schema.Struct(
          ('input', float_features),
          ('indices', indices),
          ('sampling_prob', sampling_prob),
      )
      sampled_fc = self.model.SamplingTrain(sampling_input, "FC", output_dims)
      self.model.output_schema = sampled_fc

      # Check that we don't add prediction layer into the model
      self.assertEqual(1, len(self.model.layers))

      self.assertEqual(
          schema.Scalar((np.float32, (output_dims, ))),
          sampled_fc
      )

      train_init_net, train_net = self.get_training_nets()

      init_ops = self.assertNetContainOps(
          train_init_net,
          [
              OpSpec("UniformFill", None, None),
              OpSpec("UniformFill", None, None),
          ]
      )
      first_init_blob = init_ops[0].output[0]
      second_init_blob = init_ops[1].output[0]

      sampled_fc_layer = self.model.layers[0]
      train_params = sampled_fc_layer._prediction_layer.train_param_blobs

      gather_w_spec = OpSpec(
          "Gather",
          [first_init_blob, indices()],
          [train_params[0]]
      )
      gather_b_spec = OpSpec(
          "Gather",
          [second_init_blob, indices()],
          [train_params[1]]
      )
      train_fc_spec = OpSpec(
          "FC",
          [float_features()] + train_params,
          sampled_fc.field_blobs()
      )
      log_spec = OpSpec("Log", [sampling_prob()], [None])
      sub_spec = OpSpec(
          "Sub",
          [sampled_fc.field_blobs()[0], None],
          sampled_fc.field_blobs()
      )

      expected_train_ops = [
          gather_w_spec, gather_b_spec, train_fc_spec, log_spec, sub_spec]
      train_ops = self.assertNetContainOps(train_net, expected_train_ops)

      # The Log output must be the second operand of the Sub.
      self.assertEqual(train_ops[3].output[0], train_ops[4].input[1])

      predict_net = self.get_predict_net()
      predict_fc_spec = OpSpec(
          "FC",
          [float_features(), first_init_blob, second_init_blob],
          sampled_fc.field_blobs()
      )
      self.assertNetContainOps(predict_net, [predict_fc_spec])
  def testBatchLRLoss(self):
      """BatchLRLoss on label/logit/weight yields a float32 scalar schema."""
      loss_input = schema.Struct(
          ('label', schema.Scalar((np.float64, (1,)))),
          ('logit', schema.Scalar((np.float32, (2,)))),
          ('weight', schema.Scalar((np.float64, (1,))))
      )
      input_record = self.new_record(loss_input)
      loss = self.model.BatchLRLoss(input_record)
      expected = schema.Scalar((np.float32, tuple()))
      self.assertEqual(expected, loss)
  def testBatchLRLossWithUncertainty(self):
      """BatchLRLoss with an extra 'log_variance' field still yields a float32 scalar."""
      loss_input = schema.Struct(
          ('label', schema.Scalar((np.float64, (1,)))),
          ('logit', schema.Scalar((np.float32, (2,)))),
          ('weight', schema.Scalar((np.float64, (1,)))),
          ('log_variance', schema.Scalar((np.float64, (1,)))),
      )
      input_record = self.new_record(loss_input)
      loss = self.model.BatchLRLoss(input_record)
      expected = schema.Scalar((np.float32, tuple()))
      self.assertEqual(expected, loss)
  def testMarginRankLoss(self):
      """Feed positive/negative predictions, run MarginRankLoss forward, check schema."""
      input_record = self.new_record(schema.Struct(
          ('pos_prediction', schema.Scalar((np.float32, (1,)))),
          ('neg_prediction', schema.List(np.float32)),
      ))

      # Blobs are fed in field order: positives, negative lengths, negatives.
      fed_blobs = [
          np.array([0.1, 0.2, 0.3], dtype=np.float32),
          np.array([1, 2, 3], dtype=np.int32),
          np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6], dtype=np.float32),
      ]
      schema.FeedRecord(input_record, fed_blobs)

      loss = self.model.MarginRankLoss(input_record)
      self.run_train_net_forward_only()
      expected = schema.Scalar((np.float32, tuple()))
      self.assertEqual(expected, loss)
  def testBPRLoss(self):
      """Run BPRLoss forward on fixed data and compare with a reference value."""
      input_record = self.new_record(schema.Struct(
          ('pos_prediction', schema.Scalar((np.float32, (1,)))),
          ('neg_prediction', schema.List(np.float32)),
      ))

      # Blobs are fed in field order: positives, negative lengths, negatives.
      fed_blobs = [
          np.array([0.8, 0.9], dtype=np.float32),
          np.array([1, 2], dtype=np.int32),
          np.array([0.1, 0.2, 0.3], dtype=np.float32),
      ]
      schema.FeedRecord(input_record, fed_blobs)

      loss = self.model.BPRLoss(input_record)
      self.run_train_net_forward_only()
      self.assertEqual(schema.Scalar((np.float32, tuple())), loss)

      actual = workspace.FetchBlob('bpr_loss/output')
      reference = np.array(1.24386, dtype=np.float32)
      np.testing.assert_array_almost_equal(reference, actual)
  def testBatchMSELoss(self):
      """BatchMSELoss on label/prediction yields a float32 scalar schema."""
      loss_input = schema.Struct(
          ('label', schema.Scalar((np.float64, (1,)))),
          ('prediction', schema.Scalar((np.float32, (2,)))),
      )
      input_record = self.new_record(loss_input)
      loss = self.model.BatchMSELoss(input_record)
      expected = schema.Scalar((np.float32, tuple()))
      self.assertEqual(expected, loss)
  def testBatchHuberLoss(self):
      """BatchHuberLoss on label/prediction yields a float32 scalar schema."""
      loss_input = schema.Struct(
          ('label', schema.Scalar((np.float32, (1,)))),
          ('prediction', schema.Scalar((np.float32, (2,)))),
      )
      input_record = self.new_record(loss_input)
      loss = self.model.BatchHuberLoss(input_record)
      expected = schema.Scalar((np.float32, tuple()))
      self.assertEqual(expected, loss)
  def testBatchSigmoidCrossEntropyLoss(self):
      """BatchSigmoidCrossEntropyLoss on 32-wide inputs yields a float32 scalar schema."""
      wide_float = (np.float32, (32,))
      loss_input = schema.Struct(
          ('label', schema.Scalar(wide_float)),
          ('prediction', schema.Scalar(wide_float))
      )
      input_record = self.new_record(loss_input)
      loss = self.model.BatchSigmoidCrossEntropyLoss(input_record)
      expected = schema.Scalar((np.float32, tuple()))
      self.assertEqual(expected, loss)
  def testBatchSoftmaxLoss(self):
      """BatchSoftmaxLoss returns a struct with 'softmax' and 'loss' fields."""
      loss_input = schema.Struct(
          ('label', schema.Scalar((np.float32, tuple()))),
          ('prediction', schema.Scalar((np.float32, (32,))))
      )
      input_record = self.new_record(loss_input)
      loss = self.model.BatchSoftmaxLoss(input_record)

      expected = schema.Struct(
          ('softmax', schema.Scalar((np.float32, (32,)))),
          ('loss', schema.Scalar(np.float32)),
      )
      self.assertEqual(expected, loss)
  def testBatchSoftmaxLossWeight(self):
      """BatchSoftmaxLoss with a 'weight' field returns the same output struct."""
      loss_input = schema.Struct(
          ('label', schema.Scalar((np.float32, tuple()))),
          ('prediction', schema.Scalar((np.float32, (32,)))),
          ('weight', schema.Scalar((np.float64, (1,))))
      )
      input_record = self.new_record(loss_input)
      loss = self.model.BatchSoftmaxLoss(input_record)

      expected = schema.Struct(
          ('softmax', schema.Scalar((np.float32, (32,)))),
          ('loss', schema.Scalar(np.float32)),
      )
      self.assertEqual(expected, loss)
  @given(X=hu.arrays(dims=[2, 5]))
  def testBatchNormalization(self, X):
      """Check the ops emitted by BatchNormalization in train/eval/predict nets.

      The init net must hold four ConstantFill ops. Each net is expected to
      contain ExpandDims -> SpatialBN -> Squeeze, with is_test=0 for training
      and is_test=1 for eval and predict. All nets are then run once on X.
      """
      input_record = self.new_record(schema.Scalar((np.float32, (5,))))
      schema.FeedRecord(input_record, [X])
      bn_output = self.model.BatchNormalization(input_record)
      self.assertEqual(schema.Scalar((np.float32, (5,))), bn_output)
      self.model.output_schema = schema.Struct()

      train_init_net, train_net = self.get_training_nets()

      init_ops = self.assertNetContainOps(
          train_init_net,
          [OpSpec("ConstantFill", None, None) for _ in range(4)]
      )
      init_blobs = [init_ops[i].output[0] for i in range(4)]

      input_blob = input_record.field_blobs()[0]
      output_blob = bn_output.field_blobs()[0]

      expand_dims_spec = OpSpec("ExpandDims", [input_blob], None)

      # SpatialBN reads an unconstrained first input plus the four init blobs.
      train_bn_spec = OpSpec(
          "SpatialBN",
          [None] + init_blobs,
          [output_blob, init_blobs[2], init_blobs[3], None, None],
          {'is_test': 0, 'order': 'NCHW', 'momentum': 0.9},
      )
      test_bn_spec = OpSpec(
          "SpatialBN",
          [None] + init_blobs,
          [output_blob],
          {'is_test': 1, 'order': 'NCHW', 'momentum': 0.9},
      )
      squeeze_spec = OpSpec("Squeeze", [output_blob], [output_blob])

      train_sequence = [expand_dims_spec, train_bn_spec, squeeze_spec]
      test_sequence = [expand_dims_spec, test_bn_spec, squeeze_spec]

      self.assertNetContainOps(train_net, train_sequence)

      eval_net = self.get_eval_net()
      self.assertNetContainOps(eval_net, test_sequence)

      predict_net = self.get_predict_net()
      self.assertNetContainOps(predict_net, test_sequence)

      workspace.RunNetOnce(train_init_net)
      workspace.RunNetOnce(train_net)

      schema.FeedRecord(input_record, [X])
      workspace.RunNetOnce(eval_net)

      schema.FeedRecord(input_record, [X])
      workspace.RunNetOnce(predict_net)
  @given(
      X=hu.arrays(dims=[2, 5, 6]),
      use_layer_norm_op=st.booleans(),
  )
  def testLayerNormalization(self, X, use_layer_norm_op):
      """Build LayerNormalization with/without the fused op and run training once.

      Without the layer-norm op the input is reshaped to (10, 6) and the
      per-example shape becomes (6,); otherwise it stays (5, 6).
      """
      if use_layer_norm_op:
          expect = (5, 6,)
      else:
          X = X.reshape(10, 6)
          expect = (6,)

      record_type = schema.Scalar((np.float32, expect))
      input_record = self.new_record(record_type)
      schema.FeedRecord(input_record, [X])

      ln_output = self.model.LayerNormalization(
          input_record, use_layer_norm_op=use_layer_norm_op
      )
      self.assertEqual(schema.Scalar((np.float32, expect)), ln_output)
      self.model.output_schema = schema.Struct()

      train_init_net, train_net = self.get_training_nets(add_constants=True)
      for net in (train_init_net, train_net):
          workspace.RunNetOnce(net)
  @given(
      X=hu.arrays(dims=[5, 2]),
      num_to_collect=st.integers(min_value=1, max_value=10),
  )
  def testLastNWindowCollector(self, X, num_to_collect):
      """LastNWindowCollector keeps the trailing rows of X and counts 5 visits."""
      input_record = self.new_record(schema.Scalar(np.float32))
      schema.FeedRecord(input_record, [X])
      collector = self.model.LastNWindowCollector(input_record, num_to_collect)
      self.run_train_net_forward_only()

      collected = schema.FetchRecord(collector.last_n)
      # X has 5 rows; at most num_to_collect of the last ones are retained.
      first_kept_row = max(0, 5 - num_to_collect)
      npt.assert_array_equal(X[first_kept_row:], collected())

      visited = schema.FetchRecord(collector.num_visited)
      npt.assert_array_equal([5], visited())
  @given(
      X=hu.arrays(dims=[5, 2]),
      num_to_collect=st.integers(min_value=3, max_value=3),
  )
  @settings(deadline=1000)
  def testReservoirSamplingWithID(self, X, num_to_collect):
      """ReservoirSampling keyed by object id, before and after a param round-trip.

      Runs the forward-only training net twice on one id batch and checks
      num_visited, serializes and deserializes every model param in place,
      then feeds a second id batch, runs twice more and checks num_visited.
      """
      first_ids = np.array([1, 2, 3, 1, 2], dtype=np.int64)
      input_record = self.new_record(
          schema.Struct(
              ('record', schema.Struct(
                  ('dense', schema.Scalar()),
              )),
              ('object_id', schema.Scalar(np.int64)),
          )
      )
      schema.FeedRecord(input_record, [X, first_ids])

      packed_record = self.model.PackRecords(
          input_record.record, 1, fields=input_record.record.field_names())
      reservoir_input = schema.Struct(
          ('data', packed_record),
          ('object_id', input_record.object_id),
      )
      reservoir = self.model.ReservoirSampling(
          reservoir_input, num_to_collect)
      self.model.output_schema = schema.Struct()

      nets = layer_model_instantiator.generate_training_nets_forward_only(
          self.model)
      train_init_net, train_net = nets
      workspace.RunNetOnce(train_init_net)
      workspace.CreateNet(train_net)

      workspace.RunNet(train_net.Proto().name, num_iter=2)
      visited = schema.FetchRecord(reservoir.num_visited)
      npt.assert_array_equal([3], visited())

      # Round-trip each param blob through serialization in place.
      for param in self.model.params:
          blob_name = str(param)
          serialized = workspace.SerializeBlob(blob_name)
          workspace.DeserializeBlob(blob_name, serialized)

      second_ids = np.array([3, 5, 3, 3, 5], dtype=np.int64)
      schema.FeedRecord(input_record.object_id, [second_ids])
      workspace.RunNet(train_net.Proto().name, num_iter=2)
      visited = schema.FetchRecord(reservoir.num_visited)
      npt.assert_array_equal([2], visited())
  def testUniformSampling(self):
      """UniformSampling keeps the input ids first and reports a uniform probability.

      Checks the sample count, that the leading samples equal the fed input,
      and that every sampling probability equals num_samples / num_elements.
      """
      num_samples = 20
      num_elements = 100

      input_record = self.new_record(schema.Scalar(np.int32))
      input_array = np.array([3, 10, 11, 15, 20, 99], dtype=np.int32)
      schema.FeedRecord(input_record, [input_array])

      sampling_output = self.model.UniformSampling(
          input_record, num_samples, num_elements)
      self.model.loss = sampling_output
      self.run_train_net()

      samples = workspace.FetchBlob(sampling_output.samples())
      sampling_prob = workspace.FetchBlob(sampling_output.sampling_prob())

      self.assertEqual(num_samples, len(samples))

      leading_samples = samples[:len(input_array)]
      np.testing.assert_array_equal(input_array, leading_samples)

      uniform_prob = float(num_samples) / num_elements
      expected_prob = np.array([uniform_prob] * num_samples, dtype=np.float32)
      np.testing.assert_almost_equal(expected_prob, sampling_prob)
  def testUniformSamplingWithIncorrectSampleSize(self):
      """UniformSampling must assert when num_samples exceeds num_elements."""
      input_record = self.new_record(schema.Scalar(np.int32))
      too_many_samples, num_elements = 200, 100
      with self.assertRaises(AssertionError):
          self.model.UniformSampling(
              input_record, too_many_samples, num_elements)
  def testGatherRecord(self):
      """GatherRecord selects rows by index across dense, list and nested-list fields.

      The expected values for each field are rebuilt by hand from the fed
      arrays (using per-row offsets and lengths) and compared with the blobs
      fetched after a forward-only training run.
      """
      indices = np.array([1, 3, 4], dtype=np.int32)
      dense = np.arange(20, dtype=np.float32).reshape(10, 2)
      lengths = np.arange(10, dtype=np.int32)
      items = np.arange(lengths.sum(), dtype=np.int64)
      items_lengths = np.arange(lengths.sum(), dtype=np.int32)
      items_items = np.arange(items_lengths.sum(), dtype=np.int64)

      record = self.new_record(schema.Struct(
          ('dense', schema.Scalar(np.float32)),
          ('sparse', schema.Struct(
              ('list', schema.List(np.int64)),
              ('list_of_list', schema.List(schema.List(np.int64))),
          )),
          ('empty_struct', schema.Struct())
      ))
      indices_record = self.new_record(schema.Scalar(np.int32))
      input_record = schema.Struct(
          ('indices', indices_record),
          ('record', record),
      )
      # `lengths` is fed twice: once for 'list', once for 'list_of_list'.
      fed_blobs = [
          indices, dense, lengths, items, lengths, items_lengths, items_items]
      schema.FeedRecord(input_record, fed_blobs)

      gathered_record = self.model.GatherRecord(input_record)
      self.assertTrue(schema.equal_schemas(gathered_record, record))

      self.run_train_net_forward_only()

      # Dense rows.
      gathered_dense = workspace.FetchBlob(gathered_record.dense())
      expected_dense = np.concatenate([dense[i:i + 1] for i in indices])
      np.testing.assert_array_equal(expected_dense, gathered_dense)

      # Lengths of the flat list.
      gathered_lengths = workspace.FetchBlob(
          gathered_record.sparse.list.lengths())
      expected_lengths = np.concatenate([lengths[i:i + 1] for i in indices])
      np.testing.assert_array_equal(expected_lengths, gathered_lengths)

      # Items of the flat list; row i starts at the sum of earlier lengths.
      gathered_items = workspace.FetchBlob(
          gathered_record.sparse.list.items())
      offsets = lengths.cumsum() - lengths
      expected_items = np.concatenate([
          items[offsets[i]: offsets[i] + lengths[i]]
          for i in indices
      ])
      np.testing.assert_array_equal(expected_items, gathered_items)

      # Inner lengths of the nested list.
      gathered_items_lengths = workspace.FetchBlob(
          gathered_record.sparse.list_of_list.items.lengths())
      expected_items_lengths = np.concatenate([
          items_lengths[offsets[i]: offsets[i] + lengths[i]]
          for i in indices
      ])
      np.testing.assert_array_equal(
          expected_items_lengths, gathered_items_lengths)

      # For every outer row, accumulate where its innermost items start and
      # how many there are, by walking the inner lengths in order.
      nested_offsets = []
      nested_lengths = []
      running_offset = 0
      cursor = 0
      for outer_length in lengths:
          nested_offsets.append(running_offset)
          inner_total = 0
          for _ in range(outer_length):
              running_offset += items_lengths[cursor]
              inner_total += items_lengths[cursor]
              cursor += 1
          nested_lengths.append(inner_total)

      gathered_items_items = workspace.FetchBlob(
          gathered_record.sparse.list_of_list.items.items())
      expected_items_items = np.concatenate([
          items_items[nested_offsets[i]:
                      nested_offsets[i] + nested_lengths[i]]
          for i in indices
      ])
      np.testing.assert_array_equal(
          expected_items_items, gathered_items_items)
  def testMapToRange(self):
      """MapToRange assigns indices in training and reuses them in eval/predict.

      Per the expected arrays below, ids seen during the two training runs
      keep their index across runs, while ids first met in the eval or
      predict nets come back as 0.
      """
      input_record = self.new_record(schema.Scalar(np.int32))
      indices_blob = self.model.MapToRange(input_record,
                                           max_index=100).indices
      self.model.output_schema = schema.Struct()

      train_init_net, train_net = self.get_training_nets()

      def feed_run_check(net, fed_ids, expected_indices, init_net=None):
          # Feed ids, optionally run the init net, run `net`, compare output.
          schema.FeedRecord(
              input_record,
              [np.array(fed_ids, dtype=np.int32)]
          )
          if init_net is not None:
              workspace.RunNetOnce(init_net)
          workspace.RunNetOnce(net)
          indices = workspace.FetchBlob(indices_blob())
          np.testing.assert_array_equal(
              np.array(expected_indices, dtype=np.int32),
              indices
          )

      feed_run_check(
          train_net,
          [10, 3, 20, 99, 15, 11, 3, 11],
          [1, 2, 3, 4, 5, 6, 2, 6],
          init_net=train_init_net,
      )
      feed_run_check(
          train_net,
          [10, 3, 23, 35, 60, 15, 10, 15],
          [1, 2, 7, 8, 9, 5, 1, 5],
      )

      eval_net = self.get_eval_net()
      feed_run_check(
          eval_net,
          [10, 3, 23, 35, 60, 15, 200],
          [1, 2, 7, 8, 9, 5, 0],
      )
      feed_run_check(
          eval_net,
          [10, 3, 23, 15, 101, 115],
          [1, 2, 7, 5, 0, 0],
      )

      predict_net = self.get_predict_net()
      feed_run_check(
          predict_net,
          [3, 3, 20, 23, 151, 35, 60, 15, 200],
          [2, 2, 3, 7, 0, 8, 9, 5, 0],
      )
  def testSelectRecordByContext(self):
      """SelectRecordByContext picks a different record per instantiation context.

      The predict net must output the raw float features, while the eval
      and forward-only training nets must output their logarithm.
      """
      float_features = self.model.input_feature_schema.float_features
      float_array = np.array([1.0, 2.0], dtype=np.float32)
      schema.FeedRecord(float_features, [float_array])

      with Tags(Tags.EXCLUDE_FROM_PREDICTION):
          log_float_features = self.model.Log(float_features, 1)

      per_context = schema.Struct(
          (InstantiationContext.PREDICTION, float_features),
          (InstantiationContext.TRAINING, log_float_features),
          # TODO: TRAIN_ONLY layers are also generated in eval
          (InstantiationContext.EVAL, log_float_features),
      )
      joined = self.model.SelectRecordByContext(per_context)

      # model.output_schema has to be a struct
      self.model.output_schema = schema.Struct(('joined', joined))

      predict_net = layer_model_instantiator.generate_predict_net(self.model)
      workspace.RunNetOnce(predict_net)
      predict_output = schema.FetchRecord(predict_net.output_record())
      npt.assert_array_equal(float_array, predict_output['joined']())

      eval_net = layer_model_instantiator.generate_eval_net(self.model)
      workspace.RunNetOnce(eval_net)
      eval_output = schema.FetchRecord(eval_net.output_record())
      npt.assert_array_equal(np.log(float_array), eval_output['joined']())

      forward_only_nets = (
          layer_model_instantiator.generate_training_nets_forward_only(
              self.model
          )
      )
      train_net = forward_only_nets[1]
      workspace.RunNetOnce(train_net)
      train_output = schema.FetchRecord(train_net.output_record())
      npt.assert_array_equal(np.log(float_array), train_output['joined']())
  def testFunctionalLayer(self):
      """A Functional layer built from a Python callable feeds an FC layer.

      The predict net must contain exactly ReduceFrontMean, Sub and FC, with
      the first op reading the float features and the Sub output consumed
      by the FC.
      """
      def normalize(net, in_record, out_record):
          # Subtract the ReduceFrontMean of the input from the input itself.
          mean = net.ReduceFrontMean(in_record(), 1)
          net.Sub(
              [in_record(), mean],
              out_record(),
              broadcast=1)

      normalized = self.model.Functional(
          self.model.input_feature_schema.float_features, 1,
          normalize, name="normalizer")

      # Attach metadata to one of the outputs and use it in FC
      normalized.set_type((np.float32, 32))
      self.model.output_schema = self.model.FC(normalized, 2)

      predict_net = layer_model_instantiator.generate_predict_net(
          self.model)
      ops = predict_net.Proto().op

      # unittest assertions are used instead of bare asserts so the checks
      # still run under `python -O` and report the mismatching values.
      self.assertEqual(3, len(ops))
      self.assertEqual("ReduceFrontMean", ops[0].type)
      self.assertEqual("Sub", ops[1].type)
      self.assertEqual("FC", ops[2].type)
      self.assertEqual(1, len(ops[0].input))
      self.assertEqual(
          ops[0].input[0],
          self.model.input_feature_schema.float_features())
      self.assertEqual(1, len(ops[1].output))
      self.assertIn(ops[1].output[0], ops[2].input)
  def testFunctionalLayerHelper(self):
      """Functional layers created through model helper calls feed an FC layer.

      Same structure as the explicit Functional test, but the
      ReduceFrontMean and Sub layers are created directly via `self.model`.
      """
      mean = self.model.ReduceFrontMean(
          self.model.input_feature_schema.float_features, 1)
      normalized = self.model.Sub(
          schema.Tuple(
              self.model.input_feature_schema.float_features, mean),
          1, broadcast=1)

      # Attach metadata to one of the outputs and use it in FC
      normalized.set_type((np.float32, (32,)))
      self.model.output_schema = self.model.FC(normalized, 2)

      predict_net = layer_model_instantiator.generate_predict_net(
          self.model)
      ops = predict_net.Proto().op

      # unittest assertions are used instead of bare asserts so the checks
      # still run under `python -O` and report the mismatching values.
      self.assertEqual(3, len(ops))
      self.assertEqual("ReduceFrontMean", ops[0].type)
      self.assertEqual("Sub", ops[1].type)
      self.assertEqual("FC", ops[2].type)
      self.assertEqual(1, len(ops[0].input))
      self.assertEqual(
          ops[0].input[0],
          self.model.input_feature_schema.float_features())
      self.assertEqual(1, len(ops[1].output))
      self.assertIn(ops[1].output[0], ops[2].input)
  def testFunctionalLayerHelperAutoInference(self):
      """A helper-created Softsign layer gets its output type inferred.

      Checks the inferred base type and shape, then that the predict net is
      Softsign followed by an FC that consumes the Softsign output.
      """
      softsign = self.model.Softsign(
          schema.Tuple(self.model.input_feature_schema.float_features),
          1)

      # unittest assertions are used instead of bare asserts so the checks
      # still run under `python -O` and report the mismatching values.
      self.assertEqual(np.float32, softsign.field_type().base)
      self.assertEqual((32,), softsign.field_type().shape)
      self.model.output_schema = self.model.FC(softsign, 2)

      predict_net = layer_model_instantiator.generate_predict_net(
          self.model)
      ops = predict_net.Proto().op

      self.assertEqual(2, len(ops))
      self.assertEqual("Softsign", ops[0].type)
      self.assertEqual("FC", ops[1].type)
      self.assertEqual(1, len(ops[0].input))
      self.assertEqual(
          ops[0].input[0],
          self.model.input_feature_schema.float_features())
      self.assertEqual(1, len(ops[0].output))
      self.assertIn(ops[0].output[0], ops[1].input)
  def testHalfToFloatTypeInference(self):
      """FloatToHalf and HalfToFloat infer float16/float32 and keep the shape."""
      # Named `input_record` rather than `input` to avoid shadowing the builtin.
      input_record = self.new_record(schema.Scalar((np.float32, (32,))))

      # unittest assertions are used instead of bare asserts so the checks
      # still run under `python -O` and report the mismatching values.
      output = self.model.FloatToHalf(input_record, 1)
      self.assertEqual(np.float16, output.field_type().base)
      self.assertEqual((32, ), output.field_type().shape)

      output = self.model.HalfToFloat(output, 1)
      self.assertEqual(np.float32, output.field_type().base)
      self.assertEqual((32, ), output.field_type().shape)
  def testFunctionalLayerHelperAutoInferenceScalar(self):
      """AveragedLoss via the functional helper infers one float32 scalar output."""
      loss = self.model.AveragedLoss(self.model.input_feature_schema, 1)
      output_types = loss.field_types()
      self.assertEqual(1, len(output_types))
      inferred = loss.field_types()[0]
      self.assertEqual(np.float32, inferred.base)
      self.assertEqual(tuple(), loss.field_types()[0].shape)
  def testFunctionalLayerInputCoercion(self):
      """A functional Add accepts a plain list of global-constant blobs."""
      one = self.model.global_constants['ONE']
      total = self.model.Add([one, one], 1)
      self.model.loss = total
      self.run_train_net()
      fetched = workspace.FetchBlob(total.field_blobs()[0])
      np.testing.assert_array_equal([2.0], fetched)
  def testFunctionalLayerWithOutputNames(self):
      """TopK with explicit output names exposes typed, named output blobs."""
      k = 3
      output_names = ['values', 'indices']
      topk = self.model.TopK(
          self.model.input_feature_schema,
          output_names_or_num=output_names,
          k=k,
      )
      self.assertEqual(2, len(topk.field_types()))

      values_type = topk.field_types()[0]
      self.assertEqual(np.float32, values_type.base)
      self.assertEqual((k,), values_type.shape)

      indices_type = topk.field_types()[1]
      self.assertEqual(np.int32, indices_type.base)
      self.assertEqual((k,), indices_type.shape)

      self.assertEqual(['TopK/values', 'TopK/indices'], topk.field_blobs())
  def testFunctionalLayerSameOperatorOutputNames(self):
      """Two ConstantFill layers must produce outputs with distinct string forms."""
      first_fill = self.model.ConstantFill([], 1, value=1)
      second_fill = self.model.ConstantFill([], 1, value=2)
      first_name, second_name = str(first_fill), str(second_fill)
      self.assertNotEqual(first_name, second_name)
  def testFunctionalLayerWithOutputDtypes(self):
      """An explicit output_dtypes overrides the inferred AveragedLoss output type."""
      requested_dtype = (np.float32, (1,))
      loss = self.model.AveragedLoss(
          self.model.input_feature_schema,
          1,
          output_dtypes=requested_dtype,
      )
      self.assertEqual(1, len(loss.field_types()))
      output_type = loss.field_types()[0]
      self.assertEqual(np.float32, output_type.base)
      self.assertEqual((1,), loss.field_types()[0].shape)
  def testPropagateRequestOnly(self):
      """Concat output is request-only only when every input is request-only."""
      # test case when output is request only
      request_only_schema = schema.Struct(
          ('input1', schema.Scalar((np.float32, (32, )))),
          ('input2', schema.Scalar((np.float32, (64, )))),
          ('input3', schema.Scalar((np.float32, (16, )))),
      )
      input_record = self.new_record(request_only_schema)
      set_request_only(input_record)
      request_only_concat = self.model.Concat(input_record)
      self.assertEqual(is_request_only_scalar(request_only_concat), True)

      # test case when output is not request only
      extra_record = self.new_record(schema.Struct(
          ('input4', schema.Scalar((np.float32, (100, ))))
      ))
      mixed_record = extra_record + input_record
      mixed_concat = self.model.Concat(mixed_record)
      self.assertEqual(is_request_only_scalar(mixed_concat), False)
  def testSetRequestOnly(self):
      """set_request_only must keep the metadata already attached to a scalar."""
      feature_ids = [1, 100, 1001]
      input_record = schema.Scalar(np.int64)
      original_metadata = schema.Metadata(
          categorical_limit=100000000,
          expected_value=99,
          feature_specs=schema.FeatureSpec(feature_ids=feature_ids),
      )
      schema.attach_metadata_to_scalars(input_record, original_metadata)

      set_request_only(input_record)

      metadata = input_record.metadata
      self.assertEqual(metadata.categorical_limit, 100000000)
      self.assertEqual(metadata.expected_value, 99)
      self.assertEqual(metadata.feature_specs.feature_ids, [1, 100, 1001])
  @given(
      X=hu.arrays(dims=[5, 5]),  # Shape of X is irrelevant
      dropout_for_eval=st.booleans(),
  )
  def testDropout(self, X, dropout_for_eval):
      """Dropout uses is_test=0 in training and honours dropout_for_eval elsewhere.

      With dropout_for_eval set, the eval and predict nets carry the same
      Dropout op as training (is_test=0); otherwise they use is_test=1.
      All nets are finally run once on X.
      """
      input_record = self.new_record(schema.Scalar((np.float32, (1,))))
      schema.FeedRecord(input_record, [X])
      d_output = self.model.Dropout(
          input_record,
          dropout_for_eval=dropout_for_eval
      )
      self.assertEqual(schema.Scalar((np.float32, (1,))), d_output)
      self.model.output_schema = schema.Struct()

      train_init_net, train_net = self.get_training_nets()

      input_blob = input_record.field_blobs()[0]
      output_blob = d_output.field_blobs()[0]

      with_d_spec = OpSpec(
          "Dropout",
          [input_blob],
          [output_blob, None],
          {'is_test': 0, 'ratio': 0.5}
      )
      without_d_spec = OpSpec(
          "Dropout",
          [input_blob],
          [output_blob, None],
          {'is_test': 1, 'ratio': 0.5}
      )

      self.assertNetContainOps(train_net, [with_d_spec])

      eval_net = self.get_eval_net()
      predict_net = self.get_predict_net()

      # Eval and predict share one expectation, chosen by dropout_for_eval.
      if dropout_for_eval:
          inference_spec = with_d_spec
      else:
          inference_spec = without_d_spec
      self.assertNetContainOps(eval_net, [inference_spec])
      self.assertNetContainOps(predict_net, [inference_spec])

      workspace.RunNetOnce(train_init_net)
      workspace.RunNetOnce(train_net)

      schema.FeedRecord(input_record, [X])
      workspace.RunNetOnce(eval_net)

      schema.FeedRecord(input_record, [X])
      workspace.RunNetOnce(predict_net)
  @given(
      num_inputs=st.integers(1, 3),
      batch_size=st.integers(5, 10)
  )
  def testMergeIdListsLayer(self, num_inputs, batch_size):
      # Feeds `num_inputs` random id lists into MergeIdLists and checks that
      # the resulting schema matches IdList (field names are not compared).
      feed = []
      for _ in range(num_inputs):
          lengths = np.random.randint(5, size=batch_size).astype(np.int32)
          # One id per counted element, so the values blob has sum(lengths)
          # entries.
          values = np.random.randint(
              1, 10, size=lengths.sum()
          ).astype(np.int64)
          feed.extend((lengths, values))

      id_list_schemas = []
      for _ in range(num_inputs):
          item_metadata = schema.Metadata(categorical_limit=20)
          id_list_schemas.append(
              schema.List(
                  schema.Scalar(dtype=np.int64, metadata=item_metadata)
              )
          )
      input_schema = schema.Tuple(*id_list_schemas)

      input_record = schema.NewRecord(self.model.net, input_schema)
      schema.FeedRecord(input_record, feed)
      merged = self.model.MergeIdLists(input_record)
      assert schema.equal_schemas(merged, IdList, check_field_names=False)
  @given(
      batch_size=st.integers(min_value=2, max_value=10),
      input_dims=st.integers(min_value=5, max_value=10),
      output_dims=st.integers(min_value=5, max_value=10),
      bandwidth=st.floats(min_value=0.1, max_value=5),
  )
  def testRandomFourierFeatures(self, batch_size, input_dims, output_dims, bandwidth):
      # Builds a RandomFourierFeatures layer, checks the op sequence of the
      # init/train/eval/predict nets, and compares the layer output with a
      # NumPy reference after each net has been tested.

      def _assert_matches_reference(output_blob, data, weight, bias, factor):
          '''
          Compares the Random Fourier Features layer output with a NumPy
          reference.
          Inputs:
            output_blob -- blob holding the layer output
            data -- input data
            weight -- weight parameter fetched after the init net
            bias -- bias parameter fetched after the init net
            factor -- value by which the cosine features are scaled
          '''
          fetched = workspace.FetchBlob(output_blob)
          projected = np.dot(data, np.transpose(weight)) + bias
          reference = factor * np.cos(projected)
          npt.assert_allclose(fetched, reference, rtol=1e-3, atol=1e-3)

      X = np.random.random((batch_size, input_dims)).astype(np.float32)
      scale = np.sqrt(2.0 / output_dims)
      input_record = self.new_record(schema.Scalar((np.float32, (input_dims,))))
      schema.FeedRecord(input_record, [X])
      input_blob = input_record.field_blobs()[0]
      rff_output = self.model.RandomFourierFeatures(
          input_record, output_dims, bandwidth
      )
      self.model.output_schema = schema.Struct()

      self.assertEqual(
          schema.Scalar((np.float32, (output_dims, ))), rff_output
      )

      train_init_net, train_net = self.get_training_nets()

      # Init net: a GaussianFill followed by a UniformFill is expected.
      init_ops = self._test_net(
          train_init_net,
          [
              OpSpec("GaussianFill", None, None),
              OpSpec("UniformFill", None, None),
          ],
      )
      layer = self.model.layers[0]
      W = workspace.FetchBlob(layer.w)
      b = workspace.FetchBlob(layer.b)

      # Expected forward ops: FC on the two init outputs -> Cos -> Scale.
      ops_list = [
          OpSpec(
              "FC",
              [input_blob, init_ops[0].output[0], init_ops[1].output[0]],
              None,
          ),
          OpSpec("Cos", None, None),
          OpSpec("Scale", None, rff_output.field_blobs(), {'scale': scale}),
      ]

      # Train, eval and predict nets are checked in that order; the eval
      # and predict nets are only requested when their turn comes.
      net_getters = (
          lambda: train_net,
          self.get_eval_net,
          self.get_predict_net,
      )
      for get_net in net_getters:
          self._test_net(get_net(), ops_list)
          _assert_matches_reference(rff_output(), X, W, b, scale)
  @given(
      batch_size=st.integers(min_value=2, max_value=10),
      input_dims=st.integers(min_value=5, max_value=10),
      output_dims=st.integers(min_value=5, max_value=10),
      s=st.integers(min_value=0, max_value=3),
      scale=st.floats(min_value=0.1, max_value=5),
      set_weight_as_global_constant=st.booleans()
  )
  def testArcCosineFeatureMap(self, batch_size, input_dims, output_dims, s, scale,
                              set_weight_as_global_constant):
      # Builds an ArcCosineFeatureMap layer, checks the op sequence expected
      # for the given degree `s` in the train/eval/predict nets, and compares
      # the layer output with a NumPy reference after each net is tested.

      def _assert_matches_reference(output_blob, data, weight, bias, degree):
          '''
          Compares the Arc Cosine layer output with a NumPy reference.
          Inputs:
            output_blob -- blob holding the layer output
            data -- input data
            weight -- random weight parameter fetched from the workspace
            bias -- random bias parameter fetched from the workspace
            degree -- degree parameter
          '''
          fetched = workspace.FetchBlob(output_blob)

          projected = np.matmul(data, np.transpose(weight)) + bias
          powered = np.power(projected, degree)
          # Non-positive entries are zeroed. Positive entries are kept as-is
          # (factor 1) for degree > 0 and mapped through x / (1 + x) otherwise.
          positive_factor = 1 if degree > 0 else (lambda x: x / (1 + x))
          gate = np.piecewise(
              projected,
              [projected <= 0, projected > 0],
              [0, positive_factor],
          )
          reference = np.multiply(powered, gate)

          npt.assert_allclose(fetched, reference, rtol=1e-3, atol=1e-3)

      X = np.random.normal(size=(batch_size, input_dims)).astype(np.float32)
      input_record = self.new_record(schema.Scalar((np.float32, (input_dims,))))
      schema.FeedRecord(input_record, [X])
      input_blob = input_record.field_blobs()[0]

      ac_output = self.model.ArcCosineFeatureMap(
          input_record,
          output_dims,
          s=s,
          scale=scale,
          set_weight_as_global_constant=set_weight_as_global_constant,
      )
      self.model.output_schema = schema.Struct()
      self.assertEqual(
          schema.Scalar((np.float32, (output_dims, ))), ac_output
      )

      train_init_net, train_net = self.get_training_nets()

      # Run create_init_net to initialize the global constants, and W and b
      workspace.RunNetOnce(train_init_net)
      workspace.RunNetOnce(self.model.create_init_net(name='init_net'))

      if set_weight_as_global_constant:
          constants = self.model.global_constants
          W = workspace.FetchBlob(
              constants['arc_cosine_feature_map_fixed_rand_W']
          )
          b = workspace.FetchBlob(
              constants['arc_cosine_feature_map_fixed_rand_b']
          )
      else:
          layer = self.model.layers[0]
          W = workspace.FetchBlob(layer.random_w)
          b = workspace.FetchBlob(layer.random_b)

      # Operation specifications
      fc_spec = OpSpec("FC", [input_blob, None, None], None)
      softsign_spec = OpSpec("Softsign", None, None)
      relu_spec = OpSpec("Relu", None, None)
      relu_spec_output = OpSpec("Relu", None, ac_output.field_blobs())
      pow_spec = OpSpec("Pow", None, None, {'exponent': float(s - 1)})
      mul_spec = OpSpec("Mul", None, ac_output.field_blobs())

      # Degrees 0 and 1 have dedicated op sequences; every other degree
      # goes through Relu -> Pow -> Mul.
      ops_by_degree = {
          0: [fc_spec, softsign_spec, relu_spec_output],
          1: [fc_spec, relu_spec_output],
      }
      ops_list = ops_by_degree.get(
          s, [fc_spec, relu_spec, pow_spec, mul_spec]
      )

      # Train, eval and predict nets are checked in that order; the eval
      # and predict nets are only requested when their turn comes.
      net_getters = (
          lambda: train_net,
          self.get_eval_net,
          self.get_predict_net,
      )
      for get_net in net_getters:
          self._test_net(get_net(), ops_list)
          _assert_matches_reference(ac_output(), X, W, b, s)
  @given(
      batch_size=st.integers(min_value=2, max_value=10),
      input_dims=st.integers(min_value=5, max_value=10),
      output_dims=st.integers(min_value=5, max_value=10),
      s=st.integers(min_value=0, max_value=3),
      scale=st.floats(min_value=0.1, max_value=5),
      set_weight_as_global_constant=st.booleans(),
      use_struct_input=st.booleans(),
  )
  def testSemiRandomFeatures(self, batch_size, input_dims, output_dims, s, scale,
                             set_weight_as_global_constant, use_struct_input):
      # Builds a SemiRandomFeatures layer from either a single scalar input
      # or a ('full', 'random') struct input, checks the init-net and
      # forward-net op sequences for degree `s`, and compares the 'full'
      # output with a NumPy reference after each of the train, eval and
      # predict nets has been tested.

      def _semi_random_hypothesis_test(srf_output, X_full, X_random, rand_w,
                                       rand_b, s):
          '''
          Runs hypothesis test for Semi Random Features layer.

          Inputs:
              srf_output -- output of net after running semi random features layer
              X_full -- full input data
              X_random -- random-output input data
              rand_w -- random-initialized weight parameter from train_init_net
              rand_b -- random-initialized bias parameter from train_init_net
              s -- degree parameter

          '''
          # Get output from net
          net_output = workspace.FetchBlob(srf_output)

          # Fetch learned parameter blobs
          learned_w = workspace.FetchBlob(self.model.layers[0].learned_w)
          learned_b = workspace.FetchBlob(self.model.layers[0].learned_b)

          # Computing output directly
          x_rand = np.matmul(X_random, np.transpose(rand_w)) + rand_b
          x_learn = np.matmul(X_full, np.transpose(learned_w)) + learned_b
          x_pow = np.power(x_rand, s)
          # Gate on the random projection: non-positive entries are zeroed;
          # positive entries get factor 1 when s > 0 and x / (1 + x) when
          # s == 0.
          if s > 0:
              h_rand_features = np.piecewise(x_rand,
                                             [x_rand <= 0, x_rand > 0],
                                             [0, 1])
          else:
              h_rand_features = np.piecewise(x_rand,
                                             [x_rand <= 0, x_rand > 0],
                                             [0, lambda x: x / (1 + x)])
          # Reference = (gated, powered random features) * learned projection.
          output_ref = np.multiply(np.multiply(x_pow, h_rand_features), x_learn)

          # Comparing net output and computed output
          npt.assert_allclose(net_output, output_ref, rtol=1e-3, atol=1e-3)

      X_full = np.random.normal(size=(batch_size, input_dims)).astype(np.float32)
      if use_struct_input:
          # Separate data for the 'full' and 'random' fields.
          X_random = np.random.normal(size=(batch_size, input_dims)).\
              astype(np.float32)
          input_data = [X_full, X_random]
          input_record = self.new_record(schema.Struct(
              ('full', schema.Scalar(
                  (np.float32, (input_dims,))
              )),
              ('random', schema.Scalar(
                  (np.float32, (input_dims,))
              ))
          ))
      else:
          # Single scalar input: the reference uses the same data for both
          # the random and the learned projection.
          X_random = X_full
          input_data = [X_full]
          input_record = self.new_record(schema.Scalar(
              (np.float32, (input_dims,))
          ))

      schema.FeedRecord(input_record, input_data)
      srf_output = self.model.SemiRandomFeatures(
          input_record,
          output_dims,
          s=s,
          scale_random=scale,
          scale_learned=scale,
          set_weight_as_global_constant=set_weight_as_global_constant
      )

      self.model.output_schema = schema.Struct()

      # The output is expected to be a ('full', 'random') struct regardless
      # of which input form was used.
      self.assertEqual(
          schema.Struct(
              ('full', schema.Scalar(
                  (np.float32, (output_dims,))
              )),
              ('random', schema.Scalar(
                  (np.float32, (output_dims,))
              ))
          ),
          srf_output
      )

      # Expected init ops: one GaussianFill/UniformFill pair per FC.
      init_ops_list = [
          OpSpec("GaussianFill", None, None),
          OpSpec("UniformFill", None, None),
          OpSpec("GaussianFill", None, None),
          OpSpec("UniformFill", None, None),
      ]
      train_init_net, train_net = self.get_training_nets()

      # Need to run to initialize the global constants for layer
      workspace.RunNetOnce(self.model.create_init_net(name='init_net'))

      if set_weight_as_global_constant:
          # If weight params are global constants, they won't be in train_init_net
          init_ops = self._test_net(train_init_net, init_ops_list[:2])
          rand_w = workspace.FetchBlob(
              self.model.global_constants['semi_random_features_fixed_rand_W']
          )
          rand_b = workspace.FetchBlob(
              self.model.global_constants['semi_random_features_fixed_rand_b']
          )

          # Operation specifications
          # Only the learned FC can be tied to init-net outputs here; the
          # random FC's inputs are left unconstrained.
          fc_random_spec = OpSpec("FC", [None, None, None], None)
          fc_learned_spec = OpSpec("FC", [None, init_ops[0].output[0],
                                   init_ops[1].output[0]], None)
      else:
          init_ops = self._test_net(train_init_net, init_ops_list)
          rand_w = workspace.FetchBlob(self.model.layers[0].random_w)
          rand_b = workspace.FetchBlob(self.model.layers[0].random_b)

          # Operation specifications
          # Init ops 0-1 feed the random FC, init ops 2-3 the learned FC.
          fc_random_spec = OpSpec("FC", [None, init_ops[0].output[0],
                                  init_ops[1].output[0]], None)
          fc_learned_spec = OpSpec("FC", [None, init_ops[2].output[0],
                                   init_ops[3].output[0]], None)

      softsign_spec = OpSpec("Softsign", None, None)
      relu_spec = OpSpec("Relu", None, None)
      relu_output_spec = OpSpec("Relu", None, srf_output.random.field_blobs())
      pow_spec = OpSpec("Pow", None, None, {'exponent': float(s - 1)})
      mul_interim_spec = OpSpec("Mul", None, srf_output.random.field_blobs())
      mul_spec = OpSpec("Mul", None, srf_output.full.field_blobs())

      # Expected forward op sequence depends on the degree: s == 0 inserts a
      # Softsign, s == 1 is a plain Relu, and any other degree goes through
      # Relu -> Pow -> Mul before the final Mul that writes the 'full' output.
      if s == 0:
          ops_list = [
              fc_learned_spec,
              fc_random_spec,
              softsign_spec,
              relu_output_spec,
              mul_spec,
          ]
      elif s == 1:
          ops_list = [
              fc_learned_spec,
              fc_random_spec,
              relu_output_spec,
              mul_spec,
          ]
      else:
          ops_list = [
              fc_learned_spec,
              fc_random_spec,
              relu_spec,
              pow_spec,
              mul_interim_spec,
              mul_spec,
          ]

      # Train net assertions
      self._test_net(train_net, ops_list)
      _semi_random_hypothesis_test(srf_output.full(), X_full, X_random,
                                   rand_w, rand_b, s)

      # Eval net assertions
      eval_net = self.get_eval_net()
      self._test_net(eval_net, ops_list)
      _semi_random_hypothesis_test(srf_output.full(), X_full, X_random,
                                   rand_w, rand_b, s)

      # Predict net assertions
      predict_net = self.get_predict_net()
      self._test_net(predict_net, ops_list)
      _semi_random_hypothesis_test(srf_output.full(), X_full, X_random,
                                   rand_w, rand_b, s)
  def testConv(self):
      # Builds an NHWC Conv layer (1x3 kernel, stride 1) over an input of
      # per-example shape (H, W, C), runs a forward pass, and verifies the
      # fetched output shape together with the ops present in the init,
      # train, predict and eval nets.
      batch_size = 50
      H = 1
      W = 10
      C = 50
      output_dims = 32
      kernel_h = 1
      kernel_w = 3
      stride_h = 1
      stride_w = 1
      pad_t = 0
      pad_b = 0
      pad_r = None
      pad_l = None

      input_record = self.new_record(schema.Scalar((np.float32, (H, W, C))))
      X = np.random.random((batch_size, H, W, C)).astype(np.float32)
      schema.FeedRecord(input_record, [X])
      conv = self.model.Conv(
          input_record,
          output_dims,
          kernel_h=kernel_h,
          kernel_w=kernel_w,
          stride_h=stride_h,
          stride_w=stride_w,
          pad_t=pad_t,
          pad_b=pad_b,
          pad_r=pad_r,
          pad_l=pad_l,
          order='NHWC'
      )

      self.assertEqual(
          schema.Scalar((np.float32, (output_dims,))),
          conv
      )

      self.run_train_net_forward_only()
      output_record = schema.FetchRecord(conv)
      # The fetched output is expected to keep the spatial dimensions
      # (H, W) of the input while its channel dimension becomes
      # output_dims (32 here, versus C = 50 input channels).
      # unittest assertions are used rather than bare `assert` so the
      # checks survive `python -O` and report both operands on failure.
      self.assertEqual(
          output_record.field_types()[0].shape, (H, W, output_dims)
      )
      self.assertEqual(
          output_record().shape, (batch_size, H, W, output_dims)
      )

      train_init_net, train_net = self.get_training_nets()
      # Init net assertions
      init_ops = self.assertNetContainOps(
          train_init_net,
          [
              OpSpec("XavierFill", None, None),
              OpSpec("ConstantFill", None, None),
          ]
      )
      # The Conv op must consume the input blob plus the outputs of the two
      # init ops matched above, and write to the layer's output blob.
      conv_spec = OpSpec(
          "Conv",
          [
              input_record.field_blobs()[0],
              init_ops[0].output[0],
              init_ops[1].output[0],
          ],
          conv.field_blobs()
      )

      # Train net assertions
      self.assertNetContainOps(train_net, [conv_spec])

      # Predict net assertions
      predict_net = self.get_predict_net()
      self.assertNetContainOps(predict_net, [conv_spec])

      # Eval net assertions
      eval_net = self.get_eval_net()
      self.assertNetContainOps(eval_net, [conv_spec])
  @given(
      num=st.integers(min_value=10, max_value=100),
      feed_weight=st.booleans(),
      use_inv_var_parameterization=st.booleans(),
      use_log_barrier=st.booleans(),
      enable_diagnose=st.booleans(),
      **hu.gcs
  )
  @settings(deadline=1000)
  def testAdaptiveWeight(
      self, num, feed_weight, use_inv_var_parameterization, use_log_barrier,
      enable_diagnose, gc, dc
  ):
      # Runs the AdaptiveWeight layer once over `num` random scalars and
      # compares its output with sum(w * x + 0.5 * log(1 / (2 * w))), using
      # uniform weights 1/num when none are fed. With `enable_diagnose` the
      # model's ad-hoc plot blobs must reproduce the weights; without it
      # there must be none.
      input_record = self.new_record(schema.RawTuple(num))
      data = np.random.random(num)
      scalars = [np.array(value).astype(np.float32) for value in data]
      schema.FeedRecord(input_record, scalars)
      weights = np.random.random(num) if feed_weight else None

      if use_inv_var_parameterization:
          estimation_method = 'inv_var'
      else:
          estimation_method = 'log_std'
      if use_log_barrier:
          pos_optim_method = 'log_barrier'
      else:
          pos_optim_method = 'pos_grad_proj'

      layer_output = self.model.AdaptiveWeight(
          input_record,
          weights=weights,
          estimation_method=estimation_method,
          pos_optim_method=pos_optim_method,
          enable_diagnose=enable_diagnose,
      )
      train_init_net, train_net = self.get_training_nets(True)
      workspace.RunNetOnce(train_init_net)
      workspace.RunNetOnce(train_net)
      result = workspace.FetchBlob(layer_output())

      if not feed_weight:
          weights = np.array([1. / num] * num)
      weighted_data = weights * data
      log_term = 0.5 * np.log(1. / 2. / weights)
      expected = np.sum(weighted_data + log_term)
      npt.assert_allclose(expected, result, atol=1e-4, rtol=1e-4)

      plot_blobs = self.model.ad_hoc_plot_blobs
      if not enable_diagnose:
          assert len(plot_blobs) == 0
      else:
          assert len(plot_blobs) == num
          reconst_weights_from_ad_hoc = np.array(
              [workspace.FetchBlob(blob) for blob in plot_blobs]
          ).flatten()
          npt.assert_allclose(
              reconst_weights_from_ad_hoc, weights, atol=1e-4, rtol=1e-4
          )
  @given(num=st.integers(min_value=10, max_value=100), **hu.gcs)
  def testConstantWeight(self, num, gc, dc):
      # Runs the ConstantWeight layer once over `num` random scalars and
      # checks that its output equals the weighted sum of the inputs.
      input_record = self.new_record(schema.RawTuple(num))
      data = np.random.random(num)
      scalars = [np.array(value).astype(np.float32) for value in data]
      schema.FeedRecord(input_record, scalars)
      weights = np.random.random(num)

      layer_output = self.model.ConstantWeight(input_record, weights=weights)
      train_init_net, train_net = self.get_training_nets(True)
      for net in (train_init_net, train_net):
          workspace.RunNetOnce(net)

      actual = workspace.FetchBlob(layer_output())
      expected = np.sum(weights * data)
      npt.assert_allclose(expected, actual, atol=1e-4, rtol=1e-4)
  @given(**hu.gcs)
  @settings(deadline=10000)
  def testHomotopyWeight(self, gc, dc):
      # Runs the HomotopyWeight layer for `half_life` and then up to
      # `quad_life` iterations, checking the weighted sum of the two inputs
      # at both points: an even (1/2, 1/2) blend of min/max weight at
      # half_life, and a (3/4, 1/4) / (1/4, 3/4) blend at quad_life.
      input_record = self.new_record(schema.RawTuple(2))
      data = np.random.random(2)
      scalars = [np.array(value).astype(np.float32) for value in data]
      schema.FeedRecord(input_record, scalars)

      # ensure: quad_life > 2 * half_life
      half_life = int(np.random.random() * 1e2 + 1)
      quad_life = int(np.random.random() * 1e3 + 2 * half_life + 1)
      min_weight = np.random.random()
      max_weight = np.random.random() + min_weight + 1e-5

      layer_output = self.model.HomotopyWeight(
          input_record,
          min_weight=min_weight,
          max_weight=max_weight,
          half_life=half_life,
          quad_life=quad_life,
      )
      train_init_net, train_net = self.get_training_nets(True)
      workspace.RunNetOnce(train_init_net)
      workspace.CreateNet(train_net)

      net_name = train_net.Name()
      workspace.RunNet(net_name, num_iter=half_life)
      half_life_result = workspace.FetchBlob(layer_output())
      # Run the remaining iterations so the total reaches quad_life.
      workspace.RunNet(train_net.Name(), num_iter=quad_life - half_life)
      quad_life_result = workspace.FetchBlob(layer_output())

      first, second = data[0], data[1]

      midpoint = (min_weight + max_weight) / 2.
      expected_half_life_result = midpoint * first + midpoint * second

      alpha = (3 * min_weight + max_weight) / 4.
      beta = (min_weight + 3 * max_weight) / 4.
      expected_quad_life_result = alpha * first + beta * second

      checkpoints = (
          (expected_half_life_result, half_life_result),
          (expected_quad_life_result, quad_life_result),
      )
      for expected, actual in checkpoints:
          npt.assert_allclose(expected, actual, atol=1e-2, rtol=1e-2)
  def _testLabelSmooth(self, categories, binary_prob_label, bsz):
      # Shared driver for the LabelSmooth tests: feeds `bsz` random integer
      # labels in [0, categories), applies LabelSmooth with a random smooth
      # matrix, and checks every output row against a direct lookup.
      #
      # categories -- exclusive upper bound of the generated labels
      # binary_prob_label -- when true the smooth matrix has 2 entries,
      #     otherwise it is (categories x categories)
      # bsz -- number of labels to generate
      label = self.new_record(schema.Scalar((np.float32, (1, ))))
      label_np = np.random.randint(categories, size=bsz).astype(np.float32)
      schema.FeedRecord(label, [label_np])

      if binary_prob_label:
          smooth_matrix_shape = 2
      else:
          smooth_matrix_shape = (categories, categories)
      smooth_matrix = np.random.random(smooth_matrix_shape)

      smoothed_label = self.model.LabelSmooth(label, smooth_matrix)
      train_init_net, train_net = self.get_training_nets(True)
      for net in (train_init_net, train_net):
          workspace.RunNetOnce(net)
      smoothed_label_np = workspace.FetchBlob(smoothed_label())

      def lookup(raw_label):
          # Binary case: label 0 selects entry 0, anything else entry 1.
          # Categorical case: the label indexes a row of the matrix.
          if binary_prob_label:
              return smooth_matrix[0 if raw_label == 0.0 else 1]
          return smooth_matrix[int(raw_label)]

      expected = np.array([lookup(raw_label) for raw_label in label_np])
      npt.assert_allclose(expected, smoothed_label_np, atol=1e-4, rtol=1e-4)
  @given(
      categories=st.integers(min_value=2, max_value=10),
      bsz=st.integers(min_value=10, max_value=100),
      **hu.gcs
  )
  def testLabelSmoothForCategoricalLabel(self, categories, bsz, gc, dc):
      # Categorical variant: full (categories x categories) smooth matrix.
      self._testLabelSmooth(
          categories=categories, binary_prob_label=False, bsz=bsz
      )
  @given(
      bsz=st.integers(min_value=10, max_value=100),
      **hu.gcs
  )
  def testLabelSmoothForBinaryProbLabel(self, bsz, gc, dc):
      # Binary variant: two label values and a 2-entry smooth matrix.
      self._testLabelSmooth(categories=2, binary_prob_label=True, bsz=bsz)
  @given(
      num_inputs=st.integers(min_value=2, max_value=10),
      batch_size=st.integers(min_value=2, max_value=10),
      input_dim=st.integers(min_value=5, max_value=10),
      seed=st.integers(1, 10),
  )
  def testBlobWeightedSum(self, num_inputs, batch_size, input_dim, seed):
      # Builds a BlobWeightedSum layer over `num_inputs` equally shaped
      # inputs and checks, for the train, forward-only train, eval and
      # predict nets, that the output equals the sum of each input scaled
      # by its weight blob as currently stored in the workspace.

      def get_blob_weighted_sum():
          # Recomputes the expected output from the weight blobs
          # 'blob_weighted_sum/w_<i>' fetched from the workspace.
          weights = []
          for i in range(num_inputs):
              w_blob_name = 'blob_weighted_sum/w_{0}'.format(i)
              self.assertTrue(
                  workspace.HasBlob(w_blob_name),
                  "cannot find blob {}".format(w_blob_name)
              )
              weights.append(workspace.FetchBlob(w_blob_name))

          return np.sum(
              [data * weight for data, weight in zip(input_data, weights)],
              axis=0
          )

      # Seed NumPy so the generated inputs are reproducible per example.
      np.random.seed(seed)
      expected_output_schema = schema.Scalar((np.float32, (input_dim,)))
      input_schema = schema.Tuple(
          *[expected_output_schema for _ in range(num_inputs)]
      )
      input_data = [
          np.random.random((batch_size, input_dim)).astype(np.float32)
          for _ in range(num_inputs)
      ]
      input_record = self.new_record(input_schema)
      schema.FeedRecord(input_record, input_data)

      # test output schema
      ws_output = self.model.BlobWeightedSum(input_record)
      self.assertEqual(len(self.model.layers), 1)
      self.assertTrue(
          schema.equal_schemas(ws_output, expected_output_schema)
      )

      # test train net
      train_init_net, train_net = self.get_training_nets()
      workspace.RunNetOnce(train_init_net)
      workspace.RunNetOnce(train_net)
      output = workspace.FetchBlob(ws_output())
      npt.assert_almost_equal(get_blob_weighted_sum(), output, decimal=5)

      self.run_train_net_forward_only()
      output = workspace.FetchBlob(ws_output())
      npt.assert_almost_equal(get_blob_weighted_sum(), output, decimal=5)

      # test eval net
      eval_net = self.get_eval_net()
      workspace.RunNetOnce(eval_net)
      output = workspace.FetchBlob(ws_output())
      npt.assert_almost_equal(get_blob_weighted_sum(), output, decimal=5)

      # test pred net
      pred_net = self.get_predict_net()
      workspace.RunNetOnce(pred_net)
      output = workspace.FetchBlob(ws_output())
      npt.assert_almost_equal(get_blob_weighted_sum(), output, decimal=5)
  def testFeatureSparseToDenseGetAccessedFeatures(self):
      # Builds a FeatureSparseToDense layer over FLOAT, ID_LIST and
      # ID_SCORE_LIST columns and checks that the layer reports, per column,
      # the feature type and the set of feature ids it was configured with.
      float_column, float_type, float_ids = (
          "float_features", "FLOAT", [1, 2, 3]
      )
      id_list_column, id_list_type, id_list_ids = (
          "id_list_features", "ID_LIST", [4, 5, 6]
      )
      id_score_list_column, id_score_list_type, id_score_list_ids = (
          "id_score_list_features", "ID_SCORE_LIST", [7, 8, 9]
      )
      feature_names = ["a", "b", "c"]

      input_record = self.new_record(schema.Struct(
          (float_column, schema.Map(np.int32, np.float32)),
          (id_list_column, schema.Map(np.int32, schema.List(np.int64))),
          (
              id_score_list_column,
              schema.Map(np.int32, schema.Map(np.int64, np.float32)),
          ),
      ))

      # (column name, feature type, feature ids) for each input column.
      columns = [
          (float_column, float_type, float_ids),
          (id_list_column, id_list_type, id_list_ids),
          (id_score_list_column, id_score_list_type, id_score_list_ids),
      ]

      input_specs = [
          (
              column,
              schema.FeatureSpec(
                  feature_type=feature_type,
                  feature_ids=feature_ids,
                  feature_names=feature_names,
              ),
          )
          for column, feature_type, feature_ids in columns
      ]

      self.model.FeatureSparseToDense(input_record, input_specs)

      expected_accessed_features = {
          column: [AccessedFeatures(feature_type, set(feature_ids))]
          for column, feature_type, feature_ids in columns
      }

      self.assertEqual(len(self.model.layers), 1)
      self.assertEqual(
          self.model.layers[0].get_accessed_features(),
          expected_accessed_features
      )
  def test_get_key(self):
      # Checks that get_key() picks the same key field as the previous
      # inline logic for both an id-score-list record and an id-list record.

      def legacy_sparse_key(record):
          # Previous selection logic: items() for IdList-like records,
          # keys() for IdScoreList-like records (field types ignored),
          # NotImplementedError for anything else.
          if almost_equal_schemas(record, IdList):
              return record.items()
          if almost_equal_schemas(
              record, IdScoreList, check_field_types=False
          ):
              return record.keys()
          raise NotImplementedError()

      id_score_list_schema = schema.Map(
          schema.Scalar(
              np.int64,
              metadata=schema.Metadata(categorical_limit=1000),
          ),
          np.float32
      )
      id_score_list_record = schema.NewRecord(
          self.model.net, id_score_list_schema
      )
      self.assertEqual(
          get_key(id_score_list_record)(),
          legacy_sparse_key(id_score_list_record)
      )

      id_list_schema = schema.List(
          schema.Scalar(
              np.int64,
              metadata=schema.Metadata(categorical_limit=1000)
          )
      )
      id_list_record = schema.NewRecord(self.model.net, id_list_schema)
      self.assertEqual(
          get_key(id_list_record)(),
          legacy_sparse_key(id_list_record)
      )
  def testSparseLookupWithAttentionWeightOnIdScoreList(self):
      # Builds a "Sum" SparseLookup with external weights over an
      # id-score-list record and checks that both the train and predict
      # nets contain a SparseLengthsWeightedSum op wired to the embedding
      # table, the record's values, keys and lengths.
      key_schema = schema.Scalar(
          np.int64,
          metadata=schema.Metadata(categorical_limit=1000),
      )
      record = schema.NewRecord(
          self.model.net, schema.Map(key_schema, np.float32)
      )
      embedding_dim = 64
      pooled = self.model.SparseLookup(
          record, [embedding_dim], "Sum", use_external_weights=True
      )
      self.model.output_schema = schema.Struct()
      self.assertEqual(
          schema.Scalar((np.float32, (embedding_dim,))), pooled
      )

      train_init_net, train_net = self.get_training_nets()

      expected_init_ops = [
          OpSpec("UniformFill", None, None),
          OpSpec("ConstantFill", None, None),
      ]
      init_ops = self.assertNetContainOps(train_init_net, expected_init_ops)

      # Inputs: output of the first init op, then the record's values,
      # keys and lengths, in that order.
      lookup_inputs = [
          init_ops[0].output[0],
          record.values(),
          record.keys(),
          record.lengths(),
      ]
      sparse_lookup_op_spec = OpSpec(
          "SparseLengthsWeightedSum", lookup_inputs, [pooled()]
      )

      self.assertNetContainOps(train_net, [sparse_lookup_op_spec])

      predict_net = self.get_predict_net()
      self.assertNetContainOps(predict_net, [sparse_lookup_op_spec])
  def testSparseItemwiseDropoutWithReplacement(self):
      # Feeds ten single-element id lists through
      # SparseItemwiseDropoutWithReplacement and checks that the train net
      # leaves values and lengths untouched, while after the predict net
      # every value has been replaced by -1 (lengths unchanged).
      input_record = schema.NewRecord(self.model.net, IdList)
      self.model.output_schema = schema.Struct()

      lengths_blob = input_record.field_blobs()[0]
      values_blob = input_record.field_blobs()[1]
      lengths = np.array([1] * 10).astype(np.int32)
      values = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).astype(np.int64)
      workspace.FeedBlob(lengths_blob, lengths)
      workspace.FeedBlob(values_blob, values)

      # NOTE(review): the positional arguments are presumably the dropout
      # ratios for train / eval / predict followed by the replacement
      # value, which matches the assertions below; confirm against the
      # layer's signature.
      out = self.model.SparseItemwiseDropoutWithReplacement(
          input_record, 0.0, 0.5, 1.0, -1, output_names_or_num=1)
      self.assertEqual(schema.List(schema.Scalar(np.int64,)), out)

      train_init_net, train_net = self.get_training_nets()
      eval_net = self.get_eval_net()
      predict_net = self.get_predict_net()

      workspace.RunNetOnce(train_init_net)
      workspace.RunNetOnce(train_net)
      out_values = workspace.FetchBlob(out.items())
      out_lengths = workspace.FetchBlob(out.lengths())
      # After training the output must equal the input exactly.
      self.assertBlobsEqual(out_values, values)
      self.assertBlobsEqual(out_lengths, lengths)

      # The eval net is only executed; its output is not inspected.
      workspace.RunNetOnce(eval_net)

      workspace.RunNetOnce(predict_net)
      # NOTE(review): the output is read back through hard-coded blob names
      # rather than through `out`; presumably these are the auto-generated
      # names of the layer's output blobs - confirm.
      predict_values = workspace.FetchBlob("values_auto_0")
      predict_lengths = workspace.FetchBlob("lengths_auto_0")
      self.assertBlobsEqual(predict_values, np.array([-1] * 10).astype(np.int64))
      self.assertBlobsEqual(predict_lengths, lengths)