dacorvo HF Staff commited on
Commit
bccb7ca
·
verified ·
1 Parent(s): bd174f8

Synchronizing local compiler cache.

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +72 -0
  2. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/0cc25526e2cfc37a8875a3752f33c4d7505d8a07b869d0f3f41915cf6e763b74/88ab959ffd7f969e9ecf.json +59 -0
  3. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/866e4ae3016710258519.json +88 -0
  4. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/8c90ac2593ed0b7f1ecb60e82cb184fb11f2ea640befa1cc7b10766a5c02525d/4d19457aee330abf74c2.json +165 -0
  5. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/8c90ac2593ed0b7f1ecb60e82cb184fb11f2ea640befa1cc7b10766a5c02525d/bde8d1f238d097f95d53.json +165 -0
  6. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/granite/ibm-granite/granite-3.1-2b-instruct/88ab959ffd7f969e9ecf.json +59 -0
  7. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/phi3/microsoft/Phi-3.5-mini-instruct/bde8d1f238d097f95d53.json +165 -0
  8. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen3/Qwen/Qwen3-Embedding-0.6B/866e4ae3016710258519.json +88 -0
  9. neuronxcc-2.21.33363.0+82129205/MODULE_034cf87f477c882b257f+a02c3a36/compile_flags.json +1 -0
  10. neuronxcc-2.21.33363.0+82129205/MODULE_034cf87f477c882b257f+a02c3a36/model.done +0 -0
  11. neuronxcc-2.21.33363.0+82129205/MODULE_034cf87f477c882b257f+a02c3a36/model.hlo_module.pb +3 -0
  12. neuronxcc-2.21.33363.0+82129205/MODULE_034cf87f477c882b257f+a02c3a36/model.neff +3 -0
  13. neuronxcc-2.21.33363.0+82129205/MODULE_034cf87f477c882b257f+a02c3a36/wrapped_neff.hlo +3 -0
  14. neuronxcc-2.21.33363.0+82129205/MODULE_04cec4dcda7910e7fb2e+283df001/compile_flags.json +1 -0
  15. neuronxcc-2.21.33363.0+82129205/MODULE_04cec4dcda7910e7fb2e+283df001/model.done +0 -0
  16. neuronxcc-2.21.33363.0+82129205/MODULE_04cec4dcda7910e7fb2e+283df001/model.hlo_module.pb +3 -0
  17. neuronxcc-2.21.33363.0+82129205/MODULE_04cec4dcda7910e7fb2e+283df001/model.neff +3 -0
  18. neuronxcc-2.21.33363.0+82129205/MODULE_0b9974a2d1726da77b0a+a02c3a36/compile_flags.json +1 -0
  19. neuronxcc-2.21.33363.0+82129205/MODULE_0b9974a2d1726da77b0a+a02c3a36/model.done +0 -0
  20. neuronxcc-2.21.33363.0+82129205/MODULE_0b9974a2d1726da77b0a+a02c3a36/model.hlo_module.pb +3 -0
  21. neuronxcc-2.21.33363.0+82129205/MODULE_0b9974a2d1726da77b0a+a02c3a36/model.neff +3 -0
  22. neuronxcc-2.21.33363.0+82129205/MODULE_0b9974a2d1726da77b0a+a02c3a36/wrapped_neff.hlo +3 -0
  23. neuronxcc-2.21.33363.0+82129205/MODULE_0c504f5fe9366071df6f+2b70e3a5/compile_flags.json +1 -0
  24. neuronxcc-2.21.33363.0+82129205/MODULE_0c504f5fe9366071df6f+2b70e3a5/model.done +0 -0
  25. neuronxcc-2.21.33363.0+82129205/MODULE_0c504f5fe9366071df6f+2b70e3a5/model.hlo_module.pb +3 -0
  26. neuronxcc-2.21.33363.0+82129205/MODULE_0c504f5fe9366071df6f+2b70e3a5/model.neff +3 -0
  27. neuronxcc-2.21.33363.0+82129205/MODULE_0c504f5fe9366071df6f+2b70e3a5/wrapped_neff.hlo +3 -0
  28. neuronxcc-2.21.33363.0+82129205/MODULE_10244305442015770634+e30acd3a/model.neff +0 -0
  29. neuronxcc-2.21.33363.0+82129205/MODULE_10606948783918825529+e30acd3a/model.neff +0 -0
  30. neuronxcc-2.21.33363.0+82129205/MODULE_10645643398657092095+e30acd3a/model.neff +0 -0
  31. neuronxcc-2.21.33363.0+82129205/MODULE_10661660426924300837+e30acd3a/model.neff +0 -0
  32. neuronxcc-2.21.33363.0+82129205/MODULE_10746122569655005679+e30acd3a/model.neff +0 -0
  33. neuronxcc-2.21.33363.0+82129205/MODULE_11086318750207148626+e30acd3a/model.neff +0 -0
  34. neuronxcc-2.21.33363.0+82129205/MODULE_11301370888592189606+e30acd3a/model.neff +1 -1
  35. neuronxcc-2.21.33363.0+82129205/MODULE_11909310600244571805+e30acd3a/model.neff +0 -0
  36. neuronxcc-2.21.33363.0+82129205/MODULE_11933818254123612383+e30acd3a/model.neff +0 -0
  37. neuronxcc-2.21.33363.0+82129205/MODULE_1210392327607194823+e30acd3a/model.neff +0 -0
  38. neuronxcc-2.21.33363.0+82129205/MODULE_13102010590082783346+e30acd3a/model.neff +0 -0
  39. neuronxcc-2.21.33363.0+82129205/MODULE_13475273426270778455+e30acd3a/model.neff +0 -0
  40. neuronxcc-2.21.33363.0+82129205/MODULE_13728813963059599796+e30acd3a/model.neff +0 -0
  41. neuronxcc-2.21.33363.0+82129205/MODULE_13783872956654054643+e30acd3a/model.neff +0 -0
  42. neuronxcc-2.21.33363.0+82129205/MODULE_14097776130612710282+e30acd3a/model.neff +0 -0
  43. neuronxcc-2.21.33363.0+82129205/MODULE_14155105943057125178+e30acd3a/model.neff +0 -0
  44. neuronxcc-2.21.33363.0+82129205/MODULE_14173682194645188821+e30acd3a/model.neff +0 -0
  45. neuronxcc-2.21.33363.0+82129205/MODULE_14274833313744358348+e30acd3a/model.neff +0 -0
  46. neuronxcc-2.21.33363.0+82129205/MODULE_14469716438804935215+e30acd3a/model.neff +0 -0
  47. neuronxcc-2.21.33363.0+82129205/MODULE_14657035910656097540+e30acd3a/model.neff +1 -1
  48. neuronxcc-2.21.33363.0+82129205/MODULE_14739672283771660808+e30acd3a/model.neff +0 -0
  49. neuronxcc-2.21.33363.0+82129205/MODULE_15128894797204950836+e30acd3a/model.neff +1 -1
  50. neuronxcc-2.21.33363.0+82129205/MODULE_15557971880365771457+e30acd3a/model.neff +0 -0
.gitattributes CHANGED
@@ -17193,3 +17193,75 @@ neuronxcc-2.21.33363.0+82129205/MODULE_bec56f8afdbf66a50c25+a02c3a36/wrapped_nef
17193
  neuronxcc-2.21.33363.0+82129205/MODULE_e337d9bea67418c4cbfc+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
17194
  neuronxcc-2.21.33363.0+82129205/MODULE_e337d9bea67418c4cbfc+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
17195
  neuronxcc-2.21.33363.0+82129205/MODULE_e860112df381f8b3f4ae+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17193
  neuronxcc-2.21.33363.0+82129205/MODULE_e337d9bea67418c4cbfc+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
17194
  neuronxcc-2.21.33363.0+82129205/MODULE_e337d9bea67418c4cbfc+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
17195
  neuronxcc-2.21.33363.0+82129205/MODULE_e860112df381f8b3f4ae+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
17196
+ neuronxcc-2.21.33363.0+82129205/MODULE_034cf87f477c882b257f+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
17197
+ neuronxcc-2.21.33363.0+82129205/MODULE_034cf87f477c882b257f+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
17198
+ neuronxcc-2.21.33363.0+82129205/MODULE_04cec4dcda7910e7fb2e+283df001/model.neff filter=lfs diff=lfs merge=lfs -text
17199
+ neuronxcc-2.21.33363.0+82129205/MODULE_0b9974a2d1726da77b0a+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
17200
+ neuronxcc-2.21.33363.0+82129205/MODULE_0b9974a2d1726da77b0a+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
17201
+ neuronxcc-2.21.33363.0+82129205/MODULE_0c504f5fe9366071df6f+2b70e3a5/model.neff filter=lfs diff=lfs merge=lfs -text
17202
+ neuronxcc-2.21.33363.0+82129205/MODULE_0c504f5fe9366071df6f+2b70e3a5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
17203
+ neuronxcc-2.21.33363.0+82129205/MODULE_1bfa3dd1929fbcd69193+6170d8e1/model.neff filter=lfs diff=lfs merge=lfs -text
17204
+ neuronxcc-2.21.33363.0+82129205/MODULE_25ecfcd32b38ffcca9c6+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
17205
+ neuronxcc-2.21.33363.0+82129205/MODULE_264f116884a2b72bfd94+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
17206
+ neuronxcc-2.21.33363.0+82129205/MODULE_264f116884a2b72bfd94+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
17207
+ neuronxcc-2.21.33363.0+82129205/MODULE_272f23df0029fc228e27+6e4949b4/model.neff filter=lfs diff=lfs merge=lfs -text
17208
+ neuronxcc-2.21.33363.0+82129205/MODULE_30f9fb1c90d72ce5be20+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
17209
+ neuronxcc-2.21.33363.0+82129205/MODULE_35c78e406a558e46489f+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
17210
+ neuronxcc-2.21.33363.0+82129205/MODULE_35c78e406a558e46489f+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
17211
+ neuronxcc-2.21.33363.0+82129205/MODULE_3be023fb36d494480eab+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
17212
+ neuronxcc-2.21.33363.0+82129205/MODULE_3be023fb36d494480eab+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
17213
+ neuronxcc-2.21.33363.0+82129205/MODULE_3f20d1a2a58617b5c568+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
17214
+ neuronxcc-2.21.33363.0+82129205/MODULE_4371876575046bfaddbc+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
17215
+ neuronxcc-2.21.33363.0+82129205/MODULE_4a866a7ab3440bf5039c+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
17216
+ neuronxcc-2.21.33363.0+82129205/MODULE_4a866a7ab3440bf5039c+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
17217
+ neuronxcc-2.21.33363.0+82129205/MODULE_4c340605e5cfcfb52aa0+4cce07ab/model.neff filter=lfs diff=lfs merge=lfs -text
17218
+ neuronxcc-2.21.33363.0+82129205/MODULE_4c340605e5cfcfb52aa0+4cce07ab/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
17219
+ neuronxcc-2.21.33363.0+82129205/MODULE_4e8633381936708d6422+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text
17220
+ neuronxcc-2.21.33363.0+82129205/MODULE_4e8633381936708d6422+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
17221
+ neuronxcc-2.21.33363.0+82129205/MODULE_5882b7576829bc99fe1e+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text
17222
+ neuronxcc-2.21.33363.0+82129205/MODULE_5ca02efa566c733695c6+3ab2fb27/model.neff filter=lfs diff=lfs merge=lfs -text
17223
+ neuronxcc-2.21.33363.0+82129205/MODULE_5ca02efa566c733695c6+3ab2fb27/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
17224
+ neuronxcc-2.21.33363.0+82129205/MODULE_649c25ed694844adcb1e+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
17225
+ neuronxcc-2.21.33363.0+82129205/MODULE_649c25ed694844adcb1e+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
17226
+ neuronxcc-2.21.33363.0+82129205/MODULE_6b4fc5b451fdfa1b744b+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
17227
+ neuronxcc-2.21.33363.0+82129205/MODULE_6b4fc5b451fdfa1b744b+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
17228
+ neuronxcc-2.21.33363.0+82129205/MODULE_6e8fd65e06dcc6271828+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
17229
+ neuronxcc-2.21.33363.0+82129205/MODULE_6e8fd65e06dcc6271828+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
17230
+ neuronxcc-2.21.33363.0+82129205/MODULE_731d30e8407c98f2d0ed+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
17231
+ neuronxcc-2.21.33363.0+82129205/MODULE_75a04de57ab666b70aff+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
17232
+ neuronxcc-2.21.33363.0+82129205/MODULE_76e92077ca40a96b12db+6170d8e1/model.neff filter=lfs diff=lfs merge=lfs -text
17233
+ neuronxcc-2.21.33363.0+82129205/MODULE_7d8253c434168957472d+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
17234
+ neuronxcc-2.21.33363.0+82129205/MODULE_7d8253c434168957472d+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
17235
+ neuronxcc-2.21.33363.0+82129205/MODULE_7ec08bf0fb6b63a494ce+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
17236
+ neuronxcc-2.21.33363.0+82129205/MODULE_7ec08bf0fb6b63a494ce+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
17237
+ neuronxcc-2.21.33363.0+82129205/MODULE_8ec74b7503cb0a5e0b12+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
17238
+ neuronxcc-2.21.33363.0+82129205/MODULE_8ec74b7503cb0a5e0b12+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
17239
+ neuronxcc-2.21.33363.0+82129205/MODULE_9efcdde20b63764672c5+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
17240
+ neuronxcc-2.21.33363.0+82129205/MODULE_b0696c18110a46d8a884+6f0a400d/model.neff filter=lfs diff=lfs merge=lfs -text
17241
+ neuronxcc-2.21.33363.0+82129205/MODULE_b0696c18110a46d8a884+6f0a400d/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
17242
+ neuronxcc-2.21.33363.0+82129205/MODULE_b671fe852f9096fe7aa3+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
17243
+ neuronxcc-2.21.33363.0+82129205/MODULE_b671fe852f9096fe7aa3+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
17244
+ neuronxcc-2.21.33363.0+82129205/MODULE_b72fc7afaa1831a24a09+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
17245
+ neuronxcc-2.21.33363.0+82129205/MODULE_b72fc7afaa1831a24a09+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
17246
+ neuronxcc-2.21.33363.0+82129205/MODULE_bde9ee2bb7e53f22e095+80d05c3f/model.neff filter=lfs diff=lfs merge=lfs -text
17247
+ neuronxcc-2.21.33363.0+82129205/MODULE_bde9ee2bb7e53f22e095+80d05c3f/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
17248
+ neuronxcc-2.21.33363.0+82129205/MODULE_c6635e2179b5a4161da1+6170d8e1/model.neff filter=lfs diff=lfs merge=lfs -text
17249
+ neuronxcc-2.21.33363.0+82129205/MODULE_c69b5542e96ab576f244+755a2aef/model.neff filter=lfs diff=lfs merge=lfs -text
17250
+ neuronxcc-2.21.33363.0+82129205/MODULE_c69b5542e96ab576f244+755a2aef/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
17251
+ neuronxcc-2.21.33363.0+82129205/MODULE_ce61e92cac5a58aac125+4af285bc/model.neff filter=lfs diff=lfs merge=lfs -text
17252
+ neuronxcc-2.21.33363.0+82129205/MODULE_ce61e92cac5a58aac125+4af285bc/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
17253
+ neuronxcc-2.21.33363.0+82129205/MODULE_d13ee67d85c8f49510c4+2dde74c7/model.neff filter=lfs diff=lfs merge=lfs -text
17254
+ neuronxcc-2.21.33363.0+82129205/MODULE_d13ee67d85c8f49510c4+2dde74c7/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
17255
+ neuronxcc-2.21.33363.0+82129205/MODULE_d56f37f16ff55f594311+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
17256
+ neuronxcc-2.21.33363.0+82129205/MODULE_e8a43167f74176083689+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
17257
+ neuronxcc-2.21.33363.0+82129205/MODULE_e8a43167f74176083689+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
17258
+ neuronxcc-2.21.33363.0+82129205/MODULE_ef6a56befefaaf69dc23+6170d8e1/model.neff filter=lfs diff=lfs merge=lfs -text
17259
+ neuronxcc-2.21.33363.0+82129205/MODULE_f0597be9cdf3f20234ea+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
17260
+ neuronxcc-2.21.33363.0+82129205/MODULE_f16e59ac0827406c000f+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
17261
+ neuronxcc-2.21.33363.0+82129205/MODULE_f76d71fc849f0090bedd+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
17262
+ neuronxcc-2.21.33363.0+82129205/MODULE_f76d71fc849f0090bedd+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
17263
+ neuronxcc-2.21.33363.0+82129205/MODULE_f7d293fceac8bc3b694c+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
17264
+ neuronxcc-2.21.33363.0+82129205/MODULE_f8644b89d99409c828a0+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
17265
+ neuronxcc-2.21.33363.0+82129205/MODULE_fadf125c58ba12884396+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
17266
+ neuronxcc-2.21.33363.0+82129205/MODULE_fadf125c58ba12884396+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
17267
+ neuronxcc-2.21.33363.0+82129205/MODULE_fbd4adbd7e12955cf995+6170d8e1/model.neff filter=lfs diff=lfs merge=lfs -text
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/0cc25526e2cfc37a8875a3752f33c4d7505d8a07b869d0f3f41915cf6e763b74/88ab959ffd7f969e9ecf.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "ibm-granite/granite-3.1-2b-instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.1,
10
+ "attention_multiplier": 0.015625,
11
+ "dtype": "bfloat16",
12
+ "embedding_multiplier": 12.0,
13
+ "hidden_act": "silu",
14
+ "hidden_size": 2048,
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 8192,
17
+ "logits_scaling": 8.0,
18
+ "max_position_embeddings": 131072,
19
+ "mlp_bias": false,
20
+ "model_type": "granite",
21
+ "neuron": {
22
+ "_serialized_key": "NxDNeuronConfig",
23
+ "batch_size": 4,
24
+ "capacity_factor": null,
25
+ "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
26
+ "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d",
27
+ "continuous_batching": true,
28
+ "ep_degree": 1,
29
+ "fused_qkv": true,
30
+ "glu_mlp": true,
31
+ "local_ranks_size": 2,
32
+ "max_batch_size": 4,
33
+ "max_context_length": 1024,
34
+ "max_topk": 256,
35
+ "n_active_tokens": 1024,
36
+ "neuronxcc_version": "2.21.33363.0+82129205",
37
+ "on_device_sampling": true,
38
+ "optimum_neuron_version": "0.4.6.dev3",
39
+ "output_logits": false,
40
+ "pp_degree": 1,
41
+ "prefill_chunk_size": 0,
42
+ "sequence_length": 1024,
43
+ "speculation_length": 0,
44
+ "start_rank_id": 0,
45
+ "target": "trn1",
46
+ "torch_dtype": "bfloat16",
47
+ "tp_degree": 2
48
+ },
49
+ "num_attention_heads": 32,
50
+ "num_hidden_layers": 40,
51
+ "num_key_value_heads": 8,
52
+ "residual_multiplier": 0.22,
53
+ "rms_norm_eps": 1e-05,
54
+ "rope_scaling": null,
55
+ "rope_theta": 5000000.0,
56
+ "tie_word_embeddings": true,
57
+ "use_cache": true,
58
+ "vocab_size": 49155
59
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/866e4ae3016710258519.json ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen3-Embedding-0.6B",
4
+ "_task": "feature-extraction",
5
+ "architectures": [
6
+ "Qwen3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "head_dim": 128,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 1024,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "layer_types": [
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention"
45
+ ],
46
+ "max_position_embeddings": 32768,
47
+ "max_window_layers": 28,
48
+ "model_type": "qwen3",
49
+ "neuron": {
50
+ "_serialized_key": "NxDNeuronConfig",
51
+ "batch_size": 4,
52
+ "capacity_factor": null,
53
+ "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
54
+ "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
55
+ "continuous_batching": false,
56
+ "ep_degree": 1,
57
+ "fused_qkv": true,
58
+ "glu_mlp": true,
59
+ "local_ranks_size": 2,
60
+ "max_batch_size": 4,
61
+ "max_context_length": 8192,
62
+ "max_topk": 256,
63
+ "n_active_tokens": 8192,
64
+ "neuronxcc_version": "2.21.33363.0+82129205",
65
+ "on_device_sampling": false,
66
+ "optimum_neuron_version": "0.4.6.dev3",
67
+ "output_logits": false,
68
+ "pp_degree": 1,
69
+ "prefill_chunk_size": 1024,
70
+ "sequence_length": 8192,
71
+ "speculation_length": 0,
72
+ "start_rank_id": 0,
73
+ "target": "trn1",
74
+ "torch_dtype": "bfloat16",
75
+ "tp_degree": 2
76
+ },
77
+ "num_attention_heads": 16,
78
+ "num_hidden_layers": 28,
79
+ "num_key_value_heads": 8,
80
+ "rms_norm_eps": 1e-06,
81
+ "rope_scaling": null,
82
+ "rope_theta": 1000000,
83
+ "sliding_window": null,
84
+ "tie_word_embeddings": true,
85
+ "use_cache": true,
86
+ "use_sliding_window": false,
87
+ "vocab_size": 151669
88
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/8c90ac2593ed0b7f1ecb60e82cb184fb11f2ea640befa1cc7b10766a5c02525d/4d19457aee330abf74c2.json ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "microsoft/Phi-3.5-mini-instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Phi3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "auto_map": {
11
+ "AutoConfig": "configuration_phi3.Phi3Config",
12
+ "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM"
13
+ },
14
+ "dtype": "bfloat16",
15
+ "embd_pdrop": 0.0,
16
+ "hidden_act": "silu",
17
+ "hidden_size": 3072,
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 8192,
20
+ "max_position_embeddings": 131072,
21
+ "model_type": "phi3",
22
+ "neuron": {
23
+ "_serialized_key": "NxDNeuronConfig",
24
+ "batch_size": 1,
25
+ "capacity_factor": null,
26
+ "checkpoint_id": "microsoft/Phi-3.5-mini-instruct",
27
+ "checkpoint_revision": "2fe192450127e6a83f7441aef6e3ca586c338b77",
28
+ "continuous_batching": false,
29
+ "ep_degree": 1,
30
+ "fused_qkv": true,
31
+ "glu_mlp": true,
32
+ "local_ranks_size": 2,
33
+ "max_batch_size": 1,
34
+ "max_context_length": 8192,
35
+ "max_topk": 256,
36
+ "n_active_tokens": 8192,
37
+ "neuronxcc_version": "2.21.33363.0+82129205",
38
+ "on_device_sampling": true,
39
+ "optimum_neuron_version": "0.4.6.dev3",
40
+ "output_logits": false,
41
+ "pp_degree": 1,
42
+ "prefill_chunk_size": 1024,
43
+ "sequence_length": 8192,
44
+ "speculation_length": 0,
45
+ "start_rank_id": 0,
46
+ "target": "trn1",
47
+ "torch_dtype": "bfloat16",
48
+ "tp_degree": 2
49
+ },
50
+ "num_attention_heads": 32,
51
+ "num_hidden_layers": 32,
52
+ "num_key_value_heads": 32,
53
+ "original_max_position_embeddings": 4096,
54
+ "partial_rotary_factor": 1.0,
55
+ "resid_pdrop": 0.0,
56
+ "rms_norm_eps": 1e-05,
57
+ "rope_scaling": {
58
+ "long_factor": [
59
+ 1.0800000429153442,
60
+ 1.1100000143051147,
61
+ 1.1399999856948853,
62
+ 1.340000033378601,
63
+ 1.5899999141693115,
64
+ 1.600000023841858,
65
+ 1.6200000047683716,
66
+ 2.620000123977661,
67
+ 3.2300000190734863,
68
+ 3.2300000190734863,
69
+ 4.789999961853027,
70
+ 7.400000095367432,
71
+ 7.700000286102295,
72
+ 9.09000015258789,
73
+ 12.199999809265137,
74
+ 17.670000076293945,
75
+ 24.46000099182129,
76
+ 28.57000160217285,
77
+ 30.420001983642578,
78
+ 30.840002059936523,
79
+ 32.590003967285156,
80
+ 32.93000411987305,
81
+ 42.320003509521484,
82
+ 44.96000289916992,
83
+ 50.340003967285156,
84
+ 50.45000457763672,
85
+ 57.55000305175781,
86
+ 57.93000411987305,
87
+ 58.21000289916992,
88
+ 60.1400032043457,
89
+ 62.61000442504883,
90
+ 62.62000274658203,
91
+ 62.71000289916992,
92
+ 63.1400032043457,
93
+ 63.1400032043457,
94
+ 63.77000427246094,
95
+ 63.93000411987305,
96
+ 63.96000289916992,
97
+ 63.970001220703125,
98
+ 64.02999877929688,
99
+ 64.06999969482422,
100
+ 64.08000183105469,
101
+ 64.12000274658203,
102
+ 64.41000366210938,
103
+ 64.4800033569336,
104
+ 64.51000213623047,
105
+ 64.52999877929688,
106
+ 64.83999633789062
107
+ ],
108
+ "short_factor": [
109
+ 1.0,
110
+ 1.0199999809265137,
111
+ 1.0299999713897705,
112
+ 1.0299999713897705,
113
+ 1.0499999523162842,
114
+ 1.0499999523162842,
115
+ 1.0499999523162842,
116
+ 1.0499999523162842,
117
+ 1.0499999523162842,
118
+ 1.0699999332427979,
119
+ 1.0999999046325684,
120
+ 1.1099998950958252,
121
+ 1.1599998474121094,
122
+ 1.1599998474121094,
123
+ 1.1699998378753662,
124
+ 1.2899998426437378,
125
+ 1.339999794960022,
126
+ 1.679999828338623,
127
+ 1.7899998426437378,
128
+ 1.8199998140335083,
129
+ 1.8499997854232788,
130
+ 1.8799997568130493,
131
+ 1.9099997282028198,
132
+ 1.9399996995925903,
133
+ 1.9899996519088745,
134
+ 2.0199997425079346,
135
+ 2.0199997425079346,
136
+ 2.0199997425079346,
137
+ 2.0199997425079346,
138
+ 2.0199997425079346,
139
+ 2.0199997425079346,
140
+ 2.0299997329711914,
141
+ 2.0299997329711914,
142
+ 2.0299997329711914,
143
+ 2.0299997329711914,
144
+ 2.0299997329711914,
145
+ 2.0299997329711914,
146
+ 2.0299997329711914,
147
+ 2.0299997329711914,
148
+ 2.0299997329711914,
149
+ 2.0799996852874756,
150
+ 2.0899996757507324,
151
+ 2.189999580383301,
152
+ 2.2199995517730713,
153
+ 2.5899994373321533,
154
+ 2.729999542236328,
155
+ 2.749999523162842,
156
+ 2.8399994373321533
157
+ ],
158
+ "type": "longrope"
159
+ },
160
+ "rope_theta": 10000.0,
161
+ "sliding_window": 262144,
162
+ "tie_word_embeddings": false,
163
+ "use_cache": true,
164
+ "vocab_size": 32064
165
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/8c90ac2593ed0b7f1ecb60e82cb184fb11f2ea640befa1cc7b10766a5c02525d/bde8d1f238d097f95d53.json ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "microsoft/Phi-3.5-mini-instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Phi3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "auto_map": {
11
+ "AutoConfig": "configuration_phi3.Phi3Config",
12
+ "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM"
13
+ },
14
+ "dtype": "bfloat16",
15
+ "embd_pdrop": 0.0,
16
+ "hidden_act": "silu",
17
+ "hidden_size": 3072,
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 8192,
20
+ "max_position_embeddings": 131072,
21
+ "model_type": "phi3",
22
+ "neuron": {
23
+ "_serialized_key": "NxDNeuronConfig",
24
+ "batch_size": 4,
25
+ "capacity_factor": null,
26
+ "checkpoint_id": "microsoft/Phi-3.5-mini-instruct",
27
+ "checkpoint_revision": "2fe192450127e6a83f7441aef6e3ca586c338b77",
28
+ "continuous_batching": true,
29
+ "ep_degree": 1,
30
+ "fused_qkv": true,
31
+ "glu_mlp": true,
32
+ "local_ranks_size": 2,
33
+ "max_batch_size": 4,
34
+ "max_context_length": 1024,
35
+ "max_topk": 256,
36
+ "n_active_tokens": 1024,
37
+ "neuronxcc_version": "2.21.33363.0+82129205",
38
+ "on_device_sampling": true,
39
+ "optimum_neuron_version": "0.4.6.dev3",
40
+ "output_logits": false,
41
+ "pp_degree": 1,
42
+ "prefill_chunk_size": 0,
43
+ "sequence_length": 1024,
44
+ "speculation_length": 0,
45
+ "start_rank_id": 0,
46
+ "target": "trn1",
47
+ "torch_dtype": "bfloat16",
48
+ "tp_degree": 2
49
+ },
50
+ "num_attention_heads": 32,
51
+ "num_hidden_layers": 32,
52
+ "num_key_value_heads": 32,
53
+ "original_max_position_embeddings": 4096,
54
+ "partial_rotary_factor": 1.0,
55
+ "resid_pdrop": 0.0,
56
+ "rms_norm_eps": 1e-05,
57
+ "rope_scaling": {
58
+ "long_factor": [
59
+ 1.0800000429153442,
60
+ 1.1100000143051147,
61
+ 1.1399999856948853,
62
+ 1.340000033378601,
63
+ 1.5899999141693115,
64
+ 1.600000023841858,
65
+ 1.6200000047683716,
66
+ 2.620000123977661,
67
+ 3.2300000190734863,
68
+ 3.2300000190734863,
69
+ 4.789999961853027,
70
+ 7.400000095367432,
71
+ 7.700000286102295,
72
+ 9.09000015258789,
73
+ 12.199999809265137,
74
+ 17.670000076293945,
75
+ 24.46000099182129,
76
+ 28.57000160217285,
77
+ 30.420001983642578,
78
+ 30.840002059936523,
79
+ 32.590003967285156,
80
+ 32.93000411987305,
81
+ 42.320003509521484,
82
+ 44.96000289916992,
83
+ 50.340003967285156,
84
+ 50.45000457763672,
85
+ 57.55000305175781,
86
+ 57.93000411987305,
87
+ 58.21000289916992,
88
+ 60.1400032043457,
89
+ 62.61000442504883,
90
+ 62.62000274658203,
91
+ 62.71000289916992,
92
+ 63.1400032043457,
93
+ 63.1400032043457,
94
+ 63.77000427246094,
95
+ 63.93000411987305,
96
+ 63.96000289916992,
97
+ 63.970001220703125,
98
+ 64.02999877929688,
99
+ 64.06999969482422,
100
+ 64.08000183105469,
101
+ 64.12000274658203,
102
+ 64.41000366210938,
103
+ 64.4800033569336,
104
+ 64.51000213623047,
105
+ 64.52999877929688,
106
+ 64.83999633789062
107
+ ],
108
+ "short_factor": [
109
+ 1.0,
110
+ 1.0199999809265137,
111
+ 1.0299999713897705,
112
+ 1.0299999713897705,
113
+ 1.0499999523162842,
114
+ 1.0499999523162842,
115
+ 1.0499999523162842,
116
+ 1.0499999523162842,
117
+ 1.0499999523162842,
118
+ 1.0699999332427979,
119
+ 1.0999999046325684,
120
+ 1.1099998950958252,
121
+ 1.1599998474121094,
122
+ 1.1599998474121094,
123
+ 1.1699998378753662,
124
+ 1.2899998426437378,
125
+ 1.339999794960022,
126
+ 1.679999828338623,
127
+ 1.7899998426437378,
128
+ 1.8199998140335083,
129
+ 1.8499997854232788,
130
+ 1.8799997568130493,
131
+ 1.9099997282028198,
132
+ 1.9399996995925903,
133
+ 1.9899996519088745,
134
+ 2.0199997425079346,
135
+ 2.0199997425079346,
136
+ 2.0199997425079346,
137
+ 2.0199997425079346,
138
+ 2.0199997425079346,
139
+ 2.0199997425079346,
140
+ 2.0299997329711914,
141
+ 2.0299997329711914,
142
+ 2.0299997329711914,
143
+ 2.0299997329711914,
144
+ 2.0299997329711914,
145
+ 2.0299997329711914,
146
+ 2.0299997329711914,
147
+ 2.0299997329711914,
148
+ 2.0299997329711914,
149
+ 2.0799996852874756,
150
+ 2.0899996757507324,
151
+ 2.189999580383301,
152
+ 2.2199995517730713,
153
+ 2.5899994373321533,
154
+ 2.729999542236328,
155
+ 2.749999523162842,
156
+ 2.8399994373321533
157
+ ],
158
+ "type": "longrope"
159
+ },
160
+ "rope_theta": 10000.0,
161
+ "sliding_window": 262144,
162
+ "tie_word_embeddings": false,
163
+ "use_cache": true,
164
+ "vocab_size": 32064
165
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/granite/ibm-granite/granite-3.1-2b-instruct/88ab959ffd7f969e9ecf.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "ibm-granite/granite-3.1-2b-instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.1,
10
+ "attention_multiplier": 0.015625,
11
+ "dtype": "bfloat16",
12
+ "embedding_multiplier": 12.0,
13
+ "hidden_act": "silu",
14
+ "hidden_size": 2048,
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 8192,
17
+ "logits_scaling": 8.0,
18
+ "max_position_embeddings": 131072,
19
+ "mlp_bias": false,
20
+ "model_type": "granite",
21
+ "neuron": {
22
+ "_serialized_key": "NxDNeuronConfig",
23
+ "batch_size": 4,
24
+ "capacity_factor": null,
25
+ "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
26
+ "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d",
27
+ "continuous_batching": true,
28
+ "ep_degree": 1,
29
+ "fused_qkv": true,
30
+ "glu_mlp": true,
31
+ "local_ranks_size": 2,
32
+ "max_batch_size": 4,
33
+ "max_context_length": 1024,
34
+ "max_topk": 256,
35
+ "n_active_tokens": 1024,
36
+ "neuronxcc_version": "2.21.33363.0+82129205",
37
+ "on_device_sampling": true,
38
+ "optimum_neuron_version": "0.4.6.dev3",
39
+ "output_logits": false,
40
+ "pp_degree": 1,
41
+ "prefill_chunk_size": 0,
42
+ "sequence_length": 1024,
43
+ "speculation_length": 0,
44
+ "start_rank_id": 0,
45
+ "target": "trn1",
46
+ "torch_dtype": "bfloat16",
47
+ "tp_degree": 2
48
+ },
49
+ "num_attention_heads": 32,
50
+ "num_hidden_layers": 40,
51
+ "num_key_value_heads": 8,
52
+ "residual_multiplier": 0.22,
53
+ "rms_norm_eps": 1e-05,
54
+ "rope_scaling": null,
55
+ "rope_theta": 5000000.0,
56
+ "tie_word_embeddings": true,
57
+ "use_cache": true,
58
+ "vocab_size": 49155
59
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/phi3/microsoft/Phi-3.5-mini-instruct/bde8d1f238d097f95d53.json ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "microsoft/Phi-3.5-mini-instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Phi3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "auto_map": {
11
+ "AutoConfig": "configuration_phi3.Phi3Config",
12
+ "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM"
13
+ },
14
+ "dtype": "bfloat16",
15
+ "embd_pdrop": 0.0,
16
+ "hidden_act": "silu",
17
+ "hidden_size": 3072,
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 8192,
20
+ "max_position_embeddings": 131072,
21
+ "model_type": "phi3",
22
+ "neuron": {
23
+ "_serialized_key": "NxDNeuronConfig",
24
+ "batch_size": 4,
25
+ "capacity_factor": null,
26
+ "checkpoint_id": "microsoft/Phi-3.5-mini-instruct",
27
+ "checkpoint_revision": "2fe192450127e6a83f7441aef6e3ca586c338b77",
28
+ "continuous_batching": true,
29
+ "ep_degree": 1,
30
+ "fused_qkv": true,
31
+ "glu_mlp": true,
32
+ "local_ranks_size": 2,
33
+ "max_batch_size": 4,
34
+ "max_context_length": 1024,
35
+ "max_topk": 256,
36
+ "n_active_tokens": 1024,
37
+ "neuronxcc_version": "2.21.33363.0+82129205",
38
+ "on_device_sampling": true,
39
+ "optimum_neuron_version": "0.4.6.dev3",
40
+ "output_logits": false,
41
+ "pp_degree": 1,
42
+ "prefill_chunk_size": 0,
43
+ "sequence_length": 1024,
44
+ "speculation_length": 0,
45
+ "start_rank_id": 0,
46
+ "target": "trn1",
47
+ "torch_dtype": "bfloat16",
48
+ "tp_degree": 2
49
+ },
50
+ "num_attention_heads": 32,
51
+ "num_hidden_layers": 32,
52
+ "num_key_value_heads": 32,
53
+ "original_max_position_embeddings": 4096,
54
+ "partial_rotary_factor": 1.0,
55
+ "resid_pdrop": 0.0,
56
+ "rms_norm_eps": 1e-05,
57
+ "rope_scaling": {
58
+ "long_factor": [
59
+ 1.0800000429153442,
60
+ 1.1100000143051147,
61
+ 1.1399999856948853,
62
+ 1.340000033378601,
63
+ 1.5899999141693115,
64
+ 1.600000023841858,
65
+ 1.6200000047683716,
66
+ 2.620000123977661,
67
+ 3.2300000190734863,
68
+ 3.2300000190734863,
69
+ 4.789999961853027,
70
+ 7.400000095367432,
71
+ 7.700000286102295,
72
+ 9.09000015258789,
73
+ 12.199999809265137,
74
+ 17.670000076293945,
75
+ 24.46000099182129,
76
+ 28.57000160217285,
77
+ 30.420001983642578,
78
+ 30.840002059936523,
79
+ 32.590003967285156,
80
+ 32.93000411987305,
81
+ 42.320003509521484,
82
+ 44.96000289916992,
83
+ 50.340003967285156,
84
+ 50.45000457763672,
85
+ 57.55000305175781,
86
+ 57.93000411987305,
87
+ 58.21000289916992,
88
+ 60.1400032043457,
89
+ 62.61000442504883,
90
+ 62.62000274658203,
91
+ 62.71000289916992,
92
+ 63.1400032043457,
93
+ 63.1400032043457,
94
+ 63.77000427246094,
95
+ 63.93000411987305,
96
+ 63.96000289916992,
97
+ 63.970001220703125,
98
+ 64.02999877929688,
99
+ 64.06999969482422,
100
+ 64.08000183105469,
101
+ 64.12000274658203,
102
+ 64.41000366210938,
103
+ 64.4800033569336,
104
+ 64.51000213623047,
105
+ 64.52999877929688,
106
+ 64.83999633789062
107
+ ],
108
+ "short_factor": [
109
+ 1.0,
110
+ 1.0199999809265137,
111
+ 1.0299999713897705,
112
+ 1.0299999713897705,
113
+ 1.0499999523162842,
114
+ 1.0499999523162842,
115
+ 1.0499999523162842,
116
+ 1.0499999523162842,
117
+ 1.0499999523162842,
118
+ 1.0699999332427979,
119
+ 1.0999999046325684,
120
+ 1.1099998950958252,
121
+ 1.1599998474121094,
122
+ 1.1599998474121094,
123
+ 1.1699998378753662,
124
+ 1.2899998426437378,
125
+ 1.339999794960022,
126
+ 1.679999828338623,
127
+ 1.7899998426437378,
128
+ 1.8199998140335083,
129
+ 1.8499997854232788,
130
+ 1.8799997568130493,
131
+ 1.9099997282028198,
132
+ 1.9399996995925903,
133
+ 1.9899996519088745,
134
+ 2.0199997425079346,
135
+ 2.0199997425079346,
136
+ 2.0199997425079346,
137
+ 2.0199997425079346,
138
+ 2.0199997425079346,
139
+ 2.0199997425079346,
140
+ 2.0299997329711914,
141
+ 2.0299997329711914,
142
+ 2.0299997329711914,
143
+ 2.0299997329711914,
144
+ 2.0299997329711914,
145
+ 2.0299997329711914,
146
+ 2.0299997329711914,
147
+ 2.0299997329711914,
148
+ 2.0299997329711914,
149
+ 2.0799996852874756,
150
+ 2.0899996757507324,
151
+ 2.189999580383301,
152
+ 2.2199995517730713,
153
+ 2.5899994373321533,
154
+ 2.729999542236328,
155
+ 2.749999523162842,
156
+ 2.8399994373321533
157
+ ],
158
+ "type": "longrope"
159
+ },
160
+ "rope_theta": 10000.0,
161
+ "sliding_window": 262144,
162
+ "tie_word_embeddings": false,
163
+ "use_cache": true,
164
+ "vocab_size": 32064
165
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen3/Qwen/Qwen3-Embedding-0.6B/866e4ae3016710258519.json ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen3-Embedding-0.6B",
4
+ "_task": "feature-extraction",
5
+ "architectures": [
6
+ "Qwen3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "head_dim": 128,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 1024,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "layer_types": [
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention"
45
+ ],
46
+ "max_position_embeddings": 32768,
47
+ "max_window_layers": 28,
48
+ "model_type": "qwen3",
49
+ "neuron": {
50
+ "_serialized_key": "NxDNeuronConfig",
51
+ "batch_size": 4,
52
+ "capacity_factor": null,
53
+ "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
54
+ "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
55
+ "continuous_batching": false,
56
+ "ep_degree": 1,
57
+ "fused_qkv": true,
58
+ "glu_mlp": true,
59
+ "local_ranks_size": 2,
60
+ "max_batch_size": 4,
61
+ "max_context_length": 8192,
62
+ "max_topk": 256,
63
+ "n_active_tokens": 8192,
64
+ "neuronxcc_version": "2.21.33363.0+82129205",
65
+ "on_device_sampling": false,
66
+ "optimum_neuron_version": "0.4.6.dev3",
67
+ "output_logits": false,
68
+ "pp_degree": 1,
69
+ "prefill_chunk_size": 1024,
70
+ "sequence_length": 8192,
71
+ "speculation_length": 0,
72
+ "start_rank_id": 0,
73
+ "target": "trn1",
74
+ "torch_dtype": "bfloat16",
75
+ "tp_degree": 2
76
+ },
77
+ "num_attention_heads": 16,
78
+ "num_hidden_layers": 28,
79
+ "num_key_value_heads": 8,
80
+ "rms_norm_eps": 1e-06,
81
+ "rope_scaling": null,
82
+ "rope_theta": 1000000,
83
+ "sliding_window": null,
84
+ "tie_word_embeddings": true,
85
+ "use_cache": true,
86
+ "use_sliding_window": false,
87
+ "vocab_size": 151669
88
+ }
neuronxcc-2.21.33363.0+82129205/MODULE_034cf87f477c882b257f+a02c3a36/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.33363.0+82129205/MODULE_034cf87f477c882b257f+a02c3a36/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_034cf87f477c882b257f+a02c3a36/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6070ba301fdd09bd8986151e94c7b92ce7819a1d22c851ff55373b6d132bdd5
3
+ size 87331
neuronxcc-2.21.33363.0+82129205/MODULE_034cf87f477c882b257f+a02c3a36/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3feae7061a6e87b40cf794818cfd435e42b033c7f8380972829f873fa9787e8c
3
+ size 236544
neuronxcc-2.21.33363.0+82129205/MODULE_034cf87f477c882b257f+a02c3a36/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e52cf7dc0e5f6cb7a27b7f359a858711690d867d08124c333abb5daa645c4718
3
+ size 244847
neuronxcc-2.21.33363.0+82129205/MODULE_04cec4dcda7910e7fb2e+283df001/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.21.33363.0+82129205/MODULE_04cec4dcda7910e7fb2e+283df001/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_04cec4dcda7910e7fb2e+283df001/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e8239944f5c78d314d34c0e8ea62975adcc12d846f485c3c5fea351b127db89
3
+ size 99591
neuronxcc-2.21.33363.0+82129205/MODULE_04cec4dcda7910e7fb2e+283df001/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd41c9c13e10e8bde7c89554bd777f6c1c86a48d2df0bf4e2076c3086c4b5dbf
3
+ size 410624
neuronxcc-2.21.33363.0+82129205/MODULE_0b9974a2d1726da77b0a+a02c3a36/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.33363.0+82129205/MODULE_0b9974a2d1726da77b0a+a02c3a36/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_0b9974a2d1726da77b0a+a02c3a36/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3995113b9941789a06c77ea7f12cea66ce8f84e71f1c545e6fb307d19d5b330
3
+ size 89962
neuronxcc-2.21.33363.0+82129205/MODULE_0b9974a2d1726da77b0a+a02c3a36/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20e0ebd7df8002034ca5710ae915e4bbd03ea1897ed60c71417a46c80cfce934
3
+ size 287744
neuronxcc-2.21.33363.0+82129205/MODULE_0b9974a2d1726da77b0a+a02c3a36/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a17b050bc6ff253b2ed29a8a618ec985673ec3d3ccc738af645ab113f498de73
3
+ size 296049
neuronxcc-2.21.33363.0+82129205/MODULE_0c504f5fe9366071df6f+2b70e3a5/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_c7c088ba-1995-4e62-851a-c774a6069131/compiler_workdir/NeuronRMSNorm/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.33363.0+82129205/MODULE_0c504f5fe9366071df6f+2b70e3a5/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_0c504f5fe9366071df6f+2b70e3a5/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c774c6626fc91684e924b843c1fe807dadbbac6d8a8e19443663b43c13eac35
3
+ size 1165
neuronxcc-2.21.33363.0+82129205/MODULE_0c504f5fe9366071df6f+2b70e3a5/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0af0e67de0c158c397a503ee4c1e80ca6bbfe89239565e96795dd3a821030e6
3
+ size 103424
neuronxcc-2.21.33363.0+82129205/MODULE_0c504f5fe9366071df6f+2b70e3a5/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f424802b4fb9ab1e755d51541e72053fa1436b169b387d6fe9f2c368c69f0079
3
+ size 104320
neuronxcc-2.21.33363.0+82129205/MODULE_10244305442015770634+e30acd3a/model.neff CHANGED
Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_10244305442015770634+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_10244305442015770634+e30acd3a/model.neff differ
 
neuronxcc-2.21.33363.0+82129205/MODULE_10606948783918825529+e30acd3a/model.neff CHANGED
Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_10606948783918825529+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_10606948783918825529+e30acd3a/model.neff differ
 
neuronxcc-2.21.33363.0+82129205/MODULE_10645643398657092095+e30acd3a/model.neff CHANGED
Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_10645643398657092095+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_10645643398657092095+e30acd3a/model.neff differ
 
neuronxcc-2.21.33363.0+82129205/MODULE_10661660426924300837+e30acd3a/model.neff CHANGED
Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_10661660426924300837+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_10661660426924300837+e30acd3a/model.neff differ
 
neuronxcc-2.21.33363.0+82129205/MODULE_10746122569655005679+e30acd3a/model.neff CHANGED
Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_10746122569655005679+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_10746122569655005679+e30acd3a/model.neff differ
 
neuronxcc-2.21.33363.0+82129205/MODULE_11086318750207148626+e30acd3a/model.neff CHANGED
Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_11086318750207148626+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_11086318750207148626+e30acd3a/model.neff differ
 
neuronxcc-2.21.33363.0+82129205/MODULE_11301370888592189606+e30acd3a/model.neff CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f50b3217d80f831aa266a928dfd5f0eb1f60737668b46a653da6035c136158d
3
  size 246784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7e3ebadfc05c2572babe3aff125e581b50060d2ddf96e09ee3a4a0b55c19375
3
  size 246784
neuronxcc-2.21.33363.0+82129205/MODULE_11909310600244571805+e30acd3a/model.neff CHANGED
Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_11909310600244571805+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_11909310600244571805+e30acd3a/model.neff differ
 
neuronxcc-2.21.33363.0+82129205/MODULE_11933818254123612383+e30acd3a/model.neff CHANGED
Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_11933818254123612383+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_11933818254123612383+e30acd3a/model.neff differ
 
neuronxcc-2.21.33363.0+82129205/MODULE_1210392327607194823+e30acd3a/model.neff CHANGED
Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_1210392327607194823+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_1210392327607194823+e30acd3a/model.neff differ
 
neuronxcc-2.21.33363.0+82129205/MODULE_13102010590082783346+e30acd3a/model.neff CHANGED
Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_13102010590082783346+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_13102010590082783346+e30acd3a/model.neff differ
 
neuronxcc-2.21.33363.0+82129205/MODULE_13475273426270778455+e30acd3a/model.neff CHANGED
Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_13475273426270778455+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_13475273426270778455+e30acd3a/model.neff differ
 
neuronxcc-2.21.33363.0+82129205/MODULE_13728813963059599796+e30acd3a/model.neff CHANGED
Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_13728813963059599796+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_13728813963059599796+e30acd3a/model.neff differ
 
neuronxcc-2.21.33363.0+82129205/MODULE_13783872956654054643+e30acd3a/model.neff CHANGED
Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_13783872956654054643+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_13783872956654054643+e30acd3a/model.neff differ
 
neuronxcc-2.21.33363.0+82129205/MODULE_14097776130612710282+e30acd3a/model.neff CHANGED
Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_14097776130612710282+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_14097776130612710282+e30acd3a/model.neff differ
 
neuronxcc-2.21.33363.0+82129205/MODULE_14155105943057125178+e30acd3a/model.neff CHANGED
Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_14155105943057125178+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_14155105943057125178+e30acd3a/model.neff differ
 
neuronxcc-2.21.33363.0+82129205/MODULE_14173682194645188821+e30acd3a/model.neff CHANGED
Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_14173682194645188821+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_14173682194645188821+e30acd3a/model.neff differ
 
neuronxcc-2.21.33363.0+82129205/MODULE_14274833313744358348+e30acd3a/model.neff CHANGED
Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_14274833313744358348+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_14274833313744358348+e30acd3a/model.neff differ
 
neuronxcc-2.21.33363.0+82129205/MODULE_14469716438804935215+e30acd3a/model.neff CHANGED
Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_14469716438804935215+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_14469716438804935215+e30acd3a/model.neff differ
 
neuronxcc-2.21.33363.0+82129205/MODULE_14657035910656097540+e30acd3a/model.neff CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b839222648bdaa494af61a8fc9a9f05ab16323bfbe94b46469079fa51392480d
3
  size 123904
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:703a7e8e3848965928554cadbd5d5b8934c7f3d55d17152862a9579a043afa2e
3
  size 123904
neuronxcc-2.21.33363.0+82129205/MODULE_14739672283771660808+e30acd3a/model.neff CHANGED
Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_14739672283771660808+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_14739672283771660808+e30acd3a/model.neff differ
 
neuronxcc-2.21.33363.0+82129205/MODULE_15128894797204950836+e30acd3a/model.neff CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2651b3c11fc0d7599fa66a322eca8f38b728c267782e25f354ebb09e5dc53860
3
  size 113664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2bfe04556f19754f56e7322a2686563fef9d0fce7d3ab6fee4641cecbaef16c
3
  size 113664
neuronxcc-2.21.33363.0+82129205/MODULE_15557971880365771457+e30acd3a/model.neff CHANGED
Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_15557971880365771457+e30acd3a/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_15557971880365771457+e30acd3a/model.neff differ