diff --git a/added_tokens.json b/added_tokens.json
new file mode 100644
index 0000000000000000000000000000000000000000..cbce74e5c64b97114098962fa58454a57d7fb532
--- /dev/null
+++ b/added_tokens.json
@@ -0,0 +1,5 @@
+{
+ "</s>": 2,
+ "<s>": 1,
+ "<unk>": 0
+}
diff --git a/ndarray-cache.json b/ndarray-cache.json
new file mode 100644
index 0000000000000000000000000000000000000000..fd0fd9b733259e8e472c0f16ce909d8600f2e3ce
--- /dev/null
+++ b/ndarray-cache.json
@@ -0,0 +1,5611 @@
+{
+ "metadata": {
+ "ParamSize": 453
+ },
+ "records": [
+ {
+ "dataPath": "params_shard_0.bin",
+ "format": "raw-shard",
+ "nbytes": 262144000,
+ "records": [
+ {
+ "name": "param_0",
+ "shape": [
+ 32000,
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144000,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_1.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "param_7",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_2.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "param_10",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_3.bin",
+ "format": "raw-shard",
+ "nbytes": 27803648,
+ "records": [
+ {
+ "name": "param_1",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "param_2",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "param_3",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "param_4",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 8388608,
+ "byteOffset": 13369344
+ },
+ {
+ "name": "param_5",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 21757952
+ },
+ {
+ "name": "param_6",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 22020096
+ },
+ {
+ "name": "param_8",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 22282240
+ },
+ {
+ "name": "param_9",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 24117248
+ },
+ {
+ "name": "param_11",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 25952256
+ },
+ {
+ "name": "param_12",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 26869760
+ },
+ {
+ "name": "param_13",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27787264
+ },
+ {
+ "name": "param_14",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27795456
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_4.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "param_21",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_5.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "param_24",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_6.bin",
+ "format": "raw-shard",
+ "nbytes": 27803648,
+ "records": [
+ {
+ "name": "param_15",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "param_16",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "param_17",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "param_18",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 8388608,
+ "byteOffset": 13369344
+ },
+ {
+ "name": "param_19",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 21757952
+ },
+ {
+ "name": "param_20",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 22020096
+ },
+ {
+ "name": "param_22",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 22282240
+ },
+ {
+ "name": "param_23",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 24117248
+ },
+ {
+ "name": "param_25",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 25952256
+ },
+ {
+ "name": "param_26",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 26869760
+ },
+ {
+ "name": "param_27",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27787264
+ },
+ {
+ "name": "param_28",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27795456
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_7.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "param_35",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_8.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "param_38",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_9.bin",
+ "format": "raw-shard",
+ "nbytes": 27803648,
+ "records": [
+ {
+ "name": "param_29",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "param_30",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "param_31",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "param_32",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 8388608,
+ "byteOffset": 13369344
+ },
+ {
+ "name": "param_33",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 21757952
+ },
+ {
+ "name": "param_34",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 22020096
+ },
+ {
+ "name": "param_36",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 22282240
+ },
+ {
+ "name": "param_37",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 24117248
+ },
+ {
+ "name": "param_39",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 25952256
+ },
+ {
+ "name": "param_40",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 26869760
+ },
+ {
+ "name": "param_41",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27787264
+ },
+ {
+ "name": "param_42",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27795456
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_10.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "param_49",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_11.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "param_52",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_12.bin",
+ "format": "raw-shard",
+ "nbytes": 27803648,
+ "records": [
+ {
+ "name": "param_43",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "param_44",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "param_45",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "param_46",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 8388608,
+ "byteOffset": 13369344
+ },
+ {
+ "name": "param_47",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 21757952
+ },
+ {
+ "name": "param_48",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 22020096
+ },
+ {
+ "name": "param_50",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 22282240
+ },
+ {
+ "name": "param_51",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 24117248
+ },
+ {
+ "name": "param_53",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 25952256
+ },
+ {
+ "name": "param_54",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 26869760
+ },
+ {
+ "name": "param_55",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27787264
+ },
+ {
+ "name": "param_56",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27795456
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_13.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "param_63",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_14.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "param_66",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_15.bin",
+ "format": "raw-shard",
+ "nbytes": 27803648,
+ "records": [
+ {
+ "name": "param_57",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "param_58",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "param_59",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "param_60",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 8388608,
+ "byteOffset": 13369344
+ },
+ {
+ "name": "param_61",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 21757952
+ },
+ {
+ "name": "param_62",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 22020096
+ },
+ {
+ "name": "param_64",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 22282240
+ },
+ {
+ "name": "param_65",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 24117248
+ },
+ {
+ "name": "param_67",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 25952256
+ },
+ {
+ "name": "param_68",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 26869760
+ },
+ {
+ "name": "param_69",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27787264
+ },
+ {
+ "name": "param_70",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27795456
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_16.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "param_77",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_17.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "param_80",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_18.bin",
+ "format": "raw-shard",
+ "nbytes": 27803648,
+ "records": [
+ {
+ "name": "param_71",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "param_72",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "param_73",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "param_74",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 8388608,
+ "byteOffset": 13369344
+ },
+ {
+ "name": "param_75",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 21757952
+ },
+ {
+ "name": "param_76",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 22020096
+ },
+ {
+ "name": "param_78",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 22282240
+ },
+ {
+ "name": "param_79",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 24117248
+ },
+ {
+ "name": "param_81",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 25952256
+ },
+ {
+ "name": "param_82",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 26869760
+ },
+ {
+ "name": "param_83",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27787264
+ },
+ {
+ "name": "param_84",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27795456
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_19.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "param_91",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_20.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "param_94",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_21.bin",
+ "format": "raw-shard",
+ "nbytes": 27803648,
+ "records": [
+ {
+ "name": "param_85",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "param_86",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "param_87",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "param_88",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 8388608,
+ "byteOffset": 13369344
+ },
+ {
+ "name": "param_89",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 21757952
+ },
+ {
+ "name": "param_90",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 22020096
+ },
+ {
+ "name": "param_92",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 22282240
+ },
+ {
+ "name": "param_93",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 24117248
+ },
+ {
+ "name": "param_95",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 25952256
+ },
+ {
+ "name": "param_96",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 26869760
+ },
+ {
+ "name": "param_97",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27787264
+ },
+ {
+ "name": "param_98",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27795456
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_22.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "param_105",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_23.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "param_108",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_24.bin",
+ "format": "raw-shard",
+ "nbytes": 27803648,
+ "records": [
+ {
+ "name": "param_99",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "param_100",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "param_101",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "param_102",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 8388608,
+ "byteOffset": 13369344
+ },
+ {
+ "name": "param_103",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 21757952
+ },
+ {
+ "name": "param_104",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 22020096
+ },
+ {
+ "name": "param_106",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 22282240
+ },
+ {
+ "name": "param_107",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 24117248
+ },
+ {
+ "name": "param_109",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 25952256
+ },
+ {
+ "name": "param_110",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 26869760
+ },
+ {
+ "name": "param_111",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27787264
+ },
+ {
+ "name": "param_112",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27795456
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_25.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "param_119",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_26.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "param_122",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_27.bin",
+ "format": "raw-shard",
+ "nbytes": 27803648,
+ "records": [
+ {
+ "name": "param_113",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "param_114",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "param_115",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "param_116",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 8388608,
+ "byteOffset": 13369344
+ },
+ {
+ "name": "param_117",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 21757952
+ },
+ {
+ "name": "param_118",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 22020096
+ },
+ {
+ "name": "param_120",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 22282240
+ },
+ {
+ "name": "param_121",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 24117248
+ },
+ {
+ "name": "param_123",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 25952256
+ },
+ {
+ "name": "param_124",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 26869760
+ },
+ {
+ "name": "param_125",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27787264
+ },
+ {
+ "name": "param_126",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27795456
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_28.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "param_133",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_29.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "param_136",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_30.bin",
+ "format": "raw-shard",
+ "nbytes": 27803648,
+ "records": [
+ {
+ "name": "param_127",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "param_128",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "param_129",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "param_130",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 8388608,
+ "byteOffset": 13369344
+ },
+ {
+ "name": "param_131",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 21757952
+ },
+ {
+ "name": "param_132",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 22020096
+ },
+ {
+ "name": "param_134",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 22282240
+ },
+ {
+ "name": "param_135",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 24117248
+ },
+ {
+ "name": "param_137",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 25952256
+ },
+ {
+ "name": "param_138",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 26869760
+ },
+ {
+ "name": "param_139",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27787264
+ },
+ {
+ "name": "param_140",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27795456
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_31.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "param_147",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_32.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "param_150",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_33.bin",
+ "format": "raw-shard",
+ "nbytes": 27803648,
+ "records": [
+ {
+ "name": "param_141",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "param_142",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "param_143",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "param_144",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 8388608,
+ "byteOffset": 13369344
+ },
+ {
+ "name": "param_145",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 21757952
+ },
+ {
+ "name": "param_146",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 22020096
+ },
+ {
+ "name": "param_148",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 22282240
+ },
+ {
+ "name": "param_149",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 24117248
+ },
+ {
+ "name": "param_151",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 25952256
+ },
+ {
+ "name": "param_152",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 26869760
+ },
+ {
+ "name": "param_153",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27787264
+ },
+ {
+ "name": "param_154",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27795456
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_34.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "param_161",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_35.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "param_164",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_36.bin",
+ "format": "raw-shard",
+ "nbytes": 27803648,
+ "records": [
+ {
+ "name": "param_155",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "param_156",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "param_157",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "param_158",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 8388608,
+ "byteOffset": 13369344
+ },
+ {
+ "name": "param_159",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 21757952
+ },
+ {
+ "name": "param_160",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 22020096
+ },
+ {
+ "name": "param_162",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 22282240
+ },
+ {
+ "name": "param_163",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 24117248
+ },
+ {
+ "name": "param_165",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 25952256
+ },
+ {
+ "name": "param_166",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 26869760
+ },
+ {
+ "name": "param_167",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27787264
+ },
+ {
+ "name": "param_168",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27795456
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_37.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "param_175",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_38.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "param_178",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_39.bin",
+ "format": "raw-shard",
+ "nbytes": 27803648,
+ "records": [
+ {
+ "name": "param_169",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "param_170",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "param_171",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "param_172",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 8388608,
+ "byteOffset": 13369344
+ },
+ {
+ "name": "param_173",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 21757952
+ },
+ {
+ "name": "param_174",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 22020096
+ },
+ {
+ "name": "param_176",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 22282240
+ },
+ {
+ "name": "param_177",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 24117248
+ },
+ {
+ "name": "param_179",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 25952256
+ },
+ {
+ "name": "param_180",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 26869760
+ },
+ {
+ "name": "param_181",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27787264
+ },
+ {
+ "name": "param_182",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27795456
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_40.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "param_189",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_41.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "param_192",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_42.bin",
+ "format": "raw-shard",
+ "nbytes": 27803648,
+ "records": [
+ {
+ "name": "param_183",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "param_184",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "param_185",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "param_186",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 8388608,
+ "byteOffset": 13369344
+ },
+ {
+ "name": "param_187",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 21757952
+ },
+ {
+ "name": "param_188",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 22020096
+ },
+ {
+ "name": "param_190",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 22282240
+ },
+ {
+ "name": "param_191",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 24117248
+ },
+ {
+ "name": "param_193",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 25952256
+ },
+ {
+ "name": "param_194",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 26869760
+ },
+ {
+ "name": "param_195",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27787264
+ },
+ {
+ "name": "param_196",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27795456
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_43.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "param_203",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_44.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "param_206",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_45.bin",
+ "format": "raw-shard",
+ "nbytes": 27803648,
+ "records": [
+ {
+ "name": "param_197",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "param_198",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "param_199",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "param_200",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 8388608,
+ "byteOffset": 13369344
+ },
+ {
+ "name": "param_201",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 21757952
+ },
+ {
+ "name": "param_202",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 22020096
+ },
+ {
+ "name": "param_204",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 22282240
+ },
+ {
+ "name": "param_205",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 24117248
+ },
+ {
+ "name": "param_207",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 25952256
+ },
+ {
+ "name": "param_208",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 26869760
+ },
+ {
+ "name": "param_209",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27787264
+ },
+ {
+ "name": "param_210",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27795456
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_46.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "param_217",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_47.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "param_220",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_48.bin",
+ "format": "raw-shard",
+ "nbytes": 27803648,
+ "records": [
+ {
+ "name": "param_211",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "param_212",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "param_213",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "param_214",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 8388608,
+ "byteOffset": 13369344
+ },
+ {
+ "name": "param_215",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 21757952
+ },
+ {
+ "name": "param_216",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 22020096
+ },
+ {
+ "name": "param_218",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 22282240
+ },
+ {
+ "name": "param_219",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 24117248
+ },
+ {
+ "name": "param_221",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 25952256
+ },
+ {
+ "name": "param_222",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 26869760
+ },
+ {
+ "name": "param_223",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27787264
+ },
+ {
+ "name": "param_224",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27795456
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_49.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "param_231",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_50.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "param_234",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_51.bin",
+ "format": "raw-shard",
+ "nbytes": 27803648,
+ "records": [
+ {
+ "name": "param_225",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "param_226",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "param_227",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "param_228",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 8388608,
+ "byteOffset": 13369344
+ },
+ {
+ "name": "param_229",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 21757952
+ },
+ {
+ "name": "param_230",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 22020096
+ },
+ {
+ "name": "param_232",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 22282240
+ },
+ {
+ "name": "param_233",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 24117248
+ },
+ {
+ "name": "param_235",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 25952256
+ },
+ {
+ "name": "param_236",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 26869760
+ },
+ {
+ "name": "param_237",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27787264
+ },
+ {
+ "name": "param_238",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27795456
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_52.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "param_245",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_53.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "param_248",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_54.bin",
+ "format": "raw-shard",
+ "nbytes": 27803648,
+ "records": [
+ {
+ "name": "param_239",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "param_240",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "param_241",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "param_242",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 8388608,
+ "byteOffset": 13369344
+ },
+ {
+ "name": "param_243",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 21757952
+ },
+ {
+ "name": "param_244",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 22020096
+ },
+ {
+ "name": "param_246",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 22282240
+ },
+ {
+ "name": "param_247",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 24117248
+ },
+ {
+ "name": "param_249",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 25952256
+ },
+ {
+ "name": "param_250",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 26869760
+ },
+ {
+ "name": "param_251",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27787264
+ },
+ {
+ "name": "param_252",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27795456
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_55.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "param_259",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_56.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "param_262",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_57.bin",
+ "format": "raw-shard",
+ "nbytes": 27803648,
+ "records": [
+ {
+ "name": "param_253",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "param_254",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "param_255",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "param_256",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 8388608,
+ "byteOffset": 13369344
+ },
+ {
+ "name": "param_257",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 21757952
+ },
+ {
+ "name": "param_258",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 22020096
+ },
+ {
+ "name": "param_260",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 22282240
+ },
+ {
+ "name": "param_261",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 24117248
+ },
+ {
+ "name": "param_263",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 25952256
+ },
+ {
+ "name": "param_264",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 26869760
+ },
+ {
+ "name": "param_265",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27787264
+ },
+ {
+ "name": "param_266",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27795456
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_58.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "param_273",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_59.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "param_276",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_60.bin",
+ "format": "raw-shard",
+ "nbytes": 27803648,
+ "records": [
+ {
+ "name": "param_267",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "param_268",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "param_269",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "param_270",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 8388608,
+ "byteOffset": 13369344
+ },
+ {
+ "name": "param_271",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 21757952
+ },
+ {
+ "name": "param_272",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 22020096
+ },
+ {
+ "name": "param_274",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 22282240
+ },
+ {
+ "name": "param_275",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 24117248
+ },
+ {
+ "name": "param_277",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 25952256
+ },
+ {
+ "name": "param_278",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 26869760
+ },
+ {
+ "name": "param_279",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27787264
+ },
+ {
+ "name": "param_280",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27795456
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_61.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "param_287",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_62.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "param_290",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_63.bin",
+ "format": "raw-shard",
+ "nbytes": 27803648,
+ "records": [
+ {
+ "name": "param_281",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "param_282",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "param_283",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "param_284",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 8388608,
+ "byteOffset": 13369344
+ },
+ {
+ "name": "param_285",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 21757952
+ },
+ {
+ "name": "param_286",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 22020096
+ },
+ {
+ "name": "param_288",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 22282240
+ },
+ {
+ "name": "param_289",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 24117248
+ },
+ {
+ "name": "param_291",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 25952256
+ },
+ {
+ "name": "param_292",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 26869760
+ },
+ {
+ "name": "param_293",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27787264
+ },
+ {
+ "name": "param_294",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27795456
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_64.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "param_301",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_65.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "param_304",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_66.bin",
+ "format": "raw-shard",
+ "nbytes": 27803648,
+ "records": [
+ {
+ "name": "param_295",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "param_296",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "param_297",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "param_298",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 8388608,
+ "byteOffset": 13369344
+ },
+ {
+ "name": "param_299",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 21757952
+ },
+ {
+ "name": "param_300",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 22020096
+ },
+ {
+ "name": "param_302",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 22282240
+ },
+ {
+ "name": "param_303",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 24117248
+ },
+ {
+ "name": "param_305",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 25952256
+ },
+ {
+ "name": "param_306",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 26869760
+ },
+ {
+ "name": "param_307",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27787264
+ },
+ {
+ "name": "param_308",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27795456
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_67.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "param_315",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_68.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "param_318",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_69.bin",
+ "format": "raw-shard",
+ "nbytes": 27803648,
+ "records": [
+ {
+ "name": "param_309",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "param_310",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "param_311",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "param_312",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 8388608,
+ "byteOffset": 13369344
+ },
+ {
+ "name": "param_313",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 21757952
+ },
+ {
+ "name": "param_314",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 22020096
+ },
+ {
+ "name": "param_316",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 22282240
+ },
+ {
+ "name": "param_317",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 24117248
+ },
+ {
+ "name": "param_319",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 25952256
+ },
+ {
+ "name": "param_320",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 26869760
+ },
+ {
+ "name": "param_321",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27787264
+ },
+ {
+ "name": "param_322",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27795456
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_70.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "param_329",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_71.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "param_332",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_72.bin",
+ "format": "raw-shard",
+ "nbytes": 27803648,
+ "records": [
+ {
+ "name": "param_323",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "param_324",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "param_325",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "param_326",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 8388608,
+ "byteOffset": 13369344
+ },
+ {
+ "name": "param_327",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 21757952
+ },
+ {
+ "name": "param_328",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 22020096
+ },
+ {
+ "name": "param_330",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 22282240
+ },
+ {
+ "name": "param_331",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 24117248
+ },
+ {
+ "name": "param_333",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 25952256
+ },
+ {
+ "name": "param_334",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 26869760
+ },
+ {
+ "name": "param_335",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27787264
+ },
+ {
+ "name": "param_336",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27795456
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_73.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "param_343",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_74.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "param_346",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_75.bin",
+ "format": "raw-shard",
+ "nbytes": 27803648,
+ "records": [
+ {
+ "name": "param_337",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "param_338",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "param_339",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "param_340",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 8388608,
+ "byteOffset": 13369344
+ },
+ {
+ "name": "param_341",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 21757952
+ },
+ {
+ "name": "param_342",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 22020096
+ },
+ {
+ "name": "param_344",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 22282240
+ },
+ {
+ "name": "param_345",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 24117248
+ },
+ {
+ "name": "param_347",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 25952256
+ },
+ {
+ "name": "param_348",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 26869760
+ },
+ {
+ "name": "param_349",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27787264
+ },
+ {
+ "name": "param_350",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27795456
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_76.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "param_357",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_77.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "param_360",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_78.bin",
+ "format": "raw-shard",
+ "nbytes": 27803648,
+ "records": [
+ {
+ "name": "param_351",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "param_352",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "param_353",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "param_354",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 8388608,
+ "byteOffset": 13369344
+ },
+ {
+ "name": "param_355",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 21757952
+ },
+ {
+ "name": "param_356",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 22020096
+ },
+ {
+ "name": "param_358",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 22282240
+ },
+ {
+ "name": "param_359",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 24117248
+ },
+ {
+ "name": "param_361",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 25952256
+ },
+ {
+ "name": "param_362",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 26869760
+ },
+ {
+ "name": "param_363",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27787264
+ },
+ {
+ "name": "param_364",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27795456
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_79.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "param_371",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_80.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "param_374",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_81.bin",
+ "format": "raw-shard",
+ "nbytes": 27803648,
+ "records": [
+ {
+ "name": "param_365",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "param_366",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "param_367",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "param_368",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 8388608,
+ "byteOffset": 13369344
+ },
+ {
+ "name": "param_369",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 21757952
+ },
+ {
+ "name": "param_370",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 22020096
+ },
+ {
+ "name": "param_372",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 22282240
+ },
+ {
+ "name": "param_373",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 24117248
+ },
+ {
+ "name": "param_375",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 25952256
+ },
+ {
+ "name": "param_376",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 26869760
+ },
+ {
+ "name": "param_377",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27787264
+ },
+ {
+ "name": "param_378",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27795456
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_82.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "param_385",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_83.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "param_388",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_84.bin",
+ "format": "raw-shard",
+ "nbytes": 27803648,
+ "records": [
+ {
+ "name": "param_379",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "param_380",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "param_381",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "param_382",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 8388608,
+ "byteOffset": 13369344
+ },
+ {
+ "name": "param_383",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 21757952
+ },
+ {
+ "name": "param_384",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 22020096
+ },
+ {
+ "name": "param_386",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 22282240
+ },
+ {
+ "name": "param_387",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 24117248
+ },
+ {
+ "name": "param_389",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 25952256
+ },
+ {
+ "name": "param_390",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 26869760
+ },
+ {
+ "name": "param_391",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27787264
+ },
+ {
+ "name": "param_392",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27795456
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_85.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "param_399",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_86.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "param_402",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_87.bin",
+ "format": "raw-shard",
+ "nbytes": 27803648,
+ "records": [
+ {
+ "name": "param_393",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "param_394",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "param_395",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "param_396",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 8388608,
+ "byteOffset": 13369344
+ },
+ {
+ "name": "param_397",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 21757952
+ },
+ {
+ "name": "param_398",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 22020096
+ },
+ {
+ "name": "param_400",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 22282240
+ },
+ {
+ "name": "param_401",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 24117248
+ },
+ {
+ "name": "param_403",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 25952256
+ },
+ {
+ "name": "param_404",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 26869760
+ },
+ {
+ "name": "param_405",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27787264
+ },
+ {
+ "name": "param_406",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27795456
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_88.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "param_413",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_89.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "param_416",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_90.bin",
+ "format": "raw-shard",
+ "nbytes": 27803648,
+ "records": [
+ {
+ "name": "param_407",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "param_408",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "param_409",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "param_410",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 8388608,
+ "byteOffset": 13369344
+ },
+ {
+ "name": "param_411",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 21757952
+ },
+ {
+ "name": "param_412",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 22020096
+ },
+ {
+ "name": "param_414",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 22282240
+ },
+ {
+ "name": "param_415",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 24117248
+ },
+ {
+ "name": "param_417",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 25952256
+ },
+ {
+ "name": "param_418",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 26869760
+ },
+ {
+ "name": "param_419",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27787264
+ },
+ {
+ "name": "param_420",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27795456
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_91.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "param_427",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_92.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "param_430",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_93.bin",
+ "format": "raw-shard",
+ "nbytes": 27803648,
+ "records": [
+ {
+ "name": "param_421",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "param_422",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "param_423",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "param_424",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 8388608,
+ "byteOffset": 13369344
+ },
+ {
+ "name": "param_425",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 21757952
+ },
+ {
+ "name": "param_426",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 22020096
+ },
+ {
+ "name": "param_428",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 22282240
+ },
+ {
+ "name": "param_429",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 24117248
+ },
+ {
+ "name": "param_431",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 25952256
+ },
+ {
+ "name": "param_432",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 26869760
+ },
+ {
+ "name": "param_433",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27787264
+ },
+ {
+ "name": "param_434",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27795456
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_94.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "param_441",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_95.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "param_444",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_96.bin",
+ "format": "raw-shard",
+ "nbytes": 262144000,
+ "records": [
+ {
+ "name": "param_450",
+ "shape": [
+ 32000,
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144000,
+ "byteOffset": 0
+ }
+ ]
+ },
+ {
+ "dataPath": "params_shard_97.bin",
+ "format": "raw-shard",
+ "nbytes": 28860416,
+ "records": [
+ {
+ "name": "param_435",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "param_436",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "param_437",
+ "shape": [
+ 6144,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 393216,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "param_438",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "raw",
+ "nbytes": 8388608,
+ "byteOffset": 13369344
+ },
+ {
+ "name": "param_439",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 21757952
+ },
+ {
+ "name": "param_440",
+ "shape": [
+ 4096,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 262144,
+ "byteOffset": 22020096
+ },
+ {
+ "name": "param_442",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 22282240
+ },
+ {
+ "name": "param_443",
+ "shape": [
+ 28672,
+ 32
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 1835008,
+ "byteOffset": 24117248
+ },
+ {
+ "name": "param_445",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 25952256
+ },
+ {
+ "name": "param_446",
+ "shape": [
+ 4096,
+ 112
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 917504,
+ "byteOffset": 26869760
+ },
+ {
+ "name": "param_447",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27787264
+ },
+ {
+ "name": "param_448",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27795456
+ },
+ {
+ "name": "param_449",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27803648
+ },
+ {
+ "name": "param_451",
+ "shape": [
+ 2048,
+ 128
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 524288,
+ "byteOffset": 27811840
+ },
+ {
+ "name": "param_452",
+ "shape": [
+ 2048,
+ 128
+ ],
+ "dtype": "float16",
+ "format": "raw",
+ "nbytes": 524288,
+ "byteOffset": 28336128
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/params_shard_0.bin b/params_shard_0.bin
new file mode 100644
index 0000000000000000000000000000000000000000..55a4f4ee04443f79760b03029a294750c7635905
--- /dev/null
+++ b/params_shard_0.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c1b94878d25676a958a89390204eaa681ebf8481442c84cd8eabc227dd021e4
+size 262144000
diff --git a/params_shard_1.bin b/params_shard_1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3f17d1e15b924c9fd05ed077dd0d5c7dc99abb29
--- /dev/null
+++ b/params_shard_1.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:308c2cbf4c417aaca1991b388fd57da156c5f45d2d65fd6ba39b06442496f167
+size 58720256
diff --git a/params_shard_10.bin b/params_shard_10.bin
new file mode 100644
index 0000000000000000000000000000000000000000..71732b2377ea88f90692ae5fa867cc88790200de
--- /dev/null
+++ b/params_shard_10.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8078ad4c040d9cd9725654e06fc2e4c95dd5c0810ba53adeda4ef83c1ce8fe1
+size 58720256
diff --git a/params_shard_11.bin b/params_shard_11.bin
new file mode 100644
index 0000000000000000000000000000000000000000..05afd55a53695cf745dcc69a038c9dccb6b79373
--- /dev/null
+++ b/params_shard_11.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4063ad734f72bda36770c72262d76fa141e5f276337981e51be8c42c37dd7e61
+size 29360128
diff --git a/params_shard_12.bin b/params_shard_12.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4dac454859069623d3fc612fc16fe0ade5f0c62e
--- /dev/null
+++ b/params_shard_12.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa65b8c1667623e56ea57b251aae433de37cadfa363f951346e429ae032cbf90
+size 27803648
diff --git a/params_shard_13.bin b/params_shard_13.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3d08a7c722f80bfcb5178545437443f459cf59b4
--- /dev/null
+++ b/params_shard_13.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c8df1a65ee35d1903c1b44b7411574b8d95b7a2f4fb0c1f04dd38b15f346bb9
+size 58720256
diff --git a/params_shard_14.bin b/params_shard_14.bin
new file mode 100644
index 0000000000000000000000000000000000000000..1ab66861cef674b4baacd8827d21bb7d47e1a280
--- /dev/null
+++ b/params_shard_14.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e579561d42046df794aff2b418b322cb6fd908323732dea185c743c83e3ae0e
+size 29360128
diff --git a/params_shard_15.bin b/params_shard_15.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5cbf790686d7787deecd9d1a29f341c3a34c8081
--- /dev/null
+++ b/params_shard_15.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:489e1bc46f36d2289421fc6c6a869e9e9dad04b8a042784e196bd7a1e2def644
+size 27803648
diff --git a/params_shard_16.bin b/params_shard_16.bin
new file mode 100644
index 0000000000000000000000000000000000000000..94d6b53bf9df8a77a850706e5ddea4f0fd226370
--- /dev/null
+++ b/params_shard_16.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c22e5658467b6199b4931e396070d4d3d528a4174599d18a57ab9c5322eb38b4
+size 58720256
diff --git a/params_shard_17.bin b/params_shard_17.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8bdb9d898a888412270cdc5b6874b9e1b30c9112
--- /dev/null
+++ b/params_shard_17.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:554fa077ddf9d5fff315c4807e12e60cfa1f2c254356765f548ac1f5b3fb1c4e
+size 29360128
diff --git a/params_shard_18.bin b/params_shard_18.bin
new file mode 100644
index 0000000000000000000000000000000000000000..cbb584f334529d6bd0b9654ac2c7c5e613795736
--- /dev/null
+++ b/params_shard_18.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3c53d500a2efc263afb1aeb3ed9f4d763a8c826ac3663a0905c276eef1c537c9
+size 27803648
diff --git a/params_shard_19.bin b/params_shard_19.bin
new file mode 100644
index 0000000000000000000000000000000000000000..9595ce5a9962f2f8eb050499a33e3aadf2434b2c
--- /dev/null
+++ b/params_shard_19.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a591a5b46f68cde21d6c40ab3727a7c54299876f0eeee7cacd7c350f07816b85
+size 58720256
diff --git a/params_shard_2.bin b/params_shard_2.bin
new file mode 100644
index 0000000000000000000000000000000000000000..9a1ebac2dd893a75bb0239aea1538157100d85ad
--- /dev/null
+++ b/params_shard_2.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cac35c02eec2e54c41c0a3afe28ea7df201c4afd8336c2dec87f86a8426b8fd4
+size 29360128
diff --git a/params_shard_20.bin b/params_shard_20.bin
new file mode 100644
index 0000000000000000000000000000000000000000..474f9c7b651ab41bbb099caaa5d81ff674041107
--- /dev/null
+++ b/params_shard_20.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c6508b232ae412d96ac9d9d993e6e58c52e1f9fdbcad54f82a4266264163aee
+size 29360128
diff --git a/params_shard_21.bin b/params_shard_21.bin
new file mode 100644
index 0000000000000000000000000000000000000000..24fc645d2d4078fa78b389a4e24434454fb16ef7
--- /dev/null
+++ b/params_shard_21.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e2a6a0c5fdd6ed340cf1bc3eb18a1fdd27781eeac72acc1249532689002b1d12
+size 27803648
diff --git a/params_shard_22.bin b/params_shard_22.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f7ef4cfe4bf57df2675d27169be2a4dd32a36d46
--- /dev/null
+++ b/params_shard_22.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7e5edfa53f2575608b54a1d2107b767458fa02a0c8a252684108cfe5ab36e5f2
+size 58720256
diff --git a/params_shard_23.bin b/params_shard_23.bin
new file mode 100644
index 0000000000000000000000000000000000000000..da7fbd465070e37d42e73cfb18c3cf8dde00933c
--- /dev/null
+++ b/params_shard_23.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:018c20abe103c1e9e416312669dde3ff58a20b461a22fe2e1c690b522bd3f208
+size 29360128
diff --git a/params_shard_24.bin b/params_shard_24.bin
new file mode 100644
index 0000000000000000000000000000000000000000..fe3c64a518f9835e163bb07ee01bcd808c57698c
--- /dev/null
+++ b/params_shard_24.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3400910ef601706268a685e6426739dd48b196525ddd9a20d81acd207139bbaa
+size 27803648
diff --git a/params_shard_25.bin b/params_shard_25.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7bdcb005cd4ff75ea5f3c59232d78d08262f460f
--- /dev/null
+++ b/params_shard_25.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9721fc1aa69571ae61469ea07401546ce56fe269aaed4facde21dac2856fc5e
+size 58720256
diff --git a/params_shard_26.bin b/params_shard_26.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e54af6261d8b0401cb6123fc7e8be669ea9df3ff
--- /dev/null
+++ b/params_shard_26.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:700770b95d2c91f56dd092347cec3047339ca4e907fc11bcd7240cfc007b1977
+size 29360128
diff --git a/params_shard_27.bin b/params_shard_27.bin
new file mode 100644
index 0000000000000000000000000000000000000000..6caaaac88d5f48845ab7943aa753f9b9f33da78a
--- /dev/null
+++ b/params_shard_27.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:398a9b4baceb675d7d9a370467f98f310b2a859a717e2499bebc17673c65a63c
+size 27803648
diff --git a/params_shard_28.bin b/params_shard_28.bin
new file mode 100644
index 0000000000000000000000000000000000000000..9b8da258df91d3a39c6c88b79ee3eff13e4f247c
--- /dev/null
+++ b/params_shard_28.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d14286c1cf6b001cad33a2cc73e86c39808519b7d340f9fd37a2f181ad0d7cc
+size 58720256
diff --git a/params_shard_29.bin b/params_shard_29.bin
new file mode 100644
index 0000000000000000000000000000000000000000..903ab4f7a8bc0ea57f862a0ad796db409f4b8bf1
--- /dev/null
+++ b/params_shard_29.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:847f9436057038aad8a375065a25acdbcf7617c097269cd3bac41b23b33b0a00
+size 29360128
diff --git a/params_shard_3.bin b/params_shard_3.bin
new file mode 100644
index 0000000000000000000000000000000000000000..91d030abb631c34b9ed15d9093e1407aedebf79d
--- /dev/null
+++ b/params_shard_3.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f4a1a8f63ab27291ce44c382a7ceedee4b5cbe37a385f8d0ed0206a264a469f
+size 27803648
diff --git a/params_shard_30.bin b/params_shard_30.bin
new file mode 100644
index 0000000000000000000000000000000000000000..65bc809154856971ada472fd4470f7a128d4fa39
--- /dev/null
+++ b/params_shard_30.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d43989f526ba98f98c90f055955d35e0b89585c0fa5f2b0800e4360e19a90b5
+size 27803648
diff --git a/params_shard_31.bin b/params_shard_31.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7fa7980a64386d39593776b576f61b8464f8a13b
--- /dev/null
+++ b/params_shard_31.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:24adf8a268e477513c49f32fbc642c375684706f6bdb28a86b69fc061897d24e
+size 58720256
diff --git a/params_shard_32.bin b/params_shard_32.bin
new file mode 100644
index 0000000000000000000000000000000000000000..618285973c4fc5d325be79de2e955cdda2b0994f
--- /dev/null
+++ b/params_shard_32.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:90b0b06f74724bc9179b752017dcef5fd101f7b6597a6d8bf7b6a2cc294e9d64
+size 29360128
diff --git a/params_shard_33.bin b/params_shard_33.bin
new file mode 100644
index 0000000000000000000000000000000000000000..fd3cd968d9ecc6d8e98d5058105d34cb05cff437
--- /dev/null
+++ b/params_shard_33.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df624f272fa426d8aa113e581d3e9a8e7b5dc21a2c3b3278190d1d6b8bca435f
+size 27803648
diff --git a/params_shard_34.bin b/params_shard_34.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b4f46c3e74e8512c30445d8a632229cd0a19dd3e
--- /dev/null
+++ b/params_shard_34.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1c76bf41139d2d9adb95c390644fc3b3980d451ed7db764c35b17782ff82bfb6
+size 58720256
diff --git a/params_shard_35.bin b/params_shard_35.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e6cad29afb3361b41c9e0c95e7d7efab776b2aa1
--- /dev/null
+++ b/params_shard_35.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aad1faf22a689d3b65968cd4bfcb2649b39280913dd2cb3c154cd072766ef56e
+size 29360128
diff --git a/params_shard_36.bin b/params_shard_36.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5d62ce6d93a6f4aa7bfd9d3f616c4b9f4e58d4c3
--- /dev/null
+++ b/params_shard_36.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cbfb6bfefe76deee84a37f6cea55f7a9742e62aad51288da3a34759179d32142
+size 27803648
diff --git a/params_shard_37.bin b/params_shard_37.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8284b3ca08089afe2dc882264345b4867b584d80
--- /dev/null
+++ b/params_shard_37.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e998d46d388ecb8d94f63bd8048db29fa6622c112abab4b5c1c5cac8da7507a4
+size 58720256
diff --git a/params_shard_38.bin b/params_shard_38.bin
new file mode 100644
index 0000000000000000000000000000000000000000..03062af06e6403052d21b8cfd7eb39e625746390
--- /dev/null
+++ b/params_shard_38.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d643b1c2e5fff3cc1295f625982d6a5adbc7248855290f9f2ad183b55ecf0f5
+size 29360128
diff --git a/params_shard_39.bin b/params_shard_39.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2b18e42a70c289e64356b232117301bb98a4784a
--- /dev/null
+++ b/params_shard_39.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2851563973e59b9bf050770ce83f6048eaa9e5dee4f2455559ddb2b6a17ad313
+size 27803648
diff --git a/params_shard_4.bin b/params_shard_4.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8d13155830672a040fa78f2db749b4a67a579e78
--- /dev/null
+++ b/params_shard_4.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dedc1ffa22adab22045513b632023e7ee182e3bde4262125ed51d090e49f3052
+size 58720256
diff --git a/params_shard_40.bin b/params_shard_40.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d5427a71d5460b6ad7d18ee001d3d72517b98c18
--- /dev/null
+++ b/params_shard_40.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6707ef5c4f3325b2c7965e24693def955e5bbc04f64e3334c582ee82dbd4122b
+size 58720256
diff --git a/params_shard_41.bin b/params_shard_41.bin
new file mode 100644
index 0000000000000000000000000000000000000000..71dd3cbd9086d2a0748cbb2eab5ea79337d2f23f
--- /dev/null
+++ b/params_shard_41.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:00c83f1fda4b6cc523edb1a5831dbe274dba4f7f3141e35d07fb7a74ea3af9c7
+size 29360128
diff --git a/params_shard_42.bin b/params_shard_42.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f6fa896663081ee86058192b64bf91109805d6d8
--- /dev/null
+++ b/params_shard_42.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:deefc32e0ecf6cd70acd504b9f47fb3655ffb1d742e0301ff65c0df1e3b60eb4
+size 27803648
diff --git a/params_shard_43.bin b/params_shard_43.bin
new file mode 100644
index 0000000000000000000000000000000000000000..932ca81dd78b7f429442706f7dd5810f89e04e15
--- /dev/null
+++ b/params_shard_43.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0bc17abb621f08e6bdc2caa726cd5fb34bd4eada177e2baa266f8f38f6ae1ba3
+size 58720256
diff --git a/params_shard_44.bin b/params_shard_44.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2103b58f0d03d74bb5c92e5b414a665e279f14c4
--- /dev/null
+++ b/params_shard_44.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8ed26ee7ed2765246d41fa150885e1371d371acf76599e8d1c8599545eb39166
+size 29360128
diff --git a/params_shard_45.bin b/params_shard_45.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c9c5b5ac6f67c75b89d0a8a432c2f48836431742
--- /dev/null
+++ b/params_shard_45.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de5eb4b68a0bcbea7b6d80c34d2c0b760257d46ac68c5e5f9c3a9bba93581ea4
+size 27803648
diff --git a/params_shard_46.bin b/params_shard_46.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e9ff4309c71d8a4ed7ecb4f4e6ab29afc7361a6f
--- /dev/null
+++ b/params_shard_46.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:80963950e380260e41501c5e6470a5f2657412e5aee9b86194a92388de8fe3f3
+size 58720256
diff --git a/params_shard_47.bin b/params_shard_47.bin
new file mode 100644
index 0000000000000000000000000000000000000000..061e01e2fa08973135064deef72c7a19fccc2f33
--- /dev/null
+++ b/params_shard_47.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8a2b980ed1e659114340fcda5d78e7977e4ff8336c466937ed4c06e0bc5baa9
+size 29360128
diff --git a/params_shard_48.bin b/params_shard_48.bin
new file mode 100644
index 0000000000000000000000000000000000000000..58e8d5f4637e299fd7bb3d7d4f0ec5b925bc82de
--- /dev/null
+++ b/params_shard_48.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f87290154bc3564e925ca6f5b1682d95dbd443abb4a9bb2a989163eec3a49133
+size 27803648
diff --git a/params_shard_49.bin b/params_shard_49.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5923f705496dc70f83755babdc8ccd790f5b93a6
--- /dev/null
+++ b/params_shard_49.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87e5126950ec447cd8a802a5e0f4be0d3a41e4647c6b624ad125d96f480557eb
+size 58720256
diff --git a/params_shard_5.bin b/params_shard_5.bin
new file mode 100644
index 0000000000000000000000000000000000000000..50a27f74d44093c9a8ccccc0f46e78c4597b87e1
--- /dev/null
+++ b/params_shard_5.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f89077a5a80ec1584cf5b88cceb6cadd564a0b45979105df47836154cdeafe66
+size 29360128
diff --git a/params_shard_50.bin b/params_shard_50.bin
new file mode 100644
index 0000000000000000000000000000000000000000..eebaed5fe98efb87c1b396f17d68f1503d2f4599
--- /dev/null
+++ b/params_shard_50.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:422a7ef5130be679427603848086669b36736917ec2a909f7b76f9b2c5856c07
+size 29360128
diff --git a/params_shard_51.bin b/params_shard_51.bin
new file mode 100644
index 0000000000000000000000000000000000000000..724c29bdc8a8fe43c42872809fad06ce1d20523a
--- /dev/null
+++ b/params_shard_51.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:531bacad60cdc41f8aaa199a74a63e2b9d441af794df0886e6c56f49188e90b1
+size 27803648
diff --git a/params_shard_52.bin b/params_shard_52.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d519267687b514c6c19c396c9d2434bcb24d4ff7
--- /dev/null
+++ b/params_shard_52.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:20c16dee171fe7dac22817f37401ea7f4813de3c3d78642b6e18a03c8ac21b3e
+size 58720256
diff --git a/params_shard_53.bin b/params_shard_53.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7b51a50c2cfba705522d05fcd0e8f0ac3563bb42
--- /dev/null
+++ b/params_shard_53.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fbdd3df9675b0328653b7b276d3fbf914a78f40a03e1edd87a48f01f26c4e352
+size 29360128
diff --git a/params_shard_54.bin b/params_shard_54.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ef5b0894244cb317d46ee0d735ce69b282fda265
--- /dev/null
+++ b/params_shard_54.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e88ef0f35ba362a3a66f4d3a9d5c1bea717d649641f4b4753363f674021475d
+size 27803648
diff --git a/params_shard_55.bin b/params_shard_55.bin
new file mode 100644
index 0000000000000000000000000000000000000000..684a49366517cb5cb770296c8d40b5d448b6aa6e
--- /dev/null
+++ b/params_shard_55.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8dc55d2f0233e45dc55bf243b7ff81d64f843d3e2378c81d2fc5e1c9fb752a78
+size 58720256
diff --git a/params_shard_56.bin b/params_shard_56.bin
new file mode 100644
index 0000000000000000000000000000000000000000..afe2e3310549c60b778c5ffdf3d45b1b68702db0
--- /dev/null
+++ b/params_shard_56.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dbb33ed217cf4af2f5a175ee896f87765db8e5742c1a722929e1297af99cd90e
+size 29360128
diff --git a/params_shard_57.bin b/params_shard_57.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e6394303dc8bf8a551240909487e944acc0a408a
--- /dev/null
+++ b/params_shard_57.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dba19849007154aab57f2260557999e0a1fbd7ed7b76dc6f8a25e8910e37c99b
+size 27803648
diff --git a/params_shard_58.bin b/params_shard_58.bin
new file mode 100644
index 0000000000000000000000000000000000000000..11583583a35d615ad83207160c5ac33c301bc985
--- /dev/null
+++ b/params_shard_58.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4cf4fb0aa7b89efd345d6cd2ffb8dc61fdfa78193d601c6d03e000cc66ca0c1e
+size 58720256
diff --git a/params_shard_59.bin b/params_shard_59.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d57a5c6f662e14d94c7625175e6350197cd4ca93
--- /dev/null
+++ b/params_shard_59.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:54749075bc4797f89721aa9fe6554b0c9859144214819884a8ca1fe9021ded69
+size 29360128
diff --git a/params_shard_6.bin b/params_shard_6.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a107a6aa08b909f8f836cc609eef64990cc2f570
--- /dev/null
+++ b/params_shard_6.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:71ea44f225dfa6389cba33759df7b9d94c3c3f1d90940198be738d49f7c9d85b
+size 27803648
diff --git a/params_shard_60.bin b/params_shard_60.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2016a63fc1e52a1f96370814c7cf4c154abdd192
--- /dev/null
+++ b/params_shard_60.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5daeb043603a6fb27ea99c883e480bb78067bc74da52da625f3f38c349d43d8
+size 27803648
diff --git a/params_shard_61.bin b/params_shard_61.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5eaffa7688c8c0f780788b8c1008ab9540782076
--- /dev/null
+++ b/params_shard_61.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84c653ec5da3bbd186256332d5c67e3319bf2b78c28d32595bb766a02d60be62
+size 58720256
diff --git a/params_shard_62.bin b/params_shard_62.bin
new file mode 100644
index 0000000000000000000000000000000000000000..700a13103685dea872924522657a6898e4b8db7f
--- /dev/null
+++ b/params_shard_62.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9493e003a36d6396c428b327ad923ab4d41132a8a4fbec91e15056d63125fdeb
+size 29360128
diff --git a/params_shard_63.bin b/params_shard_63.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7b3d7e04449af9509dd8aaa5703ccde6e63ee297
--- /dev/null
+++ b/params_shard_63.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de694952f5ed78f378abac6fe1b210b5feda83be078c64e34ca25987230cdaee
+size 27803648
diff --git a/params_shard_64.bin b/params_shard_64.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ab5e7ad79307d51ab14f1b27c54752f913c087a2
--- /dev/null
+++ b/params_shard_64.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b730355ecaa58ce44d37c08916988acb097be456b40a463e265dd6db14f73ea2
+size 58720256
diff --git a/params_shard_65.bin b/params_shard_65.bin
new file mode 100644
index 0000000000000000000000000000000000000000..38153467e3e98f75e6988fb0fe57f0101db7953e
--- /dev/null
+++ b/params_shard_65.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d72bbf896c5b7f54493806ff9b2e99bbd9bd0b567ae17b565d8a18e1fccbafba
+size 29360128
diff --git a/params_shard_66.bin b/params_shard_66.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5d99606f908e98d81d7e2e54f9fbfd90d2ed8a9a
--- /dev/null
+++ b/params_shard_66.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fdf37318c73fdc6e9fd180cc2b86f38606876354ac50077a3ded00c9eb85511e
+size 27803648
diff --git a/params_shard_67.bin b/params_shard_67.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3780cc169585f92d0888e54b673060c67e5e0bec
--- /dev/null
+++ b/params_shard_67.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64935b31262f19f8b9b9e4d37d1f60ea4047208f5311f3fc9dbc669d67ad6a4c
+size 58720256
diff --git a/params_shard_68.bin b/params_shard_68.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2f32d5a70f808f2282706bcac615e7210c018276
--- /dev/null
+++ b/params_shard_68.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8ed21ef52c64a0438484901e08890386abe8603a062cbf05e4918e8c1359ecf1
+size 29360128
diff --git a/params_shard_69.bin b/params_shard_69.bin
new file mode 100644
index 0000000000000000000000000000000000000000..17e1f9b1ae852da75c324921adc84de9f904f15c
--- /dev/null
+++ b/params_shard_69.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:48d4d5eb8d26c760f3ca655341369d649356ac3f4680600136d9924b5d80f39e
+size 27803648
diff --git a/params_shard_7.bin b/params_shard_7.bin
new file mode 100644
index 0000000000000000000000000000000000000000..cc6ef9657752154d4e1ff4572b98bdc1e7ab8338
--- /dev/null
+++ b/params_shard_7.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:23b5ab65f824d441b2302bb8045052916b256458dc52cfea996c81f9fe1829c6
+size 58720256
diff --git a/params_shard_70.bin b/params_shard_70.bin
new file mode 100644
index 0000000000000000000000000000000000000000..42b8547efaa42e0d6b61df5b575e0128f06ca5a3
--- /dev/null
+++ b/params_shard_70.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2516c6ed75fe013fb3049cec6a2031c935c1f7782e6274ce241fe0b5aac9b777
+size 58720256
diff --git a/params_shard_71.bin b/params_shard_71.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5c5682a123521506425ea9a313030b41a1c25a2c
--- /dev/null
+++ b/params_shard_71.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:60a08972c0bcb98a90b2ca3fc0b4995e2a3f4468d91ce5ba65d550ac1ccd3ad2
+size 29360128
diff --git a/params_shard_72.bin b/params_shard_72.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a98c518670a6ea958670f24ade1f76958ce897c3
--- /dev/null
+++ b/params_shard_72.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:96114260efc632de857f1fa8105ac0459b9ec3857774f27b42e1a2d9ee54d6b5
+size 27803648
diff --git a/params_shard_73.bin b/params_shard_73.bin
new file mode 100644
index 0000000000000000000000000000000000000000..816b160003dae58e9319c4e637d1e3295a350f45
--- /dev/null
+++ b/params_shard_73.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:91d23f9c3f9d67afc394c6e9ab95923ab1d8d73cb09f40d5f8329a0dba97fec1
+size 58720256
diff --git a/params_shard_74.bin b/params_shard_74.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e232a0935267866c7152f87cfc342a555863a1cd
--- /dev/null
+++ b/params_shard_74.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8401d31cc87647d413ae6be4755063cdfb4f406589ab9383e832c81aaef11d1d
+size 29360128
diff --git a/params_shard_75.bin b/params_shard_75.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e346c2ee5973e38078d008612e5c923e65c4306b
--- /dev/null
+++ b/params_shard_75.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64b294028d076cf98df27e4b047c75c0992cabc5ed8ef4e7154d8b6031d8f124
+size 27803648
diff --git a/params_shard_76.bin b/params_shard_76.bin
new file mode 100644
index 0000000000000000000000000000000000000000..9de68e7f1aeb78acd7dcf87a01659f78dafd0dc8
--- /dev/null
+++ b/params_shard_76.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cbfa8a009a5f4e8b6a0936bc0adc03c869d5eadf6b119e6eaa9594f0fb990118
+size 58720256
diff --git a/params_shard_77.bin b/params_shard_77.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e91cf2c1332ff26ff0e3d34241cfe0df921221c5
--- /dev/null
+++ b/params_shard_77.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf617b3cd5e446bdb06811bf4e9fc7c6e788ef378ce1795415254e462960ebe9
+size 29360128
diff --git a/params_shard_78.bin b/params_shard_78.bin
new file mode 100644
index 0000000000000000000000000000000000000000..23675e01d65be6535b2f9a42c669d0c54280c447
--- /dev/null
+++ b/params_shard_78.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:606ee0d6514ca002272a210280737528af05ef619fb6bb5b60fb04db17abd8e8
+size 27803648
diff --git a/params_shard_79.bin b/params_shard_79.bin
new file mode 100644
index 0000000000000000000000000000000000000000..69d60652dc99223894781d8b537ad28ab37eedc4
--- /dev/null
+++ b/params_shard_79.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:13e497ab510bcf6a400d10d911132e03342022042ef54ad2e9d6c99f1c6c234a
+size 58720256
diff --git a/params_shard_8.bin b/params_shard_8.bin
new file mode 100644
index 0000000000000000000000000000000000000000..9a69f6f1efc68f02640d8f5bab2ca8997e9d2677
--- /dev/null
+++ b/params_shard_8.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f26398c9da761893dd5e4ef9d6f46541bbb716a14825f386a7af12739c0d7703
+size 29360128
diff --git a/params_shard_80.bin b/params_shard_80.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4cf41bb8365fdfb7cab1218e6280cd64e6b22d28
--- /dev/null
+++ b/params_shard_80.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:83e054eb7cec71aed363c246124c389e818367d349e89e7d870f0c87a107a86a
+size 29360128
diff --git a/params_shard_81.bin b/params_shard_81.bin
new file mode 100644
index 0000000000000000000000000000000000000000..fad01ae1575927f08a594551eacc903ae033fead
--- /dev/null
+++ b/params_shard_81.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9352e8be5814b44888029a5f43a6f328afa4ed9a04799323f6b0a1a59e744da6
+size 27803648
diff --git a/params_shard_82.bin b/params_shard_82.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b2c248e32f849131469647bc9f923fb6aa30f8c0
--- /dev/null
+++ b/params_shard_82.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d6fd9047217488c5cb9190b4f5ab4753116059980fcc9cdab9879cb30c4a5731
+size 58720256
diff --git a/params_shard_83.bin b/params_shard_83.bin
new file mode 100644
index 0000000000000000000000000000000000000000..1d46eb912efd815cb5cd8a2f08d76dc0a9330be8
--- /dev/null
+++ b/params_shard_83.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84c674182128d03282819d34e6cd789607982d7bebb662480be4690d677b3afb
+size 29360128
diff --git a/params_shard_84.bin b/params_shard_84.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ac30cb9de0c0106578a674ef61376e20c7ec84af
--- /dev/null
+++ b/params_shard_84.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:46fd263b2ec2809d20629af5a8b50485dc17604868431dd038bf233629aa26b0
+size 27803648
diff --git a/params_shard_85.bin b/params_shard_85.bin
new file mode 100644
index 0000000000000000000000000000000000000000..081b4953e9eafd9be328e1fba0015d7adfaf95c4
--- /dev/null
+++ b/params_shard_85.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:18cc3457b7832adf68a17907b652fba10e8fa53db85d8877a4b4aad22c550be6
+size 58720256
diff --git a/params_shard_86.bin b/params_shard_86.bin
new file mode 100644
index 0000000000000000000000000000000000000000..98a772f8187922aab557d66ef64871529b387b8c
--- /dev/null
+++ b/params_shard_86.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:98d48f3cf5f5fc6609b4dc764ea37577cf5bf0c6d0dda6a5971360635e523cdb
+size 29360128
diff --git a/params_shard_87.bin b/params_shard_87.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ecd322b0b6d59a0a77a79bb4c1f535611011eda0
--- /dev/null
+++ b/params_shard_87.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:21229e1538f4489e16e7f76c0bd538ba79be7d4580074b4440cb5651d01dca27
+size 27803648
diff --git a/params_shard_88.bin b/params_shard_88.bin
new file mode 100644
index 0000000000000000000000000000000000000000..156385c7ccf2888ca7fe38151a629e813dabff4b
--- /dev/null
+++ b/params_shard_88.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:95c25cd6be0a34b1741a68c696a04e52567fcd7d17a2c14cb7e5861f056135ba
+size 58720256
diff --git a/params_shard_89.bin b/params_shard_89.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3b74530333d41c0f3043611baeb4b0879e9e222f
--- /dev/null
+++ b/params_shard_89.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ecaae12f3fd5939c8fa56d6e43e0d2eca2ef4568e06ae81842500a2eb3822983
+size 29360128
diff --git a/params_shard_9.bin b/params_shard_9.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3e1f91294bf0b7a0b93d46c40fc0bf617c9440e1
--- /dev/null
+++ b/params_shard_9.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec055f076094175dff053468d0c7a66b890ac21518503ac862eb92c987351920
+size 27803648
diff --git a/params_shard_90.bin b/params_shard_90.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5bf88bdfe87ec0b1f296a2c314abf89adf0b3f95
--- /dev/null
+++ b/params_shard_90.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e2695c8046a5b5a249222c485dc596b3f041865b78bb3a0438936c82e707a32f
+size 27803648
diff --git a/params_shard_91.bin b/params_shard_91.bin
new file mode 100644
index 0000000000000000000000000000000000000000..71f512d40e9c648262cbfa401122263e4fb0957b
--- /dev/null
+++ b/params_shard_91.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e98d948f079e2c34ac3582ea00c8ba06ca9cea77debbb819902f290a1c1e95a5
+size 58720256
diff --git a/params_shard_92.bin b/params_shard_92.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4a2d44fdd44f4094e5abb963e751b38bd5c750b4
--- /dev/null
+++ b/params_shard_92.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea2ddb806c085814f7ef930c10a908178a53b222450aba2ad2f92d366c392da4
+size 29360128
diff --git a/params_shard_93.bin b/params_shard_93.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4e7a946b674c5ed110c6f736231abb8dade37a41
--- /dev/null
+++ b/params_shard_93.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:230bc4b253846ea9bb69c6164a491b22ae6b775b78d553f6ab4fefe1e1405f3e
+size 27803648
diff --git a/params_shard_94.bin b/params_shard_94.bin
new file mode 100644
index 0000000000000000000000000000000000000000..889f2edc074571372a47c84906bb586c3c1f1115
--- /dev/null
+++ b/params_shard_94.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e92bc2413df0aa4f464112260ff08aaa96ef77edbc5810fe9737d5adca707881
+size 58720256
diff --git a/params_shard_95.bin b/params_shard_95.bin
new file mode 100644
index 0000000000000000000000000000000000000000..437ea5fb1bc91d3234249de9036ac4ff667a2a20
--- /dev/null
+++ b/params_shard_95.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd49117ba0709ed4143aa72d90db952e190da6046de4b0c22d2eb1a449e49303
+size 29360128
diff --git a/params_shard_96.bin b/params_shard_96.bin
new file mode 100644
index 0000000000000000000000000000000000000000..0a723fa1d5134b155481dd62d923d65b7206d4ce
--- /dev/null
+++ b/params_shard_96.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a37ee7decd0585b6872cff57ce836bf8122061a7d359081dddc5ab95a0319804
+size 262144000
diff --git a/params_shard_97.bin b/params_shard_97.bin
new file mode 100644
index 0000000000000000000000000000000000000000..0cf0d8eb757bfeaed9a022d46d60ee10c6d514a3
--- /dev/null
+++ b/params_shard_97.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b15d17ce07fa0b29c6a26cbfcd8d6661c5e04e0b8e7371229a5e52c4df5765da
+size 28860416
diff --git a/private-llm-config.json b/private-llm-config.json
new file mode 100644
index 0000000000000000000000000000000000000000..816ef1b08a9c65cef6c47cdfb7f8f1a83fd92be8
--- /dev/null
+++ b/private-llm-config.json
@@ -0,0 +1,21 @@
+{
+ "model_lib": "zephyr-7b-alpha-w4a16g128asym",
+ "local_id": "zephyr-7b-alpha-w4a16g128asym",
+ "conv_template": "zephyr",
+ "temperature": 0.7,
+ "repetition_penalty": 1.1,
+ "top_p": 0.95,
+ "mean_gen_len": 512,
+ "max_gen_len": 2048,
+ "num_shards": 1,
+ "shift_fill_factor": 0.3,
+ "tokenizer_files": [
+ "added_tokens.json",
+ "tokenizer.model"
+ ],
+ "model_category": "mistral",
+ "model_name": "zephyr-7b-alpha",
+ "vocab_size": 32000,
+ "sliding_window": 4096,
+ "chunk_size": 4096
+}
diff --git a/tokenizer.model b/tokenizer.model
new file mode 100644
index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6
--- /dev/null
+++ b/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+size 493443
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..c57f1bf6ea28d2e3ca4540709beb3a80815c2aab
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,46 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [
+ "<unk>",
+ "<s>",
+ "</s>"
+ ],
+ "bos_token": "<s>",
+ "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "legacy": true,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "</s>",
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "tokenizer_class": "LlamaTokenizer",
+ "truncation_side": "left",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": true
+}