diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..f6093c86014c94348f18bf42c2e404e6fbde995d --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,7 @@ +{ + "": 2, + "": 1, + "": 0, + "<|im_end|>": 32000, + "<|im_start|>": 32001 +} diff --git a/ndarray-cache.json b/ndarray-cache.json new file mode 100644 index 0000000000000000000000000000000000000000..e7fb4d685ff586fba25578e236b8eda85d7ff0eb --- /dev/null +++ b/ndarray-cache.json @@ -0,0 +1,5611 @@ +{ + "metadata": { + "ParamSize": 453 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 262160384, + "records": [ + { + "name": "param_0", + "shape": [ + 32002, + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262160384, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_7", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_10", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_1", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_2", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_3", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_4", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_5", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_6", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_8", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_9", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_11", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_12", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_13", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_14", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_21", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_24", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_15", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_16", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_17", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_18", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_19", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_20", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_22", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_23", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_25", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_26", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_27", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_28", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_35", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_38", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_29", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_30", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_31", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_32", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_33", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_34", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_36", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_37", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_39", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_40", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_41", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_42", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_49", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_52", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_43", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_44", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_45", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_46", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_47", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_48", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_50", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_51", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_53", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_54", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_55", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_56", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_63", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_66", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_57", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_58", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_59", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_60", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_61", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_62", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_64", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_65", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_67", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_68", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_69", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_70", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_77", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_80", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_71", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_72", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_73", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_74", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_75", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_76", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_78", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_79", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_81", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_82", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_83", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_84", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_91", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_94", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_85", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_86", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_87", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_88", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_89", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_90", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_92", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_93", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_95", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_96", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_97", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_98", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_105", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_108", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_99", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_100", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_101", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_102", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_103", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_104", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_106", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_107", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_109", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_110", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_111", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_112", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_119", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_122", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_113", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_114", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_115", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_116", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_117", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_118", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_120", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_121", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_123", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_124", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_125", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_126", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_133", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_136", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_127", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_128", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_129", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_130", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_131", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_132", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_134", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_135", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_137", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_138", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_139", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_140", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_147", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_150", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_141", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_142", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_143", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_144", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_145", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_146", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_148", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_149", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_151", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_152", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_153", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_154", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_161", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_164", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_155", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_156", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_157", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_158", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_159", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_160", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_162", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_163", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_165", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_166", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_167", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_168", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_175", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_178", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_169", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_170", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_171", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_172", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_173", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_174", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_176", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_177", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_179", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_180", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_181", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_182", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_189", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_192", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_183", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_184", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_185", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_186", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_187", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_188", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_190", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_191", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_193", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_194", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_195", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_196", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_203", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_206", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_197", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_198", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_199", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_200", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_201", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_202", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_204", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_205", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_207", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_208", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_209", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_210", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_217", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_220", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_211", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_212", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_213", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_214", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_215", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_216", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_218", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_219", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_221", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_222", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_223", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_224", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_231", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_234", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_225", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_226", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_227", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_228", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_229", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_230", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_232", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_233", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_235", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_236", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_237", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_238", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_245", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_248", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_239", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_240", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_241", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_242", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_243", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_244", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_246", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_247", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_249", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_250", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_251", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_252", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_259", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_262", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_253", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_254", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_255", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_256", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_257", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_258", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_260", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_261", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_263", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_264", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_265", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_266", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_273", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_276", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_267", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_268", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_269", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_270", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_271", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_272", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_274", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_275", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_277", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_278", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_279", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_280", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_287", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_290", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_281", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_282", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_283", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_284", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_285", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_286", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_288", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_289", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_291", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_292", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_293", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_294", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_301", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_304", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_295", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_296", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_297", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_298", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_299", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_300", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_302", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_303", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_305", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_306", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_307", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_308", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_315", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_318", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_309", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_310", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_311", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_312", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_313", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_314", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_316", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_317", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_319", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_320", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_321", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_322", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_329", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_332", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_323", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_324", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_325", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_326", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_327", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_328", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_330", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_331", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_333", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_334", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_335", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_336", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_343", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_346", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_337", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_338", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_339", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_340", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_341", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_342", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_344", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_345", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_347", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_348", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_349", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_350", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_357", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_360", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_351", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_352", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_353", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_354", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_355", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_356", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_358", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_359", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_361", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_362", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_363", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_364", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_371", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_374", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_365", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_366", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_367", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_368", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_369", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_370", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_372", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_373", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_375", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_376", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_377", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_378", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_385", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_388", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_379", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_380", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_381", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_382", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_383", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_384", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_386", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_387", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_389", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_390", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_391", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_392", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_399", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_402", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_393", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_394", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_395", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_396", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_397", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_398", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_400", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_401", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_403", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_404", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_405", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_406", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_413", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_416", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_407", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_408", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_409", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_410", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_411", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_412", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_414", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_415", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_417", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_418", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_419", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_420", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_427", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_430", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_421", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_422", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_423", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_424", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_425", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_426", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_428", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_429", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_431", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_432", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_433", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_434", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_441", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_444", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 262160384, + "records": [ + { + "name": "param_450", + "shape": [ + 32002, + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262160384, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "param_435", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_436", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_437", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_438", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_439", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_440", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_442", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_443", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_445", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_446", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_447", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_448", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + }, + { + "name": "param_449", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27803648 + }, + { + "name": "param_451", + "shape": [ + 2048, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 524288, + "byteOffset": 27811840 + }, + { + "name": "param_452", + "shape": [ + 2048, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 524288, + "byteOffset": 28336128 + } + ] + } + ] +} \ No newline at end of file diff --git a/params_shard_0.bin b/params_shard_0.bin new file mode 100644 index 0000000000000000000000000000000000000000..44fc79861b10f29ecbc991f621f3878d905f1aaa --- /dev/null +++ b/params_shard_0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48feb642e87601828a768937f8e03c3684f875f097dc5a0cd98627904163f780 +size 262160384 diff --git a/params_shard_1.bin b/params_shard_1.bin new file mode 100644 index 0000000000000000000000000000000000000000..d1ad7f1caeb01a7757aa54eea86d406e37144e43 --- /dev/null +++ b/params_shard_1.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b132e606c87b6a0681675917dc3e0e3ec3f16908bf7ebf2e4c65c263a6611829 +size 58720256 diff --git a/params_shard_10.bin b/params_shard_10.bin new file mode 100644 index 0000000000000000000000000000000000000000..3c9b8599f48061cdb7e70ef212f3facb45555423 --- /dev/null +++ b/params_shard_10.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33460493b565ab8cb88f00d560eb25014456fad8ac483153ee8603633e8110bb +size 58720256 diff --git a/params_shard_11.bin b/params_shard_11.bin new file mode 100644 index 0000000000000000000000000000000000000000..aa66cfd495e06b32ce86a282aa7758ddf2920165 --- /dev/null +++ b/params_shard_11.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dec3e78c64cf1bfcef1aca28e00a32ea863c96228c7c252255f8f1f698222fd5 +size 29360128 diff --git a/params_shard_12.bin b/params_shard_12.bin new file mode 100644 index 0000000000000000000000000000000000000000..d0b0571d3ab4358f8dfb7080b6dd59d5f2516a55 --- /dev/null +++ b/params_shard_12.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa937b04348bf5938e3f13108e7954b4145e8da4f27e21f44370b10a4822897e +size 27803648 diff --git a/params_shard_13.bin b/params_shard_13.bin new file mode 100644 index 0000000000000000000000000000000000000000..6152e71a1ff3ed8e6d0a645d8889fa633fbefca7 --- /dev/null +++ b/params_shard_13.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b3d04d84b2c9c4b11fe4276756e3ff14b01ac663aea5feb5d9840095562c41a +size 58720256 diff --git a/params_shard_14.bin b/params_shard_14.bin new file mode 100644 index 0000000000000000000000000000000000000000..d1a871924fdd0bca798cd6da8cec9cb5c74c1558 --- /dev/null +++ b/params_shard_14.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24be82278b8b5ba1d6dc2c6a9273075629322436460a1a1d68617bdef059d6b4 +size 29360128 diff --git a/params_shard_15.bin b/params_shard_15.bin new file mode 100644 index 0000000000000000000000000000000000000000..b5bae714969becb1639dcaac2b0530f61512f5a8 --- /dev/null +++ b/params_shard_15.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68fb324151ffa8cd1922863f1e64da44738524557f30fc8361beff16d54a941b +size 27803648 diff --git a/params_shard_16.bin b/params_shard_16.bin new file mode 100644 index 0000000000000000000000000000000000000000..4e1c0b30686b2d79fe0e16d7701a9d61e3deb64d --- /dev/null +++ b/params_shard_16.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fde7e41c711d04754eaeafba3e6690771f54d257ab3291ffa39f0e9ff6b239a +size 58720256 diff --git a/params_shard_17.bin b/params_shard_17.bin new file mode 100644 index 0000000000000000000000000000000000000000..a2aebec706bdf5e274b382396a05d30f5c6e465f --- /dev/null +++ b/params_shard_17.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dbd0f5e02132655507ec1f5d5dbbafd6c494fc6d03f5cbbf23a026fd2c92aad +size 29360128 diff --git a/params_shard_18.bin b/params_shard_18.bin new file mode 100644 index 0000000000000000000000000000000000000000..4eb3b8b504ca3c2f6f5a45a08a312cd00d269f54 --- /dev/null +++ b/params_shard_18.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa05d8de3f6325d4ea46808b7bdac535ac38a1d55d79bc73d639d2f0ad642745 +size 27803648 diff --git a/params_shard_19.bin b/params_shard_19.bin new file mode 100644 index 0000000000000000000000000000000000000000..19d9f2824bfc94d814115506ff493b3e41e9ea76 --- /dev/null +++ b/params_shard_19.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:595fdb1027d224f076db994de2fa7e703cc9c56fe6a23c62ce01b4695098aaad +size 58720256 diff --git a/params_shard_2.bin b/params_shard_2.bin new file mode 100644 index 0000000000000000000000000000000000000000..09f36114b27b3e1825d80b0e165312c0ad16bd0a --- /dev/null +++ b/params_shard_2.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a7b4553f90a57d22ac955a8b44678fa9b0f25f0b9c6584c214421fc655ed76e +size 29360128 diff --git a/params_shard_20.bin b/params_shard_20.bin new file mode 100644 index 0000000000000000000000000000000000000000..e6447246056ce9dfcb3bf14903e74f360a7d84e0 --- /dev/null +++ b/params_shard_20.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95e1136bd39a9c64b94c06885199756e6bbf46d1d6061e47b042b67acd7a6318 +size 29360128 diff --git a/params_shard_21.bin b/params_shard_21.bin new file mode 100644 index 0000000000000000000000000000000000000000..179d837e8cc226262786e18f3b6cdb0134b2f3c8 --- /dev/null +++ b/params_shard_21.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa2a3c8ddb4e60e39a499982d79d5f0b41e493417ab048fb96de5d3e9cf5aff7 +size 27803648 diff --git a/params_shard_22.bin b/params_shard_22.bin new file mode 100644 index 0000000000000000000000000000000000000000..757502cbf960d2c37a7620e39b4a2d8ee5f3dfb9 --- /dev/null +++ b/params_shard_22.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9aac5fda792945c61059594c932001995d5aaf3397c910c0a37852159b4c929 +size 58720256 diff --git a/params_shard_23.bin b/params_shard_23.bin new file mode 100644 index 0000000000000000000000000000000000000000..76de036695657c024ce9c6e304e1502006b569ee --- /dev/null +++ b/params_shard_23.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a1a320b3fabc5b1fa070496551fbf512c771550f655c9b68fa6900b35c8b9d0 +size 29360128 diff --git a/params_shard_24.bin b/params_shard_24.bin new file mode 100644 index 0000000000000000000000000000000000000000..c02a015ca6d3e5819656982a77709e0e41a1f608 --- /dev/null +++ b/params_shard_24.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbe479be3513e257089c37bce7e80e12e653e3d4c36fc4b0b05ec0eb4e55bc7c +size 27803648 diff --git a/params_shard_25.bin b/params_shard_25.bin new file mode 100644 index 0000000000000000000000000000000000000000..28bbb00cd863c43aa9d5b0ac49d1e0b4dfec2a3b --- /dev/null +++ b/params_shard_25.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74862a380dfa328dd24db53c6e319af7cacd3186c5361e388e3671ad6f44203a +size 58720256 diff --git a/params_shard_26.bin b/params_shard_26.bin new file mode 100644 index 0000000000000000000000000000000000000000..1c9b5cb60d809dbfda4e8188773acdf30b6d39f4 --- /dev/null +++ b/params_shard_26.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62af5633dde001a1969a0a6b3b93867ea137091e1b0a8e90d5641c76f200490e +size 29360128 diff --git a/params_shard_27.bin b/params_shard_27.bin new file mode 100644 index 0000000000000000000000000000000000000000..e6cd357f333625fb6594a7b2fe9ffe5215a4286a --- /dev/null +++ b/params_shard_27.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9781222f1f543ef2ef8f6d3356f89fd2c7ecdb7a5a50a139dfb5887ead4449d +size 27803648 diff --git a/params_shard_28.bin b/params_shard_28.bin new file mode 100644 index 0000000000000000000000000000000000000000..070a457ed4597dc98cfcc194a274abb03571a31d --- /dev/null +++ b/params_shard_28.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:933d3895b70d9944ca33d2eb78d986780fce128602a50d49d692dbf1d3e8ee72 +size 58720256 diff --git a/params_shard_29.bin b/params_shard_29.bin new file mode 100644 index 0000000000000000000000000000000000000000..e6ed6a83bc392b6f3cf999df4f46ab6bbcca8bed --- /dev/null +++ b/params_shard_29.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12073f8729bb1b0e424b17c71818f942848510f7ba01ba2b7fed04527693a43b +size 29360128 diff --git a/params_shard_3.bin b/params_shard_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..cdc7f98ce036796753ba3f746a1d6510e72edd5b --- /dev/null +++ b/params_shard_3.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:923ab071859c0a93a3e71074de6c61587191b42d53c2b5452537368816bac58e +size 27803648 diff --git a/params_shard_30.bin b/params_shard_30.bin new file mode 100644 index 0000000000000000000000000000000000000000..a2d7e14942a92613146ab7e6b06ac08097bac619 --- /dev/null +++ b/params_shard_30.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b36d87b4f187a9e8610a84e1846ff136384c5fc9ceab63a4301155a1315da342 +size 27803648 diff --git a/params_shard_31.bin b/params_shard_31.bin new file mode 100644 index 0000000000000000000000000000000000000000..a64e31fea5446ad0ecf0ed722ad75a1e74d7ff40 --- /dev/null +++ b/params_shard_31.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0c7c0c55a42f4fe9d0ca7f748b6a53ff598a745a8d7c40694020e1b2ebb24ec +size 58720256 diff --git a/params_shard_32.bin b/params_shard_32.bin new file mode 100644 index 0000000000000000000000000000000000000000..102eee277547db7b24fb0039207de66fad80160e --- /dev/null +++ b/params_shard_32.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a52f0a0fe857fffff5d7a48052cfa40d1a077adc5bf152042e782aaf86c52fd1 +size 29360128 diff --git a/params_shard_33.bin b/params_shard_33.bin new file mode 100644 index 0000000000000000000000000000000000000000..6998c9f68e18191bd3bc4009f65d4e24ca276631 --- /dev/null +++ b/params_shard_33.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:263a8152cbbeec2ce3603bca5802b50be6302ec9bbd48b5cb813ad5f15a92400 +size 27803648 diff --git a/params_shard_34.bin b/params_shard_34.bin new file mode 100644 index 0000000000000000000000000000000000000000..7dd92314a1ff85f1a7ac26ebface702967c99de8 --- /dev/null +++ b/params_shard_34.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a53e579a49a88b723d6b9b019a96eaf7b0df3c57ff6ca40ee2dfcc224c468e04 +size 58720256 diff --git a/params_shard_35.bin b/params_shard_35.bin new file mode 100644 index 0000000000000000000000000000000000000000..0c25ad8e1c17ad649ca9b647f7b82a36ec07b7a9 --- /dev/null +++ b/params_shard_35.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ca598848c6d2c86727db9373e4d3b872476af741fecb8e347c50fa1e16c08c0 +size 29360128 diff --git a/params_shard_36.bin b/params_shard_36.bin new file mode 100644 index 0000000000000000000000000000000000000000..1c2df6dc6f6be76d23b7b69c5e96ff0822eff2c1 --- /dev/null +++ b/params_shard_36.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9e77df8f16676cba54481936d122e9eed77ed791f1da603883714179a0629f1 +size 27803648 diff --git a/params_shard_37.bin b/params_shard_37.bin new file mode 100644 index 0000000000000000000000000000000000000000..a82a1a716b049f37c7edf9b7548edd26772bc2ef --- /dev/null +++ b/params_shard_37.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5421ca5822b81f302252ab57911757e06b2bdfb1f0cf3b9b39aae777a156e0a6 +size 58720256 diff --git a/params_shard_38.bin b/params_shard_38.bin new file mode 100644 index 0000000000000000000000000000000000000000..06be4661bee1890acc189d61cf563a6b7a7e26a7 --- /dev/null +++ b/params_shard_38.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e90fab690d4ee136fcadb0f2f1e98d4e0ec0778bec728348c9798db8bb61ffef +size 29360128 diff --git a/params_shard_39.bin b/params_shard_39.bin new file mode 100644 index 0000000000000000000000000000000000000000..ef0a28d7843f78fc5c3d203b31319973e0bd4753 --- /dev/null +++ b/params_shard_39.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bfccb368b5114b3a9182bd2024d65f8aab7add8a97a0350da6b48ea7805961a +size 27803648 diff --git a/params_shard_4.bin b/params_shard_4.bin new file mode 100644 index 0000000000000000000000000000000000000000..72480443b688b4f1cbbf2dc8bd7f1228b1d6d167 --- /dev/null +++ b/params_shard_4.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfeaef3fc551d641aab849e14445023e73ea0067cf0d6ab461b2d5a4a3b43fac +size 58720256 diff --git a/params_shard_40.bin b/params_shard_40.bin new file mode 100644 index 0000000000000000000000000000000000000000..ff04a4db5864382b1867d53ecb02ec06126755a9 --- /dev/null +++ b/params_shard_40.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d42aceeeeec7c1c6a0389532c2c747bf5ab76ec91b999ef6f7f7236fc97feed2 +size 58720256 diff --git a/params_shard_41.bin b/params_shard_41.bin new file mode 100644 index 0000000000000000000000000000000000000000..2d3a59c4da0e3c3f4b62cf5d7962a6ee7fe83bc3 --- /dev/null +++ b/params_shard_41.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa7bd1adb5c83fcb06454728c8b9a2e67342182f3a29bf1087e24fd3575d1cc5 +size 29360128 diff --git a/params_shard_42.bin b/params_shard_42.bin new file mode 100644 index 0000000000000000000000000000000000000000..9bd461a1d54d974be502372a29ca29a9aac3058d --- /dev/null +++ b/params_shard_42.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06df3e8bdb7af9965f36e761fdc88ab756445d02b4c51b9e1c68c4500cd0aa71 +size 27803648 diff --git a/params_shard_43.bin b/params_shard_43.bin new file mode 100644 index 0000000000000000000000000000000000000000..f64c4710ffa6afe2ddae67a5e90041e8dfa91ef7 --- /dev/null +++ b/params_shard_43.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:715fba6814c124e5d878f62bcb8c36bdd16c9cf2556c3c5cccda5d2fda9fea31 +size 58720256 diff --git a/params_shard_44.bin b/params_shard_44.bin new file mode 100644 index 0000000000000000000000000000000000000000..fc7308197e7666a4d6bfbd2e88824dc3bdc282a2 --- /dev/null +++ b/params_shard_44.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c42ab692a9f2e44e01dfe470239750193fa922212fce70a4f80b5706dc375753 +size 29360128 diff --git a/params_shard_45.bin b/params_shard_45.bin new file mode 100644 index 0000000000000000000000000000000000000000..eb11f3336c8cc772a2756bce14d6d6f30c16af04 --- /dev/null +++ b/params_shard_45.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb77b19a096ead58e97b2dbc08659b76f81b47a1d4696312f0227b8f21ca0010 +size 27803648 diff --git a/params_shard_46.bin b/params_shard_46.bin new file mode 100644 index 0000000000000000000000000000000000000000..467b2a8a2baed3b68ace716fe5be1d6f628ba380 --- /dev/null +++ b/params_shard_46.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a731dc8bb168d5baea8c961faa1dff2fedb32b9a6c6c0ac4ac43a5caae6341a +size 58720256 diff --git a/params_shard_47.bin b/params_shard_47.bin new file mode 100644 index 0000000000000000000000000000000000000000..467ebb606bd5c1d1c62fac963e7621384fd092ab --- /dev/null +++ b/params_shard_47.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57381a70807afc6f2316edcdf583ff61a030f634f2902dc86365ba9103f44016 +size 29360128 diff --git a/params_shard_48.bin b/params_shard_48.bin new file mode 100644 index 0000000000000000000000000000000000000000..946d311de2d1a6b072947c0ef9ad55c8ddb4f6e8 --- /dev/null +++ b/params_shard_48.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dbf3a61550001ae8bd38cd3e5f94bcaae1faa90b41eae1592bcc1d0b87853ba +size 27803648 diff --git a/params_shard_49.bin b/params_shard_49.bin new file mode 100644 index 0000000000000000000000000000000000000000..630411babeb8f6bcbe69a0ac09a81cea95c69420 --- /dev/null +++ b/params_shard_49.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c227ae8aba50ac40f016197eaaf04c833186dda076a1108cffb1075a05fc4f28 +size 58720256 diff --git a/params_shard_5.bin b/params_shard_5.bin new file mode 100644 index 0000000000000000000000000000000000000000..0472b4ea26e8360af60dae897f1c5cbb38f102d3 --- /dev/null +++ b/params_shard_5.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f624e94cdae59fc2d5bdbd528e910b9dcbd4d505ee6f0cbbb356a69973b6d705 +size 29360128 diff --git a/params_shard_50.bin b/params_shard_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..f2b00c513ed7ba7cad5b6476e5734c4195ee42e2 --- /dev/null +++ b/params_shard_50.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc33837e05a1f7ecda1d415fed845aee633b1f1dc500a8d9babed596df795a1b +size 29360128 diff --git a/params_shard_51.bin b/params_shard_51.bin new file mode 100644 index 0000000000000000000000000000000000000000..becd0c5111830954a467fcbd4a40529dfda3c84a --- /dev/null +++ b/params_shard_51.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c5abd6f6ade725190e00f40cb2e5377a0a88c06704ef1dc0f8616539f7dbe80 +size 27803648 diff --git a/params_shard_52.bin b/params_shard_52.bin new file mode 100644 index 0000000000000000000000000000000000000000..d1f1dfb3645aad30dbd5972a49ab2285e9905744 --- /dev/null +++ b/params_shard_52.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dad0680511acaa5b8f6c2c79916156e4a883f86c0e26e2b2cc6f8a25698316c1 +size 58720256 diff --git a/params_shard_53.bin b/params_shard_53.bin new file mode 100644 index 0000000000000000000000000000000000000000..457cabf678d81979c2dd0717d5c1d104995092c5 --- /dev/null +++ b/params_shard_53.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3e8b1ae59cd974caf6ecd9e52b35d179cdc9240207fc11fddacc4f3212dac43 +size 29360128 diff --git a/params_shard_54.bin b/params_shard_54.bin new file mode 100644 index 0000000000000000000000000000000000000000..43fe947f8d49fe4e348035a9282b643618682d41 --- /dev/null +++ b/params_shard_54.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebb1ced68d8895b100fc19241e2115c246f71e8cdf6400c50fd2ebffca2967c1 +size 27803648 diff --git a/params_shard_55.bin b/params_shard_55.bin new file mode 100644 index 0000000000000000000000000000000000000000..f21d5392f020dc485224e8e2b68f8a085c60325b --- /dev/null +++ b/params_shard_55.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cf804ed5f01ee7bfea64deb3037d41563909ad8ed0ef98e1053375fee2bc8db +size 58720256 diff --git a/params_shard_56.bin b/params_shard_56.bin new file mode 100644 index 0000000000000000000000000000000000000000..d62ef14b08a127fee14885b0fd3f066eaa4c8d16 --- /dev/null +++ b/params_shard_56.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b125243c85edd115a1216b9b5d7d2364eb31000f76b6f1a376814eb413c480a +size 29360128 diff --git a/params_shard_57.bin b/params_shard_57.bin new file mode 100644 index 0000000000000000000000000000000000000000..e3416f0389d4008538283cd0a33d6d5ef5dfb6eb --- /dev/null +++ b/params_shard_57.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:846a11eae51ed4f7483a5a12587a5db511be14181cb15fc12e73bf12f4463fa2 +size 27803648 diff --git a/params_shard_58.bin b/params_shard_58.bin new file mode 100644 index 0000000000000000000000000000000000000000..42ec2b8691355d6eb305c8130a30fe4ace4032dd --- /dev/null +++ b/params_shard_58.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42958ec11ee90558a3adaa5c230bf667e30646a99bfc7062b00aced95961a233 +size 58720256 diff --git a/params_shard_59.bin b/params_shard_59.bin new file mode 100644 index 0000000000000000000000000000000000000000..6f59443adce95b3f3ed4ed764018f4eca5427f1e --- /dev/null +++ b/params_shard_59.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1c25b6c5d43d0588b07b4a55ff3107e11f44ee10836e81f37088602571a09b2 +size 29360128 diff --git a/params_shard_6.bin b/params_shard_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..59eda15226456f79c80f5e9226199ded0c660f44 --- /dev/null +++ b/params_shard_6.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:964da5d5be0b1968e65fcf0d5dbc26f3783360d4825706d010925afb5d304147 +size 27803648 diff --git a/params_shard_60.bin b/params_shard_60.bin new file mode 100644 index 0000000000000000000000000000000000000000..971e46154d053815e82108841dd86f1a04ad83fe --- /dev/null +++ b/params_shard_60.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0337aee359bb7458ebe5f0dc9346dcbf7be109832d9de3dea59002e019ac6e41 +size 27803648 diff --git a/params_shard_61.bin b/params_shard_61.bin new file mode 100644 index 0000000000000000000000000000000000000000..a2150e6d2fae10a04eb732a20246643c47bbe3d1 --- /dev/null +++ b/params_shard_61.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17d2c2326f66e7a17c540fddade5955f2d7ccf525d0e347f4f8409bc5a462332 +size 58720256 diff --git a/params_shard_62.bin b/params_shard_62.bin new file mode 100644 index 0000000000000000000000000000000000000000..84d35f8cf22e918df771b5cf8d55f59894ef51bc --- /dev/null +++ b/params_shard_62.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ee77f19f05e091f4b07ca7e686067a1a4e01cfb0e852107ff64cff91e1a4f01 +size 29360128 diff --git a/params_shard_63.bin b/params_shard_63.bin new file mode 100644 index 0000000000000000000000000000000000000000..e87bcf145e2c6029907c29f47e8b509f582584af --- /dev/null +++ b/params_shard_63.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0bcfaf4de197aa62f7ac890bca4e9e1b879987d5d67d32163793f1ffda5068b +size 27803648 diff --git a/params_shard_64.bin b/params_shard_64.bin new file mode 100644 index 0000000000000000000000000000000000000000..14e1c5338a1e5be53d628c019936c20b24a22ba7 --- /dev/null +++ b/params_shard_64.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a65c71dc0d8ac8598d0d22ba4010c998797417d2bcf2d70bcb58b6645fec718 +size 58720256 diff --git a/params_shard_65.bin b/params_shard_65.bin new file mode 100644 index 0000000000000000000000000000000000000000..1df64b191732cb25d658e93c1a8fa2d34bed3ff1 --- /dev/null +++ b/params_shard_65.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fd13538f8ff1d6f380ad3baf9cb8ae6221c5b641ee7f7c2d4417334f756049b +size 29360128 diff --git a/params_shard_66.bin b/params_shard_66.bin new file mode 100644 index 0000000000000000000000000000000000000000..8222f8da10eb4c0e370dac013c3ead98fbe33549 --- /dev/null +++ b/params_shard_66.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09ec0a5b838b29c23c99e61de5330865e1e214aa297db15ade08b82bfbb8cb3a +size 27803648 diff --git a/params_shard_67.bin b/params_shard_67.bin new file mode 100644 index 0000000000000000000000000000000000000000..1601edbaaf26e60498a1abb21e3b6dced3b6f0bf --- /dev/null +++ b/params_shard_67.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63f04f7357c155d567c25bc1e33864dd81340c9d9dd04e935f11803aefd028da +size 58720256 diff --git a/params_shard_68.bin b/params_shard_68.bin new file mode 100644 index 0000000000000000000000000000000000000000..f8892dbb40214aa31eda14ff722325da6385d200 --- /dev/null +++ b/params_shard_68.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43b7af534016b33bacdc1e374285e98a98bcfa7057dad1e0804307990be35d76 +size 29360128 diff --git a/params_shard_69.bin b/params_shard_69.bin new file mode 100644 index 0000000000000000000000000000000000000000..b3b163bfc151db597d307c01b05abdbb14d93873 --- /dev/null +++ b/params_shard_69.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d0803e9affb0894bed50202fc4bbdd66f9892203759a416fa035f798f1fb102 +size 27803648 diff --git a/params_shard_7.bin b/params_shard_7.bin new file mode 100644 index 0000000000000000000000000000000000000000..eebf67f6b2c39b2002fcd496824591d5b9fa59ad --- /dev/null +++ b/params_shard_7.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1aa9c17592b97128cdc555cf77675778af3f9415dd16b1d82ea1c49c1777821c +size 58720256 diff --git a/params_shard_70.bin b/params_shard_70.bin new file mode 100644 index 0000000000000000000000000000000000000000..b669e60bf9690bcdc1437ea794326ea073b9a0e5 --- /dev/null +++ b/params_shard_70.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b178dc6f7f07a27d14391a3c00a5cf0827d66385eea527cba58c715e51fca972 +size 58720256 diff --git a/params_shard_71.bin b/params_shard_71.bin new file mode 100644 index 0000000000000000000000000000000000000000..29c4a75149816bd3fc79bcd959ef70e71d783090 --- /dev/null +++ b/params_shard_71.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec957db345b0384575e17777e79e18e20651fe60d68e02935c0fdf85e97033fe +size 29360128 diff --git a/params_shard_72.bin b/params_shard_72.bin new file mode 100644 index 0000000000000000000000000000000000000000..f7f9537a9aa4f687254fdd270c322be2a24a9985 --- /dev/null +++ b/params_shard_72.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ee452ddb5c5730b83353268767e53e711bc5bf944c72b3c92173c6d9f9690a8 +size 27803648 diff --git a/params_shard_73.bin b/params_shard_73.bin new file mode 100644 index 0000000000000000000000000000000000000000..06d6e66cb50a273edbb3351c64657da33492809e --- /dev/null +++ b/params_shard_73.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:baac3cf4a008d8707c799394cf8cccea35bec046c7978c6a004a5d780d9fef31 +size 58720256 diff --git a/params_shard_74.bin b/params_shard_74.bin new file mode 100644 index 0000000000000000000000000000000000000000..7c0f63e882afc7c03792c5e1fdcd0e99f8376e66 --- /dev/null +++ b/params_shard_74.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4c2ed2bbae32ac0ff17e4cb9b925ec189b0757dc819f5c4b63367ec53744ab0 +size 29360128 diff --git a/params_shard_75.bin b/params_shard_75.bin new file mode 100644 index 0000000000000000000000000000000000000000..a0915a538d2ca2b3c8b0cd62b2034d339875d2a7 --- /dev/null +++ b/params_shard_75.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7922f45e04a1e93159adb9c21426c6c1637e4358d2ce384c279a014eaa56508a +size 27803648 diff --git a/params_shard_76.bin b/params_shard_76.bin new file mode 100644 index 0000000000000000000000000000000000000000..98f24232e622d33a1b42b960d87b07d225b6ecfb --- /dev/null +++ b/params_shard_76.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f42ac1b21882e113c6c7c7683f9a6769c378fcd42ab8b7bdd3dd69b453d5c4e9 +size 58720256 diff --git a/params_shard_77.bin b/params_shard_77.bin new file mode 100644 index 0000000000000000000000000000000000000000..b319277e92c65906d3621469a24f1a75ee5e3ab1 --- /dev/null +++ b/params_shard_77.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0e2202658f45c775af719d0051072abe7bd0b05e197789866de5c2ba7dfdf1d +size 29360128 diff --git a/params_shard_78.bin b/params_shard_78.bin new file mode 100644 index 0000000000000000000000000000000000000000..27cee2dbb034ff2e349254dbf6b292bdd177fe05 --- /dev/null +++ b/params_shard_78.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59cf24ae74e4c466245bba139e098a127482ced82c9ea0b9e4d4753a3cf7c927 +size 27803648 diff --git a/params_shard_79.bin b/params_shard_79.bin new file mode 100644 index 0000000000000000000000000000000000000000..9fa0e4917ed74be650e7165ab8030913a4e0327c --- /dev/null +++ b/params_shard_79.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07efe3943d004c138c56c71bc97031fe80f3c8a3bac924734e36d565c76039b0 +size 58720256 diff --git a/params_shard_8.bin b/params_shard_8.bin new file mode 100644 index 0000000000000000000000000000000000000000..d69ccd7e8133b6f4e28317c9bdee6278d96c7950 --- /dev/null +++ b/params_shard_8.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d31d5a38662b8f9cffb5fc0748b7a1add36f0b57d304ab81b30dc2e3d5fe1b12 +size 29360128 diff --git a/params_shard_80.bin b/params_shard_80.bin new file mode 100644 index 0000000000000000000000000000000000000000..35c9dff9d395bfaa002fa88b3df56fcf8afd6647 --- /dev/null +++ b/params_shard_80.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47c9613510de49e6cbe8bcf77fec11c54221f80aa9dc027db23cce4e7ffce511 +size 29360128 diff --git a/params_shard_81.bin b/params_shard_81.bin new file mode 100644 index 0000000000000000000000000000000000000000..fcf7afb622287e59b109b5ae7117537d0b53c6ee --- /dev/null +++ b/params_shard_81.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a296044821b7a8994bbbaafdc4f5a6ce12aa2c205affe1c22b0eeadc4e4412c +size 27803648 diff --git a/params_shard_82.bin b/params_shard_82.bin new file mode 100644 index 0000000000000000000000000000000000000000..9300bc9ec2d4ee21687f2acc3df151703842a7d3 --- /dev/null +++ b/params_shard_82.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02c3ac7084c38d67308637b5aa6e3a6543628311ec39ae90fc58f2a2a6a806bb +size 58720256 diff --git a/params_shard_83.bin b/params_shard_83.bin new file mode 100644 index 0000000000000000000000000000000000000000..06fe024594e17e8bf0ce45d0833959150c0591af --- /dev/null +++ b/params_shard_83.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d3437768dd28792aa2fc69a8d53c5ff525ca9044c660eaf59dfaff658bfa0e3 +size 29360128 diff --git a/params_shard_84.bin b/params_shard_84.bin new file mode 100644 index 0000000000000000000000000000000000000000..b54e0378ebf9b3c3f8d3a6dbbc59ae8cd8db5c44 --- /dev/null +++ b/params_shard_84.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50ac742ec526be0284ca82271050f6b4393703ee176c06e7c4063ac355fa4a00 +size 27803648 diff --git a/params_shard_85.bin b/params_shard_85.bin new file mode 100644 index 0000000000000000000000000000000000000000..7bf9a0ae600d32fd8e463ffbd934de390fcb6576 --- /dev/null +++ b/params_shard_85.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e9ceada875db004b23e316bf9f9e7bc9283a107b25ef63c7beb4b927331e0c +size 58720256 diff --git a/params_shard_86.bin b/params_shard_86.bin new file mode 100644 index 0000000000000000000000000000000000000000..614d36d226a8c94121f244021552fd3c9c9ee39f --- /dev/null +++ b/params_shard_86.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:caf607ee02c12070dec64f188657ea5b83cce56bb76dec9559e8df2f89df440e +size 29360128 diff --git a/params_shard_87.bin b/params_shard_87.bin new file mode 100644 index 0000000000000000000000000000000000000000..6d2fecd54913be0c40b4278eed73bb600c8e3c3f --- /dev/null +++ b/params_shard_87.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4550d3a9cd6cfde720f22c936f54efd271cdbfa3aad8f34226788e44b754d16b +size 27803648 diff --git a/params_shard_88.bin b/params_shard_88.bin new file mode 100644 index 0000000000000000000000000000000000000000..db61817fe1f71c86cce3cd0c1ebf0171e2cc1566 --- /dev/null +++ b/params_shard_88.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a300a63391aa4a7874db6175067642a4f7298f149b28a04ed68b47e8d9a5b535 +size 58720256 diff --git a/params_shard_89.bin b/params_shard_89.bin new file mode 100644 index 0000000000000000000000000000000000000000..06cee5a1f373cde924ec21d2c78a5e80a27ec8ac --- /dev/null +++ b/params_shard_89.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d80c6eb30191294b9bc5b37c43c6152238537dc2f04a67af5adb766bf0aaa56c +size 29360128 diff --git a/params_shard_9.bin b/params_shard_9.bin new file mode 100644 index 0000000000000000000000000000000000000000..52233c8238a720a1f09731d9eec64687167e231a --- /dev/null +++ b/params_shard_9.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9990bf7a5e8a8a11262f1dd68edbd5b9a708598ddc1bb7bdb6c419fa94df310 +size 27803648 diff --git a/params_shard_90.bin b/params_shard_90.bin new file mode 100644 index 0000000000000000000000000000000000000000..6b3fc6cc8c3ce79ffed2e7f494ecf8bb2d9c6843 --- /dev/null +++ b/params_shard_90.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f1fe0d337862b4ff8f432d5ed926a90c2875567a0ae7517db10726ecf7b98c6 +size 27803648 diff --git a/params_shard_91.bin b/params_shard_91.bin new file mode 100644 index 0000000000000000000000000000000000000000..b725b286da10e003807097a5d770da8716a00d99 --- /dev/null +++ b/params_shard_91.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b64a2597f3a3f7523277f2555558e5fcce00ae7035ff8983353dece40bc52ee8 +size 58720256 diff --git a/params_shard_92.bin b/params_shard_92.bin new file mode 100644 index 0000000000000000000000000000000000000000..6f8675432faf52ea86714be3f23c67d51e24ca1f --- /dev/null +++ b/params_shard_92.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fadbd10e6a4ad552e7f48ee7b0499ec8e21ce6398089a8dc2ea5d26fe939b58 +size 29360128 diff --git a/params_shard_93.bin b/params_shard_93.bin new file mode 100644 index 0000000000000000000000000000000000000000..95a180a53f0b991a6f95202b0c4eab4a39482d24 --- /dev/null +++ b/params_shard_93.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3c7e917da8828c4f1dbf90073a25096348e3b2137568a243208fb8549e41b1b +size 27803648 diff --git a/params_shard_94.bin b/params_shard_94.bin new file mode 100644 index 0000000000000000000000000000000000000000..965910400d94de3959d033d7b7c0a5e7ae4df49d --- /dev/null +++ b/params_shard_94.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ece1d90b66c99dead378f5aa74e2e426d9d642d41c7cad972cce8dbf47c55da8 +size 58720256 diff --git a/params_shard_95.bin b/params_shard_95.bin new file mode 100644 index 0000000000000000000000000000000000000000..943a19d77ba295bf6ab8f6c8a4e5470804107958 --- /dev/null +++ b/params_shard_95.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c5dd41226d050be7f2eb80e899780562b62336ae86fec985bc34b1ba1c249df +size 29360128 diff --git a/params_shard_96.bin b/params_shard_96.bin new file mode 100644 index 0000000000000000000000000000000000000000..87f27a094cf30e16027a5e02436272e92652edab --- /dev/null +++ b/params_shard_96.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1e08610aa20fb95fd475b29e99781eb757a286071ce102f7c5b47b419172efe +size 262160384 diff --git a/params_shard_97.bin b/params_shard_97.bin new file mode 100644 index 0000000000000000000000000000000000000000..26900ce2ddc0c617257936ca2d2b7df1ceb1cf86 --- /dev/null +++ b/params_shard_97.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:000fd6bedc3823ce4f86a565ea96d4080c17893a210864e33e403cfb899baa9d +size 28860416 diff --git a/private-llm-config.json b/private-llm-config.json new file mode 100644 index 0000000000000000000000000000000000000000..4786316b97bd858ff990989dd5e4762e1a18f2ab --- /dev/null +++ b/private-llm-config.json @@ -0,0 +1,24 @@ +{ + "model_lib": "jackalope-7b-w4a16g128asym", + "local_id": "jackalope-7b-w4a16g128asym", + "conv_template": "chatml", + "temperature": 0.7, + "repetition_penalty": 1.1, + "top_p": 0.95, + "mean_gen_len": 512, + "max_gen_len": 2048, + "num_shards": 1, + "shift_fill_factor": 0.3, + "tokenizer_files": [ + "added_tokens.json", + "tokenizer.model" + ], + "model_category": "mistral", + "model_name": "jackalope-7b", + "vocab_size": 32002, + "sliding_window": 4096, + "sliding_window_chunk_size": 4096, + "conv_config": { + "system": "<|im_start|>system\nYou are JackalopeAI. Write out your reasoning step-by-step to be sure you get the right answers!\n<|im_end|>" + } +} diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..21556fc2e711009b5f211b53f55700ee09504bf4 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,68 @@ +{ + "add_bos_token": false, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32000": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32001": { + "content": "<|im_start|>", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "", + "", + "", + "<|im_end|>", + "<|im_start|>" + ], + "bos_token": "", + "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "legacy": false, + "model_max_length": 1000000000000000019884624838656, + "pad_token": null, + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "tokenizer_file": null, + "trust_remote_code": false, + "unk_token": "", + "use_default_system_prompt": true, + "use_fast": true +}