diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..f6093c86014c94348f18bf42c2e404e6fbde995d --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,7 @@ +{ + "": 2, + "": 1, + "": 0, + "<|im_end|>": 32000, + "<|im_start|>": 32001 +} diff --git a/ndarray-cache.json b/ndarray-cache.json new file mode 100644 index 0000000000000000000000000000000000000000..e7fb4d685ff586fba25578e236b8eda85d7ff0eb --- /dev/null +++ b/ndarray-cache.json @@ -0,0 +1,5611 @@ +{ + "metadata": { + "ParamSize": 453 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 262160384, + "records": [ + { + "name": "param_0", + "shape": [ + 32002, + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262160384, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_7", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_10", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_1", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_2", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_3", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_4", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_5", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_6", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_8", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_9", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_11", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_12", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_13", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_14", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_21", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_24", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_15", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_16", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_17", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_18", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_19", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_20", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_22", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_23", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_25", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_26", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_27", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_28", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_35", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_38", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_29", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_30", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_31", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_32", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_33", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_34", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_36", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_37", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_39", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_40", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_41", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_42", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_49", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_52", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_43", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_44", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_45", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_46", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_47", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_48", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_50", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_51", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_53", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_54", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_55", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_56", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_63", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_66", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_57", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_58", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_59", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_60", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_61", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_62", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_64", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_65", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_67", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_68", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_69", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_70", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_77", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_80", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_71", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_72", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_73", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_74", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_75", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_76", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_78", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_79", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_81", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_82", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_83", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_84", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_91", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_94", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_85", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_86", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_87", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_88", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_89", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_90", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_92", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_93", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_95", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_96", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_97", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_98", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_105", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_108", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_99", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_100", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_101", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_102", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_103", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_104", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_106", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_107", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_109", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_110", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_111", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_112", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_119", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_122", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_113", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_114", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_115", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_116", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_117", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_118", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_120", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_121", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_123", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_124", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_125", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_126", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_133", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_136", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_127", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_128", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_129", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_130", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_131", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_132", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_134", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_135", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_137", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_138", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_139", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_140", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_147", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_150", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_141", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_142", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_143", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_144", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_145", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_146", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_148", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_149", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_151", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_152", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_153", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_154", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_161", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_164", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_155", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_156", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_157", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_158", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_159", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_160", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_162", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_163", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_165", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_166", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_167", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_168", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_175", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_178", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_169", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_170", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_171", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_172", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_173", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_174", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_176", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_177", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_179", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_180", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_181", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_182", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_189", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_192", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_183", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_184", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_185", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_186", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_187", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_188", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_190", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_191", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_193", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_194", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_195", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_196", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_203", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_206", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_197", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_198", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_199", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_200", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_201", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_202", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_204", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_205", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_207", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_208", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_209", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_210", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_217", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_220", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_211", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_212", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_213", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_214", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_215", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_216", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_218", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_219", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_221", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_222", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_223", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_224", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_231", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_234", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_225", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_226", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_227", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_228", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_229", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_230", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_232", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_233", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_235", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_236", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_237", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_238", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_245", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_248", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_239", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_240", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_241", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_242", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_243", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_244", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_246", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_247", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_249", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_250", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_251", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_252", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_259", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_262", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_253", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_254", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_255", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_256", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_257", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_258", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_260", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_261", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_263", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_264", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_265", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_266", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_273", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_276", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_267", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_268", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_269", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_270", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_271", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_272", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_274", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_275", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_277", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_278", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_279", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_280", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_287", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_290", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_281", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_282", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_283", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_284", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_285", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_286", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_288", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_289", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_291", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_292", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_293", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_294", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_301", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_304", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_295", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_296", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_297", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_298", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_299", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_300", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_302", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_303", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_305", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_306", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_307", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_308", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_315", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_318", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_309", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_310", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_311", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_312", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_313", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_314", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_316", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_317", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_319", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_320", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_321", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_322", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_329", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_332", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_323", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_324", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_325", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_326", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_327", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_328", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_330", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_331", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_333", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_334", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_335", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_336", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_343", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_346", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_337", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_338", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_339", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_340", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_341", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_342", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_344", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_345", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_347", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_348", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_349", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_350", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_357", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_360", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_351", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_352", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_353", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_354", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_355", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_356", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_358", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_359", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_361", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_362", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_363", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_364", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_371", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_374", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_365", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_366", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_367", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_368", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_369", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_370", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_372", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_373", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_375", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_376", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_377", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_378", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_385", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_388", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_379", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_380", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_381", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_382", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_383", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_384", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_386", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_387", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_389", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_390", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_391", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_392", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_399", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_402", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_393", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_394", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_395", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_396", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_397", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_398", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_400", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_401", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_403", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_404", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_405", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_406", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_413", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_416", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_407", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_408", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_409", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_410", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_411", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_412", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_414", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_415", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_417", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_418", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_419", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_420", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_427", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_430", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_421", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_422", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_423", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_424", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_425", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_426", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_428", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_429", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_431", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_432", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_433", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_434", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_441", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_444", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 262160384, + "records": [ + { + "name": "param_450", + "shape": [ + 32002, + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262160384, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "param_435", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_436", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_437", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_438", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_439", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_440", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_442", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_443", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_445", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_446", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_447", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_448", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + }, + { + "name": "param_449", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27803648 + }, + { + "name": "param_451", + "shape": [ + 2048, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 524288, + "byteOffset": 27811840 + }, + { + "name": "param_452", + "shape": [ + 2048, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 524288, + "byteOffset": 28336128 + } + ] + } + ] +} \ No newline at end of file diff --git a/params_shard_0.bin b/params_shard_0.bin new file mode 100644 index 0000000000000000000000000000000000000000..abffefa83bdc037edd4ac4b22bc4c73dc3f248df --- /dev/null +++ b/params_shard_0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f556661e46cffa6737185be513b4dd1918e8e9ea7cf15f558efe69edb96f02b +size 262160384 diff --git a/params_shard_1.bin b/params_shard_1.bin new file mode 100644 index 0000000000000000000000000000000000000000..d8bf82a088a018dafc0fff4c8550b7775da40d4d --- /dev/null +++ b/params_shard_1.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5455d497e040f1f34e69729c1c6df39464e662e657631951bbd03062ef002be2 +size 58720256 diff --git a/params_shard_10.bin b/params_shard_10.bin new file mode 100644 index 0000000000000000000000000000000000000000..6c8bda0685ade983a8027754f11509c97c7484f5 --- /dev/null +++ b/params_shard_10.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9b6935ab49eef9d3c18717241de3845f57902336737c890f3745be8fefd4002 +size 58720256 diff --git a/params_shard_11.bin b/params_shard_11.bin new file mode 100644 index 0000000000000000000000000000000000000000..237e93acff32d86d60e7505b2debe06d54840beb --- /dev/null +++ b/params_shard_11.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:859c6e8626a164632882a4645d0cad23e87584c46ab0b1b8cea94f46f698f266 +size 29360128 diff --git a/params_shard_12.bin b/params_shard_12.bin new file mode 100644 index 0000000000000000000000000000000000000000..588d9e570d3bd21b606cc76f6c154633e948c9ee --- /dev/null +++ b/params_shard_12.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c6e8541fc82a99541e7ed2deae91a608d5823e868f84abc7a2b10eda3eb4110 +size 27803648 diff --git a/params_shard_13.bin b/params_shard_13.bin new file mode 100644 index 0000000000000000000000000000000000000000..1d1ace199882b218f335aa5ab6120fc69696708b --- /dev/null +++ b/params_shard_13.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72e933d071ec7fd3d60e56b665528317099bca86b1a2961e373cdc48d025f428 +size 58720256 diff --git a/params_shard_14.bin b/params_shard_14.bin new file mode 100644 index 0000000000000000000000000000000000000000..398acc7f8b6e9b2584a8c19280790e79941b11dc --- /dev/null +++ b/params_shard_14.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:129b2ad9f702393e44d65bb00ea080da6924b43c1f9af9c928a66e130eb3e7f0 +size 29360128 diff --git a/params_shard_15.bin b/params_shard_15.bin new file mode 100644 index 0000000000000000000000000000000000000000..232be1eb85ff250e2c2607e65bf48417399569d8 --- /dev/null +++ b/params_shard_15.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c850f66aa4d282cdc5fdd3204fb8bac76f5c42efed7e428723bd895ade75fe5 +size 27803648 diff --git a/params_shard_16.bin b/params_shard_16.bin new file mode 100644 index 0000000000000000000000000000000000000000..026862171bf452fa28b8a9edb7f11d97abebe37a --- /dev/null +++ b/params_shard_16.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb58fe592cfed0026903863770e639773cdd3b1dd0afc8c080458ae041be5fe7 +size 58720256 diff --git a/params_shard_17.bin b/params_shard_17.bin new file mode 100644 index 0000000000000000000000000000000000000000..3e580f0e630ef70e923374df900aaa9f1819d6a5 --- /dev/null +++ b/params_shard_17.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6c9e101b464d7ca646e96383f7f7dc757017f37d0dc55b6df8e2496cdb8c5ab +size 29360128 diff --git a/params_shard_18.bin b/params_shard_18.bin new file mode 100644 index 0000000000000000000000000000000000000000..795a7145f221553b7ad4f119a8d96bffef91f03a --- /dev/null +++ b/params_shard_18.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ea008e071fb92353ca197747e17319b7e18783eeaa170b257ba4b55d979b94d +size 27803648 diff --git a/params_shard_19.bin b/params_shard_19.bin new file mode 100644 index 0000000000000000000000000000000000000000..35c8817654f5a2550c83e862b7dd1e7d71bca26a --- /dev/null +++ b/params_shard_19.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3da546e8a0699ba03f71bf9fb4c0a29d2b657e37276ac3311b86b7206837233 +size 58720256 diff --git a/params_shard_2.bin b/params_shard_2.bin new file mode 100644 index 0000000000000000000000000000000000000000..8a031188f93b4df14a5fe8beb830d8c3b08ca3a6 --- /dev/null +++ b/params_shard_2.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bee7435f70400baf20b5d4d8c2193d5bbafcd92e5cb8cdae6520aca723c386d +size 29360128 diff --git a/params_shard_20.bin b/params_shard_20.bin new file mode 100644 index 0000000000000000000000000000000000000000..9079ade7066fc43ea8d8cbbde8cc0866d6bf36ce --- /dev/null +++ b/params_shard_20.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f725f04385f49096720973bec9030077466780a93285710028604b1627fdcd19 +size 29360128 diff --git a/params_shard_21.bin b/params_shard_21.bin new file mode 100644 index 0000000000000000000000000000000000000000..97155e630b2b72824e8740c7e9f56c4821e3d2af --- /dev/null +++ b/params_shard_21.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:688de7a6bd9918acd6045729b52629f1fd2dbc7cd6ddf53b68598dbc36afc485 +size 27803648 diff --git a/params_shard_22.bin b/params_shard_22.bin new file mode 100644 index 0000000000000000000000000000000000000000..c6de686479b7291b36a61428891cb7f7cf23a398 --- /dev/null +++ b/params_shard_22.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d2cb53768639855ab379eb3e2470ee199d869eb9e3aec0da51b27b94c07c070 +size 58720256 diff --git a/params_shard_23.bin b/params_shard_23.bin new file mode 100644 index 0000000000000000000000000000000000000000..ab9bc0fa3f46b51448c8a84c09909777c660e3d2 --- /dev/null +++ b/params_shard_23.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:090f1ea32e168bb32170ee0db3f2c184c92a38d17fd9413438abf930068bf2e7 +size 29360128 diff --git a/params_shard_24.bin b/params_shard_24.bin new file mode 100644 index 0000000000000000000000000000000000000000..e581b1062af0ee0902175bd35aaf98ba745b8a1f --- /dev/null +++ b/params_shard_24.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4352377ac31cb9eb91f3906413501a2238cfce492ad1ba080dd131e2782841f8 +size 27803648 diff --git a/params_shard_25.bin b/params_shard_25.bin new file mode 100644 index 0000000000000000000000000000000000000000..fd990e27f6b3f62e7f92e044d1d9f7a1e919a945 --- /dev/null +++ b/params_shard_25.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0546727b80a989358fee8ede28e82f7ab4a580bdd5a8ef878bd39b8269df90e +size 58720256 diff --git a/params_shard_26.bin b/params_shard_26.bin new file mode 100644 index 0000000000000000000000000000000000000000..54636177b1f8bc4dd498f6d81393ed96dcc51eb3 --- /dev/null +++ b/params_shard_26.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b13394c84b231a25628f84ac27c9e230a5f0010459e51ebc8417f5883eefc99f +size 29360128 diff --git a/params_shard_27.bin b/params_shard_27.bin new file mode 100644 index 0000000000000000000000000000000000000000..81166002d3bc34e5156aab3a11109f618532f860 --- /dev/null +++ b/params_shard_27.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3197beee00005daa9c82a39c79db41afd3be6e2b46e844150313030e0456ad6 +size 27803648 diff --git a/params_shard_28.bin b/params_shard_28.bin new file mode 100644 index 0000000000000000000000000000000000000000..66c725d7c0625aa20d9eee8e46aea3e8428e3f54 --- /dev/null +++ b/params_shard_28.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a320096746c7cbfec303a3fad8be1f3eefe2a9c6bda833cfc76b7797ea7aeec5 +size 58720256 diff --git a/params_shard_29.bin b/params_shard_29.bin new file mode 100644 index 0000000000000000000000000000000000000000..285373a3f2e752d4fed0d8721b57bb9ca56694b8 --- /dev/null +++ b/params_shard_29.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7a718e40a800df7438316618ed40ac840cc41f7b6fe149d464aebcd0460cbe9 +size 29360128 diff --git a/params_shard_3.bin b/params_shard_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..ef358670cfd2ae9d7f3597ae70a602baaaeede3f --- /dev/null +++ b/params_shard_3.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e289a48cdaeae389fd46c283f2d83cc5217efb1c474fd913b9f0dcc8a5c5313 +size 27803648 diff --git a/params_shard_30.bin b/params_shard_30.bin new file mode 100644 index 0000000000000000000000000000000000000000..4d0919a129793ab5b53cea4704eca3a7a63129b9 --- /dev/null +++ b/params_shard_30.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da41d4a0d6c1c45b3dd76be228e9fa68fc0e17dec07722445b8bce7a3f0ccb3f +size 27803648 diff --git a/params_shard_31.bin b/params_shard_31.bin new file mode 100644 index 0000000000000000000000000000000000000000..178383a4e0a25cfb57ffd434ca301ea36410fe5b --- /dev/null +++ b/params_shard_31.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc774657f83c0cbd51b74e1b2dd936fc065f35101be22d766279e838e4aeee4b +size 58720256 diff --git a/params_shard_32.bin b/params_shard_32.bin new file mode 100644 index 0000000000000000000000000000000000000000..083fcf62f5192b519fac886fabc1cbbffaf93bdf --- /dev/null +++ b/params_shard_32.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54c07199460a6759392ac219026c4055d0b8380af8f70a2a856bdcf93e6d3660 +size 29360128 diff --git a/params_shard_33.bin b/params_shard_33.bin new file mode 100644 index 0000000000000000000000000000000000000000..9048ef1704584e6c51a74bd2fdee4736718d6ec4 --- /dev/null +++ b/params_shard_33.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73a4324b7bc0adee124bf6fec7ea641c32788224501ebecfc3d9507720e9f38d +size 27803648 diff --git a/params_shard_34.bin b/params_shard_34.bin new file mode 100644 index 0000000000000000000000000000000000000000..e94074145bb3ec5a004ea6129fccc22d42ffcbed --- /dev/null +++ b/params_shard_34.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f667089b96dde54c06122838650acc2c183181560101b5661578224427806212 +size 58720256 diff --git a/params_shard_35.bin b/params_shard_35.bin new file mode 100644 index 0000000000000000000000000000000000000000..9560f685947fb0ee6f452de8bf28ad81e4368525 --- /dev/null +++ b/params_shard_35.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb9cb778a4ac666ee283fde7a4f096c6e8804a9c8db5de479fd773019a8ea2cc +size 29360128 diff --git a/params_shard_36.bin b/params_shard_36.bin new file mode 100644 index 0000000000000000000000000000000000000000..36740bfc894eb78a6c89e6f8b078c8682cd704e4 --- /dev/null +++ b/params_shard_36.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aab57497875fd564e564d12426429493d5adb98a6cef0fcbb6640866b5c5fdb3 +size 27803648 diff --git a/params_shard_37.bin b/params_shard_37.bin new file mode 100644 index 0000000000000000000000000000000000000000..43083bd0354ae51a927238d4a87c80a6b667b6db --- /dev/null +++ b/params_shard_37.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c124f1bcbe5e92d453f151b2171de0e82cbb3ebb5d5286ebb2dc4268a3a22959 +size 58720256 diff --git a/params_shard_38.bin b/params_shard_38.bin new file mode 100644 index 0000000000000000000000000000000000000000..8392db1e9e42df071e8a632da36c3743a4577505 --- /dev/null +++ b/params_shard_38.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb63da4e0a84e11ca974770ec4cdee8d21ef0aff854290202246a59a663749ca +size 29360128 diff --git a/params_shard_39.bin b/params_shard_39.bin new file mode 100644 index 0000000000000000000000000000000000000000..88605ebc092f7c76a935b2a10eddc75bbc57449c --- /dev/null +++ b/params_shard_39.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f26eec7435155157e787d4f6454acc9a2aa9968696cdab396d8586071cd0f493 +size 27803648 diff --git a/params_shard_4.bin b/params_shard_4.bin new file mode 100644 index 0000000000000000000000000000000000000000..c92b58df3d102f5a2562016ccd02ac46b5bcef5f --- /dev/null +++ b/params_shard_4.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8019cc6f5c761ff8ce697bd5fcb1c8e823c243092f4ec29a69f3e5f67c996529 +size 58720256 diff --git a/params_shard_40.bin b/params_shard_40.bin new file mode 100644 index 0000000000000000000000000000000000000000..2c14dc4d231aef9d622724dc412748aeec605dd3 --- /dev/null +++ b/params_shard_40.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15a1627c97cfaafb3b12299aa99c4dda37e306f8106f7fe03741b6a0adbbc3a8 +size 58720256 diff --git a/params_shard_41.bin b/params_shard_41.bin new file mode 100644 index 0000000000000000000000000000000000000000..18c2290c0e9a9a81ff05d155649177cd5cdeb510 --- /dev/null +++ b/params_shard_41.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7f5a2428cfc30f8265c58687b24c28865ae9d6d2d94039d2d64ddceb41e25ea +size 29360128 diff --git a/params_shard_42.bin b/params_shard_42.bin new file mode 100644 index 0000000000000000000000000000000000000000..44e02e52abe5ac429e3dbc243b539e117642f529 --- /dev/null +++ b/params_shard_42.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4beaceb6ad7472fee7e3488a8b25da3fb01dcc894aeec801099eae743def04da +size 27803648 diff --git a/params_shard_43.bin b/params_shard_43.bin new file mode 100644 index 0000000000000000000000000000000000000000..64754ec14d792cc7553508993fe49badbbf5f2ad --- /dev/null +++ b/params_shard_43.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65f65a6b03e88113d7a3b2689329e97555ab2a476648a4c348189f121e507395 +size 58720256 diff --git a/params_shard_44.bin b/params_shard_44.bin new file mode 100644 index 0000000000000000000000000000000000000000..c467ba85528185ed2b0eb90b2c89de9d9b9017ad --- /dev/null +++ b/params_shard_44.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d4de672fa69e0ccf3b16520f797a0bc9b0dd76990257b58e339875be79889dc +size 29360128 diff --git a/params_shard_45.bin b/params_shard_45.bin new file mode 100644 index 0000000000000000000000000000000000000000..971aeea3095ea984fc26e9e1e110fd7b8e3a48d3 --- /dev/null +++ b/params_shard_45.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cb985b5ea49072571114e80452ae68bb6800817ac9ab5bd3886a092e6fe5e19 +size 27803648 diff --git a/params_shard_46.bin b/params_shard_46.bin new file mode 100644 index 0000000000000000000000000000000000000000..8e0e216adb9d06684f3c44b0a4b26537bc2b1096 --- /dev/null +++ b/params_shard_46.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef6c40be223ac7955c9b5799959d58a112819c5be955da350e1def9130a75117 +size 58720256 diff --git a/params_shard_47.bin b/params_shard_47.bin new file mode 100644 index 0000000000000000000000000000000000000000..f4ab1b4c5c8cde55ad0b511a07721e140664f413 --- /dev/null +++ b/params_shard_47.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c49b5d142ce13fb3f282d92a8a2e1fb2f8f02ba3fd276ff448c3cb1a902e67db +size 29360128 diff --git a/params_shard_48.bin b/params_shard_48.bin new file mode 100644 index 0000000000000000000000000000000000000000..63bd1837cd083e3ec9e547de2c7f331c2b7d0b08 --- /dev/null +++ b/params_shard_48.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d52c0f895ca84617bdf1bc1cebf440b8c17d4c9552e399384bc2c571fa2fd91c +size 27803648 diff --git a/params_shard_49.bin b/params_shard_49.bin new file mode 100644 index 0000000000000000000000000000000000000000..1c5829ead759fa8a18992e051bc6b1bc733f875f --- /dev/null +++ b/params_shard_49.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:572ca44c39a272edcad88e91bd95157e9cade84b77dda13b2f7c326d886ab75c +size 58720256 diff --git a/params_shard_5.bin b/params_shard_5.bin new file mode 100644 index 0000000000000000000000000000000000000000..61dfe524aa95d5d2fcb7b6977cf799eace1d0dc4 --- /dev/null +++ b/params_shard_5.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3fa94e6f39e6c97d1b73bdbb85623d59b6cf1bfbb3768d979ace5049fa66fb1 +size 29360128 diff --git a/params_shard_50.bin b/params_shard_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..fe40f1f152d4e252fb71ea9830ea5192077a742f --- /dev/null +++ b/params_shard_50.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ceb51ee920d9512058152303a5b93dcab9c67ecf33db2c9b354dae43e4c40e4b +size 29360128 diff --git a/params_shard_51.bin b/params_shard_51.bin new file mode 100644 index 0000000000000000000000000000000000000000..219393f650160963a258008e51078920ad0a3dfc --- /dev/null +++ b/params_shard_51.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80f36483e59d735cf6ddabc6d0484f758d3e8ec5d22c8e20b81c76ea80566f52 +size 27803648 diff --git a/params_shard_52.bin b/params_shard_52.bin new file mode 100644 index 0000000000000000000000000000000000000000..9c73232ae0231434bd2fe1998d8cdc35438285f8 --- /dev/null +++ b/params_shard_52.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42d9ea570ade11135f83e4036ae147c6b91e2c5ac7a90ec0fcd837bd13465461 +size 58720256 diff --git a/params_shard_53.bin b/params_shard_53.bin new file mode 100644 index 0000000000000000000000000000000000000000..6e57ed7f858356cc874d0cb72f01eab68cd036be --- /dev/null +++ b/params_shard_53.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aaf8726fc3850b6d130bda0c18bc33fbc858b781e8d3e1df7474c408c1bca67 +size 29360128 diff --git a/params_shard_54.bin b/params_shard_54.bin new file mode 100644 index 0000000000000000000000000000000000000000..fe967e5ebe1cab1c50f83c863e4e58b1fb62ce75 --- /dev/null +++ b/params_shard_54.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e04534be34c1842db053fbaebaafedab93b376d8a5ac16253f8807fb207be889 +size 27803648 diff --git a/params_shard_55.bin b/params_shard_55.bin new file mode 100644 index 0000000000000000000000000000000000000000..f9763fb7e38af7e8a5d26fbfdbb29d66cd917197 --- /dev/null +++ b/params_shard_55.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bf9f9e68c9596e3de89bd704aede1feb8157f63af33f21bc6cfcd7fc1cf97f3 +size 58720256 diff --git a/params_shard_56.bin b/params_shard_56.bin new file mode 100644 index 0000000000000000000000000000000000000000..bb9ce576a7ba02940bfc494ba576bc6484f9ae67 --- /dev/null +++ b/params_shard_56.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d33d103b0c0fd4a31dafab6dd134a58fe33d7f7a6d6e071a0500a1dc31e9d49a +size 29360128 diff --git a/params_shard_57.bin b/params_shard_57.bin new file mode 100644 index 0000000000000000000000000000000000000000..56777b47ccef8aa951c7eb572bb7c5d7b750ea6d --- /dev/null +++ b/params_shard_57.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f6bc72cc5764e42b444b6053c37226c47bf1fbdafbabd6bd603a32d6136f789 +size 27803648 diff --git a/params_shard_58.bin b/params_shard_58.bin new file mode 100644 index 0000000000000000000000000000000000000000..0cc6181ea1f21eeb798215b450fa2d30857ac187 --- /dev/null +++ b/params_shard_58.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19904acee028234eba84ed1ed494e0564b40fef53cd0cf9acafbe7eac7f6f3c9 +size 58720256 diff --git a/params_shard_59.bin b/params_shard_59.bin new file mode 100644 index 0000000000000000000000000000000000000000..d2eca3f1f875ec273193e09384ee7a45dfee33cd --- /dev/null +++ b/params_shard_59.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d105c42f83c2e8bcc97322638e6adfddbc62392cec4296948b538a15c689af3 +size 29360128 diff --git a/params_shard_6.bin b/params_shard_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..b5c31d5a127211d6571492d3611a8a25810e5921 --- /dev/null +++ b/params_shard_6.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fbc7767480b9c705f6561f447a3a83df993c838b9ffd6364975bd32deb8d4d2 +size 27803648 diff --git a/params_shard_60.bin b/params_shard_60.bin new file mode 100644 index 0000000000000000000000000000000000000000..cc769f9df13e49b85fede2029191b65f42b19a70 --- /dev/null +++ b/params_shard_60.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fac08147f1bca59407145130b90fea3ff8290e70119652dff596c92ce5fda050 +size 27803648 diff --git a/params_shard_61.bin b/params_shard_61.bin new file mode 100644 index 0000000000000000000000000000000000000000..c86577efb98a7d8d3864e2a3338f3deb0b21f96a --- /dev/null +++ b/params_shard_61.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52516583b1ee87de26c20c10890e8215f69337143787b29d9b0df217d357122c +size 58720256 diff --git a/params_shard_62.bin b/params_shard_62.bin new file mode 100644 index 0000000000000000000000000000000000000000..dadf86e0e7515cf2f9b758ff75ca51b43ef338a4 --- /dev/null +++ b/params_shard_62.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:450cb04903bbc42a3c877f2f10b6cdb5108224cd14fb42acc1814939606a28fa +size 29360128 diff --git a/params_shard_63.bin b/params_shard_63.bin new file mode 100644 index 0000000000000000000000000000000000000000..703e113d053aea156885d414d249b0532f32ad0b --- /dev/null +++ b/params_shard_63.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f57c991c35d0932aff71dcc9550834807c5545aa34a90537cc6926285745d7c +size 27803648 diff --git a/params_shard_64.bin b/params_shard_64.bin new file mode 100644 index 0000000000000000000000000000000000000000..822352a191ca75486a7cf27a529c459ec204e92e --- /dev/null +++ b/params_shard_64.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bde1d9e9dacd2f1463eb9959ef9839acc0a647688bb7dc3681f1bb99972bc53c +size 58720256 diff --git a/params_shard_65.bin b/params_shard_65.bin new file mode 100644 index 0000000000000000000000000000000000000000..a072fc76f858553bde81b17316de6cbe01a7de67 --- /dev/null +++ b/params_shard_65.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75ce5cf6a3f61ecc7bedded5f4dcf02872d0fcffc2ad204e44bdea02d47355d9 +size 29360128 diff --git a/params_shard_66.bin b/params_shard_66.bin new file mode 100644 index 0000000000000000000000000000000000000000..69753da9cc3ed91dead2342d6ee151c3ef65b3f6 --- /dev/null +++ b/params_shard_66.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:133bc282d76315d1a24b4af4e05ef6b6b9be3d9ac63c8f170cac526ebd86a63e +size 27803648 diff --git a/params_shard_67.bin b/params_shard_67.bin new file mode 100644 index 0000000000000000000000000000000000000000..aebe488958af7a71d572dcdc320173bbd7e72441 --- /dev/null +++ b/params_shard_67.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f74ad7cca4591de80147d229465a563488c96bd0078e02f0c7658fbc78b88c4f +size 58720256 diff --git a/params_shard_68.bin b/params_shard_68.bin new file mode 100644 index 0000000000000000000000000000000000000000..3e10debf1fc6cb7e8ee3e388cf1489e0cec64492 --- /dev/null +++ b/params_shard_68.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:790f7dd50848eb792a5ce7934881456df2216464451bf4737f5a144331eee888 +size 29360128 diff --git a/params_shard_69.bin b/params_shard_69.bin new file mode 100644 index 0000000000000000000000000000000000000000..ef933da7e7623b1066cf4b9cc59501af4c9f6ae7 --- /dev/null +++ b/params_shard_69.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a6257c278f3b579cf0ed214c62761594cd375868c3cdeba342cac0da8254126 +size 27803648 diff --git a/params_shard_7.bin b/params_shard_7.bin new file mode 100644 index 0000000000000000000000000000000000000000..bcf64085565c910ee8588177eae3a859f671be28 --- /dev/null +++ b/params_shard_7.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f8d88bf7157c9bf2214990031be0a4f166fcbb5a943776e755fec0d99789c37 +size 58720256 diff --git a/params_shard_70.bin b/params_shard_70.bin new file mode 100644 index 0000000000000000000000000000000000000000..02070294a1b5d230b6faa238d17b9add16196f8e --- /dev/null +++ b/params_shard_70.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:752363cea71f09ff7ec2f17565ab366db881517ec385c043b99ffa5d4376c685 +size 58720256 diff --git a/params_shard_71.bin b/params_shard_71.bin new file mode 100644 index 0000000000000000000000000000000000000000..19235ae111214e800ae86a0a546a7cbf88ca324d --- /dev/null +++ b/params_shard_71.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a28ca9abc2e776fe7a34b20ddf0c41b71f62883c7cc953e695ea7cfe8272d8d +size 29360128 diff --git a/params_shard_72.bin b/params_shard_72.bin new file mode 100644 index 0000000000000000000000000000000000000000..e19d7d24180c067a3db8933ad94baaecc11546ee --- /dev/null +++ b/params_shard_72.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe9a2d187cce75b401658419ff7abc386f54e0c2cca3d5d2f9e910b523669347 +size 27803648 diff --git a/params_shard_73.bin b/params_shard_73.bin new file mode 100644 index 0000000000000000000000000000000000000000..e858b4919c26e773545b66287f7f8c3546fbd800 --- /dev/null +++ b/params_shard_73.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee006a54b3089d36ae817db37918b7663bd2def077e6c04f74b4511469f4b8e3 +size 58720256 diff --git a/params_shard_74.bin b/params_shard_74.bin new file mode 100644 index 0000000000000000000000000000000000000000..917052fd6db565e32aa4af4b4817feea2c9c6773 --- /dev/null +++ b/params_shard_74.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23b5df90ded6bc7cf24ba7f9b68bcc93ee030587f1ea1cbf1d20faa1af9c03d2 +size 29360128 diff --git a/params_shard_75.bin b/params_shard_75.bin new file mode 100644 index 0000000000000000000000000000000000000000..eaf8b79591b64b40704e72c0f76b713c6cd4d71b --- /dev/null +++ b/params_shard_75.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23282797a666b17483c538bf119e06f94919665b8018a6c8a3f4a711cf04ed66 +size 27803648 diff --git a/params_shard_76.bin b/params_shard_76.bin new file mode 100644 index 0000000000000000000000000000000000000000..8e8475eeb2964a815085a0e0cb441eaaf3841835 --- /dev/null +++ b/params_shard_76.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14e6f2ba55987fe8425a1e0ec842bae4a1fbc8b646af7d203860695de6f4e147 +size 58720256 diff --git a/params_shard_77.bin b/params_shard_77.bin new file mode 100644 index 0000000000000000000000000000000000000000..52463563931ca1cd37c2d547de715275523bf243 --- /dev/null +++ b/params_shard_77.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:962e27d8b8eb658a565ca060607221996aaab8f27110cf0eed406f1414286dd9 +size 29360128 diff --git a/params_shard_78.bin b/params_shard_78.bin new file mode 100644 index 0000000000000000000000000000000000000000..30228d9b4ea4dd63f0b72116bf767ff772ea3bae --- /dev/null +++ b/params_shard_78.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb64d9c27b4e8d16025d859f4948fd3e77b3cacbd5d91afca6907eab8a5ca528 +size 27803648 diff --git a/params_shard_79.bin b/params_shard_79.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd0bb49dda4feac62a54336f5b7be2426df8d652 --- /dev/null +++ b/params_shard_79.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a391b53f56e93ce6920e0846cd19b54173ade06d87dd9de207443be90aaedc00 +size 58720256 diff --git a/params_shard_8.bin b/params_shard_8.bin new file mode 100644 index 0000000000000000000000000000000000000000..191e0741109475dd25fc5e48f3edbcfafb2ff291 --- /dev/null +++ b/params_shard_8.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e4aca1ab4ad1438c2cfd6a1f1caacdf6f93ee26742c23bdb6bdf93fefb086f4 +size 29360128 diff --git a/params_shard_80.bin b/params_shard_80.bin new file mode 100644 index 0000000000000000000000000000000000000000..ee4dbc7224b1ff172ad4c36b870c3d302dcc1e97 --- /dev/null +++ b/params_shard_80.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c022f92c2e8d74465deeefd25fb8d7838a160c532f994c85659279efc26c3829 +size 29360128 diff --git a/params_shard_81.bin b/params_shard_81.bin new file mode 100644 index 0000000000000000000000000000000000000000..8e9e20a6ffe6e97b50070b68b6c00a213506314c --- /dev/null +++ b/params_shard_81.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e87ab9f52136b7bd0c9641dedbfc0903ba3e88c2d7e91ee6f8abfed6c0e7cf6e +size 27803648 diff --git a/params_shard_82.bin b/params_shard_82.bin new file mode 100644 index 0000000000000000000000000000000000000000..19c16b58e6e2827fff994df1d4a36a889f9f682c --- /dev/null +++ b/params_shard_82.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d67d3c25d96f4526781e2c961421d3ee3aec57c94ac066cc2c1e6b1f4858cef4 +size 58720256 diff --git a/params_shard_83.bin b/params_shard_83.bin new file mode 100644 index 0000000000000000000000000000000000000000..5b006519b78c6f51e148d8d2cf61f9cb7a0582c5 --- /dev/null +++ b/params_shard_83.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20b52a4851c8ea869bf61acaae404a9bd9aebedbe22bf139549e28e2e02faf68 +size 29360128 diff --git a/params_shard_84.bin b/params_shard_84.bin new file mode 100644 index 0000000000000000000000000000000000000000..3c3d5fc5b055f0634eb77571f2f6c8735f9f6c33 --- /dev/null +++ b/params_shard_84.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4ea4114aecfb642c98655d4ddd0e19289b1f4d44a8b01c67d9082cca85cdbce +size 27803648 diff --git a/params_shard_85.bin b/params_shard_85.bin new file mode 100644 index 0000000000000000000000000000000000000000..3b434f95db8cd612bb74fc79843d2681d944eff1 --- /dev/null +++ b/params_shard_85.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad1eb95a3e9a7e3a2350098012ead30656a5dda62919d169ad0bff34f603454d +size 58720256 diff --git a/params_shard_86.bin b/params_shard_86.bin new file mode 100644 index 0000000000000000000000000000000000000000..2c3ae380e5686a9676d92d9cd2c3a57613c96fa7 --- /dev/null +++ b/params_shard_86.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a680fa570a71ed6112d6f4aa2aa3f16cd520f0bdfa40bd102e68f689777e9f03 +size 29360128 diff --git a/params_shard_87.bin b/params_shard_87.bin new file mode 100644 index 0000000000000000000000000000000000000000..c047ce70431bf4fe99b5ea5d06c420da92112e81 --- /dev/null +++ b/params_shard_87.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a1dac7a2c215363021466e2a82db39490b9cf28048e9cebbca2be559e94070c +size 27803648 diff --git a/params_shard_88.bin b/params_shard_88.bin new file mode 100644 index 0000000000000000000000000000000000000000..f17d63a588eac7bec17ba85aa5cb3e222a64f1d2 --- /dev/null +++ b/params_shard_88.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7722de9828e847483ee1fe59aac3133f107287eae451ab0c6afa9f880e2df25c +size 58720256 diff --git a/params_shard_89.bin b/params_shard_89.bin new file mode 100644 index 0000000000000000000000000000000000000000..e6e046839306df032296426b567ac0c6e30f7f97 --- /dev/null +++ b/params_shard_89.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af770f3548805c47abc80c904b79638cf94e92aeb5a1be3b7f4fa54f1161875f +size 29360128 diff --git a/params_shard_9.bin b/params_shard_9.bin new file mode 100644 index 0000000000000000000000000000000000000000..9f8e2aa21a411a2bf69373d5e0349ea40e5516a5 --- /dev/null +++ b/params_shard_9.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffb68f4eca7c799c435adf5f4fc174a107379287af651793cd1b9af4a4a5f05b +size 27803648 diff --git a/params_shard_90.bin b/params_shard_90.bin new file mode 100644 index 0000000000000000000000000000000000000000..26b3c08c16a0a3e0ded3c2cd12af0e988f9a24a5 --- /dev/null +++ b/params_shard_90.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aff47c90a1e2aa03a698e3dbdc603cc5ad37f5b5a25d7bb344cfdcc5c67d5c31 +size 27803648 diff --git a/params_shard_91.bin b/params_shard_91.bin new file mode 100644 index 0000000000000000000000000000000000000000..6083043baebc06561b557badbafecebe3577cff2 --- /dev/null +++ b/params_shard_91.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ca4976274c3b391c0bc89b212f225262c40f721b232f735a4e391303fd53f5e +size 58720256 diff --git a/params_shard_92.bin b/params_shard_92.bin new file mode 100644 index 0000000000000000000000000000000000000000..2ae4a7689940077c40580b27337a8d8e6772abfd --- /dev/null +++ b/params_shard_92.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8dfdce3b46e3d945c4592b308e71f7dfdefc1626d0d5c6bd406ff0303e63fdb +size 29360128 diff --git a/params_shard_93.bin b/params_shard_93.bin new file mode 100644 index 0000000000000000000000000000000000000000..d5272b58f15b83b7e448ab54ce644c94c2d80393 --- /dev/null +++ b/params_shard_93.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55b17865c3da17ecba95d345ce209c83abe4cc53dcbcf6254e8bafc1d93536c1 +size 27803648 diff --git a/params_shard_94.bin b/params_shard_94.bin new file mode 100644 index 0000000000000000000000000000000000000000..8e776a7a1d564ace6055884abc620c00ab72cbae --- /dev/null +++ b/params_shard_94.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:461f2c20ae8726b24ba5410ee5f77b83522cff4560d2a1324a601600ca952900 +size 58720256 diff --git a/params_shard_95.bin b/params_shard_95.bin new file mode 100644 index 0000000000000000000000000000000000000000..d99bf389ca69ceef73616b7e5bc8c918094b72f9 --- /dev/null +++ b/params_shard_95.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d247be698fbb847761e73410dcc0b87bbc9626eb3332895920c2f477c46ed76 +size 29360128 diff --git a/params_shard_96.bin b/params_shard_96.bin new file mode 100644 index 0000000000000000000000000000000000000000..7d0657c46e059387d89ed1efb8e2cb9721458406 --- /dev/null +++ b/params_shard_96.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ca7a683f149d304d611c9f4c8d96f26869732b83e0109a38bf3398e044a2d01 +size 262160384 diff --git a/params_shard_97.bin b/params_shard_97.bin new file mode 100644 index 0000000000000000000000000000000000000000..675fa3e77374ae573adf11a60424919bea87a5d7 --- /dev/null +++ b/params_shard_97.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:270c706f731b30136015b79bf7fd81f5880640d1103eae047c9c0a362bedbe39 +size 28860416 diff --git a/private-llm-config.json b/private-llm-config.json new file mode 100644 index 0000000000000000000000000000000000000000..d56cefd73e4eb32f62f51b907be5dbb02e68940a --- /dev/null +++ b/private-llm-config.json @@ -0,0 +1,24 @@ +{ + "model_lib": "Mistral-7B-OpenOrca-w4a16g128asym", + "local_id": "Mistral-7B-OpenOrca-w4a16g128asym", + "conv_template": "Mistral-7B-OpenOrca", + "temperature": 0.7, + "repetition_penalty": 1.1, + "top_p": 0.95, + "mean_gen_len": 512, + "max_gen_len": 2048, + "num_shards": 1, + "shift_fill_factor": 0.3, + "tokenizer_files": [ + "added_tokens.json", + "tokenizer.model" + ], + "model_category": "mistral", + "model_name": "Mistral-7B-OpenOrca", + "vocab_size": 32002, + "sliding_window": 4096, + "chunk_size": 4096, + "conv_config": { + "system": "<|im_start|>system\\nYou are MistralOrca, a large language model trained by Alignment Lab AI. Write out your reasoning step-by-step to be sure you get the right answers!\\n<|im_end|>" + } +} diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..41edaf458f8c0bd049f00fff4399341d7f8c40f3 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,67 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32000": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32001": { + "content": "<|im_start|>", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "", + "", + "", + "<|im_end|>", + "<|im_start|>" + ], + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "legacy": false, + "model_max_length": 1000000000000000019884624838656, + "pad_token": null, + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "tokenizer_file": null, + "trust_remote_code": false, + "unk_token": "", + "use_default_system_prompt": true, + "use_fast": true +}