diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..52373fe24473b1aa44333d318f578ae6bf04b49b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c75ba70d9b733b71249bf400ba94b01d531fda2f --- /dev/null +++ b/README.md @@ -0,0 +1,76 @@ +--- +license: mit +base_model: zai-org/GLM-5.1 +tags: +- nvidia +- nvfp4 +- quantized +- moe +- modelopt +- glm +library_name: transformers +pipeline_tag: text-generation +--- + +# CortexLM/GLM-5.1-NVFP4-MTP + +NVFP4 quantized version of [zai-org/GLM-5.1](https://huggingface.co/zai-org/GLM-5.1), a 754B parameter Mixture-of-Experts language model with 256 routed experts per layer. + +Quantized using [NVIDIA Model Optimizer (modelopt)](https://github.com/NVIDIA/Model-Optimizer) with full activation calibration on all 58,459 linear modules including every individual routed expert. 
+ +## Model Details + +| | | +|---|---| +| **Base model** | [zai-org/GLM-5.1](https://huggingface.co/zai-org/GLM-5.1) | +| **Architecture** | GlmMoeDsaForCausalLM (754B MoE) | +| **Layers** | 78 transformer layers + 1 MTP layer | +| **Experts** | 256 routed + 1 shared per MoE layer (layers 3-77) | +| **Hidden size** | 6144 | +| **Context length** | 202,752 tokens | +| **Quantization** | NVFP4 (4-bit float weights, FP8 block scales, group size 16) | +| **KV cache** | FP8 quantized | +| **MTP layer** | BF16 (stored separately in `mtp.safetensors`) | +| **Total size** | ~441 GB (vs 1.4 TB BF16 original) | + +## Quantization Details + +This model was quantized using NVIDIA's official [Model Optimizer](https://github.com/NVIDIA/Model-Optimizer) (`modelopt`) NVFP4 pipeline with proper per-expert calibration: + +- **Quantization format**: NVFP4 -- 4-bit floating point with FP8 per-block scaling factors (`float8_e4m3fn`) and a global FP32 `weight_scale_2`, block size of 16 +- **Calibration**: 256 samples from [cnn_dailymail](https://huggingface.co/datasets/cnn_dailymail) and [nvidia/Nemotron-Post-Training-Dataset-v2](https://huggingface.co/datasets/nvidia/Nemotron-Post-Training-Dataset-v2) (chat, code, math, stem splits), sequence length 2048 +- **Quantized modules**: 58,459 `nn.Linear` modules, including all 256 routed experts per layer individually quantized with calibrated `input_scale` (activation statistics) +- **KV cache**: FP8 cast quantization on all attention layers +- **Excluded**: `lm_head` (kept in BF16) +- **MTP**: Multi-Token Prediction layer (layer 78) kept in BF16 as a separate `mtp.safetensors` file (19.9 GB) +- **Hardware**: 8x NVIDIA B300 SXM6 275GB GPUs +- **Calibration time**: ~21 minutes +- **modelopt version**: 0.42.0.dev (from source, April 2026) +- **transformers version**: 5.5.0 + +### Weight format + +Each quantized linear layer is stored as: +- `weight`: `uint8` (two FP4 values packed per byte) +- `weight_scale`: `float8_e4m3fn` (per-block FP8 
scale, one per 16 elements) +- `weight_scale_2`: `float32` scalar (global per-tensor scale) +- `input_scale`: `float32` scalar (calibrated activation scale, where applicable) + +## Usage + +This checkpoint is designed for use with inference engines that support the NVFP4 format, such as [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) and [vLLM](https://github.com/vllm-project/vllm) with the modelopt backend. + +## Files + +- 85 model shards (`model-00001-of-00085.safetensors` to `model-00085-of-00085.safetensors`) -- NVFP4 quantized layers 0-77 +- `mtp.safetensors` -- BF16 Multi-Token Prediction layer (layer 78, 791 keys, 19.9 GB) +- `model.safetensors.index.json` -- shard index mapping +- `config.json` -- model configuration with `quantization_config` +- `hf_quant_config.json` -- NVFP4 quantization metadata +- `tokenizer.json`, `tokenizer_config.json` -- tokenizer files +- `generation_config.json` -- generation defaults + +## Acknowledgements + +- Base model by [ZhipuAI](https://huggingface.co/zai-org) +- Quantization tooling by [NVIDIA Model Optimizer](https://github.com/NVIDIA/Model-Optimizer) diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4e70dd3bfe01c2e61b00d557cca39adff3806f23 --- /dev/null +++ b/config.json @@ -0,0 +1,174 @@ +{ + "architectures": [ + "GlmMoeDsaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 0, + "dtype": "bfloat16", + "eos_token_id": [ + 154820, + 154827, + 154829 + ], + "ep_size": 1, + "first_k_dense_replace": 3, + "hidden_act": "silu", + "hidden_size": 6144, + "index_head_dim": 128, + "index_n_heads": 32, + "index_topk": 2048, + "indexer_rope_interleave": true, + "initializer_range": 0.02, + "intermediate_size": 12288, + "kv_lora_rank": 512, + "max_position_embeddings": 202752, + "mlp_layer_types": [ + "dense", + "dense", + "dense", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + 
"sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse" + ], + "model_type": "glm_moe_dsa", + "moe_intermediate_size": 2048, + "moe_layer_freq": 1, + "n_group": 1, + "n_routed_experts": 256, + "n_shared_experts": 1, + "norm_topk_prob": true, + "num_attention_heads": 64, + "num_experts_per_tok": 8, + "num_hidden_layers": 78, + "num_key_value_heads": 64, + "num_nextn_predict_layers": 1, + "pad_token_id": 154820, + "pretraining_tp": 1, + "q_lora_rank": 2048, + "qk_head_dim": 256, + "qk_nope_head_dim": 192, + "qk_rope_head_dim": 64, + "rms_norm_eps": 1e-05, + "rope_interleave": true, + "rope_parameters": { + "rope_theta": 1000000, + "rope_type": "default" + }, + "routed_scaling_factor": 2.5, + "scoring_func": "sigmoid", + "tie_word_embeddings": false, + "topk_group": 1, + "topk_method": "noaux_tc", + "transformers_version": "5.5.0", + "use_cache": true, + "v_head_dim": 256, + "vocab_size": 154880, + "quantization_config": { + "config_groups": { + "group_0": { + "input_activations": { + "dynamic": false, + "num_bits": 4, + "type": "float", + "group_size": 16 + }, + "weights": { + "dynamic": false, + "num_bits": 4, + "type": "float", + "group_size": 16 + }, + "targets": [ + "Linear" + ] + } + }, + "ignore": [ + "lm_head" + ], + 
"quant_algo": "NVFP4", + "kv_cache_scheme": { + "dynamic": false, + "num_bits": 8, + "type": "float" + }, + "producer": { + "name": "modelopt", + "version": "0.0.1.dev1+g5dc17dfd1.d20260407" + }, + "quant_method": "modelopt" + } +} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..453800a061bdc65b75b9dd99ecc66ede543dac89 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,12 @@ +{ + "_from_model_config": true, + "eos_token_id": [ + 154820, + 154827, + 154829 + ], + "pad_token_id": 154820, + "temperature": 1.0, + "top_p": 0.95, + "transformers_version": "5.4.0" +} diff --git a/hf_quant_config.json b/hf_quant_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3deaca2b76a45b0636aa7ee6782aa10b13a7b099 --- /dev/null +++ b/hf_quant_config.json @@ -0,0 +1,14 @@ +{ + "producer": { + "name": "modelopt", + "version": "0.0.1.dev1+g5dc17dfd1.d20260407" + }, + "quantization": { + "quant_algo": "NVFP4", + "kv_cache_quant_algo": "FP8", + "group_size": 16, + "exclude_modules": [ + "lm_head" + ] + } +} \ No newline at end of file diff --git a/model-00001-of-00085.safetensors b/model-00001-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6301d2869263e31e659af0399e19fa5031a77040 --- /dev/null +++ b/model-00001-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbb7f509b7f0c5b841d3b7c39d47e0c9cde54d8a990877deba45c1412afb6db4 +size 4999575736 diff --git a/model-00003-of-00085.safetensors b/model-00003-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4f98ceea9704babb1a528f0b532c181e7274bcd9 --- /dev/null +++ b/model-00003-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1367c56079a0e47472f213643eb37032d4960298d8d73181ab895b5404a3a7d +size 4999506536 diff --git 
a/model-00006-of-00085.safetensors b/model-00006-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8d3bc7dd18d9e2e75ebfa4bf2c06ced9d3881b58 --- /dev/null +++ b/model-00006-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5156f32255d87f214611a3830f07ce283ae9201e4121560abe2aec9ed89f282e +size 4997327120 diff --git a/model-00007-of-00085.safetensors b/model-00007-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..12d1d6dcf157f93f2350be382e7b3af31df5322e --- /dev/null +++ b/model-00007-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7f527633e1c4d01e99de355930cb2ddaf031a6ff7514313ea0da6a89170c6ab +size 4999507056 diff --git a/model-00008-of-00085.safetensors b/model-00008-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eb7b58699c549f51df74c0ecd738ef831efaa5a3 --- /dev/null +++ b/model-00008-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79e1f8adf000a74d5b7c64724526fcbcd97a76fe4b073b259416e4e82dc2fefe +size 4999505068 diff --git a/model-00009-of-00085.safetensors b/model-00009-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1ac2423c0215dabe7ee03be5bf7e6464b119b7f8 --- /dev/null +++ b/model-00009-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1999bf42c9b756d0cfc3f30083b46453baced237b67320a2279d48ff4c090d99 +size 4999499836 diff --git a/model-00012-of-00085.safetensors b/model-00012-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..569b067a180bc1f5e37e085ac2d96c077485122d --- /dev/null +++ b/model-00012-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58aa0c1ede39f29c9b97cc9dadae4987ae79501d808b1a4d184e9108ef6f7d5b +size 4999482268 diff --git 
a/model-00014-of-00085.safetensors b/model-00014-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..175d58039891ce15c16aa909d08cf5311fea8d8f --- /dev/null +++ b/model-00014-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc17d55f89dec89daa8cf220c5a2fa8fc4b3adf10fb60efe2d22e1f250381f05 +size 4999489336 diff --git a/model-00016-of-00085.safetensors b/model-00016-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ac41f9a76b6b81cf15fd052bf1a1fe28072596d0 --- /dev/null +++ b/model-00016-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a15553828325040c1ee1749c64e1e28fe25567b337a1ef3ca5070ba5a12c434 +size 4997301720 diff --git a/model-00019-of-00085.safetensors b/model-00019-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3240a5baec1c8fff726edc039b514c80efc5194e --- /dev/null +++ b/model-00019-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98379a62ab06349dc904647ad67ce405e1fc4a5d641112976eb739e0e65920c6 +size 4999468960 diff --git a/model-00022-of-00085.safetensors b/model-00022-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e7fcd48e6f77a06110ecd8349b87ba3e27a89a72 --- /dev/null +++ b/model-00022-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9c028c69e425ae4da9a5d810504ece10f37e9b11d8ed3524c11ea49ebc7624b +size 4999465028 diff --git a/model-00024-of-00085.safetensors b/model-00024-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0a51950905ef13d873ec31d3cc66f8e58faee61f --- /dev/null +++ b/model-00024-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7bfac22237d6b894c481a5e42a178a42891f6843a9468b247755ac9e1a5bb4b +size 4999466220 diff --git 
a/model-00026-of-00085.safetensors b/model-00026-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5c446e36743fb474bec54cea699e518878991d15 --- /dev/null +++ b/model-00026-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5640c129d9245d127714bfcb2a1aeb5627762a06107c027578786a8c41e146e +size 4997286104 diff --git a/model-00028-of-00085.safetensors b/model-00028-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..463550457178143b13e59ddacc64ec20b89c3c98 --- /dev/null +++ b/model-00028-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f32eba103ec07dad22c8396f72e63f54fc32873d1ec5a1c671c1c6e301a08e1 +size 4999470076 diff --git a/model-00029-of-00085.safetensors b/model-00029-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..39c4192dd7e7d1dc5837e16ae6a560a041b2f638 --- /dev/null +++ b/model-00029-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed9118a7b7594bbd58c7dc0778196f67482e0a3ce1bb0c8ef72491c178dfc95e +size 4999469424 diff --git a/model-00030-of-00085.safetensors b/model-00030-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6fa18400da8c04845198625a4e6aaf482082aecb --- /dev/null +++ b/model-00030-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40407806babbdd63a0d9bacaa2fac8e904fbdf84dc446b4c5aa610aa95dc7680 +size 4999464644 diff --git a/model-00035-of-00085.safetensors b/model-00035-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2ae7450ea258243d5f72758454623c944471d818 --- /dev/null +++ b/model-00035-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:286e3628620ad0c5353b25e7eadb104d3594fe826d68402d3e392dc720ad7f08 +size 4958797740 diff --git 
a/model-00036-of-00085.safetensors b/model-00036-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..47b143992ff910245d1173e414a17d17ff2dc703 --- /dev/null +++ b/model-00036-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02b45ef2085482defb217c894de566afa3ad97cf9a8cffa549ef88a6138a142b +size 4995468148 diff --git a/model-00037-of-00085.safetensors b/model-00037-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fff0b1badfa3fa3d5ff4a0cccd8fda5a04c030ee --- /dev/null +++ b/model-00037-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c78128b323fd9f3bd26f6187908204e934260a36582ff2886e512b4509ab7ee8 +size 4999456216 diff --git a/model-00040-of-00085.safetensors b/model-00040-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..19a99a9baa94b03da03c6a89f9b81bf1aeca67ba --- /dev/null +++ b/model-00040-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8753dc763c93624cdbd95629da26aefabf949479c12646fe4e35d5efa7925e4b +size 4999454044 diff --git a/model-00042-of-00085.safetensors b/model-00042-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c30cb39717b02265aa808f8885f21beedf83ca9e --- /dev/null +++ b/model-00042-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b504eaf7f0c0a306a67936047a2167a72f7b53cb187889db9973c3527aa32d7b +size 4999457936 diff --git a/model-00044-of-00085.safetensors b/model-00044-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..120e0f922b1265ece7554f3ba8954e5b7b705c77 --- /dev/null +++ b/model-00044-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a05336cc25dd8fa1d8b55f2da663642ab4dcf4e36cc25af2a7f1a16b6ad32a43 +size 4999462320 diff --git 
a/model-00045-of-00085.safetensors b/model-00045-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d46d68627eac489aec3c52275c4c86e31d61028a --- /dev/null +++ b/model-00045-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74cca9c32cf6aa04f91aeb8788a828805908501bae863c71a74e2ba3018b83e6 +size 4999635488 diff --git a/model-00051-of-00085.safetensors b/model-00051-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e87890612ec6da28ec07cce2836e246b23a7e8b6 --- /dev/null +++ b/model-00051-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1d7ed232958e9530e884e43770f2f05a0ce2f50511df86bfa70baf040ceb41f +size 4999462704 diff --git a/model-00052-of-00085.safetensors b/model-00052-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..438f244389c5183162401ce6b7600b62620d0ff1 --- /dev/null +++ b/model-00052-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78bb941fba46577d93bee6d875040085583e9e60b60a506eb4ad1dc9675f9c87 +size 4999461304 diff --git a/model-00055-of-00085.safetensors b/model-00055-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..216f4e748db5a87af3da4e835257856c511981b0 --- /dev/null +++ b/model-00055-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94bc426b001bd74e6e603ae0ae0acb2868ff0181256d6cc3beb10d060ab341ac +size 4997280008 diff --git a/model-00057-of-00085.safetensors b/model-00057-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f4d0a6314a3352068a2b5cf5e80fc8a8b5bb4343 --- /dev/null +++ b/model-00057-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eeff47e54102e6902c4d2ec24b40c59208c656db59d4c81f0a96f2c6f8217630 +size 4999463932 diff --git 
a/model-00060-of-00085.safetensors b/model-00060-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..619160f632aa30cf4596f748896111bc300e5446 --- /dev/null +++ b/model-00060-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a597c3beb4a96b763d185ed3361766605ac476c60ed32351fc881a4f448f984 +size 4999461924 diff --git a/model-00061-of-00085.safetensors b/model-00061-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..32f3ea1418bbd108e19f27c9d86ae5472411f635 --- /dev/null +++ b/model-00061-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79c136378840d5731f500630be9c456ee5de5b53565e1e5475370e7db8dcc0dd +size 4999460488 diff --git a/model-00063-of-00085.safetensors b/model-00063-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f9f20b5e5d985e244287c1b8acfeabb6f44065b5 --- /dev/null +++ b/model-00063-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bafa6f7ffa649825bca670cb30792561e94565a72ad7d558f1ea2fbfe44e6608 +size 4999460276 diff --git a/model-00064-of-00085.safetensors b/model-00064-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..07cca06598d79a9cd1b95dd2f594e9b0401aeacb --- /dev/null +++ b/model-00064-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb55d1759959aa48fc9dde7101eaaa251d8b1c478d6b432695ed66607510b05c +size 4999460784 diff --git a/model-00065-of-00085.safetensors b/model-00065-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7e11324d535bbd8fae09549a6e73abeb9fc2c07e --- /dev/null +++ b/model-00065-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f9af318aa69404d967102fa7ec506014e12ece81a913a72795f0f82da43024c +size 4997280440 diff --git 
a/model-00067-of-00085.safetensors b/model-00067-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9fda3c84c55df9bb71a9dd8f435af7aee0df237b --- /dev/null +++ b/model-00067-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dd8d6bb7e2aa9475ed2fd404a66582c06e328afd8edc42638d5127ebd200f9f +size 4999462496 diff --git a/model-00068-of-00085.safetensors b/model-00068-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..38ac43056796fc2ceb13a7bb7c3deacbdb3f72bb --- /dev/null +++ b/model-00068-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e0d618b43e3c9675bc0908efb8fc5f8c1260a7c03b66bed27fd459be850e443 +size 4999465208 diff --git a/model-00070-of-00085.safetensors b/model-00070-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..16e1a499e9ecae853b3dfa45d53db5b40a358270 --- /dev/null +++ b/model-00070-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:500f99068f1393bebdc675ac2f396f8ef13e78f8a44c163e5f8ae78a34ac23dd +size 4999459576 diff --git a/model-00074-of-00085.safetensors b/model-00074-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3d1d53f7bab3eefabd3720b5322d9aa5adfff024 --- /dev/null +++ b/model-00074-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1d8bb4d2c1f4158625e5c9627023fc989669091a22e63a317a9c7c30a27e336 +size 4999463464 diff --git a/model-00076-of-00085.safetensors b/model-00076-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..feb506e5bfb9b2f57d5cd4e7f005c333af97280d --- /dev/null +++ b/model-00076-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8deef5978ca7a367536eb08365f9c56f3c9731101e62546910f8348d27db28c5 +size 4999458732 diff --git 
a/model-00078-of-00085.safetensors b/model-00078-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b76ed47f67709a4ea1f5d1843516440ce92320e0 --- /dev/null +++ b/model-00078-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8876ea722058351980502738475600111e3fab2aa894e6e729317a895c32590b +size 4999460680 diff --git a/model-00079-of-00085.safetensors b/model-00079-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..201309cffeedb5eb6f64e0049db0e91788ce6dde --- /dev/null +++ b/model-00079-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8940a8c70a1ff20a5cb2330bf5b763e98134afc6b1070650b096d9924df0a47c +size 4999463284 diff --git a/model-00080-of-00085.safetensors b/model-00080-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1153a0c07d202658344fb14c9833be3eb5b222e9 --- /dev/null +++ b/model-00080-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f91f5a61ef7343849c157e0d36e4cb23e4b7e63d6b94e6f40dc7d8143946e0f2 +size 4999461600 diff --git a/model-00081-of-00085.safetensors b/model-00081-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..56430b04f705a4c09a579017202280fc06a1f0b7 --- /dev/null +++ b/model-00081-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56049d50b2ac490f83d0adacfa3db6e00739f0a321e22ba2c230ecb57cad1b3b +size 4999458340 diff --git a/model-00082-of-00085.safetensors b/model-00082-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..225cb1a987a2c9320c9dc4b8d03570934ed27b00 --- /dev/null +++ b/model-00082-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41cd87cfbf73f0745451e91cdd27f3680a71d0356fef14a22067206cc7efb12a +size 4999458772 diff --git 
a/model-00083-of-00085.safetensors b/model-00083-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6e70e2c19b30950ea843c598e70b68bb2425ace8 --- /dev/null +++ b/model-00083-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdad5e268e84b774f5340e920029c70f88b88798be890f45f1b16dde6b49a0c0 +size 4999459488 diff --git a/model-00084-of-00085.safetensors b/model-00084-of-00085.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..25e6a6eaccb6c65d4243d35b628c234096e22b87 --- /dev/null +++ b/model-00084-of-00085.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14433c686404beedadc58fdbc21af6eeb5dcf297f028fec04642fd8c58a144d8 +size 4582843272 diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..aba40197a4cdb5607f4ab7a05fb0a4ee8054fd6d --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19e773648cb4e65de8660ea6365e10acca112d42a854923df93db4a6f333a82d +size 20217442 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1723f7d90e3fb497303ec7b18f88cf5d05928f37 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,33 @@ +{ + "backend": "tokenizers", + "clean_up_tokenization_spaces": false, + "do_lower_case": false, + "eos_token": "<|endoftext|>", + "extra_special_tokens": [ + "<|endoftext|>", + "[MASK]", + "[gMASK]", + "[sMASK]", + "<sop>", + "<eop>", + "<|system|>", + "<|user|>", + "<|assistant|>", + "<|observation|>", + "<|begin_of_image|>", + "<|end_of_image|>", + "<|begin_of_video|>", + "<|end_of_video|>", + "<|begin_of_audio|>", + "<|end_of_audio|>", + "<|begin_of_transcription|>", + "<|end_of_transcription|>" + ], + "is_local": true, + "model_max_length": 202752, + "model_specific_special_tokens": {}, + "pad_token": "<|endoftext|>", + "padding_side": 
"left", + "remove_space": false, + "tokenizer_class": "TokenizersBackend" +}