SatouLilly committed on
Commit 784024d · verified · 1 Parent(s): 790c632

Upload 3 files

Files changed (3)
  1. README.md +0 -5
  2. config.json +38 -45
  3. generation_config.json +12 -0
README.md CHANGED
@@ -6,11 +6,6 @@ language:
 
 EXL2 quants of [Sao10K/L3.1-70B-Hanami-x1](https://huggingface.co/Sao10K/L3.1-70B-Hanami-x1)
 
-## Earlier uploaded models had the problem of constantly ending with “.assistant” and repeating the output.
-I have replaced “eos_token_id” in config with "128009".
-If you still have the problem, please add “.assistant” to Custom Stopping Strings in SillyTavern.
-Or use GGUF: https://huggingface.co/mradermacher/L3.1-70B-Hanami-x1-GGUF
-
 ---
 ![hanami](https://huggingface.co/Sao10K/L3.1-70B-Hanami-x1/resolve/main/Cute.png)
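The note removed above described two workarounds for the “.assistant” run-on artifact. For anyone driving the model from Python rather than SillyTavern, the same stopping-string idea can be reproduced with the `stop_strings` argument of `generate()` in recent transformers releases. A minimal sketch, assuming the unquantized source repo (the EXL2 quant itself is loaded through exllamav2-based backends, not AutoModel); the prompt is illustrative only:

```python
# Minimal sketch: the Python equivalent of SillyTavern's Custom Stopping
# Strings. Assumes a transformers version that supports `stop_strings`
# and enough VRAM/offload for the unquantized 70B source model.
from transformers import AutoModelForCausalLM, AutoTokenizer

repo = "Sao10K/L3.1-70B-Hanami-x1"  # source weights, not the EXL2 quant
tokenizer = AutoTokenizer.from_pretrained(repo)
model = AutoModelForCausalLM.from_pretrained(repo, device_map="auto")

inputs = tokenizer("Hello!", return_tensors="pt").to(model.device)
out = model.generate(
    **inputs,
    max_new_tokens=128,
    stop_strings=[".assistant"],  # cut generation at the artifact
    tokenizer=tokenizer,          # generate() needs it to match strings
)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```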
config.json CHANGED
@@ -1,46 +1,39 @@
 {
-  "_name_or_path": "/workspace/Meta-Llama-3.1-70B-Instruct",
-  "architectures": [
-    "LlamaForCausalLM"
-  ],
-  "attention_bias": false,
-  "attention_dropout": 0.0,
-  "bos_token_id": 128000,
-  "eos_token_id": 128009,
-  "hidden_act": "silu",
-  "hidden_size": 8192,
-  "initializer_range": 0.02,
-  "intermediate_size": 28672,
-  "max_position_embeddings": 131072,
-  "mlp_bias": false,
-  "model_type": "llama",
-  "num_attention_heads": 64,
-  "num_hidden_layers": 80,
-  "num_key_value_heads": 8,
-  "pretraining_tp": 1,
-  "rms_norm_eps": 1e-05,
-  "rope_scaling": {
-    "factor": 8.0,
-    "high_freq_factor": 4.0,
-    "low_freq_factor": 1.0,
-    "original_max_position_embeddings": 8192,
-    "rope_type": "llama3"
-  },
-  "rope_theta": 500000.0,
-  "tie_word_embeddings": false,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.44.2",
-  "use_cache": true,
-  "vocab_size": 128256,
-  "quantization_config": {
-    "quant_method": "exl2",
-    "version": "0.2.1",
-    "bits": 4.65,
-    "head_bits": 6,
-    "calibration": {
-      "rows": 115,
-      "length": 2048,
-      "dataset": "(default)"
-    }
-  }
-}
+  "_name_or_path": "/workspace/Meta-Llama-3.1-70B-Instruct",
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 128000,
+  "eos_token_id": [
+    128001,
+    128008,
+    128009
+  ],
+  "hidden_act": "silu",
+  "hidden_size": 8192,
+  "initializer_range": 0.02,
+  "intermediate_size": 28672,
+  "max_position_embeddings": 131072,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "num_attention_heads": 64,
+  "num_hidden_layers": 80,
+  "num_key_value_heads": 8,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": {
+    "factor": 8.0,
+    "high_freq_factor": 4.0,
+    "low_freq_factor": 1.0,
+    "original_max_position_embeddings": 8192,
+    "rope_type": "llama3"
+  },
+  "rope_theta": 500000.0,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.44.2",
+  "use_cache": true,
+  "vocab_size": 128256
+}
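Two things change in config.json: the `quantization_config` block is dropped, and `eos_token_id` goes from the single id 128009 (`<|eot_id|>`) to a list covering all three Llama 3.1 stop tokens. Loaders that honor the config treat a list-valued `eos_token_id` as “stop on any of these ids”, which is what keeps the model from running past an end-of-turn token. A minimal self-contained sketch of that stop check, with ids from the new config (token names per the Llama 3.1 tokenizer):

```python
# Minimal sketch: how a decode loop consumes a list-valued eos_token_id.
# Ids are from the new config; names are the Llama 3.1 special tokens.
EOS_TOKEN_IDS = {128001, 128008, 128009}  # <|end_of_text|>, <|eom_id|>, <|eot_id|>

def should_stop(token_id: int) -> bool:
    """True if the sampled token ends generation under the new config."""
    return token_id in EOS_TOKEN_IDS

# Under the old config only 128009 terminated decoding; 128001 and 128008
# were treated as ordinary tokens, so generation could run on past them.
assert should_stop(128001) and should_stop(128008) and should_stop(128009)
assert not should_stop(42)  # normal vocabulary ids keep decoding
```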
generation_config.json ADDED
@@ -0,0 +1,12 @@
+{
+  "bos_token_id": 128000,
+  "do_sample": true,
+  "eos_token_id": [
+    128001,
+    128008,
+    128009
+  ],
+  "temperature": 0.6,
+  "top_p": 0.9,
+  "transformers_version": "4.42.3"
+}
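The new generation_config.json supplies sampling defaults (temperature 0.6, top_p 0.9, sampling enabled) plus the same three-id stop list, so frontends that read it terminate correctly without manual setup. A minimal sketch of inspecting these defaults with transformers' `GenerationConfig`; the repository id below is a placeholder, not the actual repo name:

```python
# Minimal sketch: reading the defaults added by generation_config.json.
# "your-username/your-exl2-repo" is a placeholder repository id.
from transformers import GenerationConfig

gen_cfg = GenerationConfig.from_pretrained("your-username/your-exl2-repo")
print(gen_cfg.temperature)   # 0.6
print(gen_cfg.top_p)         # 0.9
print(gen_cfg.eos_token_id)  # [128001, 128008, 128009]
```

`model.generate()` picks these defaults up automatically; per-call arguments such as `generate(temperature=0.8)` still take precedence.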