Upload folder using huggingface_hub

Files changed (5) hide show

README.md CHANGED Viewed

@@ -31,6 +31,10 @@ This is Transformers/HF format fp16 weights for CodeLlama 7B-Python.  It is the
 Quantisations will be coming shortly.
 ## Prompt template: TBC

 Quantisations will be coming shortly.
+Please note that, due to a change in the RoPE Theta value, you must load these FP16 models with `trust_remote_code=True` to get correct results.
+Credit to @emozilla for creating the necessary modelling code to achieve this!
 ## Prompt template: TBC

README.mde CHANGED Viewed

@@ -22,15 +22,19 @@ tags:
 <hr style="margin-top: 1.0em; margin-bottom: 1.0em;">
 <!-- header end -->
-# CodeLlama 13B-Instruct fp16
 - Model creator: [Meta](https://ai.meta.com/llama/)
 ## Description
-This is Transformers/HF format fp16 weights for CodeLlama 13B-Instruct.  It is the result of downloading CodeLlama 13B-Instruct from [Meta](https://ai.meta.com/blog/code-llama-large-language-model-coding/) and converting to HF using `convert_llama_weights_to_hf.py`.
 Quantisations will be coming shortly.
 ## Prompt template: TBC

 <hr style="margin-top: 1.0em; margin-bottom: 1.0em;">
 <!-- header end -->
+# CodeLlama %%MODEL%% fp16
 - Model creator: [Meta](https://ai.meta.com/llama/)
 ## Description
+This is Transformers/HF format fp16 weights for CodeLlama %%MODEL%%.  It is the result of downloading CodeLlama %%MODEL%% from [Meta](https://ai.meta.com/blog/code-llama-large-language-model-coding/) and converting to HF using `convert_llama_weights_to_hf.py`.
 Quantisations will be coming shortly.
+Please note that, due to a change in the RoPE Theta value, you must load these FP16 models with `trust_remote_code=True` to get correct results.
+Credit to @emozilla for creating the necessary modelling code to achieve this!
 ## Prompt template: TBC

config.json CHANGED Viewed

@@ -1,24 +1,31 @@
 {
-  "architectures": [
-    "LlamaForCausalLM"
-  ],
-  "bos_token_id": 1,
-  "eos_token_id": 2,
-  "hidden_act": "silu",
-  "hidden_size": 4096,
-  "initializer_range": 0.02,
-  "intermediate_size": 11008,
-  "max_position_embeddings": 2048,
-  "model_type": "llama",
-  "num_attention_heads": 32,
-  "num_hidden_layers": 32,
-  "num_key_value_heads": 32,
-  "pretraining_tp": 1,
-  "rms_norm_eps": 1e-05,
-  "rope_scaling": null,
-  "tie_word_embeddings": false,
-  "torch_dtype": "float16",
-  "transformers_version": "4.32.0",
-  "use_cache": true,
-  "vocab_size": 32016
-}

 {
+    "architectures": [
+        "LlamaForCausalLM"
+    ],
+    "bos_token_id": 1,
+    "eos_token_id": 2,
+    "hidden_act": "silu",
+    "hidden_size": 4096,
+    "initializer_range": 0.02,
+    "intermediate_size": 11008,
+    "max_position_embeddings": 16384,
+    "model_type": "llama",
+    "num_attention_heads": 32,
+    "num_hidden_layers": 32,
+    "num_key_value_heads": 32,
+    "pretraining_tp": 1,
+    "rms_norm_eps": 1e-05,
+    "rope_scaling": null,
+    "tie_word_embeddings": false,
+    "torch_dtype": "float16",
+    "transformers_version": "4.32.0",
+    "use_cache": true,
+    "vocab_size": 32016,
+    "auto_map": {
+        "AutoConfig": "configuration_llama.LlamaConfig",
+        "AutoModel": "modeling_llama.LlamaModel",
+        "AutoModelForCausalLM": "modeling_llama.LlamaForCausalLM",
+        "AutoModelForSequenceClassification": "modeling_llama.LlamaForSequenceClassification"
+    },
+    "rope_theta": 1000000
+}

tokenizer.json CHANGED Viewed

@@ -32134,23 +32134,7 @@
       "왕": 31996,
       "收": 31997,
       "弘": 31998,
-      "给": 31999,
-      "▁<SU": 32000,
-      "▁<SUF": 32001,
-      "▁<PRE": 32002,
-      "▁<M": 32003,
-      "▁<MID": 32004,
-      "▁<E": 32005,
-      "▁<EOT": 32006,
-      "▁<PRE>": 32007,
-      "▁<SUF>": 32008,
-      "▁<MID>": 32009,
-      "▁<EOT>": 32010,
-      "▁<EOT><EOT>": 32011,
-      "▁<EOT><EOT><EOT>": 32012,
-      "▁<EOT><EOT><EOT><EOT>": 32013,
-      "▁<EOT><EOT><EOT><EOT><EOT>": 32014,
-      "▁<EOT><EOT><EOT><EOT><EOT><EOT>": 32015
     },
     "merges": [
       "▁ t",
@@ -93401,18 +93385,7 @@
       "▁▁▁▁▁▁▁▁▁ ▁▁▁▁▁▁",
       "▁▁▁▁▁▁▁ ▁▁▁▁▁▁▁▁",
       "▁▁▁▁▁▁▁▁▁▁▁ ▁▁▁▁",
-      "▁ ▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
-      "▁< SU",
-      "▁<SU F",
-      "▁< PRE",
-      "▁< M",
-      "▁<M ID",
-      "▁< E",
-      "▁<E OT",
-      "▁<PRE >",
-      "▁<SUF >",
-      "▁<MID >",
-      "▁<EOT >"
     ]
   }
 }

       "왕": 31996,
       "收": 31997,
       "弘": 31998,
+      "给": 31999
     },
     "merges": [
       "▁ t",
       "▁▁▁▁▁▁▁▁▁ ▁▁▁▁▁▁",
       "▁▁▁▁▁▁▁ ▁▁▁▁▁▁▁▁",
       "▁▁▁▁▁▁▁▁▁▁▁ ▁▁▁▁",
+      "▁ ▁▁▁▁▁▁▁▁▁▁▁▁▁▁"
     ]
   }
 }

tokenizer.model CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6
-size 500058

 version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723