Weyaxi committed on Jan 12, 2024

Commit

276da9a

verified ·

1 Parent(s): 6e19275

Upload folder using huggingface_hub

Browse files

Files changed (32) hide show

.ipynb_checkpoints/config-checkpoint.json +34 -0
config.json +34 -0
mergekit_moe_config.yml +19 -0
model-00001-of-00023.safetensors +3 -0
model-00002-of-00023.safetensors +3 -0
model-00003-of-00023.safetensors +3 -0
model-00004-of-00023.safetensors +3 -0
model-00005-of-00023.safetensors +3 -0
model-00006-of-00023.safetensors +3 -0
model-00007-of-00023.safetensors +3 -0
model-00008-of-00023.safetensors +3 -0
model-00009-of-00023.safetensors +3 -0
model-00010-of-00023.safetensors +3 -0
model-00011-of-00023.safetensors +3 -0
model-00012-of-00023.safetensors +3 -0
model-00013-of-00023.safetensors +3 -0
model-00014-of-00023.safetensors +3 -0
model-00015-of-00023.safetensors +3 -0
model-00016-of-00023.safetensors +3 -0
model-00017-of-00023.safetensors +3 -0
model-00018-of-00023.safetensors +3 -0
model-00019-of-00023.safetensors +3 -0
model-00020-of-00023.safetensors +3 -0
model-00021-of-00023.safetensors +3 -0
model-00022-of-00023.safetensors +3 -0
model-00023-of-00023.safetensors +3 -0
model.safetensors.index.json +0 -0
special_tokens_map.json +24 -0
tokenizer.json +0 -0
tokenizer.model +3 -0
tokenizer_config.json +41 -0
yaml_file_moe.yaml +19 -0

.ipynb_checkpoints/config-checkpoint.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "_name_or_path": "nontoxic-bagel-34b-v0.2",
+  "architectures": [
+    "MixtralForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "hidden_act": "silu",
+  "hidden_size": 7168,
+  "initializer_range": 0.02,
+  "intermediate_size": 20480,
+  "max_position_embeddings": 200000,
+  "model_type": "mixtral",
+  "num_attention_heads": 56,
+  "num_experts_per_tok": 2,
+  "num_hidden_layers": 60,
+  "num_key_value_heads": 8,
+  "num_local_experts": 4,
+  "output_router_logits": false,
+  "pad_token_id": 0,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 5000000.0,
+  "router_aux_loss_coef": 0.001,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.36.2",
+  "use_cache": true,
+  "vocab_size": 64000
+}

config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "_name_or_path": "nontoxic-bagel-34b-v0.2",
+  "architectures": [
+    "MixtralForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "hidden_act": "silu",
+  "hidden_size": 7168,
+  "initializer_range": 0.02,
+  "intermediate_size": 20480,
+  "max_position_embeddings": 200000,
+  "model_type": "mixtral",
+  "num_attention_heads": 56,
+  "num_experts_per_tok": 2,
+  "num_hidden_layers": 60,
+  "num_key_value_heads": 8,
+  "num_local_experts": 4,
+  "output_router_logits": false,
+  "pad_token_id": 0,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 5000000.0,
+  "router_aux_loss_coef": 0.001,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.36.2",
+  "use_cache": true,
+  "vocab_size": 64000
+}

mergekit_moe_config.yml ADDED Viewed

	@@ -0,0 +1,19 @@

+base_model: nontoxic-bagel-34b-v0.2
+gate_mode: hidden
+dtype: bfloat16
+experts:
+  - source_model: bagel-dpo-34b-v0.2
+    positive_prompts: ["question answering", "Q:", "science", "biology", "chemistry", "physics"]
+    negative_prompts: ["math", "reason", "mathematics", "solve", "count", "code", "python", "javascript", "programming", "algorithm"]
+  - source_model: Nous-Hermes-2-Yi-34B
+    positive_prompts: ["chat", "math", "reason", "mathematics", "solve", "count", "python", "javascript", "programming", "algorithm", "tell me", "assistant"]
+  - source_model: SUS-Chat-34B
+    positive_prompts: ["math", "reason", "mathematics", "solve", "count", "assistant"]
+  - source_model: platypus-yi-34b
+    positive_prompts: [""]
+    negative_prompts: ["math", "reason", "mathematics", "solve", "count"]

model-00001-of-00023.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:24f31acdf0d121871985d0524ec5544e0d8a158eb9e25b4f79e0ace524f00b98
+size 9763127624

model-00002-of-00023.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cba73f6e1bab7769c30c71479207678541cd89cbda02879cb9416b27fe000693
+size 9982448024

model-00003-of-00023.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f4617f4d2491180e78057ab4c3906bf024e9c679d15a78827eacc385d59bc933
+size 9982448040

model-00004-of-00023.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:09506d6402088d877e79422ff17763eb5268ce4edff59434a9575bd31c4c6719
+size 9982448040

model-00005-of-00023.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f11059cb8dd97388823a02489899fd87fdffbda2fb1425421b8993ffd4e8f8cb
+size 9982448040

model-00006-of-00023.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:45747b7bf86985def0451575f7ef9bb6f6baaf58f58cfa2d03457ef53653a46c
+size 9982448040

model-00007-of-00023.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1bd69edc2102d9c1cdd087a77b19b2060885394604d828b1c2e4a83377f45b14
+size 9982448040

model-00008-of-00023.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8c265450174c94ee1bdff372eb73df2c7c068d8adcc6a5c4a3aa22e4b55447c4
+size 9982448008

model-00009-of-00023.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fc3203d417697861d11a381ea9faa0dffbee18bbe3dd878a51cf16417bf49ab2
+size 9982448024

model-00010-of-00023.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1b5c2d59278802f9c63e4008b440d7a1b86c347bccf20980c1378da37cfcccd4
+size 9982448040

model-00011-of-00023.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:82b31c0e128662c3d8a2d271e7853ab2d6b71b472a11a2f1091bed291ce71bb4
+size 9982448040

model-00012-of-00023.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fd030c0ebb19c65c526c8979546ee5c5abe6e25502416d2507bd857aaa3eb256
+size 9982448040

model-00013-of-00023.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6e9279e81398606cd42d0b998385767903a5d89a69d1eae43b354c050c760f12
+size 9982448040

model-00014-of-00023.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1eaf7295e34858d28f98379691a97f385aef58e9fedeea9d770dd65c4adb4241
+size 9982448040

model-00015-of-00023.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3d01597356206fae1717d7959b0fd0f09f210c833687601cc6eeb6c723e18dc9
+size 9982448016

model-00016-of-00023.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bc86cc736fe97b8f3ac4562d2073735871cbcd0187f47d1c9a777a0fde83dfcf
+size 9982448016

model-00017-of-00023.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dd438ac48c52883d6d57f33f77373226debfa85b303824f286806b7db92384dd
+size 9982448040

model-00018-of-00023.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f2d50619ab239e50203d2dbfa5b7397a8183a03596e509379cb26ccd194fa6a4
+size 9982448040

model-00019-of-00023.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:43ab8ef661e06c6a8f0661a113529f9551c2e7b8a5ea941d466562fb5b78e879
+size 9982448040

model-00020-of-00023.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0e86306048cee4176481e9f551dfb1dbe0a22ea8751d0549f18e07bb59ad7a48
+size 9982448040

model-00021-of-00023.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:861a2729357f2cc4a3ba20b3f06d9ec62bf81d335b82bf52699a514fe013df6c
+size 9982448040

model-00022-of-00023.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:99933ba46bf44318b7148a1bcba8a09ad3953090ea1c425429741e975abbd1c5
+size 9997999880

model-00023-of-00023.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e0d4240ff9e8982a7c7753bf1e59a3945b994f3dfdb68ff37094dd66f341b96f
+size 7916023384

model.safetensors.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<s>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:386c49cf943d71aa110361135338c50e38beeff0a66593480421f37b319e1a39
+size 1033105

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,41 @@

+{
+  "add_bos_token": false,
+  "add_eos_token": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "chat_template": "{%- for idx in range(0, messages|length) -%}\n{%- if messages[idx]['role'] == 'user' -%}\n{%- if idx > 1 -%}\n{{- bos_token + '[INST] ' + messages[idx]['content'] + ' [/INST]' -}}\n{%- else -%}\n{{- messages[idx]['content'] + ' [/INST]' -}}\n{%- endif -%}\n{% elif messages[idx]['role'] == 'system' %}\n{{- '[INST] <<SYS>>\\n' + messages[idx]['content'] + '\\n<</SYS>>\\n\\n' -}}\n{%- elif messages[idx]['role'] == 'assistant' -%}\n{{- ' '  + messages[idx]['content'] + ' ' + eos_token -}}\n{% endif %}\n{% endfor %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "legacy": true,
+  "model_max_length": 200000,
+  "pad_token": "<s>",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}

yaml_file_moe.yaml ADDED Viewed

	@@ -0,0 +1,19 @@

+base_model: nontoxic-bagel-34b-v0.2
+gate_mode: hidden
+dtype: bfloat16
+experts:
+  - source_model: bagel-dpo-34b-v0.2
+    positive_prompts: ["question answering", "Q:", "science", "biology", "chemistry", "physics"]
+    negative_prompts: ["math", "reason", "mathematics", "solve", "count", "code", "python", "javascript", "programming", "algorithm"]
+  - source_model: Nous-Hermes-2-Yi-34B
+    positive_prompts: ["chat", "math", "reason", "mathematics", "solve", "count", "python", "javascript", "programming", "algorithm", "tell me", "assistant"]
+  - source_model: SUS-Chat-34B
+    positive_prompts: ["math", "reason", "mathematics", "solve", "count", "assistant"]
+  - source_model: platypus-yi-34b
+    positive_prompts: [""]
+    negative_prompts: ["math", "reason", "mathematics", "solve", "count"]