shio3616 committed on
Commit
8feed57
·
verified ·
1 Parent(s): 6a16a38

feat: ✨ Uploaded artifacts

Browse files
config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "nitic-nlp-team/webnavix-llama-base",
3
+ "architectures": [
4
+ "MixtralForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 1,
9
+ "eos_token_id": 2,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2560,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 6912,
15
+ "max_position_embeddings": 4096,
16
+ "mlp_bias": false,
17
+ "model_type": "mixtral",
18
+ "num_attention_heads": 20,
19
+ "num_experts_per_tok": 2,
20
+ "num_hidden_layers": 32,
21
+ "num_key_value_heads": 20,
22
+ "num_local_experts": 8,
23
+ "output_router_logits": false,
24
+ "pad_token_id": 0,
25
+ "pretraining_tp": 1,
26
+ "rms_norm_eps": 1e-05,
27
+ "rope_scaling": null,
28
+ "rope_theta": 10000.0,
29
+ "router_aux_loss_coef": 0.001,
30
+ "router_jitter_noise": 0.0,
31
+ "sliding_window": null,
32
+ "tie_word_embeddings": false,
33
+ "torch_dtype": "bfloat16",
34
+ "transformers_version": "4.47.1",
35
+ "use_cache": false,
36
+ "vocab_size": 32000
37
+ }
mergekit_moe_config.yml ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: nitic-nlp-team/webnavix-llama-base
2
+ gate_mode: hidden
3
+ dtype: bfloat16
4
+ experts_per_token: 2
5
+ experts:
6
+ - source_model: /content/drive/MyDrive/Projects/nitic-nlp-team/webnavix/checkpoints/webnavix/nitic-nlp-team/webnavix-llama-ai-tools/checkpoint-500/
7
+ positive_prompts:
8
+ - "Could you please open brandmark?"
9
+ - "Please help me to create a logo for my brand."
10
+ - "Change the font size - 2.5, Light Peach color & let the font style be as it is."
11
+ - "Open Quillbot website."
12
+ - 'Can you please open the website "Hugging face" stable diffusion?'
13
+ - "Open bard and login with below credentials: \\n\\tID - webtasks.navigator@gmail.com \\n\\tPassword - KEG24qweUHij%^"
14
+ - "Ask bard to summarize the below news from BBC news \\n\\t\\n\\t- Modi in US: Elon Musk says Tesla to come to India 'as soon as possible'"
15
+ - 'I want to create a mood board on "African Savanna".'
16
+ - "Open copy.ai website."
17
+ - "Got any creative ideas for a 10-year old's birthday?"
18
+ - source_model: /content/drive/MyDrive/Projects/nitic-nlp-team/webnavix/checkpoints/webnavix/nitic-nlp-team/webnavix-llama-booking/checkpoint-1050/
19
+ positive_prompts:
20
+ - "Open momondo.in and login with google using the below details: \\n\\t\\n\\tId: webtasks.navigator@gmail.com \\n\\tPassword: KEG24qweUHij%^"
21
+ - "Great, How many people will be in your party?"
22
+ - "Open Cheaptickets website."
23
+ - "Send me cruise options from Canada/New England for 1st July to 8th July."
24
+ - "I am searching for one-way flights from Azerbaijan to Turkey."
25
+ - "Date of journey: 30th June. Number of travelers: 4"
26
+ - "It will be picked up in Toronto (and vicinity), Ontario, Canada on June 8th at 10:00 AM. Drop-off will take place at 10 AM on 10th June."
27
+ - "Please open Vrbo website."
28
+ - "In which location?"
29
+ - "What will be the price for the 1st option?"
30
+ - source_model: /content/drive/MyDrive/Projects/nitic-nlp-team/webnavix/checkpoints/webnavix/nitic-nlp-team/webnavix-llama-composing/checkpoint-500/
31
+ positive_prompts:
32
+ - "Please open Wattpad."
33
+ - "Create a new story"
34
+ - "Below in the text box, write: \\n\\t \\n\\tWhispers in the wind, \\n\\tA dance of words unseen, \\n\\tEmotions unfurling, \\n\\tIn the spaces between. \\n\\t \\n\\tLove's tender embrace, \\n\\tOr heartache's bitter sting, \\n\\tIn ...\") Please select the best action using the correct format, do not provide any other information or explanation."
35
+ - 'Click on "Publish" and tick the "*You must complete this field to proceed."'
36
+ - 'Open a new Google Docs document and create a with the bulleted list with the title "Healthy Eating Habits."'
37
+ - "Add the following bullet points to the list: \\n\\t\\n\\tPrioritize tasks based on importance and urgency. \\n\\t\\n\\tBreak larger tasks into smaller, manageable chunks. \\n\\t\\n\\tSet realistic goals and deadlines. \\n\\t\\n..."
38
+ - 'Select "Untitled Part 1" paste this "The Enchanted Gears" and click on "Save."'
39
+ - "I want to create a blog in a few words on Beginner's Guide to Meditation: Finding Inner Peace."
40
+ - 'Let''s add a subtitle to the document stating, "Enhancing Collaboration and Productivity."'
41
+ - "For the title, let's use Georgia with a font size of 28, and for the subtitle, let's use Tahoma with a font size of 18."
42
+ - source_model: /content/drive/MyDrive/Projects/nitic-nlp-team/webnavix/checkpoints/webnavix/nitic-nlp-team/webnavix-llama-information-lookup/checkpoint-500/
43
+ positive_prompts:
44
+ - "Please open the HowStuffWorks website."
45
+ - "Search for the article titled 'How to Prepare for a Hurricane.'"
46
+ - "Name the contributing writers in this article."
47
+ - "What is the Pythagorean theorem?"
48
+ - "Search 'Trading for Beginners'."
49
+ - "Show me some articles"
50
+ - 'Can you open the website of the Central Intelligence Agency? [00:34] Please go to country "China". [01:06] What is the population of China?'
51
+ - "Could you tell me the major rivers in China, listed by length in kilometers?"
52
+ - 'Go to the "New on Britannica " section. [00:47] Open the article on "Sister Rosetta Tharpe." [01:01] Who was Sister Rosetta Tharpe? [01:39] Whose daughter was Sister Rosetta Tharpe? [02:11] At what age did Sister Rosetta Tharpe start singing and playing the guitar?'
53
+ - "When did she officially join Lucky Millinder's swing band?"
54
+ - source_model: /content/drive/MyDrive/Projects/nitic-nlp-team/webnavix/checkpoints/webnavix/nitic-nlp-team/webnavix-llama-shopping/checkpoint-400/
55
+ positive_prompts:
56
+ - "Can you please open the Grubhub website?"
57
+ - "Can you show me some of their Neighborhood Gems options?"
58
+ - "I am looking for DSLR camera."
59
+ - "Okay. May I know the price range?"
60
+ - "Can you find a Samsung 5G smartphone?"
61
+ - "My budget is between Rs25000 to 30000."
62
+ - "Select Peach colour and what is the price?"
63
+ - "What will be the delivery date for the last one if I purchase it today?"
64
+ - "Please open the eBay website."
65
+ - "Look for 3 width and 60 gauge."
66
+ - source_model: /content/drive/MyDrive/Projects/nitic-nlp-team/webnavix/checkpoints/webnavix/nitic-nlp-team/webnavix-llama-social-interaction/checkpoint-150/
67
+ positive_prompts:
68
+ - "Open Discourse Org & sign in with google using the below credentials: \\n\\tID - webtasks.navigator@gmail.com \\n\\tPassword - KEG24qweUHij%^"
69
+ - "Open “Gaming” categories and select 'new topic'."
70
+ - "Sure, for the title add: \\n\\tBest free games to-play on PC"
71
+ - "For description add the text below: \\n\\tLooking for some fantastic free games to enjoy on your PC? Look no further! Here’s a list of the best free games that offer incredible experiences without brea..."
72
+ - "Please create the Topic."
73
+ - "Browse games & send me some recommended Games"
74
+ - "How many followers does it have?"
75
+ - "Go to videos and open the topic “Recommend a great YouTube video”."
76
+ - "Add a server on discord then select “Create my own” and Please name it as “pina colada's server”."
77
+ - "Now please go to the “Mirage” server and create an event."
78
+ - source_model: /content/drive/MyDrive/Projects/nitic-nlp-team/webnavix/checkpoints/webnavix/nitic-nlp-team/webnavix-llama-summarizing/checkpoint-450/
79
+ positive_prompts:
80
+ - "Open the Second one and please summarize this article."
81
+ - 'Please open the New Yorker website. [01:04] Click on "Goings On" [01:24] Open the link to the first article [01:56] Please summarize this article briefly.'
82
+ - "Please summarize the first two paragraphs."
83
+ - "Search Jalapeno Falafels Recipe [00:57] Please summarize this article name “Jalapeno Falafels Recipe”"
84
+ - "Summarize the above article in few lines."
85
+ - "Open the 2nd article and summarize the first 2 paragraphs of the article."
86
+ - "Show me few articles and summarize for me."
87
+ - "Open the Third one and please summarize this article."
88
+ - "Please summarize the first two paragraphs."
89
+ - "Provide me with a list of the articles."
90
+ - source_model: /content/drive/MyDrive/Projects/nitic-nlp-team/webnavix/checkpoints/webnavix/nitic-nlp-team/webnavix-llama-task-management/checkpoint-450/
91
+ positive_prompts:
92
+ - "Open the Trello website and please search for “Annotations”."
93
+ - "Go to “Gmail” and Add “Template”. Can you tell me the title name? Email Empowerment."
94
+ - "Please Select Notification as watching."
95
+ - "Create a Workspace in Trello."
96
+ - "Create a new section as \"Do laundry\" & add the below tasks: \\n\\t- Sort and wash clothes. \\n\\t- Dry and fold clean laundry. \\n\\t- Iron or steam clothes if necessary. \\n\\t- Put away clean clothes in their de..."
97
+ - "What will be their Due date? This weekend."
98
+ - "Now, please add the following tasks to the below sections:\\n\\tPlanning\\n\\tTasks: \\n\\t\\n\\t- Research home renovation ideas online \\n\\t\\n\\t- Create a mood board for design inspiration\\n\\t2. Budgeting \\n\\t\\n\\tTasks: \\n\\t..."
99
+ - "Do you want any of the tasks to be set as a priority?"
100
+ - "Let's open the Todoist website."
101
+ - "Could you provide me with the task name and description?"
model-00001-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98121ebe544d0cfcccb231faccabe8c821662dc0c125611448475e7c324744f3
3
+ size 4972953912
model-00002-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3caac6089b6e8e06de100767966702d5b41647e8b6709603203141720792f32f
3
+ size 4969011496
model-00003-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0387270df6546b814d2355eead67c732afd57b7087159df8c7b46e4265c011fc
3
+ size 4986061816
model-00004-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cde5b24c6b03bc4f92444a97de09f280130504ee5d2c017873d0be097a595b3
3
+ size 4969011640
model-00005-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bced034dde24294a5ebbc74b553d65a3e7b8816715c32d7cd14d38d9d4b2641f
3
+ size 4986061808
model-00006-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:039cc7c63062162d1233cb098a3a22cc078b49dd31da1fd9cad5606fa974733c
3
+ size 4303161160
model.safetensors.index.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metadata": {"mergekit_version": "0.0.5.2", "total_size": 29186135040}, "weight_map": {"model.embed_tokens.weight": "model-00001-of-00006.safetensors", "model.layers.0.input_layernorm.weight": "model-00001-of-00006.safetensors", "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00006.safetensors", "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00006.safetensors", "model.layers.0.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00006.safetensors", "model.layers.0.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00006.safetensors", "model.layers.0.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00006.safetensors", "model.layers.0.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00006.safetensors", "model.layers.0.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00006.safetensors", "model.layers.0.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00006.safetensors", "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00006.safetensors", "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00006.safetensors", "model.layers.0.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00006.safetensors", "model.layers.0.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00006.safetensors", "model.layers.0.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00006.safetensors", "model.layers.0.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00006.safetensors", "model.layers.0.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00006.safetensors", 
"model.layers.0.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00006.safetensors", "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00006.safetensors", "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00006.safetensors", "model.layers.0.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00006.safetensors", "model.layers.0.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00006.safetensors", "model.layers.0.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00006.safetensors", "model.layers.0.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00006.safetensors", "model.layers.0.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00006.safetensors", "model.layers.0.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00006.safetensors", "model.layers.1.input_layernorm.weight": "model-00001-of-00006.safetensors", "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00006.safetensors", "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00006.safetensors", "model.layers.1.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00006.safetensors", "model.layers.1.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00006.safetensors", "model.layers.1.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00006.safetensors", "model.layers.1.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00006.safetensors", "model.layers.1.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00006.safetensors", "model.layers.1.block_sparse_moe.experts.7.w3.weight": 
"model-00001-of-00006.safetensors", "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00006.safetensors", "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00006.safetensors", "model.layers.1.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00006.safetensors", "model.layers.1.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00006.safetensors", "model.layers.1.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00006.safetensors", "model.layers.1.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00006.safetensors", "model.layers.1.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00006.safetensors", "model.layers.1.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00006.safetensors", "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00006.safetensors", "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00006.safetensors", "model.layers.1.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00006.safetensors", "model.layers.1.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00006.safetensors", "model.layers.1.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00006.safetensors", "model.layers.1.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00006.safetensors", "model.layers.1.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00006.safetensors", "model.layers.1.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00006.safetensors", "model.layers.2.input_layernorm.weight": "model-00001-of-00006.safetensors", "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", 
"model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00006.safetensors", "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00006.safetensors", "model.layers.2.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00006.safetensors", "model.layers.2.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00006.safetensors", "model.layers.2.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00006.safetensors", "model.layers.2.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00006.safetensors", "model.layers.2.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00006.safetensors", "model.layers.2.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00006.safetensors", "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00006.safetensors", "model.layers.2.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00006.safetensors", "model.layers.2.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00006.safetensors", "model.layers.2.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00006.safetensors", "model.layers.2.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00006.safetensors", "model.layers.2.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00006.safetensors", "model.layers.2.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00006.safetensors", "model.layers.2.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00006.safetensors", "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00006.safetensors", "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00006.safetensors", "model.layers.2.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00006.safetensors", "model.layers.2.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00006.safetensors", "model.layers.2.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00006.safetensors", "model.layers.2.block_sparse_moe.experts.5.w2.weight": 
"model-00001-of-00006.safetensors", "model.layers.2.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00006.safetensors", "model.layers.2.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00006.safetensors", "model.layers.3.input_layernorm.weight": "model-00001-of-00006.safetensors", "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00006.safetensors", "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00006.safetensors", "model.layers.3.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00006.safetensors", "model.layers.3.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00006.safetensors", "model.layers.3.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00006.safetensors", "model.layers.3.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00006.safetensors", "model.layers.3.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00006.safetensors", "model.layers.3.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00006.safetensors", "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00006.safetensors", "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00006.safetensors", "model.layers.3.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00006.safetensors", "model.layers.3.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00006.safetensors", "model.layers.3.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00006.safetensors", "model.layers.3.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00006.safetensors", 
"model.layers.3.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00006.safetensors", "model.layers.3.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00006.safetensors", "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00006.safetensors", "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00006.safetensors", "model.layers.3.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00006.safetensors", "model.layers.3.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00006.safetensors", "model.layers.3.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00006.safetensors", "model.layers.3.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00006.safetensors", "model.layers.3.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00006.safetensors", "model.layers.3.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00006.safetensors", "model.layers.4.input_layernorm.weight": "model-00001-of-00006.safetensors", "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00006.safetensors", "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00006.safetensors", "model.layers.4.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00006.safetensors", "model.layers.4.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00006.safetensors", "model.layers.4.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00006.safetensors", "model.layers.4.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00006.safetensors", "model.layers.4.block_sparse_moe.experts.6.w3.weight": 
"model-00001-of-00006.safetensors", "model.layers.4.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00006.safetensors", "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00006.safetensors", "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00006.safetensors", "model.layers.4.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00006.safetensors", "model.layers.4.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00006.safetensors", "model.layers.4.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00006.safetensors", "model.layers.4.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00006.safetensors", "model.layers.4.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00006.safetensors", "model.layers.4.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00006.safetensors", "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00006.safetensors", "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00006.safetensors", "model.layers.4.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00006.safetensors", "model.layers.4.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00006.safetensors", "model.layers.4.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00006.safetensors", "model.layers.4.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00006.safetensors", "model.layers.4.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00006.safetensors", "model.layers.4.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00006.safetensors", "model.layers.5.input_layernorm.weight": "model-00001-of-00006.safetensors", "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", 
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00006.safetensors", "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00006.safetensors", "model.layers.5.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00006.safetensors", "model.layers.5.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00006.safetensors", "model.layers.5.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00006.safetensors", "model.layers.5.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00006.safetensors", "model.layers.5.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00006.safetensors", "model.layers.5.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00006.safetensors", "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00002-of-00006.safetensors", "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00002-of-00006.safetensors", "model.layers.5.block_sparse_moe.experts.2.w1.weight": "model-00002-of-00006.safetensors", "model.layers.5.block_sparse_moe.experts.3.w1.weight": "model-00002-of-00006.safetensors", "model.layers.5.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00006.safetensors", "model.layers.5.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00006.safetensors", "model.layers.5.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00006.safetensors", "model.layers.5.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00006.safetensors", "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00002-of-00006.safetensors", "model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00002-of-00006.safetensors", "model.layers.5.block_sparse_moe.experts.2.w2.weight": "model-00002-of-00006.safetensors", "model.layers.5.block_sparse_moe.experts.3.w2.weight": "model-00002-of-00006.safetensors", "model.layers.5.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00006.safetensors", 
"model.layers.5.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00006.safetensors", "model.layers.5.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00006.safetensors", "model.layers.5.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00006.safetensors", "model.layers.6.input_layernorm.weight": "model-00002-of-00006.safetensors", "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00006.safetensors", "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00006.safetensors", "model.layers.6.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00006.safetensors", "model.layers.6.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00006.safetensors", "model.layers.6.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00006.safetensors", "model.layers.6.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00006.safetensors", "model.layers.6.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00006.safetensors", "model.layers.6.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00006.safetensors", "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00002-of-00006.safetensors", "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00002-of-00006.safetensors", "model.layers.6.block_sparse_moe.experts.2.w1.weight": "model-00002-of-00006.safetensors", "model.layers.6.block_sparse_moe.experts.3.w1.weight": "model-00002-of-00006.safetensors", "model.layers.6.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00006.safetensors", "model.layers.6.block_sparse_moe.experts.5.w1.weight": 
"model-00002-of-00006.safetensors", "model.layers.6.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00006.safetensors", "model.layers.6.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00006.safetensors", "model.layers.6.block_sparse_moe.experts.0.w2.weight": "model-00002-of-00006.safetensors", "model.layers.6.block_sparse_moe.experts.1.w2.weight": "model-00002-of-00006.safetensors", "model.layers.6.block_sparse_moe.experts.2.w2.weight": "model-00002-of-00006.safetensors", "model.layers.6.block_sparse_moe.experts.3.w2.weight": "model-00002-of-00006.safetensors", "model.layers.6.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00006.safetensors", "model.layers.6.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00006.safetensors", "model.layers.6.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00006.safetensors", "model.layers.6.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00006.safetensors", "model.layers.7.input_layernorm.weight": "model-00002-of-00006.safetensors", "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", "model.layers.7.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00006.safetensors", "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00006.safetensors", "model.layers.7.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00006.safetensors", "model.layers.7.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00006.safetensors", "model.layers.7.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00006.safetensors", "model.layers.7.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00006.safetensors", 
"model.layers.7.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00006.safetensors", "model.layers.7.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00006.safetensors", "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00002-of-00006.safetensors", "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00002-of-00006.safetensors", "model.layers.7.block_sparse_moe.experts.2.w1.weight": "model-00002-of-00006.safetensors", "model.layers.7.block_sparse_moe.experts.3.w1.weight": "model-00002-of-00006.safetensors", "model.layers.7.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00006.safetensors", "model.layers.7.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00006.safetensors", "model.layers.7.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00006.safetensors", "model.layers.7.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00006.safetensors", "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00002-of-00006.safetensors", "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00002-of-00006.safetensors", "model.layers.7.block_sparse_moe.experts.2.w2.weight": "model-00002-of-00006.safetensors", "model.layers.7.block_sparse_moe.experts.3.w2.weight": "model-00002-of-00006.safetensors", "model.layers.7.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00006.safetensors", "model.layers.7.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00006.safetensors", "model.layers.7.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00006.safetensors", "model.layers.7.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00006.safetensors", "model.layers.8.input_layernorm.weight": "model-00002-of-00006.safetensors", "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", "model.layers.8.self_attn.o_proj.weight": 
"model-00002-of-00006.safetensors", "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00006.safetensors", "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00006.safetensors", "model.layers.8.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00006.safetensors", "model.layers.8.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00006.safetensors", "model.layers.8.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00006.safetensors", "model.layers.8.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00006.safetensors", "model.layers.8.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00006.safetensors", "model.layers.8.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00006.safetensors", "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00002-of-00006.safetensors", "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00002-of-00006.safetensors", "model.layers.8.block_sparse_moe.experts.2.w1.weight": "model-00002-of-00006.safetensors", "model.layers.8.block_sparse_moe.experts.3.w1.weight": "model-00002-of-00006.safetensors", "model.layers.8.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00006.safetensors", "model.layers.8.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00006.safetensors", "model.layers.8.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00006.safetensors", "model.layers.8.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00006.safetensors", "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00002-of-00006.safetensors", "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00002-of-00006.safetensors", "model.layers.8.block_sparse_moe.experts.2.w2.weight": "model-00002-of-00006.safetensors", "model.layers.8.block_sparse_moe.experts.3.w2.weight": "model-00002-of-00006.safetensors", "model.layers.8.block_sparse_moe.experts.4.w2.weight": 
"model-00002-of-00006.safetensors", "model.layers.8.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00006.safetensors", "model.layers.8.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00006.safetensors", "model.layers.8.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00006.safetensors", "model.layers.9.input_layernorm.weight": "model-00002-of-00006.safetensors", "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00006.safetensors", "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00006.safetensors", "model.layers.9.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00006.safetensors", "model.layers.9.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00006.safetensors", "model.layers.9.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00006.safetensors", "model.layers.9.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00006.safetensors", "model.layers.9.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00006.safetensors", "model.layers.9.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00006.safetensors", "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00002-of-00006.safetensors", "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00002-of-00006.safetensors", "model.layers.9.block_sparse_moe.experts.2.w1.weight": "model-00002-of-00006.safetensors", "model.layers.9.block_sparse_moe.experts.3.w1.weight": "model-00002-of-00006.safetensors", "model.layers.9.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00006.safetensors", 
"model.layers.9.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00006.safetensors", "model.layers.9.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00006.safetensors", "model.layers.9.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00006.safetensors", "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00002-of-00006.safetensors", "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00002-of-00006.safetensors", "model.layers.9.block_sparse_moe.experts.2.w2.weight": "model-00002-of-00006.safetensors", "model.layers.9.block_sparse_moe.experts.3.w2.weight": "model-00002-of-00006.safetensors", "model.layers.9.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00006.safetensors", "model.layers.9.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00006.safetensors", "model.layers.9.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00006.safetensors", "model.layers.9.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00006.safetensors", "model.layers.10.input_layernorm.weight": "model-00002-of-00006.safetensors", "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", "model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00006.safetensors", "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00006.safetensors", "model.layers.10.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00006.safetensors", "model.layers.10.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00006.safetensors", "model.layers.10.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00006.safetensors", "model.layers.10.block_sparse_moe.experts.5.w3.weight": 
"model-00002-of-00006.safetensors", "model.layers.10.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00006.safetensors", "model.layers.10.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00006.safetensors", "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00002-of-00006.safetensors", "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00002-of-00006.safetensors", "model.layers.10.block_sparse_moe.experts.2.w1.weight": "model-00002-of-00006.safetensors", "model.layers.10.block_sparse_moe.experts.3.w1.weight": "model-00002-of-00006.safetensors", "model.layers.10.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00006.safetensors", "model.layers.10.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00006.safetensors", "model.layers.10.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00006.safetensors", "model.layers.10.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00006.safetensors", "model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00002-of-00006.safetensors", "model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00002-of-00006.safetensors", "model.layers.10.block_sparse_moe.experts.2.w2.weight": "model-00002-of-00006.safetensors", "model.layers.10.block_sparse_moe.experts.3.w2.weight": "model-00002-of-00006.safetensors", "model.layers.10.block_sparse_moe.experts.4.w2.weight": "model-00003-of-00006.safetensors", "model.layers.10.block_sparse_moe.experts.5.w2.weight": "model-00003-of-00006.safetensors", "model.layers.10.block_sparse_moe.experts.6.w2.weight": "model-00003-of-00006.safetensors", "model.layers.10.block_sparse_moe.experts.7.w2.weight": "model-00003-of-00006.safetensors", "model.layers.11.input_layernorm.weight": "model-00003-of-00006.safetensors", "model.layers.11.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", "model.layers.11.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", "model.layers.11.self_attn.v_proj.weight": 
"model-00003-of-00006.safetensors", "model.layers.11.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", "model.layers.11.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00006.safetensors", "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00006.safetensors", "model.layers.11.block_sparse_moe.experts.2.w3.weight": "model-00003-of-00006.safetensors", "model.layers.11.block_sparse_moe.experts.3.w3.weight": "model-00003-of-00006.safetensors", "model.layers.11.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00006.safetensors", "model.layers.11.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00006.safetensors", "model.layers.11.block_sparse_moe.experts.6.w3.weight": "model-00003-of-00006.safetensors", "model.layers.11.block_sparse_moe.experts.7.w3.weight": "model-00003-of-00006.safetensors", "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00003-of-00006.safetensors", "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00003-of-00006.safetensors", "model.layers.11.block_sparse_moe.experts.2.w1.weight": "model-00003-of-00006.safetensors", "model.layers.11.block_sparse_moe.experts.3.w1.weight": "model-00003-of-00006.safetensors", "model.layers.11.block_sparse_moe.experts.4.w1.weight": "model-00003-of-00006.safetensors", "model.layers.11.block_sparse_moe.experts.5.w1.weight": "model-00003-of-00006.safetensors", "model.layers.11.block_sparse_moe.experts.6.w1.weight": "model-00003-of-00006.safetensors", "model.layers.11.block_sparse_moe.experts.7.w1.weight": "model-00003-of-00006.safetensors", "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00003-of-00006.safetensors", "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00003-of-00006.safetensors", "model.layers.11.block_sparse_moe.experts.2.w2.weight": "model-00003-of-00006.safetensors", 
"model.layers.11.block_sparse_moe.experts.3.w2.weight": "model-00003-of-00006.safetensors", "model.layers.11.block_sparse_moe.experts.4.w2.weight": "model-00003-of-00006.safetensors", "model.layers.11.block_sparse_moe.experts.5.w2.weight": "model-00003-of-00006.safetensors", "model.layers.11.block_sparse_moe.experts.6.w2.weight": "model-00003-of-00006.safetensors", "model.layers.11.block_sparse_moe.experts.7.w2.weight": "model-00003-of-00006.safetensors", "model.layers.12.input_layernorm.weight": "model-00003-of-00006.safetensors", "model.layers.12.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", "model.layers.12.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", "model.layers.12.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", "model.layers.12.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", "model.layers.12.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", "model.layers.12.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00006.safetensors", "model.layers.12.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00006.safetensors", "model.layers.12.block_sparse_moe.experts.2.w3.weight": "model-00003-of-00006.safetensors", "model.layers.12.block_sparse_moe.experts.3.w3.weight": "model-00003-of-00006.safetensors", "model.layers.12.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00006.safetensors", "model.layers.12.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00006.safetensors", "model.layers.12.block_sparse_moe.experts.6.w3.weight": "model-00003-of-00006.safetensors", "model.layers.12.block_sparse_moe.experts.7.w3.weight": "model-00003-of-00006.safetensors", "model.layers.12.block_sparse_moe.experts.0.w1.weight": "model-00003-of-00006.safetensors", "model.layers.12.block_sparse_moe.experts.1.w1.weight": "model-00003-of-00006.safetensors", "model.layers.12.block_sparse_moe.experts.2.w1.weight": "model-00003-of-00006.safetensors", 
"model.layers.12.block_sparse_moe.experts.3.w1.weight": "model-00003-of-00006.safetensors", "model.layers.12.block_sparse_moe.experts.4.w1.weight": "model-00003-of-00006.safetensors", "model.layers.12.block_sparse_moe.experts.5.w1.weight": "model-00003-of-00006.safetensors", "model.layers.12.block_sparse_moe.experts.6.w1.weight": "model-00003-of-00006.safetensors", "model.layers.12.block_sparse_moe.experts.7.w1.weight": "model-00003-of-00006.safetensors", "model.layers.12.block_sparse_moe.experts.0.w2.weight": "model-00003-of-00006.safetensors", "model.layers.12.block_sparse_moe.experts.1.w2.weight": "model-00003-of-00006.safetensors", "model.layers.12.block_sparse_moe.experts.2.w2.weight": "model-00003-of-00006.safetensors", "model.layers.12.block_sparse_moe.experts.3.w2.weight": "model-00003-of-00006.safetensors", "model.layers.12.block_sparse_moe.experts.4.w2.weight": "model-00003-of-00006.safetensors", "model.layers.12.block_sparse_moe.experts.5.w2.weight": "model-00003-of-00006.safetensors", "model.layers.12.block_sparse_moe.experts.6.w2.weight": "model-00003-of-00006.safetensors", "model.layers.12.block_sparse_moe.experts.7.w2.weight": "model-00003-of-00006.safetensors", "model.layers.13.input_layernorm.weight": "model-00003-of-00006.safetensors", "model.layers.13.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", "model.layers.13.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", "model.layers.13.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", "model.layers.13.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", "model.layers.13.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", "model.layers.13.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00006.safetensors", "model.layers.13.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00006.safetensors", "model.layers.13.block_sparse_moe.experts.2.w3.weight": "model-00003-of-00006.safetensors", 
"model.layers.13.block_sparse_moe.experts.3.w3.weight": "model-00003-of-00006.safetensors", "model.layers.13.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00006.safetensors", "model.layers.13.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00006.safetensors", "model.layers.13.block_sparse_moe.experts.6.w3.weight": "model-00003-of-00006.safetensors", "model.layers.13.block_sparse_moe.experts.7.w3.weight": "model-00003-of-00006.safetensors", "model.layers.13.block_sparse_moe.experts.0.w1.weight": "model-00003-of-00006.safetensors", "model.layers.13.block_sparse_moe.experts.1.w1.weight": "model-00003-of-00006.safetensors", "model.layers.13.block_sparse_moe.experts.2.w1.weight": "model-00003-of-00006.safetensors", "model.layers.13.block_sparse_moe.experts.3.w1.weight": "model-00003-of-00006.safetensors", "model.layers.13.block_sparse_moe.experts.4.w1.weight": "model-00003-of-00006.safetensors", "model.layers.13.block_sparse_moe.experts.5.w1.weight": "model-00003-of-00006.safetensors", "model.layers.13.block_sparse_moe.experts.6.w1.weight": "model-00003-of-00006.safetensors", "model.layers.13.block_sparse_moe.experts.7.w1.weight": "model-00003-of-00006.safetensors", "model.layers.13.block_sparse_moe.experts.0.w2.weight": "model-00003-of-00006.safetensors", "model.layers.13.block_sparse_moe.experts.1.w2.weight": "model-00003-of-00006.safetensors", "model.layers.13.block_sparse_moe.experts.2.w2.weight": "model-00003-of-00006.safetensors", "model.layers.13.block_sparse_moe.experts.3.w2.weight": "model-00003-of-00006.safetensors", "model.layers.13.block_sparse_moe.experts.4.w2.weight": "model-00003-of-00006.safetensors", "model.layers.13.block_sparse_moe.experts.5.w2.weight": "model-00003-of-00006.safetensors", "model.layers.13.block_sparse_moe.experts.6.w2.weight": "model-00003-of-00006.safetensors", "model.layers.13.block_sparse_moe.experts.7.w2.weight": "model-00003-of-00006.safetensors", "model.layers.14.input_layernorm.weight": 
"model-00003-of-00006.safetensors", "model.layers.14.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", "model.layers.14.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", "model.layers.14.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", "model.layers.14.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", "model.layers.14.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", "model.layers.14.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00006.safetensors", "model.layers.14.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00006.safetensors", "model.layers.14.block_sparse_moe.experts.2.w3.weight": "model-00003-of-00006.safetensors", "model.layers.14.block_sparse_moe.experts.3.w3.weight": "model-00003-of-00006.safetensors", "model.layers.14.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00006.safetensors", "model.layers.14.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00006.safetensors", "model.layers.14.block_sparse_moe.experts.6.w3.weight": "model-00003-of-00006.safetensors", "model.layers.14.block_sparse_moe.experts.7.w3.weight": "model-00003-of-00006.safetensors", "model.layers.14.block_sparse_moe.experts.0.w1.weight": "model-00003-of-00006.safetensors", "model.layers.14.block_sparse_moe.experts.1.w1.weight": "model-00003-of-00006.safetensors", "model.layers.14.block_sparse_moe.experts.2.w1.weight": "model-00003-of-00006.safetensors", "model.layers.14.block_sparse_moe.experts.3.w1.weight": "model-00003-of-00006.safetensors", "model.layers.14.block_sparse_moe.experts.4.w1.weight": "model-00003-of-00006.safetensors", "model.layers.14.block_sparse_moe.experts.5.w1.weight": "model-00003-of-00006.safetensors", "model.layers.14.block_sparse_moe.experts.6.w1.weight": "model-00003-of-00006.safetensors", "model.layers.14.block_sparse_moe.experts.7.w1.weight": "model-00003-of-00006.safetensors", "model.layers.14.block_sparse_moe.experts.0.w2.weight": 
"model-00003-of-00006.safetensors", "model.layers.14.block_sparse_moe.experts.1.w2.weight": "model-00003-of-00006.safetensors", "model.layers.14.block_sparse_moe.experts.2.w2.weight": "model-00003-of-00006.safetensors", "model.layers.14.block_sparse_moe.experts.3.w2.weight": "model-00003-of-00006.safetensors", "model.layers.14.block_sparse_moe.experts.4.w2.weight": "model-00003-of-00006.safetensors", "model.layers.14.block_sparse_moe.experts.5.w2.weight": "model-00003-of-00006.safetensors", "model.layers.14.block_sparse_moe.experts.6.w2.weight": "model-00003-of-00006.safetensors", "model.layers.14.block_sparse_moe.experts.7.w2.weight": "model-00003-of-00006.safetensors", "model.layers.15.input_layernorm.weight": "model-00003-of-00006.safetensors", "model.layers.15.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", "model.layers.15.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", "model.layers.15.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", "model.layers.15.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", "model.layers.15.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", "model.layers.15.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00006.safetensors", "model.layers.15.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00006.safetensors", "model.layers.15.block_sparse_moe.experts.2.w3.weight": "model-00003-of-00006.safetensors", "model.layers.15.block_sparse_moe.experts.3.w3.weight": "model-00003-of-00006.safetensors", "model.layers.15.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00006.safetensors", "model.layers.15.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00006.safetensors", "model.layers.15.block_sparse_moe.experts.6.w3.weight": "model-00003-of-00006.safetensors", "model.layers.15.block_sparse_moe.experts.7.w3.weight": "model-00003-of-00006.safetensors", "model.layers.15.block_sparse_moe.experts.0.w1.weight": "model-00003-of-00006.safetensors", 
"model.layers.15.block_sparse_moe.experts.1.w1.weight": "model-00003-of-00006.safetensors", "model.layers.15.block_sparse_moe.experts.2.w1.weight": "model-00003-of-00006.safetensors", "model.layers.15.block_sparse_moe.experts.3.w1.weight": "model-00003-of-00006.safetensors", "model.layers.15.block_sparse_moe.experts.4.w1.weight": "model-00003-of-00006.safetensors", "model.layers.15.block_sparse_moe.experts.5.w1.weight": "model-00003-of-00006.safetensors", "model.layers.15.block_sparse_moe.experts.6.w1.weight": "model-00003-of-00006.safetensors", "model.layers.15.block_sparse_moe.experts.7.w1.weight": "model-00003-of-00006.safetensors", "model.layers.15.block_sparse_moe.experts.0.w2.weight": "model-00003-of-00006.safetensors", "model.layers.15.block_sparse_moe.experts.1.w2.weight": "model-00003-of-00006.safetensors", "model.layers.15.block_sparse_moe.experts.2.w2.weight": "model-00003-of-00006.safetensors", "model.layers.15.block_sparse_moe.experts.3.w2.weight": "model-00003-of-00006.safetensors", "model.layers.15.block_sparse_moe.experts.4.w2.weight": "model-00003-of-00006.safetensors", "model.layers.15.block_sparse_moe.experts.5.w2.weight": "model-00003-of-00006.safetensors", "model.layers.15.block_sparse_moe.experts.6.w2.weight": "model-00003-of-00006.safetensors", "model.layers.15.block_sparse_moe.experts.7.w2.weight": "model-00003-of-00006.safetensors", "model.layers.16.input_layernorm.weight": "model-00003-of-00006.safetensors", "model.layers.16.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", "model.layers.16.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", "model.layers.16.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", "model.layers.16.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", "model.layers.16.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", "model.layers.16.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00006.safetensors", 
"model.layers.16.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00006.safetensors", "model.layers.16.block_sparse_moe.experts.2.w3.weight": "model-00003-of-00006.safetensors", "model.layers.16.block_sparse_moe.experts.3.w3.weight": "model-00003-of-00006.safetensors", "model.layers.16.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00006.safetensors", "model.layers.16.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00006.safetensors", "model.layers.16.block_sparse_moe.experts.6.w3.weight": "model-00003-of-00006.safetensors", "model.layers.16.block_sparse_moe.experts.7.w3.weight": "model-00003-of-00006.safetensors", "model.layers.16.block_sparse_moe.experts.0.w1.weight": "model-00004-of-00006.safetensors", "model.layers.16.block_sparse_moe.experts.1.w1.weight": "model-00004-of-00006.safetensors", "model.layers.16.block_sparse_moe.experts.2.w1.weight": "model-00004-of-00006.safetensors", "model.layers.16.block_sparse_moe.experts.3.w1.weight": "model-00004-of-00006.safetensors", "model.layers.16.block_sparse_moe.experts.4.w1.weight": "model-00004-of-00006.safetensors", "model.layers.16.block_sparse_moe.experts.5.w1.weight": "model-00004-of-00006.safetensors", "model.layers.16.block_sparse_moe.experts.6.w1.weight": "model-00004-of-00006.safetensors", "model.layers.16.block_sparse_moe.experts.7.w1.weight": "model-00004-of-00006.safetensors", "model.layers.16.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00006.safetensors", "model.layers.16.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00006.safetensors", "model.layers.16.block_sparse_moe.experts.2.w2.weight": "model-00004-of-00006.safetensors", "model.layers.16.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00006.safetensors", "model.layers.16.block_sparse_moe.experts.4.w2.weight": "model-00004-of-00006.safetensors", "model.layers.16.block_sparse_moe.experts.5.w2.weight": "model-00004-of-00006.safetensors", "model.layers.16.block_sparse_moe.experts.6.w2.weight": 
"model-00004-of-00006.safetensors", "model.layers.16.block_sparse_moe.experts.7.w2.weight": "model-00004-of-00006.safetensors", "model.layers.17.input_layernorm.weight": "model-00004-of-00006.safetensors", "model.layers.17.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", "model.layers.17.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", "model.layers.17.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", "model.layers.17.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", "model.layers.17.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", "model.layers.17.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00006.safetensors", "model.layers.17.block_sparse_moe.experts.1.w3.weight": "model-00004-of-00006.safetensors", "model.layers.17.block_sparse_moe.experts.2.w3.weight": "model-00004-of-00006.safetensors", "model.layers.17.block_sparse_moe.experts.3.w3.weight": "model-00004-of-00006.safetensors", "model.layers.17.block_sparse_moe.experts.4.w3.weight": "model-00004-of-00006.safetensors", "model.layers.17.block_sparse_moe.experts.5.w3.weight": "model-00004-of-00006.safetensors", "model.layers.17.block_sparse_moe.experts.6.w3.weight": "model-00004-of-00006.safetensors", "model.layers.17.block_sparse_moe.experts.7.w3.weight": "model-00004-of-00006.safetensors", "model.layers.17.block_sparse_moe.experts.0.w1.weight": "model-00004-of-00006.safetensors", "model.layers.17.block_sparse_moe.experts.1.w1.weight": "model-00004-of-00006.safetensors", "model.layers.17.block_sparse_moe.experts.2.w1.weight": "model-00004-of-00006.safetensors", "model.layers.17.block_sparse_moe.experts.3.w1.weight": "model-00004-of-00006.safetensors", "model.layers.17.block_sparse_moe.experts.4.w1.weight": "model-00004-of-00006.safetensors", "model.layers.17.block_sparse_moe.experts.5.w1.weight": "model-00004-of-00006.safetensors", "model.layers.17.block_sparse_moe.experts.6.w1.weight": "model-00004-of-00006.safetensors", 
"model.layers.17.block_sparse_moe.experts.7.w1.weight": "model-00004-of-00006.safetensors", "model.layers.17.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00006.safetensors", "model.layers.17.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00006.safetensors", "model.layers.17.block_sparse_moe.experts.2.w2.weight": "model-00004-of-00006.safetensors", "model.layers.17.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00006.safetensors", "model.layers.17.block_sparse_moe.experts.4.w2.weight": "model-00004-of-00006.safetensors", "model.layers.17.block_sparse_moe.experts.5.w2.weight": "model-00004-of-00006.safetensors", "model.layers.17.block_sparse_moe.experts.6.w2.weight": "model-00004-of-00006.safetensors", "model.layers.17.block_sparse_moe.experts.7.w2.weight": "model-00004-of-00006.safetensors", "model.layers.18.input_layernorm.weight": "model-00004-of-00006.safetensors", "model.layers.18.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", "model.layers.18.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", "model.layers.18.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", "model.layers.18.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", "model.layers.18.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", "model.layers.18.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00006.safetensors", "model.layers.18.block_sparse_moe.experts.1.w3.weight": "model-00004-of-00006.safetensors", "model.layers.18.block_sparse_moe.experts.2.w3.weight": "model-00004-of-00006.safetensors", "model.layers.18.block_sparse_moe.experts.3.w3.weight": "model-00004-of-00006.safetensors", "model.layers.18.block_sparse_moe.experts.4.w3.weight": "model-00004-of-00006.safetensors", "model.layers.18.block_sparse_moe.experts.5.w3.weight": "model-00004-of-00006.safetensors", "model.layers.18.block_sparse_moe.experts.6.w3.weight": "model-00004-of-00006.safetensors", 
"model.layers.18.block_sparse_moe.experts.7.w3.weight": "model-00004-of-00006.safetensors", "model.layers.18.block_sparse_moe.experts.0.w1.weight": "model-00004-of-00006.safetensors", "model.layers.18.block_sparse_moe.experts.1.w1.weight": "model-00004-of-00006.safetensors", "model.layers.18.block_sparse_moe.experts.2.w1.weight": "model-00004-of-00006.safetensors", "model.layers.18.block_sparse_moe.experts.3.w1.weight": "model-00004-of-00006.safetensors", "model.layers.18.block_sparse_moe.experts.4.w1.weight": "model-00004-of-00006.safetensors", "model.layers.18.block_sparse_moe.experts.5.w1.weight": "model-00004-of-00006.safetensors", "model.layers.18.block_sparse_moe.experts.6.w1.weight": "model-00004-of-00006.safetensors", "model.layers.18.block_sparse_moe.experts.7.w1.weight": "model-00004-of-00006.safetensors", "model.layers.18.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00006.safetensors", "model.layers.18.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00006.safetensors", "model.layers.18.block_sparse_moe.experts.2.w2.weight": "model-00004-of-00006.safetensors", "model.layers.18.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00006.safetensors", "model.layers.18.block_sparse_moe.experts.4.w2.weight": "model-00004-of-00006.safetensors", "model.layers.18.block_sparse_moe.experts.5.w2.weight": "model-00004-of-00006.safetensors", "model.layers.18.block_sparse_moe.experts.6.w2.weight": "model-00004-of-00006.safetensors", "model.layers.18.block_sparse_moe.experts.7.w2.weight": "model-00004-of-00006.safetensors", "model.layers.19.input_layernorm.weight": "model-00004-of-00006.safetensors", "model.layers.19.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", "model.layers.19.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", "model.layers.19.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", "model.layers.19.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", 
"model.layers.19.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", "model.layers.19.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00006.safetensors", "model.layers.19.block_sparse_moe.experts.1.w3.weight": "model-00004-of-00006.safetensors", "model.layers.19.block_sparse_moe.experts.2.w3.weight": "model-00004-of-00006.safetensors", "model.layers.19.block_sparse_moe.experts.3.w3.weight": "model-00004-of-00006.safetensors", "model.layers.19.block_sparse_moe.experts.4.w3.weight": "model-00004-of-00006.safetensors", "model.layers.19.block_sparse_moe.experts.5.w3.weight": "model-00004-of-00006.safetensors", "model.layers.19.block_sparse_moe.experts.6.w3.weight": "model-00004-of-00006.safetensors", "model.layers.19.block_sparse_moe.experts.7.w3.weight": "model-00004-of-00006.safetensors", "model.layers.19.block_sparse_moe.experts.0.w1.weight": "model-00004-of-00006.safetensors", "model.layers.19.block_sparse_moe.experts.1.w1.weight": "model-00004-of-00006.safetensors", "model.layers.19.block_sparse_moe.experts.2.w1.weight": "model-00004-of-00006.safetensors", "model.layers.19.block_sparse_moe.experts.3.w1.weight": "model-00004-of-00006.safetensors", "model.layers.19.block_sparse_moe.experts.4.w1.weight": "model-00004-of-00006.safetensors", "model.layers.19.block_sparse_moe.experts.5.w1.weight": "model-00004-of-00006.safetensors", "model.layers.19.block_sparse_moe.experts.6.w1.weight": "model-00004-of-00006.safetensors", "model.layers.19.block_sparse_moe.experts.7.w1.weight": "model-00004-of-00006.safetensors", "model.layers.19.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00006.safetensors", "model.layers.19.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00006.safetensors", "model.layers.19.block_sparse_moe.experts.2.w2.weight": "model-00004-of-00006.safetensors", "model.layers.19.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00006.safetensors", "model.layers.19.block_sparse_moe.experts.4.w2.weight": 
"model-00004-of-00006.safetensors", "model.layers.19.block_sparse_moe.experts.5.w2.weight": "model-00004-of-00006.safetensors", "model.layers.19.block_sparse_moe.experts.6.w2.weight": "model-00004-of-00006.safetensors", "model.layers.19.block_sparse_moe.experts.7.w2.weight": "model-00004-of-00006.safetensors", "model.layers.20.input_layernorm.weight": "model-00004-of-00006.safetensors", "model.layers.20.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", "model.layers.20.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", "model.layers.20.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", "model.layers.20.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", "model.layers.20.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", "model.layers.20.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00006.safetensors", "model.layers.20.block_sparse_moe.experts.1.w3.weight": "model-00004-of-00006.safetensors", "model.layers.20.block_sparse_moe.experts.2.w3.weight": "model-00004-of-00006.safetensors", "model.layers.20.block_sparse_moe.experts.3.w3.weight": "model-00004-of-00006.safetensors", "model.layers.20.block_sparse_moe.experts.4.w3.weight": "model-00004-of-00006.safetensors", "model.layers.20.block_sparse_moe.experts.5.w3.weight": "model-00004-of-00006.safetensors", "model.layers.20.block_sparse_moe.experts.6.w3.weight": "model-00004-of-00006.safetensors", "model.layers.20.block_sparse_moe.experts.7.w3.weight": "model-00004-of-00006.safetensors", "model.layers.20.block_sparse_moe.experts.0.w1.weight": "model-00004-of-00006.safetensors", "model.layers.20.block_sparse_moe.experts.1.w1.weight": "model-00004-of-00006.safetensors", "model.layers.20.block_sparse_moe.experts.2.w1.weight": "model-00004-of-00006.safetensors", "model.layers.20.block_sparse_moe.experts.3.w1.weight": "model-00004-of-00006.safetensors", "model.layers.20.block_sparse_moe.experts.4.w1.weight": "model-00004-of-00006.safetensors", 
"model.layers.20.block_sparse_moe.experts.5.w1.weight": "model-00004-of-00006.safetensors", "model.layers.20.block_sparse_moe.experts.6.w1.weight": "model-00004-of-00006.safetensors", "model.layers.20.block_sparse_moe.experts.7.w1.weight": "model-00004-of-00006.safetensors", "model.layers.20.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00006.safetensors", "model.layers.20.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00006.safetensors", "model.layers.20.block_sparse_moe.experts.2.w2.weight": "model-00004-of-00006.safetensors", "model.layers.20.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00006.safetensors", "model.layers.20.block_sparse_moe.experts.4.w2.weight": "model-00004-of-00006.safetensors", "model.layers.20.block_sparse_moe.experts.5.w2.weight": "model-00004-of-00006.safetensors", "model.layers.20.block_sparse_moe.experts.6.w2.weight": "model-00004-of-00006.safetensors", "model.layers.20.block_sparse_moe.experts.7.w2.weight": "model-00004-of-00006.safetensors", "model.layers.21.input_layernorm.weight": "model-00004-of-00006.safetensors", "model.layers.21.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", "model.layers.21.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", "model.layers.21.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", "model.layers.21.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", "model.layers.21.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", "model.layers.21.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00006.safetensors", "model.layers.21.block_sparse_moe.experts.1.w3.weight": "model-00004-of-00006.safetensors", "model.layers.21.block_sparse_moe.experts.2.w3.weight": "model-00004-of-00006.safetensors", "model.layers.21.block_sparse_moe.experts.3.w3.weight": "model-00004-of-00006.safetensors", "model.layers.21.block_sparse_moe.experts.4.w3.weight": "model-00004-of-00006.safetensors", 
"model.layers.21.block_sparse_moe.experts.5.w3.weight": "model-00004-of-00006.safetensors", "model.layers.21.block_sparse_moe.experts.6.w3.weight": "model-00004-of-00006.safetensors", "model.layers.21.block_sparse_moe.experts.7.w3.weight": "model-00004-of-00006.safetensors", "model.layers.21.block_sparse_moe.experts.0.w1.weight": "model-00004-of-00006.safetensors", "model.layers.21.block_sparse_moe.experts.1.w1.weight": "model-00004-of-00006.safetensors", "model.layers.21.block_sparse_moe.experts.2.w1.weight": "model-00004-of-00006.safetensors", "model.layers.21.block_sparse_moe.experts.3.w1.weight": "model-00004-of-00006.safetensors", "model.layers.21.block_sparse_moe.experts.4.w1.weight": "model-00004-of-00006.safetensors", "model.layers.21.block_sparse_moe.experts.5.w1.weight": "model-00004-of-00006.safetensors", "model.layers.21.block_sparse_moe.experts.6.w1.weight": "model-00004-of-00006.safetensors", "model.layers.21.block_sparse_moe.experts.7.w1.weight": "model-00004-of-00006.safetensors", "model.layers.21.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00006.safetensors", "model.layers.21.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00006.safetensors", "model.layers.21.block_sparse_moe.experts.2.w2.weight": "model-00004-of-00006.safetensors", "model.layers.21.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00006.safetensors", "model.layers.21.block_sparse_moe.experts.4.w2.weight": "model-00004-of-00006.safetensors", "model.layers.21.block_sparse_moe.experts.5.w2.weight": "model-00005-of-00006.safetensors", "model.layers.21.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00006.safetensors", "model.layers.21.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00006.safetensors", "model.layers.22.input_layernorm.weight": "model-00005-of-00006.safetensors", "model.layers.22.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", "model.layers.22.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", 
"model.layers.22.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", "model.layers.22.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", "model.layers.22.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", "model.layers.22.block_sparse_moe.experts.0.w3.weight": "model-00005-of-00006.safetensors", "model.layers.22.block_sparse_moe.experts.1.w3.weight": "model-00005-of-00006.safetensors", "model.layers.22.block_sparse_moe.experts.2.w3.weight": "model-00005-of-00006.safetensors", "model.layers.22.block_sparse_moe.experts.3.w3.weight": "model-00005-of-00006.safetensors", "model.layers.22.block_sparse_moe.experts.4.w3.weight": "model-00005-of-00006.safetensors", "model.layers.22.block_sparse_moe.experts.5.w3.weight": "model-00005-of-00006.safetensors", "model.layers.22.block_sparse_moe.experts.6.w3.weight": "model-00005-of-00006.safetensors", "model.layers.22.block_sparse_moe.experts.7.w3.weight": "model-00005-of-00006.safetensors", "model.layers.22.block_sparse_moe.experts.0.w1.weight": "model-00005-of-00006.safetensors", "model.layers.22.block_sparse_moe.experts.1.w1.weight": "model-00005-of-00006.safetensors", "model.layers.22.block_sparse_moe.experts.2.w1.weight": "model-00005-of-00006.safetensors", "model.layers.22.block_sparse_moe.experts.3.w1.weight": "model-00005-of-00006.safetensors", "model.layers.22.block_sparse_moe.experts.4.w1.weight": "model-00005-of-00006.safetensors", "model.layers.22.block_sparse_moe.experts.5.w1.weight": "model-00005-of-00006.safetensors", "model.layers.22.block_sparse_moe.experts.6.w1.weight": "model-00005-of-00006.safetensors", "model.layers.22.block_sparse_moe.experts.7.w1.weight": "model-00005-of-00006.safetensors", "model.layers.22.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00006.safetensors", "model.layers.22.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00006.safetensors", "model.layers.22.block_sparse_moe.experts.2.w2.weight": "model-00005-of-00006.safetensors", 
"model.layers.22.block_sparse_moe.experts.3.w2.weight": "model-00005-of-00006.safetensors", "model.layers.22.block_sparse_moe.experts.4.w2.weight": "model-00005-of-00006.safetensors", "model.layers.22.block_sparse_moe.experts.5.w2.weight": "model-00005-of-00006.safetensors", "model.layers.22.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00006.safetensors", "model.layers.22.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00006.safetensors", "model.layers.23.input_layernorm.weight": "model-00005-of-00006.safetensors", "model.layers.23.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", "model.layers.23.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", "model.layers.23.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", "model.layers.23.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", "model.layers.23.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", "model.layers.23.block_sparse_moe.experts.0.w3.weight": "model-00005-of-00006.safetensors", "model.layers.23.block_sparse_moe.experts.1.w3.weight": "model-00005-of-00006.safetensors", "model.layers.23.block_sparse_moe.experts.2.w3.weight": "model-00005-of-00006.safetensors", "model.layers.23.block_sparse_moe.experts.3.w3.weight": "model-00005-of-00006.safetensors", "model.layers.23.block_sparse_moe.experts.4.w3.weight": "model-00005-of-00006.safetensors", "model.layers.23.block_sparse_moe.experts.5.w3.weight": "model-00005-of-00006.safetensors", "model.layers.23.block_sparse_moe.experts.6.w3.weight": "model-00005-of-00006.safetensors", "model.layers.23.block_sparse_moe.experts.7.w3.weight": "model-00005-of-00006.safetensors", "model.layers.23.block_sparse_moe.experts.0.w1.weight": "model-00005-of-00006.safetensors", "model.layers.23.block_sparse_moe.experts.1.w1.weight": "model-00005-of-00006.safetensors", "model.layers.23.block_sparse_moe.experts.2.w1.weight": "model-00005-of-00006.safetensors", 
"model.layers.23.block_sparse_moe.experts.3.w1.weight": "model-00005-of-00006.safetensors", "model.layers.23.block_sparse_moe.experts.4.w1.weight": "model-00005-of-00006.safetensors", "model.layers.23.block_sparse_moe.experts.5.w1.weight": "model-00005-of-00006.safetensors", "model.layers.23.block_sparse_moe.experts.6.w1.weight": "model-00005-of-00006.safetensors", "model.layers.23.block_sparse_moe.experts.7.w1.weight": "model-00005-of-00006.safetensors", "model.layers.23.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00006.safetensors", "model.layers.23.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00006.safetensors", "model.layers.23.block_sparse_moe.experts.2.w2.weight": "model-00005-of-00006.safetensors", "model.layers.23.block_sparse_moe.experts.3.w2.weight": "model-00005-of-00006.safetensors", "model.layers.23.block_sparse_moe.experts.4.w2.weight": "model-00005-of-00006.safetensors", "model.layers.23.block_sparse_moe.experts.5.w2.weight": "model-00005-of-00006.safetensors", "model.layers.23.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00006.safetensors", "model.layers.23.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00006.safetensors", "model.layers.24.input_layernorm.weight": "model-00005-of-00006.safetensors", "model.layers.24.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", "model.layers.24.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", "model.layers.24.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", "model.layers.24.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", "model.layers.24.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", "model.layers.24.block_sparse_moe.experts.0.w3.weight": "model-00005-of-00006.safetensors", "model.layers.24.block_sparse_moe.experts.1.w3.weight": "model-00005-of-00006.safetensors", "model.layers.24.block_sparse_moe.experts.2.w3.weight": "model-00005-of-00006.safetensors", 
"model.layers.24.block_sparse_moe.experts.3.w3.weight": "model-00005-of-00006.safetensors", "model.layers.24.block_sparse_moe.experts.4.w3.weight": "model-00005-of-00006.safetensors", "model.layers.24.block_sparse_moe.experts.5.w3.weight": "model-00005-of-00006.safetensors", "model.layers.24.block_sparse_moe.experts.6.w3.weight": "model-00005-of-00006.safetensors", "model.layers.24.block_sparse_moe.experts.7.w3.weight": "model-00005-of-00006.safetensors", "model.layers.24.block_sparse_moe.experts.0.w1.weight": "model-00005-of-00006.safetensors", "model.layers.24.block_sparse_moe.experts.1.w1.weight": "model-00005-of-00006.safetensors", "model.layers.24.block_sparse_moe.experts.2.w1.weight": "model-00005-of-00006.safetensors", "model.layers.24.block_sparse_moe.experts.3.w1.weight": "model-00005-of-00006.safetensors", "model.layers.24.block_sparse_moe.experts.4.w1.weight": "model-00005-of-00006.safetensors", "model.layers.24.block_sparse_moe.experts.5.w1.weight": "model-00005-of-00006.safetensors", "model.layers.24.block_sparse_moe.experts.6.w1.weight": "model-00005-of-00006.safetensors", "model.layers.24.block_sparse_moe.experts.7.w1.weight": "model-00005-of-00006.safetensors", "model.layers.24.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00006.safetensors", "model.layers.24.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00006.safetensors", "model.layers.24.block_sparse_moe.experts.2.w2.weight": "model-00005-of-00006.safetensors", "model.layers.24.block_sparse_moe.experts.3.w2.weight": "model-00005-of-00006.safetensors", "model.layers.24.block_sparse_moe.experts.4.w2.weight": "model-00005-of-00006.safetensors", "model.layers.24.block_sparse_moe.experts.5.w2.weight": "model-00005-of-00006.safetensors", "model.layers.24.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00006.safetensors", "model.layers.24.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00006.safetensors", "model.layers.25.input_layernorm.weight": 
"model-00005-of-00006.safetensors", "model.layers.25.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", "model.layers.25.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", "model.layers.25.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", "model.layers.25.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", "model.layers.25.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", "model.layers.25.block_sparse_moe.experts.0.w3.weight": "model-00005-of-00006.safetensors", "model.layers.25.block_sparse_moe.experts.1.w3.weight": "model-00005-of-00006.safetensors", "model.layers.25.block_sparse_moe.experts.2.w3.weight": "model-00005-of-00006.safetensors", "model.layers.25.block_sparse_moe.experts.3.w3.weight": "model-00005-of-00006.safetensors", "model.layers.25.block_sparse_moe.experts.4.w3.weight": "model-00005-of-00006.safetensors", "model.layers.25.block_sparse_moe.experts.5.w3.weight": "model-00005-of-00006.safetensors", "model.layers.25.block_sparse_moe.experts.6.w3.weight": "model-00005-of-00006.safetensors", "model.layers.25.block_sparse_moe.experts.7.w3.weight": "model-00005-of-00006.safetensors", "model.layers.25.block_sparse_moe.experts.0.w1.weight": "model-00005-of-00006.safetensors", "model.layers.25.block_sparse_moe.experts.1.w1.weight": "model-00005-of-00006.safetensors", "model.layers.25.block_sparse_moe.experts.2.w1.weight": "model-00005-of-00006.safetensors", "model.layers.25.block_sparse_moe.experts.3.w1.weight": "model-00005-of-00006.safetensors", "model.layers.25.block_sparse_moe.experts.4.w1.weight": "model-00005-of-00006.safetensors", "model.layers.25.block_sparse_moe.experts.5.w1.weight": "model-00005-of-00006.safetensors", "model.layers.25.block_sparse_moe.experts.6.w1.weight": "model-00005-of-00006.safetensors", "model.layers.25.block_sparse_moe.experts.7.w1.weight": "model-00005-of-00006.safetensors", "model.layers.25.block_sparse_moe.experts.0.w2.weight": 
"model-00005-of-00006.safetensors", "model.layers.25.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00006.safetensors", "model.layers.25.block_sparse_moe.experts.2.w2.weight": "model-00005-of-00006.safetensors", "model.layers.25.block_sparse_moe.experts.3.w2.weight": "model-00005-of-00006.safetensors", "model.layers.25.block_sparse_moe.experts.4.w2.weight": "model-00005-of-00006.safetensors", "model.layers.25.block_sparse_moe.experts.5.w2.weight": "model-00005-of-00006.safetensors", "model.layers.25.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00006.safetensors", "model.layers.25.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00006.safetensors", "model.layers.26.input_layernorm.weight": "model-00005-of-00006.safetensors", "model.layers.26.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", "model.layers.26.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", "model.layers.26.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", "model.layers.26.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", "model.layers.26.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", "model.layers.26.block_sparse_moe.experts.0.w3.weight": "model-00005-of-00006.safetensors", "model.layers.26.block_sparse_moe.experts.1.w3.weight": "model-00005-of-00006.safetensors", "model.layers.26.block_sparse_moe.experts.2.w3.weight": "model-00005-of-00006.safetensors", "model.layers.26.block_sparse_moe.experts.3.w3.weight": "model-00005-of-00006.safetensors", "model.layers.26.block_sparse_moe.experts.4.w3.weight": "model-00005-of-00006.safetensors", "model.layers.26.block_sparse_moe.experts.5.w3.weight": "model-00005-of-00006.safetensors", "model.layers.26.block_sparse_moe.experts.6.w3.weight": "model-00005-of-00006.safetensors", "model.layers.26.block_sparse_moe.experts.7.w3.weight": "model-00005-of-00006.safetensors", "model.layers.26.block_sparse_moe.experts.0.w1.weight": "model-00005-of-00006.safetensors", 
"model.layers.26.block_sparse_moe.experts.1.w1.weight": "model-00005-of-00006.safetensors", "model.layers.26.block_sparse_moe.experts.2.w1.weight": "model-00005-of-00006.safetensors", "model.layers.26.block_sparse_moe.experts.3.w1.weight": "model-00005-of-00006.safetensors", "model.layers.26.block_sparse_moe.experts.4.w1.weight": "model-00005-of-00006.safetensors", "model.layers.26.block_sparse_moe.experts.5.w1.weight": "model-00005-of-00006.safetensors", "model.layers.26.block_sparse_moe.experts.6.w1.weight": "model-00005-of-00006.safetensors", "model.layers.26.block_sparse_moe.experts.7.w1.weight": "model-00005-of-00006.safetensors", "model.layers.26.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00006.safetensors", "model.layers.26.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00006.safetensors", "model.layers.26.block_sparse_moe.experts.2.w2.weight": "model-00005-of-00006.safetensors", "model.layers.26.block_sparse_moe.experts.3.w2.weight": "model-00005-of-00006.safetensors", "model.layers.26.block_sparse_moe.experts.4.w2.weight": "model-00005-of-00006.safetensors", "model.layers.26.block_sparse_moe.experts.5.w2.weight": "model-00005-of-00006.safetensors", "model.layers.26.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00006.safetensors", "model.layers.26.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00006.safetensors", "model.layers.27.input_layernorm.weight": "model-00005-of-00006.safetensors", "model.layers.27.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", "model.layers.27.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", "model.layers.27.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", "model.layers.27.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", "model.layers.27.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", "model.layers.27.block_sparse_moe.experts.0.w3.weight": "model-00005-of-00006.safetensors", 
"model.layers.27.block_sparse_moe.experts.1.w3.weight": "model-00005-of-00006.safetensors", "model.layers.27.block_sparse_moe.experts.2.w3.weight": "model-00005-of-00006.safetensors", "model.layers.27.block_sparse_moe.experts.3.w3.weight": "model-00005-of-00006.safetensors", "model.layers.27.block_sparse_moe.experts.4.w3.weight": "model-00005-of-00006.safetensors", "model.layers.27.block_sparse_moe.experts.5.w3.weight": "model-00005-of-00006.safetensors", "model.layers.27.block_sparse_moe.experts.6.w3.weight": "model-00005-of-00006.safetensors", "model.layers.27.block_sparse_moe.experts.7.w3.weight": "model-00005-of-00006.safetensors", "model.layers.27.block_sparse_moe.experts.0.w1.weight": "model-00005-of-00006.safetensors", "model.layers.27.block_sparse_moe.experts.1.w1.weight": "model-00006-of-00006.safetensors", "model.layers.27.block_sparse_moe.experts.2.w1.weight": "model-00006-of-00006.safetensors", "model.layers.27.block_sparse_moe.experts.3.w1.weight": "model-00006-of-00006.safetensors", "model.layers.27.block_sparse_moe.experts.4.w1.weight": "model-00006-of-00006.safetensors", "model.layers.27.block_sparse_moe.experts.5.w1.weight": "model-00006-of-00006.safetensors", "model.layers.27.block_sparse_moe.experts.6.w1.weight": "model-00006-of-00006.safetensors", "model.layers.27.block_sparse_moe.experts.7.w1.weight": "model-00006-of-00006.safetensors", "model.layers.27.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00006.safetensors", "model.layers.27.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00006.safetensors", "model.layers.27.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00006.safetensors", "model.layers.27.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00006.safetensors", "model.layers.27.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00006.safetensors", "model.layers.27.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00006.safetensors", "model.layers.27.block_sparse_moe.experts.6.w2.weight": 
"model-00006-of-00006.safetensors", "model.layers.27.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00006.safetensors", "model.layers.28.input_layernorm.weight": "model-00006-of-00006.safetensors", "model.layers.28.self_attn.q_proj.weight": "model-00006-of-00006.safetensors", "model.layers.28.self_attn.k_proj.weight": "model-00006-of-00006.safetensors", "model.layers.28.self_attn.v_proj.weight": "model-00006-of-00006.safetensors", "model.layers.28.self_attn.o_proj.weight": "model-00006-of-00006.safetensors", "model.layers.28.post_attention_layernorm.weight": "model-00006-of-00006.safetensors", "model.layers.28.block_sparse_moe.experts.0.w3.weight": "model-00006-of-00006.safetensors", "model.layers.28.block_sparse_moe.experts.1.w3.weight": "model-00006-of-00006.safetensors", "model.layers.28.block_sparse_moe.experts.2.w3.weight": "model-00006-of-00006.safetensors", "model.layers.28.block_sparse_moe.experts.3.w3.weight": "model-00006-of-00006.safetensors", "model.layers.28.block_sparse_moe.experts.4.w3.weight": "model-00006-of-00006.safetensors", "model.layers.28.block_sparse_moe.experts.5.w3.weight": "model-00006-of-00006.safetensors", "model.layers.28.block_sparse_moe.experts.6.w3.weight": "model-00006-of-00006.safetensors", "model.layers.28.block_sparse_moe.experts.7.w3.weight": "model-00006-of-00006.safetensors", "model.layers.28.block_sparse_moe.experts.0.w1.weight": "model-00006-of-00006.safetensors", "model.layers.28.block_sparse_moe.experts.1.w1.weight": "model-00006-of-00006.safetensors", "model.layers.28.block_sparse_moe.experts.2.w1.weight": "model-00006-of-00006.safetensors", "model.layers.28.block_sparse_moe.experts.3.w1.weight": "model-00006-of-00006.safetensors", "model.layers.28.block_sparse_moe.experts.4.w1.weight": "model-00006-of-00006.safetensors", "model.layers.28.block_sparse_moe.experts.5.w1.weight": "model-00006-of-00006.safetensors", "model.layers.28.block_sparse_moe.experts.6.w1.weight": "model-00006-of-00006.safetensors", 
"model.layers.28.block_sparse_moe.experts.7.w1.weight": "model-00006-of-00006.safetensors", "model.layers.28.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00006.safetensors", "model.layers.28.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00006.safetensors", "model.layers.28.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00006.safetensors", "model.layers.28.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00006.safetensors", "model.layers.28.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00006.safetensors", "model.layers.28.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00006.safetensors", "model.layers.28.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00006.safetensors", "model.layers.28.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00006.safetensors", "model.layers.29.input_layernorm.weight": "model-00006-of-00006.safetensors", "model.layers.29.self_attn.q_proj.weight": "model-00006-of-00006.safetensors", "model.layers.29.self_attn.k_proj.weight": "model-00006-of-00006.safetensors", "model.layers.29.self_attn.v_proj.weight": "model-00006-of-00006.safetensors", "model.layers.29.self_attn.o_proj.weight": "model-00006-of-00006.safetensors", "model.layers.29.post_attention_layernorm.weight": "model-00006-of-00006.safetensors", "model.layers.29.block_sparse_moe.experts.0.w3.weight": "model-00006-of-00006.safetensors", "model.layers.29.block_sparse_moe.experts.1.w3.weight": "model-00006-of-00006.safetensors", "model.layers.29.block_sparse_moe.experts.2.w3.weight": "model-00006-of-00006.safetensors", "model.layers.29.block_sparse_moe.experts.3.w3.weight": "model-00006-of-00006.safetensors", "model.layers.29.block_sparse_moe.experts.4.w3.weight": "model-00006-of-00006.safetensors", "model.layers.29.block_sparse_moe.experts.5.w3.weight": "model-00006-of-00006.safetensors", "model.layers.29.block_sparse_moe.experts.6.w3.weight": "model-00006-of-00006.safetensors", 
"model.layers.29.block_sparse_moe.experts.7.w3.weight": "model-00006-of-00006.safetensors", "model.layers.29.block_sparse_moe.experts.0.w1.weight": "model-00006-of-00006.safetensors", "model.layers.29.block_sparse_moe.experts.1.w1.weight": "model-00006-of-00006.safetensors", "model.layers.29.block_sparse_moe.experts.2.w1.weight": "model-00006-of-00006.safetensors", "model.layers.29.block_sparse_moe.experts.3.w1.weight": "model-00006-of-00006.safetensors", "model.layers.29.block_sparse_moe.experts.4.w1.weight": "model-00006-of-00006.safetensors", "model.layers.29.block_sparse_moe.experts.5.w1.weight": "model-00006-of-00006.safetensors", "model.layers.29.block_sparse_moe.experts.6.w1.weight": "model-00006-of-00006.safetensors", "model.layers.29.block_sparse_moe.experts.7.w1.weight": "model-00006-of-00006.safetensors", "model.layers.29.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00006.safetensors", "model.layers.29.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00006.safetensors", "model.layers.29.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00006.safetensors", "model.layers.29.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00006.safetensors", "model.layers.29.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00006.safetensors", "model.layers.29.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00006.safetensors", "model.layers.29.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00006.safetensors", "model.layers.29.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00006.safetensors", "model.layers.30.input_layernorm.weight": "model-00006-of-00006.safetensors", "model.layers.30.self_attn.q_proj.weight": "model-00006-of-00006.safetensors", "model.layers.30.self_attn.k_proj.weight": "model-00006-of-00006.safetensors", "model.layers.30.self_attn.v_proj.weight": "model-00006-of-00006.safetensors", "model.layers.30.self_attn.o_proj.weight": "model-00006-of-00006.safetensors", 
"model.layers.30.post_attention_layernorm.weight": "model-00006-of-00006.safetensors", "model.layers.30.block_sparse_moe.experts.0.w3.weight": "model-00006-of-00006.safetensors", "model.layers.30.block_sparse_moe.experts.1.w3.weight": "model-00006-of-00006.safetensors", "model.layers.30.block_sparse_moe.experts.2.w3.weight": "model-00006-of-00006.safetensors", "model.layers.30.block_sparse_moe.experts.3.w3.weight": "model-00006-of-00006.safetensors", "model.layers.30.block_sparse_moe.experts.4.w3.weight": "model-00006-of-00006.safetensors", "model.layers.30.block_sparse_moe.experts.5.w3.weight": "model-00006-of-00006.safetensors", "model.layers.30.block_sparse_moe.experts.6.w3.weight": "model-00006-of-00006.safetensors", "model.layers.30.block_sparse_moe.experts.7.w3.weight": "model-00006-of-00006.safetensors", "model.layers.30.block_sparse_moe.experts.0.w1.weight": "model-00006-of-00006.safetensors", "model.layers.30.block_sparse_moe.experts.1.w1.weight": "model-00006-of-00006.safetensors", "model.layers.30.block_sparse_moe.experts.2.w1.weight": "model-00006-of-00006.safetensors", "model.layers.30.block_sparse_moe.experts.3.w1.weight": "model-00006-of-00006.safetensors", "model.layers.30.block_sparse_moe.experts.4.w1.weight": "model-00006-of-00006.safetensors", "model.layers.30.block_sparse_moe.experts.5.w1.weight": "model-00006-of-00006.safetensors", "model.layers.30.block_sparse_moe.experts.6.w1.weight": "model-00006-of-00006.safetensors", "model.layers.30.block_sparse_moe.experts.7.w1.weight": "model-00006-of-00006.safetensors", "model.layers.30.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00006.safetensors", "model.layers.30.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00006.safetensors", "model.layers.30.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00006.safetensors", "model.layers.30.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00006.safetensors", "model.layers.30.block_sparse_moe.experts.4.w2.weight": 
"model-00006-of-00006.safetensors", "model.layers.30.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00006.safetensors", "model.layers.30.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00006.safetensors", "model.layers.30.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00006.safetensors", "model.layers.31.input_layernorm.weight": "model-00006-of-00006.safetensors", "model.layers.31.self_attn.q_proj.weight": "model-00006-of-00006.safetensors", "model.layers.31.self_attn.k_proj.weight": "model-00006-of-00006.safetensors", "model.layers.31.self_attn.v_proj.weight": "model-00006-of-00006.safetensors", "model.layers.31.self_attn.o_proj.weight": "model-00006-of-00006.safetensors", "model.layers.31.post_attention_layernorm.weight": "model-00006-of-00006.safetensors", "model.layers.31.block_sparse_moe.experts.0.w3.weight": "model-00006-of-00006.safetensors", "model.layers.31.block_sparse_moe.experts.1.w3.weight": "model-00006-of-00006.safetensors", "model.layers.31.block_sparse_moe.experts.2.w3.weight": "model-00006-of-00006.safetensors", "model.layers.31.block_sparse_moe.experts.3.w3.weight": "model-00006-of-00006.safetensors", "model.layers.31.block_sparse_moe.experts.4.w3.weight": "model-00006-of-00006.safetensors", "model.layers.31.block_sparse_moe.experts.5.w3.weight": "model-00006-of-00006.safetensors", "model.layers.31.block_sparse_moe.experts.6.w3.weight": "model-00006-of-00006.safetensors", "model.layers.31.block_sparse_moe.experts.7.w3.weight": "model-00006-of-00006.safetensors", "model.layers.31.block_sparse_moe.experts.0.w1.weight": "model-00006-of-00006.safetensors", "model.layers.31.block_sparse_moe.experts.1.w1.weight": "model-00006-of-00006.safetensors", "model.layers.31.block_sparse_moe.experts.2.w1.weight": "model-00006-of-00006.safetensors", "model.layers.31.block_sparse_moe.experts.3.w1.weight": "model-00006-of-00006.safetensors", "model.layers.31.block_sparse_moe.experts.4.w1.weight": "model-00006-of-00006.safetensors", 
"model.layers.31.block_sparse_moe.experts.5.w1.weight": "model-00006-of-00006.safetensors", "model.layers.31.block_sparse_moe.experts.6.w1.weight": "model-00006-of-00006.safetensors", "model.layers.31.block_sparse_moe.experts.7.w1.weight": "model-00006-of-00006.safetensors", "model.layers.31.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00006.safetensors", "model.layers.31.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00006.safetensors", "model.layers.31.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00006.safetensors", "model.layers.31.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00006.safetensors", "model.layers.31.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00006.safetensors", "model.layers.31.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00006.safetensors", "model.layers.31.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00006.safetensors", "model.layers.31.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00006.safetensors", "model.norm.weight": "model-00006-of-00006.safetensors", "lm_head.weight": "model-00006-of-00006.safetensors", "model.layers.0.block_sparse_moe.gate.weight": "model-00006-of-00006.safetensors", "model.layers.1.block_sparse_moe.gate.weight": "model-00006-of-00006.safetensors", "model.layers.2.block_sparse_moe.gate.weight": "model-00006-of-00006.safetensors", "model.layers.3.block_sparse_moe.gate.weight": "model-00006-of-00006.safetensors", "model.layers.4.block_sparse_moe.gate.weight": "model-00006-of-00006.safetensors", "model.layers.5.block_sparse_moe.gate.weight": "model-00006-of-00006.safetensors", "model.layers.6.block_sparse_moe.gate.weight": "model-00006-of-00006.safetensors", "model.layers.7.block_sparse_moe.gate.weight": "model-00006-of-00006.safetensors", "model.layers.8.block_sparse_moe.gate.weight": "model-00006-of-00006.safetensors", "model.layers.9.block_sparse_moe.gate.weight": "model-00006-of-00006.safetensors", 
"model.layers.10.block_sparse_moe.gate.weight": "model-00006-of-00006.safetensors", "model.layers.11.block_sparse_moe.gate.weight": "model-00006-of-00006.safetensors", "model.layers.12.block_sparse_moe.gate.weight": "model-00006-of-00006.safetensors", "model.layers.13.block_sparse_moe.gate.weight": "model-00006-of-00006.safetensors", "model.layers.14.block_sparse_moe.gate.weight": "model-00006-of-00006.safetensors", "model.layers.15.block_sparse_moe.gate.weight": "model-00006-of-00006.safetensors", "model.layers.16.block_sparse_moe.gate.weight": "model-00006-of-00006.safetensors", "model.layers.17.block_sparse_moe.gate.weight": "model-00006-of-00006.safetensors", "model.layers.18.block_sparse_moe.gate.weight": "model-00006-of-00006.safetensors", "model.layers.19.block_sparse_moe.gate.weight": "model-00006-of-00006.safetensors", "model.layers.20.block_sparse_moe.gate.weight": "model-00006-of-00006.safetensors", "model.layers.21.block_sparse_moe.gate.weight": "model-00006-of-00006.safetensors", "model.layers.22.block_sparse_moe.gate.weight": "model-00006-of-00006.safetensors", "model.layers.23.block_sparse_moe.gate.weight": "model-00006-of-00006.safetensors", "model.layers.24.block_sparse_moe.gate.weight": "model-00006-of-00006.safetensors", "model.layers.25.block_sparse_moe.gate.weight": "model-00006-of-00006.safetensors", "model.layers.26.block_sparse_moe.gate.weight": "model-00006-of-00006.safetensors", "model.layers.27.block_sparse_moe.gate.weight": "model-00006-of-00006.safetensors", "model.layers.28.block_sparse_moe.gate.weight": "model-00006-of-00006.safetensors", "model.layers.29.block_sparse_moe.gate.weight": "model-00006-of-00006.safetensors", "model.layers.30.block_sparse_moe.gate.weight": "model-00006-of-00006.safetensors", "model.layers.31.block_sparse_moe.gate.weight": "model-00006-of-00006.safetensors"}}
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<s>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ }
30
+ },
31
+ "bos_token": "<s>",
32
+ "clean_up_tokenization_spaces": false,
33
+ "eos_token": "</s>",
34
+ "extra_special_tokens": {},
35
+ "legacy": false,
36
+ "max_length": 4096,
37
+ "model_max_length": 1000000000000000019884624838656,
38
+ "pad_token": "<s>",
39
+ "padding_side": "left",
40
+ "sp_model_kwargs": {},
41
+ "stride": 0,
42
+ "tokenizer_class": "LlamaTokenizer",
43
+ "truncation_side": "right",
44
+ "truncation_strategy": "longest_first",
45
+ "unk_token": "<unk>",
46
+ "use_default_system_prompt": false
47
+ }