LoneStriker committed on
Commit
4e24cb6
1 Parent(s): a0499e8

Upload folder using huggingface_hub

Browse files
Kyllene_v1.1.jpg ADDED
README.md ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: other
3
+ license_name: yi-license
4
+ license_link: https://huggingface.co/01-ai/Yi-34B-200K/blob/main/LICENSE
5
+ tags:
6
+ - merge
7
+ ---
8
+ # Kyllene 34B v1.1
9
+
10
+ ![image/png](https://huggingface.co/TeeZee/Kyllene-34B-v1.1/resolve/main/Kyllene_v1.1.jpg)
11
+
12
+
13
+ ## Model Details
14
+
15
+ - A result of a new merge method provided by the [MergeMonster](https://github.com/Gryphe/MergeMonster/) tool with an extended RPG preset.
16
+ - models used for merge:
17
+ [jondurbin/bagel-dpo-34b-v0.2](https://huggingface.co/jondurbin/bagel-dpo-34b-v0.2)
18
+ [NousResearch/Nous-Capybara-34B](https://huggingface.co/NousResearch/Nous-Capybara-34B)
19
+ [NousResearch/Nous-Hermes-2-Yi-34B](https://huggingface.co/NousResearch/Nous-Hermes-2-Yi-34B)
20
+ [SUSTech/SUS-Chat-34B](https://huggingface.co/SUSTech/SUS-Chat-34B)
21
+ - The method aims to maximize the probability of certain phrases and minimize the probability of other phrases.
22
+ - The RPG preset was extended with examples of typical, nonsensical output of most models, like 'unbreakable bond', 'send shivers down her spine', etc.
23
+ - The resulting model has approximately 34 billion parameters.
24
+ - See [merge-config.yml](https://huggingface.co/TeeZee/Kyllene-34B-v1.1/resolve/main/merge-config.yml) for details on the merge method used and RPG presets.
25
+
26
+ **Warning: This model can produce NSFW content!**
27
+
28
+ ## Results
29
+
30
+ - produces SFW and NSFW content without issues, switches context seamlessly.
31
+ - 200K context length
32
+ - good at following instructions
33
+ - different than [TeeZee/Kyllene-57B-v1.0](https://huggingface.co/TeeZee/Kyllene-57B-v1.0), but also surprisingly entertaining (but more tests are needed)
34
+
35
+ ## Side notes
36
+
37
+ - The [MergeMonster](https://github.com/Gryphe/MergeMonster/) method works; however, the project would benefit greatly from some more love from developers.
38
+ - In its current state, MergeMonster consumes insane amounts of RAM (256GB+) or VRAM and takes a really long time to process model data; this merge took 24H on 1xADA6000.
39
+ - MergeMonster is not a silver bullet; other experiments have shown that it can also produce incredibly stupid models.
40
+
41
+ All comments are greatly appreciated, download, test and if you appreciate my work, consider buying me my fuel:
42
+ <a href="https://www.buymeacoffee.com/TeeZee" target="_blank"><img src="https://cdn.buymeacoffee.com/buttons/v2/default-yellow.png" alt="Buy Me A Coffee" style="height: 60px !important;width: 217px !important;" ></a>
config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "../jondurbin_bagel-dpo-34b-v0.2",
3
+ "architectures": [
4
+ "LlamaForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 1,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 7168,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 20480,
14
+ "max_position_embeddings": 200000,
15
+ "model_type": "llama",
16
+ "num_attention_heads": 56,
17
+ "num_hidden_layers": 60,
18
+ "num_key_value_heads": 8,
19
+ "pad_token_id": 0,
20
+ "pretraining_tp": 1,
21
+ "rms_norm_eps": 1e-05,
22
+ "rope_scaling": null,
23
+ "rope_theta": 5000000.0,
24
+ "tie_word_embeddings": false,
25
+ "torch_dtype": "float16",
26
+ "transformers_version": "4.36.2",
27
+ "use_cache": false,
28
+ "vocab_size": 64000
29
+ }
generation_config.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "pad_token_id": 0,
6
+ "transformers_version": "4.36.2",
7
+ "use_cache": false
8
+ }
merge-config.yml ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # Either "cpu" or "cuda"
3
+ # NOTE: Cuda requires enough VRAM to load 3 FP16 models (~45 GB for Mistral)
4
+ # NOTE 2: The (much slower) CPU mode still requires Cuda capability, but only enough VRAM to load a model once. (~15 GB for Mistral)
5
+ device: "cpu"
6
+ random_seed: 42 # Random seed to use
7
+
8
+ directories:
9
+ model_path1: "../jondurbin_bagel-dpo-34b-v0.2" # Path to the base model. Must be a local copy.
10
+ # model_directory: "../2xNous-Capybara-34B/" # Directory of models to scan, IGNORED if models_to_merge has entries in it
11
+ output_directory: "./mm-output" # Output directory of the merged model
12
+
13
+ # A list of models to use as merge candidates - HF syntax, so can be either local directories or repos.
14
+ # Overrides model_directory if used
15
+ models_to_merge: ["../NousResearch_Nous-Capybara-34B", "../NousResearch_Nous-Hermes-2-Yi-34B", "../SUSTech_SUS-Chat-34B"]
16
+
17
+ # Merge ratios used for testing each layer's potential for improvement - Huge impact on total running time
18
+ merge_ratios: [0.2, 0.4, 0.6, 0.8]
19
+
20
+ # Choose from the following methods. Defaults to "lerp".
21
+ # "lerp" - Linear interpolation
22
+ # "slerp" - Spherical linear interpolation
23
+ # "slice" - Highly experimental. The tensor weights shift from one model to another. [Model 1 > 10% blend > Model 2]
24
+ # "cyclic" - Highly experimental. Ignores merge ratios as these are predefined. [Model 1 > 10% blend > 10% Model 2 > 10% blend > Model 1]
25
+ merge_method: "slerp"
26
+
27
+ # If set to true, the lm_head and embed_token tensors (located outside the layers) will also be optimized
28
+ # Models that have a different vocab size from model1 will skip this phase automatically as it tends to cause model stability issues
29
+ merge_headers: true
30
+
31
+ # Strategies:
32
+ # "cumulative" - Default strategy. If there's a chance of reducing the combined probability, accept the merge.
33
+ # "all_phrases" - Only accept the merge if all phrases show an improvement. (Warning: This rarely happens)
34
+ # "quantitive" - Ignores probabilities completely. Only looks at how many phrases show an improvement, as defined by the threshold below.
35
+ strategy: "cumulative"
36
+ # Threshold is currently only used by the "quantitive" strategy. If 0.6, at least 60% of the number of phrases must show an improvement.
37
+ strategy_threshold: 0.6
38
+
39
+ # Whether or not to automatically balance the weights so all phrases are of equal importance to the "cumulative" strategy.
40
+ # The weight value of phrases is ignored if set to true.
41
+ auto_weights: false
42
+
43
+ # Phrase = What to measure, weight = multiplication factor, contexts = preceding contexts
44
+ bad_phrases:
45
+ - phrase: "anticipation"
46
+ weight: 12
47
+ contexts: ["Her body quivers with ", "The atmosphere is thick with "]
48
+ - phrase: "unwavering"
49
+ weight: 12
50
+ contexts: ["Filled with an "]
51
+ - phrase: "determination"
52
+ weight: 12
53
+ contexts: ["Her eyes were filled with ", "Her stubbornness only fuels my "]
54
+ - phrase: "whisper"
55
+ weight: 12
56
+ contexts: ["Her voice barely above a "]
57
+ - phrase: "spine"
58
+ weight: 12
59
+ contexts: ["shivers down her "]
60
+ - phrase: "sends shivers"
61
+ weight: 12
62
+ contexts: ["The thrill of the act "]
63
+ - phrase: "ministrations"
64
+ weight: 12
65
+ contexts: ["She moans and twitches at your "]
66
+ - phrase: "legs"
67
+ weight: 12
68
+ contexts: ["wraps her "]
69
+ - phrase: "imposing figure"
70
+ weight: 12
71
+ contexts: ["He had an "]
72
+ - phrase: "shared challenges"
73
+ weight: 12
74
+ contexts: ["Their bond strengthened through "]
75
+ - phrase: "bond"
76
+ weight: 12
77
+ contexts: ["forged a ", "an unspoken "]
78
+ - phrase: "enhance our experience"
79
+ weight: 12
80
+ contexts: ["I'm excited to see how "]
81
+ - phrase: "sense of vulnerability"
82
+ weight: 12
83
+ contexts: ["create a "]
84
+ - phrase: "dimensions of intimacy"
85
+ weight: 12
86
+ contexts: ["explore new "]
87
+ - phrase: "deepening our connection"
88
+ weight: 12
89
+ contexts: ["while "]
90
+ - phrase: "shared experiences"
91
+ weight: 12
92
+ contexts: ["through "]
93
+ - phrase: "societal expectations"
94
+ weight: 12
95
+ contexts: ["that transcend "]
96
+ - phrase: "conventional boundaries"
97
+ weight: 12
98
+ contexts: ["that defy ", "and defy "]
99
+ - phrase: "open communication"
100
+ weight: 12
101
+ contexts: ["an environment "]
102
+ - phrase: "emotional vulnerability"
103
+ weight: 12
104
+ contexts: ["an environment "]
105
+ - phrase: "heightens our connection"
106
+ weight: 12
107
+ contexts: ["touch and the anticipation "]
108
+ - phrase: "sensations you're creating"
109
+ weight: 12
110
+ contexts: ["I'm enjoying "]
111
+ - phrase: "is truly arousing"
112
+ weight: 12
113
+ contexts: ["attention to detail ", "way you explore my body "]
114
+ - phrase: "challenge presented"
115
+ weight: 12
116
+ contexts: ["my resolve unwavering despite "]
117
+ - phrase: "humble vessel"
118
+ weight: 12
119
+ contexts: ["surrendering to the exquisite torment "]
120
+ - phrase: "bond"
121
+ weight: 12
122
+ contexts: ["cherishing the unique ", "special "]
123
+ - phrase: "grows stronger with each passing day"
124
+ weight: 12
125
+ contexts: ["bond "]
126
+ - phrase: "that cannot be broken by time or circumstance"
127
+ weight: 12
128
+ contexts: ["bond "]
129
+ - phrase: "becomes unbreakable, eternal"
130
+ weight: 12
131
+ contexts: ["bond "]
132
+ - phrase: "grew stronger with each passing"
133
+ weight: 12
134
+ contexts: ["bond "]
135
+
136
+
137
+ # Note - Example of a complex phrase
138
+ good_phrases:
139
+ - phrase: "The apple is in the bedroom"
140
+ weight: 1
141
+ contexts: ["Question: If I'm in the living room and pick up the apple, go to the bedroom and drop the apple, then walk to the kitchen, where is the apple? Explain your reasoning. Answer: "]
142
+
mergemonster_kyllene_v11.txt ADDED
@@ -0,0 +1,987 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ⠀⠀⠀⠀⠀⠀⣀⡀⠀⠀⣀⣤⣶⣾⣿⣿⣷⣶⣤⣀⠀⠀⣀⣀⠀⠀⠀⠀⠀⠀
3
+ ⠀⠀⠀⠀⠀⠜⠉⣿⡆⣼⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣧⢰⣿⠉⠃⠀⠀⠀⠀⠀
4
+ ⠀⢀⣤⣴⣦⣄⣴⠟⣸⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡎⢻⣦⣠⣴⣦⣄⠀⠀
5
+ ⠀⡞⠁⣠⣾⢿⣧⠀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠀⣽⡿⣷⣄⠈⢷⠀
6
+ ⠀⣠⣾⠟⠁⢸⣿⠀⠘⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠁⠀⣿⡇⠈⠻⣷⣄⠀
7
+ ⣰⡿⠁⠀⢀⣾⣏⣾⣄⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣇⣰⣷⣹⣷⠀⠀⠈⢿⣆
8
+ ⣿⡇⠀⢠⣾⠏⢸⣿⣿⣿⣿⠋⢻⣿⣿⣿⣿⡟⠙⣿⣿⣿⣿⡇⠹⣷⡀⠀⢸⣿
9
+ ⠹⣿⣴⡿⠋⠀⠈⠛⠉⣹⣿⣦⣄⡹⣿⣿⣋⣠⣶⣿⣏⠉⠛⠁⠀⠙⢿⣦⣿⠏
10
+ ⠀⣸⣿⠿⠿⣿⣾⣿⡿⠿⣿⣿⣿⣿⡆⢰⣿⣿⣿⣿⠿⢿⣿⣶⣿⠿⠿⣻⣇⠀
11
+ ⠀⣿⡇⢀⣴⣶⣤⣀⣴⣿⠿⣻⡿⣿⣧⣾⣿⢿⣟⠿⣿⣦⣀⣤⣶⣦⠀⢸⣿⠀
12
+ ⠀⢿⣧⠈⠃⢀⣵⣿⡋⠁⢀⣿⡷⣿⡇⢻⣿⣿⣿⡀⠈⢛⣿⣮⡀⠘⠀⣼⡟⠀
13
+ ⠀⠈⠻⣷⣤⣟⣋⣿⣧⣴⡿⠋⠀⣿⡇⢸⣿⠀⠙⢿⣦⣼⣿⣙⣻⣤⣾⠟⠁⠀
14
+ ⠀⠀⠀⠈⢽⣿⠛⢻⣏⢉⣤⣶⣶⣿⠁⠈⣿⣶⣶⣤⡉⣽⡟⠛⣿⡏⠁⠀⠀⠀
15
+ ⠀⠀⠀⠀⠈⠿⣷⣾⣾⣟⣉⣠⣿⢿⡇⢸⠿⣿⣄⣙⣻⣷⣷⣾⠿⠁⠀⠀⠀⠀
16
+ ⠀⠀⠀⠀⠀⠀⠀⠀⠙⠻⠿⠛⢁⡼⠃⠘⢦⡈⠛⠿⠟⠃⠀⠀⠀⠀⠀⠀⠀⠀
17
+
18
+ 01:05:33 - THE MERGE MONSTER HUNGERS
19
+ ------------------------------------
20
+ Device : cpu
21
+ Random seed : 42
22
+ Starting model : ../jondurbin_bagel-dpo-34b-v0.2
23
+ Models to merge : ['../NousResearch_Nous-Capybara-34B', '../NousResearch_Nous-Hermes-2-Yi-34B', '../SUSTech_SUS-Chat-34B']
24
+ Output directory : ./mm-output
25
+ Phrases loaded : 31
26
+ Auto weights : False
27
+ Merge ratios : [0.2, 0.4, 0.6, 0.8]
28
+ Merge method(s) : ['slerp']
29
+ Merge headers : True
30
+ Strategy used : cumulative
31
+ ------------------------------------
32
+ 01:05:34 - Loading model (../jondurbin_bagel-dpo-34b-v0.2)...
33
+ Loading checkpoint shards: 100%|████████████████| 15/15 [00:32<00:00, 2.18s/it]
34
+ 01:06:59 - Model loaded. Dtype: torch.float16
35
+ ------------------------------------
36
+
37
+ -----------------------------------------------------------------------------------------------------
38
+ | Type | Phrase | Context | Raw Prob* | Used Prob** | Change |
39
+ -----------------------------------------------------------------------------------------------------
40
+ | BAD | anticipation | Her body quivers with | 0.00000% | 0.00% | N/A |
41
+ | BAD | anticipation | The atmosphere is thic.. | 0.00000% | 0.00% | N/A |
42
+ | BAD | unwavering | Filled with an | 0.00000% | 0.00% | N/A |
43
+ | BAD | determination | Her eyes were filled w.. | 0.00000% | 0.00% | N/A |
44
+ | BAD | determination | Her stubbornness only .. | 0.00000% | 0.00% | N/A |
45
+ | BAD | whisper | Her voice barely above.. | 0.00000% | 0.00% | N/A |
46
+ | BAD | spine | shivers down her | 0.00000% | 0.00% | N/A |
47
+ | BAD | sends shivers | The thrill of the act | 0.00000% | 0.00% | N/A |
48
+ | BAD | ministrations | She moans and twitches.. | 0.00006% | 0.00% | N/A |
49
+ | BAD | legs | wraps her | 0.00000% | 0.00% | N/A |
50
+ | BAD | imposing figure | He had an | 0.00000% | 0.00% | N/A |
51
+ | BAD | shared challenges | Their bond strengthene.. | 0.00001% | 0.00% | N/A |
52
+ | BAD | bond | forged a | 0.00008% | 0.00% | N/A |
53
+ | BAD | bond | an unspoken | 0.00009% | 0.00% | N/A |
54
+ | BAD | enhance our expe.. | I'm excited to see how | 0.00000% | 0.00% | N/A |
55
+ | BAD | sense of vulnera.. | create a | 0.00000% | 0.00% | N/A |
56
+ | BAD | dimensions of in.. | explore new | 0.00000% | 0.00% | N/A |
57
+ | BAD | deepening our co.. | while | 0.00000% | 0.00% | N/A |
58
+ | BAD | shared experiences | through | 0.00000% | 0.00% | N/A |
59
+ | BAD | societal expecta.. | that transcend | 0.00000% | 0.00% | N/A |
60
+ | BAD | conventional bou.. | that defy | 0.00000% | 0.00% | N/A |
61
+ | BAD | conventional bou.. | and defy | 0.00000% | 0.00% | N/A |
62
+ | BAD | open communication | an environment | 0.00000% | 0.00% | N/A |
63
+ | BAD | emotional vulner.. | an environment | 0.00000% | 0.00% | N/A |
64
+ | BAD | heightens our co.. | touch and the anticipa.. | 0.00000% | 0.00% | N/A |
65
+ | BAD | sensations you'r.. | I'm enjoying | 0.00000% | 0.00% | N/A |
66
+ | BAD | is truly arousing | attention to detail | 0.00000% | 0.00% | N/A |
67
+ | BAD | is truly arousing | way you explore my body | 0.00000% | 0.00% | N/A |
68
+ | BAD | challenge presen.. | my resolve unwavering .. | 0.00000% | 0.00% | N/A |
69
+ | BAD | humble vessel | surrendering to the ex.. | 0.00000% | 0.00% | N/A |
70
+ | BAD | bond | cherishing the unique | 0.00013% | 0.00% | N/A |
71
+ | BAD | bond | special | 0.00030% | 0.00% | N/A |
72
+ | BAD | grows stronger w.. | bond | 0.00000% | 0.00% | N/A |
73
+ | BAD | that cannot be b.. | bond | 0.00000% | 0.00% | N/A |
74
+ | BAD | becomes unbreaka.. | bond | 0.00000% | 0.00% | N/A |
75
+ | BAD | grew stronger wi.. | bond | 0.00000% | 0.00% | N/A |
76
+ | GOOD | The apple is in .. | Question: If I'm in th.. | 0.00139% | 0.00% | N/A |
77
+ ------------------------------------------------------------------------------------------------------
78
+ | Totals | 0.00% | 0.01% | 0.00% |
79
+ ------------------------------------------------------------------------------------------------------
80
+ * = Unweighted, raw probability - ** = Probability after weight adjustments
81
+
82
+ ------------------------------------
83
+ 01:07:39 - Loading model (../NousResearch_Nous-Capybara-34B)...
84
+ Loading checkpoint shards: 100%|██████████████████| 7/7 [01:04<00:00, 9.19s/it]
85
+ 01:09:33 - Model loaded. Dtype: torch.float16
86
+ ------------------------------------
87
+ Optimizing Layer 1/60 (slerp): 100%|██████████████| 4/4 [04:01<00:00, 60.38s/it]
88
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.2, 'NousResearch_Nous-Capybara-34B']]
89
+ 01:15:02 - Layer 1/60 - CHANGED - 0.00007 > 0.00006 - 2.5%
90
+ ----
91
+ Optimizing Layer 2/60 (slerp): 100%|██████████████| 4/4 [03:52<00:00, 58.04s/it]
92
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.4, 'NousResearch_Nous-Capybara-34B']]
93
+ 01:20:22 - Layer 2/60 - CHANGED - 0.00006 > 0.00006 - 1.6%
94
+ ----
95
+ Optimizing Layer 3/60 (slerp): 100%|██████████████| 4/4 [04:03<00:00, 60.90s/it]
96
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
97
+ 01:25:50 - Layer 3/60 - RETAINED - 0.00006
98
+ ----
99
+ Optimizing Layer 4/60 (slerp): 100%|██████████████| 4/4 [05:28<00:00, 82.25s/it]
100
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
101
+ 01:32:54 - Layer 4/60 - RETAINED - 0.00006
102
+ ----
103
+ Optimizing Layer 5/60 (slerp): 100%|██████████████| 4/4 [04:15<00:00, 63.94s/it]
104
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
105
+ 01:38:53 - Layer 5/60 - RETAINED - 0.00006
106
+ ----
107
+ Optimizing Layer 6/60 (slerp): 100%|██████████████| 4/4 [04:16<00:00, 64.24s/it]
108
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
109
+ 01:44:47 - Layer 6/60 - RETAINED - 0.00006
110
+ ----
111
+ Optimizing Layer 7/60 (slerp): 100%|██████████████| 4/4 [04:04<00:00, 61.02s/it]
112
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
113
+ 01:50:20 - Layer 7/60 - RETAINED - 0.00006
114
+ ----
115
+ Optimizing Layer 8/60 (slerp): 100%|██████████████| 4/4 [04:07<00:00, 61.95s/it]
116
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
117
+ 01:55:59 - Layer 8/60 - RETAINED - 0.00006
118
+ ----
119
+ Optimizing Layer 9/60 (slerp): 100%|██████████████| 4/4 [04:04<00:00, 61.17s/it]
120
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.4, 'NousResearch_Nous-Capybara-34B']]
121
+ 02:01:26 - Layer 9/60 - CHANGED - 0.00006 > 0.00006 - 1.3%
122
+ ----
123
+ Optimizing Layer 10/60 (slerp): 100%|█████████████| 4/4 [03:56<00:00, 59.05s/it]
124
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
125
+ 02:06:41 - Layer 10/60 - RETAINED - 0.00006
126
+ ----
127
+ Optimizing Layer 11/60 (slerp): 100%|█████████████| 4/4 [03:43<00:00, 55.90s/it]
128
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B']]
129
+ 02:11:45 - Layer 11/60 - CHANGED - 0.00006 > 0.00006 - 4.8%
130
+ ----
131
+ Optimizing Layer 12/60 (slerp): 100%|█████████████| 4/4 [03:53<00:00, 58.32s/it]
132
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B']]
133
+ 02:16:54 - Layer 12/60 - CHANGED - 0.00006 > 0.00005 - 12.2%
134
+ ----
135
+ Optimizing Layer 13/60 (slerp): 100%|█████████████| 4/4 [04:09<00:00, 62.31s/it]
136
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.4, 'NousResearch_Nous-Capybara-34B']]
137
+ 02:22:31 - Layer 13/60 - CHANGED - 0.00005 > 0.00005 - 3.6%
138
+ ----
139
+ Optimizing Layer 14/60 (slerp): 100%|█████████████| 4/4 [03:31<00:00, 52.84s/it]
140
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.4, 'NousResearch_Nous-Capybara-34B']]
141
+ 02:27:20 - Layer 14/60 - CHANGED - 0.00005 > 0.00005 - 1.5%
142
+ ----
143
+ Optimizing Layer 15/60 (slerp): 100%|█████████████| 4/4 [04:26<00:00, 66.67s/it]
144
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
145
+ 02:33:32 - Layer 15/60 - RETAINED - 0.00005
146
+ ----
147
+ Optimizing Layer 16/60 (slerp): 100%|█████████████| 4/4 [04:36<00:00, 69.09s/it]
148
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
149
+ 02:39:38 - Layer 16/60 - RETAINED - 0.00005
150
+ ----
151
+ Optimizing Layer 17/60 (slerp): 100%|█████████████| 4/4 [04:22<00:00, 65.64s/it]
152
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
153
+ 02:45:41 - Layer 17/60 - RETAINED - 0.00005
154
+ ----
155
+ Optimizing Layer 18/60 (slerp): 100%|█████████████| 4/4 [04:39<00:00, 69.87s/it]
156
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
157
+ 02:51:51 - Layer 18/60 - RETAINED - 0.00005
158
+ ----
159
+ Optimizing Layer 19/60 (slerp): 100%|█████████████| 4/4 [05:10<00:00, 77.56s/it]
160
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
161
+ 02:58:36 - Layer 19/60 - RETAINED - 0.00005
162
+ ----
163
+ Optimizing Layer 20/60 (slerp): 100%|█████████████| 4/4 [05:03<00:00, 75.87s/it]
164
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B']]
165
+ 03:05:16 - Layer 20/60 - CHANGED - 0.00005 > 0.00005 - 0.2%
166
+ ----
167
+ Optimizing Layer 21/60 (slerp): 100%|█████████████| 4/4 [05:42<00:00, 85.60s/it]
168
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B']]
169
+ 03:12:46 - Layer 21/60 - CHANGED - 0.00005 > 0.00001 - 77.3%
170
+ ----
171
+ Optimizing Layer 22/60 (slerp): 100%|█████████████| 4/4 [05:48<00:00, 87.20s/it]
172
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B']]
173
+ 03:21:02 - Layer 22/60 - CHANGED - 0.00001 > -0.00000 - 126.4%
174
+ ----
175
+ Optimizing Layer 23/60 (slerp): 100%|████████████| 4/4 [07:03<00:00, 105.79s/it]
176
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B']]
177
+ 03:30:53 - Layer 23/60 - CHANGED - -0.00000 > -0.00003 - 988.2%
178
+ ----
179
+ Optimizing Layer 24/60 (slerp): 100%|█████████████| 4/4 [06:11<00:00, 92.99s/it]
180
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B']]
181
+ 03:39:09 - Layer 24/60 - CHANGED - -0.00003 > -0.00006 - 90.8%
182
+ ----
183
+ Optimizing Layer 25/60 (slerp): 100%|█████████████| 4/4 [05:42<00:00, 85.51s/it]
184
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B']]
185
+ 03:46:40 - Layer 25/60 - CHANGED - -0.00006 > -0.00013 - 105.5%
186
+ ----
187
+ Optimizing Layer 26/60 (slerp): 100%|█████████████| 4/4 [05:10<00:00, 77.58s/it]
188
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.4, 'NousResearch_Nous-Capybara-34B']]
189
+ 03:53:21 - Layer 26/60 - CHANGED - -0.00013 > -0.00014 - 8.8%
190
+ ----
191
+ Optimizing Layer 27/60 (slerp): 100%|█████████████| 4/4 [04:53<00:00, 73.48s/it]
192
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
193
+ 03:59:41 - Layer 27/60 - RETAINED - -0.00014
194
+ ----
195
+ Optimizing Layer 28/60 (slerp): 100%|█████████████| 4/4 [05:00<00:00, 75.07s/it]
196
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.4, 'NousResearch_Nous-Capybara-34B']]
197
+ 04:06:11 - Layer 28/60 - CHANGED - -0.00014 > -0.00015 - 9.9%
198
+ ----
199
+ Optimizing Layer 29/60 (slerp): 100%|█████████████| 4/4 [05:18<00:00, 79.66s/it]
200
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B']]
201
+ 04:13:06 - Layer 29/60 - CHANGED - -0.00015 > -0.00026 - 73.9%
202
+ ----
203
+ Optimizing Layer 30/60 (slerp): 100%|█████████████| 4/4 [04:39<00:00, 69.97s/it]
204
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.2, 'NousResearch_Nous-Capybara-34B']]
205
+ 04:19:19 - Layer 30/60 - CHANGED - -0.00026 > -0.00026 - 0.1%
206
+ ----
207
+ Optimizing Layer 31/60 (slerp): 100%|█████████████| 4/4 [05:03<00:00, 75.98s/it]
208
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B']]
209
+ 04:26:15 - Layer 31/60 - CHANGED - -0.00026 > -0.00045 - 73.2%
210
+ ----
211
+ Optimizing Layer 32/60 (slerp): 100%|█████████████| 4/4 [04:50<00:00, 72.61s/it]
212
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
213
+ 04:32:41 - Layer 32/60 - RETAINED - -0.00045
214
+ ----
215
+ Optimizing Layer 33/60 (slerp): 100%|█████████████| 4/4 [04:42<00:00, 70.72s/it]
216
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
217
+ 04:38:55 - Layer 33/60 - RETAINED - -0.00045
218
+ ----
219
+ Optimizing Layer 34/60 (slerp): 100%|█████████████| 4/4 [05:10<00:00, 77.62s/it]
220
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
221
+ 04:45:43 - Layer 34/60 - RETAINED - -0.00045
222
+ ----
223
+ Optimizing Layer 35/60 (slerp): 100%|█████████████| 4/4 [05:18<00:00, 79.62s/it]
224
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
225
+ 04:52:33 - Layer 35/60 - RETAINED - -0.00045
226
+ ----
227
+ Optimizing Layer 36/60 (slerp): 100%|█████████████| 4/4 [05:31<00:00, 82.80s/it]
228
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B']]
229
+ 04:59:39 - Layer 36/60 - CHANGED - -0.00045 > -0.00058 - 27.3%
230
+ ----
231
+ Optimizing Layer 37/60 (slerp): 100%|█████████████| 4/4 [05:40<00:00, 85.08s/it]
232
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B']]
233
+ 05:07:00 - Layer 37/60 - CHANGED - -0.00058 > -0.00068 - 17.0%
234
+ ----
235
+ Optimizing Layer 38/60 (slerp): 100%|█████████████| 4/4 [05:09<00:00, 77.43s/it]
236
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
237
+ 05:13:50 - Layer 38/60 - RETAINED - -0.00068
238
+ ----
239
+ Optimizing Layer 39/60 (slerp): 100%|█████████████| 4/4 [04:52<00:00, 73.15s/it]
240
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B']]
241
+ 05:20:23 - Layer 39/60 - CHANGED - -0.00068 > -0.00094 - 38.6%
242
+ ----
243
+ Optimizing Layer 40/60 (slerp): 100%|█████████████| 4/4 [05:11<00:00, 77.87s/it]
244
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
245
+ 05:27:10 - Layer 40/60 - RETAINED - -0.00094
246
+ ----
247
+ Optimizing Layer 41/60 (slerp): 100%|█████████████| 4/4 [04:56<00:00, 74.02s/it]
248
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
249
+ 05:33:43 - Layer 41/60 - RETAINED - -0.00094
250
+ ----
251
+ Optimizing Layer 42/60 (slerp): 100%|█████████████| 4/4 [05:11<00:00, 77.90s/it]
252
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
253
+ 05:40:32 - Layer 42/60 - RETAINED - -0.00094
254
+ ----
255
+ Optimizing Layer 43/60 (slerp): 100%|█████████████| 4/4 [05:07<00:00, 76.91s/it]
256
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
257
+ 05:47:21 - Layer 43/60 - RETAINED - -0.00094
258
+ ----
259
+ Optimizing Layer 44/60 (slerp): 100%|█████████████| 4/4 [05:27<00:00, 81.99s/it]
260
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
261
+ 05:54:34 - Layer 44/60 - RETAINED - -0.00094
262
+ ----
263
+ Optimizing Layer 45/60 (slerp): 100%|█████████████| 4/4 [05:55<00:00, 88.94s/it]
264
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
265
+ 06:02:20 - Layer 45/60 - RETAINED - -0.00094
266
+ ----
267
+ Optimizing Layer 46/60 (slerp): 100%|█████████████| 4/4 [05:23<00:00, 80.84s/it]
268
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
269
+ 06:09:36 - Layer 46/60 - RETAINED - -0.00094
270
+ ----
271
+ Optimizing Layer 47/60 (slerp): 100%|█████████████| 4/4 [05:10<00:00, 77.74s/it]
272
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
273
+ 06:16:33 - Layer 47/60 - RETAINED - -0.00094
274
+ ----
275
+ Optimizing Layer 48/60 (slerp): 100%|█████████████| 4/4 [04:53<00:00, 73.39s/it]
276
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
277
+ 06:23:12 - Layer 48/60 - RETAINED - -0.00094
278
+ ----
279
+ Optimizing Layer 49/60 (slerp): 100%|█████████████| 4/4 [05:12<00:00, 78.19s/it]
280
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.4, 'NousResearch_Nous-Capybara-34B']]
281
+ 06:30:19 - Layer 49/60 - CHANGED - -0.00094 > -0.00100 - 6.8%
282
+ ----
283
+ Optimizing Layer 50/60 (slerp): 100%|█████████████| 4/4 [05:16<00:00, 79.20s/it]
284
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.6, 'NousResearch_Nous-Capybara-34B']]
285
+ 06:37:19 - Layer 50/60 - CHANGED - -0.00100 > -0.00106 - 6.1%
286
+ ----
287
+ Optimizing Layer 51/60 (slerp): 100%|█████████████| 4/4 [05:08<00:00, 77.05s/it]
288
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
289
+ 06:44:03 - Layer 51/60 - RETAINED - -0.00106
290
+ ----
291
+ Optimizing Layer 52/60 (slerp): 100%|█████████████| 4/4 [04:41<00:00, 70.42s/it]
292
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B']]
293
+ 06:50:20 - Layer 52/60 - CHANGED - -0.00106 > -0.00128 - 20.3%
294
+ ----
295
+ Optimizing Layer 53/60 (slerp): 100%|█████████████| 4/4 [05:05<00:00, 76.48s/it]
296
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.2, 'NousResearch_Nous-Capybara-34B']]
297
+ 06:57:10 - Layer 53/60 - CHANGED - -0.00128 > -0.00128 - 0.2%
298
+ ----
299
+ Optimizing Layer 54/60 (slerp): 100%|█████████████| 4/4 [05:37<00:00, 84.37s/it]
300
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.4, 'NousResearch_Nous-Capybara-34B']]
301
+ 07:04:24 - Layer 54/60 - CHANGED - -0.00128 > -0.00132 - 3.5%
302
+ ----
303
+ Optimizing Layer 55/60 (slerp): 100%|█████████████| 4/4 [06:07<00:00, 91.86s/it]
304
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
305
+ 07:12:17 - Layer 55/60 - RETAINED - -0.00132
306
+ ----
307
+ Optimizing Layer 56/60 (slerp): 100%|█████████████| 4/4 [05:23<00:00, 80.92s/it]
308
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B']]
309
+ 07:19:47 - Layer 56/60 - CHANGED - -0.00132 > -0.00152 - 14.7%
310
+ ----
311
+ Optimizing Layer 57/60 (slerp): 100%|█████████████| 4/4 [05:58<00:00, 89.60s/it]
312
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B']]
313
+ 07:27:40 - Layer 57/60 - CHANGED - -0.00152 > -0.00171 - 12.5%
314
+ ----
315
+ Optimizing Layer 58/60 (slerp): 100%|█████████████| 4/4 [06:03<00:00, 90.92s/it]
316
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.6, 'NousResearch_Nous-Capybara-34B']]
317
+ 07:35:25 - Layer 58/60 - CHANGED - -0.00171 > -0.00186 - 8.8%
318
+ ----
319
+ Optimizing Layer 59/60 (slerp): 100%|█████████████| 4/4 [05:25<00:00, 81.34s/it]
320
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
321
+ 07:42:28 - Layer 59/60 - RETAINED - -0.00186
322
+ ----
323
+ Optimizing Layer 60/60 (slerp): 100%|█████████████| 4/4 [05:45<00:00, 86.41s/it]
324
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
325
+ 07:49:59 - Layer 60/60 - RETAINED - -0.00186
326
+ ----
327
+ Optimizing Header: 100%|██████████████████████████| 4/4 [06:22<00:00, 95.55s/it]
328
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.6, 'NousResearch_Nous-Capybara-34B']]
329
+ 07:57:33 - Header - CHANGED - -0.00186 > -0.00190 - 2.5%
330
+
331
+ -----------------------------------------------------------------------------------------------------
332
+ | Type | Phrase | Context | Raw Prob* | Used Prob** | Change |
333
+ -----------------------------------------------------------------------------------------------------
334
+ | BAD | anticipation | Her body quivers with | 0.00000% | 0.00% | +0.00% |
335
+ | BAD | anticipation | The atmosphere is thic.. | 0.00000% | 0.00% | +0.00% |
336
+ | BAD | unwavering | Filled with an | 0.00000% | 0.00% | +0.00% |
337
+ | BAD | determination | Her eyes were filled w.. | 0.00000% | 0.00% | -0.00% |
338
+ | BAD | determination | Her stubbornness only .. | 0.00000% | 0.00% | +0.00% |
339
+ | BAD | whisper | Her voice barely above.. | 0.00000% | 0.00% | +0.00% |
340
+ | BAD | spine | shivers down her | 0.00000% | 0.00% | -0.00% |
341
+ | BAD | sends shivers | The thrill of the act | 0.00000% | 0.00% | +0.00% |
342
+ | BAD | ministrations | She moans and twitches.. | 0.00004% | 0.00% | -0.00% |
343
+ | BAD | legs | wraps her | 0.00000% | 0.00% | -0.00% |
344
+ | BAD | imposing figure | He had an | 0.00000% | 0.00% | -0.00% |
345
+ | BAD | shared challenges | Their bond strengthene.. | 0.00001% | 0.00% | +0.00% |
346
+ | BAD | bond | forged a | 0.00007% | 0.00% | -0.00% |
347
+ | BAD | bond | an unspoken | 0.00010% | 0.00% | +0.00% |
348
+ | BAD | enhance our expe.. | I'm excited to see how | 0.00000% | 0.00% | +0.00% |
349
+ | BAD | sense of vulnera.. | create a | 0.00000% | 0.00% | -0.00% |
350
+ | BAD | dimensions of in.. | explore new | 0.00000% | 0.00% | +0.00% |
351
+ | BAD | deepening our co.. | while | 0.00000% | 0.00% | -0.00% |
352
+ | BAD | shared experiences | through | 0.00000% | 0.00% | -0.00% |
353
+ | BAD | societal expecta.. | that transcend | 0.00000% | 0.00% | -0.00% |
354
+ | BAD | conventional bou.. | that defy | 0.00000% | 0.00% | +0.00% |
355
+ | BAD | conventional bou.. | and defy | 0.00000% | 0.00% | +0.00% |
356
+ | BAD | open communication | an environment | 0.00000% | 0.00% | -0.00% |
357
+ | BAD | emotional vulner.. | an environment | 0.00000% | 0.00% | -0.00% |
358
+ | BAD | heightens our co.. | touch and the anticipa.. | 0.00000% | 0.00% | -0.00% |
359
+ | BAD | sensations you'r.. | I'm enjoying | 0.00000% | 0.00% | +0.00% |
360
+ | BAD | is truly arousing | attention to detail | 0.00000% | 0.00% | +0.00% |
361
+ | BAD | is truly arousing | way you explore my body | 0.00000% | 0.00% | +0.00% |
362
+ | BAD | challenge presen.. | my resolve unwavering .. | 0.00000% | 0.00% | +0.00% |
363
+ | BAD | humble vessel | surrendering to the ex.. | 0.00000% | 0.00% | +0.00% |
364
+ | BAD | bond | cherishing the unique | 0.00017% | 0.00% | +0.00% |
365
+ | BAD | bond | special | 0.00011% | 0.00% | -0.00% |
366
+ | BAD | grows stronger w.. | bond | 0.00000% | 0.00% | -0.00% |
367
+ | BAD | that cannot be b.. | bond | 0.00000% | 0.00% | +0.00% |
368
+ | BAD | becomes unbreaka.. | bond | 0.00000% | 0.00% | -0.00% |
369
+ | BAD | grew stronger wi.. | bond | 0.00000% | 0.00% | -0.00% |
370
+ | GOOD | The apple is in .. | Question: If I'm in th.. | 0.19188% | 0.19% | +0.19% |
371
+ ------------------------------------------------------------------------------------------------------
372
+ | Totals | 0.19% | 0.20% | 0.19% |
373
+ ------------------------------------------------------------------------------------------------------
374
+ * = Unweighted, raw probability - ** = Probability after weight adjustments
375
+
376
+ -------- MERGE COMPOSITION ---------
377
+ jondurbin_bagel-dpo-34b-v0.2: 0.70
378
+ NousResearch_Nous-Capybara-34B: 0.30
379
+
380
+ ------------------------------------
381
+ 07:59:18 - Loading model (../NousResearch_Nous-Hermes-2-Yi-34B)...
382
+ Loading checkpoint shards: 100%|████████████████| 15/15 [00:33<00:00, 2.22s/it]
383
+ 08:00:31 - Model loaded. Dtype: torch.float16
384
+ ------------------------------------
385
+ Optimizing Layer 1/60 (slerp): 100%|██████████████| 4/4 [03:32<00:00, 53.01s/it]
386
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.2, 'NousResearch_Nous-Capybara-34B'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B']]
387
+ 08:05:31 - Layer 1/60 - CHANGED - -0.00186 > -0.00230 - 23.5%
388
+ ----
389
+ Optimizing Layer 2/60 (slerp): 100%|██████████████| 4/4 [03:40<00:00, 55.00s/it]
390
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.4, 'NousResearch_Nous-Capybara-34B'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B']]
391
+ 08:10:21 - Layer 2/60 - CHANGED - -0.00230 > -0.00266 - 15.9%
392
+ ----
393
+ Optimizing Layer 3/60 (slerp): 100%|██████████████| 4/4 [04:33<00:00, 68.26s/it]
394
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
395
+ 08:16:22 - Layer 3/60 - RETAINED - -0.00266
396
+ ----
397
+ Optimizing Layer 4/60 (slerp): 100%|██████████████| 4/4 [05:06<00:00, 76.71s/it]
398
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B']]
399
+ 08:23:09 - Layer 4/60 - CHANGED - -0.00266 > -0.00294 - 10.5%
400
+ ----
401
+ Optimizing Layer 5/60 (slerp): 100%|██████████████| 4/4 [05:47<00:00, 86.79s/it]
402
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
403
+ 08:30:35 - Layer 5/60 - RETAINED - -0.00294
404
+ ----
405
+ Optimizing Layer 6/60 (slerp): 100%|██████████████| 4/4 [05:25<00:00, 81.41s/it]
406
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
407
+ 08:37:52 - Layer 6/60 - RETAINED - -0.00294
408
+ ----
409
+ Optimizing Layer 7/60 (slerp): 100%|██████████████| 4/4 [05:44<00:00, 86.12s/it]
410
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
411
+ 08:45:26 - Layer 7/60 - RETAINED - -0.00294
412
+ ----
413
+ Optimizing Layer 8/60 (slerp): 100%|██████████████| 4/4 [05:36<00:00, 84.21s/it]
414
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
415
+ 08:52:56 - Layer 8/60 - RETAINED - -0.00294
416
+ ----
417
+ Optimizing Layer 9/60 (slerp): 100%|██████████████| 4/4 [05:51<00:00, 87.81s/it]
418
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.4, 'NousResearch_Nous-Capybara-34B'], [0.6, 'NousResearch_Nous-Hermes-2-Yi-34B']]
419
+ 09:00:30 - Layer 9/60 - CHANGED - -0.00294 > -0.00297 - 1.2%
420
+ ----
421
+ Optimizing Layer 10/60 (slerp): 100%|█████████████| 4/4 [06:03<00:00, 90.97s/it]
422
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
423
+ 09:08:29 - Layer 10/60 - RETAINED - -0.00297
424
+ ----
425
+ Optimizing Layer 11/60 (slerp): 100%|█████████████| 4/4 [05:19<00:00, 79.95s/it]
426
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B']]
427
+ 09:15:40 - Layer 11/60 - CHANGED - -0.00297 > -0.00334 - 12.2%
428
+ ----
429
+ Optimizing Layer 12/60 (slerp): 100%|█████████████| 4/4 [05:47<00:00, 86.85s/it]
430
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B']]
431
+ 09:23:46 - Layer 12/60 - RETAINED - -0.00334
432
+ ----
433
+ Optimizing Layer 13/60 (slerp): 100%|█████████████| 4/4 [05:05<00:00, 76.33s/it]
434
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.4, 'NousResearch_Nous-Capybara-34B']]
435
+ 09:30:37 - Layer 13/60 - RETAINED - -0.00334
436
+ ----
437
+ Optimizing Layer 14/60 (slerp): 100%|█████████████| 4/4 [04:47<00:00, 71.79s/it]
438
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.4, 'NousResearch_Nous-Capybara-34B'], [0.2, 'NousResearch_Nous-Hermes-2-Yi-34B']]
439
+ 09:37:17 - Layer 14/60 - CHANGED - -0.00334 > -0.00336 - 0.8%
440
+ ----
441
+ Optimizing Layer 15/60 (slerp): 100%|█████████████| 4/4 [04:05<00:00, 61.32s/it]
442
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
443
+ 09:42:46 - Layer 15/60 - RETAINED - -0.00336
444
+ ----
445
+ Optimizing Layer 16/60 (slerp): 100%|█████████████| 4/4 [04:16<00:00, 64.24s/it]
446
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
447
+ 09:48:30 - Layer 16/60 - RETAINED - -0.00336
448
+ ----
449
+ Optimizing Layer 17/60 (slerp): 100%|█████████████| 4/4 [04:31<00:00, 67.78s/it]
450
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.6, 'NousResearch_Nous-Hermes-2-Yi-34B']]
451
+ 09:54:37 - Layer 17/60 - CHANGED - -0.00336 > -0.00361 - 7.3%
452
+ ----
453
+ Optimizing Layer 18/60 (slerp): 100%|█████████████| 4/4 [04:35<00:00, 68.88s/it]
454
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
455
+ 10:00:44 - Layer 18/60 - RETAINED - -0.00361
456
+ ----
457
+ Optimizing Layer 19/60 (slerp): 100%|█████████████| 4/4 [05:48<00:00, 87.17s/it]
458
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
459
+ 10:08:21 - Layer 19/60 - RETAINED - -0.00361
460
+ ----
461
+ Optimizing Layer 20/60 (slerp): 100%|█████████████| 4/4 [05:12<00:00, 78.07s/it]
462
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B']]
463
+ 10:15:11 - Layer 20/60 - RETAINED - -0.00361
464
+ ----
465
+ Optimizing Layer 21/60 (slerp): 100%|█████████████| 4/4 [04:18<00:00, 64.71s/it]
466
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B']]
467
+ 10:20:54 - Layer 21/60 - CHANGED - -0.00361 > -0.00376 - 4.3%
468
+ ----
469
+ Optimizing Layer 22/60 (slerp): 100%|█████████████| 4/4 [03:46<00:00, 56.73s/it]
470
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B']]
471
+ 10:26:01 - Layer 22/60 - CHANGED - -0.00376 > -0.00466 - 24.0%
472
+ ----
473
+ Optimizing Layer 23/60 (slerp): 100%|█████████████| 4/4 [04:01<00:00, 60.46s/it]
474
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B'], [0.4, 'NousResearch_Nous-Hermes-2-Yi-34B']]
475
+ 10:31:21 - Layer 23/60 - CHANGED - -0.00466 > -0.00616 - 32.1%
476
+ ----
477
+ Optimizing Layer 24/60 (slerp): 100%|█████████████| 4/4 [04:06<00:00, 61.57s/it]
478
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B']]
479
+ 10:36:43 - Layer 24/60 - CHANGED - -0.00616 > -0.00743 - 20.6%
480
+ ----
481
+ Optimizing Layer 25/60 (slerp): 100%|█████████████| 4/4 [04:09<00:00, 62.32s/it]
482
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B']]
483
+ 10:42:19 - Layer 25/60 - RETAINED - -0.00743
484
+ ----
485
+ Optimizing Layer 26/60 (slerp): 100%|█████████████| 4/4 [04:27<00:00, 66.78s/it]
486
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.4, 'NousResearch_Nous-Capybara-34B'], [0.4, 'NousResearch_Nous-Hermes-2-Yi-34B']]
487
+ 10:48:17 - Layer 26/60 - CHANGED - -0.00743 > -0.00745 - 0.3%
488
+ ----
489
+ Optimizing Layer 27/60 (slerp): 100%|█████████████| 4/4 [05:11<00:00, 77.78s/it]
490
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
491
+ 10:55:12 - Layer 27/60 - RETAINED - -0.00745
492
+ ----
493
+ Optimizing Layer 28/60 (slerp): 100%|█████████████| 4/4 [05:31<00:00, 82.92s/it]
494
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.4, 'NousResearch_Nous-Capybara-34B'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B']]
495
+ 11:02:26 - Layer 28/60 - CHANGED - -0.00745 > -0.00789 - 5.9%
496
+ ----
497
+ Optimizing Layer 29/60 (slerp): 100%|█████████████| 4/4 [05:10<00:00, 77.75s/it]
498
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B']]
499
+ 11:09:12 - Layer 29/60 - CHANGED - -0.00789 > -0.00824 - 4.5%
500
+ ----
501
+ Optimizing Layer 30/60 (slerp): 100%|█████████████| 4/4 [05:35<00:00, 83.82s/it]
502
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.2, 'NousResearch_Nous-Capybara-34B'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B']]
503
+ 11:16:32 - Layer 30/60 - CHANGED - -0.00824 > -0.00980 - 18.9%
504
+ ----
505
+ Optimizing Layer 31/60 (slerp): 100%|█████████████| 4/4 [06:09<00:00, 92.45s/it]
506
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B']]
507
+ 11:24:35 - Layer 31/60 - CHANGED - -0.00980 > -0.01486 - 51.6%
508
+ ----
509
+ Optimizing Layer 32/60 (slerp): 100%|█████████████| 4/4 [05:35<00:00, 83.93s/it]
510
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.6, 'NousResearch_Nous-Hermes-2-Yi-34B']]
511
+ 11:32:09 - Layer 32/60 - CHANGED - -0.01486 > -0.01743 - 17.3%
512
+ ----
513
+ Optimizing Layer 33/60 (slerp): 100%|█████████████| 4/4 [05:40<00:00, 85.07s/it]
514
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
515
+ 11:39:27 - Layer 33/60 - RETAINED - -0.01743
516
+ ----
517
+ Optimizing Layer 34/60 (slerp): 100%|█████████████| 4/4 [05:28<00:00, 82.20s/it]
518
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B']]
519
+ 11:46:40 - Layer 34/60 - CHANGED - -0.01743 > -0.02148 - 23.2%
520
+ ----
521
+ Optimizing Layer 35/60 (slerp): 100%|█████████████| 4/4 [06:17<00:00, 94.36s/it]
522
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
523
+ 11:54:42 - Layer 35/60 - RETAINED - -0.02148
524
+ ----
525
+ Optimizing Layer 36/60 (slerp): 100%|█████████████| 4/4 [05:46<00:00, 86.54s/it]
526
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B']]
527
+ 12:02:23 - Layer 36/60 - RETAINED - -0.02148
528
+ ----
529
+ Optimizing Layer 37/60 (slerp): 100%|█████████████| 4/4 [04:44<00:00, 71.19s/it]
530
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B']]
531
+ 12:08:46 - Layer 37/60 - CHANGED - -0.02148 > -0.02760 - 28.5%
532
+ ----
533
+ Optimizing Layer 38/60 (slerp): 100%|█████████████| 4/4 [03:58<00:00, 59.73s/it]
534
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.4, 'NousResearch_Nous-Hermes-2-Yi-34B']]
535
+ 12:14:11 - Layer 38/60 - CHANGED - -0.02760 > -0.02789 - 1.0%
536
+ ----
537
+ Optimizing Layer 39/60 (slerp): 100%|█████████████| 4/4 [04:00<00:00, 60.16s/it]
538
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B']]
539
+ 12:19:28 - Layer 39/60 - RETAINED - -0.02789
540
+ ----
541
+ Optimizing Layer 40/60 (slerp): 100%|█████████████| 4/4 [03:57<00:00, 59.45s/it]
542
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
543
+ 12:24:49 - Layer 40/60 - RETAINED - -0.02789
544
+ ----
545
+ Optimizing Layer 41/60 (slerp): 100%|█████████████| 4/4 [04:01<00:00, 60.34s/it]
546
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
547
+ 12:30:08 - Layer 41/60 - RETAINED - -0.02789
548
+ ----
549
+ Optimizing Layer 42/60 (slerp): 100%|█████████████| 4/4 [04:01<00:00, 60.29s/it]
550
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
551
+ 12:35:23 - Layer 42/60 - RETAINED - -0.02789
552
+ ----
553
+ Optimizing Layer 43/60 (slerp): 100%|█████████████| 4/4 [04:18<00:00, 64.70s/it]
554
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
555
+ 12:41:09 - Layer 43/60 - RETAINED - -0.02789
556
+ ----
557
+ Optimizing Layer 44/60 (slerp): 100%|█████████████| 4/4 [04:44<00:00, 71.20s/it]
558
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
559
+ 12:47:23 - Layer 44/60 - RETAINED - -0.02789
560
+ ----
561
+ Optimizing Layer 45/60 (slerp): 100%|█████████████| 4/4 [03:42<00:00, 55.71s/it]
562
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
563
+ 12:52:31 - Layer 45/60 - RETAINED - -0.02789
564
+ ----
565
+ Optimizing Layer 46/60 (slerp): 100%|█████████████| 4/4 [03:59<00:00, 59.77s/it]
566
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
567
+ 12:57:52 - Layer 46/60 - RETAINED - -0.02789
568
+ ----
569
+ Optimizing Layer 47/60 (slerp): 100%|█████████████| 4/4 [04:03<00:00, 60.98s/it]
570
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
571
+ 13:03:16 - Layer 47/60 - RETAINED - -0.02789
572
+ ----
573
+ Optimizing Layer 48/60 (slerp): 100%|█████████████| 4/4 [03:53<00:00, 58.40s/it]
574
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.2, 'NousResearch_Nous-Hermes-2-Yi-34B']]
575
+ 13:08:28 - Layer 48/60 - CHANGED - -0.02789 > -0.02789 - 0.0%
576
+ ----
577
+ Optimizing Layer 49/60 (slerp): 100%|█████████████| 4/4 [03:57<00:00, 59.32s/it]
578
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.4, 'NousResearch_Nous-Capybara-34B'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B']]
579
+ 13:13:43 - Layer 49/60 - CHANGED - -0.02789 > -0.02922 - 4.8%
580
+ ----
581
+ Optimizing Layer 50/60 (slerp): 100%|█████████████| 4/4 [04:03<00:00, 60.93s/it]
582
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.6, 'NousResearch_Nous-Capybara-34B'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B']]
583
+ 13:19:09 - Layer 50/60 - CHANGED - -0.02922 > -0.03467 - 18.6%
584
+ ----
585
+ Optimizing Layer 51/60 (slerp): 100%|█████████████| 4/4 [04:06<00:00, 61.73s/it]
586
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
587
+ 13:24:39 - Layer 51/60 - RETAINED - -0.03467
588
+ ----
589
+ Optimizing Layer 52/60 (slerp): 100%|█████████████| 4/4 [04:02<00:00, 60.70s/it]
590
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B']]
591
+ 13:29:58 - Layer 52/60 - CHANGED - -0.03467 > -0.03931 - 13.4%
592
+ ----
593
+ Optimizing Layer 53/60 (slerp): 100%|█████████████| 4/4 [04:00<00:00, 60.06s/it]
594
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.2, 'NousResearch_Nous-Capybara-34B'], [0.4, 'NousResearch_Nous-Hermes-2-Yi-34B']]
595
+ 13:35:19 - Layer 53/60 - CHANGED - -0.03931 > -0.04040 - 2.8%
596
+ ----
597
+ Optimizing Layer 54/60 (slerp): 100%|█████████████| 4/4 [04:30<00:00, 67.51s/it]
598
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.4, 'NousResearch_Nous-Capybara-34B'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B']]
599
+ 13:41:14 - Layer 54/60 - CHANGED - -0.04040 > -0.04498 - 11.3%
600
+ ----
601
+ Optimizing Layer 55/60 (slerp): 100%|█████████████| 4/4 [04:50<00:00, 72.65s/it]
602
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B']]
603
+ 13:47:49 - Layer 55/60 - CHANGED - -0.04498 > -0.04736 - 5.3%
604
+ ----
605
+ Optimizing Layer 56/60 (slerp): 100%|█████████████| 4/4 [05:28<00:00, 82.16s/it]
606
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B']]
607
+ 13:55:09 - Layer 56/60 - RETAINED - -0.04736
608
+ ----
609
+ Optimizing Layer 57/60 (slerp): 100%|█████████████| 4/4 [05:30<00:00, 82.57s/it]
610
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B']]
611
+ 14:02:30 - Layer 57/60 - RETAINED - -0.04736
612
+ ----
613
+ Optimizing Layer 58/60 (slerp): 100%|█████████████| 4/4 [06:22<00:00, 95.56s/it]
614
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.6, 'NousResearch_Nous-Capybara-34B']]
615
+ 14:11:07 - Layer 58/60 - RETAINED - -0.04736
616
+ ----
617
+ Optimizing Layer 59/60 (slerp): 100%|█████████████| 4/4 [05:52<00:00, 88.03s/it]
618
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B']]
619
+ 14:19:17 - Layer 59/60 - CHANGED - -0.04736 > -0.05244 - 10.7%
620
+ ----
621
+ Optimizing Layer 60/60 (slerp): 100%|█████████████| 4/4 [04:47<00:00, 71.86s/it]
622
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
623
+ 14:25:42 - Layer 60/60 - RETAINED - -0.05244
624
+ ----
625
+ Optimizing Header: 100%|██████████████████████████| 4/4 [03:37<00:00, 54.33s/it]
626
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.6, 'NousResearch_Nous-Capybara-34B'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B']]
627
+ 14:30:24 - Header - CHANGED - -0.05244 > -0.06200 - 18.2%
628
+
629
+ -----------------------------------------------------------------------------------------------------
630
+ | Type | Phrase | Context | Raw Prob* | Used Prob** | Change |
631
+ -----------------------------------------------------------------------------------------------------
632
+ | BAD | anticipation | Her body quivers with | 0.00000% | 0.00% | +0.00% |
633
+ | BAD | anticipation | The atmosphere is thic.. | 0.00000% | 0.00% | +0.00% |
634
+ | BAD | unwavering | Filled with an | 0.00000% | 0.00% | +0.00% |
635
+ | BAD | determination | Her eyes were filled w.. | 0.00000% | 0.00% | -0.00% |
636
+ | BAD | determination | Her stubbornness only .. | 0.00000% | 0.00% | +0.00% |
637
+ | BAD | whisper | Her voice barely above.. | 0.00000% | 0.00% | +0.00% |
638
+ | BAD | spine | shivers down her | 0.00000% | 0.00% | +0.00% |
639
+ | BAD | sends shivers | The thrill of the act | 0.00000% | 0.00% | +0.00% |
640
+ | BAD | ministrations | She moans and twitches.. | 0.00003% | 0.00% | -0.00% |
641
+ | BAD | legs | wraps her | 0.00000% | 0.00% | -0.00% |
642
+ | BAD | imposing figure | He had an | 0.00000% | 0.00% | -0.00% |
643
+ | BAD | shared challenges | Their bond strengthene.. | 0.00001% | 0.00% | +0.00% |
644
+ | BAD | bond | forged a | 0.00004% | 0.00% | -0.00% |
645
+ | BAD | bond | an unspoken | 0.00010% | 0.00% | +0.00% |
646
+ | BAD | enhance our expe.. | I'm excited to see how | 0.00000% | 0.00% | +0.00% |
647
+ | BAD | sense of vulnera.. | create a | 0.00000% | 0.00% | -0.00% |
648
+ | BAD | dimensions of in.. | explore new | 0.00000% | 0.00% | +0.00% |
649
+ | BAD | deepening our co.. | while | 0.00000% | 0.00% | -0.00% |
650
+ | BAD | shared experiences | through | 0.00001% | 0.00% | +0.00% |
651
+ | BAD | societal expecta.. | that transcend | 0.00000% | 0.00% | -0.00% |
652
+ | BAD | conventional bou.. | that defy | 0.00000% | 0.00% | +0.00% |
653
+ | BAD | conventional bou.. | and defy | 0.00000% | 0.00% | +0.00% |
654
+ | BAD | open communication | an environment | 0.00000% | 0.00% | +0.00% |
655
+ | BAD | emotional vulner.. | an environment | 0.00000% | 0.00% | +0.00% |
656
+ | BAD | heightens our co.. | touch and the anticipa.. | 0.00000% | 0.00% | -0.00% |
657
+ | BAD | sensations you'r.. | I'm enjoying | 0.00000% | 0.00% | +0.00% |
658
+ | BAD | is truly arousing | attention to detail | 0.00000% | 0.00% | +0.00% |
659
+ | BAD | is truly arousing | way you explore my body | 0.00000% | 0.00% | -0.00% |
660
+ | BAD | challenge presen.. | my resolve unwavering .. | 0.00000% | 0.00% | +0.00% |
661
+ | BAD | humble vessel | surrendering to the ex.. | 0.00000% | 0.00% | +0.00% |
662
+ | BAD | bond | cherishing the unique | 0.00019% | 0.00% | +0.00% |
663
+ | BAD | bond | special | 0.00023% | 0.00% | -0.00% |
664
+ | BAD | grows stronger w.. | bond | 0.00000% | 0.00% | -0.00% |
665
+ | BAD | that cannot be b.. | bond | 0.00000% | 0.00% | -0.00% |
666
+ | BAD | becomes unbreaka.. | bond | 0.00000% | 0.00% | +0.00% |
667
+ | BAD | grew stronger wi.. | bond | 0.00000% | 0.00% | +0.00% |
668
+ | GOOD | The apple is in .. | Question: If I'm in th.. | 6.12871% | 6.13% | +6.13% |
669
+ ------------------------------------------------------------------------------------------------------
670
+ | Totals | 6.13% | 6.14% | 6.13% |
671
+ ------------------------------------------------------------------------------------------------------
672
+ * = Unweighted, raw probability - ** = Probability after weight adjustments
673
+
674
+ -------- MERGE COMPOSITION ---------
675
+ jondurbin_bagel-dpo-34b-v0.2: 0.51
676
+ NousResearch_Nous-Hermes-2-Yi-34B: 0.32
677
+ NousResearch_Nous-Capybara-34B: 0.16
678
+
679
+ ------------------------------------
680
+ 14:31:32 - Loading model (../SUSTech_SUS-Chat-34B)...
681
+ Loading checkpoint shards: 100%|██████████████████| 7/7 [01:14<00:00, 10.68s/it]
682
+ 14:33:15 - Model loaded. Dtype: torch.float16
683
+ ------------------------------------
684
+ Optimizing Layer 1/60 (slerp): 100%|██████████████| 4/4 [02:55<00:00, 43.98s/it]
685
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.2, 'NousResearch_Nous-Capybara-34B'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B'], [0.2, 'SUSTech_SUS-Chat-34B']]
686
+ 14:37:13 - Layer 1/60 - CHANGED - -0.06121 > -0.06153 - 0.5%
687
+ ----
688
+ Optimizing Layer 2/60 (slerp): 100%|██████████████| 4/4 [02:57<00:00, 44.28s/it]
689
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.4, 'NousResearch_Nous-Capybara-34B'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B'], [0.6, 'SUSTech_SUS-Chat-34B']]
690
+ 14:41:08 - Layer 2/60 - CHANGED - -0.06153 > -0.06434 - 4.6%
691
+ ----
692
+ Optimizing Layer 3/60 (slerp): 100%|██████████████| 4/4 [02:59<00:00, 44.87s/it]
693
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
694
+ 14:45:04 - Layer 3/60 - RETAINED - -0.06434
695
+ ----
696
+ Optimizing Layer 4/60 (slerp): 100%|██████████████| 4/4 [03:24<00:00, 51.23s/it]
697
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B']]
698
+ 14:49:33 - Layer 4/60 - RETAINED - -0.06434
699
+ ----
700
+ Optimizing Layer 5/60 (slerp): 100%|██████████████| 4/4 [04:13<00:00, 63.44s/it]
701
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
702
+ 14:55:17 - Layer 5/60 - RETAINED - -0.06434
703
+ ----
704
+ Optimizing Layer 6/60 (slerp): 100%|██████████████| 4/4 [05:08<00:00, 77.18s/it]
705
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
706
+ 15:01:58 - Layer 6/60 - RETAINED - -0.06434
707
+ ----
708
+ Optimizing Layer 7/60 (slerp): 100%|██████████████| 4/4 [04:41<00:00, 70.31s/it]
709
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
710
+ 15:08:01 - Layer 7/60 - RETAINED - -0.06434
711
+ ----
712
+ Optimizing Layer 8/60 (slerp): 100%|██████████████| 4/4 [03:51<00:00, 57.86s/it]
713
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
714
+ 15:13:10 - Layer 8/60 - RETAINED - -0.06434
715
+ ----
716
+ Optimizing Layer 9/60 (slerp): 100%|██████████████| 4/4 [04:02<00:00, 60.54s/it]
717
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.4, 'NousResearch_Nous-Capybara-34B'], [0.6, 'NousResearch_Nous-Hermes-2-Yi-34B'], [0.4, 'SUSTech_SUS-Chat-34B']]
718
+ 15:18:34 - Layer 9/60 - CHANGED - -0.06434 > -0.06464 - 0.5%
719
+ ----
720
+ Optimizing Layer 10/60 (slerp): 100%|█████████████| 4/4 [03:53<00:00, 58.40s/it]
721
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
722
+ 15:23:40 - Layer 10/60 - RETAINED - -0.06464
723
+ ----
724
+ Optimizing Layer 11/60 (slerp): 100%|█████████████| 4/4 [03:39<00:00, 54.91s/it]
725
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B']]
726
+ 15:28:32 - Layer 11/60 - RETAINED - -0.06464
727
+ ----
728
+ Optimizing Layer 12/60 (slerp): 100%|█████████████| 4/4 [03:40<00:00, 55.10s/it]
729
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B']]
730
+ 15:33:27 - Layer 12/60 - RETAINED - -0.06464
731
+ ----
732
+ Optimizing Layer 13/60 (slerp): 100%|█████████████| 4/4 [03:49<00:00, 57.36s/it]
733
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.4, 'NousResearch_Nous-Capybara-34B'], [0.4, 'SUSTech_SUS-Chat-34B']]
734
+ 15:38:35 - Layer 13/60 - CHANGED - -0.06464 > -0.06527 - 1.0%
735
+ ----
736
+ Optimizing Layer 14/60 (slerp): 100%|█████████████| 4/4 [03:42<00:00, 55.74s/it]
737
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.4, 'NousResearch_Nous-Capybara-34B'], [0.2, 'NousResearch_Nous-Hermes-2-Yi-34B'], [0.6, 'SUSTech_SUS-Chat-34B']]
738
+ 15:43:30 - Layer 14/60 - CHANGED - -0.06527 > -0.06851 - 5.0%
739
+ ----
740
+ Optimizing Layer 15/60 (slerp): 100%|█████████████| 4/4 [03:44<00:00, 56.04s/it]
741
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
742
+ 15:48:41 - Layer 15/60 - RETAINED - -0.06851
743
+ ----
744
+ Optimizing Layer 16/60 (slerp): 100%|█████████████| 4/4 [05:23<00:00, 80.84s/it]
745
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
746
+ 15:55:48 - Layer 16/60 - RETAINED - -0.06851
747
+ ----
748
+ Optimizing Layer 17/60 (slerp): 100%|█████████████| 4/4 [05:31<00:00, 82.76s/it]
749
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.6, 'NousResearch_Nous-Hermes-2-Yi-34B']]
750
+ 16:03:01 - Layer 17/60 - RETAINED - -0.06851
751
+ ----
752
+ Optimizing Layer 18/60 (slerp): 100%|█████████████| 4/4 [05:34<00:00, 83.64s/it]
753
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
754
+ 16:10:28 - Layer 18/60 - RETAINED - -0.06851
755
+ ----
756
+ Optimizing Layer 19/60 (slerp): 100%|█████████████| 4/4 [06:17<00:00, 94.38s/it]
757
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
758
+ 16:18:46 - Layer 19/60 - RETAINED - -0.06851
759
+ ----
760
+ Optimizing Layer 20/60 (slerp): 100%|█████████████| 4/4 [04:52<00:00, 73.08s/it]
761
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B'], [0.4, 'SUSTech_SUS-Chat-34B']]
762
+ 16:25:26 - Layer 20/60 - CHANGED - -0.06851 > -0.06892 - 0.6%
763
+ ----
764
+ Optimizing Layer 21/60 (slerp): 100%|█████████████| 4/4 [05:08<00:00, 77.11s/it]
765
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B']]
766
+ 16:32:37 - Layer 21/60 - RETAINED - -0.06892
767
+ ----
768
+ Optimizing Layer 22/60 (slerp): 100%|█████████████| 4/4 [04:54<00:00, 73.54s/it]
769
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B']]
770
+ 16:39:05 - Layer 22/60 - RETAINED - -0.06892
771
+ ----
772
+ Optimizing Layer 23/60 (slerp): 100%|█████████████| 4/4 [04:53<00:00, 73.34s/it]
773
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B'], [0.4, 'NousResearch_Nous-Hermes-2-Yi-34B']]
774
+ 16:45:29 - Layer 23/60 - RETAINED - -0.06892
775
+ ----
776
+ Optimizing Layer 24/60 (slerp): 100%|█████████████| 4/4 [04:53<00:00, 73.38s/it]
777
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B']]
778
+ 16:51:58 - Layer 24/60 - RETAINED - -0.06892
779
+ ----
780
+ Optimizing Layer 25/60 (slerp): 100%|█████████████| 4/4 [04:55<00:00, 73.86s/it]
781
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B'], [0.4, 'SUSTech_SUS-Chat-34B']]
782
+ 16:58:30 - Layer 25/60 - CHANGED - -0.06892 > -0.07074 - 2.6%
783
+ ----
784
+ Optimizing Layer 26/60 (slerp): 100%|█████████████| 4/4 [04:11<00:00, 62.83s/it]
785
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.4, 'NousResearch_Nous-Capybara-34B'], [0.4, 'NousResearch_Nous-Hermes-2-Yi-34B']]
786
+ 17:04:08 - Layer 26/60 - RETAINED - -0.07074
787
+ ----
788
+ Optimizing Layer 27/60 (slerp): 100%|█████████████| 4/4 [04:11<00:00, 62.75s/it]
789
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
790
+ 17:09:50 - Layer 27/60 - RETAINED - -0.07074
791
+ ----
792
+ Optimizing Layer 28/60 (slerp): 100%|█████████████| 4/4 [04:05<00:00, 61.40s/it]
793
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.4, 'NousResearch_Nous-Capybara-34B'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B']]
794
+ 17:15:21 - Layer 28/60 - RETAINED - -0.07074
795
+ ----
796
+ Optimizing Layer 29/60 (slerp): 100%|█████████████| 4/4 [05:07<00:00, 76.83s/it]
797
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B']]
798
+ 17:21:57 - Layer 29/60 - RETAINED - -0.07074
799
+ ----
800
+ Optimizing Layer 30/60 (slerp): 100%|█████████████| 4/4 [04:06<00:00, 61.63s/it]
801
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.2, 'NousResearch_Nous-Capybara-34B'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B']]
802
+ 17:27:34 - Layer 30/60 - RETAINED - -0.07074
803
+ ----
804
+ Optimizing Layer 31/60 (slerp): 100%|█████████████| 4/4 [04:21<00:00, 65.25s/it]
805
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B']]
806
+ 17:33:24 - Layer 31/60 - RETAINED - -0.07074
807
+ ----
808
+ Optimizing Layer 32/60 (slerp): 100%|█████████████| 4/4 [04:36<00:00, 69.13s/it]
809
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.6, 'NousResearch_Nous-Hermes-2-Yi-34B']]
810
+ 17:39:20 - Layer 32/60 - RETAINED - -0.07074
811
+ ----
812
+ Optimizing Layer 33/60 (slerp): 100%|█████████████| 4/4 [04:52<00:00, 73.01s/it]
813
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
814
+ 17:45:42 - Layer 33/60 - RETAINED - -0.07074
815
+ ----
816
+ Optimizing Layer 34/60 (slerp): 100%|█████████████| 4/4 [05:09<00:00, 77.30s/it]
817
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B']]
818
+ 17:52:34 - Layer 34/60 - RETAINED - -0.07074
819
+ ----
820
+ Optimizing Layer 35/60 (slerp): 100%|█████████████| 4/4 [05:09<00:00, 77.29s/it]
821
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
822
+ 17:59:16 - Layer 35/60 - RETAINED - -0.07074
823
+ ----
824
+ Optimizing Layer 36/60 (slerp): 100%|█████████████| 4/4 [05:19<00:00, 79.91s/it]
825
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B']]
826
+ 18:06:13 - Layer 36/60 - RETAINED - -0.07074
827
+ ----
828
+ Optimizing Layer 37/60 (slerp): 100%|█████████████| 4/4 [05:40<00:00, 85.08s/it]
829
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B'], [0.8, 'SUSTech_SUS-Chat-34B']]
830
+ 18:13:35 - Layer 37/60 - CHANGED - -0.07074 > -0.07127 - 0.8%
831
+ ----
832
+ Optimizing Layer 38/60 (slerp): 100%|█████████████| 4/4 [04:50<00:00, 72.69s/it]
833
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.4, 'NousResearch_Nous-Hermes-2-Yi-34B']]
834
+ 18:20:03 - Layer 38/60 - RETAINED - -0.07127
835
+ ----
836
+ Optimizing Layer 39/60 (slerp): 100%|█████████████| 4/4 [05:23<00:00, 80.96s/it]
837
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B']]
838
+ 18:26:55 - Layer 39/60 - RETAINED - -0.07127
839
+ ----
840
+ Optimizing Layer 40/60 (slerp): 100%|█████████████| 4/4 [04:10<00:00, 62.57s/it]
841
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
842
+ 18:32:47 - Layer 40/60 - RETAINED - -0.07127
843
+ ----
844
+ Optimizing Layer 41/60 (slerp): 100%|█████████████| 4/4 [05:23<00:00, 80.96s/it]
845
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
846
+ 18:39:44 - Layer 41/60 - RETAINED - -0.07127
847
+ ----
848
+ Optimizing Layer 42/60 (slerp): 100%|█████████████| 4/4 [04:03<00:00, 60.87s/it]
849
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
850
+ 18:45:31 - Layer 42/60 - RETAINED - -0.07127
851
+ ----
852
+ Optimizing Layer 43/60 (slerp): 100%|█████████████| 4/4 [03:36<00:00, 54.22s/it]
853
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
854
+ 18:50:34 - Layer 43/60 - RETAINED - -0.07127
855
+ ----
856
+ Optimizing Layer 44/60 (slerp): 100%|█████████████| 4/4 [03:52<00:00, 58.18s/it]
857
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
858
+ 18:55:44 - Layer 44/60 - RETAINED - -0.07127
859
+ ----
860
+ Optimizing Layer 45/60 (slerp): 100%|█████████████| 4/4 [03:39<00:00, 54.92s/it]
861
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
862
+ 19:00:39 - Layer 45/60 - RETAINED - -0.07127
863
+ ----
864
+ Optimizing Layer 46/60 (slerp): 100%|█████████████| 4/4 [03:36<00:00, 54.06s/it]
865
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
866
+ 19:05:24 - Layer 46/60 - RETAINED - -0.07127
867
+ ----
868
+ Optimizing Layer 47/60 (slerp): 100%|█████████████| 4/4 [03:50<00:00, 57.54s/it]
869
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
870
+ 19:10:28 - Layer 47/60 - RETAINED - -0.07127
871
+ ----
872
+ Optimizing Layer 48/60 (slerp): 100%|█████████████| 4/4 [04:02<00:00, 60.62s/it]
873
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.2, 'NousResearch_Nous-Hermes-2-Yi-34B']]
874
+ 19:15:45 - Layer 48/60 - RETAINED - -0.07127
875
+ ----
876
+ Optimizing Layer 49/60 (slerp): 100%|█████████████| 4/4 [03:59<00:00, 59.77s/it]
877
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.4, 'NousResearch_Nous-Capybara-34B'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B'], [0.6, 'SUSTech_SUS-Chat-34B']]
878
+ 19:21:02 - Layer 49/60 - CHANGED - -0.07127 > -0.07407 - 3.9%
879
+ ----
880
+ Optimizing Layer 50/60 (slerp): 100%|█████████████| 4/4 [03:53<00:00, 58.25s/it]
881
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.6, 'NousResearch_Nous-Capybara-34B'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B'], [0.4, 'SUSTech_SUS-Chat-34B']]
882
+ 19:26:11 - Layer 50/60 - CHANGED - -0.07407 > -0.07571 - 2.2%
883
+ ----
884
+ Optimizing Layer 51/60 (slerp): 100%|█████████████| 4/4 [03:59<00:00, 59.91s/it]
885
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
886
+ 19:31:30 - Layer 51/60 - RETAINED - -0.07571
887
+ ----
888
+ Optimizing Layer 52/60 (slerp): 100%|█████████████| 4/4 [04:43<00:00, 70.77s/it]
889
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B'], [0.6, 'SUSTech_SUS-Chat-34B']]
890
+ 19:37:38 - Layer 52/60 - CHANGED - -0.07571 > -0.07660 - 1.2%
891
+ ----
892
+ Optimizing Layer 53/60 (slerp): 100%|█████████████| 4/4 [04:26<00:00, 66.68s/it]
893
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.2, 'NousResearch_Nous-Capybara-34B'], [0.4, 'NousResearch_Nous-Hermes-2-Yi-34B'], [0.8, 'SUSTech_SUS-Chat-34B']]
894
+ 19:43:27 - Layer 53/60 - CHANGED - -0.07660 > -0.07717 - 0.8%
895
+ ----
896
+ Optimizing Layer 54/60 (slerp): 100%|█████████████| 4/4 [04:49<00:00, 72.34s/it]
897
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.4, 'NousResearch_Nous-Capybara-34B'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B'], [0.8, 'SUSTech_SUS-Chat-34B']]
898
+ 19:50:18 - Layer 54/60 - CHANGED - -0.07717 > -0.07775 - 0.7%
899
+ ----
900
+ Optimizing Layer 55/60 (slerp): 100%|█████████████| 4/4 [04:12<00:00, 63.01s/it]
901
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B'], [0.8, 'SUSTech_SUS-Chat-34B']]
902
+ 19:56:01 - Layer 55/60 - CHANGED - -0.07775 > -0.07923 - 1.9%
903
+ ----
904
+ Optimizing Layer 56/60 (slerp): 100%|█████████████| 4/4 [03:56<00:00, 59.03s/it]
905
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B']]
906
+ 20:01:25 - Layer 56/60 - RETAINED - -0.07923
907
+ ----
908
+ Optimizing Layer 57/60 (slerp): 100%|█████████████| 4/4 [04:07<00:00, 61.99s/it]
909
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Capybara-34B']]
910
+ 20:06:54 - Layer 57/60 - RETAINED - -0.07923
911
+ ----
912
+ Optimizing Layer 58/60 (slerp): 100%|█████████████| 4/4 [03:55<00:00, 58.84s/it]
913
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.6, 'NousResearch_Nous-Capybara-34B']]
914
+ 20:12:09 - Layer 58/60 - RETAINED - -0.07923
915
+ ----
916
+ Optimizing Layer 59/60 (slerp): 100%|█████████████| 4/4 [03:27<00:00, 51.80s/it]
917
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B']]
918
+ 20:16:49 - Layer 59/60 - RETAINED - -0.07923
919
+ ----
920
+ Optimizing Layer 60/60 (slerp): 100%|█████████████| 4/4 [04:01<00:00, 60.29s/it]
921
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2']]
922
+ 20:22:08 - Layer 60/60 - RETAINED - -0.07923
923
+ ----
924
+ Optimizing Header: 100%|██████████████████████████| 4/4 [03:49<00:00, 57.30s/it]
925
+ [[1.0, 'jondurbin_bagel-dpo-34b-v0.2'], [0.6, 'NousResearch_Nous-Capybara-34B'], [0.8, 'NousResearch_Nous-Hermes-2-Yi-34B'], [0.6, 'SUSTech_SUS-Chat-34B']]
926
+ 20:26:56 - Header - CHANGED - -0.07923 > -0.07981 - 0.7%
927
+
928
+ -----------------------------------------------------------------------------------------------------
929
+ | Type | Phrase | Context | Raw Prob* | Used Prob** | Change |
930
+ -----------------------------------------------------------------------------------------------------
931
+ | BAD | anticipation | Her body quivers with | 0.00000% | 0.00% | +0.00% |
932
+ | BAD | anticipation | The atmosphere is thic.. | 0.00000% | 0.00% | +0.00% |
933
+ | BAD | unwavering | Filled with an | 0.00000% | 0.00% | +0.00% |
934
+ | BAD | determination | Her eyes were filled w.. | 0.00000% | 0.00% | -0.00% |
935
+ | BAD | determination | Her stubbornness only .. | 0.00000% | 0.00% | +0.00% |
936
+ | BAD | whisper | Her voice barely above.. | 0.00000% | 0.00% | +0.00% |
937
+ | BAD | spine | shivers down her | 0.00000% | 0.00% | +0.00% |
938
+ | BAD | sends shivers | The thrill of the act | 0.00000% | 0.00% | +0.00% |
939
+ | BAD | ministrations | She moans and twitches.. | 0.00004% | 0.00% | -0.00% |
940
+ | BAD | legs | wraps her | 0.00000% | 0.00% | -0.00% |
941
+ | BAD | imposing figure | He had an | 0.00000% | 0.00% | -0.00% |
942
+ | BAD | shared challenges | Their bond strengthene.. | 0.00001% | 0.00% | +0.00% |
943
+ | BAD | bond | forged a | 0.00005% | 0.00% | -0.00% |
944
+ | BAD | bond | an unspoken | 0.00010% | 0.00% | +0.00% |
945
+ | BAD | enhance our expe.. | I'm excited to see how | 0.00000% | 0.00% | +0.00% |
946
+ | BAD | sense of vulnera.. | create a | 0.00000% | 0.00% | -0.00% |
947
+ | BAD | dimensions of in.. | explore new | 0.00000% | 0.00% | +0.00% |
948
+ | BAD | deepening our co.. | while | 0.00000% | 0.00% | -0.00% |
949
+ | BAD | shared experiences | through | 0.00001% | 0.00% | +0.00% |
950
+ | BAD | societal expecta.. | that transcend | 0.00000% | 0.00% | -0.00% |
951
+ | BAD | conventional bou.. | that defy | 0.00000% | 0.00% | +0.00% |
952
+ | BAD | conventional bou.. | and defy | 0.00000% | 0.00% | +0.00% |
953
+ | BAD | open communication | an environment | 0.00000% | 0.00% | +0.00% |
954
+ | BAD | emotional vulner.. | an environment | 0.00000% | 0.00% | +0.00% |
955
+ | BAD | heightens our co.. | touch and the anticipa.. | 0.00000% | 0.00% | -0.00% |
956
+ | BAD | sensations you'r.. | I'm enjoying | 0.00000% | 0.00% | +0.00% |
957
+ | BAD | is truly arousing | attention to detail | 0.00000% | 0.00% | +0.00% |
958
+ | BAD | is truly arousing | way you explore my body | 0.00000% | 0.00% | +0.00% |
959
+ | BAD | challenge presen.. | my resolve unwavering .. | 0.00000% | 0.00% | +0.00% |
960
+ | BAD | humble vessel | surrendering to the ex.. | 0.00000% | 0.00% | +0.00% |
961
+ | BAD | bond | cherishing the unique | 0.00019% | 0.00% | +0.00% |
962
+ | BAD | bond | special | 0.00014% | 0.00% | -0.00% |
963
+ | BAD | grows stronger w.. | bond | 0.00000% | 0.00% | -0.00% |
964
+ | BAD | that cannot be b.. | bond | 0.00000% | 0.00% | -0.00% |
965
+ | BAD | becomes unbreaka.. | bond | 0.00000% | 0.00% | +0.00% |
966
+ | BAD | grew stronger wi.. | bond | 0.00000% | 0.00% | +0.00% |
967
+ | GOOD | The apple is in .. | Question: If I'm in th.. | 7.81435% | 7.81% | +7.81% |
968
+ ------------------------------------------------------------------------------------------------------
969
+ | Totals | 7.81% | 7.82% | 7.81% |
970
+ ------------------------------------------------------------------------------------------------------
971
+ * = Unweighted, raw probability - ** = Probability after weight adjustments
972
+
973
+ -------- MERGE COMPOSITION ---------
974
+ jondurbin_bagel-dpo-34b-v0.2: 0.49
975
+ NousResearch_Nous-Hermes-2-Yi-34B: 0.24
976
+ SUSTech_SUS-Chat-34B: 0.14
977
+ NousResearch_Nous-Capybara-34B: 0.13
978
+
979
+ 20:28:04 - Saving model to ./mm-output...
980
+ 20:28:48 - Copying tokenizer files to ./mm-output...
981
+ Skipped added_tokens.json (not found)
982
+ Copied tokenizer.model
983
+ Copied special_tokens_map.json
984
+ Copied tokenizer_config.json
985
+ Skipped vocab.json (not found)
986
+ Skipped merges.txt (not found)
987
+ 20:28:48 - Model and tokenizer files saved successfully.
model.safetensors.index.json ADDED
@@ -0,0 +1,550 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 68777834496
4
+ },
5
+ "weight_map": {
6
+ "lm_head.weight": "model-00015-of-00015.safetensors",
7
+ "model.embed_tokens.weight": "model-00001-of-00015.safetensors",
8
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00015.safetensors",
9
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00015.safetensors",
10
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00015.safetensors",
11
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00015.safetensors",
12
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00015.safetensors",
13
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00015.safetensors",
14
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00015.safetensors",
15
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00015.safetensors",
16
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00015.safetensors",
17
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00015.safetensors",
18
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00015.safetensors",
19
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00015.safetensors",
20
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00015.safetensors",
21
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00015.safetensors",
22
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00015.safetensors",
23
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00015.safetensors",
24
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00015.safetensors",
25
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00015.safetensors",
26
+ "model.layers.10.input_layernorm.weight": "model-00003-of-00015.safetensors",
27
+ "model.layers.10.mlp.down_proj.weight": "model-00003-of-00015.safetensors",
28
+ "model.layers.10.mlp.gate_proj.weight": "model-00003-of-00015.safetensors",
29
+ "model.layers.10.mlp.up_proj.weight": "model-00003-of-00015.safetensors",
30
+ "model.layers.10.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
31
+ "model.layers.10.self_attn.k_proj.weight": "model-00003-of-00015.safetensors",
32
+ "model.layers.10.self_attn.o_proj.weight": "model-00003-of-00015.safetensors",
33
+ "model.layers.10.self_attn.q_proj.weight": "model-00003-of-00015.safetensors",
34
+ "model.layers.10.self_attn.v_proj.weight": "model-00003-of-00015.safetensors",
35
+ "model.layers.11.input_layernorm.weight": "model-00003-of-00015.safetensors",
36
+ "model.layers.11.mlp.down_proj.weight": "model-00003-of-00015.safetensors",
37
+ "model.layers.11.mlp.gate_proj.weight": "model-00003-of-00015.safetensors",
38
+ "model.layers.11.mlp.up_proj.weight": "model-00003-of-00015.safetensors",
39
+ "model.layers.11.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
40
+ "model.layers.11.self_attn.k_proj.weight": "model-00003-of-00015.safetensors",
41
+ "model.layers.11.self_attn.o_proj.weight": "model-00003-of-00015.safetensors",
42
+ "model.layers.11.self_attn.q_proj.weight": "model-00003-of-00015.safetensors",
43
+ "model.layers.11.self_attn.v_proj.weight": "model-00003-of-00015.safetensors",
44
+ "model.layers.12.input_layernorm.weight": "model-00004-of-00015.safetensors",
45
+ "model.layers.12.mlp.down_proj.weight": "model-00004-of-00015.safetensors",
46
+ "model.layers.12.mlp.gate_proj.weight": "model-00004-of-00015.safetensors",
47
+ "model.layers.12.mlp.up_proj.weight": "model-00004-of-00015.safetensors",
48
+ "model.layers.12.post_attention_layernorm.weight": "model-00004-of-00015.safetensors",
49
+ "model.layers.12.self_attn.k_proj.weight": "model-00003-of-00015.safetensors",
50
+ "model.layers.12.self_attn.o_proj.weight": "model-00003-of-00015.safetensors",
51
+ "model.layers.12.self_attn.q_proj.weight": "model-00003-of-00015.safetensors",
52
+ "model.layers.12.self_attn.v_proj.weight": "model-00003-of-00015.safetensors",
53
+ "model.layers.13.input_layernorm.weight": "model-00004-of-00015.safetensors",
54
+ "model.layers.13.mlp.down_proj.weight": "model-00004-of-00015.safetensors",
55
+ "model.layers.13.mlp.gate_proj.weight": "model-00004-of-00015.safetensors",
56
+ "model.layers.13.mlp.up_proj.weight": "model-00004-of-00015.safetensors",
57
+ "model.layers.13.post_attention_layernorm.weight": "model-00004-of-00015.safetensors",
58
+ "model.layers.13.self_attn.k_proj.weight": "model-00004-of-00015.safetensors",
59
+ "model.layers.13.self_attn.o_proj.weight": "model-00004-of-00015.safetensors",
60
+ "model.layers.13.self_attn.q_proj.weight": "model-00004-of-00015.safetensors",
61
+ "model.layers.13.self_attn.v_proj.weight": "model-00004-of-00015.safetensors",
62
+ "model.layers.14.input_layernorm.weight": "model-00004-of-00015.safetensors",
63
+ "model.layers.14.mlp.down_proj.weight": "model-00004-of-00015.safetensors",
64
+ "model.layers.14.mlp.gate_proj.weight": "model-00004-of-00015.safetensors",
65
+ "model.layers.14.mlp.up_proj.weight": "model-00004-of-00015.safetensors",
66
+ "model.layers.14.post_attention_layernorm.weight": "model-00004-of-00015.safetensors",
67
+ "model.layers.14.self_attn.k_proj.weight": "model-00004-of-00015.safetensors",
68
+ "model.layers.14.self_attn.o_proj.weight": "model-00004-of-00015.safetensors",
69
+ "model.layers.14.self_attn.q_proj.weight": "model-00004-of-00015.safetensors",
70
+ "model.layers.14.self_attn.v_proj.weight": "model-00004-of-00015.safetensors",
71
+ "model.layers.15.input_layernorm.weight": "model-00004-of-00015.safetensors",
72
+ "model.layers.15.mlp.down_proj.weight": "model-00004-of-00015.safetensors",
73
+ "model.layers.15.mlp.gate_proj.weight": "model-00004-of-00015.safetensors",
74
+ "model.layers.15.mlp.up_proj.weight": "model-00004-of-00015.safetensors",
75
+ "model.layers.15.post_attention_layernorm.weight": "model-00004-of-00015.safetensors",
76
+ "model.layers.15.self_attn.k_proj.weight": "model-00004-of-00015.safetensors",
77
+ "model.layers.15.self_attn.o_proj.weight": "model-00004-of-00015.safetensors",
78
+ "model.layers.15.self_attn.q_proj.weight": "model-00004-of-00015.safetensors",
79
+ "model.layers.15.self_attn.v_proj.weight": "model-00004-of-00015.safetensors",
80
+ "model.layers.16.input_layernorm.weight": "model-00005-of-00015.safetensors",
81
+ "model.layers.16.mlp.down_proj.weight": "model-00005-of-00015.safetensors",
82
+ "model.layers.16.mlp.gate_proj.weight": "model-00004-of-00015.safetensors",
83
+ "model.layers.16.mlp.up_proj.weight": "model-00005-of-00015.safetensors",
84
+ "model.layers.16.post_attention_layernorm.weight": "model-00005-of-00015.safetensors",
85
+ "model.layers.16.self_attn.k_proj.weight": "model-00004-of-00015.safetensors",
86
+ "model.layers.16.self_attn.o_proj.weight": "model-00004-of-00015.safetensors",
87
+ "model.layers.16.self_attn.q_proj.weight": "model-00004-of-00015.safetensors",
88
+ "model.layers.16.self_attn.v_proj.weight": "model-00004-of-00015.safetensors",
89
+ "model.layers.17.input_layernorm.weight": "model-00005-of-00015.safetensors",
90
+ "model.layers.17.mlp.down_proj.weight": "model-00005-of-00015.safetensors",
91
+ "model.layers.17.mlp.gate_proj.weight": "model-00005-of-00015.safetensors",
92
+ "model.layers.17.mlp.up_proj.weight": "model-00005-of-00015.safetensors",
93
+ "model.layers.17.post_attention_layernorm.weight": "model-00005-of-00015.safetensors",
94
+ "model.layers.17.self_attn.k_proj.weight": "model-00005-of-00015.safetensors",
95
+ "model.layers.17.self_attn.o_proj.weight": "model-00005-of-00015.safetensors",
96
+ "model.layers.17.self_attn.q_proj.weight": "model-00005-of-00015.safetensors",
97
+ "model.layers.17.self_attn.v_proj.weight": "model-00005-of-00015.safetensors",
98
+ "model.layers.18.input_layernorm.weight": "model-00005-of-00015.safetensors",
99
+ "model.layers.18.mlp.down_proj.weight": "model-00005-of-00015.safetensors",
100
+ "model.layers.18.mlp.gate_proj.weight": "model-00005-of-00015.safetensors",
101
+ "model.layers.18.mlp.up_proj.weight": "model-00005-of-00015.safetensors",
102
+ "model.layers.18.post_attention_layernorm.weight": "model-00005-of-00015.safetensors",
103
+ "model.layers.18.self_attn.k_proj.weight": "model-00005-of-00015.safetensors",
104
+ "model.layers.18.self_attn.o_proj.weight": "model-00005-of-00015.safetensors",
105
+ "model.layers.18.self_attn.q_proj.weight": "model-00005-of-00015.safetensors",
106
+ "model.layers.18.self_attn.v_proj.weight": "model-00005-of-00015.safetensors",
107
+ "model.layers.19.input_layernorm.weight": "model-00005-of-00015.safetensors",
108
+ "model.layers.19.mlp.down_proj.weight": "model-00005-of-00015.safetensors",
109
+ "model.layers.19.mlp.gate_proj.weight": "model-00005-of-00015.safetensors",
110
+ "model.layers.19.mlp.up_proj.weight": "model-00005-of-00015.safetensors",
111
+ "model.layers.19.post_attention_layernorm.weight": "model-00005-of-00015.safetensors",
112
+ "model.layers.19.self_attn.k_proj.weight": "model-00005-of-00015.safetensors",
113
+ "model.layers.19.self_attn.o_proj.weight": "model-00005-of-00015.safetensors",
114
+ "model.layers.19.self_attn.q_proj.weight": "model-00005-of-00015.safetensors",
115
+ "model.layers.19.self_attn.v_proj.weight": "model-00005-of-00015.safetensors",
116
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00015.safetensors",
117
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00015.safetensors",
118
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00015.safetensors",
119
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00015.safetensors",
120
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00015.safetensors",
121
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00015.safetensors",
122
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00015.safetensors",
123
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00015.safetensors",
124
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00015.safetensors",
125
+ "model.layers.20.input_layernorm.weight": "model-00006-of-00015.safetensors",
126
+ "model.layers.20.mlp.down_proj.weight": "model-00006-of-00015.safetensors",
127
+ "model.layers.20.mlp.gate_proj.weight": "model-00005-of-00015.safetensors",
128
+ "model.layers.20.mlp.up_proj.weight": "model-00005-of-00015.safetensors",
129
+ "model.layers.20.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
130
+ "model.layers.20.self_attn.k_proj.weight": "model-00005-of-00015.safetensors",
131
+ "model.layers.20.self_attn.o_proj.weight": "model-00005-of-00015.safetensors",
132
+ "model.layers.20.self_attn.q_proj.weight": "model-00005-of-00015.safetensors",
133
+ "model.layers.20.self_attn.v_proj.weight": "model-00005-of-00015.safetensors",
134
+ "model.layers.21.input_layernorm.weight": "model-00006-of-00015.safetensors",
135
+ "model.layers.21.mlp.down_proj.weight": "model-00006-of-00015.safetensors",
136
+ "model.layers.21.mlp.gate_proj.weight": "model-00006-of-00015.safetensors",
137
+ "model.layers.21.mlp.up_proj.weight": "model-00006-of-00015.safetensors",
138
+ "model.layers.21.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
139
+ "model.layers.21.self_attn.k_proj.weight": "model-00006-of-00015.safetensors",
140
+ "model.layers.21.self_attn.o_proj.weight": "model-00006-of-00015.safetensors",
141
+ "model.layers.21.self_attn.q_proj.weight": "model-00006-of-00015.safetensors",
142
+ "model.layers.21.self_attn.v_proj.weight": "model-00006-of-00015.safetensors",
143
+ "model.layers.22.input_layernorm.weight": "model-00006-of-00015.safetensors",
144
+ "model.layers.22.mlp.down_proj.weight": "model-00006-of-00015.safetensors",
145
+ "model.layers.22.mlp.gate_proj.weight": "model-00006-of-00015.safetensors",
146
+ "model.layers.22.mlp.up_proj.weight": "model-00006-of-00015.safetensors",
147
+ "model.layers.22.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
148
+ "model.layers.22.self_attn.k_proj.weight": "model-00006-of-00015.safetensors",
149
+ "model.layers.22.self_attn.o_proj.weight": "model-00006-of-00015.safetensors",
150
+ "model.layers.22.self_attn.q_proj.weight": "model-00006-of-00015.safetensors",
151
+ "model.layers.22.self_attn.v_proj.weight": "model-00006-of-00015.safetensors",
152
+ "model.layers.23.input_layernorm.weight": "model-00006-of-00015.safetensors",
153
+ "model.layers.23.mlp.down_proj.weight": "model-00006-of-00015.safetensors",
154
+ "model.layers.23.mlp.gate_proj.weight": "model-00006-of-00015.safetensors",
155
+ "model.layers.23.mlp.up_proj.weight": "model-00006-of-00015.safetensors",
156
+ "model.layers.23.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
157
+ "model.layers.23.self_attn.k_proj.weight": "model-00006-of-00015.safetensors",
158
+ "model.layers.23.self_attn.o_proj.weight": "model-00006-of-00015.safetensors",
159
+ "model.layers.23.self_attn.q_proj.weight": "model-00006-of-00015.safetensors",
160
+ "model.layers.23.self_attn.v_proj.weight": "model-00006-of-00015.safetensors",
161
+ "model.layers.24.input_layernorm.weight": "model-00006-of-00015.safetensors",
162
+ "model.layers.24.mlp.down_proj.weight": "model-00006-of-00015.safetensors",
163
+ "model.layers.24.mlp.gate_proj.weight": "model-00006-of-00015.safetensors",
164
+ "model.layers.24.mlp.up_proj.weight": "model-00006-of-00015.safetensors",
165
+ "model.layers.24.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
166
+ "model.layers.24.self_attn.k_proj.weight": "model-00006-of-00015.safetensors",
167
+ "model.layers.24.self_attn.o_proj.weight": "model-00006-of-00015.safetensors",
168
+ "model.layers.24.self_attn.q_proj.weight": "model-00006-of-00015.safetensors",
169
+ "model.layers.24.self_attn.v_proj.weight": "model-00006-of-00015.safetensors",
170
+ "model.layers.25.input_layernorm.weight": "model-00007-of-00015.safetensors",
171
+ "model.layers.25.mlp.down_proj.weight": "model-00007-of-00015.safetensors",
172
+ "model.layers.25.mlp.gate_proj.weight": "model-00007-of-00015.safetensors",
173
+ "model.layers.25.mlp.up_proj.weight": "model-00007-of-00015.safetensors",
174
+ "model.layers.25.post_attention_layernorm.weight": "model-00007-of-00015.safetensors",
175
+ "model.layers.25.self_attn.k_proj.weight": "model-00006-of-00015.safetensors",
176
+ "model.layers.25.self_attn.o_proj.weight": "model-00006-of-00015.safetensors",
177
+ "model.layers.25.self_attn.q_proj.weight": "model-00006-of-00015.safetensors",
178
+ "model.layers.25.self_attn.v_proj.weight": "model-00006-of-00015.safetensors",
179
+ "model.layers.26.input_layernorm.weight": "model-00007-of-00015.safetensors",
180
+ "model.layers.26.mlp.down_proj.weight": "model-00007-of-00015.safetensors",
181
+ "model.layers.26.mlp.gate_proj.weight": "model-00007-of-00015.safetensors",
182
+ "model.layers.26.mlp.up_proj.weight": "model-00007-of-00015.safetensors",
183
+ "model.layers.26.post_attention_layernorm.weight": "model-00007-of-00015.safetensors",
184
+ "model.layers.26.self_attn.k_proj.weight": "model-00007-of-00015.safetensors",
185
+ "model.layers.26.self_attn.o_proj.weight": "model-00007-of-00015.safetensors",
186
+ "model.layers.26.self_attn.q_proj.weight": "model-00007-of-00015.safetensors",
187
+ "model.layers.26.self_attn.v_proj.weight": "model-00007-of-00015.safetensors",
188
+ "model.layers.27.input_layernorm.weight": "model-00007-of-00015.safetensors",
189
+ "model.layers.27.mlp.down_proj.weight": "model-00007-of-00015.safetensors",
190
+ "model.layers.27.mlp.gate_proj.weight": "model-00007-of-00015.safetensors",
191
+ "model.layers.27.mlp.up_proj.weight": "model-00007-of-00015.safetensors",
192
+ "model.layers.27.post_attention_layernorm.weight": "model-00007-of-00015.safetensors",
193
+ "model.layers.27.self_attn.k_proj.weight": "model-00007-of-00015.safetensors",
194
+ "model.layers.27.self_attn.o_proj.weight": "model-00007-of-00015.safetensors",
195
+ "model.layers.27.self_attn.q_proj.weight": "model-00007-of-00015.safetensors",
196
+ "model.layers.27.self_attn.v_proj.weight": "model-00007-of-00015.safetensors",
197
+ "model.layers.28.input_layernorm.weight": "model-00007-of-00015.safetensors",
198
+ "model.layers.28.mlp.down_proj.weight": "model-00007-of-00015.safetensors",
199
+ "model.layers.28.mlp.gate_proj.weight": "model-00007-of-00015.safetensors",
200
+ "model.layers.28.mlp.up_proj.weight": "model-00007-of-00015.safetensors",
201
+ "model.layers.28.post_attention_layernorm.weight": "model-00007-of-00015.safetensors",
202
+ "model.layers.28.self_attn.k_proj.weight": "model-00007-of-00015.safetensors",
203
+ "model.layers.28.self_attn.o_proj.weight": "model-00007-of-00015.safetensors",
204
+ "model.layers.28.self_attn.q_proj.weight": "model-00007-of-00015.safetensors",
205
+ "model.layers.28.self_attn.v_proj.weight": "model-00007-of-00015.safetensors",
206
+ "model.layers.29.input_layernorm.weight": "model-00008-of-00015.safetensors",
207
+ "model.layers.29.mlp.down_proj.weight": "model-00008-of-00015.safetensors",
208
+ "model.layers.29.mlp.gate_proj.weight": "model-00007-of-00015.safetensors",
209
+ "model.layers.29.mlp.up_proj.weight": "model-00008-of-00015.safetensors",
210
+ "model.layers.29.post_attention_layernorm.weight": "model-00008-of-00015.safetensors",
211
+ "model.layers.29.self_attn.k_proj.weight": "model-00007-of-00015.safetensors",
212
+ "model.layers.29.self_attn.o_proj.weight": "model-00007-of-00015.safetensors",
213
+ "model.layers.29.self_attn.q_proj.weight": "model-00007-of-00015.safetensors",
214
+ "model.layers.29.self_attn.v_proj.weight": "model-00007-of-00015.safetensors",
215
+ "model.layers.3.input_layernorm.weight": "model-00002-of-00015.safetensors",
216
+ "model.layers.3.mlp.down_proj.weight": "model-00002-of-00015.safetensors",
217
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00015.safetensors",
218
+ "model.layers.3.mlp.up_proj.weight": "model-00002-of-00015.safetensors",
219
+ "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00015.safetensors",
220
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00015.safetensors",
221
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00015.safetensors",
222
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00015.safetensors",
223
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00015.safetensors",
224
+ "model.layers.30.input_layernorm.weight": "model-00008-of-00015.safetensors",
225
+ "model.layers.30.mlp.down_proj.weight": "model-00008-of-00015.safetensors",
226
+ "model.layers.30.mlp.gate_proj.weight": "model-00008-of-00015.safetensors",
227
+ "model.layers.30.mlp.up_proj.weight": "model-00008-of-00015.safetensors",
228
+ "model.layers.30.post_attention_layernorm.weight": "model-00008-of-00015.safetensors",
229
+ "model.layers.30.self_attn.k_proj.weight": "model-00008-of-00015.safetensors",
230
+ "model.layers.30.self_attn.o_proj.weight": "model-00008-of-00015.safetensors",
231
+ "model.layers.30.self_attn.q_proj.weight": "model-00008-of-00015.safetensors",
232
+ "model.layers.30.self_attn.v_proj.weight": "model-00008-of-00015.safetensors",
233
+ "model.layers.31.input_layernorm.weight": "model-00008-of-00015.safetensors",
234
+ "model.layers.31.mlp.down_proj.weight": "model-00008-of-00015.safetensors",
235
+ "model.layers.31.mlp.gate_proj.weight": "model-00008-of-00015.safetensors",
236
+ "model.layers.31.mlp.up_proj.weight": "model-00008-of-00015.safetensors",
237
+ "model.layers.31.post_attention_layernorm.weight": "model-00008-of-00015.safetensors",
238
+ "model.layers.31.self_attn.k_proj.weight": "model-00008-of-00015.safetensors",
239
+ "model.layers.31.self_attn.o_proj.weight": "model-00008-of-00015.safetensors",
240
+ "model.layers.31.self_attn.q_proj.weight": "model-00008-of-00015.safetensors",
241
+ "model.layers.31.self_attn.v_proj.weight": "model-00008-of-00015.safetensors",
242
+ "model.layers.32.input_layernorm.weight": "model-00008-of-00015.safetensors",
243
+ "model.layers.32.mlp.down_proj.weight": "model-00008-of-00015.safetensors",
244
+ "model.layers.32.mlp.gate_proj.weight": "model-00008-of-00015.safetensors",
245
+ "model.layers.32.mlp.up_proj.weight": "model-00008-of-00015.safetensors",
246
+ "model.layers.32.post_attention_layernorm.weight": "model-00008-of-00015.safetensors",
247
+ "model.layers.32.self_attn.k_proj.weight": "model-00008-of-00015.safetensors",
248
+ "model.layers.32.self_attn.o_proj.weight": "model-00008-of-00015.safetensors",
249
+ "model.layers.32.self_attn.q_proj.weight": "model-00008-of-00015.safetensors",
250
+ "model.layers.32.self_attn.v_proj.weight": "model-00008-of-00015.safetensors",
251
+ "model.layers.33.input_layernorm.weight": "model-00009-of-00015.safetensors",
252
+ "model.layers.33.mlp.down_proj.weight": "model-00009-of-00015.safetensors",
253
+ "model.layers.33.mlp.gate_proj.weight": "model-00008-of-00015.safetensors",
254
+ "model.layers.33.mlp.up_proj.weight": "model-00008-of-00015.safetensors",
255
+ "model.layers.33.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
256
+ "model.layers.33.self_attn.k_proj.weight": "model-00008-of-00015.safetensors",
257
+ "model.layers.33.self_attn.o_proj.weight": "model-00008-of-00015.safetensors",
258
+ "model.layers.33.self_attn.q_proj.weight": "model-00008-of-00015.safetensors",
259
+ "model.layers.33.self_attn.v_proj.weight": "model-00008-of-00015.safetensors",
260
+ "model.layers.34.input_layernorm.weight": "model-00009-of-00015.safetensors",
261
+ "model.layers.34.mlp.down_proj.weight": "model-00009-of-00015.safetensors",
262
+ "model.layers.34.mlp.gate_proj.weight": "model-00009-of-00015.safetensors",
263
+ "model.layers.34.mlp.up_proj.weight": "model-00009-of-00015.safetensors",
264
+ "model.layers.34.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
265
+ "model.layers.34.self_attn.k_proj.weight": "model-00009-of-00015.safetensors",
266
+ "model.layers.34.self_attn.o_proj.weight": "model-00009-of-00015.safetensors",
267
+ "model.layers.34.self_attn.q_proj.weight": "model-00009-of-00015.safetensors",
268
+ "model.layers.34.self_attn.v_proj.weight": "model-00009-of-00015.safetensors",
269
+ "model.layers.35.input_layernorm.weight": "model-00009-of-00015.safetensors",
270
+ "model.layers.35.mlp.down_proj.weight": "model-00009-of-00015.safetensors",
271
+ "model.layers.35.mlp.gate_proj.weight": "model-00009-of-00015.safetensors",
272
+ "model.layers.35.mlp.up_proj.weight": "model-00009-of-00015.safetensors",
273
+ "model.layers.35.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
274
+ "model.layers.35.self_attn.k_proj.weight": "model-00009-of-00015.safetensors",
275
+ "model.layers.35.self_attn.o_proj.weight": "model-00009-of-00015.safetensors",
276
+ "model.layers.35.self_attn.q_proj.weight": "model-00009-of-00015.safetensors",
277
+ "model.layers.35.self_attn.v_proj.weight": "model-00009-of-00015.safetensors",
278
+ "model.layers.36.input_layernorm.weight": "model-00009-of-00015.safetensors",
279
+ "model.layers.36.mlp.down_proj.weight": "model-00009-of-00015.safetensors",
280
+ "model.layers.36.mlp.gate_proj.weight": "model-00009-of-00015.safetensors",
281
+ "model.layers.36.mlp.up_proj.weight": "model-00009-of-00015.safetensors",
282
+ "model.layers.36.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
283
+ "model.layers.36.self_attn.k_proj.weight": "model-00009-of-00015.safetensors",
284
+ "model.layers.36.self_attn.o_proj.weight": "model-00009-of-00015.safetensors",
285
+ "model.layers.36.self_attn.q_proj.weight": "model-00009-of-00015.safetensors",
286
+ "model.layers.36.self_attn.v_proj.weight": "model-00009-of-00015.safetensors",
287
+ "model.layers.37.input_layernorm.weight": "model-00009-of-00015.safetensors",
288
+ "model.layers.37.mlp.down_proj.weight": "model-00009-of-00015.safetensors",
289
+ "model.layers.37.mlp.gate_proj.weight": "model-00009-of-00015.safetensors",
290
+ "model.layers.37.mlp.up_proj.weight": "model-00009-of-00015.safetensors",
291
+ "model.layers.37.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
292
+ "model.layers.37.self_attn.k_proj.weight": "model-00009-of-00015.safetensors",
293
+ "model.layers.37.self_attn.o_proj.weight": "model-00009-of-00015.safetensors",
294
+ "model.layers.37.self_attn.q_proj.weight": "model-00009-of-00015.safetensors",
295
+ "model.layers.37.self_attn.v_proj.weight": "model-00009-of-00015.safetensors",
296
+ "model.layers.38.input_layernorm.weight": "model-00010-of-00015.safetensors",
297
+ "model.layers.38.mlp.down_proj.weight": "model-00010-of-00015.safetensors",
298
+ "model.layers.38.mlp.gate_proj.weight": "model-00010-of-00015.safetensors",
299
+ "model.layers.38.mlp.up_proj.weight": "model-00010-of-00015.safetensors",
300
+ "model.layers.38.post_attention_layernorm.weight": "model-00010-of-00015.safetensors",
301
+ "model.layers.38.self_attn.k_proj.weight": "model-00009-of-00015.safetensors",
302
+ "model.layers.38.self_attn.o_proj.weight": "model-00009-of-00015.safetensors",
303
+ "model.layers.38.self_attn.q_proj.weight": "model-00009-of-00015.safetensors",
304
+ "model.layers.38.self_attn.v_proj.weight": "model-00009-of-00015.safetensors",
305
+ "model.layers.39.input_layernorm.weight": "model-00010-of-00015.safetensors",
306
+ "model.layers.39.mlp.down_proj.weight": "model-00010-of-00015.safetensors",
307
+ "model.layers.39.mlp.gate_proj.weight": "model-00010-of-00015.safetensors",
308
+ "model.layers.39.mlp.up_proj.weight": "model-00010-of-00015.safetensors",
309
+ "model.layers.39.post_attention_layernorm.weight": "model-00010-of-00015.safetensors",
310
+ "model.layers.39.self_attn.k_proj.weight": "model-00010-of-00015.safetensors",
311
+ "model.layers.39.self_attn.o_proj.weight": "model-00010-of-00015.safetensors",
312
+ "model.layers.39.self_attn.q_proj.weight": "model-00010-of-00015.safetensors",
313
+ "model.layers.39.self_attn.v_proj.weight": "model-00010-of-00015.safetensors",
314
+ "model.layers.4.input_layernorm.weight": "model-00002-of-00015.safetensors",
315
+ "model.layers.4.mlp.down_proj.weight": "model-00002-of-00015.safetensors",
316
+ "model.layers.4.mlp.gate_proj.weight": "model-00002-of-00015.safetensors",
317
+ "model.layers.4.mlp.up_proj.weight": "model-00002-of-00015.safetensors",
318
+ "model.layers.4.post_attention_layernorm.weight": "model-00002-of-00015.safetensors",
319
+ "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00015.safetensors",
320
+ "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00015.safetensors",
321
+ "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00015.safetensors",
322
+ "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00015.safetensors",
323
+ "model.layers.40.input_layernorm.weight": "model-00010-of-00015.safetensors",
324
+ "model.layers.40.mlp.down_proj.weight": "model-00010-of-00015.safetensors",
325
+ "model.layers.40.mlp.gate_proj.weight": "model-00010-of-00015.safetensors",
326
+ "model.layers.40.mlp.up_proj.weight": "model-00010-of-00015.safetensors",
327
+ "model.layers.40.post_attention_layernorm.weight": "model-00010-of-00015.safetensors",
328
+ "model.layers.40.self_attn.k_proj.weight": "model-00010-of-00015.safetensors",
329
+ "model.layers.40.self_attn.o_proj.weight": "model-00010-of-00015.safetensors",
330
+ "model.layers.40.self_attn.q_proj.weight": "model-00010-of-00015.safetensors",
331
+ "model.layers.40.self_attn.v_proj.weight": "model-00010-of-00015.safetensors",
332
+ "model.layers.41.input_layernorm.weight": "model-00010-of-00015.safetensors",
333
+ "model.layers.41.mlp.down_proj.weight": "model-00010-of-00015.safetensors",
334
+ "model.layers.41.mlp.gate_proj.weight": "model-00010-of-00015.safetensors",
335
+ "model.layers.41.mlp.up_proj.weight": "model-00010-of-00015.safetensors",
336
+ "model.layers.41.post_attention_layernorm.weight": "model-00010-of-00015.safetensors",
337
+ "model.layers.41.self_attn.k_proj.weight": "model-00010-of-00015.safetensors",
338
+ "model.layers.41.self_attn.o_proj.weight": "model-00010-of-00015.safetensors",
339
+ "model.layers.41.self_attn.q_proj.weight": "model-00010-of-00015.safetensors",
340
+ "model.layers.41.self_attn.v_proj.weight": "model-00010-of-00015.safetensors",
341
+ "model.layers.42.input_layernorm.weight": "model-00011-of-00015.safetensors",
342
+ "model.layers.42.mlp.down_proj.weight": "model-00011-of-00015.safetensors",
343
+ "model.layers.42.mlp.gate_proj.weight": "model-00010-of-00015.safetensors",
344
+ "model.layers.42.mlp.up_proj.weight": "model-00011-of-00015.safetensors",
345
+ "model.layers.42.post_attention_layernorm.weight": "model-00011-of-00015.safetensors",
346
+ "model.layers.42.self_attn.k_proj.weight": "model-00010-of-00015.safetensors",
347
+ "model.layers.42.self_attn.o_proj.weight": "model-00010-of-00015.safetensors",
348
+ "model.layers.42.self_attn.q_proj.weight": "model-00010-of-00015.safetensors",
349
+ "model.layers.42.self_attn.v_proj.weight": "model-00010-of-00015.safetensors",
350
+ "model.layers.43.input_layernorm.weight": "model-00011-of-00015.safetensors",
351
+ "model.layers.43.mlp.down_proj.weight": "model-00011-of-00015.safetensors",
352
+ "model.layers.43.mlp.gate_proj.weight": "model-00011-of-00015.safetensors",
353
+ "model.layers.43.mlp.up_proj.weight": "model-00011-of-00015.safetensors",
354
+ "model.layers.43.post_attention_layernorm.weight": "model-00011-of-00015.safetensors",
355
+ "model.layers.43.self_attn.k_proj.weight": "model-00011-of-00015.safetensors",
356
+ "model.layers.43.self_attn.o_proj.weight": "model-00011-of-00015.safetensors",
357
+ "model.layers.43.self_attn.q_proj.weight": "model-00011-of-00015.safetensors",
358
+ "model.layers.43.self_attn.v_proj.weight": "model-00011-of-00015.safetensors",
359
+ "model.layers.44.input_layernorm.weight": "model-00011-of-00015.safetensors",
360
+ "model.layers.44.mlp.down_proj.weight": "model-00011-of-00015.safetensors",
361
+ "model.layers.44.mlp.gate_proj.weight": "model-00011-of-00015.safetensors",
362
+ "model.layers.44.mlp.up_proj.weight": "model-00011-of-00015.safetensors",
363
+ "model.layers.44.post_attention_layernorm.weight": "model-00011-of-00015.safetensors",
364
+ "model.layers.44.self_attn.k_proj.weight": "model-00011-of-00015.safetensors",
365
+ "model.layers.44.self_attn.o_proj.weight": "model-00011-of-00015.safetensors",
366
+ "model.layers.44.self_attn.q_proj.weight": "model-00011-of-00015.safetensors",
367
+ "model.layers.44.self_attn.v_proj.weight": "model-00011-of-00015.safetensors",
368
+ "model.layers.45.input_layernorm.weight": "model-00011-of-00015.safetensors",
369
+ "model.layers.45.mlp.down_proj.weight": "model-00011-of-00015.safetensors",
370
+ "model.layers.45.mlp.gate_proj.weight": "model-00011-of-00015.safetensors",
371
+ "model.layers.45.mlp.up_proj.weight": "model-00011-of-00015.safetensors",
372
+ "model.layers.45.post_attention_layernorm.weight": "model-00011-of-00015.safetensors",
373
+ "model.layers.45.self_attn.k_proj.weight": "model-00011-of-00015.safetensors",
374
+ "model.layers.45.self_attn.o_proj.weight": "model-00011-of-00015.safetensors",
375
+ "model.layers.45.self_attn.q_proj.weight": "model-00011-of-00015.safetensors",
376
+ "model.layers.45.self_attn.v_proj.weight": "model-00011-of-00015.safetensors",
377
+ "model.layers.46.input_layernorm.weight": "model-00012-of-00015.safetensors",
378
+ "model.layers.46.mlp.down_proj.weight": "model-00012-of-00015.safetensors",
379
+ "model.layers.46.mlp.gate_proj.weight": "model-00011-of-00015.safetensors",
380
+ "model.layers.46.mlp.up_proj.weight": "model-00011-of-00015.safetensors",
381
+ "model.layers.46.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
382
+ "model.layers.46.self_attn.k_proj.weight": "model-00011-of-00015.safetensors",
383
+ "model.layers.46.self_attn.o_proj.weight": "model-00011-of-00015.safetensors",
384
+ "model.layers.46.self_attn.q_proj.weight": "model-00011-of-00015.safetensors",
385
+ "model.layers.46.self_attn.v_proj.weight": "model-00011-of-00015.safetensors",
386
+ "model.layers.47.input_layernorm.weight": "model-00012-of-00015.safetensors",
387
+ "model.layers.47.mlp.down_proj.weight": "model-00012-of-00015.safetensors",
388
+ "model.layers.47.mlp.gate_proj.weight": "model-00012-of-00015.safetensors",
389
+ "model.layers.47.mlp.up_proj.weight": "model-00012-of-00015.safetensors",
390
+ "model.layers.47.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
391
+ "model.layers.47.self_attn.k_proj.weight": "model-00012-of-00015.safetensors",
392
+ "model.layers.47.self_attn.o_proj.weight": "model-00012-of-00015.safetensors",
393
+ "model.layers.47.self_attn.q_proj.weight": "model-00012-of-00015.safetensors",
394
+ "model.layers.47.self_attn.v_proj.weight": "model-00012-of-00015.safetensors",
395
+ "model.layers.48.input_layernorm.weight": "model-00012-of-00015.safetensors",
396
+ "model.layers.48.mlp.down_proj.weight": "model-00012-of-00015.safetensors",
397
+ "model.layers.48.mlp.gate_proj.weight": "model-00012-of-00015.safetensors",
398
+ "model.layers.48.mlp.up_proj.weight": "model-00012-of-00015.safetensors",
399
+ "model.layers.48.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
400
+ "model.layers.48.self_attn.k_proj.weight": "model-00012-of-00015.safetensors",
401
+ "model.layers.48.self_attn.o_proj.weight": "model-00012-of-00015.safetensors",
402
+ "model.layers.48.self_attn.q_proj.weight": "model-00012-of-00015.safetensors",
403
+ "model.layers.48.self_attn.v_proj.weight": "model-00012-of-00015.safetensors",
404
+ "model.layers.49.input_layernorm.weight": "model-00012-of-00015.safetensors",
405
+ "model.layers.49.mlp.down_proj.weight": "model-00012-of-00015.safetensors",
406
+ "model.layers.49.mlp.gate_proj.weight": "model-00012-of-00015.safetensors",
407
+ "model.layers.49.mlp.up_proj.weight": "model-00012-of-00015.safetensors",
408
+ "model.layers.49.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
409
+ "model.layers.49.self_attn.k_proj.weight": "model-00012-of-00015.safetensors",
410
+ "model.layers.49.self_attn.o_proj.weight": "model-00012-of-00015.safetensors",
411
+ "model.layers.49.self_attn.q_proj.weight": "model-00012-of-00015.safetensors",
412
+ "model.layers.49.self_attn.v_proj.weight": "model-00012-of-00015.safetensors",
413
+ "model.layers.5.input_layernorm.weight": "model-00002-of-00015.safetensors",
414
+ "model.layers.5.mlp.down_proj.weight": "model-00002-of-00015.safetensors",
415
+ "model.layers.5.mlp.gate_proj.weight": "model-00002-of-00015.safetensors",
416
+ "model.layers.5.mlp.up_proj.weight": "model-00002-of-00015.safetensors",
417
+ "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00015.safetensors",
418
+ "model.layers.5.self_attn.k_proj.weight": "model-00002-of-00015.safetensors",
419
+ "model.layers.5.self_attn.o_proj.weight": "model-00002-of-00015.safetensors",
420
+ "model.layers.5.self_attn.q_proj.weight": "model-00002-of-00015.safetensors",
421
+ "model.layers.5.self_attn.v_proj.weight": "model-00002-of-00015.safetensors",
422
+ "model.layers.50.input_layernorm.weight": "model-00012-of-00015.safetensors",
423
+ "model.layers.50.mlp.down_proj.weight": "model-00012-of-00015.safetensors",
424
+ "model.layers.50.mlp.gate_proj.weight": "model-00012-of-00015.safetensors",
425
+ "model.layers.50.mlp.up_proj.weight": "model-00012-of-00015.safetensors",
426
+ "model.layers.50.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
427
+ "model.layers.50.self_attn.k_proj.weight": "model-00012-of-00015.safetensors",
428
+ "model.layers.50.self_attn.o_proj.weight": "model-00012-of-00015.safetensors",
429
+ "model.layers.50.self_attn.q_proj.weight": "model-00012-of-00015.safetensors",
430
+ "model.layers.50.self_attn.v_proj.weight": "model-00012-of-00015.safetensors",
431
+ "model.layers.51.input_layernorm.weight": "model-00013-of-00015.safetensors",
432
+ "model.layers.51.mlp.down_proj.weight": "model-00013-of-00015.safetensors",
433
+ "model.layers.51.mlp.gate_proj.weight": "model-00013-of-00015.safetensors",
434
+ "model.layers.51.mlp.up_proj.weight": "model-00013-of-00015.safetensors",
435
+ "model.layers.51.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
436
+ "model.layers.51.self_attn.k_proj.weight": "model-00012-of-00015.safetensors",
437
+ "model.layers.51.self_attn.o_proj.weight": "model-00012-of-00015.safetensors",
438
+ "model.layers.51.self_attn.q_proj.weight": "model-00012-of-00015.safetensors",
439
+ "model.layers.51.self_attn.v_proj.weight": "model-00012-of-00015.safetensors",
440
+ "model.layers.52.input_layernorm.weight": "model-00013-of-00015.safetensors",
441
+ "model.layers.52.mlp.down_proj.weight": "model-00013-of-00015.safetensors",
442
+ "model.layers.52.mlp.gate_proj.weight": "model-00013-of-00015.safetensors",
443
+ "model.layers.52.mlp.up_proj.weight": "model-00013-of-00015.safetensors",
444
+ "model.layers.52.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
445
+ "model.layers.52.self_attn.k_proj.weight": "model-00013-of-00015.safetensors",
446
+ "model.layers.52.self_attn.o_proj.weight": "model-00013-of-00015.safetensors",
447
+ "model.layers.52.self_attn.q_proj.weight": "model-00013-of-00015.safetensors",
448
+ "model.layers.52.self_attn.v_proj.weight": "model-00013-of-00015.safetensors",
449
+ "model.layers.53.input_layernorm.weight": "model-00013-of-00015.safetensors",
450
+ "model.layers.53.mlp.down_proj.weight": "model-00013-of-00015.safetensors",
451
+ "model.layers.53.mlp.gate_proj.weight": "model-00013-of-00015.safetensors",
452
+ "model.layers.53.mlp.up_proj.weight": "model-00013-of-00015.safetensors",
453
+ "model.layers.53.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
454
+ "model.layers.53.self_attn.k_proj.weight": "model-00013-of-00015.safetensors",
455
+ "model.layers.53.self_attn.o_proj.weight": "model-00013-of-00015.safetensors",
456
+ "model.layers.53.self_attn.q_proj.weight": "model-00013-of-00015.safetensors",
457
+ "model.layers.53.self_attn.v_proj.weight": "model-00013-of-00015.safetensors",
458
+ "model.layers.54.input_layernorm.weight": "model-00013-of-00015.safetensors",
459
+ "model.layers.54.mlp.down_proj.weight": "model-00013-of-00015.safetensors",
460
+ "model.layers.54.mlp.gate_proj.weight": "model-00013-of-00015.safetensors",
461
+ "model.layers.54.mlp.up_proj.weight": "model-00013-of-00015.safetensors",
462
+ "model.layers.54.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
463
+ "model.layers.54.self_attn.k_proj.weight": "model-00013-of-00015.safetensors",
464
+ "model.layers.54.self_attn.o_proj.weight": "model-00013-of-00015.safetensors",
465
+ "model.layers.54.self_attn.q_proj.weight": "model-00013-of-00015.safetensors",
466
+ "model.layers.54.self_attn.v_proj.weight": "model-00013-of-00015.safetensors",
467
+ "model.layers.55.input_layernorm.weight": "model-00014-of-00015.safetensors",
468
+ "model.layers.55.mlp.down_proj.weight": "model-00014-of-00015.safetensors",
469
+ "model.layers.55.mlp.gate_proj.weight": "model-00013-of-00015.safetensors",
470
+ "model.layers.55.mlp.up_proj.weight": "model-00014-of-00015.safetensors",
471
+ "model.layers.55.post_attention_layernorm.weight": "model-00014-of-00015.safetensors",
472
+ "model.layers.55.self_attn.k_proj.weight": "model-00013-of-00015.safetensors",
473
+ "model.layers.55.self_attn.o_proj.weight": "model-00013-of-00015.safetensors",
474
+ "model.layers.55.self_attn.q_proj.weight": "model-00013-of-00015.safetensors",
475
+ "model.layers.55.self_attn.v_proj.weight": "model-00013-of-00015.safetensors",
476
+ "model.layers.56.input_layernorm.weight": "model-00014-of-00015.safetensors",
477
+ "model.layers.56.mlp.down_proj.weight": "model-00014-of-00015.safetensors",
478
+ "model.layers.56.mlp.gate_proj.weight": "model-00014-of-00015.safetensors",
479
+ "model.layers.56.mlp.up_proj.weight": "model-00014-of-00015.safetensors",
480
+ "model.layers.56.post_attention_layernorm.weight": "model-00014-of-00015.safetensors",
481
+ "model.layers.56.self_attn.k_proj.weight": "model-00014-of-00015.safetensors",
482
+ "model.layers.56.self_attn.o_proj.weight": "model-00014-of-00015.safetensors",
483
+ "model.layers.56.self_attn.q_proj.weight": "model-00014-of-00015.safetensors",
484
+ "model.layers.56.self_attn.v_proj.weight": "model-00014-of-00015.safetensors",
485
+ "model.layers.57.input_layernorm.weight": "model-00014-of-00015.safetensors",
486
+ "model.layers.57.mlp.down_proj.weight": "model-00014-of-00015.safetensors",
487
+ "model.layers.57.mlp.gate_proj.weight": "model-00014-of-00015.safetensors",
488
+ "model.layers.57.mlp.up_proj.weight": "model-00014-of-00015.safetensors",
489
+ "model.layers.57.post_attention_layernorm.weight": "model-00014-of-00015.safetensors",
490
+ "model.layers.57.self_attn.k_proj.weight": "model-00014-of-00015.safetensors",
491
+ "model.layers.57.self_attn.o_proj.weight": "model-00014-of-00015.safetensors",
492
+ "model.layers.57.self_attn.q_proj.weight": "model-00014-of-00015.safetensors",
493
+ "model.layers.57.self_attn.v_proj.weight": "model-00014-of-00015.safetensors",
494
+ "model.layers.58.input_layernorm.weight": "model-00014-of-00015.safetensors",
495
+ "model.layers.58.mlp.down_proj.weight": "model-00014-of-00015.safetensors",
496
+ "model.layers.58.mlp.gate_proj.weight": "model-00014-of-00015.safetensors",
497
+ "model.layers.58.mlp.up_proj.weight": "model-00014-of-00015.safetensors",
498
+ "model.layers.58.post_attention_layernorm.weight": "model-00014-of-00015.safetensors",
499
+ "model.layers.58.self_attn.k_proj.weight": "model-00014-of-00015.safetensors",
500
+ "model.layers.58.self_attn.o_proj.weight": "model-00014-of-00015.safetensors",
501
+ "model.layers.58.self_attn.q_proj.weight": "model-00014-of-00015.safetensors",
502
+ "model.layers.58.self_attn.v_proj.weight": "model-00014-of-00015.safetensors",
503
+ "model.layers.59.input_layernorm.weight": "model-00015-of-00015.safetensors",
504
+ "model.layers.59.mlp.down_proj.weight": "model-00015-of-00015.safetensors",
505
+ "model.layers.59.mlp.gate_proj.weight": "model-00014-of-00015.safetensors",
506
+ "model.layers.59.mlp.up_proj.weight": "model-00014-of-00015.safetensors",
507
+ "model.layers.59.post_attention_layernorm.weight": "model-00015-of-00015.safetensors",
508
+ "model.layers.59.self_attn.k_proj.weight": "model-00014-of-00015.safetensors",
509
+ "model.layers.59.self_attn.o_proj.weight": "model-00014-of-00015.safetensors",
510
+ "model.layers.59.self_attn.q_proj.weight": "model-00014-of-00015.safetensors",
511
+ "model.layers.59.self_attn.v_proj.weight": "model-00014-of-00015.safetensors",
512
+ "model.layers.6.input_layernorm.weight": "model-00002-of-00015.safetensors",
513
+ "model.layers.6.mlp.down_proj.weight": "model-00002-of-00015.safetensors",
514
+ "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00015.safetensors",
515
+ "model.layers.6.mlp.up_proj.weight": "model-00002-of-00015.safetensors",
516
+ "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00015.safetensors",
517
+ "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00015.safetensors",
518
+ "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00015.safetensors",
519
+ "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00015.safetensors",
520
+ "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00015.safetensors",
521
+ "model.layers.7.input_layernorm.weight": "model-00003-of-00015.safetensors",
522
+ "model.layers.7.mlp.down_proj.weight": "model-00003-of-00015.safetensors",
523
+ "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00015.safetensors",
524
+ "model.layers.7.mlp.up_proj.weight": "model-00002-of-00015.safetensors",
525
+ "model.layers.7.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
526
+ "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00015.safetensors",
527
+ "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00015.safetensors",
528
+ "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00015.safetensors",
529
+ "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00015.safetensors",
530
+ "model.layers.8.input_layernorm.weight": "model-00003-of-00015.safetensors",
531
+ "model.layers.8.mlp.down_proj.weight": "model-00003-of-00015.safetensors",
532
+ "model.layers.8.mlp.gate_proj.weight": "model-00003-of-00015.safetensors",
533
+ "model.layers.8.mlp.up_proj.weight": "model-00003-of-00015.safetensors",
534
+ "model.layers.8.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
535
+ "model.layers.8.self_attn.k_proj.weight": "model-00003-of-00015.safetensors",
536
+ "model.layers.8.self_attn.o_proj.weight": "model-00003-of-00015.safetensors",
537
+ "model.layers.8.self_attn.q_proj.weight": "model-00003-of-00015.safetensors",
538
+ "model.layers.8.self_attn.v_proj.weight": "model-00003-of-00015.safetensors",
539
+ "model.layers.9.input_layernorm.weight": "model-00003-of-00015.safetensors",
540
+ "model.layers.9.mlp.down_proj.weight": "model-00003-of-00015.safetensors",
541
+ "model.layers.9.mlp.gate_proj.weight": "model-00003-of-00015.safetensors",
542
+ "model.layers.9.mlp.up_proj.weight": "model-00003-of-00015.safetensors",
543
+ "model.layers.9.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
544
+ "model.layers.9.self_attn.k_proj.weight": "model-00003-of-00015.safetensors",
545
+ "model.layers.9.self_attn.o_proj.weight": "model-00003-of-00015.safetensors",
546
+ "model.layers.9.self_attn.q_proj.weight": "model-00003-of-00015.safetensors",
547
+ "model.layers.9.self_attn.v_proj.weight": "model-00003-of-00015.safetensors",
548
+ "model.norm.weight": "model-00015-of-00015.safetensors"
549
+ }
550
+ }
output-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ef0082073563f8253233de93ed420428d0b76fb0d6c382fc89068f31df2e88a
3
+ size 8569979912
output-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57b87c6601b226670b0f633eeb1f994806576ccd5a9cf3802fa8a8a5b7987656
3
+ size 4009278024
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<unk>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:386c49cf943d71aa110361135338c50e38beeff0a66593480421f37b319e1a39
3
+ size 1033105
tokenizer_config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_eos_token": false,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "<unk>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "<s>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "2": {
22
+ "content": "</s>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ }
29
+ },
30
+ "bos_token": "<s>",
31
+ "clean_up_tokenization_spaces": false,
32
+ "eos_token": "</s>",
33
+ "legacy": true,
34
+ "model_max_length": 200000,
35
+ "pad_token": "<unk>",
36
+ "padding_side": "right",
37
+ "sp_model_kwargs": {},
38
+ "tokenizer_class": "LlamaTokenizer",
39
+ "unk_token": "<unk>",
40
+ "use_default_system_prompt": false,
41
+ "chat_template": "{%- for idx in range(0, messages|length) -%}\n{%- if messages[idx]['role'] == 'user' -%}\n{%- if idx > 1 -%}\n{{- bos_token + '[INST] ' + messages[idx]['content'] + ' [/INST]' -}}\n{%- else -%}\n{{- messages[idx]['content'] + ' [/INST]' -}}\n{%- endif -%}\n{% elif messages[idx]['role'] == 'system' %}\n{{- '[INST] <<SYS>>\\n' + messages[idx]['content'] + '\\n<</SYS>>\\n\\n' -}}\n{%- elif messages[idx]['role'] == 'assistant' -%}\n{{- ' ' + messages[idx]['content'] + ' ' + eos_token -}}\n{% endif %}\n{% endfor %}"
42
+ }