skylersterling committed on
Commit
d0efe8c
1 Parent(s): 66589c3

Upload 5 files

Browse files
Files changed (5) hide show
  1. README.md +6 -0
  2. config.json +40 -0
  3. model.safetensors +3 -0
  4. pytorch_model.bin +3 -0
  5. tokenizer.json +0 -0
README.md ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ ---
4
+ This repository presents the outcome of an experimental merging algorithm that combined the weights of two distinct language models using the add-difference technique. Weight merging is an innovative approach that integrates knowledge from multiple models, yielding a more dynamic and advanced language model.
5
+
6
+ Proto-Synthia demonstrates that this optimization can be achieved in a mere 10 minutes, in many cases obviating the need for the conventional, time-intensive training process.
config.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "gpt2-xl",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 1600,
16
+ "n_head": 25,
17
+ "n_inner": null,
18
+ "n_layer": 48,
19
+ "n_positions": 1024,
20
+ "output_past": true,
21
+ "reorder_and_upcast_attn": false,
22
+ "resid_pdrop": 0.1,
23
+ "scale_attn_by_inverse_layer_idx": false,
24
+ "scale_attn_weights": true,
25
+ "summary_activation": null,
26
+ "summary_first_dropout": 0.1,
27
+ "summary_proj_to_labels": true,
28
+ "summary_type": "cls_index",
29
+ "summary_use_proj": true,
30
+ "task_specific_params": {
31
+ "text-generation": {
32
+ "do_sample": true,
33
+ "max_length": 50
34
+ }
35
+ },
36
+ "torch_dtype": "float32",
37
+ "transformers_version": "4.25.1",
38
+ "use_cache": true,
39
+ "vocab_size": 50257
40
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b67a0e2ea20c28fd91e737737937ca67eb80a8e0e5c72681858fcbcdc926ab7c
3
+ size 6280847584
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c05937f1dd96505d5f35728c6bfbabe604ccf5997e63b4fe103d35b2918ac759
3
+ size 6280990705
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff