snewcomer committed
Commit cd13ad0
1 Parent(s): 3873c9c

Model save

README.md CHANGED
@@ -1,8 +1,9 @@
 ---
 license: mit
-base_model: microsoft/phi-2
+library_name: peft
 tags:
 - generated_from_trainer
+base_model: microsoft/phi-2
 model-index:
 - name: phi-2-finetuned
   results: []
@@ -33,12 +34,15 @@ More information needed
 
 The following hyperparameters were used during training:
 - learning_rate: 0.0002
-- train_batch_size: 4
+- train_batch_size: 8
 - eval_batch_size: 8
 - seed: 42
+- gradient_accumulation_steps: 4
+- total_train_batch_size: 32
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: constant
-- num_epochs: 5
+- lr_scheduler_warmup_ratio: 0.03
+- num_epochs: 20
 
 ### Training results
 
@@ -46,7 +50,8 @@ The following hyperparameters were used during training:
 
 ### Framework versions
 
-- Transformers 4.35.2
+- PEFT 0.7.1
+- Transformers 4.36.0
 - Pytorch 2.1.0+cu121
 - Datasets 2.16.1
-- Tokenizers 0.15.0
+- Tokenizers 0.15.0
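For context, a minimal sketch (not the author's actual training script) of a `transformers.TrainingArguments` that matches the updated card; `output_dir` and every omitted argument are assumptions, and the reported `total_train_batch_size: 32` is simply 8 per-device samples × 4 accumulation steps:

```python
# Sketch only: reconstructs the hyperparameters reported in the model card.
# output_dir is a guess; all omitted arguments keep library defaults.
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="phi-2-finetuned",    # assumed, mirrors the model-index name
    learning_rate=2e-4,              # learning_rate: 0.0002
    per_device_train_batch_size=8,   # train_batch_size: 8
    per_device_eval_batch_size=8,    # eval_batch_size: 8
    seed=42,
    gradient_accumulation_steps=4,   # 8 x 4 = total_train_batch_size 32
    lr_scheduler_type="constant",
    warmup_ratio=0.03,               # lr_scheduler_warmup_ratio: 0.03
    num_train_epochs=20,
)
```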
adapter_config.json CHANGED
@@ -9,18 +9,23 @@
     "layers_pattern": null,
     "layers_to_transform": null,
     "loftq_config": {},
-    "lora_alpha": 32,
+    "lora_alpha": 16,
     "lora_dropout": 0.1,
     "megatron_config": null,
     "megatron_core": "megatron.core",
     "modules_to_save": null,
     "peft_type": "LORA",
-    "r": 32,
+    "r": 16,
     "rank_pattern": {},
     "revision": null,
     "target_modules": [
-        "Wqkv",
-        "out_proj"
+        "fc1",
+        "lm_head",
+        "fc2",
+        "k_proj",
+        "q_proj",
+        "v_proj",
+        "o_proj"
     ],
     "task_type": "CAUSAL_LM"
 }
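The same settings expressed as a `peft.LoraConfig` sketch; only the fields visible in the diff are set, and everything else stays at PEFT defaults. Rank and alpha both drop from 32 to 16, while the target modules move from the fused `Wqkv`/`out_proj` pair to separate attention projections plus the MLP (`fc1`, `fc2`) and `lm_head`:

```python
# Sketch of the post-commit adapter settings; fields not present in
# adapter_config.json are left at PEFT defaults.
from peft import LoraConfig

config = LoraConfig(
    r=16,              # was 32
    lora_alpha=16,     # was 32
    lora_dropout=0.1,
    target_modules=[
        "fc1", "lm_head", "fc2",
        "k_proj", "q_proj", "v_proj", "o_proj",
    ],
    task_type="CAUSAL_LM",
)
```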
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2d4df7325080423adeedaaa1ed0ec3efbfd2a5a24cf444c3497925b8add516fb
-size 62931408
+oid sha256:6138b3893e6e44c9fe18b3f0aebcb7b4c4214d1d5beaae8ec322c75312525af0
+size 347895224
runs/Jan12_04-01-39_b3be375e58ed/events.out.tfevents.1705032105.b3be375e58ed.157.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5289985b3ddd04836ed15b8b563425ae2bc87a523e81e35381a556e6c1352094
+size 5549
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:051643a4d73b5ab606d4ad87fd0164e759007e3002e069475d2b9ec4f6c1679c
-size 4600
+oid sha256:5b7c8bb8d4339fa7bfee37f1783b0e8244d19d07328a7beb9d45a5fe4df442b6
+size 4728
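The three binary entries above (adapter_model.safetensors, the TensorBoard event file, and training_args.bin) are Git LFS pointers: each records only the sha256 oid and byte size of the tracked file, so the diff shows pointer text rather than the weights themselves. The adapter's size field jumps from roughly 63 MB to roughly 348 MB, presumably reflecting the much broader set of target modules in the new LoRA config.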