Bram Vanroy committed on
Commit 0877bc4
1 Parent(s): 0aa0a4d

init adapters

README.md ADDED
@@ -0,0 +1,74 @@
+ ---
+ license: apache-2.0
+ base_model: tiiuae/falcon-7b
+ tags:
+ - generated_from_trainer
+ datasets:
+ - yhavinga/mc4_nl_cleaned
+ model-index:
+ - name: tiny-3e-4lr+1152tbs+1ep+0.1wd
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # tiny-3e-4lr+1152tbs+1ep+0.1wd
+
+ This model is a fine-tuned version of [tiiuae/falcon-7b](https://huggingface.co/tiiuae/falcon-7b) on the yhavinga/mc4_nl_cleaned micro dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 2.0928
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 0.0003
+ - train_batch_size: 12
+ - eval_batch_size: 24
+ - seed: 42
+ - distributed_type: multi-GPU
+ - num_devices: 16
+ - gradient_accumulation_steps: 6
+ - total_train_batch_size: 1152
+ - total_eval_batch_size: 384
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: cosine
+ - lr_scheduler_warmup_ratio: 0.03
+ - num_epochs: 1
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss |
+ |:-------------:|:-----:|:----:|:---------------:|
+ | 2.6094        | 0.1   | 170  | 2.5980          |
+ | 2.4503        | 0.19  | 340  | 2.4405          |
+ | 2.3243        | 0.29  | 510  | 2.3428          |
+ | 2.2822        | 0.39  | 680  | 2.2752          |
+ | 2.238         | 0.49  | 850  | 2.2248          |
+ | 2.2015        | 0.58  | 1020 | 2.1865          |
+ | 2.1678        | 0.68  | 1190 | 2.1560          |
+ | 2.1301        | 0.78  | 1360 | 2.1312          |
+ | 2.1161        | 0.88  | 1530 | 2.1112          |
+ | 2.0997        | 0.97  | 1700 | 2.0928          |
+
+
+ ### Framework versions
+
+ - Transformers 4.31.0.dev0
+ - Pytorch 2.0.1+cu117
+ - Datasets 2.13.1
+ - Tokenizers 0.13.3
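
The two "total" batch sizes listed under "Training hyperparameters" in the README above follow directly from the per-device settings; a quick sanity check using only those reported values:

```python
# Effective batch sizes implied by the README's training hyperparameters.
per_device_train_batch_size = 12
per_device_eval_batch_size = 24
num_devices = 16
gradient_accumulation_steps = 6

total_train_batch_size = per_device_train_batch_size * num_devices * gradient_accumulation_steps
total_eval_batch_size = per_device_eval_batch_size * num_devices

print(total_train_batch_size)  # 1152, as reported
print(total_eval_batch_size)   # 384, as reported
```
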
adapter_config.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "auto_mapping": null,
+   "base_model_name_or_path": "tiiuae/falcon-7b",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "lora_alpha": 16,
+   "lora_dropout": 0.1,
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 64,
+   "revision": null,
+   "target_modules": [
+     "query_key_value",
+     "dense",
+     "dense_h_to_4h",
+     "dense_4h_to_h"
+   ],
+   "task_type": "CAUSAL_LM"
+ }
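
adapter_config.json describes a LoRA adapter (r=64, lora_alpha=16, dropout 0.1) over Falcon-7B's query_key_value, dense, dense_h_to_4h and dense_4h_to_h modules. A minimal loading sketch with the PEFT library follows; "adapter_repo" is a placeholder for wherever this commit's adapter_config.json and adapter_model.bin end up (a local folder or Hub repo id), and the dtype choice is an assumption, not something recorded in this commit.

```python
# Minimal sketch: attach the LoRA adapter described in adapter_config.json
# to the tiiuae/falcon-7b base model. "adapter_repo" is a placeholder.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained(
    "tiiuae/falcon-7b",          # base_model_name_or_path from adapter_config.json
    torch_dtype=torch.bfloat16,  # assumption; pick whatever fits your hardware
    device_map="auto",
    trust_remote_code=True,      # Falcon shipped custom modeling code at the time
)
model = PeftModel.from_pretrained(base, "adapter_repo")  # loads adapter_model.bin

tokenizer = AutoTokenizer.from_pretrained("adapter_repo")
inputs = tokenizer("Het weer in Gent is vandaag", return_tensors="pt").to(base.device)
output = model.generate(**inputs, max_new_tokens=30)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```
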
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ac246f63bdeeb3c9050e5a778aae35fbdba1bad7081e19a52946f2c3146c1453
+ size 261185933
all_results.json ADDED
@@ -0,0 +1,14 @@
+ {
+   "epoch": 1.0,
+   "eval_loss": 2.0928452014923096,
+   "eval_runtime": 2111.0546,
+   "eval_samples": 105484,
+   "eval_samples_per_second": 49.967,
+   "eval_steps_per_second": 0.13,
+   "perplexity": 8.107951132441189,
+   "train_loss": 0.05167265042312105,
+   "train_runtime": 3158.899,
+   "train_samples": 2008858,
+   "train_samples_per_second": 635.936,
+   "train_steps_per_second": 0.552
+ }
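
The perplexity field here is simply the exponential of the evaluation loss; a one-line check with the numbers above:

```python
import math

eval_loss = 2.0928452014923096  # eval_loss from all_results.json
print(math.exp(eval_loss))      # ~8.10795, the reported perplexity
```
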
eval_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "epoch": 1.0,
+   "eval_loss": 2.0928452014923096,
+   "eval_runtime": 2111.0546,
+   "eval_samples": 105484,
+   "eval_samples_per_second": 49.967,
+   "eval_steps_per_second": 0.13,
+   "perplexity": 8.107951132441189
+ }
info.txt ADDED
@@ -0,0 +1,2 @@
+ CMD: -n 4 -g 4 -t 80 -o falcon-7b-ft-mc4_nl_cleaned_tiny/tiny-3e-4lr+1152tbs+1ep+0.1wd -p falcon-7b-ft-mc4_nl_cleaned -e --preprocessed_dataset /dodrio/scratch/projects/2023_005/llm-finetuning/preprocessed_datasets/mc4_nl_cleaned--tiny-falcon-40b-2048 --learning_rate 3e-4 --model_name_or_path tiiuae/falcon-7b --per_device_train_batch_size 12 --per_device_eval_batch_size 24 --gradient_accumulation_steps 6 --eval_accumulation_steps 6 --save_total_limit 3 --eval_steps 170 --save_steps 170 --logging_first_step --weight_decay 0.1 --lr_scheduler_type cosine --early_stopping_patience 5 --warmup_ratio 0.03 --deepspeed ds_config_zero2.json --report_to none
+
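
The CMD line records the launcher invocation. The launcher script itself is not part of this repo (the leading -n/-g/-t/-o/-p/-e flags look like cluster and job options), but the long-form flags map onto standard transformers TrainingArguments. A rough, non-authoritative sketch of that mapping:

```python
# Rough mapping of the info.txt flags onto transformers TrainingArguments.
# This is a readability sketch, not the actual training script used for this run.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="falcon-7b-ft-mc4_nl_cleaned_tiny/tiny-3e-4lr+1152tbs+1ep+0.1wd",
    learning_rate=3e-4,
    per_device_train_batch_size=12,
    per_device_eval_batch_size=24,
    gradient_accumulation_steps=6,
    eval_accumulation_steps=6,
    save_total_limit=3,
    evaluation_strategy="steps",   # implied by --eval_steps/--save_steps
    eval_steps=170,
    save_steps=170,
    logging_first_step=True,
    weight_decay=0.1,
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
    num_train_epochs=1,            # from the README (num_epochs: 1)
    deepspeed="ds_config_zero2.json",
    report_to="none",
)
# --early_stopping_patience 5 has no TrainingArguments field; it corresponds to
# transformers.EarlyStoppingCallback(early_stopping_patience=5) passed to the Trainer.
```
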
special_tokens_map.json ADDED
@@ -0,0 +1,17 @@
+ {
+   "additional_special_tokens": [
+     ">>TITLE<<",
+     ">>ABSTRACT<<",
+     ">>INTRODUCTION<<",
+     ">>SUMMARY<<",
+     ">>COMMENT<<",
+     ">>ANSWER<<",
+     ">>QUESTION<<",
+     ">>DOMAIN<<",
+     ">>PREFIX<<",
+     ">>SUFFIX<<",
+     ">>MIDDLE<<"
+   ],
+   "eos_token": "<|endoftext|>",
+   "pad_token": "<|endoftext|>"
+ }
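
As this map shows, <|endoftext|> doubles as both EOS and padding token (Falcon defines no dedicated pad token), next to the ">>TITLE<<"-style structural tokens inherited from the Falcon tokenizer. A quick check against the bundled tokenizer files, with "adapter_repo" again standing in for wherever they live:

```python
from transformers import AutoTokenizer

# "adapter_repo" is a placeholder for the folder/repo containing tokenizer.json,
# tokenizer_config.json and special_tokens_map.json from this commit.
tokenizer = AutoTokenizer.from_pretrained("adapter_repo")

assert tokenizer.eos_token == tokenizer.pad_token == "<|endoftext|>"
print(tokenizer.model_max_length)               # 2048, from tokenizer_config.json
print(tokenizer.additional_special_tokens[:3])  # e.g. ['>>TITLE<<', '>>ABSTRACT<<', '>>INTRODUCTION<<']
```
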
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "add_prefix_space": false,
+   "clean_up_tokenization_spaces": true,
+   "eos_token": "<|endoftext|>",
+   "model_max_length": 2048,
+   "tokenizer_class": "PreTrainedTokenizerFast"
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "epoch": 1.0,
+   "train_loss": 0.05167265042312105,
+   "train_runtime": 3158.899,
+   "train_samples": 2008858,
+   "train_samples_per_second": 635.936,
+   "train_steps_per_second": 0.552
+ }
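
As a quick consistency check on these throughput numbers (using only values reported in this repo):

```python
# Consistency check on train_results.json: runtime x throughput roughly reproduces
# the number of training samples, and implies the number of optimizer steps.
train_runtime = 3158.899           # seconds
samples_per_second = 635.936
steps_per_second = 0.552
total_train_batch_size = 1152      # from the README hyperparameters

print(round(train_runtime * samples_per_second))  # ~2008858 == train_samples
print(round(train_runtime * steps_per_second))    # ~1744 steps (~2008858 / 1152)
```
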
trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:46136d04b167007cede48d42d509fef829949160a7f2582a890f527d43f974f2
+ size 5627