Bibek1129 commited on
Commit
f18db35
1 Parent(s): d816d93

Upload 11 files

Browse files
README.md CHANGED
@@ -1,12 +1,6 @@
1
  ---
2
  library_name: peft
3
  base_model: Sakonii/distilgpt2-nepali
4
- license: apache-2.0
5
- datasets:
6
- - Bibek1129/nepali_SQuAD_multiple_qsns
7
- language:
8
- - ne
9
- pipeline_tag: text-generation
10
  ---
11
 
12
  # Model Card for Model ID
 
1
  ---
2
  library_name: peft
3
  base_model: Sakonii/distilgpt2-nepali
 
 
 
 
 
 
4
  ---
5
 
6
  # Model Card for Model ID
adapter_config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "Sakonii/distilgpt2-nepali",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layers_pattern": null,
10
+ "layers_to_transform": null,
11
+ "loftq_config": {},
12
+ "lora_alpha": 64,
13
+ "lora_dropout": 0.05,
14
+ "megatron_config": null,
15
+ "megatron_core": "megatron.core",
16
+ "modules_to_save": null,
17
+ "peft_type": "LORA",
18
+ "r": 32,
19
+ "rank_pattern": {},
20
+ "revision": null,
21
+ "target_modules": [
22
+ "c_proj",
23
+ "lm_head",
24
+ "c_fc",
25
+ "c_attn"
26
+ ],
27
+ "task_type": "CAUSAL_LM",
28
+ "use_dora": false,
29
+ "use_rslora": false
30
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7df55f938a36adbc331f4638f59c8c111ca427c441c7f3aa9c5595dade78966b
3
+ size 170364520
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fd98899c51448b98aa5f8372267a409a84792612f01284e9f7c6d3c2be9a446
3
+ size 31966906
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f6e404d99f48d71f4aa39106b5d5b4ed22efdd437298179d99bd7e77546ebc4
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92b4bf3b5450f1ce67be35c30bc019d175d5b6cf7a6f330f56db97eb9e0bbfa3
3
+ size 1064
special_tokens_map.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": "</s>",
31
+ "sep_token": {
32
+ "content": "</s>",
33
+ "lstrip": false,
34
+ "normalized": false,
35
+ "rstrip": false,
36
+ "single_word": false
37
+ },
38
+ "unk_token": {
39
+ "content": "<unk>",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": false,
43
+ "single_word": false
44
+ }
45
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_eos_token": true,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<pad>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "4": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "bos_token": "<s>",
46
+ "clean_up_tokenization_spaces": true,
47
+ "cls_token": "<s>",
48
+ "eos_token": "</s>",
49
+ "mask_token": "<mask>",
50
+ "model_max_length": 512,
51
+ "pad_token": "</s>",
52
+ "padding_side": "right",
53
+ "sep_token": "</s>",
54
+ "tokenizer_class": "XLMRobertaTokenizer",
55
+ "unk_token": "<unk>"
56
+ }
trainer_state.json ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 50.0,
5
+ "eval_steps": 500,
6
+ "global_step": 118100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 2.12,
13
+ "grad_norm": 1.353366494178772,
14
+ "learning_rate": 0.0002,
15
+ "loss": 3.6004,
16
+ "step": 5000
17
+ },
18
+ {
19
+ "epoch": 4.23,
20
+ "grad_norm": 1.4591169357299805,
21
+ "learning_rate": 0.0002,
22
+ "loss": 3.3767,
23
+ "step": 10000
24
+ },
25
+ {
26
+ "epoch": 6.35,
27
+ "grad_norm": 1.5558066368103027,
28
+ "learning_rate": 0.0002,
29
+ "loss": 3.3035,
30
+ "step": 15000
31
+ },
32
+ {
33
+ "epoch": 8.47,
34
+ "grad_norm": 1.54267418384552,
35
+ "learning_rate": 0.0002,
36
+ "loss": 3.2643,
37
+ "step": 20000
38
+ },
39
+ {
40
+ "epoch": 10.58,
41
+ "grad_norm": 1.6217669248580933,
42
+ "learning_rate": 0.0002,
43
+ "loss": 3.238,
44
+ "step": 25000
45
+ },
46
+ {
47
+ "epoch": 12.7,
48
+ "grad_norm": 1.5397529602050781,
49
+ "learning_rate": 0.0002,
50
+ "loss": 3.2201,
51
+ "step": 30000
52
+ },
53
+ {
54
+ "epoch": 14.82,
55
+ "grad_norm": 1.5048496723175049,
56
+ "learning_rate": 0.0002,
57
+ "loss": 3.2048,
58
+ "step": 35000
59
+ },
60
+ {
61
+ "epoch": 16.93,
62
+ "grad_norm": 1.5197534561157227,
63
+ "learning_rate": 0.0002,
64
+ "loss": 3.1947,
65
+ "step": 40000
66
+ },
67
+ {
68
+ "epoch": 19.05,
69
+ "grad_norm": 1.5024417638778687,
70
+ "learning_rate": 0.0002,
71
+ "loss": 3.1829,
72
+ "step": 45000
73
+ },
74
+ {
75
+ "epoch": 21.17,
76
+ "grad_norm": 1.5805625915527344,
77
+ "learning_rate": 0.0002,
78
+ "loss": 3.1729,
79
+ "step": 50000
80
+ },
81
+ {
82
+ "epoch": 23.29,
83
+ "grad_norm": 1.5682896375656128,
84
+ "learning_rate": 0.0002,
85
+ "loss": 3.1681,
86
+ "step": 55000
87
+ },
88
+ {
89
+ "epoch": 25.4,
90
+ "grad_norm": 1.5147111415863037,
91
+ "learning_rate": 0.0002,
92
+ "loss": 3.1619,
93
+ "step": 60000
94
+ },
95
+ {
96
+ "epoch": 27.52,
97
+ "grad_norm": 1.6233525276184082,
98
+ "learning_rate": 0.0002,
99
+ "loss": 3.1557,
100
+ "step": 65000
101
+ },
102
+ {
103
+ "epoch": 29.64,
104
+ "grad_norm": 1.5563185214996338,
105
+ "learning_rate": 0.0002,
106
+ "loss": 3.153,
107
+ "step": 70000
108
+ },
109
+ {
110
+ "epoch": 31.75,
111
+ "grad_norm": 1.5635435581207275,
112
+ "learning_rate": 0.0002,
113
+ "loss": 3.151,
114
+ "step": 75000
115
+ },
116
+ {
117
+ "epoch": 33.87,
118
+ "grad_norm": 1.471053957939148,
119
+ "learning_rate": 0.0002,
120
+ "loss": 3.1449,
121
+ "step": 80000
122
+ },
123
+ {
124
+ "epoch": 35.99,
125
+ "grad_norm": 1.5087348222732544,
126
+ "learning_rate": 0.0002,
127
+ "loss": 3.1414,
128
+ "step": 85000
129
+ },
130
+ {
131
+ "epoch": 38.1,
132
+ "grad_norm": 1.6342508792877197,
133
+ "learning_rate": 0.0002,
134
+ "loss": 3.1363,
135
+ "step": 90000
136
+ },
137
+ {
138
+ "epoch": 40.22,
139
+ "grad_norm": 1.7150408029556274,
140
+ "learning_rate": 0.0002,
141
+ "loss": 3.1327,
142
+ "step": 95000
143
+ },
144
+ {
145
+ "epoch": 42.34,
146
+ "grad_norm": 1.8055483102798462,
147
+ "learning_rate": 0.0002,
148
+ "loss": 3.1319,
149
+ "step": 100000
150
+ },
151
+ {
152
+ "epoch": 44.45,
153
+ "grad_norm": 1.509770154953003,
154
+ "learning_rate": 0.0002,
155
+ "loss": 3.13,
156
+ "step": 105000
157
+ },
158
+ {
159
+ "epoch": 46.57,
160
+ "grad_norm": 1.6583279371261597,
161
+ "learning_rate": 0.0002,
162
+ "loss": 3.1266,
163
+ "step": 110000
164
+ },
165
+ {
166
+ "epoch": 48.69,
167
+ "grad_norm": 1.7038261890411377,
168
+ "learning_rate": 0.0002,
169
+ "loss": 3.1273,
170
+ "step": 115000
171
+ }
172
+ ],
173
+ "logging_steps": 5000,
174
+ "max_steps": 118100,
175
+ "num_input_tokens_seen": 0,
176
+ "num_train_epochs": 50,
177
+ "save_steps": 500,
178
+ "total_flos": 7.86717088860818e+16,
179
+ "train_batch_size": 4,
180
+ "trial_name": null,
181
+ "trial_params": null
182
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc49f4b336eeaa68f4661a657d0ab6a846ff743e9a59c9b32b51489b7cd5a138
3
+ size 4920