damerajee committed
Commit f055cde
1 Parent(s): abda038

damerajee/paligemma-hindi-version-1

README.md ADDED
@@ -0,0 +1,60 @@
+---
+library_name: peft
+tags:
+- generated_from_trainer
+base_model: google/paligemma-3b-pt-224
+model-index:
+- name: paligemma-hindi
+  results: []
+---
+
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+
+# paligemma-hindi
+
+This model is a fine-tuned version of [google/paligemma-3b-pt-224](https://huggingface.co/google/paligemma-3b-pt-224) on an unknown dataset.
+It achieves the following results on the evaluation set:
+- eval_loss: 1.4683
+- eval_runtime: 388.3071
+- eval_samples_per_second: 1.288
+- eval_steps_per_second: 1.288
+- epoch: 0.7111
+- step: 200
+
+## Model description
+
+More information needed
+
+## Intended uses & limitations
+
+More information needed
+
+## Training and evaluation data
+
+More information needed
+
+## Training procedure
+
+### Training hyperparameters
+
+The following hyperparameters were used during training:
+- learning_rate: 2e-05
+- train_batch_size: 1
+- eval_batch_size: 1
+- seed: 42
+- gradient_accumulation_steps: 16
+- total_train_batch_size: 16
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- lr_scheduler_warmup_steps: 2
+- num_epochs: 1
+- mixed_precision_training: Native AMP
+
+### Framework versions
+
+- PEFT 0.11.1
+- Transformers 4.41.2
+- Pytorch 2.1.2
+- Datasets 2.18.0
+- Tokenizers 0.19.1
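For reference, the hyperparameters listed in the card above map onto Hugging Face `TrainingArguments` roughly as follows. This is a minimal sketch, not the author's actual training script; `output_dir` is an assumed placeholder name.

```python
from transformers import TrainingArguments

# Sketch of the listed hyperparameters; output_dir is a placeholder.
args = TrainingArguments(
    output_dir="paligemma-hindi",       # assumed name
    learning_rate=2e-5,
    per_device_train_batch_size=1,      # train_batch_size: 1
    per_device_eval_batch_size=1,       # eval_batch_size: 1
    seed=42,
    gradient_accumulation_steps=16,     # total_train_batch_size = 1 * 16 = 16
    lr_scheduler_type="linear",
    warmup_steps=2,
    num_train_epochs=1,
    fp16=True,                          # "Native AMP" mixed precision
)
# The listed optimizer (Adam, betas=(0.9, 0.999), epsilon=1e-08) matches the
# Trainer's default AdamW settings, so no explicit optim override is needed.
```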
adapter_config.json CHANGED
@@ -18,14 +18,17 @@
     "lm_head"
   ],
   "peft_type": "LORA",
-  "r": 64,
+  "r": 16,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "gate_proj",
     "o_proj",
-    "k_proj",
     "v_proj",
-    "q_proj"
+    "q_proj",
+    "k_proj",
+    "up_proj",
+    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:791ac1247ca5d56c8a889414be995ce0113bdc3a3c9f7b795a57adbf34a74113
-size 2213920832
+oid sha256:4844da83ca69360494c7eb5935a86b9b01b94fdf9d37c609a5b5bc6340f2b6d3
+size 2197569448
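The replaced adapter weights are slightly smaller (about 2.198 GB vs. 2.214 GB). A minimal sketch, assuming standard PEFT usage rather than anything documented in this repo, of attaching the adapter to the base model; the repo id is taken from the commit header.

```python
from transformers import PaliGemmaForConditionalGeneration
from peft import PeftModel

# Load the frozen base model, then attach the LoRA adapter from this repo.
base = PaliGemmaForConditionalGeneration.from_pretrained(
    "google/paligemma-3b-pt-224"
)
model = PeftModel.from_pretrained(base, "damerajee/paligemma-hindi-version-1")
```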
runs/Jun02_11-45-59_f563fd6f7cdb/events.out.tfevents.1717329228.f563fd6f7cdb.35.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c52b8f602194ce9b40e302412602485f829ed21aa3fb937de645251dad8ceefd
+size 12535
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fe14bd53d6106a8027cc9914931fcf9029d67123256a1311ebfc4b7e500a09a0
+oid sha256:ad57882b4984a28fdba8ae44cf9d0d820afa982d979058223bdd73f9777b1127
 size 5176
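training_args.bin is the pickled `TrainingArguments` object that the `Trainer` saves alongside checkpoints, which is why its size is unchanged while its hash differs. A short sketch of inspecting it, assuming the PyTorch version listed in the card:

```python
import torch

# training_args.bin is a pickled TrainingArguments object saved by Trainer.
# On torch >= 2.6 you would need to pass weights_only=False explicitly.
args = torch.load("training_args.bin")
print(args.learning_rate, args.gradient_accumulation_steps)
```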