mwz committed on
Commit
c9817bb
1 Parent(s): 1dc74bb

End of training

Browse files
README.md CHANGED
@@ -4,9 +4,6 @@ tags:
4
  model-index:
5
  - name: UrduBert
6
  results: []
7
-
8
- inference: false
9
-
10
  ---
11
 
12
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -33,20 +30,21 @@ More information needed
33
  ### Training hyperparameters
34
 
35
  The following hyperparameters were used during training:
36
- - learning_rate: 0.001
37
- - train_batch_size: 72
38
- - eval_batch_size: 16
39
  - seed: 42
40
- - gradient_accumulation_steps: 56
41
- - total_train_batch_size: 4032
42
- - optimizer: Adam with betas=(0.9,0.98) and epsilon=1e-09
43
  - lr_scheduler_type: linear
44
- - lr_scheduler_warmup_ratio: 0.5
45
  - num_epochs: 1
46
 
 
 
 
 
47
  ### Framework versions
48
 
49
- - Transformers 4.33.0
50
- - Pytorch 2.0.0
51
- - Datasets 2.1.0
52
- - Tokenizers 0.13.3
 
4
  model-index:
5
  - name: UrduBert
6
  results: []
 
 
 
7
  ---
8
 
9
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
30
  ### Training hyperparameters
31
 
32
  The following hyperparameters were used during training:
33
+ - learning_rate: 5e-05
34
+ - train_batch_size: 64
35
+ - eval_batch_size: 8
36
  - seed: 42
37
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 
 
38
  - lr_scheduler_type: linear
 
39
  - num_epochs: 1
40
 
41
+ ### Training results
42
+
43
+
44
+
45
  ### Framework versions
46
 
47
+ - Transformers 4.35.2
48
+ - Pytorch 2.1.0+cu118
49
+ - Datasets 2.15.0
50
+ - Tokenizers 0.15.0
config.json CHANGED
@@ -1,24 +1,26 @@
1
  {
2
  "architectures": [
3
- "BertForMaskedLM"
4
  ],
5
- "attention_probs_dropout_prob": 0,
 
6
  "classifier_dropout": null,
 
7
  "hidden_act": "gelu",
8
- "hidden_dropout_prob": 0,
9
  "hidden_size": 768,
10
  "initializer_range": 0.02,
11
  "intermediate_size": 3072,
12
  "layer_norm_eps": 1e-12,
13
- "max_position_embeddings": 128,
14
- "model_type": "bert",
15
  "num_attention_heads": 12,
16
- "num_hidden_layers": 12,
17
- "pad_token_id": 0,
18
  "position_embedding_type": "absolute",
19
  "torch_dtype": "float32",
20
- "transformers_version": "4.33.0",
21
- "type_vocab_size": 2,
22
  "use_cache": true,
23
- "vocab_size": 32768
24
  }
 
1
  {
2
  "architectures": [
3
+ "RobertaForMaskedLM"
4
  ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
  "classifier_dropout": null,
8
+ "eos_token_id": 2,
9
  "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
  "hidden_size": 768,
12
  "initializer_range": 0.02,
13
  "intermediate_size": 3072,
14
  "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 514,
16
+ "model_type": "roberta",
17
  "num_attention_heads": 12,
18
+ "num_hidden_layers": 6,
19
+ "pad_token_id": 1,
20
  "position_embedding_type": "absolute",
21
  "torch_dtype": "float32",
22
+ "transformers_version": "4.35.2",
23
+ "type_vocab_size": 1,
24
  "use_cache": true,
25
+ "vocab_size": 52000
26
  }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c481934b104d13f5f3486df15e39ec6732512db63838b089ac8e78c0e4268df9
3
+ size 334030264
runs/Nov26_18-41-28_8dcbbf658321/events.out.tfevents.1701024096.8dcbbf658321.377.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2622d176df874fc31d8dc038813c9a2d6baa015eb5d7962983723333305146e8
3
+ size 5874
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3efa374828c8d7ed9d214dc58b3b1a646022db50c8cccc67415567846e22b9e2
3
- size 4027
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afe7dceb16223304c7be38604b6e960c6365ffa3a4a9c6c936718849eb776605
3
+ size 4536