sgugger committed on
Commit
db18258
1 Parent(s): 5b9f034

Training in progress, step 500

Browse files
README.md ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ license: apache-2.0
5
+ base_model: bert-base-cased
6
+ tags:
7
+ - generated_from_trainer
8
+ datasets:
9
+ - glue
10
+ metrics:
11
+ - accuracy
12
+ - f1
13
+ model-index:
14
+ - name: push-to-hub-test
15
+ results:
16
+ - task:
17
+ name: Text Classification
18
+ type: text-classification
19
+ dataset:
20
+ name: GLUE MRPC
21
+ type: glue
22
+ config: mrpc
23
+ split: validation
24
+ args: mrpc
25
+ metrics:
26
+ - name: Accuracy
27
+ type: accuracy
28
+ value: 0.8676470588235294
29
+ - name: F1
30
+ type: f1
31
+ value: 0.9078498293515359
32
+ ---
33
+
34
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
35
+ should probably proofread and complete it, then remove this comment. -->
36
+
37
+ # push-to-hub-test
38
+
39
+ This model is a fine-tuned version of [bert-base-cased](https://huggingface.co/bert-base-cased) on the GLUE MRPC dataset.
40
+ It achieves the following results on the evaluation set:
41
+ - Loss: 0.6255
42
+ - Accuracy: 0.8676
43
+ - F1: 0.9078
44
+ - Combined Score: 0.8877
45
+
46
+ ## Model description
47
+
48
+ More information needed
49
+
50
+ ## Intended uses & limitations
51
+
52
+ More information needed
53
+
54
+ ## Training and evaluation data
55
+
56
+ More information needed
57
+
58
+ ## Training procedure
59
+
60
+ ### Training hyperparameters
61
+
62
+ The following hyperparameters were used during training:
63
+ - learning_rate: 5e-05
64
+ - train_batch_size: 16
65
+ - eval_batch_size: 16
66
+ - seed: 42
67
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
68
+ - lr_scheduler_type: linear
69
+ - num_epochs: 3.0
70
+
71
+ ### Training results
72
+
73
+
74
+
75
+ ### Framework versions
76
+
77
+ - Transformers 4.32.0.dev0
78
+ - PyTorch 2.0.0+cu117
79
+ - Datasets 2.14.4.dev0
80
+ - Tokenizers 0.13.3
all_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_accuracy": 0.8676470588235294,
4
+ "eval_combined_score": 0.8877484440875327,
5
+ "eval_f1": 0.9078498293515359,
6
+ "eval_loss": 0.6254646182060242,
7
+ "eval_runtime": 0.9668,
8
+ "eval_samples": 408,
9
+ "eval_samples_per_second": 422.028,
10
+ "eval_steps_per_second": 26.894,
11
+ "train_loss": 0.29736072982566947,
12
+ "train_runtime": 72.3445,
13
+ "train_samples": 3668,
14
+ "train_samples_per_second": 152.106,
15
+ "train_steps_per_second": 9.538
16
+ }
config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "bert-base-cased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "finetuning_task": "mrpc",
9
+ "gradient_checkpointing": false,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "id2label": {
14
+ "0": "not_equivalent",
15
+ "1": "equivalent"
16
+ },
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 3072,
19
+ "label2id": {
20
+ "equivalent": 1,
21
+ "not_equivalent": 0
22
+ },
23
+ "layer_norm_eps": 1e-12,
24
+ "max_position_embeddings": 512,
25
+ "model_type": "bert",
26
+ "num_attention_heads": 12,
27
+ "num_hidden_layers": 12,
28
+ "pad_token_id": 0,
29
+ "position_embedding_type": "absolute",
30
+ "problem_type": "single_label_classification",
31
+ "torch_dtype": "float32",
32
+ "transformers_version": "4.32.0.dev0",
33
+ "type_vocab_size": 2,
34
+ "use_cache": true,
35
+ "vocab_size": 28996
36
+ }
emissions.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ timestamp,experiment_id,project_name,duration,emissions,energy_consumed,country_name,country_iso_code,region,on_cloud,cloud_provider,cloud_region
2
+ 2022-06-10T06:57:35,eb65b40c-6656-492b-beab-b1127ec40a89,codecarbon,75.77277112007141,0.0019661329089992424,0.009341366942677192,United States,USA,new york,N,,
3
+ 2023-06-07T12:57:51,10bf2505-7043-4977-a3bd-89ff2bceb74c,codecarbon,22.494215726852417,0.0005877247722987192,0.002792360950887197,United States,USA,new york,N,,
4
+ 2023-06-07T12:59:15,27c1fb6c-5f93-4e1a-a0ac-c86241e84a50,codecarbon,70.80675888061523,0.0020041835812859015,0.009522150902204202,United States,USA,new york,N,,
5
+ 2023-06-07T13:02:28,943ea6b5-c22a-42ef-9877-f6599b3c7395,codecarbon,44.19557809829712,0.0009018572087676258,0.0042848472137548,United States,USA,new york,N,,
6
+ 2023-06-07T13:03:52,f3cf84e8-913c-4d05-aba1-1a4af046873d,codecarbon,70.99636244773865,0.0020228958118171324,0.009611055274288165,United States,USA,new york,N,,
7
+ 2023-06-07T13:04:57,ca4ee800-6554-4ea4-818a-44a382e60064,codecarbon,44.18032264709473,0.0009230705444922738,0.004385634679431778,United States,USA,new york,N,,
8
+ 2023-06-07T13:27:08,fa461c6a-6555-473b-aa79-a138281392a6,codecarbon,44.5587215423584,0.0008968433736060909,0.004261025795670793,United States,USA,new york,N,,
9
+ 2023-06-07T13:28:11,e36c4165-eb4f-4588-ae3b-e56e1a3280c2,codecarbon,44.47044777870178,0.0009021980585399627,0.004286466637742442,United States,USA,new york,N,,
10
+ 2023-06-07T13:33:28,27210e69-2b41-40a8-8f20-daea46e4428c,codecarbon,44.4998722076416,0.0008911166244071989,0.004233817225278285,United States,USA,new york,N,,
11
+ 2023-08-04T10:27:32,03f5fb31-9194-4a04-b842-0ba1aa7b8eec,codecarbon,72.91463541984558,0.002010806494296473,0.009553617269700523,United States,USA,new york,N,,
12
+ 2023-08-04T10:30:40,5757fa70-c58e-4185-8470-3e46d3363f15,codecarbon,75.35435104370117,0.0020779724772222426,0.009872732060823447,United States,USA,new york,N,,
eval_results.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_accuracy": 0.8676470588235294,
4
+ "eval_combined_score": 0.8877484440875327,
5
+ "eval_f1": 0.9078498293515359,
6
+ "eval_loss": 0.6254646182060242,
7
+ "eval_runtime": 0.9668,
8
+ "eval_samples": 408,
9
+ "eval_samples_per_second": 422.028,
10
+ "eval_steps_per_second": 26.894
11
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b311abbc8de524a975f2f205c66376fb0d9855e205e315d54c14bb6769dd2997
3
+ size 433315633
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "clean_up_tokenization_spaces": true,
3
+ "cls_token": "[CLS]",
4
+ "do_lower_case": false,
5
+ "mask_token": "[MASK]",
6
+ "model_max_length": 512,
7
+ "pad_token": "[PAD]",
8
+ "sep_token": "[SEP]",
9
+ "strip_accents": null,
10
+ "tokenize_chinese_chars": true,
11
+ "tokenizer_class": "BertTokenizer",
12
+ "unk_token": "[UNK]"
13
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "train_loss": 0.29736072982566947,
4
+ "train_runtime": 72.3445,
5
+ "train_samples": 3668,
6
+ "train_samples_per_second": 152.106,
7
+ "train_steps_per_second": 9.538
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
+ "global_step": 690,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 2.17,
12
+ "learning_rate": 1.3768115942028985e-05,
13
+ "loss": 0.3782,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 3.0,
18
+ "step": 690,
19
+ "total_flos": 723818513295360.0,
20
+ "train_loss": 0.29736072982566947,
21
+ "train_runtime": 72.3445,
22
+ "train_samples_per_second": 152.106,
23
+ "train_steps_per_second": 9.538
24
+ }
25
+ ],
26
+ "max_steps": 690,
27
+ "num_train_epochs": 3,
28
+ "total_flos": 723818513295360.0,
29
+ "trial_name": null,
30
+ "trial_params": null
31
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2007c2fd1134bc449e84210b49888da8ea596302ac9fd53aff2469ae577e540
3
+ size 4091
vocab.txt ADDED
The diff for this file is too large to render. See raw diff