Vui Seng Chua committed on
Commit
de654c9
1 Parent(s): f9a16c1

add model collaterals

Browse files
README.md ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ This model is developed with transformers v4.9.1.
2
+
3
+ ```
4
+ m = 0.8444
5
+ eval_samples = 9815
6
+
7
+ mm = 0.8495
8
+ eval_samples = 9832
9
+ ```
10
+
11
+ # Train
12
+ ```bash
13
+ #!/usr/bin/env bash
14
+
15
+ export CUDA_VISIBLE_DEVICES=0
16
+
17
+ OUTDIR=bert-mnli
18
+ NEPOCH=3
19
+
20
+ WORKDIR=transformers/examples/pytorch/text-classification
21
+ cd $WORKDIR
22
+
23
+ python run_glue.py \
24
+ --model_name_or_path bert-base-uncased \
25
+ --task_name mnli \
26
+ --max_seq_length 128 \
27
+ --do_train \
28
+ --per_device_train_batch_size 32 \
29
+ --learning_rate 2e-5 \
30
+ --num_train_epochs $NEPOCH \
31
+ --logging_steps 1 \
32
+ --evaluation_strategy steps \
33
+ --save_steps 3000 \
34
+ --do_eval \
35
+ --per_device_eval_batch_size 128 \
36
+ --eval_steps 250 \
37
+ --output_dir $OUTDIR \
38
+ --overwrite_output_dir
39
+ ```
40
+
41
+ # Eval
42
+ ```bash
43
+ export CUDA_VISIBLE_DEVICES=0
44
+
45
+ OUTDIR=eval-bert-mnli
46
+ WORKDIR=transformers/examples/pytorch/text-classification
47
+ cd $WORKDIR
48
+
49
+ nohup python run_glue.py \
50
+ --model_name_or_path vuiseng9/bert-mnli \
51
+ --task_name mnli \
52
+ --do_eval \
53
+ --per_device_eval_batch_size 128 \
54
+ --max_seq_length 128 \
55
+ --overwrite_output_dir \
56
+ --output_dir $OUTDIR 2>&1 | tee $OUTDIR/run.log &
57
+ ```
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_accuracy": 0.8494711147274207,
3
+ "eval_loss": 0.46933791041374207,
4
+ "eval_runtime": 22.6994,
5
+ "eval_samples": 9832,
6
+ "eval_samples_per_second": 433.139,
7
+ "eval_steps_per_second": 3.392
8
+ }
config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "bert-base-uncased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "finetuning_task": "mnli",
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "LABEL_0",
14
+ "1": "LABEL_1",
15
+ "2": "LABEL_2"
16
+ },
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 3072,
19
+ "label2id": {
20
+ "LABEL_0": 0,
21
+ "LABEL_1": 1,
22
+ "LABEL_2": 2
23
+ },
24
+ "layer_norm_eps": 1e-12,
25
+ "max_position_embeddings": 512,
26
+ "model_type": "bert",
27
+ "num_attention_heads": 12,
28
+ "num_hidden_layers": 12,
29
+ "pad_token_id": 0,
30
+ "position_embedding_type": "absolute",
31
+ "problem_type": "single_label_classification",
32
+ "torch_dtype": "float32",
33
+ "transformers_version": "4.9.1",
34
+ "type_vocab_size": 2,
35
+ "use_cache": true,
36
+ "vocab_size": 30522
37
+ }
eval_mnli-mm_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_accuracy": 0.8494711147274207,
3
+ "eval_loss": 0.46933791041374207,
4
+ "eval_runtime": 22.6994,
5
+ "eval_samples": 9832,
6
+ "eval_samples_per_second": 433.139,
7
+ "eval_steps_per_second": 3.392
8
+ }
eval_mnli_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_accuracy": 0.8444218033622007,
3
+ "eval_loss": 0.49247315526008606,
4
+ "eval_runtime": 22.5125,
5
+ "eval_samples": 9815,
6
+ "eval_samples_per_second": 435.98,
7
+ "eval_steps_per_second": 3.42
8
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_accuracy": 0.8494711147274207,
4
+ "eval_loss": 0.46933791041374207,
5
+ "eval_runtime": 22.4675,
6
+ "eval_samples": 9832,
7
+ "eval_samples_per_second": 437.61,
8
+ "eval_steps_per_second": 3.427
9
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7ecc6a182477feca62817a4e028dbd3a949fac885da7b457b4517ad086774c9
3
+ size 438022317
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "bert-base-uncased", "tokenizer_class": "BertTokenizer"}
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "train_loss": 0.3605867331176782,
4
+ "train_runtime": 11742.5051,
5
+ "train_samples": 392702,
6
+ "train_samples_per_second": 100.328,
7
+ "train_steps_per_second": 3.135
8
+ }
trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d6f3cbe28aea9c1b0c5b889ec91bec153ace6d29b73b98f5a59d8812b2be826
3
+ size 2735
vocab.txt ADDED
The diff for this file is too large to render. See raw diff