lysandre HF staff committed on
Commit
8fccccf
1 Parent(s): 34ab9f5

Add model weights and configuration

Browse files
Files changed (6) hide show
  1. README.md +19 -0
  2. config.json +31 -0
  3. pytorch_model.bin +3 -0
  4. tf_model.h5 +3 -0
  5. tokenizer_config.json +1 -0
  6. vocab.txt +0 -0
README.md ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language: en
3
+ license: apache-2.0
4
+ datasets:
5
+ - sst-2
6
+ ---
7
+
8
+ # DistilBERT base uncased finetuned SST-2
9
+
10
+ This model is a checkpoint of [DistilBERT-base-uncased](https://huggingface.co/distilbert-base-uncased), fine-tuned on SST-2.
11
+ This model reaches an accuracy of 91.3 on the dev set (for comparison, the BERT bert-base-uncased version reaches an accuracy of 92.7).
12
+
13
+ # Fine-tuning hyper-parameters
14
+
15
+ - learning_rate = 1e-5
16
+ - batch_size = 32
17
+ - warmup = 600
18
+ - max_seq_length = 128
19
+ - num_train_epochs = 3.0
config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation": "gelu",
3
+ "architectures": [
4
+ "DistilBertForSequenceClassification"
5
+ ],
6
+ "attention_dropout": 0.1,
7
+ "dim": 768,
8
+ "dropout": 0.1,
9
+ "finetuning_task": "sst-2",
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "NEGATIVE",
13
+ "1": "POSITIVE"
14
+ },
15
+ "initializer_range": 0.02,
16
+ "label2id": {
17
+ "NEGATIVE": 0,
18
+ "POSITIVE": 1
19
+ },
20
+ "max_position_embeddings": 512,
21
+ "model_type": "distilbert",
22
+ "n_heads": 12,
23
+ "n_layers": 6,
24
+ "output_past": true,
25
+ "pad_token_id": 0,
26
+ "qa_dropout": 0.1,
27
+ "seq_classif_dropout": 0.2,
28
+ "sinusoidal_pos_embds": false,
29
+ "tie_weights_": true,
30
+ "vocab_size": 30522
31
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60554cbd7781b09d87f1ececbea8c064b94e49a7f03fd88e8775bfe6cc3d9f88
3
+ size 267844284
tf_model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b44df675bb34ccd8e57c14292c811ac7358b7c8e37c7f212745f640cd6019ac8
3
+ size 267949840
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
1
+ {"model_max_length": 512, "do_lower_case": true}
vocab.txt ADDED
The diff for this file is too large to render. See raw diff