Jorgeutd commited on
Commit
08d5c62
1 Parent(s): c11a821

commit files to HF hub

Browse files
README.md ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ---
3
+ language: en
4
+ widget:
5
+ - text: "I am really upset that I have to call up to three times to the number on the back of my insurance card for my call to be answered"
6
+ tags:
7
+ - sagemaker
8
+ - roberta-base
9
+ - text classification
10
+ license: apache-2.0
11
+ datasets:
12
+ - emotion
13
+ model-index:
14
+ - name: sagemaker-roberta-base-emotion
15
+ results:
16
+ - task:
17
+ name: Multi Class Text Classification
18
+ type: text-classification
19
+ dataset:
20
+ name: "emotion"
21
+ type: emotion
22
+ metrics:
23
+ - name: Validation Accuracy
24
+ type: accuracy
25
+ value: 94.1
26
+ - name: Validation F1
27
+ type: f1
28
+ value: 94.13
29
+
30
+ ---
31
+ ## roberta-base
32
+
33
+ This model is a fine-tuned model that was trained using Amazon SageMaker and the new Hugging Face Deep Learning container.
34
+ - Problem type: Multi Class Text Classification (emotion detection).
35
+
36
+ It achieves the following results on the evaluation set:
37
+ - Loss: 0.1613253802061081
38
+ - f1: 0.9413321705151999
39
+
40
+ ## Hyperparameters
41
+ ```json
42
+ {
43
+ "epochs": 10,
44
+ "train_batch_size": 16,
45
+ "learning_rate": 3e-5,
46
+ "weight_decay":0.01,
47
+ "load_best_model_at_end": true,
48
+ "model_name":"roberta-base",
49
+ "do_eval": true,
50
+ "load_best_model_at_end": true
51
+ }
52
+ ```
53
+ ## Validation Metrics
54
+ | key | value |
55
+ | --- | ----- |
56
+ | eval_accuracy | 0.941 |
57
+ | eval_f1 | 0.9413321705151999 |
58
+ | eval_loss | 0.1613253802061081|
59
+ | eval_recall | 0.941 |
60
+ | eval_precision | 0.9419519436781406 |
61
+
config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "roberta-base",
3
+ "architectures": [
4
+ "RobertaForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "id2label": {
14
+ "0": "sadness",
15
+ "1": "joy",
16
+ "2": "love",
17
+ "3": "anger",
18
+ "4": "fear",
19
+ "5": "surprise"
20
+ },
21
+ "initializer_range": 0.02,
22
+ "intermediate_size": 3072,
23
+ "label2id": {
24
+ "anger": "3",
25
+ "fear": "4",
26
+ "joy": "1",
27
+ "love": "2",
28
+ "sadness": "0",
29
+ "surprise": "5"
30
+ },
31
+ "layer_norm_eps": 1e-05,
32
+ "max_position_embeddings": 514,
33
+ "model_type": "roberta",
34
+ "num_attention_heads": 12,
35
+ "num_hidden_layers": 12,
36
+ "pad_token_id": 1,
37
+ "position_embedding_type": "absolute",
38
+ "problem_type": "single_label_classification",
39
+ "torch_dtype": "float32",
40
+ "transformers_version": "4.12.3",
41
+ "type_vocab_size": 1,
42
+ "use_cache": true,
43
+ "vocab_size": 50265
44
+ }
eval_results.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "eval_accuracy" : 0.941,
4
+ "eval_f1": 0.9413321705151999,
5
+ "eval_loss": 0.1613253802061081,
6
+ "eval_precision": 0.9419519436781406,
7
+ "eval_recall" : 0.941,
8
+ "eval_runtime": 2.8452 ,
9
+ "eval_samples_per_second": 702.933,
10
+ "eval_steps_per_second": 2.812
11
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5a3555838a38a25afcb25e7babef289525333ae8f6e26d296ed82c4260961c4
3
+ size 498686381
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "add_prefix_space": false, "errors": "replace", "sep_token": "</s>", "cls_token": "<s>", "pad_token": "<pad>", "mask_token": "<mask>", "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "roberta-base", "tokenizer_class": "RobertaTokenizer"}
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffe64ebddd0efc5a9611168cec3017c1b346021aac943776b2ce9bb38c027180
3
+ size 2799
vocab.json ADDED
The diff for this file is too large to render. See raw diff