abhishek HF staff committed on
Commit
db6c021
1 Parent(s): 40642ed

Commit From AutoNLP

Browse files
.gitattributes CHANGED
@@ -25,3 +25,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar.gz filter=lfs diff=lfs merge=lfs -text
29
+ *.pkl filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags: autonlp
3
+ language: en
4
+ widget:
5
+ - text: "I love AutoNLP 🤗"
6
+ datasets:
7
+ - madhurjindal/autonlp-data-Gibberish-Detector
8
+ co2_eq_emissions: 5.527544460835904
9
+ ---
10
+
11
+ # Model Trained Using AutoNLP
12
+
13
+ - Problem type: Multi-class Classification
14
+ - Model ID: 492513457
15
+ - CO2 Emissions (in grams): 5.527544460835904
16
+
17
+ ## Validation Metrics
18
+
19
+ - Loss: 0.07609463483095169
20
+ - Accuracy: 0.9735624586913417
21
+ - Macro F1: 0.9736173135739408
22
+ - Micro F1: 0.9735624586913417
23
+ - Weighted F1: 0.9736173135739408
24
+ - Macro Precision: 0.9737771415197378
25
+ - Micro Precision: 0.9735624586913417
26
+ - Weighted Precision: 0.9737771415197378
27
+ - Macro Recall: 0.9735624586913417
28
+ - Micro Recall: 0.9735624586913417
29
+ - Weighted Recall: 0.9735624586913417
30
+
31
+
32
+ ## Usage
33
+
34
+ You can use cURL to access this model:
35
+
36
+ ```
37
+ $ curl -X POST -H "Authorization: Bearer YOUR_API_KEY" -H "Content-Type: application/json" -d '{"inputs": "I love AutoNLP"}' https://api-inference.huggingface.co/models/madhurjindal/autonlp-Gibberish-Detector-492513457
38
+ ```
39
+
40
+ Or use the Python API:
41
+
42
+ ```
43
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
44
+
45
+ model = AutoModelForSequenceClassification.from_pretrained("madhurjindal/autonlp-Gibberish-Detector-492513457", use_auth_token=True)
46
+
47
+ tokenizer = AutoTokenizer.from_pretrained("madhurjindal/autonlp-Gibberish-Detector-492513457", use_auth_token=True)
48
+
49
+ inputs = tokenizer("I love AutoNLP", return_tensors="pt")
50
+
51
+ outputs = model(**inputs)
52
+ ```
config.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "AutoNLP",
3
+ "_num_labels": 4,
4
+ "activation": "gelu",
5
+ "architectures": [
6
+ "DistilBertForSequenceClassification"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "dim": 768,
10
+ "dropout": 0.1,
11
+ "hidden_dim": 3072,
12
+ "id2label": {
13
+ "0": "clean",
14
+ "1": "mild gibberish",
15
+ "2": "noise",
16
+ "3": "word salad"
17
+ },
18
+ "initializer_range": 0.02,
19
+ "label2id": {
20
+ "clean": 0,
21
+ "mild gibberish": 1,
22
+ "noise": 2,
23
+ "word salad": 3
24
+ },
25
+ "max_length": 64,
26
+ "max_position_embeddings": 512,
27
+ "model_type": "distilbert",
28
+ "n_heads": 12,
29
+ "n_layers": 6,
30
+ "pad_token_id": 0,
31
+ "padding": "max_length",
32
+ "problem_type": "single_label_classification",
33
+ "qa_dropout": 0.1,
34
+ "seq_classif_dropout": 0.2,
35
+ "sinusoidal_pos_embds": false,
36
+ "tie_weights_": true,
37
+ "torch_dtype": "float32",
38
+ "transformers_version": "4.15.0",
39
+ "vocab_size": 30522
40
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e6b3f193eb8bca34495fb733d1187dcbc1ee4a03327396f609fd384f6d62d19
3
+ size 267866225
sample_input.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be88e587f6b9b4ada6406c0ccb4d9a9eb199fb025cf4bb245d43e97662543603
3
+ size 2034
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "AutoNLP", "tokenizer_class": "DistilBertTokenizer"}
vocab.txt ADDED
The diff for this file is too large to render. See raw diff