Afreen Aman committed on
Commit
b73bde2
1 Parent(s): 80638f9
Files changed (6) hide show
  1. README.md +42 -0
  2. config.json +51 -0
  3. special_tokens_map.json +1 -0
  4. tf_model.h5 +3 -0
  5. tokenizer_config.json +1 -0
  6. vocab.txt +0 -0
README.md ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ tags:
5
+ - Text Classification
6
+ co2_eq_emissions: 0.1069 Kg
7
+ widget:
8
+ - text: "At the every month post-injection monitoring event, TCE, carbon tetrachloride, and chloroform concentrations were above CBSGs in three of the wells"
9
+ example_title: "Remediation Standards"
10
+ - text: "TRPH exceedances were observed in the subsurface soils immediately above the water table and there are no TRPH exceedances in surface soils."
11
+ example_title: "Extent of Contamination"
12
+ - text: "weathered shale was encountered below the surface area with fluvial deposits. Sediments in the coastal plain region are found above and below the bedrock with sandstones and shales that form the basement rock"
13
+ example_title: "Geology"
14
+
15
+ ---
16
+
17
+ ## About the Model
18
+ An English sequence classification model, trained on the MBAD Dataset to detect bias and fairness in sentences. This model was built on top of the distilbert-base-uncased model and trained for 30 epochs with a batch size of 16, a learning rate of 5e-5, and a maximum sequence length of 512.
19
+
20
+ - Dataset : Custom Data
21
+ - Carbon emissions: 0.1069 kg
22
+
23
+ | Train Accuracy | Validation Accuracy | Train loss | Test loss |
24
+ |---------------:| -------------------:| ----------:|----------:|
25
+ | 99.10 | 01.00 | 0.04 | 0.003 |
26
+
27
+ ## Usage
28
+ The easiest way to use the model is to load it through the pipeline object offered by the transformers library.
29
+ ```python
30
+ from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
31
+ from transformers import pipeline
32
+ tokenizer = AutoTokenizer.from_pretrained("d4data/bias-detection-model")
33
+ model = TFAutoModelForSequenceClassification.from_pretrained("d4data/bias-detection-model")
34
+
35
+ classifier = pipeline('text-classification', model=model, tokenizer=tokenizer) # cuda = 0,1 based on gpu availability
36
+ classifier("The irony, of course, is that the exhibit that invites people to throw trash at vacuuming Ivanka Trump lookalike reflects every stereotype feminists claim to stand against, oversexualizing Ivanka’s body and ignoring her hard work.")
37
+ ```
38
+
39
+ ## Author
40
+ This model is part of the research topic "Environmental Due Diligence" conducted by Deepak John Reji, Afreen Aman, and Shaina Raza. If you use this work (code, model or dataset), please cite as:
41
+ > Environmental Due Diligence, (2020), GitHub repository, <...>
42
+
config.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "LABEL_0",
13
+ "1": "LABEL_1",
14
+ "2": "LABEL_2",
15
+ "3": "LABEL_3",
16
+ "4": "LABEL_4",
17
+ "5": "LABEL_5",
18
+ "6": "LABEL_6",
19
+ "7": "LABEL_7",
20
+ "8": "LABEL_8",
21
+ "9": "LABEL_9",
22
+ "10": "LABEL_10",
23
+ "11": "LABEL_11"
24
+ },
25
+ "initializer_range": 0.02,
26
+ "label2id": {
27
+ "LABEL_0": 0,
28
+ "LABEL_1": 1,
29
+ "LABEL_10": 10,
30
+ "LABEL_11": 11,
31
+ "LABEL_2": 2,
32
+ "LABEL_3": 3,
33
+ "LABEL_4": 4,
34
+ "LABEL_5": 5,
35
+ "LABEL_6": 6,
36
+ "LABEL_7": 7,
37
+ "LABEL_8": 8,
38
+ "LABEL_9": 9
39
+ },
40
+ "max_position_embeddings": 512,
41
+ "model_type": "distilbert",
42
+ "n_heads": 12,
43
+ "n_layers": 6,
44
+ "pad_token_id": 0,
45
+ "qa_dropout": 0.1,
46
+ "seq_classif_dropout": 0.2,
47
+ "sinusoidal_pos_embds": false,
48
+ "tie_weights_": true,
49
+ "transformers_version": "4.12.3",
50
+ "vocab_size": 30522
51
+ }
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
tf_model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ac0ffa40134e2eb9d8ffb5c638e5d626971eda34b709dc1216e39bd071ac284
3
+ size 267983432
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "do_basic_tokenize": true, "never_split": null, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "tokenizer_file": "C:\\Users\\EL221XK/.cache\\huggingface\\transformers\\75abb59d7a06f4f640158a9bfcde005264e59e8d566781ab1415b139d2e4c603.7f2721073f19841be16f41b0a70b600ca6b880c8f3df6f3535cbc704371bdfa4", "name_or_path": "distilbert-base-uncased", "tokenizer_class": "DistilBertTokenizer"}
vocab.txt ADDED
The diff for this file is too large to render. See raw diff