Renato committed on
Commit
ca694c9
1 Parent(s): 771bf6a

Initial commit

Files changed (5)
  1. .gitattributes +5 -32
  2. README.md +329 -0
  3. config.json +29 -0
  4. special_tokens_map.json +1 -0
  5. tokenizer_config.json +1 -0
.gitattributes CHANGED
@@ -1,35 +1,8 @@
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
  *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
  *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
  *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tar.gz filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,329 @@
---
pipeline_tag: text-classification
license: apache-2.0
---
<div align="center">

**⚠️ Disclaimer:**
The Hugging Face models currently give different results from the detoxify library (see the issue [here](https://github.com/unitaryai/detoxify/issues/15)). For the most up-to-date models we recommend using the models from https://github.com/unitaryai/detoxify.

# 🙊 Detoxify
## Toxic Comment Classification with ⚡ Pytorch Lightning and 🤗 Transformers

![CI testing](https://github.com/unitaryai/detoxify/workflows/CI%20testing/badge.svg)
![Lint](https://github.com/unitaryai/detoxify/workflows/Lint/badge.svg)

</div>

![Examples image](examples.png)
## Description

Trained models & code to predict toxic comments on 3 Jigsaw challenges: Toxic Comment Classification, Unintended Bias in Toxic Comments, and Multilingual Toxic Comment Classification.

Built by [Laura Hanu](https://laurahanu.github.io/) at [Unitary](https://www.unitary.ai/), where we are working to stop harmful content online by interpreting visual content in context.

Dependencies:
- For inference:
  - 🤗 Transformers
  - ⚡ Pytorch Lightning
- For training you will also need:
  - Kaggle API (to download data)

| Challenge | Year | Goal | Original Data Source | Detoxify Model Name | Top Kaggle Leaderboard Score | Detoxify Score |
|-|-|-|-|-|-|-|
| [Toxic Comment Classification Challenge](https://www.kaggle.com/c/jigsaw-toxic-comment-classification-challenge) | 2018 | build a multi-headed model capable of detecting different types of toxicity such as threats, obscenity, insults, and identity-based hate | Wikipedia Comments | `original` | 0.98856 | 0.98636 |
| [Jigsaw Unintended Bias in Toxicity Classification](https://www.kaggle.com/c/jigsaw-unintended-bias-in-toxicity-classification) | 2019 | build a model that recognizes toxicity and minimizes unintended bias with respect to mentions of identities, using a dataset labeled for identity mentions and optimizing a metric designed to measure unintended bias | Civil Comments | `unbiased` | 0.94734 | 0.93639 |
| [Jigsaw Multilingual Toxic Comment Classification](https://www.kaggle.com/c/jigsaw-multilingual-toxic-comment-classification) | 2020 | build effective multilingual models | Wikipedia Comments + Civil Comments | `multilingual` | 0.9536 | 0.91655* |

*Score not directly comparable since it is obtained on the provided validation set and not on the test set. To be updated when the test labels are made available.

It is also worth noting that the top leaderboard scores were achieved using model ensembles. The purpose of this library is to build something user-friendly and straightforward to use.
## Limitations and ethical considerations

If words that are associated with swearing, insults or profanity are present in a comment, it is likely to be classified as toxic regardless of the tone or intent of the author, e.g. humorous or self-deprecating. This could introduce biases against already vulnerable minority groups.

The intended use of this library is for research purposes, for fine-tuning on carefully constructed datasets that reflect real-world demographics, and/or to aid content moderators in flagging harmful content more quickly.

Some useful resources about the risk of different biases in toxicity or hate speech detection are:
- [The Risk of Racial Bias in Hate Speech Detection](https://homes.cs.washington.edu/~msap/pdfs/sap2019risk.pdf)
- [Automated Hate Speech Detection and the Problem of Offensive Language](https://arxiv.org/pdf/1703.04009.pdf)
- [Racial Bias in Hate Speech and Abusive Language Detection Datasets](https://arxiv.org/pdf/1905.12516.pdf)
## Quick prediction

The `multilingual` model has been trained on 7 different languages, so it should only be tested on: `english`, `french`, `spanish`, `italian`, `portuguese`, `turkish` or `russian`.

```bash
# install detoxify
pip install detoxify
```
```python
from detoxify import Detoxify

# each model takes in either a string or a list of strings
results = Detoxify('original').predict('example text')

results = Detoxify('unbiased').predict(['example text 1', 'example text 2'])

input_text = ['example text', 'exemple de texte', 'texto de ejemplo', 'testo di esempio',
              'texto de exemplo', 'örnek metin', 'пример текста']
results = Detoxify('multilingual').predict(input_text)

# optional: display results nicely (will need to pip install pandas)
import pandas as pd

print(pd.DataFrame(results, index=input_text).round(5))
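`predict` returns a plain Python dictionary mapping each label of the corresponding challenge (see the Labels section below) to a score: a single value for one input string, or a list of values for a list of inputs. A minimal sketch of inspecting the output without pandas; the scores in the comment are illustrative only, not real model outputs:

```python
from detoxify import Detoxify

results = Detoxify('original').predict('example text')

# results is a dict keyed by the challenge's labels,
# e.g. {'toxic': 0.0007, 'severe_toxic': 0.0001, ...} (illustrative values)
for label, score in sorted(results.items(), key=lambda kv: kv[1], reverse=True):
    print(f"{label:>15}: {score:.5f}")
```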
For more details check the Prediction section.

## Labels
All challenges have a toxicity label. The toxicity labels represent the aggregate ratings of up to 10 annotators, according to the following schema:
- **Very Toxic** (a very hateful, aggressive, or disrespectful comment that is very likely to make you leave a discussion or give up on sharing your perspective)
- **Toxic** (a rude, disrespectful, or unreasonable comment that is somewhat likely to make you leave a discussion or give up on sharing your perspective)
- **Hard to Say**
- **Not Toxic**

More information about the labelling schema can be found [here](https://www.kaggle.com/c/jigsaw-unintended-bias-in-toxicity-classification/data).
### Toxic Comment Classification Challenge
This challenge includes the following labels:

- `toxic`
- `severe_toxic`
- `obscene`
- `threat`
- `insult`
- `identity_hate`

### Jigsaw Unintended Bias in Toxicity Classification
This challenge has 2 types of labels: the main toxicity labels and some additional identity labels that represent the identities mentioned in the comments.

Only identities with more than 500 examples in the test set (combined public and private) are included during training as additional labels and in the evaluation calculation.

- `toxicity`
- `severe_toxicity`
- `obscene`
- `threat`
- `insult`
- `identity_attack`
- `sexual_explicit`

Identity labels used:
- `male`
- `female`
- `homosexual_gay_or_lesbian`
- `christian`
- `jewish`
- `muslim`
- `black`
- `white`
- `psychiatric_or_mental_illness`

A complete list of all the identity labels available can be found [here](https://www.kaggle.com/c/jigsaw-unintended-bias-in-toxicity-classification/data).

### Jigsaw Multilingual Toxic Comment Classification

Since this challenge combines the data from the previous 2 challenges, it includes all of the labels above; however, the final evaluation is only on:

- `toxicity`
## How to run

First, install the dependencies:
```bash
# clone project
git clone https://github.com/unitaryai/detoxify

# create virtual env
python3 -m venv toxic-env
source toxic-env/bin/activate

# install project
pip install -e detoxify
cd detoxify

# for training
pip install -r requirements.txt
```
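One quick way to sanity-check the installation is to run a one-line prediction with the same `Detoxify` API shown above (the first call downloads the `original` checkpoint):

```bash
python -c "from detoxify import Detoxify; print(Detoxify('original').predict('hello world'))"
```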
## Prediction

Trained models summary:

| Model name | Transformer type | Data from |
|:--:|:--:|:--:|
| `original` | `bert-base-uncased` | Toxic Comment Classification Challenge |
| `unbiased` | `roberta-base` | Unintended Bias in Toxicity Classification |
| `multilingual` | `xlm-roberta-base` | Multilingual Toxic Comment Classification |

For a quick prediction, you can run the example script on a comment directly or on a txt file containing a list of comments.
```bash
# load model via torch.hub
python run_prediction.py --input 'example' --model_name original

# load model from checkpoint path
python run_prediction.py --input 'example' --from_ckpt_path model_path

# save results to a .csv file
python run_prediction.py --input test_set.txt --model_name original --save_to results.csv

# to see usage
python run_prediction.py --help
```

Checkpoints can be downloaded from the latest release or via the PyTorch Hub API with the following names:
- `toxic_bert`
- `unbiased_toxic_roberta`
- `multilingual_toxic_xlm_r`
```python
import torch

model = torch.hub.load('unitaryai/detoxify', 'toxic_bert')
```
Importing detoxify in Python:

```python
from detoxify import Detoxify

results = Detoxify('original').predict('some text')

results = Detoxify('unbiased').predict(['example text 1', 'example text 2'])

input_text = ['example text', 'exemple de texte', 'texto de ejemplo', 'testo di esempio',
              'texto de exemplo', 'örnek metin', 'пример текста']
results = Detoxify('multilingual').predict(input_text)

# to display results nicely
import pandas as pd

print(pd.DataFrame(results, index=input_text).round(5))
```
## Training

If you do not already have a Kaggle account:
- you need to create one to be able to download the data

- go to My Account and click on Create New API Token - this will download a `kaggle.json` file

- make sure this file is located in `~/.kaggle` (see the example below)

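For example, assuming `kaggle.json` was saved to `~/Downloads` (adjust the path to wherever your browser saved it):

```bash
mkdir -p ~/.kaggle
mv ~/Downloads/kaggle.json ~/.kaggle/
# the Kaggle CLI expects the API token to be readable only by you
chmod 600 ~/.kaggle/kaggle.json
```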
```bash
# create data directory
mkdir jigsaw_data
cd jigsaw_data

# download data
kaggle competitions download -c jigsaw-toxic-comment-classification-challenge

kaggle competitions download -c jigsaw-unintended-bias-in-toxicity-classification

kaggle competitions download -c jigsaw-multilingual-toxic-comment-classification
```
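The Kaggle CLI saves each competition as a single zip archive, which needs to be unpacked before training. A sketch (the target directory names are only an illustration; check the data paths expected by the config files in `configs/`):

```bash
unzip jigsaw-toxic-comment-classification-challenge.zip -d jigsaw-toxic-comment-classification-challenge
unzip jigsaw-unintended-bias-in-toxicity-classification.zip -d jigsaw-unintended-bias-in-toxicity-classification
unzip jigsaw-multilingual-toxic-comment-classification.zip -d jigsaw-multilingual-toxic-comment-classification
```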
## Start Training
### Toxic Comment Classification Challenge

```bash
python create_val_set.py

python train.py --config configs/Toxic_comment_classification_BERT.json
```
### Unintended Bias in Toxicity Challenge

```bash
python train.py --config configs/Unintended_bias_toxic_comment_classification_RoBERTa.json
```
### Multilingual Toxic Comment Classification

This is trained in 2 stages. First, train on all available data, and second, train only on the translated versions of the first challenge.

The [translated data](https://www.kaggle.com/miklgr500/jigsaw-train-multilingual-coments-google-api) can be downloaded from Kaggle in French, Spanish, Italian, Portuguese, Turkish, and Russian (the languages available in the test set).

```bash
# stage 1
python train.py --config configs/Multilingual_toxic_comment_classification_XLMR.json

# stage 2
python train.py --config configs/Multilingual_toxic_comment_classification_XLMR_stage2.json
```
### Monitor progress with tensorboard

```bash
tensorboard --logdir=./saved
```
## Model Evaluation

### Toxic Comment Classification Challenge

This challenge is evaluated on the mean AUC score of all the labels.

```bash
python evaluate.py --checkpoint saved/lightning_logs/checkpoints/example_checkpoint.pth --test_csv test.csv
```
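For reference, the metric is simply the unweighted mean of the per-label ROC AUCs. A minimal sketch with scikit-learn (the CSV file names and column layout are assumptions for illustration, not the actual inputs or outputs of `evaluate.py`):

```python
import pandas as pd
from sklearn.metrics import roc_auc_score

LABELS = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]

# hypothetical files: one column per label with 0/1 ground truth and predicted scores
truth = pd.read_csv("test_labels.csv")
preds = pd.read_csv("predictions.csv")

mean_auc = sum(roc_auc_score(truth[label], preds[label]) for label in LABELS) / len(LABELS)
print(f"mean AUC: {mean_auc:.5f}")
```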
### Unintended Bias in Toxicity Challenge

This challenge is evaluated on a novel bias metric that combines different AUC scores in order to balance overall performance against unintended bias towards identity subgroups. More information on this metric can be found [here](https://www.kaggle.com/c/jigsaw-unintended-bias-in-toxicity-classification/overview/evaluation).

```bash
python evaluate.py --checkpoint saved/lightning_logs/checkpoints/example_checkpoint.pth --test_csv test.csv

# to get the final bias metric
python model_eval/compute_bias_metric.py
```
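As described on the Kaggle evaluation page, the final score combines the overall AUC with three per-identity-subgroup AUC families (subgroup AUC, BPSN AUC, BNSP AUC), each aggregated with a generalized power mean (p = -5), with all four terms weighted equally. A sketch of that combination, assuming the per-subgroup AUCs have already been computed (the repository's `model_eval/compute_bias_metric.py` is the authoritative implementation):

```python
import numpy as np

def power_mean(values, p=-5):
    # generalized mean with p = -5, as specified by the competition metric
    values = np.asarray(values, dtype=float)
    return np.power(values, p).mean() ** (1.0 / p)

def final_bias_metric(overall_auc, subgroup_aucs, bpsn_aucs, bnsp_aucs, weight=0.25):
    # equal 0.25 weighting of the overall AUC and the three bias AUC aggregates
    return (
        weight * overall_auc
        + weight * power_mean(subgroup_aucs)
        + weight * power_mean(bpsn_aucs)
        + weight * power_mean(bnsp_aucs)
    )
```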
### Multilingual Toxic Comment Classification

This challenge is evaluated on the AUC score of the main toxic label.

```bash
python evaluate.py --checkpoint saved/lightning_logs/checkpoints/example_checkpoint.pth --test_csv test.csv
```

### Citation
```
@misc{Detoxify,
  title={Detoxify},
  author={Hanu, Laura and {Unitary team}},
  howpublished={Github. https://github.com/unitaryai/detoxify},
  year={2020}
}
```
config.json ADDED
@@ -0,0 +1,29 @@
{
  "architectures": [
    "XLMRobertaForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "toxic"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "toxic": 0
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_past": true,
  "pad_token_id": 1,
  "type_vocab_size": 1,
  "vocab_size": 250002
}
special_tokens_map.json ADDED
@@ -0,0 +1 @@
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": "<mask>"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "cls_token": "<s>", "pad_token": "<pad>", "mask_token": "<mask>", "model_max_length": 512, "name_or_path": "xlm-roberta-base"}