Chirag1994 committed on
Commit
8c26ba0
1 Parent(s): 0804379

Upload 3 files

Browse files
Files changed (3) hide show
  1. .gitattributes +1 -39
  2. app.py +55 -0
  3. requirements.txt +4 -0
.gitattributes CHANGED
@@ -1,39 +1 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tflite filter=lfs diff=lfs merge=lfs -text
29
- *.tgz filter=lfs diff=lfs merge=lfs -text
30
- *.wasm filter=lfs diff=lfs merge=lfs -text
31
- *.xz filter=lfs diff=lfs merge=lfs -text
32
- *.zip filter=lfs diff=lfs merge=lfs -text
33
- *.zst filter=lfs diff=lfs merge=lfs -text
34
- *tfevents* filter=lfs diff=lfs merge=lfs -text
35
- multilingual_toxic_comment_files/env/Lib/site-packages/clang/native/libclang.dll filter=lfs diff=lfs merge=lfs -text
36
- multilingual_toxic_comment_files/env/Lib/site-packages/cryptography/hazmat/bindings/_rust.pyd filter=lfs diff=lfs merge=lfs -text
37
- multilingual_toxic_comment_files/env/Lib/site-packages/grpc/_cython/cygrpc.cp310-win_amd64.pyd filter=lfs diff=lfs merge=lfs -text
38
- multilingual_toxic_comment_files/env/Lib/site-packages/h5py/hdf5.dll filter=lfs diff=lfs merge=lfs -text
39
- multilingual_toxic_comment_files/Multilingual_toxic_comment_classifier/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
 
1
# Track every file under the exported model directory with Git LFS.
# A trailing-slash directory pattern does not apply to the files inside the
# directory in an attributes file, so "/**" is used to match its contents.
Multilingual_toxic_comment_classifier/** filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tensorflow as tf
2
+ import gradio as gr
3
+ import pandas as pd
4
+ from transformers import AutoTokenizer
5
+
6
# Directory the fine-tuned classifier is loaded from.
model_save_path = "Multilingual_toxic_comment_classifier/"

# Restore the fine-tuned model once, at import time.
loaded_model = tf.keras.models.load_model(model_save_path)

# Tokenizer for the "xlm-roberta-large" checkpoint.
tokenizer_ = AutoTokenizer.from_pretrained("xlm-roberta-large")

# One single-item list per sample comment, read from the "comment_text"
# column of the examples CSV; later passed as the interface's `examples`.
examples_list = [
    [comment]
    for comment in pd.read_csv("examples/sample_comments.csv")["comment_text"].tolist()
]
16
+
17
+
18
def prep_data(text, tokenizer, max_len=192):
    """Tokenize ``text`` and keep only the inputs forwarded to the model.

    Args:
        text: Comment text handed to ``tokenizer``.
        tokenizer: Callable tokenizer. It is invoked with the text plus the
            keyword arguments below and must return a mapping that contains
            ``"input_ids"`` and ``"attention_mask"``.
        max_len: Value passed as ``max_length``; with ``truncation=True`` and
            ``padding="max_length"`` the tokenizer is asked to truncate or
            pad to this length.

    Returns:
        A dict with exactly the ``"input_ids"`` and ``"attention_mask"``
        entries of the tokenizer output; any other entries are dropped.
    """
    encoded = tokenizer(
        text,
        add_special_tokens=True,
        truncation=True,
        padding="max_length",
        max_length=max_len,
        return_tensors="tf",
    )
    wanted_keys = ("input_ids", "attention_mask")
    return {key: encoded[key] for key in wanted_keys}
32
+
33
+
34
def predict(text):
    """Score a comment with the loaded classifier.

    Args:
        text: Comment text to classify.

    Returns:
        A dict with two Python floats: ``"prob_of_toxic_comment"`` (the
        model output) and ``"prob_of_non_toxic_comment"`` (one minus that
        value).
    """
    model_inputs = prep_data(text=text, tokenizer=tokenizer_, max_len=192)
    # The first value of the first output row is used as the toxic score.
    prob_of_toxic_comment = loaded_model.predict(model_inputs)[0][0]
    prob_of_non_toxic_comment = 1 - prob_of_toxic_comment
    # Convert to built-in floats before handing the result to the caller.
    return {
        "prob_of_toxic_comment": float(prob_of_toxic_comment),
        "prob_of_non_toxic_comment": float(prob_of_non_toxic_comment),
    }
45
+
46
+
47
# Gradio UI: a multi-line comment box feeding `predict`, whose returned dict
# is shown in a Label component; the sample comments are offered as examples.
interface = gr.Interface(
    title="Multi-Lingual Toxic Comment Classification.",
    description="XLM-Roberta Large model",
    fn=predict,
    inputs=gr.components.Textbox(label="Comment", lines=4),
    outputs=[gr.Label(label="Probabilities")],
    examples=examples_list,
)

# Start the app at import time with sharing requested and debug mode off.
interface.launch(share=True, debug=False)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ tensorflow==2.12
2
+ pandas==1.5.2
3
+ gradio==3.1.4
4
+ transformers==4.28.1