JulianHame committed on
Commit
edce24a
0 Parent(s):

Duplicate from JulianHame/toxicity-classifier-app

.gitattributes ADDED
@@ -0,0 +1,3 @@
+ toxicity_model.h5 filter=lfs diff=lfs merge=lfs -text
+ train[1].csv filter=lfs diff=lfs merge=lfs -text
+ train.csv filter=lfs diff=lfs merge=lfs -text
.github/workflows/sync_to_huggingface.yml ADDED
@@ -0,0 +1,20 @@
+ name: Sync to Hugging Face hub
+ on:
+   push:
+     branches: [main]
+
+   # to run this workflow manually from the Actions tab
+   workflow_dispatch:
+
+ jobs:
+   sync-to-hub:
+     runs-on: ubuntu-latest
+     steps:
+       - uses: actions/checkout@v3
+         with:
+           fetch-depth: 0
+           lfs: true
+       - name: Push to hub
+         env:
+           HF_TOKEN: ${{ secrets.HF_TOKEN }}
+         run: git push --force https://JulianHame:$HF_TOKEN@huggingface.co/spaces/JulianHame/streamlit-application main
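The workflow checks the repository out with full history and LFS objects (fetch-depth: 0, lfs: true), then force-pushes everything to the Space using the HF_TOKEN secret. As a point of comparison only (not part of this commit), the same upload could be done from Python with the huggingface_hub client; the Space id and token below are taken from the workflow and assumed to be valid:

```python
# Illustrative sketch (assumption, not part of this commit): upload the working
# tree to the Space with huggingface_hub instead of a git force-push.
import os
from huggingface_hub import HfApi

api = HfApi(token=os.environ["HF_TOKEN"])  # same secret the workflow uses
api.upload_folder(
    folder_path=".",                              # repository root
    repo_id="JulianHame/streamlit-application",   # target Space from the workflow URL
    repo_type="space",
)
```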
README.md ADDED
@@ -0,0 +1,14 @@
+ ---
+ title: Toxicity Classifier App
+ emoji: 🔥
+ colorFrom: red
+ colorTo: indigo
+ sdk: streamlit
+ sdk_version: 1.17.0
+ app_file: app.py
+ pinned: false
+ duplicated_from: JulianHame/toxicity-classifier-app
+ ---
+
+ # CS482-project-streamlit-application
+ Hugging Face Spaces deployment of a Streamlit Python application
app.py ADDED
@@ -0,0 +1,77 @@
+ import streamlit as st
+ from transformers import pipeline
+ import tensorflow as tf
+ import numpy as np
+ import pandas as pd
+ from tensorflow.keras.layers import TextVectorization
+ from tensorflow import keras
+
+ model = tf.keras.models.load_model('toxicity_model.h5')
+
+ dataset = pd.read_csv('train.csv')
+ comments = dataset['comment_text']
+
+ vectorizer = TextVectorization(max_tokens = 2500000,
+                                output_sequence_length=1800,
+                                output_mode='int')
+
+ vectorizer.adapt(comments.values)
+
+ st.title('Toxicity Classifier')
+
+ st.header('Write a message here:')
+ text = st.text_area('The toxicity of the message will be evaluated.',
+                     value = "You're fucking ugly.")
+
+ input_str = vectorizer(text)
+ res = model.predict(np.expand_dims(input_str,0))
+ classification = res[0].tolist()
+
+ toxicity = classification[0]
+ toxicity_severe = classification[1]
+ obscene = classification[2]
+ threat = classification[3]
+ insult = classification[4]
+ identity_hate = classification[5]
+
+ highest_class = "Severe toxicity"
+ highest_class_rating = toxicity_severe
+ if(obscene > highest_class_rating):
+     highest_class = "Obscenity"
+     highest_class_rating = obscene
+ if(threat > highest_class_rating):
+     highest_class = "Threat"
+     highest_class_rating = threat
+ if(insult > highest_class_rating):
+     highest_class = "Insult"
+     highest_class_rating = insult
+ if(identity_hate > highest_class_rating):
+     highest_class = "Identity hate"
+     highest_class_rating = identity_hate
+
+ st.write("---")
+ st.write("Overall toxicity rating: " +str(toxicity))
+ st.write("---")
+ st.write("Classifications:")
+ if(toxicity_severe > 0.5):
+     st.write("Severely toxic - " +str(toxicity_severe))
+ if(obscene > 0.5):
+     st.write("Obscene - " +str(obscene))
+ if(threat > 0.5):
+     st.write("Threat - " +str(threat))
+ if(insult > 0.5):
+     st.write("Insult - " +str(insult))
+ if(identity_hate > 0.5):
+     st.write("Identity hate - " +str(identity_hate))
+ st.write("---")
+ st.write("Invalid classifications:")
+ if(toxicity_severe <= 0.5):
+     st.write("Severely toxic - " +str(toxicity_severe))
+ if(obscene <= 0.5):
+     st.write("Obscene - " +str(obscene))
+ if(threat <= 0.5):
+     st.write("Threat - " +str(threat))
+ if(insult <= 0.5):
+     st.write("Insult - " +str(insult))
+ if(identity_hate <= 0.5):
+     st.write("Identity hate - " +str(identity_hate))
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ streamlit
+ opencv-python-headless
+ numpy
+ easyocr
+ Pillow
+ torch
+ transformers
+ tensorflow
+ pandas
toxicity_model.h5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7b957463ca9befba2c69a5f49b3a6baa6e3a3974b6d40cc57fb6638cbb19d8fc
+ size 77969544
train.csv ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bd4084611bd27c939ba98e5e63bc3e5a2c1a4e99477dcba46c829e4c986c429d
+ size 68802655
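Both large files in this commit (toxicity_model.h5 and train.csv) are stored as Git LFS pointer files, matching the filter=lfs rules declared in .gitattributes; only the version, sha256 oid, and size metadata live in the git objects themselves. A minimal sketch of reading that three-line pointer format (an illustration, not part of the commit):

```python
# Sketch (assumption, not part of this commit): parse a Git LFS pointer file
# into its "version", "oid", and "size" fields.
def read_lfs_pointer(path):
    fields = {}
    with open(path, encoding="utf-8") as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

# Only meaningful when the blob has not been smudged by `git lfs`, i.e. the file
# on disk is still the small pointer and not the 78 MB model / 69 MB CSV.
print(read_lfs_pointer("toxicity_model.h5"))
```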