shivansh-ka
committed on
Commit
•
5d8baf5
1
Parent(s):
f467969
correction for latency
Browse files- app.py +9 -10
- requirements.txt +1 -0
- src/__init__.py +2 -3
- src/model_loader.py +10 -0
- src/{batch_predict.py → predict.py} +27 -5
- src/single_predict.py +0 -46
app.py
CHANGED
@@ -1,24 +1,25 @@
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
|
|
3 |
from src import *
|
4 |
|
5 |
-
|
6 |
-
|
7 |
|
8 |
def single_predict(text):
|
9 |
-
preds
|
10 |
|
11 |
if preds < 0.5:
|
12 |
st.success(f'Non Toxic Comment!!! :thumbsup:')
|
13 |
-
st.plotly_chart(fig, theme="streamlit", use_container_width=True)
|
14 |
else:
|
15 |
st.error(f'Toxic Comment!!! :thumbsdown:')
|
16 |
-
|
|
|
17 |
|
18 |
def batch_predict(data):
|
19 |
-
if
|
20 |
st.success(f'Data Validation Successfull :thumbsup:')
|
21 |
-
preds =
|
22 |
return preds.to_csv(index=False).encode('utf-8')
|
23 |
else:
|
24 |
st.error(f'Data Validation Failed :thumbsdown:')
|
@@ -29,9 +30,7 @@ choice = st.sidebar.radio("Menu",menu)
|
|
29 |
|
30 |
if choice=="Single Value Prediciton":
|
31 |
st.subheader("Prediction")
|
32 |
-
|
33 |
-
#trigger = st.button('Predict', on_click=single_predict(comment))
|
34 |
-
form = st.form("my_form")
|
35 |
comment = form.text_input("Enter comment")
|
36 |
form.form_submit_button("Predict",on_click=single_predict(comment))
|
37 |
else:
|
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
+
import plotly.express as px
|
4 |
from src import *
|
5 |
|
6 |
+
# Streamlit re-executes this script from the top on every widget interaction,
# so constructing ModelLoader() at module level reloads the model and
# tokenizer on each rerun. Cache the loaded objects for the life of the server
# process. `cache_resource` is the current API; older Streamlit releases
# expose the same behaviour as `experimental_singleton`.
_cache_resource = getattr(st, "cache_resource", None) or st.experimental_singleton


@_cache_resource
def _load_services():
    """Build the model loader and prediction service once per server process."""
    loader = ModelLoader()
    return loader, PredictionServices(loader.Model, loader.Tokenizer)


# Same module-level names as before, so the rest of the script is unaffected.
model, prediction = _load_services()
|
8 |
|
9 |
def single_predict(text):
    """Classify one comment and render the verdict and probability chart.

    Args:
        text: The raw comment entered by the user.

    The score returned by ``prediction.single_predict`` is compared against
    0.5: below is reported as non-toxic, otherwise as toxic. The score is then
    handed to ``prediction.plot`` for rendering.
    """
    preds = prediction.single_predict(text)

    # PredictionServices.single_predict prints any exception and falls through,
    # returning None. Comparing None with 0.5 would raise TypeError, so report
    # the failure in the UI instead of crashing the script run.
    if preds is None:
        st.error('Prediction Failed :thumbsdown:')
        return

    if preds < 0.5:
        st.success('Non Toxic Comment!!! :thumbsup:')
    else:
        st.error('Toxic Comment!!! :thumbsdown:')

    prediction.plot(preds)
|
18 |
|
19 |
def batch_predict(data):
|
20 |
+
if prediction.data_validation(data):
|
21 |
st.success(f'Data Validation Successfull :thumbsup:')
|
22 |
+
preds = prediction.batch_predict(data)
|
23 |
return preds.to_csv(index=False).encode('utf-8')
|
24 |
else:
|
25 |
st.error(f'Data Validation Failed :thumbsdown:')
|
|
|
30 |
|
31 |
if choice=="Single Value Prediciton":
    st.subheader("Prediction")
    form = st.form("comment_form")
    comment = form.text_input("Enter comment")
    # `on_click=single_predict(comment)` called the function immediately on
    # every script run (including the first render with an empty comment) and
    # registered its return value, None, as the callback. The submit button
    # returns True only on the run triggered by pressing it, so gate on that.
    if form.form_submit_button("Predict"):
        single_predict(comment)
|
36 |
else:
|
requirements.txt
CHANGED
@@ -5,3 +5,4 @@ transformers[sentencepiece]
|
|
5 |
streamlit
|
6 |
matplotlib
|
7 |
seaborn
|
|
|
|
5 |
streamlit
|
6 |
matplotlib
|
7 |
seaborn
|
8 |
+
plotly
|
src/__init__.py
CHANGED
@@ -1,3 +1,2 @@
|
|
1 |
-
|
2 |
-
from
|
3 |
-
from src.single_predict import *
|
|
|
1 |
+
# Python 3 has no implicit relative imports: a bare `from predict import *`
# looks for a top-level module named `predict` on sys.path, not for
# src/predict.py. Use package-qualified imports, matching the
# `from src.constants import *` style used by the modules in this package.
from src.predict import *
from src.model_loader import *
|
|
src/model_loader.py
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import tensorflow as tf
|
2 |
+
import transformers
|
3 |
+
from transformers import AutoTokenizer
|
4 |
+
from src.constants import *
|
5 |
+
|
6 |
+
|
7 |
+
class ModelLoader:
    """Load the classifier and its tokenizer when instantiated.

    Attributes:
        Model: Result of ``tf.keras.models.load_model(MODEL_PATH)``.
        Tokenizer: Result of ``AutoTokenizer.from_pretrained(TOKENIZER_PATH)``.

    MODEL_PATH and TOKENIZER_PATH are not defined in this module; presumably
    they come from the ``src.constants`` star import (confirm there).
    """

    def __init__(self):
        # Load the model first, then the tokenizer, as separate steps before
        # publishing both on the instance.
        keras_model = tf.keras.models.load_model(MODEL_PATH)
        hf_tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_PATH)
        self.Model = keras_model
        self.Tokenizer = hf_tokenizer
|
src/{batch_predict.py → predict.py}
RENAMED
@@ -1,17 +1,19 @@
|
|
1 |
import pandas as pd
|
2 |
import numpy as np
|
3 |
import tensorflow as tf
|
|
|
4 |
import transformers
|
5 |
from transformers import AutoTokenizer
|
6 |
import os
|
7 |
from src.constants import *
|
8 |
import re
|
|
|
9 |
|
10 |
|
11 |
-
class
|
12 |
-
def __init__(self):
|
13 |
-
self.model =
|
14 |
-
self.tokenizer =
|
15 |
|
16 |
def tokenizer(self, text:str):
|
17 |
tokens = self.tokenizer(text,
|
@@ -23,6 +25,17 @@ class BatchPrediction:
|
|
23 |
return_token_type_ids = False)
|
24 |
return dict(tokens)
|
25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
def data_validation(data):
|
27 |
df = pd.read_csv(data)
|
28 |
status=True
|
@@ -31,7 +44,7 @@ class BatchPrediction:
|
|
31 |
status=False
|
32 |
return status
|
33 |
|
34 |
-
def
|
35 |
try:
|
36 |
df = pd.read_csv(data)
|
37 |
df.dropna(inplace=True)
|
@@ -41,5 +54,14 @@ class BatchPrediction:
|
|
41 |
df['probabilities'] = preds
|
42 |
df['toxic'] = np.where(df['probabilities']>0.5, 1, 0)
|
43 |
return df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
except Exception as e:
|
45 |
print(e)
|
|
|
1 |
import pandas as pd
|
2 |
import numpy as np
|
3 |
import tensorflow as tf
|
4 |
+
import plotly.express as px
|
5 |
import transformers
|
6 |
from transformers import AutoTokenizer
|
7 |
import os
|
8 |
from src.constants import *
|
9 |
import re
|
10 |
+
import streamlit as st
|
11 |
|
12 |
|
13 |
+
class PredictionServices:
|
14 |
+
def __init__(self, model, tokenizer):
|
15 |
+
self.model = model
|
16 |
+
self.tokenizer = tokenizer
|
17 |
|
18 |
def tokenizer(self, text:str):
|
19 |
tokens = self.tokenizer(text,
|
|
|
25 |
return_token_type_ids = False)
|
26 |
return dict(tokens)
|
27 |
|
28 |
+
def plot(self, pred):
    """Render a horizontal bar chart of toxic vs non-toxic probability.

    Args:
        pred: Toxic-class probability in the range 0..1 (the value returned
            by ``single_predict``).

    The chart is drawn straight into the Streamlit page; nothing is returned.
    """
    # The chart is titled "Probabilities(%)", but round(pred) collapsed each
    # value to 0 or 1. Convert to a percentage with one decimal instead.
    # float() also turns a NumPy scalar into a plain Python float.
    toxic_pct = round(float(pred) * 100, 1)
    non_toxic_pct = round(100 - toxic_pct, 1)
    fig = px.bar(x=[toxic_pct, non_toxic_pct],
                 y=['toxic', 'non-toxic'],
                 width=500, height=250,
                 template="plotly_dark",
                 text_auto='1',
                 title="Probabilities(%)")
    fig.update_traces(width=0.3,textfont_size=15, textangle=0, textposition="outside")
    fig.update_layout(yaxis_title=None,xaxis_title=None)
    st.plotly_chart(fig, theme="streamlit", use_container_width=True)
|
38 |
+
|
39 |
def data_validation(data):
|
40 |
df = pd.read_csv(data)
|
41 |
status=True
|
|
|
44 |
status=False
|
45 |
return status
|
46 |
|
47 |
+
def batch_predict(self, data):
|
48 |
try:
|
49 |
df = pd.read_csv(data)
|
50 |
df.dropna(inplace=True)
|
|
|
54 |
df['probabilities'] = preds
|
55 |
df['toxic'] = np.where(df['probabilities']>0.5, 1, 0)
|
56 |
return df
|
57 |
+
except Exception as e:
|
58 |
+
print(e)
|
59 |
+
|
60 |
+
def single_predict(self, text:str):
    """Return the model's score for a single comment.

    Args:
        text: Raw comment text. Newlines are replaced with spaces and the
            result is stripped before tokenization.

    Returns:
        The first element of the first row of ``self.model.predict(...)``,
        or None if any step raised (the exception is printed, not re-raised).
    """
    try:
        text = re.sub('\n',' ',text).strip()
        # __init__ stores the raw tokenizer object as the instance attribute
        # `tokenizer`, and instance attributes take precedence over a method
        # of the same name. `self.tokenizer(text)` therefore skipped the
        # class's tokenizer() wrapper and its encoding options. Call the
        # wrapper through the class so those options are applied.
        encoded = type(self).tokenizer(self, text)
        pred = self.model.predict(encoded)[0][0]
        return pred
    except Exception as e:
        # Best-effort: callers receive None on failure.
        print(e)
|
src/single_predict.py
DELETED
@@ -1,46 +0,0 @@
|
|
1 |
-
import pandas as pd
|
2 |
-
import numpy as np
|
3 |
-
import tensorflow as tf
|
4 |
-
import plotly.express as px
|
5 |
-
import transformers
|
6 |
-
from transformers import AutoTokenizer
|
7 |
-
import os
|
8 |
-
from src.constants import *
|
9 |
-
import re
|
10 |
-
|
11 |
-
|
12 |
-
class SinglePrediction:
|
13 |
-
def __init__(self):
|
14 |
-
self.model = tf.keras.models.load_model(MODEL_PATH)
|
15 |
-
self.tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_PATH)
|
16 |
-
|
17 |
-
def tokenizer(self, text:str):
|
18 |
-
tokens = self.tokenizer(text,
|
19 |
-
max_length=MAX_LEN,
|
20 |
-
truncation=True,
|
21 |
-
padding="max_length",
|
22 |
-
add_special_tokens=True,
|
23 |
-
return_tensors="tf",
|
24 |
-
return_token_type_ids = False)
|
25 |
-
return dict(tokens)
|
26 |
-
|
27 |
-
def plot(self, pred):
|
28 |
-
fig = px.bar(x=[round(pred), round(1-pred)],
|
29 |
-
y=['toxic', 'non-toxic'],
|
30 |
-
width=500, height=250,
|
31 |
-
template="plotly_dark",
|
32 |
-
text_auto='1',
|
33 |
-
title="Probabilities(%)")
|
34 |
-
fig.update_traces(width=0.3,textfont_size=15, textangle=0, textposition="outside")
|
35 |
-
fig.update_layout(yaxis_title=None,xaxis_title=None)
|
36 |
-
return fig
|
37 |
-
|
38 |
-
def predict(self, text:str):
|
39 |
-
try:
|
40 |
-
text = re.sub('\n',' ',text).strip()
|
41 |
-
input = self.tokenizer(text)
|
42 |
-
pred = self.model.predict(input)[0][0]
|
43 |
-
fig = self.plot(pred)
|
44 |
-
return pred, fig
|
45 |
-
except Exception as e:
|
46 |
-
print(e)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|