File size: 1,203 Bytes
106e5ae
013ca27
 
6c477d3
013ca27
6c477d3
013ca27
3f47019
 
013ca27
 
 
3f47019
 
 
 
013ca27
 
 
6c477d3
 
 
 
013ca27
3f47019
106e5ae
6c477d3
106e5ae
 
 
 
 
 
013ca27
106e5ae
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import gradio as gr
import os
import pandas as pd
import tensorflow as tf
import numpy as np
from tensorflow.keras.layers import TextVectorization

# Toxicity labels reported for each comment, in the order score_comment reads
# them from the model output.
columns = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

# Training comments; used below only to fit the vectorizer's vocabulary.
df = pd.read_csv('train.csv')
X = df['comment_text']

# NOTE: an earlier draft loaded the adaptation text from 'forVectorizer.npy'
# instead of the CSV; that path is currently disabled.

# Upper bound on the vocabulary size learned by the vectorizer.
MAX_FEATURES = 200000

# Turns raw text into integer token ids with a fixed sequence length of 1800.
vectorizer = TextVectorization(
    max_tokens=MAX_FEATURES,
    output_mode='int',
    output_sequence_length=1800,
)
vectorizer.adapt(X.values)

# Classifier loaded from the saved 'toxicity.h5' file.
model = tf.keras.models.load_model('toxicity.h5')

def score_comment(comment):
    """Classify one comment and report each toxicity label as True/False.

    The comment is passed through the module-level ``vectorizer`` as a
    one-element batch and the result is handed to ``model.predict``. A label
    is reported as ``True`` when the value at its position in the first
    prediction row is strictly greater than 0.5.

    Args:
        comment: The text to score.

    Returns:
        A string with one ``"<label>: <bool>"`` line per entry of ``columns``,
        in order, each terminated by a newline.
    """
    batch = vectorizer([comment])
    # Only one comment was submitted, so only the first row is relevant.
    first_row = model.predict(batch)[0]

    line_template = '{}: {}\n'
    report_lines = [
        line_template.format(label, first_row[position] > 0.5)
        for position, label in enumerate(columns)
    ]
    return ''.join(report_lines)
    
# Build the web UI: a two-line text box feeds score_comment and the returned
# report is displayed as plain text.
#
# gr.inputs.Textbox is the legacy location of the component; it was deprecated
# in Gradio 3 and is absent from Gradio 4, where accessing it raises
# AttributeError. Prefer the top-level gr.Textbox and fall back to the legacy
# namespace only on older releases that do not provide it. The `or` keeps the
# legacy attribute from being touched when the modern one exists.
_textbox_cls = getattr(gr, 'Textbox', None) or gr.inputs.Textbox

interface = gr.Interface(fn=score_comment,
                         inputs=_textbox_cls(lines=2, placeholder='Comment to score'),
                         outputs='text')


# Start the Gradio server for this interface.
interface.launch()