File size: 1,545 Bytes
e6cdfd2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f331055
c89d0b4
f331055
 
14b8594
e6cdfd2
14b8594
e6cdfd2
 
 
14b8594
e6cdfd2
 
 
14b8594
e6cdfd2
 
14b8594
e6cdfd2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46704cd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# Standard library
import json
import re
import string

# Third-party
import gradio as gr
import joblib
import nltk
import numpy as np
import pandas as pd
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

# Fetch the NLTK corpora the preprocessing below depends on:
# 'stopwords' for English stopword removal, 'punkt' for word_tokenize.
nltk.download('stopwords')
nltk.download('punkt')

# Load the pre-trained classifier from disk (must exist next to this script).
model = joblib.load('model.bin')

def remove_punctuation(text):
    """Return *text* with every ASCII punctuation character removed.

    Uses a translation table built from ``string.punctuation``, so the
    result is identical to filtering those characters one by one.
    """
    strip_table = str.maketrans("", "", string.punctuation)
    return text.translate(strip_table)

def test_model(text):
    """Preprocess *text* and return the loaded model's prediction for it.

    Pipeline: lowercase -> strip punctuation -> drop digit runs ->
    remove English stopwords -> re-join -> vectorize -> predict.

    NOTE(review): ``vectorizer`` is referenced below but is not defined
    anywhere in this file — it is presumably a fitted CountVectorizer
    loaded/created elsewhere. Verify it exists at runtime, otherwise this
    function raises NameError on first call.
    """
    # Lowercase first so stopword matching sees lowercase tokens.
    cleaned = remove_punctuation(text.lower())

    # Drop all digit sequences.
    cleaned = re.sub(r'\d+', '', cleaned)

    # Tokenize and filter out common English stopwords.
    english_stopwords = set(stopwords.words('english'))
    kept = [tok for tok in word_tokenize(cleaned) if tok not in english_stopwords]

    # Re-join the surviving tokens, vectorize, and classify.
    features = vectorizer.transform([' '.join(kept)])
    return model.predict(features)[0]
# Build a minimal Gradio UI: one free-text input box, the model's
# predicted label rendered as text, backed by test_model above.
iface = gr.Interface(fn=test_model, inputs="text", outputs="text")

# Start the web app. NOTE(review): this runs at module import time —
# there is no `if __name__ == "__main__":` guard, so importing this
# module anywhere will launch the server.
iface.launch()