File size: 1,768 Bytes
e6cdfd2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f331055
c89d0b4
f331055
 
14b8594
49b04e1
 
14b8594
e6cdfd2
 
 
1835c4c
49b04e1
 
 
 
 
 
11b5667
e6cdfd2
 
14b8594
e6cdfd2
49b04e1
14b8594
e6cdfd2
 
 
 
49b04e1
e6cdfd2
49b04e1
e6cdfd2
 
 
49b04e1
40f4307
49b04e1
 
e6cdfd2
49b04e1
e6cdfd2
 
 
49b04e1
871c9bf
49b04e1
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
import json
import string
import string
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import gradio as gr 
import joblib 
import nltk

# Fetch the NLTK resources used by test_model: the stopword corpus and the
# Punkt tokenizer data that word_tokenize relies on.
nltk.download('stopwords')
nltk.download('punkt')
# Newer NLTK releases (3.8.2+) load the Punkt models from 'punkt_tab' rather
# than the pickled 'punkt' package; fetch both so either version can tokenize.
nltk.download('punkt_tab')

# Load the trained model
# NOTE: joblib.load unpickles the file, which can execute arbitrary code, so
# model.bin must come from a trusted source.
model = joblib.load('model.bin')

# Translation table that deletes every character in string.punctuation.
# Built once at import time so each call is a single C-level pass.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def remove_punctuation(text):
    """Return *text* with all ``string.punctuation`` characters removed.

    Args:
        text: The string to clean.

    Returns:
        A new string containing the characters of *text*, in order, minus
        any ASCII punctuation. Non-ASCII punctuation is left untouched.
    """
    return text.translate(_PUNCTUATION_TABLE)
    
def vectorize_text(texts):
    """Fit a fresh bag-of-words vectorizer on *texts* and encode them.

    Args:
        texts: Iterable of document strings.

    Returns:
        A ``(matrix, vectorizer)`` tuple: the document-term count matrix
        for *texts* and the ``CountVectorizer`` that was fitted on them.

    Raises:
        ValueError: Propagated from ``CountVectorizer`` when *texts* yields
            no tokens (empty vocabulary).
    """
    vectorizer = CountVectorizer()
    # fit_transform learns the vocabulary and encodes the documents in one
    # pass; separate fit() + transform() tokenizes everything twice and
    # yields an empty matrix when *texts* is a one-shot iterable.
    text_vectorized = vectorizer.fit_transform(texts)
    return text_vectorized, vectorizer

# Returned in place of a prediction when the input has nothing to vectorize.
_NO_TOKENS_MESSAGE = "No classifiable words found in the input."

# Lazily filled so the stopword corpus is read once instead of per request.
_STOPWORDS_CACHE = {}


def _english_stopwords():
    """Return NLTK's English stopwords as a frozenset, loading them once."""
    if 'english' not in _STOPWORDS_CACHE:
        _STOPWORDS_CACHE['english'] = frozenset(stopwords.words('english'))
    return _STOPWORDS_CACHE['english']


def _preprocess(text):
    """Lower-case *text*, strip punctuation and digits, drop stopwords."""
    text = remove_punctuation(text.lower())
    text = re.sub(r'\d+', '', text)
    stopwords_set = _english_stopwords()
    kept = [word for word in word_tokenize(text) if word not in stopwords_set]
    # Re-join with single spaces so the vectorizer receives one document.
    return ' '.join(kept)


def test_model(text):
    """Classify a piece of raw text with the loaded model.

    The text is lower-cased, stripped of punctuation and digits, tokenized,
    filtered against NLTK's English stopword list, vectorized through
    ``vectorize_text`` and handed to ``model.predict``.

    Args:
        text: Raw input string.

    Returns:
        The first element of ``model.predict``'s result, or an explanatory
        message when the input is empty or contains nothing that can be
        vectorized (e.g. only stopwords, digits or punctuation).
    """
    if not text:
        return _NO_TOKENS_MESSAGE

    preprocessed_text = _preprocess(text)

    # NOTE(review): vectorize_text fits a brand-new CountVectorizer on this
    # single input, so the feature columns come from this text alone rather
    # than from the vocabulary the model was trained with. Confirm model.bin
    # accepts such input, or load the training-time vectorizer here instead.
    try:
        features, _ = vectorize_text([preprocessed_text])
    except ValueError:
        # CountVectorizer raises ValueError ("empty vocabulary") when no
        # token survives preprocessing; report that instead of crashing.
        return _NO_TOKENS_MESSAGE

    # predict returns one label per row; there is exactly one row here.
    return model.predict(features)[0]

# Build the Gradio UI: text input, text output, backed by test_model.
iface = gr.Interface(
    fn=test_model,
    inputs="text",
    outputs="text",
    title="Text Classification",
)
# Start serving the interface.
iface.launch()