CSharpGrammer / app.py
Priyanhsu's picture
Update app.py
80fb6b9
raw
history blame
1.76 kB
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
import json
import string
import string
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import gradio as gr
import joblib
import nltk
# Fetch the NLTK data used by test_model(): the English stop-word list and the
# Punkt tokenizer data that word_tokenize() relies on.  Recent NLTK releases
# load the tokenizer from the 'punkt_tab' resource instead of the pickled
# 'punkt' one, so both are requested to keep word_tokenize() working
# whichever NLTK version is installed.
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('punkt_tab')

# Load the trained model.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- 'model.bin' must only ever come from a trusted source.  The path is
# relative, so it is resolved against the current working directory.
model = joblib.load('model.bin')
def remove_punctuation(text):
    """Return ``text`` without any character listed in ``string.punctuation``.

    Only the ASCII punctuation characters from the standard library constant
    are dropped; every other character is kept in its original order.
    """
    punctuation_chars = string.punctuation
    return "".join(char for char in text if char not in punctuation_chars)
def vectorize_text(texts):
    """Encode ``texts`` as token counts using a freshly fitted vectorizer.

    A new ``CountVectorizer`` with default settings learns its vocabulary
    from ``texts`` and is then used to transform those same texts.

    Returns:
        A ``(matrix, vectorizer)`` tuple: the transformed ``texts`` and the
        fitted ``CountVectorizer`` that produced them.
    """
    # fit() hands back the vectorizer itself, so creation and fitting chain.
    fitted_vectorizer = CountVectorizer().fit(texts)
    count_matrix = fitted_vectorizer.transform(texts)
    return count_matrix, fitted_vectorizer
def test_model(text):
    """Classify ``text`` with the loaded model and return the prediction.

    The input is lower-cased, stripped of punctuation and digits, tokenized,
    filtered of English stop words, vectorized with ``vectorize_text`` and
    passed to ``model.predict``.

    Args:
        text: Raw input string (supplied by the Gradio text input). ``None``
            is treated like an empty string.

    Returns:
        The first element of ``model.predict``'s output, or an explanatory
        message when nothing usable is left of the input after preprocessing.
    """
    # Normalize: lowercase, then drop punctuation and runs of digits.
    cleaned = remove_punctuation((text or "").lower())
    cleaned = re.sub(r'\d+', '', cleaned)

    # Remove stopwords and join the surviving tokens back into one string.
    stop_words = set(stopwords.words('english'))
    kept_tokens = [tok for tok in word_tokenize(cleaned) if tok not in stop_words]
    preprocessed_text = ' '.join(kept_tokens)

    # CountVectorizer.fit raises ValueError ("empty vocabulary") when the
    # document yields no tokens -- e.g. empty input, or input made only of
    # stop words, digits, punctuation or single-letter words.  Report that to
    # the user instead of crashing the request.
    try:
        features, _ = vectorize_text([preprocessed_text])
    except ValueError:
        return "Unable to classify: the input contains no usable words after preprocessing."

    # NOTE(review): vectorize_text() fits a brand-new vocabulary on this one
    # input, so the feature columns will not line up with whatever vocabulary
    # the model was trained on.  The vectorizer fitted at training time should
    # be persisted and reused here -- confirm how 'model.bin' was produced.
    return model.predict(features)[0]
# Build the Gradio UI: a single text input is passed to test_model() and the
# returned prediction is displayed as text.
iface = gr.Interface(
    fn=test_model,
    inputs="text",
    outputs="text",
    title="Text Classification",
)

# Start serving the interface.
iface.launch()