import re
import string

import gradio as gr
import joblib
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

nltk.download('stopwords')
nltk.download('punkt')

# Load the trained model and the vectorizer that was fitted on the training
# corpus. NOTE: 'vectorizer.bin' is an assumed filename; the vectorizer must
# be the same object used at training time, because fitting a fresh
# CountVectorizer on a single input would produce a vocabulary (and feature
# dimension) the model has never seen.
model = joblib.load('model.bin')
vectorizer = joblib.load('vectorizer.bin')


def remove_punctuation(text):
    # Strip every punctuation character from the input string
    return "".join(ch for ch in text if ch not in string.punctuation)


def vectorize_text(texts):
    # Transform texts with the vectorizer loaded above (no refitting)
    return vectorizer.transform(texts)


def test_model(text):
    # Convert text to lowercase
    text = text.lower()
    # Remove punctuation
    text = remove_punctuation(text)
    # Remove numbers
    text = re.sub(r'\d+', '', text)
    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    tokens = word_tokenize(text)
    filtered_text = [word for word in tokens if word not in stop_words]
    # Join the filtered tokens back into a string
    preprocessed_text = ' '.join(filtered_text)
    # Vectorize the preprocessed text
    text_vectorized = vectorize_text([preprocessed_text])
    # Make a prediction on the vectorized text and return it
    prediction = model.predict(text_vectorized)[0]
    return prediction


# Create the Gradio interface
iface = gr.Interface(fn=test_model, inputs="text", outputs="text",
                     title="Text Classification")
iface.launch()
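
# ------------------------------------------------------------------------------
# Training-side sketch (assumption): the code above expects both 'model.bin'
# and 'vectorizer.bin' to exist on disk. A minimal, hypothetical example of how
# they could have been produced at training time ('train_texts' and
# 'train_labels' are placeholders, not part of this script):
#
#     from sklearn.feature_extraction.text import CountVectorizer
#     from sklearn.naive_bayes import MultinomialNB
#     import joblib
#
#     vectorizer = CountVectorizer()
#     X_train = vectorizer.fit_transform(train_texts)   # list of preprocessed strings
#     clf = MultinomialNB().fit(X_train, train_labels)  # matching list of labels
#
#     joblib.dump(clf, 'model.bin')
#     joblib.dump(vectorizer, 'vectorizer.bin')
# ------------------------------------------------------------------------------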