stack_onnx / app.py
KeeganFdes's picture
Update app.py
31071b3
raw
history blame contribute delete
No virus
1.34 kB
import gradio as gr
import numpy as np
import pickle
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer
# Fetch the NLTK data packages that preprocess_text relies on:
#   punkt / punkt_tab -> word_tokenize ('punkt_tab' is what NLTK >= 3.8.2
#                        loads; 'punkt' is kept for older NLTK releases)
#   wordnet           -> WordNetLemmatizer
#   stopwords         -> stopwords.words('english')
for _resource in ('punkt', 'punkt_tab', 'wordnet', 'stopwords'):
    nltk.download(_resource)
# Text preprocessing functions
def preprocess_text(text):
    """Normalize raw text into a space-separated string of lemmatized tokens.

    The text is lowercased and split with ``word_tokenize``. Tokens that
    appear in NLTK's English stopword list are dropped, and every remaining
    token is passed through ``WordNetLemmatizer.lemmatize``.
    """
    tokens = word_tokenize(text.lower())
    english_stopwords = set(stopwords.words('english'))
    wordnet_lemmatizer = WordNetLemmatizer()

    kept = []
    for token in tokens:
        if token in english_stopwords:
            continue
        kept.append(wordnet_lemmatizer.lemmatize(token))
    return ' '.join(kept)
def predict_tags(text):
    """Return the entries of ``mlb.classes_`` that the model predicts for *text*.

    The text is cleaned with ``preprocess_text``, vectorized as a
    one-document batch and passed to ``model.predict``. Positions of the
    flattened prediction that equal 1 select the returned classes.
    """
    cleaned = preprocess_text(text)
    features = vectorizer.transform([cleaned])
    prediction = model.predict(features).flatten()
    # np.where returns a tuple of index arrays, used directly as the index.
    hits = np.where(prediction == 1)
    return mlb.classes_[hits]
# Restore the pickled objects that predict_tags reads as module globals.
# NOTE: pickle.load can execute arbitrary code while unpickling, so these
# files must come from a trusted source.
def _load_pickle(path):
    """Return the object unpickled from the file at *path*."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


mlb = _load_pickle('classes.pkl')  # object exposing ``classes_`` (presumably a fitted MultiLabelBinarizer)
vectorizer = _load_pickle('vectorizer.pkl')
model = _load_pickle('model.pkl')
# Build a Gradio interface that passes the submitted text to predict_tags
# and shows the result as text, then start serving it.
iface = gr.Interface(
    fn=predict_tags,
    inputs="text",
    outputs="text",
)
iface.launch()