"""Gradio demo that predicts tags for a piece of free text.

The text is lower-cased, tokenized, stripped of English stopwords and
lemmatized with NLTK, vectorized with a pickled vectorizer, and passed to a
pickled model. The labels whose predicted indicator equals 1 are returned.
"""

import pickle

import gradio as gr
import nltk
import numpy as np
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import CountVectorizer

# Initialize NLTK resources (download if needed).
nltk.download('punkt')
# Recent NLTK releases load the tokenizer data from 'punkt_tab' instead of
# 'punkt'; requesting both keeps word_tokenize working across versions.
nltk.download('punkt_tab')
nltk.download('wordnet')
nltk.download('stopwords')

# Built once at import time instead of on every request: neither the stopword
# set nor the lemmatizer depends on the text being processed.
_STOP_WORDS = frozenset(stopwords.words('english'))
_LEMMATIZER = WordNetLemmatizer()


def _load_pickle(path):
    """Return the object stored in the pickle file at *path*.

    SECURITY: unpickling can execute arbitrary code. Only load artifact files
    that were produced by a trusted training pipeline.
    """
    with open(path, 'rb') as fh:
        return pickle.load(fh)


# Fitted artifacts used by predict_tags.
# NOTE(review): `mlb` is presumably a fitted MultiLabelBinarizer (only its
# `classes_` attribute is used here) -- confirm against the training code.
mlb = _load_pickle('classes.pkl')
vectorizer = _load_pickle('vectorizer.pkl')
model = _load_pickle('model.pkl')


def preprocess_text(text):
    """Normalize *text* for the vectorizer.

    Lower-cases and tokenizes the text, drops English stopwords, lemmatizes
    each remaining token and joins the result with single spaces.

    Args:
        text: Raw input string.

    Returns:
        The cleaned, space-joined string of lemmatized tokens.
    """
    tokens = word_tokenize(text.lower())
    kept = [token for token in tokens if token not in _STOP_WORDS]
    return ' '.join(_LEMMATIZER.lemmatize(token) for token in kept)


def predict_tags(text):
    """Predict the tags for *text* using the pickled vectorizer and model.

    Args:
        text: Raw input string.

    Returns:
        The entries of ``mlb.classes_`` at the positions where the flattened
        model prediction equals 1.
    """
    features = vectorizer.transform([preprocess_text(text)])
    # A single document is predicted, so the flattened output is assumed to
    # line up one-to-one with mlb.classes_ -- TODO confirm for this model.
    indicator = model.predict(features).flatten()
    return mlb.classes_[np.where(indicator == 1)]


# Expose predict_tags through a simple text-in / text-out Gradio interface.
iface = gr.Interface(fn=predict_tags, inputs="text", outputs="text")

if __name__ == "__main__":
    # Start the web server only when run as a script, so importing this
    # module (e.g. to reuse predict_tags) does not block on the server.
    iface.launch()