File size: 1,344 Bytes
6dd247c
 
 
a768eaa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31071b3
a768eaa
 
 
6dd247c
 
 
 
 
a768eaa
 
6dd247c
a768eaa
 
6dd247c
 
a768eaa
6dd247c
8057064
6dd247c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import gradio as gr
import numpy as np
import pickle



import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer

# Initialize NLTK resources (download if needed).
# 'punkt_tab' is the tokenizer data that word_tokenize loads on recent NLTK
# releases; 'punkt' is kept for older releases that still read the original
# package. 'wordnet' backs WordNetLemmatizer and 'stopwords' backs
# stopwords.words('english').
for _nltk_resource in ('punkt', 'punkt_tab', 'wordnet', 'stopwords'):
    nltk.download(_nltk_resource)

# Text preprocessing functions

# Lazily-built (stop-word set, lemmatizer) pair shared by every call, so the
# stop-word list and the lemmatizer are not rebuilt for each request.
_TEXT_RESOURCES = None


def _get_text_resources():
    """Return the cached ``(stop_words, lemmatizer)`` pair, building it on first use."""
    global _TEXT_RESOURCES
    if _TEXT_RESOURCES is None:
        # Built on first use rather than at import time so a missing NLTK
        # corpus still surfaces when text is processed, as it did before.
        _TEXT_RESOURCES = (
            frozenset(stopwords.words('english')),
            WordNetLemmatizer(),
        )
    return _TEXT_RESOURCES


def preprocess_text(text):
    """Normalise raw text into a space-separated string of lemmatized tokens.

    The text is lowercased and tokenized with ``word_tokenize``, English stop
    words are dropped, and each remaining token is passed through
    ``WordNetLemmatizer.lemmatize``.

    Args:
        text: The string to normalise.

    Returns:
        The surviving lemmatized tokens joined by single spaces.
    """
    # Tokenization (on the lowercased text, so stop-word matching is
    # case-insensitive).
    tokens = word_tokenize(text.lower())

    stop_words, lemmatizer = _get_text_resources()

    # Stop-word removal and lemmatization in a single pass; each token is
    # handled independently, so this matches filtering first and then
    # lemmatizing.
    return ' '.join(
        lemmatizer.lemmatize(token)
        for token in tokens
        if token not in stop_words
    )





def predict_tags(text):
    """Return the entries of ``mlb.classes_`` that the model predicts for *text*.

    The text is cleaned with ``preprocess_text``, vectorized with the
    module-level ``vectorizer``, and passed to the module-level ``model``.
    Positions where the flattened prediction equals 1 select the matching
    entries of ``mlb.classes_``.

    Args:
        text: Raw input text.

    Returns:
        The slice of ``mlb.classes_`` at the predicted positions.
    """
    label_names = mlb.classes_

    cleaned = preprocess_text(text)
    features = vectorizer.transform([cleaned])

    # NOTE(review): assumes model.predict returns a dense 0/1 indicator array
    # for the single input document -- confirm against the trained model.
    indicator = model.predict(features).flatten()
    selected = np.where(indicator == 1)

    return label_names[selected]




def _load_pickle(path):
    """Deserialize and return the object stored in the pickle file at *path*."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Restore the pickled objects that predict_tags reads as module globals:
# the label holder (its ``classes_`` attribute is indexed), the fitted text
# vectorizer, and the trained model.
mlb = _load_pickle('classes.pkl')
vectorizer = _load_pickle('vectorizer.pkl')
model = _load_pickle('model.pkl')


# Expose predict_tags through a Gradio interface with one text input and one
# text output, then start the app.
iface = gr.Interface(
    fn=predict_tags,
    inputs="text",
    outputs="text",
)
iface.launch()