# Spaces: Sleeping  (page-header residue captured with the source; not part of the program)
from typing import Iterator

import gradio as gr
import joblib
import numpy as np
import spacy
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.preprocessing import MultiLabelBinarizer
# Inference artifacts, loaded once when the module is imported so that every
# request reuses the same objects.
#
# NOTE: joblib.load unpickles its input, which can execute arbitrary code.
# Only ship artifact files that come from a trusted source.
nlp = spacy.load("en_core_web_sm")  # spaCy pipeline used by lemmatize()
tfidf = joblib.load("./tfidf.joblib")  # vectorizer applied to the lemmatized text
model = joblib.load("./model.joblib")  # classifier exposing predict / predict_proba
tags_binarizer = joblib.load("./tags.joblib")  # maps model output back to tags
def lemmatize(s: str) -> Iterator[str]:
    """Lazily yield the lower-cased lemma of every retained token in *s*.

    The text is tokenized with the module-level spaCy pipeline ``nlp``.
    Tokens that spaCy flags as whitespace, punctuation, stop words or
    digits are skipped.

    Args:
        s: Raw text to process.

    Returns:
        A one-shot iterator over lower-cased lemma strings, in document
        order. Tokenization happens when the function is called; the
        filtering and lemma extraction happen as the iterator is consumed.
    """
    doc = nlp(s)
    return (
        token.lemma_.lower()
        for token in doc
        if not (
            token.is_space
            or token.is_punct
            or token.is_stop
            or token.is_digit
        )
    )
def predict(title: str, post: str, predict_proba: bool):
    """Predict tags for a post from its title and body.

    The title and body are joined with a space, lemmatized with
    ``lemmatize``, vectorized with the module-level ``tfidf`` and passed to
    the module-level ``model``.

    Args:
        title: Title of the post.
        post: Body of the post.
        predict_proba: When true, return per-tag scores instead of the
            decoded tag set.

    Returns:
        If ``predict_proba`` is true, a list of ``(tag, score)`` pairs built
        from the first row of ``model.predict_proba``. Otherwise, the result
        of ``tags_binarizer.inverse_transform`` on the model's prediction.
    """
    # Treat a missing (None) field as empty text instead of raising a
    # TypeError on string concatenation.
    text = f"{title or ''} {post or ''}"

    # The vectorizer is fed a batch containing a single document.
    lemmas = np.array([" ".join(lemmatize(text))])
    X = tfidf.transform(lemmas)

    if predict_proba:
        # Single-document batch: keep only the first (and only) row.
        y_proba = model.predict_proba(X)[0]
        # NOTE(review): assumes the alphabetically sorted tag names line up
        # with the model's output columns -- confirm against training code.
        tags = sorted(tag for tag, _ in tags_binarizer.ts.count.items())
        return list(zip(tags, y_proba))

    y_bin = model.predict(X)
    return tags_binarizer.inverse_transform(y_bin)
# Gradio UI: two text inputs (title, post) and a checkbox that switches
# predict() between decoded tags and per-tag scores; the result is shown in
# a multi-line text box.
demo = gr.Interface(
    fn=predict,
    inputs=[
        gr.Textbox(label="Title", lines=1, placeholder="Title..."),
        gr.Textbox(label="Post", lines=10, placeholder="Post..."),
        gr.Checkbox(label="Proba?"),
    ],
    outputs=gr.Textbox(lines=10),
)

# Start the web server as soon as the module is executed.
demo.launch()