stackoverflow / app.py
mikachou's picture
add chart with proba
4b67ac0
raw
history blame
No virus
2.81 kB
from typing import Iterator

import gradio as gr
import joblib
import matplotlib.pyplot as plt
import numpy as np
import spacy
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.preprocessing import MultiLabelBinarizer
# Module-level artifacts, loaded once at import time and shared by every request.
# spaCy pipeline used by lemmatize() for tokenization and lemmatization.
nlp = spacy.load('en_core_web_sm')
# Fitted objects restored from disk: `tfidf` is used through .transform(),
# `model` through .predict() / .predict_proba(), and `tags_binarizer` through
# .inverse_transform() and its .ts.count mapping.
# NOTE(review): joblib.load unpickles arbitrary objects -- only ship artifacts
# produced by a trusted training run.
tfidf = joblib.load('./tfidf.joblib')
model = joblib.load('./model.joblib')
tags_binarizer = joblib.load('./tags.joblib')
def lemmatize(s: str) -> Iterator[str]:
    """Yield the lower-cased lemma of every meaningful token in *s*.

    The text is run through the module-level spaCy pipeline ``nlp``. Tokens
    flagged by spaCy as whitespace, punctuation, stop words or digits are
    dropped; the remaining tokens are emitted as ``token.lemma_.lower()``.

    Args:
        s: Raw text to process.

    Returns:
        A lazy, single-pass iterator of lemma strings (consume it once, e.g.
        with ``' '.join(...)`` or ``list(...)``).
    """
    doc = nlp(s)
    return (
        token.lemma_.lower()
        for token in doc
        if not (
            token.is_space
            or token.is_punct
            or token.is_stop
            or token.is_digit
        )
    )
def plot(tags, proba):
    """Draw a horizontal bar chart of one score per tag and display it.

    Args:
        tags: Bar labels, one per bar, listed top-to-bottom.
        proba: Bar lengths, in the same order as ``tags``.

    Side effects:
        Switches the global matplotlib style to ``dark_background``, sets the
        global font size to 16, creates a new figure and calls ``plt.show()``.
        Nothing is returned.
    """
    # Global look-and-feel (affects every figure created afterwards).
    plt.style.use('dark_background')
    plt.rcParams.update({'font.size': 16})

    _, axes = plt.subplots(figsize=(12, 9))
    axes.barh(tags, proba, align='center', color='darkred')
    axes.set_yticks(tags, labels=tags)
    axes.invert_yaxis()  # first tag at the top
    axes.set_xlabel('Score')
    axes.set_title('Score/Tag')

    # Print the rounded value just inside the right end of each bar.
    for row, score in enumerate(proba):
        axes.text(score - 0.065, row + 0.05, str(round(score, 2)))

    axes.set_xlim(0, 1)
    plt.show()
def predict_words(X):
    """Return the tags predicted for the first row of *X* as one string.

    ``model.predict`` produces the binary label matrix, which
    ``tags_binarizer.inverse_transform`` maps back to tag names; only the
    first row's tags are used and they are joined with single spaces.

    Args:
        X: Feature matrix accepted by ``model.predict``.

    Returns:
        Space-separated tag names (an empty string when no tag is predicted).
    """
    predicted_labels = model.predict(X)
    first_row_tags = tags_binarizer.inverse_transform(predicted_labels)[0]
    return " ".join(first_row_tags)
def _select_scores(tags, scores, min_score):
    """Pair tags with scores, keep scores >= min_score, highest score first."""
    kept = [(tag, score) for tag, score in zip(tags, scores) if score >= min_score]
    # list.sort is stable, so ties keep the incoming tag order.
    kept.sort(key=lambda pair: pair[1], reverse=True)
    return kept


def proba_chart(X, min_score=0.1):
    """Build a horizontal bar chart of the tag scores for the first row of *X*.

    Scores come from ``model.predict_proba(X)[0]`` and are paired positionally
    with the sorted keys of ``tags_binarizer.ts.count``.
    NOTE(review): this assumes the model's output columns follow that sorted
    tag order -- confirm against the training code.

    Args:
        X: Feature matrix accepted by ``model.predict_proba``.
        min_score: Only tags whose score is at least this value are drawn.

    Returns:
        The matplotlib figure. When no tag reaches ``min_score`` the figure
        contains an explanatory message instead of bars (previously this case
        raised ``ValueError`` while unpacking an empty ``zip``).

    Side effects:
        Switches the global matplotlib style to ``dark_background`` and sets
        the global font size to 16.
    """
    y_proba = model.predict_proba(X)[0]
    tag_names = sorted(dict(tags_binarizer.ts.count.items()))
    data = _select_scores(tag_names, y_proba, min_score)

    plt.style.use('dark_background')
    plt.rcParams.update({'font.size': 16})
    # NOTE(review): figures created through pyplot stay registered until they
    # are closed; in a long-running server consider releasing them once the
    # UI has rendered the plot.
    fig, ax = plt.subplots(figsize=(12, 9))
    ax.set_xlabel('Score')
    ax.set_title('Score/Tag')

    if data:
        tags, proba = zip(*data)
        ax.barh(tags, proba, align='center', color='darkred')
        ax.set_yticks(tags, labels=tags)
        ax.invert_yaxis()  # labels read top-to-bottom
        # Print the rounded value just inside the right end of each bar.
        for i, v in enumerate(proba):
            ax.text(v - 0.065, i + 0.05, str(round(v, 2)))
    else:
        # Nothing passed the threshold: show a message rather than crashing.
        ax.set_yticks([])
        ax.text(
            0.5,
            0.5,
            f'No tag with score >= {min_score}',
            ha='center',
            va='center',
            transform=ax.transAxes,
        )

    ax.set_xlim(0, 1)
    return fig
def predict(title: str, post: str):
    """Predict tags for a question and chart the per-tag scores.

    The title and post are joined with a space, lemmatized, vectorized with
    the module-level ``tfidf`` and then passed to ``predict_words`` and
    ``proba_chart``.

    Args:
        title: Question title.
        post: Question body.

    Returns:
        A ``(tags, chart)`` tuple: the value returned by ``predict_words`` and
        the figure returned by ``proba_chart``.
    """
    document = " ".join((title, post))
    lemmatized = ' '.join(lemmatize(document))
    # The vectorizer receives a one-element array holding the whole document.
    X = tfidf.transform(np.array([lemmatized]))
    return predict_words(X), proba_chart(X)
# Gradio UI: two text inputs (title, post) feed predict(); its two return
# values are shown as a text box of tags and a plot of the scores.
demo = gr.Interface(
    fn=predict,
    inputs=[
        gr.Textbox(label="Title", lines=1, placeholder="Title..."),
        gr.Textbox(label="Post", lines=20, placeholder="Post..."),
    ],
    outputs=[
        gr.Textbox(label="Tags"),
        gr.Plot(),
    ],
)
# Start the web server as soon as the module is executed.
demo.launch()