"""Gradio demo: suggest tags for a post and chart the per-tag scores."""
from typing import Iterator

import gradio as gr
import joblib
import matplotlib.pyplot as plt
import numpy as np
import spacy
# NOTE(review): the sklearn names below are not referenced directly in this
# file; they are kept because the joblib artifacts may rely on them when they
# are unpickled -- confirm before removing.
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.preprocessing import MultiLabelBinarizer

# Default lowest score a tag needs in order to appear on the chart.
MIN_SCORE = 0.1

nlp = spacy.load('en_core_web_sm')

# joblib.load unpickles arbitrary objects: only ship trusted artifact files.
tfidf = joblib.load('./tfidf.joblib')
model = joblib.load('./model.joblib')
tags_binarizer = joblib.load('./tags.joblib')


def lemmatize(s: str) -> Iterator[str]:
    """Return a lazy iterator over the lowercased lemmas of *s*.

    The text is run through the spaCy pipeline; whitespace, punctuation,
    stop-word and digit tokens are dropped before lemmatization.
    """
    return (
        token.lemma_.lower()
        for token in nlp(s)
        if not (token.is_space or token.is_punct
                or token.is_stop or token.is_digit)
    )


def _build_chart(tags, proba):
    """Draw a horizontal bar chart of *proba* per tag and return the figure.

    Empty inputs produce an empty, still labelled, chart.
    """
    # NOTE(review): the style and rcParams calls mutate matplotlib's global
    # state, and figures created through pyplot stay registered until they
    # are closed -- worth revisiting for a long-running server.
    plt.style.use('dark_background')
    plt.rcParams.update({'font.size': 16})
    fig, ax = plt.subplots(figsize=(12, 9))
    # len() rather than truthiness so array-like inputs are accepted too.
    if len(tags) > 0:
        ax.barh(tags, proba, align='center', color='darkred')
        ax.set_yticks(tags, labels=tags)
    ax.invert_yaxis()  # labels read top-to-bottom
    ax.set_xlabel('Score')
    ax.set_title('Score/Tag')
    # Write each rounded score just inside the right end of its bar.
    for row, score in enumerate(proba):
        ax.text(score - 0.065, row + 0.05, str(round(score, 2)))
    ax.set_xlim(0, 1)
    return fig


def plot(tags, proba):
    """Draw the score-per-tag bar chart and display it with ``plt.show()``."""
    _build_chart(tags, proba)
    plt.show()


def predict_words(X):
    """Return the tags predicted for the first row of *X*, space-separated."""
    y_bin = model.predict(X)
    return " ".join(tags_binarizer.inverse_transform(y_bin)[0])


def proba_chart(X, min_score=MIN_SCORE):
    """Return a bar-chart figure of the tag scores for the first row of *X*.

    Tags are ordered by decreasing score and only those scoring at least
    *min_score* are drawn. When no tag reaches the threshold an empty chart
    is returned instead of raising.
    """
    y_proba = model.predict_proba(X)[0]
    # Alphabetically sorted tag names.
    # NOTE(review): this assumes the sorted order matches the column order of
    # predict_proba -- confirm against the training code.
    tags = sorted(tag for tag, _ in tags_binarizer.ts.count.items())
    scored = sorted(
        (pair for pair in zip(tags, y_proba) if pair[1] >= min_score),
        key=lambda pair: pair[1],
        reverse=True,
    )
    if scored:
        kept_tags, kept_proba = zip(*scored)
    else:
        # zip(*[]) yields nothing to unpack; fall back to an empty chart.
        kept_tags, kept_proba = (), ()
    return _build_chart(kept_tags, kept_proba)


def predict(title: str, post: str):
    """Return ``(tags, figure)`` for a post given its title and body.

    The title and body are concatenated, lemmatized and vectorized with the
    loaded TF-IDF transformer before being passed to the model.
    """
    text = title + " " + post
    lemmes = np.array([' '.join(lemmatize(text))])
    X = tfidf.transform(lemmes)
    words = predict_words(X)
    chart = proba_chart(X)
    return words, chart


demo = gr.Interface(
    fn=predict,
    inputs=[
        gr.Textbox(label="Title", lines=1, placeholder="Title..."),
        gr.Textbox(label="Post", lines=20, placeholder="Post..."),
    ],
    outputs=[gr.Textbox(label="Tags"), gr.Plot()],
)

demo.launch()