"""Gradio demo that answers a question with a text-generation model and lists similar quotes.

At import time this script downloads the NLTK ``punkt`` tokenizer, reads the
quotes dataset from ``krishnamurti_df.json``, loads the sentence-embedding and
text-generation models, builds the Gradio UI, and launches it.
"""

import gradio as gr
from transformers import pipeline
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import nltk
from nltk import sent_tokenize

nltk.download("punkt")

# Number of similar quotes shown to the user.
TOP_K = 5
# Upper bound on the number of generated sentences kept in the answer.
MAX_SENTENCES = 6
# Quotes are cut to this many characters before "..." is appended.
MAX_QUOTE_CHARS = 250

# Loading in quotes dataset.
# get_similar_quotes reads its "Quotes" and "Embedding" columns.
df = pd.read_json("krishnamurti_df.json")

# Loading back in our sentence similarity and language model
model = SentenceTransformer("msmarco-roberta-base-v3")  # best performing model
krishnamurti_generator = pipeline("text-generation", model="distilgpt2")


############### DEFINING FUNCTIONS ###########################

def ask_krishnamurti(question):
    """Generate a continuation of ``question`` and keep its leading sentences.

    Args:
        question: Prompt text passed to the text-generation pipeline.

    Returns:
        A string made of at most ``MAX_SENTENCES`` sentences of the generated
        text, joined with single spaces.
    """
    # NOTE(review): per the transformers docs, min_length/max_length count the
    # prompt tokens as well as the newly generated ones — confirm that this is
    # the intended budget for long questions.
    generated = krishnamurti_generator(question, min_length=100, max_length=120)
    answer = generated[0]["generated_text"]
    # Keep only the first MAX_SENTENCES sentences so the answer does not end
    # with however many extra sentences the model happened to produce.
    return " ".join(sent_tokenize(answer)[:MAX_SENTENCES])


def get_similar_quotes(question):
    """Return the quotes whose embeddings score highest against ``question``.

    Args:
        question: Text to embed and compare with every row of ``df["Embedding"]``.

    Returns:
        A DataFrame with a single ``"Quotes"`` column, indexed from 1, holding
        up to ``TOP_K`` quotes ordered from most to least similar. Each quote
        has its final character dropped, is cut to ``MAX_QUOTE_CHARS``
        characters, and has ``"..."`` appended.
    """
    question_embedding = model.encode(question)

    # dot_score yields a 1x1 tensor per pair; .item() flattens it so NumPy
    # receives a plain 1-D vector of floats with one score per quote.
    scores = np.array(
        [
            util.dot_score(question_embedding, quote_embedding).item()
            for quote_embedding in df["Embedding"]
        ],
        dtype=float,
    )

    # Never ask for more quotes than the dataset contains.
    top_k = min(TOP_K, len(scores))
    # Sorting the negated scores ranks the best match first.
    best_positions = np.argsort(-scores)[:top_k]

    # Scores are positional, so select rows by position rather than by label.
    similar_sentences = df["Quotes"].iloc[best_positions].tolist()

    top_quotes = pd.DataFrame(
        data=similar_sentences,
        columns=["Quotes"],
        index=range(1, top_k + 1),
    )
    top_quotes["Quotes"] = (
        top_quotes["Quotes"].str[:-1].str[:MAX_QUOTE_CHARS] + "..."
    )
    return top_quotes


def main(question):
    """Return the generated answer and the table of similar quotes for ``question``."""
    return ask_krishnamurti(question), get_similar_quotes(question)


# NOTE(review): the original file had lost its indentation, so the nesting of
# the layout below (Column inside Row, button outside the Row) is reconstructed
# from statement order — confirm it matches the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# Ask Krishnamurti")
    with gr.Row():
        inp = gr.Textbox(placeholder="Place your question here...")
        with gr.Column():
            out1 = gr.Textbox(
                lines=3,
                max_lines=10,
                label="Answer",
            )
            out2 = gr.DataFrame(
                headers=["Quotes"],
                max_rows=5,
                interactive=False,
                wrap=True,
            )
    btn = gr.Button("Run")
    btn.click(fn=main, inputs=inp, outputs=[out1, out2])

demo.launch()