# MVP-1.1 / app.py -- commit 1dbcf29 ("Update app.py"), eliwill
import gradio as gr
from transformers import pipeline
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import nltk
from nltk import sent_tokenize
# Fetch the NLTK "punkt" data at start-up; sent_tokenize (used in
# ask_krishnamurti) relies on it.
nltk.download("punkt")
# Load the quotes dataset. The functions below read its 'Quotes' and
# 'Embedding' columns.
df = pd.read_json("krishnamurti_df.json")
# Load the sentence-similarity model used to embed incoming questions, and
# the text-generation pipeline used to produce answers.
model = SentenceTransformer("msmarco-roberta-base-v3") # best performing model
# NOTE(review): this loads the stock "distilgpt2" checkpoint -- confirm
# whether a fine-tuned checkpoint was intended here.
krishnamurti_generator = pipeline("text-generation", model="distilgpt2")
############### DEFINING FUNCTIONS ###########################
def ask_krishnamurti(question):
    """Generate a free-text answer to ``question`` with the language model.

    The question is passed as the prompt to the module-level
    ``krishnamurti_generator`` pipeline (``min_length=100``,
    ``max_length=120``) and the generated text is trimmed to its leading
    sentences.

    Args:
        question: Prompt text entered by the user.

    Returns:
        At most the first six sentences of the generated text, joined by
        single spaces.
    """
    generations = krishnamurti_generator(question, min_length=100, max_length=120)
    generated_text = generations[0]['generated_text']

    # Split into sentences and keep at most the first six of them.
    sentences = sent_tokenize(generated_text)
    return " ".join(sentences[:6])
def get_similar_quotes(question):
    """Return the quotes most similar to ``question``, best match first.

    The question is embedded with the module-level sentence-transformer
    ``model`` and scored by dot product against every vector stored in
    ``df['Embedding']``.

    Args:
        question: Free-text question entered by the user.

    Returns:
        A one-column ``pandas.DataFrame`` (column ``"Quotes"``) indexed from
        1, holding up to five quotes ordered by decreasing similarity. Each
        quote has its final character dropped, is cut to 250 characters and
        gets a trailing ``"..."``.
    """
    top_k = 5
    question_embedding = model.encode(question)

    # util.dot_score returns a 2-D (1 x 1) tensor for each pair; collapse it
    # to a plain float so the scores form a flat vector, one entry per quote.
    scores = np.array(
        [
            util.dot_score(question_embedding, quote_embedding).item()
            for quote_embedding in df['Embedding']
        ],
        dtype=float,
    )

    # Never ask for more rows than the dataset actually holds.
    k = min(top_k, scores.size)
    if k:
        # argpartition selects the k best scores but leaves them unordered,
        # so sort just those k by descending score afterwards.
        candidates = np.argpartition(scores, -k)[-k:]
        ranked = candidates[np.argsort(scores[candidates])[::-1]]
    else:
        ranked = np.array([], dtype=int)

    # The indices are positional, hence .iloc rather than a label lookup.
    similar_sentences = [df['Quotes'].iloc[i] for i in ranked]
    top5quotes = pd.DataFrame(
        data=similar_sentences, columns=["Quotes"], index=range(1, k + 1)
    )
    top5quotes['Quotes'] = top5quotes['Quotes'].str[:-1].str[:250] + "..."
    return top5quotes
def main(question):
    """Answer ``question`` and look up related quotes.

    Args:
        question: Free-text question entered by the user.

    Returns:
        A 2-tuple ``(answer, quotes)``: the result of ``ask_krishnamurti``
        followed by the result of ``get_similar_quotes``.
    """
    generated_answer = ask_krishnamurti(question)
    related_quotes = get_similar_quotes(question)
    return generated_answer, related_quotes
# Gradio UI: a question box, an answer box and a table of related quotes,
# wired to main() through the "Run" button.
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation had been lost -- confirm the intended layout.
with gr.Blocks() as demo:
    # Page heading (spelling fixed: "Krishnamurti").
    gr.Markdown("\n# Ask Krishnamurti\n")
    with gr.Row():
        inp = gr.Textbox(placeholder="Place your question here...")
        with gr.Column():
            # Generated answer text.
            out1 = gr.Textbox(
                lines=3,
                max_lines=10,
                label="Answer",
            )
            # Read-only table of the most similar quotes.
            out2 = gr.DataFrame(
                headers=["Quotes"],
                max_rows=5,
                interactive=False,
                wrap=True,
            )
    btn = gr.Button("Run")
    # main() returns (answer, quotes), matching the order of the outputs.
    btn.click(fn=main, inputs=inp, outputs=[out1, out2])

demo.launch()