# Trading-Chatbot / app.py
# (Hugging Face Spaces page header — author: vjain, commit 43093ce "Update app.py")
# --- Imports: stdlib first, then third-party (PEP 8 grouping) ---
import csv
import json
import os
import re

import gradio as gr
import numpy as np
import openai
import pandas as pd
import requests
from datasets import load_dataset
from openai.embeddings_utils import get_embedding
from sklearn.metrics.pairwise import cosine_similarity

# OpenAI key for the completion endpoint; read from the environment so the
# secret never lands in the repo.
openai.api_key = os.environ.get("openai.api_key")

# Hugging Face Inference API token and the sentence-embedding model used to
# embed the user's question (must match the model the datasets were built with).
# NOTE(review): env var name "hugginface.api.token" looks misspelled — confirm
# it matches the secret actually configured on the Space before renaming.
hf_token = os.environ.get("hugginface.api.token")
model_id = "sentence-transformers/all-MiniLM-L6-v2"
def generate_embeddings(texts, model_id, hf_token):
    """Embed *texts* via the Hugging Face Inference API feature-extraction pipeline.

    Parameters
    ----------
    texts : str or list[str]
        Text(s) to embed.
    model_id : str
        Hugging Face model id, e.g. "sentence-transformers/all-MiniLM-L6-v2".
    hf_token : str
        Hugging Face API token, sent as a Bearer credential.

    Returns
    -------
    list
        The embedding vector(s) decoded from the API's JSON response.

    Raises
    ------
    requests.HTTPError
        If the API responds with a non-2xx status. (Previously the error JSON
        was returned silently and broke the cosine-similarity step downstream.)
    """
    api_url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{model_id}"
    headers = {"Authorization": f"Bearer {hf_token}"}
    # wait_for_model avoids a 503 while HF is still loading the model.
    response = requests.post(
        api_url,
        headers=headers,
        json={"inputs": texts, "options": {"wait_for_model": True}},
        timeout=60,  # don't hang the Gradio worker forever on a stuck request
    )
    response.raise_for_status()
    return response.json()
# Map of UI dataset name -> Hugging Face dataset repo holding the
# pre-computed text + embedding rows for that corpus.
_DATASET_REPOS = {
    "AP_Bio": "vjain/biology_AP_embeddings",
    "AP_Physics": "vjain/AP_physics_embeddings",
    "Personality": "vjain/Personality_em",
    "AP_statistics": "vjain/AP_statistics",
    "tax_embeddings": "vjain/tax_embeddings",
    "therapy": "vjain/therapy",
    "gurbani": "vjain/gurbani",
}


def _load_frames(repos):
    """Load each dataset's 'train' split into a DataFrame keyed by UI name.

    Each frame gets a zeroed "similarity" column, matching the original
    per-dataset initialisation.
    """
    frames = {}
    for name, repo in repos.items():
        df = pd.DataFrame(load_dataset(repo)["train"])
        df["similarity"] = 0
        frames[name] = df
    return frames


# Same keys and contents as the previous hand-unrolled df1..df7 version.
dataframes = _load_frames(_DATASET_REPOS)
#df = pd.read_csv("TA_embeddings.csv")
#df["embedding"]=df["embedding"].apply(eval).apply(np.array)
def reply(input, dataset_name):
    """Answer a question with retrieval-augmented generation over one corpus.

    Parameters
    ----------
    input : str
        The user's question. (Name shadows the builtin but is kept for
        interface compatibility with the existing Gradio wiring.)
    dataset_name : str
        Key into the module-level ``dataframes`` dict.

    Returns
    -------
    str
        The model's answer, or a human-readable error/validation message —
        the Gradio UI displays whatever string comes back.
    """
    try:
        # Guard clauses: invalid dropdown value or empty question.
        if dataset_name not in dataframes:
            return "Invalid dataset selected. Please select a valid dataset."
        if not input:
            return "Please Enter a Question to get an Answer"

        df = dataframes[dataset_name]

        # Embed the question, then rank every stored passage by cosine
        # similarity against it.
        input_vector = generate_embeddings(input, model_id, hf_token)
        df["similarities"] = df["embedding"].apply(
            lambda emb: cosine_similarity([emb], [input_vector])[0][0]
        )

        # Keep the 5 most relevant passages as prompt context.
        top = df.sort_values("similarities", ascending=False).head(5)
        top.to_csv("sorted.csv")  # debugging artifact: persist the ranking
        context = "\n".join(row["text"] for _, row in top.iterrows())

        prompt = f"""
Answer the following question using the context given below.If you don't know the answer for certain, say I don't know.
Context: {context}
Q: {input}
"""
        completion = openai.Completion.create(
            prompt=prompt,
            temperature=1,
            max_tokens=500,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
            model="text-davinci-003",
        )
        return completion["choices"][0]["text"].strip(" \n")
    except Exception as e:
        # Broad on purpose: surface any failure (network, API, pandas) in the
        # UI instead of crashing the Gradio worker.
        return f"An error occurred: {e}"
# --- Gradio UI wiring ---

# Dropdown of available corpora; keys must match the `dataframes` dict.
book_selector = gr.inputs.Dropdown(
    label="Select the Book",
    choices=[
        "AP_Bio",
        "AP_Physics",
        "Personality",
        "AP_statistics",
        "tax_embeddings",
        "therapy",
        "gurbani",
    ],
    default="AP_Bio",
)

# Free-text question input.
question_box = gr.inputs.Textbox(
    label="Enter your questions here",
    placeholder="E.g. What is DNA?",
    lines=3,
)

# Single text output showing the model's answer.
answer_box = gr.outputs.Textbox(label="Answer")

description = "Scholar Bot is a question answering system designed to provide accurate and relevant answers to questions from this book hosted by OpenStax https://openstax.org/details/books/biology-ap-courses. Simply enter your question in the text box above and Scholar Bot will use advanced natural language processing algorithms to search a large corpus of biology text to find the best answer for you. Scholar Bot uses the Sentence Transformers model to generate embeddings of text, and OpenAI's GPT-3 language model to provide answers to your questions."

ui = gr.Interface(
    fn=reply,
    inputs=[question_box, book_selector],
    outputs=[answer_box],
    title="Scholar Bot",
    description=description,
    theme="light",
    layout="vertical",
    allow_flagging=False,
    examples=[["What is the function of DNA polymerase?", "AP_Bio"]],
)
ui.launch()