Spaces:
Runtime error
Runtime error
# Standard library
import csv
import json
import os
import re

# Third-party
import gradio as gr
import numpy as np
import openai
import pandas as pd
import requests
from datasets import load_dataset
from openai.embeddings_utils import get_embedding
from sklearn.metrics.pairwise import cosine_similarity

# Credentials are read from the environment; os.environ.get yields None when a
# variable is missing. The variable names (including the "hugginface"
# spelling) presumably match the secrets configured for the deployment, so
# they are kept exactly as they are.
openai.api_key = os.environ.get("openai.api_key")
hf_token = os.environ.get("hugginface.api.token")

# Sentence-embedding model whose id reply() hands to generate_embeddings().
model_id = "sentence-transformers/all-MiniLM-L6-v2"
def generate_embeddings(texts, model_id, hf_token, timeout=120):
    """Fetch embeddings for ``texts`` from the Hugging Face Inference API.

    Args:
        texts: Value sent as the ``inputs`` field of the request body.
        model_id: Model identifier appended to the feature-extraction
            pipeline URL.
        hf_token: Token sent as a ``Bearer`` Authorization header.
        timeout: Seconds to wait for the HTTP response. The request sets
            ``wait_for_model``, so the default is deliberately generous.

    Returns:
        The decoded JSON body of a successful response.

    Raises:
        requests.RequestException: On connection failure or timeout.
        RuntimeError: If the service answers with a non-success status or
            with an ``{"error": ...}`` payload instead of embeddings.
        ValueError: If the response body is not valid JSON.
    """
    api_url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{model_id}"
    headers = {"Authorization": f"Bearer {hf_token}"}
    payload = {"inputs": texts, "options": {"wait_for_model": True}}

    # Without a timeout a stalled connection would block the caller forever.
    response = requests.post(api_url, headers=headers, json=payload, timeout=timeout)

    # Fail loudly instead of handing an error body back as "embeddings".
    if not response.ok:
        raise RuntimeError(
            f"Embedding request failed with HTTP {response.status_code}: {response.text}"
        )

    embeddings = response.json()
    if isinstance(embeddings, dict) and "error" in embeddings:
        raise RuntimeError(f"Embedding request failed: {embeddings['error']}")
    return embeddings
def _load_train_frame(repo_id):
    """Load dataset ``repo_id`` and return ``(dataset, train_frame)``.

    ``train_frame`` is the ``train`` split converted to a pandas DataFrame
    with an extra ``similarity`` column initialised to 0.
    """
    dataset = load_dataset(repo_id)
    frame = pd.DataFrame(dataset["train"])
    frame["similarity"] = 0
    return dataset, frame


# Each dataset is fetched once at import time, in this order.
AP_Bio, df1 = _load_train_frame('vjain/biology_AP_embeddings')
AP_Physics, df2 = _load_train_frame('vjain/AP_physics_embeddings')
Personality, df3 = _load_train_frame('vjain/Personality_em')
AP_statistics, df4 = _load_train_frame('vjain/AP_statistics')
tax_embeddings, df5 = _load_train_frame('vjain/tax_embeddings')

# Maps the dataset name chosen in the UI to its DataFrame.
dataframes = dict(
    [
        ("AP_Bio", df1),
        ("AP_Physics", df2),
        ("Personality", df3),
        ("AP_statistics", df4),
        ("tax_embeddings", df5),
    ]
)
#df = pd.read_csv("TA_embeddings.csv") | |
#df["embedding"]=df["embedding"].apply(eval).apply(np.array) | |
def _top_passages(df, input_vector, limit=5):
    """Return the ``limit`` rows of ``df`` most similar to ``input_vector``.

    All rows are scored with a single ``cosine_similarity`` call. The scores
    are attached to the returned copy as a ``similarities`` column, so the
    DataFrame passed in is not modified.
    """
    # reshape(1, -1) accepts both a flat vector and a single-row matrix.
    query = np.asarray(input_vector, dtype=float).reshape(1, -1)
    # Assumes every "embedding" cell is a numeric sequence of equal length.
    passages = np.asarray(df["embedding"].tolist(), dtype=float)
    scores = cosine_similarity(passages, query)[:, 0]
    # Stable sort on the negated scores: best first, ties keep row order.
    best = np.argsort(-scores, kind="stable")[:limit]
    return df.iloc[best].assign(similarities=scores[best])


def _build_prompt(context, question):
    """Assemble the completion prompt from the context text and the question."""
    return (
        "\n"
        "Answer the following question using the context given below."
        "If you don't know the answer for certain, say I don't know.\n"
        f"Context: {context}\n"
        f"Q: {question}\n"
    )


def reply(input, dataset_name):
    """Answer a question from the most similar passages of one dataset.

    The question is embedded with ``generate_embeddings``, the five closest
    rows of the selected DataFrame are joined into a context, and that
    context plus the question is sent to the OpenAI completion endpoint.

    Args:
        input: The user's question. The name shadows the builtin but is kept
            so the existing signature does not change.
        dataset_name: Key into ``dataframes`` selecting the corpus to search.

    Returns:
        The stripped completion text, or a human-readable message when the
        dataset name is unknown, the question is empty, or any step fails.
        Exceptions are converted to text rather than propagated.
    """
    try:
        if dataset_name not in dataframes:
            return "Invalid dataset selected. Please select a valid dataset."
        # Whitespace-only text is treated the same as an empty question.
        if not input or not str(input).strip():
            return "Please Enter a Question to get an Answer"

        input_vector = generate_embeddings(input, model_id, hf_token)
        # The embedding service reports failures as {"error": ...}; surface
        # that message instead of an opaque numeric-conversion error.
        if isinstance(input_vector, dict) and "error" in input_vector:
            return f"An error occurred: {input_vector['error']}"

        data = _top_passages(dataframes[dataset_name], input_vector)
        # Snapshot of the retrieved rows, kept from the original behaviour.
        data.to_csv("sorted.csv")

        prompt = _build_prompt("\n".join(data["text"]), input)
        # NOTE(review): OpenAI has retired text-davinci-003; confirm the
        # replacement model (gpt-3.5-turbo-instruct is the documented
        # successor) before redeploying.
        completion = openai.Completion.create(
            prompt=prompt,
            temperature=1,
            max_tokens=500,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
            model="text-davinci-003",
        )
        return completion["choices"][0]["text"].strip(" \n")
    except Exception as e:
        # UI boundary: report the failure in the answer box instead of raising.
        return f"An error occurred: {e}"
# Gradio user interface.
#
# Components come from the top-level gr.Dropdown / gr.Textbox classes. The
# gr.inputs / gr.outputs namespaces used before were removed in Gradio 4, and
# the initial dropdown selection is now passed as `value` instead of `default`.
csv_dropdown = gr.Dropdown(
    label="Select the Book",
    # Derived from the lookup table so the menu cannot drift out of sync with
    # the datasets reply() accepts; dict order matches the original list.
    choices=list(dataframes),
    value="AP_Bio",
)
input_text = gr.Textbox(
    label="Enter your questions here",
    placeholder="E.g. What is DNA?",
    lines=3,
)
text_output = gr.Textbox(label="Answer")

description = "Scholar Bot is a question answering system designed to provide accurate and relevant answers to questions from this book hosted by OpenStax https://openstax.org/details/books/biology-ap-courses. Simply enter your question in the text box above and Scholar Bot will use advanced natural language processing algorithms to search a large corpus of biology text to find the best answer for you. Scholar Bot uses the Sentence Transformers model to generate embeddings of text, and OpenAI's GPT-3 language model to provide answers to your questions."

# The deprecated `theme="light"` and `layout="vertical"` arguments are dropped,
# and flagging is disabled with the string form that current Gradio expects.
ui = gr.Interface(
    fn=reply,
    inputs=[input_text, csv_dropdown],
    outputs=[text_output],
    title="Scholar Bot",
    description=description,
    allow_flagging="never",
    examples=[["What is the function of DNA polymerase?", "AP_Bio"]],
)
ui.launch()