Spaces:
Runtime error
Runtime error
File size: 4,122 Bytes
2980408 31ca135 a9ea810 31ca135 2980408 9ddaca4 31ca135 9ddaca4 a9ea810 9ddaca4 7055aff 60c3d1b 8c2e21c 86e6300 5073e2b 44e5665 8c2e21c 359755a 8c2e21c c156bc5 8c2e21c a9ea810 675e3c4 aadb451 81f8cdc 62a1daa a9baa59 417ec9a 81f8cdc 6195c27 2980408 81f8cdc 2980408 65e4312 dbb5ddb 65e4312 81f8cdc aadb451 65e4312 2980408 cc21469 86e6300 1c7baa8 f2889c6 1c7baa8 f2889c6 c84f2f8 bd01a7f cc21469 f2889c6 a657bab 3a2d3ef 2980408 8c2e21c 2980408 3a2d3ef 2980408 92012a0 f2889c6 92012a0 2980408 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
import gradio as gr
import openai
import pandas as pd
import numpy as np
import csv
import os
from datasets import load_dataset
openai.api_key= os.environ.get("openai.api_key")
from openai.embeddings_utils import get_embedding
from openai.embeddings_utils import cosine_similarity
import requests
model_id = "sentence-transformers/all-MiniLM-L6-v2"
import json
hf_token = os.environ.get("hf_token")
import re
from sklearn.metrics.pairwise import cosine_similarity
def generate_embeddings(texts, model_id, hf_token):
    """Request embeddings for *texts* from the Hugging Face Inference API.

    Args:
        texts: Text to embed; sent unchanged as the ``inputs`` field of the
            JSON payload.
        model_id: Model identifier appended to the feature-extraction
            pipeline URL.
        hf_token: Token sent as a ``Bearer`` value in the ``Authorization``
            header.

    Returns:
        The decoded JSON body of the response.

    Raises:
        RuntimeError: If the API answers with a non-success HTTP status, or
            if the decoded body is an error object instead of embeddings.
    """
    api_url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{model_id}"
    headers = {"Authorization": f"Bearer {hf_token}"}
    payload = {"inputs": texts, "options": {"wait_for_model": True}}
    response = requests.post(api_url, headers=headers, json=payload)

    # Fail here, with the server's own message, instead of handing an error
    # payload to the caller as if it were an embedding.
    if not response.ok:
        raise RuntimeError(
            f"Embedding request failed with HTTP {response.status_code}: {response.text}"
        )

    embeddings = response.json()
    if isinstance(embeddings, dict) and "error" in embeddings:
        raise RuntimeError(f"Embedding request failed: {embeddings['error']}")
    return embeddings
# Load each book's dataset from the Hugging Face Hub and turn its 'train'
# split into a DataFrame carrying a zero-initialised "similarity" column.
AP_Bio = load_dataset('vjain/biology_AP_embeddings')
AP_Physics = load_dataset('vjain/AP_physics_embeddings')

df1 = pd.DataFrame(AP_Bio['train']).assign(similarity=0)
df2 = pd.DataFrame(AP_Physics['train']).assign(similarity=0)

# Registry mapping the dataset name chosen in the UI to its DataFrame.
dataframes = {"AP_Bio": df1, "AP_Physics": df2}
#df = pd.read_csv("TA_embeddings.csv")
#df["embedding"]=df["embedding"].apply(eval).apply(np.array)
def reply(input, dataset_name):
    """Answer a question using the most similar passages of the chosen dataset.

    The question is embedded with ``generate_embeddings``, every row of the
    selected DataFrame is scored against it with ``cosine_similarity``, and
    the text of the ten best-scoring rows is passed as context to an OpenAI
    completion.

    Args:
        input: The user's question. The name shadows the builtin but is kept
            because it is part of the function's interface.
        dataset_name: Key into the module-level ``dataframes`` mapping.

    Returns:
        The question followed by the model's answer, each terminated by a
        blank line; or a short human-readable message when the dataset name
        is unknown, the question is empty, or any step raises.
    """
    try:
        if dataset_name not in dataframes:
            return "Invalid dataset selected. Please select a valid dataset."
        if not input:
            return "Please Enter a Question to get an Answer"

        df = dataframes[dataset_name]
        input_vector = generate_embeddings(input, model_id, hf_token)

        # Score every stored embedding against the question embedding.
        # NOTE: this writes the "similarities" column onto the shared
        # module-level DataFrame, as the original code did.
        df["similarities"] = df["embedding"].apply(
            lambda x: cosine_similarity([x], [input_vector])[0][0]
        )
        data = df.sort_values("similarities", ascending=False).head(10)
        # Snapshot of the retrieved rows, overwritten on every call.
        data.to_csv("sorted.csv")

        context = "\n".join(data["text"])
        prompt = (
            "\n"
            "Answer the following question using the context given below."
            "If you don't know the answer for certain, say I don't know.\n"
            f"Context: {context}\n"
            f"Q: {input}\n"
        )

        # NOTE(review): uses the legacy Completion endpoint and model name;
        # confirm both are still available for the installed openai package.
        response = openai.Completion.create(
            prompt=prompt,
            temperature=1,
            max_tokens=500,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
            model="text-davinci-003",
        )["choices"][0]["text"].strip(" \n")

        # The original concatenated an undefined name `message` here, so every
        # successful request ended in a NameError; the answer is `response`.
        return input + "\n\n" + response + "\n\n"
    except Exception as e:
        # UI boundary: report the failure as text instead of crashing the app.
        return f"An error occurred: {e}"
# Gradio user interface.
# Components come from the top-level `gr` namespace: the `gr.inputs` and
# `gr.outputs` namespaces used previously are deprecated in Gradio 3.x and
# removed in later releases, and `default=` was renamed to `value=`.
# NOTE(review): assumes Gradio >= 3.0 is installed - confirm against the
# Space's requirements.
csv_dropdown = gr.Dropdown(
    label="Select the Book",
    choices=["AP_Bio", "AP_Physics"],
    value="AP_Bio",
)
input_text = gr.Textbox(
    label="Enter your questions here",
    placeholder="E.g. What is DNA?",
    lines=3,
)
text_output = gr.Textbox(label="Answer")
description = "Scholar Bot is a question answering system designed to provide accurate and relevant answers to questions from this book hosted by OpenStax https://openstax.org/details/books/biology-ap-courses. Simply enter your question in the text box above and Scholar Bot will use advanced natural language processing algorithms to search a large corpus of biology text to find the best answer for you. Scholar Bot uses the Sentence Transformers model to generate embeddings of text, and OpenAI's GPT-3 language model to provide answers to your questions."

# `layout` is omitted (no longer an Interface option) and flagging is turned
# off with the string form "never" rather than the deprecated boolean.
ui = gr.Interface(
    fn=reply,
    inputs=[input_text, csv_dropdown],
    outputs=[text_output],
    title="Scholar Bot",
    description=description,
    theme="light",
    allow_flagging="never",
    examples=[["What is the function of DNA polymerase?", "AP_Bio"]],
)
ui.launch()