|
import ast
import os

import gradio as gr
import openai
import pandas as pd
from openai.embeddings_utils import distances_from_embeddings
|
|
|
# --- Configuration & data loading -------------------------------------------

# API key must be present in the environment; raises KeyError if missing.
openai.api_key = os.environ["openai_key"]

final_file = 'processed/embeddings_with_metadata.csv'

# Load the precomputed embeddings table; the first CSV column is the index.
df_combined = pd.read_csv(final_file, index_col=0)

# Embeddings are stored in the CSV as stringified Python lists; parse them
# back into real lists. ast.literal_eval replaces the previous eval() call:
# it only accepts literal structures, so malformed or malicious CSV content
# cannot execute arbitrary code.
df_combined['embeddings'] = df_combined['embeddings'].apply(ast.literal_eval)
|
|
|
|
|
|
|
|
|
def create_context(
    question, df_combined, max_len=1800, size="ada"
):
    """
    Create a context for a question by finding the most similar context from the dataframe.

    Embeds the question, ranks every row of *df_combined* by cosine distance
    to it, and concatenates the closest texts until the token budget is
    exhausted.

    Parameters
    ----------
    question : str
        The user question to embed and match against.
    df_combined : pandas.DataFrame
        Must contain 'embeddings' (list of floats), 'text' and 'n_tokens'
        columns. Mutated in place: a 'distances' column is added.
    max_len : int
        Approximate token budget for the assembled context.
    size : str
        Unused; kept for backward compatibility with existing callers.

    Returns
    -------
    dict
        'context'     : str  -- selected texts joined by "\\n\\n###\\n\\n"
        'add_context' : list -- metadata dicts (fname_value/start/end) for
                               the rows considered, in distance order.
    """
    # Embed the question with the same model used for the stored embeddings.
    q_embeddings = openai.Embedding.create(
        input=question, engine='text-embedding-ada-002'
    )['data'][0]['embedding']

    # Cosine distance from the question to every stored embedding.
    df_combined['distances'] = distances_from_embeddings(
        q_embeddings, df_combined['embeddings'].values,
        distance_metric='cosine'
    )

    # BUGFIX: this metadata CSV was previously re-read on EVERY loop
    # iteration; load it once up front instead. (The odd 'ddd .csv' name,
    # space included, is the real on-disk filename.)
    df_old = pd.read_csv('processed/ddd .csv')

    returns = []
    additional_context_list = []
    cur_len = 0
    for i, row in df_combined.sort_values('distances', ascending=True).iterrows():
        try:
            additional_context = {
                "fname_value": df_old.at[i, 'fname'],
                "start": df_old.at[i, 'start'],
                "end": df_old.at[i, 'end'],
            }
        except KeyError:
            # Row index not present in the metadata file; skip this row.
            print(f"KeyError: {i} is not a valid index value")
            continue
        additional_context_list.append(additional_context)

        # 4 extra tokens approximate the "\n\n###\n\n" separator cost.
        cur_len += row['n_tokens'] + 4

        # Stop once the budget is exceeded. Note: the metadata for this row
        # has already been recorded above (preserves original behavior).
        if cur_len > max_len:
            break

        returns.append(row["text"])

    context = "\n\n###\n\n".join(returns)
    return {'context': context, "add_context": additional_context_list}
|
|
|
|
|
def answer_question(
    df_combined,
    model="text-davinci-003",
    question="",
    max_len=2500,
    size="ada",
    debug=False,
    max_tokens=400,
    stop_sequence=None
):
    """
    Answer a question based on the most similar context from the dataframe texts.

    Parameters
    ----------
    df_combined : pandas.DataFrame
        Embeddings table passed through to create_context().
    model : str
        Completion model name.
    question : str
        The question to answer.
    max_len : int
        Token budget forwarded to create_context().
    size : str
        Unused; forwarded to create_context() for compatibility.
    debug : bool
        If True, print the retrieved context before querying the model.
    max_tokens : int
        Completion length limit.
    stop_sequence : str | list | None
        Optional stop sequence(s) for the completion API.

    Returns
    -------
    dict with 'Answer', 'Context' and 'Additional_context' string values,
    or "" if the completion call raises.
    """
    context = create_context(
        question,
        df_combined,
        max_len=max_len,
        size=size,
    )

    if debug:
        # BUGFIX: previously this branch rebound `context` to the plain
        # string (context = context['context']), so the later
        # context['context'] lookup in the prompt raised TypeError whenever
        # debug=True. Print without rebinding instead.
        print("Context:\n" + context['context'])
        print("\n\n")

    try:
        response = openai.Completion.create(
            prompt=f"You're an assistant of a Dr. that holds a phd in Biochemistry. You help to answer peoples questions using Dr. Dougs transcripts. Answer the question in a short but clearly understandable way given the provided transcript , and if the question can't be answered based on the transcript, say \"I don't know yet.\"\n\n \"\n\nTranscript: {context['context']}\n\n---\n\nQuestion: {question}\nAnswer:",
            temperature=0,
            max_tokens=max_tokens,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
            stop=stop_sequence,
            model=model,
        )
        answer = response["choices"][0]["text"].strip()

        # All three values are stringified (add_context is a list rendered
        # via f-string) because the UI callback runs str.replace on them.
        return {'Answer': f'{answer}', 'Context': f'{context["context"]}','Additional_context':f'{context["add_context"]}'}
    except Exception as e:
        # Best-effort: log and return an empty string so the app keeps
        # running (note: callers that index the result will still fail).
        print(e)
        return ""
|
|
|
|
|
# Transcript sequence markers (currently not used by the prompt construction
# above; kept for reference/experimentation).
start_sequence = "\nQuestion:"

restart_sequence = "\nAnswer: "

# Placeholder question shown in the chat textbox (Lithuanian: "Which essential
# oil is best for improving brain function? Answer in Lithuanian.").
prompt = "Koks tinkamiausias eterinis aliejus pagerinti smegenų veiklai? Atsakyk Lietuviškai."
|
|
|
|
|
def chatgpt_clone(user_input, history):
    """
    Gradio callback: answer the user's message and update the chat history.

    Parameters
    ----------
    user_input : str
        Message typed into the textbox. (Renamed from `input`, which shadowed
        the builtin; gradio passes this argument positionally, so callers are
        unaffected.)
    history : list | None
        Accumulated (question, answer) pairs from the gr.Variable state;
        None on the first call.

    Returns
    -------
    tuple
        (history, history, context_html, additional_context) matching the
        four output components wired to the SEND button.
    """
    history = history or []
    # Flatten all previous turns plus the new message into one query string.
    # A comprehension is more robust than the old sum(history, ()): it works
    # whether gradio hands each turn back as a tuple or a list.
    parts = [text for turn in history for text in turn]
    parts.append(user_input)
    inp = ' '.join(parts)
    output_og = answer_question(df_combined, question=f"{inp}", debug=False)
    # Newlines become spaces in the chat bubble and <br> in the HTML panes.
    output = output_og['Answer'].replace('\n', ' ')
    context = output_og['Context'].replace('\n', '<br>')
    additional_context = output_og['Additional_context'].replace('\n', '<br>')
    history.append((user_input, output))
    return history, history, context, additional_context
|
|
|
|
|
# --- Gradio UI ---------------------------------------------------------------
# Three-tab app: chat interface, retrieved-context view, and a video tab.
block = gr.Blocks()

with block:
    with gr.Tab("Chat"):
        gr.Markdown("""<h1><center>Pokalbis su ponu D.</center></h1>
    """)
        chatbot = gr.Chatbot()
        # Textbox placeholder is the example question defined above.
        message = gr.Textbox(placeholder=prompt)
        # Session state holding the (question, answer) history tuples.
        state = gr.Variable()
        submit = gr.Button("SEND")

    with gr.Tab("Data"):
        # Shows the transcript chunks used to answer (HTML with <br> breaks).
        context = gr.HTML(label="Context")

    with gr.Tab("Video"):
        gr.Markdown("""<h1><center>Video</center></h1>
    """)
        gr.Video("https://www.youtube.com/watch?v=3q3Y8ZdD0aQ")
        # Metadata (fname/start/end) for the matched transcript rows.
        additional_context = gr.TextArea(label="Context")

    # Wire the SEND button: chatgpt_clone returns
    # (chat history, state, context html, additional context text).
    submit.click(chatgpt_clone, inputs=[message, state], outputs=[chatbot, state, context, additional_context])

block.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|