import os

import gradio as gr
import openai
import pandas as pd
from openai.embeddings_utils import distances_from_embeddings

openai.api_key = os.environ["openai_key"]

final_file = 'processed/embeddings_with_metadata.csv'

# Load the combined DataFrame of transcript chunks + precomputed embeddings.
df_combined = pd.read_csv(final_file, index_col=0)

# The 'embeddings' column is stored as the string repr of a list of floats;
# convert it back to a real list.
# NOTE(review): eval() executes arbitrary code from the CSV. The file is
# locally produced, but ast.literal_eval would be the safe drop-in here.
df_combined['embeddings'] = df_combined['embeddings'].apply(eval)


# ##############################################################################
# ### Step 12
# ##############################################################################

def create_context(question, df_combined, max_len=1800, size="ada"):
    """Build a prompt context for *question* from the most similar chunks.

    Embeds the question, ranks every stored chunk by cosine distance, and
    concatenates the closest chunks (joined by "\\n\\n###\\n\\n") until the
    token budget *max_len* is exhausted.

    Args:
        question: The user's question text.
        df_combined: DataFrame with 'text', 'n_tokens' and 'embeddings' columns.
        max_len: Maximum number of tokens to pack into the context.
        size: Unused; kept for interface compatibility.

    Returns:
        dict with:
            'context': the joined context string.
            'add_context': list of {'fname_value', 'start', 'end'} metadata
                dicts for the chunks that were considered.
    """
    # Embed the question with the same model used for the stored embeddings.
    q_embeddings = openai.Embedding.create(
        input=question, engine='text-embedding-ada-002'
    )['data'][0]['embedding']

    # Cosine distance of every stored chunk to the question.
    df_combined['distances'] = distances_from_embeddings(
        q_embeddings, df_combined['embeddings'].values, distance_metric='cosine'
    )

    # FIX: this metadata CSV was re-read from disk on *every* loop iteration;
    # load it once before iterating.
    df_old = pd.read_csv('processed/ddd .csv')

    returns = []
    additional_context_list = []
    cur_len = 0

    for i, row in df_combined.sort_values('distances', ascending=True).iterrows():
        try:
            additional_context = {
                "fname_value": df_old.at[i, 'fname'],
                "start": df_old.at[i, 'start'],
                "end": df_old.at[i, 'end'],
            }
        except KeyError:
            print(f"KeyError: {i} is not a valid index value")
            continue
        # NOTE(review): metadata is appended *before* the length check, so
        # add_context can hold one more entry than the texts actually kept —
        # preserved as-is; confirm whether that is intended.
        additional_context_list.append(additional_context)

        # +4 approximates the tokens of the "\n\n###\n\n" separator.
        cur_len += row['n_tokens'] + 4
        if cur_len > max_len:
            break
        returns.append(row["text"])

    context = "\n\n###\n\n".join(returns)
    return {'context': context, "add_context": additional_context_list}


def answer_question(
    df_combined,
    model="text-davinci-003",
    question="",
    max_len=2500,
    size="ada",
    debug=False,
    max_tokens=400,
    stop_sequence=None,
):
    """Answer *question* from the most similar transcript context.

    Args:
        df_combined: DataFrame passed through to create_context().
        model: Completion model name.
        question: The user's question.
        max_len: Token budget for the retrieved context.
        size: Unused; forwarded to create_context() for compatibility.
        debug: If True, print the retrieved context before querying the model.
        max_tokens: Completion length cap.
        stop_sequence: Optional stop sequence for the completion.

    Returns:
        dict with 'Answer', 'Context' and 'Additional_context' keys, or the
        empty string "" if the completion call fails.
    """
    context = create_context(question, df_combined, max_len=max_len, size=size)

    # FIX: the original rebound `context` to the plain string inside this
    # branch, so the later context['context'] lookups raised TypeError
    # whenever debug=True (silently swallowed by the except below).
    if debug:
        print("Context:\n" + context['context'])
        print("\n\n")

    try:
        # Create a completion using the question and context.
        response = openai.Completion.create(
            prompt=f"You're an assistant of a Dr. that holds a phd in Biochemistry. You help to answer peoples questions using Dr. Dougs transcripts. Answer the question in a short but clearly understandable way given the provided transcript , and if the question can't be answered based on the transcript, say \"I don't know yet.\"\n\n \"\n\nTranscript: {context['context']}\n\n---\n\nQuestion: {question}\nAnswer:",
            temperature=0,
            max_tokens=max_tokens,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
            stop=stop_sequence,
            model=model,
        )
        answer = response["choices"][0]["text"].strip()
        return {
            'Answer': f'{answer}',
            'Context': f'{context["context"]}',
            'Additional_context': f'{context["add_context"]}',
        }
    except Exception as e:
        # Best-effort: log and return "" so the Gradio callback can recover.
        print(e)
        return ""


start_sequence = "\nQuestion:"
restart_sequence = "\nAnswer: "

prompt = "Koks tinkamiausias eterinis aliejus pagerinti smegenų veiklai? Atsakyk Lietuviškai."


def chatgpt_clone(input, history):
    """Gradio click callback: answer *input* given the chat *history*.

    Returns (history, history, context_html, additional_context) matching the
    four Gradio output components wired up below.
    """
    history = history or []
    # Flatten prior (question, answer) pairs plus the new input into one query.
    s = list(sum(history, ()))
    s.append(input)
    inp = ' '.join(s)

    output_og = answer_question(df_combined, question=f"{inp}", debug=False)

    # FIX: answer_question returns "" on API failure; guard so the UI does
    # not crash with a TypeError on string indexing.
    if not output_og:
        history.append((input, ""))
        return history, history, "", ""

    output = output_og['Answer'].replace('\n', ' ')
    # FIX: the original source had a raw newline inside a single-quoted
    # string literal here (a SyntaxError as extracted). The Context is
    # rendered in a gr.HTML component, so newlines become <br> tags —
    # TODO(review): confirm this was the intended replacement markup.
    context = output_og['Context'].replace('\n', '<br>')
    additional_context = output_og['Additional_context'].replace('\n', ' ')
    history.append((input, output))
    return history, history, context, additional_context


block = gr.Blocks()

with block:
    with gr.Tab("Chat"):
        gr.Markdown("""

Pokalbis su ponu D.

""")
        chatbot = gr.Chatbot()
        message = gr.Textbox(placeholder=prompt)
        state = gr.Variable()
        submit = gr.Button("SEND")
    with gr.Tab("Data"):
        # Context is shown as HTML so <br> line breaks render.
        context = gr.HTML(label="Context")
    with gr.Tab("Video"):
        gr.Markdown("""

Video

""")
        gr.Video("https://www.youtube.com/watch?v=3q3Y8ZdD0aQ")
        additional_context = gr.TextArea(label="Context")
    submit.click(
        chatgpt_clone,
        inputs=[message, state],
        outputs=[chatbot, state, context, additional_context],
    )

block.launch()