# KlauskD / app.py — Hugging Face Space by Arnasltlt (commit e5dcef9)
import ast
import os

import gradio as gr
import openai
import pandas as pd
from openai.embeddings_utils import distances_from_embeddings
# OpenAI credentials come from the environment; raises KeyError if unset.
openai.api_key = os.environ["openai_key"]

# CSV produced by the preprocessing pipeline: one row per transcript chunk,
# with its embedding vector stored as a stringified Python list.
final_file = 'processed/embeddings_with_metadata.csv'

# Load the combined DataFrame
df_combined = pd.read_csv(final_file, index_col=0)

# Convert the 'embeddings' column from its string form back to a list of
# floats. ast.literal_eval parses only Python literals — unlike eval(),
# which would execute arbitrary code embedded in the CSV.
df_combined['embeddings'] = df_combined['embeddings'].apply(ast.literal_eval)
# ################################################################################
# ### Step 12
# ################################################################################
def create_context(
    question, df_combined, max_len=1800, size="ada"
):
    """
    Build a context string for *question* from the most similar rows of
    *df_combined*.

    Parameters
    ----------
    question : str
        User question; embedded and matched against the stored chunks.
    df_combined : pandas.DataFrame
        Must have 'embeddings' (list[float]), 'text' and 'n_tokens' columns.
        A 'distances' column is added to it as a side effect.
    max_len : int
        Approximate token budget for the assembled context.
    size : str
        Unused; kept for backward compatibility with existing callers.

    Returns
    -------
    dict
        {'context': str, 'add_context': list[dict]} where each dict carries
        the source file name and start/end values of a matched chunk.
    """
    # Embed the question with the same model used for the stored chunks.
    q_embeddings = openai.Embedding.create(
        input=question, engine='text-embedding-ada-002'
    )['data'][0]['embedding']

    # Cosine distance from the question to every stored chunk embedding.
    df_combined['distances'] = distances_from_embeddings(
        q_embeddings, df_combined['embeddings'].values, distance_metric='cosine'
    )

    # Metadata lookup table — read ONCE here; the original re-read this CSV
    # on every loop iteration.
    df_old = pd.read_csv('processed/ddd .csv')

    returns = []
    cur_len = 0
    additional_context_list = []

    # Walk chunks from most to least similar until the token budget is spent.
    for i, row in df_combined.sort_values('distances', ascending=True).iterrows():
        try:
            additional_context = {
                "fname_value": df_old.at[i, 'fname'],
                "start": df_old.at[i, 'start'],
                "end": df_old.at[i, 'end'],
            }
        except KeyError:
            # Index missing from the metadata file; skip this chunk.
            print(f"KeyError: {i} is not a valid index value")
            continue
        additional_context_list.append(additional_context)

        # +4 approximates the separator tokens added when joining chunks.
        cur_len += row['n_tokens'] + 4
        # If the context is too long, stop before adding this chunk's text.
        if cur_len > max_len:
            break
        returns.append(row["text"])

    # Join the selected chunk texts with a visible separator.
    context = "\n\n###\n\n".join(returns)
    return {'context': context, "add_context": additional_context_list}
def answer_question(
    df_combined,
    model="text-davinci-003",
    question="",
    max_len=2500,
    size="ada",
    debug=False,
    max_tokens=400,
    stop_sequence=None
):
    """
    Answer a question based on the most similar context from the dataframe
    texts.

    Parameters
    ----------
    df_combined : pandas.DataFrame
        Chunk table passed through to create_context().
    model : str
        Completion model name.
    question : str
        The user question.
    max_len, size :
        Forwarded to create_context().
    debug : bool
        When True, print the retrieved context before querying the model.
    max_tokens : int
        Completion length cap.
    stop_sequence :
        Optional stop sequence(s) for the completion API.

    Returns
    -------
    dict
        Always has 'Answer', 'Context' and 'Additional_context' keys; on
        failure 'Answer' carries the error text so callers can still index
        the result.
    """
    context = create_context(
        question,
        df_combined,
        max_len=max_len,
        size=size,
    )
    # If debug, print the retrieved context. NOTE: the original rebound
    # `context` to the string here, which made the f-string below raise
    # TypeError whenever debug=True; keep the dict intact instead.
    if debug:
        print("Context:\n" + context['context'])
        print("\n\n")
    try:
        # Create a completion using the question and retrieved context.
        response = openai.Completion.create(
            prompt=f"You're an assistant of a Dr. that holds a phd in Biochemistry. You help to answer peoples questions using Dr. Dougs transcripts. Answer the question in a short but clearly understandable way given the provided transcript , and if the question can't be answered based on the transcript, say \"I don't know yet.\"\n\n \"\n\nTranscript: {context['context']}\n\n---\n\nQuestion: {question}\nAnswer:",
            temperature=0,
            max_tokens=max_tokens,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
            stop=stop_sequence,
            model=model,
        )
        answer = response["choices"][0]["text"].strip()
        return {'Answer': f'{answer}', 'Context': f'{context["context"]}',
                'Additional_context': f'{context["add_context"]}'}
    except Exception as e:
        print(e)
        # Preserve the dict shape on failure; the original returned "" here,
        # which made the caller's key lookups crash.
        return {'Answer': f'Error: {e}', 'Context': '', 'Additional_context': ''}
# Prompt-formatting markers (currently unused; left over from an earlier
# prompt layout).
start_sequence = "\nQuestion:"
restart_sequence = "\nAnswer: "
# Example question shown as the textbox placeholder. Lithuanian for:
# "Which essential oil is most suitable for improving brain function?
#  Answer in Lithuanian."
prompt = "Koks tinkamiausias eterinis aliejus pagerinti smegenų veiklai? Atsakyk Lietuviškai."
def chatgpt_clone(input, history):
    """
    Gradio submit handler: answer the latest message and extend the chat
    history.

    Parameters
    ----------
    input : str
        The newest user message from the textbox.
    history : list[tuple[str, str]] | None
        Prior (question, answer) pairs from the gr state, or None on the
        first turn.

    Returns
    -------
    tuple
        (history, history, context_html, additional_context_html) matching
        the outputs wired to submit.click().
    """
    history = history or []
    # Flatten all previous (question, answer) pairs plus the new message
    # into a single prompt string so earlier turns provide context.
    s = list(sum(history, ()))
    s.append(input)
    inp = ' '.join(s)
    output_og = answer_question(df_combined, question=f"{inp}", debug=False)
    if not output_og:
        # answer_question signals failure with a falsy value ("") — keep the
        # UI responsive instead of raising KeyError on the empty result.
        output, context, additional_context = "I don't know yet.", "", ""
    else:
        output = output_og['Answer'].replace('\n', ' ')
        context = output_og['Context'].replace('\n', '<br>')
        additional_context = output_og['Additional_context'].replace('\n', '<br>')
    history.append((input, output))
    return history, history, context, additional_context
# ---- Gradio UI wiring (runs at import time; launch() blocks at the end) ----
block = gr.Blocks()
with block:
    with gr.Tab("Chat"):
        # Header — Lithuanian for "Conversation with Mr. D."
        gr.Markdown("""<h1><center>Pokalbis su ponu D.</center></h1>
    """)
        chatbot = gr.Chatbot()
        # The placeholder shows the example question defined above.
        message = gr.Textbox(placeholder=prompt)
        # Persists the (question, answer) history across submissions.
        state = gr.Variable()
        submit = gr.Button("SEND")
        # df = gr.dataframe(columns=['text', 'n_tokens','embeddings'], data=[df])
    with gr.Tab("Data"):
        #context = gr.TextArea(label="Context")
        # HTML view so '<br>'-joined context renders with line breaks.
        context = gr.HTML(label="Context")
    with gr.Tab("Video"):
        gr.Markdown("""<h1><center>Video</center></h1>
    """)
        # NOTE(review): gr.Video expects a playable media file/URL; a YouTube
        # page link may not render — confirm in the running app.
        gr.Video("https://www.youtube.com/watch?v=3q3Y8ZdD0aQ")
        additional_context = gr.TextArea(label="Context")
    # SEND updates: chat widget, state, and both context views.
    submit.click(chatgpt_clone, inputs=[message, state], outputs=[chatbot, state, context, additional_context])
block.launch()
##archive
# HF_TOKEN = os.getenv('HF_TOKEN')
# hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, "FeedbackontalkingtoD")
#
# with gr.Blocks() as demo:
# klausimas = gr.Textbox(label="Klausimas")
# atsakymas = gr.Textbox(label="Atsakymas!")
# klausimas.change(answer_question_gr, klausimas, atsakymas)
#
#
# demo.launch()