Spaces:

noorulamean444
/

ChatBot_for_Jupyter_Notebook

Sleeping

App Files Files Community

ChatBot_for_Jupyter_Notebook / app.py

noorulamean444

Update app.py

7ec22a8 verified about 1 year ago

raw

history blame contribute delete

6.81 kB

	import requests
	import os
	import gradio as gr
	import time
	import heapq
	import re
	from utils import context_identifier, query, convert_ipynb_to_html
	from sentence_transformers import SentenceTransformer, util
	import nbconvert
	import nbformat
	from bs4 import BeautifulSoup
	from inflect import engine


	def user_prompt_template(user_msg:str):
	return f"<\|user\|>\n{user_msg}<\|end\|>\n<\|assistant\|>"


	def assistant_response_template(assistant_msg:str):
	return f"{assistant_msg}<\|end\|>\n"


	API_TOKEN = os.environ['HF_TOKEN']
	API_URL = "https://api-inference.huggingface.co/models/microsoft/Phi-3-mini-4k-instruct"
	headers = {"Authorization": f"Bearer {API_TOKEN}"}


	inflect_engine = engine()
	embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

	with gr.Blocks() as demo:

	def chat(message,history,file_path):

	context = context_identifier(message,API_TOKEN)
	print(context)
	hist_cop = history.copy()

	try:
	html_code = convert_ipynb_to_html(file_path)
	soup = BeautifulSoup(html_code, 'html.parser')
	text = soup.get_text()

	code_data_raw = text.split('In\xa0[\xa0]:')
	code_data_raw2 = [cell.strip() for cell in code_data_raw if cell.strip() != 'Notebook' and len(cell.strip()) > 0]

	code_data_cleaned = []
	for item in code_data_raw2:
	new_line_sequence = "\n\n\n\n\n"
	if new_line_sequence in item:
	split_items = [i.strip() for i in item.split(new_line_sequence) if len(i)>0]
	for j in split_items:
	if j != 'Notebook':
	code_data_cleaned.append(j)
	else:
	code_data_cleaned.append(item)


	indexed_cells_list = []
	index_comments = []
	for i in range(len(code_data_cleaned)):
	itxt = code_data_cleaned[i]
	cell_addresses = f'# Cell Number: {i+1}\n' + f'# Cell Number: {inflect_engine.number_to_words(i+1)}\n'
	if i+1 % 10 == 1:
	indexed_cells_list.append(f'# {i+1}st cell\n'+ cell_addresses + itxt)
	index_comments.append(f'# {i+1}st cell\n'+ cell_addresses)
	elif i+1 % 10 == 2:
	indexed_cells_list.append(f'# {i+1}nd cell\n' + cell_addresses + itxt)
	index_comments.append(f'# {i+1}nd cell\n' + cell_addresses)
	elif i+1 % 10 == 3:
	indexed_cells_list.append(f'# {i+1}rd cell\n' + cell_addresses + itxt)
	index_comments.append(f'# {i+1}rd cell\n' + cell_addresses)
	else:
	indexed_cells_list.append(f'# {i+1}th cell\n' + cell_addresses + itxt)
	index_comments.append(f'# {i+1}th cell\n' + cell_addresses)

	emb_cells = embedding_model.encode(index_comments,convert_to_tensor=True)
	emb_msg = embedding_model.encode(message,convert_to_tensor=True)
	cosine_sim_0 = util.cos_sim(emb_msg,emb_cells)

	top_5_cells_scores = heapq.nlargest(5,cosine_sim_0[0])
	top_5_cells = [indexed_cells_list[index] for index in sorted(list(cosine_sim_0[0]).index(score) for score in top_5_cells_scores)]

	top_2_chats = None
	if hist_cop:
	chat_history = [user_prompt_template(item[0]) + assistant_response_template(item[1]) for item in hist_cop]
	emb_chat_history = embedding_model.encode(chat_history,convert_to_tensor=True)
	cosine_similarity_scores = util.cos_sim(emb_msg,emb_chat_history)
	top_2_scores = heapq.nlargest(2,cosine_similarity_scores[0])
	top_2_chats = [chat_history[i] for i in sorted(list(cosine_similarity_scores[0]).index(val) for val in top_2_scores)]

	similar_chat_history = ''
	if top_2_chats:
	for chats in top_2_chats:

	similar_chat_history += chats

	top_5_cells_string = '\n'.join(top_5_cells)

	if context == 'notebook_cell_context':
	prompt = f"""
	You are a coding assistant who clarifies queries based on python. You will be given two types of context. One type of context
	consists of previous chat messages and the other consists of code from a jupyter notebook. Your task is to answer/explanation user
	query by picking relevant information from both context and coming up with the answer which explains the query. The user
	query is delimited by ####.

	previous_chat_context:
	{similar_chat_history}

	notebook_cell_context:
	{top_5_cells_string}

	####
	{message}
	####
	"""
	elif context == 'previous_cell_context':
	prompt = f"""
	You are a coding assistant who clarifies queries based on python. You will be given a context which consists of previous chat messages.
	Your task is to answer/explain user query based on the context.The user query is delimited by ####.

	previous_chat_context:
	{similar_chat_history}

	####
	{message}
	####
	"""

	except:
	prompt = message

	user_input = user_prompt_template(prompt)

	inp_dict = {"inputs":user_input,
	"parameters": {"max_new_tokens":750,"temperature":0.01}}
	output = query(url=API_URL,headers=headers,payload=inp_dict)

	try:
	output_text = output[0]['generated_text']
	formatted_assistant_msg = output_text.replace(user_input,'').strip().removesuffix('<\|end\|>')
	except:
	if type(output) == dict:
	formatted_assistant_msg = f"Error has occured, type of output is {type(output)} and items of output are: {output.items()}"
	else:
	formatted_assistant_msg = f"Error has occured, type of output is {type(output)} and length of output is: {len(output)}"

	output = ''
	for char in formatted_assistant_msg:
	output += char
	time.sleep(0.05)
	yield output

	file = gr.File(interactive=True,container=False)
	chatbot = gr.ChatInterface(fn=chat,fill_height=False,additional_inputs=[file],stop_btn='Stop Generation',
	description="[Read the Instructions here!](https://huggingface.co/spaces/noorulamean444/clma_try/blob/main/README.md)")


	demo.launch()