import requests
import os
import gradio as gr
import time
import heapq
import re
from utils import package_installer
package_installer('sentence_transformers')
package_installer('nbconvert')
package_installer('inflect')
# package_installer('nbformat')
# package_installer('beautifulsoup4')
from sentence_transformers import SentenceTransformer, util
import nbconvert
import nbformat
from bs4 import BeautifulSoup
from inflect import engine
inflect_engine = engine()
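# Used below to address notebook cells in words and ordinals, e.g.
# inflect_engine.number_to_words(3) -> 'three', inflect_engine.ordinal(3) -> '3rd'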
def convert_ipynb_to_html(input_file):
    # Load the .ipynb file into an nbformat.NotebookNode object
    notebook = nbformat.read(input_file, as_version=4)
    # Convert using the HTML exporter
    html_exporter = nbconvert.HTMLExporter()
    (body, resources) = html_exporter.from_notebook_node(notebook)
    # Write to an output html file if needed:
    # with open(output_file, 'w') as f:
    #     f.write(body)
    return body
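# A minimal usage sketch (assumes a local notebook file named 'example.ipynb'):
#   html_body = convert_ipynb_to_html('example.ipynb')
#   print(html_body[:200])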
API_TOKEN = os.environ['HF_TOKEN']
API_URL = "https://api-inference.huggingface.co/models/microsoft/Phi-3-mini-4k-instruct"
headers = {"Authorization": f"Bearer {API_TOKEN}"}
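# This targets the serverless HF Inference API; HF_TOKEN must be set as a Space
# secret (or local environment variable) for the Authorization header to work.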
embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
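# all-MiniLM-L6-v2 maps each input string to a 384-dimensional vector; cosine
# similarity between these vectors drives the retrieval logic in chat() below.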
def user_prompt_template(user_msg: str):
    return f"<|user|>\n{user_msg}<|end|>\n<|assistant|>"

def assistant_response_template(assistant_msg: str):
    return f"{assistant_msg}<|end|>\n"
def query(payload):
    response = requests.post(API_URL, headers=headers, json=payload, timeout=120)
    return response.json()
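# On success the Inference API returns a list like [{'generated_text': '...'}];
# on failure (e.g. model loading, rate limits) it typically returns a dict with
# an 'error' key, which the fallback branch in chat() reports back to the user.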
def chat(message, history):
    formatted_user_msg = user_prompt_template(message['text'])
    hist_cop = history.copy()
    for item in history:
        # Drop file-upload turns ((filepath,), None) so only text exchanges remain
        if None in item and isinstance(item[0], tuple):
            hist_cop.remove(item)
    # Parse the uploaded notebook (if any) into indexed cells; fall back to an
    # empty index when no file is attached or conversion fails.
    indexed_cells_list = []
    index_comments = []
    try:
        html_code = convert_ipynb_to_html(message['files'][0])
        soup = BeautifulSoup(html_code, 'html.parser')
        text = soup.get_text()
        code_data = text.split('\n')
        # Group consecutive non-empty lines into one string per notebook cell
        string = ''
        cells_list = []
        for item in code_data:
            if len(item) > 0:
                string += item + '\n'
                continue
            if len(item) == 0 and len(string) > 0:
                cells_list.append(string)
                string = ''
        # Drop boilerplate fragments left over from the HTML export
        cells_list_copy = cells_list.copy()
        for item in cells_list:
            if item == 'Notebook\n' or item == 'In\xa0[\xa0]:\n':
                cells_list_copy.remove(item)
        # Prefix each cell with comments addressing it several ways (ordinal,
        # digits, words) so questions about cell numbers embed close to the cell
        for i in range(len(cells_list_copy)):
            itxt = cells_list_copy[i]
            cell_addresses = (f'# {inflect_engine.ordinal(i + 1)} cell\n'
                              f'# Cell Number: {i + 1}\n'
                              f'# Cell Number: {inflect_engine.number_to_words(i + 1)}\n')
            indexed_cells_list.append(cell_addresses + itxt)
            index_comments.append(cell_addresses)
        # cells = re.split(r"In\xa0\[[0-9\xa0]*\]:", text)
        # cells = [element.strip() for element in cells]
        # cells = [element for element in cells if element != '']
    except Exception:
        # No file in this message (or parsing failed); continue without notebook context
        pass
    # print(cells)
    # print()
    # print(len(cells))
    # cells_as_string = '\n'.join(cells)
    # Embed the cell-address comments and keep the 5 cells most similar to the query
    emb_formatted_user_msg = embedding_model.encode(formatted_user_msg, convert_to_tensor=True)
    top_5_cells = []
    if index_comments:
        emb_cells = embedding_model.encode(index_comments, convert_to_tensor=True)
        cosine_sim_0 = util.cos_sim(emb_formatted_user_msg, emb_cells)
        top_k = min(5, len(indexed_cells_list))
        top_indices = heapq.nlargest(top_k, range(len(indexed_cells_list)),
                                     key=lambda idx: cosine_sim_0[0][idx].item())
        # Re-sort into notebook order so the retrieved cells read top to bottom
        top_5_cells = [indexed_cells_list[idx] for idx in sorted(top_indices)]
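    # Note: util.cos_sim(query_emb, candidate_embs) returns a [1 x N] tensor,
    # e.g. tensor([[0.41, 0.12, 0.77]]) for 3 candidates, so row [0] holds one
    # score per candidate; the same retrieval pattern is reused for history below.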
    # Retrieve the 2 past exchanges most similar to the current query
    top_2_chats = None
    if hist_cop:
        chat_history = [user_prompt_template(item[0]) + assistant_response_template(item[1])
                        for item in hist_cop]
        emb_chat_history = embedding_model.encode(chat_history, convert_to_tensor=True)
        cosine_similarity_scores = util.cos_sim(emb_formatted_user_msg, emb_chat_history)
        top_k = min(2, len(chat_history))
        top_indices = heapq.nlargest(top_k, range(len(chat_history)),
                                     key=lambda idx: cosine_similarity_scores[0][idx].item())
        top_2_chats = [chat_history[idx] for idx in sorted(top_indices)]
    similar_chat_history = ''
    if top_2_chats:
        for chats in top_2_chats:
            # formatted_assistant_msg = chats[1].replace(chats[0],'').strip().removesuffix('<|end|>')
            similar_chat_history += chats
    # prompt = f"<|user|>\n{message}<|end|>\n<|assistant|>"
    # Build the final prompt: similar past exchanges, then retrieved cells + question
    top_5_cells_string = '\n'.join(top_5_cells)
    context_plus_message = top_5_cells_string + message['text']
    formatted_context_plus_message = user_prompt_template(context_plus_message)
    user_input = similar_chat_history + formatted_context_plus_message
    # print(user_input)
    # print('-'*20)
    # print('\n')
    inp_dict = {"inputs": user_input,
                "parameters": {"max_new_tokens": 750, "temperature": 0.01}}
    output = query(inp_dict)
    try:
        output_text = output[0]['generated_text']
        # The API echoes the prompt, so strip it along with the trailing end token
        formatted_assistant_msg = output_text.replace(user_input, '').strip().removesuffix('<|end|>')
    except (TypeError, KeyError, IndexError):
        if isinstance(output, dict):
            formatted_assistant_msg = f"An error occurred; output is a dict with items: {output.items()}"
        else:
            formatted_assistant_msg = f"An error occurred; output is of type {type(output)} and has length {len(output)}"
    # Debug logging
    print(user_input)
    print()
    print(indexed_cells_list)
    return formatted_assistant_msg
demo = gr.ChatInterface(chat, multimodal=True)
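# With multimodal=True, Gradio passes `message` as a dict such as
# {'text': 'user question', 'files': ['/tmp/gradio/upload.ipynb']}.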
if __name__ == '__main__':
    demo.launch()
# import gradio as gr
# def process_file(file_path):
# # This function will be called when a file is uploaded.
# # 'file_path' is a string that contains the path to the uploaded file.
# # You can read the file using this path and process it as needed.
# # For example, you can return the name of the file:
# return f"You uploaded {file_path}"
# iface = gr.Interface(
# fn=process_file, # the function to call when a file is uploaded
# inputs=gr.File(), # creates a file upload button
# outputs="text" # the output of 'process_file' is text
# )
# iface.launch() |