noorulamean444 committed on
Commit
c039f17
1 Parent(s): d920f18

Almost there version

Browse files
Files changed (1) hide show
  1. app.py +19 -82
app.py CHANGED
@@ -4,7 +4,7 @@ import gradio as gr
4
  import time
5
  import heapq
6
  import re
7
- from utils import package_installer
8
 
9
  package_installer('sentence_transformers')
10
  package_installer('nbconvert')
@@ -18,41 +18,22 @@ import nbformat
18
  from bs4 import BeautifulSoup
19
  from inflect import engine
20
 
21
- inflect_engine = engine()
22
-
23
- def convert_ipynb_to_html(input_file):
24
- # Load .ipynb file into a nbformat.NotebookNode object
25
- notebook = nbformat.read(input_file, as_version=4)
26
-
27
- # Convert using HTML exporter
28
- html_exporter = nbconvert.HTMLExporter()
29
- (body, resources) = html_exporter.from_notebook_node(notebook)
30
- # Write to output html file
31
- # with open(output_file, 'w') as f:
32
- # f.write(body)
33
-
34
- return body
35
-
36
-
37
- API_TOKEN = os.environ['HF_TOKEN']
38
-
39
- API_URL = "https://api-inference.huggingface.co/models/microsoft/Phi-3-mini-4k-instruct"
40
- headers = {"Authorization": f"Bearer {API_TOKEN}"}
41
-
42
- embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
43
 
44
  def user_prompt_template(user_msg:str):
45
  return f"<|user|>\n{user_msg}<|end|>\n<|assistant|>"
46
 
 
47
  def assistant_response_template(assistant_msg:str):
48
  return f"{assistant_msg}<|end|>\n"
49
 
50
 
51
- def query(payload):
52
- response = requests.post(API_URL, headers=headers, json=payload,timeout=120)
53
- return response.json()
54
 
55
-
 
 
56
 
57
  def chat(message,history):
58
 
@@ -68,28 +49,15 @@ def chat(message,history):
68
  soup = BeautifulSoup(html_code, 'html.parser')
69
  text = soup.get_text()
70
 
71
- code_data = text.split('\n')
72
- string = ''
73
- cells_list = []
74
- for item in code_data:
75
-
76
- if len(item) > 0:
77
- string += item + '\n'
78
- continue
79
-
80
- if len(item) == 0 and len(string) > 0:
81
- cells_list.append(string)
82
- string = ''
83
 
84
- cells_list_copy = cells_list.copy()
85
- for item in cells_list:
86
- if item == 'Notebook\n' or item == 'In\xa0[\xa0]:\n':
87
- cells_list_copy.remove(item)
88
 
89
  indexed_cells_list = []
90
  index_comments = []
91
- for i in range(len(cells_list_copy)):
92
- itxt = cells_list_copy[i]
93
  cell_addresses = f'# Cell Number: {i+1}\n' + f'# Cell Number: {inflect_engine.number_to_words(i+1)}\n'
94
  if i+1 % 10 == 1:
95
  indexed_cells_list.append(f'# {i+1}st cell\n'+ cell_addresses + itxt)
@@ -103,34 +71,23 @@ def chat(message,history):
103
  else:
104
  indexed_cells_list.append(f'# {i+1}th cell\n' + cell_addresses + itxt)
105
  index_comments.append(f'# {i+1}th cell\n' + cell_addresses)
106
-
107
-
108
- # cells = re.split(r"In\xa0\[[0-9\xa0]*\]:",text)
109
- # cells = [element.strip() for element in cells]
110
- # cells = [element for element in cells if element != '']
111
  except:
112
  pass
113
-
114
- # print(cells)
115
- # print()
116
- # print(len(cells))
117
 
118
- # cells_as_string = '\n'.join(cells)
119
  emb_cells = embedding_model.encode(index_comments,convert_to_tensor=True)
120
-
121
- emb_formatted_user_msg = embedding_model.encode(formatted_user_msg,convert_to_tensor=True)
122
- cosine_sim_0 = util.cos_sim(emb_formatted_user_msg,emb_cells)
123
 
124
  top_5_cells_scores = heapq.nlargest(5,cosine_sim_0[0])
125
  top_5_cells = [indexed_cells_list[index] for index in sorted(list(cosine_sim_0[0]).index(score) for score in top_5_cells_scores)]
126
 
127
-
128
  top_2_chats = None
129
  if hist_cop:
130
  chat_history = [user_prompt_template(item[0]) + assistant_response_template(item[1]) for item in hist_cop]
131
  # emb_formatted_user_msg = embedding_model.encode(formatted_user_msg,convert_to_tensor=True)
132
  emb_chat_history = embedding_model.encode(chat_history,convert_to_tensor=True)
133
- cosine_similarity_scores = util.cos_sim(emb_formatted_user_msg,emb_chat_history)
134
  top_2_scores = heapq.nlargest(2,cosine_similarity_scores[0])
135
  top_2_chats = [chat_history[i] for i in sorted(list(cosine_similarity_scores[0]).index(val) for val in top_2_scores)]
136
 
@@ -153,7 +110,7 @@ def chat(message,history):
153
 
154
  inp_dict = {"inputs":user_input,
155
  "parameters": {"max_new_tokens":750,"temperature":0.01}}
156
- output = query(inp_dict)
157
  #
158
  try:
159
  output_text = output[0]['generated_text']
@@ -164,30 +121,10 @@ def chat(message,history):
164
  else:
165
  formatted_assistant_msg = f"Error has occured, type of output is {type(output)} and length of output is: {len(output)}"
166
 
167
-
168
- print(user_input)
169
- print()
170
- print(indexed_cells_list)
171
  return formatted_assistant_msg
172
 
 
173
  demo = gr.ChatInterface(chat, multimodal=True)
174
 
175
  if __name__ == '__main__':
176
  demo.launch()
177
-
178
- # import gradio as gr
179
-
180
- # def process_file(file_path):
181
- # # This function will be called when a file is uploaded.
182
- # # 'file_path' is a string that contains the path to the uploaded file.
183
- # # You can read the file using this path and process it as needed.
184
- # # For example, you can return the name of the file:
185
- # return f"You uploaded {file_path}"
186
-
187
- # iface = gr.Interface(
188
- # fn=process_file, # the function to call when a file is uploaded
189
- # inputs=gr.File(), # creates a file upload button
190
- # outputs="text" # the output of 'process_file' is text
191
- # )
192
-
193
- # iface.launch()
 
4
  import time
5
  import heapq
6
  import re
7
+ from utils import package_installer, cell_number_extractor, query, convert_ipynb_to_html
8
 
9
  package_installer('sentence_transformers')
10
  package_installer('nbconvert')
 
18
  from bs4 import BeautifulSoup
19
  from inflect import engine
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  def user_prompt_template(user_msg:str):
23
  return f"<|user|>\n{user_msg}<|end|>\n<|assistant|>"
24
 
25
+
26
  def assistant_response_template(assistant_msg:str):
27
  return f"{assistant_msg}<|end|>\n"
28
 
29
 
30
+ API_TOKEN = os.environ['HF_TOKEN']
31
+ API_URL = "https://api-inference.huggingface.co/models/microsoft/Phi-3-mini-4k-instruct"
32
+ headers = {"Authorization": f"Bearer {API_TOKEN}"}
33
 
34
+
35
+ inflect_engine = engine()
36
+ embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
37
 
38
  def chat(message,history):
39
 
 
49
  soup = BeautifulSoup(html_code, 'html.parser')
50
  text = soup.get_text()
51
 
52
+ code_data_raw = text.split('In\xa0[\xa0]:')
53
+ code_data_cleaned = [cell.strip() for cell in code_data_raw if cell.strip() != 'Notebook' and len(cell.strip()) > 0]
 
 
 
 
 
 
 
 
 
 
54
 
55
+ cdc_string = '\n\n'.join(code_data_cleaned)
 
 
 
56
 
57
  indexed_cells_list = []
58
  index_comments = []
59
+ for i in range(len(code_data_cleaned)):
60
+ itxt = code_data_cleaned[i]
61
  cell_addresses = f'# Cell Number: {i+1}\n' + f'# Cell Number: {inflect_engine.number_to_words(i+1)}\n'
62
  if i+1 % 10 == 1:
63
  indexed_cells_list.append(f'# {i+1}st cell\n'+ cell_addresses + itxt)
 
71
  else:
72
  indexed_cells_list.append(f'# {i+1}th cell\n' + cell_addresses + itxt)
73
  index_comments.append(f'# {i+1}th cell\n' + cell_addresses)
74
+
 
 
 
 
75
  except:
76
  pass
 
 
 
 
77
 
 
78
  emb_cells = embedding_model.encode(index_comments,convert_to_tensor=True)
79
+ emb_msg = embedding_model.encode(message['text'],convert_to_tensor=True)
80
+ cosine_sim_0 = util.cos_sim(emb_msg,emb_cells)
 
81
 
82
  top_5_cells_scores = heapq.nlargest(5,cosine_sim_0[0])
83
  top_5_cells = [indexed_cells_list[index] for index in sorted(list(cosine_sim_0[0]).index(score) for score in top_5_cells_scores)]
84
 
 
85
  top_2_chats = None
86
  if hist_cop:
87
  chat_history = [user_prompt_template(item[0]) + assistant_response_template(item[1]) for item in hist_cop]
88
  # emb_formatted_user_msg = embedding_model.encode(formatted_user_msg,convert_to_tensor=True)
89
  emb_chat_history = embedding_model.encode(chat_history,convert_to_tensor=True)
90
+ cosine_similarity_scores = util.cos_sim(emb_msg,emb_chat_history)
91
  top_2_scores = heapq.nlargest(2,cosine_similarity_scores[0])
92
  top_2_chats = [chat_history[i] for i in sorted(list(cosine_similarity_scores[0]).index(val) for val in top_2_scores)]
93
 
 
110
 
111
  inp_dict = {"inputs":user_input,
112
  "parameters": {"max_new_tokens":750,"temperature":0.01}}
113
+ output = query(url=API_URL,headers=headers,payload=inp_dict)
114
  #
115
  try:
116
  output_text = output[0]['generated_text']
 
121
  else:
122
  formatted_assistant_msg = f"Error has occured, type of output is {type(output)} and length of output is: {len(output)}"
123
 
 
 
 
 
124
  return formatted_assistant_msg
125
 
126
+
127
  demo = gr.ChatInterface(chat, multimodal=True)
128
 
129
  if __name__ == '__main__':
130
  demo.launch()