noorulamean444 committed on
Commit
c039f17
1 Parent(s): d920f18

Almost there version

Browse files
Files changed (1) hide show
  1. app.py +19 -82
app.py CHANGED
@@ -4,7 +4,7 @@ import gradio as gr
4
  import time
5
  import heapq
6
  import re
7
- from utils import package_installer
8
 
9
  package_installer('sentence_transformers')
10
  package_installer('nbconvert')
@@ -18,41 +18,22 @@ import nbformat
18
  from bs4 import BeautifulSoup
19
  from inflect import engine
20
 
21
- inflect_engine = engine()
22
-
23
- def convert_ipynb_to_html(input_file):
24
- # Load .ipynb file into a nbformat.NotebookNode object
25
- notebook = nbformat.read(input_file, as_version=4)
26
-
27
- # Convert using HTML exporter
28
- html_exporter = nbconvert.HTMLExporter()
29
- (body, resources) = html_exporter.from_notebook_node(notebook)
30
- # Write to output html file
31
- # with open(output_file, 'w') as f:
32
- # f.write(body)
33
-
34
- return body
35
-
36
-
37
- API_TOKEN = os.environ['HF_TOKEN']
38
-
39
- API_URL = "https://api-inference.huggingface.co/models/microsoft/Phi-3-mini-4k-instruct"
40
- headers = {"Authorization": f"Bearer {API_TOKEN}"}
41
-
42
- embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
43
 
44
  def user_prompt_template(user_msg:str):
45
  return f"<|user|>\n{user_msg}<|end|>\n<|assistant|>"
46
 
 
47
  def assistant_response_template(assistant_msg:str):
48
  return f"{assistant_msg}<|end|>\n"
49
 
50
 
51
- def query(payload):
52
- response = requests.post(API_URL, headers=headers, json=payload,timeout=120)
53
- return response.json()
54
 
55
-
 
 
56
 
57
  def chat(message,history):
58
 
@@ -68,28 +49,15 @@ def chat(message,history):
68
  soup = BeautifulSoup(html_code, 'html.parser')
69
  text = soup.get_text()
70
 
71
- code_data = text.split('\n')
72
- string = ''
73
- cells_list = []
74
- for item in code_data:
75
-
76
- if len(item) > 0:
77
- string += item + '\n'
78
- continue
79
-
80
- if len(item) == 0 and len(string) > 0:
81
- cells_list.append(string)
82
- string = ''
83
 
84
- cells_list_copy = cells_list.copy()
85
- for item in cells_list:
86
- if item == 'Notebook\n' or item == 'In\xa0[\xa0]:\n':
87
- cells_list_copy.remove(item)
88
 
89
  indexed_cells_list = []
90
  index_comments = []
91
- for i in range(len(cells_list_copy)):
92
- itxt = cells_list_copy[i]
93
  cell_addresses = f'# Cell Number: {i+1}\n' + f'# Cell Number: {inflect_engine.number_to_words(i+1)}\n'
94
  if i+1 % 10 == 1:
95
  indexed_cells_list.append(f'# {i+1}st cell\n'+ cell_addresses + itxt)
@@ -103,34 +71,23 @@ def chat(message,history):
103
  else:
104
  indexed_cells_list.append(f'# {i+1}th cell\n' + cell_addresses + itxt)
105
  index_comments.append(f'# {i+1}th cell\n' + cell_addresses)
106
-
107
-
108
- # cells = re.split(r"In\xa0\[[0-9\xa0]*\]:",text)
109
- # cells = [element.strip() for element in cells]
110
- # cells = [element for element in cells if element != '']
111
  except:
112
  pass
113
-
114
- # print(cells)
115
- # print()
116
- # print(len(cells))
117
 
118
- # cells_as_string = '\n'.join(cells)
119
  emb_cells = embedding_model.encode(index_comments,convert_to_tensor=True)
120
-
121
- emb_formatted_user_msg = embedding_model.encode(formatted_user_msg,convert_to_tensor=True)
122
- cosine_sim_0 = util.cos_sim(emb_formatted_user_msg,emb_cells)
123
 
124
  top_5_cells_scores = heapq.nlargest(5,cosine_sim_0[0])
125
  top_5_cells = [indexed_cells_list[index] for index in sorted(list(cosine_sim_0[0]).index(score) for score in top_5_cells_scores)]
126
 
127
-
128
  top_2_chats = None
129
  if hist_cop:
130
  chat_history = [user_prompt_template(item[0]) + assistant_response_template(item[1]) for item in hist_cop]
131
  # emb_formatted_user_msg = embedding_model.encode(formatted_user_msg,convert_to_tensor=True)
132
  emb_chat_history = embedding_model.encode(chat_history,convert_to_tensor=True)
133
- cosine_similarity_scores = util.cos_sim(emb_formatted_user_msg,emb_chat_history)
134
  top_2_scores = heapq.nlargest(2,cosine_similarity_scores[0])
135
  top_2_chats = [chat_history[i] for i in sorted(list(cosine_similarity_scores[0]).index(val) for val in top_2_scores)]
136
 
@@ -153,7 +110,7 @@ def chat(message,history):
153
 
154
  inp_dict = {"inputs":user_input,
155
  "parameters": {"max_new_tokens":750,"temperature":0.01}}
156
- output = query(inp_dict)
157
  #
158
  try:
159
  output_text = output[0]['generated_text']
@@ -164,30 +121,10 @@ def chat(message,history):
164
  else:
165
  formatted_assistant_msg = f"Error has occured, type of output is {type(output)} and length of output is: {len(output)}"
166
 
167
-
168
- print(user_input)
169
- print()
170
- print(indexed_cells_list)
171
  return formatted_assistant_msg
172
 
 
173
  demo = gr.ChatInterface(chat, multimodal=True)
174
 
175
  if __name__ == '__main__':
176
  demo.launch()
177
-
178
- # import gradio as gr
179
-
180
- # def process_file(file_path):
181
- # # This function will be called when a file is uploaded.
182
- # # 'file_path' is a string that contains the path to the uploaded file.
183
- # # You can read the file using this path and process it as needed.
184
- # # For example, you can return the name of the file:
185
- # return f"You uploaded {file_path}"
186
-
187
- # iface = gr.Interface(
188
- # fn=process_file, # the function to call when a file is uploaded
189
- # inputs=gr.File(), # creates a file upload button
190
- # outputs="text" # the output of 'process_file' is text
191
- # )
192
-
193
- # iface.launch()
 
4
  import time
5
  import heapq
6
  import re
7
+ from utils import package_installer, cell_number_extractor, query, convert_ipynb_to_html
8
 
9
  package_installer('sentence_transformers')
10
  package_installer('nbconvert')
 
18
  from bs4 import BeautifulSoup
19
  from inflect import engine
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  def user_prompt_template(user_msg:str):
23
  return f"<|user|>\n{user_msg}<|end|>\n<|assistant|>"
24
 
25
+
26
  def assistant_response_template(assistant_msg:str):
27
  return f"{assistant_msg}<|end|>\n"
28
 
29
 
30
+ API_TOKEN = os.environ['HF_TOKEN']
31
+ API_URL = "https://api-inference.huggingface.co/models/microsoft/Phi-3-mini-4k-instruct"
32
+ headers = {"Authorization": f"Bearer {API_TOKEN}"}
33
 
34
+
35
+ inflect_engine = engine()
36
+ embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
37
 
38
  def chat(message,history):
39
 
 
49
  soup = BeautifulSoup(html_code, 'html.parser')
50
  text = soup.get_text()
51
 
52
+ code_data_raw = text.split('In\xa0[\xa0]:')
53
+ code_data_cleaned = [cell.strip() for cell in code_data_raw if cell.strip() != 'Notebook' and len(cell.strip()) > 0]
 
 
 
 
 
 
 
 
 
 
54
 
55
+ cdc_string = '\n\n'.join(code_data_cleaned)
 
 
 
56
 
57
  indexed_cells_list = []
58
  index_comments = []
59
+ for i in range(len(code_data_cleaned)):
60
+ itxt = code_data_cleaned[i]
61
  cell_addresses = f'# Cell Number: {i+1}\n' + f'# Cell Number: {inflect_engine.number_to_words(i+1)}\n'
62
  if i+1 % 10 == 1:
63
  indexed_cells_list.append(f'# {i+1}st cell\n'+ cell_addresses + itxt)
 
71
  else:
72
  indexed_cells_list.append(f'# {i+1}th cell\n' + cell_addresses + itxt)
73
  index_comments.append(f'# {i+1}th cell\n' + cell_addresses)
74
+
 
 
 
 
75
  except:
76
  pass
 
 
 
 
77
 
 
78
  emb_cells = embedding_model.encode(index_comments,convert_to_tensor=True)
79
+ emb_msg = embedding_model.encode(message['text'],convert_to_tensor=True)
80
+ cosine_sim_0 = util.cos_sim(emb_msg,emb_cells)
 
81
 
82
  top_5_cells_scores = heapq.nlargest(5,cosine_sim_0[0])
83
  top_5_cells = [indexed_cells_list[index] for index in sorted(list(cosine_sim_0[0]).index(score) for score in top_5_cells_scores)]
84
 
 
85
  top_2_chats = None
86
  if hist_cop:
87
  chat_history = [user_prompt_template(item[0]) + assistant_response_template(item[1]) for item in hist_cop]
88
  # emb_formatted_user_msg = embedding_model.encode(formatted_user_msg,convert_to_tensor=True)
89
  emb_chat_history = embedding_model.encode(chat_history,convert_to_tensor=True)
90
+ cosine_similarity_scores = util.cos_sim(emb_msg,emb_chat_history)
91
  top_2_scores = heapq.nlargest(2,cosine_similarity_scores[0])
92
  top_2_chats = [chat_history[i] for i in sorted(list(cosine_similarity_scores[0]).index(val) for val in top_2_scores)]
93
 
 
110
 
111
  inp_dict = {"inputs":user_input,
112
  "parameters": {"max_new_tokens":750,"temperature":0.01}}
113
+ output = query(url=API_URL,headers=headers,payload=inp_dict)
114
  #
115
  try:
116
  output_text = output[0]['generated_text']
 
121
  else:
122
  formatted_assistant_msg = f"Error has occured, type of output is {type(output)} and length of output is: {len(output)}"
123
 
 
 
 
 
124
  return formatted_assistant_msg
125
 
126
+
127
  demo = gr.ChatInterface(chat, multimodal=True)
128
 
129
  if __name__ == '__main__':
130
  demo.launch()