kouki321 commited on
Commit
91aacab
·
verified ·
1 Parent(s): 596284f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -39
app.py CHANGED
@@ -97,11 +97,9 @@ def clone_cache(cache):
97
  return new_cache
98
 
99
  @st.cache_resource
100
- def load_document_and_cache(file_path):
101
  try:
102
  t2 = time()
103
- with open(file_path, "r", encoding="utf-8") as f:
104
- doc_text = f.read()
105
  doc_text_count = len(doc_text)
106
  model, tokenizer = load_model_and_tokenizer(doc_text_count)
107
  system_prompt = f"""
@@ -115,7 +113,7 @@ def load_document_and_cache(file_path):
115
  cache, origin_len = get_kv_cache(model, tokenizer, system_prompt)
116
  t3 = time()
117
  print(f"{t3-t2}")
118
- return cache, origin_len, doc_text, doc_text_count, model, tokenizer
119
  except FileNotFoundError:
120
  st.error(f"Document file not found at {file_path}")
121
  return None, None, None, None, None, None
@@ -154,39 +152,37 @@ if uploaded_file:
154
  log = []
155
 
156
  # PART 1: File Upload & Save
157
- t_start = time()
158
- temp_file_path = "temp_document.txt"
159
- with open(temp_file_path, "wb") as f:
160
- f.write(uploaded_file.getvalue())
161
- t_end = time()
162
- log.append(f"πŸ“‚ File Upload & Save Time: {t_end - t_start:.2f} s")
163
- print(f"πŸ“‚ File Upload & Save Time: {t_end - t_start:.2f} s")
164
 
165
  # PART 2: Document and Cache Load
166
- t_start = time()
167
- cache, _, doc_text, doc_text_count, model, tokenizer = load_document_and_cache(temp_file_path)
168
- t_end = time()
169
- log.append(f"πŸ“„ Document & Cache Load Time: {t_end - t_start:.2f} s")
170
- print(f"πŸ“„ Document & Cache Load Time: {t_end - t_start:.2f} s")
171
 
172
  # PART 3: Document Preview Display
173
- t_start = time()
174
  with st.expander("πŸ“„ Document Preview"):
175
  preview = doc_text[:500] + "..." if len(doc_text) > 500 else doc_text
176
  st.text(preview)
177
- t_end = time()
178
- log.append(f"πŸ‘€ Document Preview Display Time: {t_end - t_start:.2f} s")
179
- print(f"πŸ‘€ Document Preview Display Time: {t_end - t_start:.2f} s")
180
- t_start = time()
181
  # PART 4: Show Basic Info
182
- doc_size_kb = os.path.getsize(temp_file_path) / 1024
183
- cache_size = os.path.getsize("temp_cache.pth") / 1024 if os.path.exists("temp_cache.pth") else "N/A"
184
- t_end = time()
185
- log.append(f"πŸ‘€ doc_size_kb Preview Display Time: {t_end - t_start:.2f} s")
186
- print(f"πŸ‘€ doc_size_kb Preview Display Time: {t_end - t_start:.2f} s")
187
  st.info(
188
- f"Document Chars: {len(doc_text)} | Size: {doc_size_kb:.2f} KB | "
189
- f"Cache Size: {cache_size if cache_size == 'N/A' else f'{cache_size:.2f} KB'}"
190
  )
191
 
192
  # =========================
@@ -199,14 +195,14 @@ if uploaded_file:
199
  log.append("πŸš€ Query & Generation Steps:")
200
 
201
  # PART 4.1: Clone Cache
202
- t_start = time()
203
  current_cache = clone_cache(cache)
204
- t_end = time()
205
- print(f"πŸ” Clone Cache Time: {t_end - t_start:.2f} s")
206
- log.append(f"πŸ” Clone Cache Time: {t_end - t_start:.2f} s")
207
 
208
  # PART 4.2: Tokenize Prompt
209
- t_start = time()
210
  model, tokenizer = load_model_and_tokenizer(doc_text_count)
211
  full_prompt = f"""
212
  <|user|>
@@ -215,14 +211,14 @@ if uploaded_file:
215
  """.strip()
216
  input_ids = tokenizer(full_prompt, return_tensors="pt").input_ids
217
  input_tokens_count += input_ids.shape[-1]
218
- t_end = time()
219
- print(f"✍️ Tokenization Time: {t_end - t_start:.2f} s")
220
- log.append(f"✍️ Tokenization Time: {t_end - t_start:.2f} s")
221
 
222
  # PART 4.3: Generate Answer
223
- t_start = time()
224
  output_ids = generate(model, input_ids, current_cache, max_new_tokens=4)
225
- last_generation_time = time() - t_start
226
  print(f"πŸ’‘ Generation Time: {last_generation_time:.2f} s")
227
  log.append(f"πŸ’‘ Generation Time: {last_generation_time:.2f} s")
228
 
@@ -237,7 +233,7 @@ if uploaded_file:
237
 
238
  # Final Info Display
239
  st.info(
240
- f"Document Chars: {len(doc_text)} | Size: {doc_size_kb:.2f} KB | "
241
  f"Cache Clone Time: {log[-3].split(': ')[1]} | Generation Time: {last_generation_time:.2f} s"
242
  )
243
 
 
97
  return new_cache
98
 
99
  @st.cache_resource
100
+ def load_document_and_cache(doc_text):
101
  try:
102
  t2 = time()
 
 
103
  doc_text_count = len(doc_text)
104
  model, tokenizer = load_model_and_tokenizer(doc_text_count)
105
  system_prompt = f"""
 
113
  cache, origin_len = get_kv_cache(model, tokenizer, system_prompt)
114
  t3 = time()
115
  print(f"{t3-t2}")
116
+ return cache, origin_len, doc_text_count, model, tokenizer
117
  except FileNotFoundError:
118
  st.error(f"Document file not found at {file_path}")
119
  return None, None, None, None, None, None
 
152
  log = []
153
 
154
  # PART 1: File Upload & Save
155
+ t_start1 = time()
156
+ doc_text= uploaded_file.getvalue()
157
+ t_end1 = time()
158
+ log.append(f"πŸ“‚ File Upload & Save Time: {t_end1 - t_start1:.2f} s")
159
+ print(f"πŸ“‚ File Upload & Save Time: {t_end1 - t_start1:.2f} s")
 
 
160
 
161
  # PART 2: Document and Cache Load
162
+ t_start2 = time()
163
+ cache, _, doc_text, doc_text_count, model, tokenizer = load_document_and_cache(doc_text)
164
+ t_end2 = time()
165
+ log.append(f"πŸ“„ Document & Cache Load Time: {t_end2 - t_start2:.2f} s")
166
+ print(f"πŸ“„ Document & Cache Load Time: {t_end2 - t_start2:.2f} s")
167
 
168
  # PART 3: Document Preview Display
169
+ t_start3 = time()
170
  with st.expander("πŸ“„ Document Preview"):
171
  preview = doc_text[:500] + "..." if len(doc_text) > 500 else doc_text
172
  st.text(preview)
173
+ t_end3 = time()
174
+ log.append(f"πŸ‘€ Document Preview Display Time: {t_end3 - t_start3:.2f} s")
175
+ print(f"πŸ‘€ Document Preview Display Time: {t_end3 - t_start3:.2f} s")
176
+ t_start4 = time()
177
  # PART 4: Show Basic Info
178
+ #doc_size_kb = os.path.getsize(temp_file_path) / 1024
179
+ #cache_size = os.path.getsize("temp_cache.pth") / 1024 if os.path.exists("temp_cache.pth") else "N/A"
180
+ t_end4 = time()
181
+ log.append(f"πŸ‘€ doc_size_kb Preview Display Time: {t_end4 - t_start4:.2f} s")
182
+ print(f"πŸ‘€ doc_size_kb Preview Display Time: {t_end4 - t_start4:.2f} s")
183
  st.info(
184
+ # f"Document Chars: {len(doc_text)} | Size: {doc_size_kb:.2f} KB | "
185
+ # f"Cache Size: {cache_size if cache_size == 'N/A' else f'{cache_size:.2f} KB'}"
186
  )
187
 
188
  # =========================
 
195
  log.append("πŸš€ Query & Generation Steps:")
196
 
197
  # PART 4.1: Clone Cache
198
+ t_start5 = time()
199
  current_cache = clone_cache(cache)
200
+ t_end5 = time()
201
+ print(f"πŸ” Clone Cache Time: {t_end5 - t_start5:.2f} s")
202
+ log.append(f"πŸ” Clone Cache Time: {t_end5 - t_start5:.2f} s")
203
 
204
  # PART 4.2: Tokenize Prompt
205
+ t_start6 = time()
206
  model, tokenizer = load_model_and_tokenizer(doc_text_count)
207
  full_prompt = f"""
208
  <|user|>
 
211
  """.strip()
212
  input_ids = tokenizer(full_prompt, return_tensors="pt").input_ids
213
  input_tokens_count += input_ids.shape[-1]
214
+ t_end6 = time()
215
+ print(f"✍️ Tokenization Time: {t_end6 - t_start6:.2f} s")
216
+ log.append(f"✍️ Tokenization Time: {t_end6 - t_start6:.2f} s")
217
 
218
  # PART 4.3: Generate Answer
219
+ t_start7 = time()
220
  output_ids = generate(model, input_ids, current_cache, max_new_tokens=4)
221
+ last_generation_time = time() - t_start7
222
  print(f"πŸ’‘ Generation Time: {last_generation_time:.2f} s")
223
  log.append(f"πŸ’‘ Generation Time: {last_generation_time:.2f} s")
224
 
 
233
 
234
  # Final Info Display
235
  st.info(
236
+ # f"Document Chars: {len(doc_text)} | Size: {doc_size_kb:.2f} KB | "
237
  f"Cache Clone Time: {log[-3].split(': ')[1]} | Generation Time: {last_generation_time:.2f} s"
238
  )
239