Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -97,11 +97,9 @@ def clone_cache(cache):
|
|
| 97 |
return new_cache
|
| 98 |
|
| 99 |
@st.cache_resource
|
| 100 |
-
def load_document_and_cache(
|
| 101 |
try:
|
| 102 |
t2 = time()
|
| 103 |
-
with open(file_path, "r", encoding="utf-8") as f:
|
| 104 |
-
doc_text = f.read()
|
| 105 |
doc_text_count = len(doc_text)
|
| 106 |
model, tokenizer = load_model_and_tokenizer(doc_text_count)
|
| 107 |
system_prompt = f"""
|
|
@@ -115,7 +113,7 @@ def load_document_and_cache(file_path):
|
|
| 115 |
cache, origin_len = get_kv_cache(model, tokenizer, system_prompt)
|
| 116 |
t3 = time()
|
| 117 |
print(f"{t3-t2}")
|
| 118 |
-
return cache, origin_len,
|
| 119 |
except FileNotFoundError:
|
| 120 |
st.error(f"Document file not found at {file_path}")
|
| 121 |
return None, None, None, None, None, None
|
|
@@ -154,39 +152,37 @@ if uploaded_file:
|
|
| 154 |
log = []
|
| 155 |
|
| 156 |
# PART 1: File Upload & Save
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
log.append(f"π File Upload & Save Time: {t_end - t_start:.2f} s")
|
| 163 |
-
print(f"π File Upload & Save Time: {t_end - t_start:.2f} s")
|
| 164 |
|
| 165 |
# PART 2: Document and Cache Load
|
| 166 |
-
|
| 167 |
-
cache, _, doc_text, doc_text_count, model, tokenizer = load_document_and_cache(
|
| 168 |
-
|
| 169 |
-
log.append(f"π Document & Cache Load Time: {
|
| 170 |
-
print(f"π Document & Cache Load Time: {
|
| 171 |
|
| 172 |
# PART 3: Document Preview Display
|
| 173 |
-
|
| 174 |
with st.expander("π Document Preview"):
|
| 175 |
preview = doc_text[:500] + "..." if len(doc_text) > 500 else doc_text
|
| 176 |
st.text(preview)
|
| 177 |
-
|
| 178 |
-
log.append(f"π Document Preview Display Time: {
|
| 179 |
-
print(f"π Document Preview Display Time: {
|
| 180 |
-
|
| 181 |
# PART 4: Show Basic Info
|
| 182 |
-
doc_size_kb = os.path.getsize(temp_file_path) / 1024
|
| 183 |
-
cache_size = os.path.getsize("temp_cache.pth") / 1024 if os.path.exists("temp_cache.pth") else "N/A"
|
| 184 |
-
|
| 185 |
-
log.append(f"π doc_size_kb Preview Display Time: {
|
| 186 |
-
print(f"π doc_size_kb Preview Display Time: {
|
| 187 |
st.info(
|
| 188 |
-
|
| 189 |
-
|
| 190 |
)
|
| 191 |
|
| 192 |
# =========================
|
|
@@ -199,14 +195,14 @@ if uploaded_file:
|
|
| 199 |
log.append("π Query & Generation Steps:")
|
| 200 |
|
| 201 |
# PART 4.1: Clone Cache
|
| 202 |
-
|
| 203 |
current_cache = clone_cache(cache)
|
| 204 |
-
|
| 205 |
-
print(f"π Clone Cache Time: {
|
| 206 |
-
log.append(f"π Clone Cache Time: {
|
| 207 |
|
| 208 |
# PART 4.2: Tokenize Prompt
|
| 209 |
-
|
| 210 |
model, tokenizer = load_model_and_tokenizer(doc_text_count)
|
| 211 |
full_prompt = f"""
|
| 212 |
<|user|>
|
|
@@ -215,14 +211,14 @@ if uploaded_file:
|
|
| 215 |
""".strip()
|
| 216 |
input_ids = tokenizer(full_prompt, return_tensors="pt").input_ids
|
| 217 |
input_tokens_count += input_ids.shape[-1]
|
| 218 |
-
|
| 219 |
-
print(f"βοΈ Tokenization Time: {
|
| 220 |
-
log.append(f"βοΈ Tokenization Time: {
|
| 221 |
|
| 222 |
# PART 4.3: Generate Answer
|
| 223 |
-
|
| 224 |
output_ids = generate(model, input_ids, current_cache, max_new_tokens=4)
|
| 225 |
-
last_generation_time = time() -
|
| 226 |
print(f"💡 Generation Time: {last_generation_time:.2f} s")
|
| 227 |
log.append(f"💡 Generation Time: {last_generation_time:.2f} s")
|
| 228 |
|
|
@@ -237,7 +233,7 @@ if uploaded_file:
|
|
| 237 |
|
| 238 |
# Final Info Display
|
| 239 |
st.info(
|
| 240 |
-
|
| 241 |
f"Cache Clone Time: {log[-3].split(': ')[1]} | Generation Time: {last_generation_time:.2f} s"
|
| 242 |
)
|
| 243 |
|
|
|
|
| 97 |
return new_cache
|
| 98 |
|
| 99 |
@st.cache_resource
|
| 100 |
+
def load_document_and_cache(doc_text):
|
| 101 |
try:
|
| 102 |
t2 = time()
|
|
|
|
|
|
|
| 103 |
doc_text_count = len(doc_text)
|
| 104 |
model, tokenizer = load_model_and_tokenizer(doc_text_count)
|
| 105 |
system_prompt = f"""
|
|
|
|
| 113 |
cache, origin_len = get_kv_cache(model, tokenizer, system_prompt)
|
| 114 |
t3 = time()
|
| 115 |
print(f"{t3-t2}")
|
| 116 |
+
return cache, origin_len, doc_text_count, model, tokenizer
|
| 117 |
except FileNotFoundError:
|
| 118 |
st.error(f"Document file not found at {file_path}")
|
| 119 |
return None, None, None, None, None, None
|
|
|
|
| 152 |
log = []
|
| 153 |
|
| 154 |
# PART 1: File Upload & Save
|
| 155 |
+
t_start1 = time()
|
| 156 |
+
doc_text= uploaded_file.getvalue()
|
| 157 |
+
t_end1 = time()
|
| 158 |
+
log.append(f"π File Upload & Save Time: {t_end1 - t_start1:.2f} s")
|
| 159 |
+
print(f"π File Upload & Save Time: {t_end1 - t_start1:.2f} s")
|
|
|
|
|
|
|
| 160 |
|
| 161 |
# PART 2: Document and Cache Load
|
| 162 |
+
t_start2 = time()
|
| 163 |
+
cache, _, doc_text, doc_text_count, model, tokenizer = load_document_and_cache(doc_text)
|
| 164 |
+
t_end2 = time()
|
| 165 |
+
log.append(f"π Document & Cache Load Time: {t_end2 - t_start2:.2f} s")
|
| 166 |
+
print(f"π Document & Cache Load Time: {t_end2 - t_start2:.2f} s")
|
| 167 |
|
| 168 |
# PART 3: Document Preview Display
|
| 169 |
+
t_start3 = time()
|
| 170 |
with st.expander("π Document Preview"):
|
| 171 |
preview = doc_text[:500] + "..." if len(doc_text) > 500 else doc_text
|
| 172 |
st.text(preview)
|
| 173 |
+
t_end3 = time()
|
| 174 |
+
log.append(f"π Document Preview Display Time: {t_end3 - t_start3:.2f} s")
|
| 175 |
+
print(f"π Document Preview Display Time: {t_end3 - t_start3:.2f} s")
|
| 176 |
+
t_start4 = time()
|
| 177 |
# PART 4: Show Basic Info
|
| 178 |
+
#doc_size_kb = os.path.getsize(temp_file_path) / 1024
|
| 179 |
+
#cache_size = os.path.getsize("temp_cache.pth") / 1024 if os.path.exists("temp_cache.pth") else "N/A"
|
| 180 |
+
t_end4 = time()
|
| 181 |
+
log.append(f"π doc_size_kb Preview Display Time: {t_end4 - t_start4:.2f} s")
|
| 182 |
+
print(f"π doc_size_kb Preview Display Time: {t_end4 - t_start4:.2f} s")
|
| 183 |
st.info(
|
| 184 |
+
# f"Document Chars: {len(doc_text)} | Size: {doc_size_kb:.2f} KB | "
|
| 185 |
+
# f"Cache Size: {cache_size if cache_size == 'N/A' else f'{cache_size:.2f} KB'}"
|
| 186 |
)
|
| 187 |
|
| 188 |
# =========================
|
|
|
|
| 195 |
log.append("π Query & Generation Steps:")
|
| 196 |
|
| 197 |
# PART 4.1: Clone Cache
|
| 198 |
+
t_start5 = time()
|
| 199 |
current_cache = clone_cache(cache)
|
| 200 |
+
t_end5 = time()
|
| 201 |
+
print(f"π Clone Cache Time: {t_end5 - t_start5:.2f} s")
|
| 202 |
+
log.append(f"π Clone Cache Time: {t_end5 - t_start5:.2f} s")
|
| 203 |
|
| 204 |
# PART 4.2: Tokenize Prompt
|
| 205 |
+
t_start6 = time()
|
| 206 |
model, tokenizer = load_model_and_tokenizer(doc_text_count)
|
| 207 |
full_prompt = f"""
|
| 208 |
<|user|>
|
|
|
|
| 211 |
""".strip()
|
| 212 |
input_ids = tokenizer(full_prompt, return_tensors="pt").input_ids
|
| 213 |
input_tokens_count += input_ids.shape[-1]
|
| 214 |
+
t_end6 = time()
|
| 215 |
+
print(f"βοΈ Tokenization Time: {t_end6 - t_start6:.2f} s")
|
| 216 |
+
log.append(f"βοΈ Tokenization Time: {t_end6 - t_start6:.2f} s")
|
| 217 |
|
| 218 |
# PART 4.3: Generate Answer
|
| 219 |
+
t_start7 = time()
|
| 220 |
output_ids = generate(model, input_ids, current_cache, max_new_tokens=4)
|
| 221 |
+
last_generation_time = time() - t_start7
|
| 222 |
print(f"💡 Generation Time: {last_generation_time:.2f} s")
|
| 223 |
log.append(f"💡 Generation Time: {last_generation_time:.2f} s")
|
| 224 |
|
|
|
|
| 233 |
|
| 234 |
# Final Info Display
|
| 235 |
st.info(
|
| 236 |
+
# f"Document Chars: {len(doc_text)} | Size: {doc_size_kb:.2f} KB | "
|
| 237 |
f"Cache Clone Time: {log[-3].split(': ')[1]} | Generation Time: {last_generation_time:.2f} s"
|
| 238 |
)
|
| 239 |
|