Spaces:

kouki321
/

Third_Try_Cag_pdf

Running

App Files Community

kouki321 committed on May 28

Commit

91aacab

verified ·

1 Parent(s): 596284f

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -39

app.py CHANGED Viewed

@@ -97,11 +97,9 @@ def clone_cache(cache):
     return new_cache
 @st.cache_resource
-def load_document_and_cache(file_path):
     try:
         t2 = time()
-        with open(file_path, "r", encoding="utf-8") as f:
-            doc_text = f.read()
         doc_text_count = len(doc_text)
         model, tokenizer = load_model_and_tokenizer(doc_text_count)
         system_prompt = f"""
@@ -115,7 +113,7 @@ def load_document_and_cache(file_path):
         cache, origin_len = get_kv_cache(model, tokenizer, system_prompt)
         t3 = time()
         print(f"{t3-t2}")
-        return cache, origin_len, doc_text, doc_text_count, model, tokenizer
     except FileNotFoundError:
         st.error(f"Document file not found at {file_path}")
         return None, None, None, None, None, None
@@ -154,39 +152,37 @@ if uploaded_file:
     log = []
     # PART 1: File Upload & Save
-    t_start = time()
-    temp_file_path = "temp_document.txt"
-    with open(temp_file_path, "wb") as f:
-        f.write(uploaded_file.getvalue())
-    t_end = time()
-    log.append(f"📂 File Upload & Save Time: {t_end - t_start:.2f} s")
-    print(f"📂 File Upload & Save Time: {t_end - t_start:.2f} s")
     # PART 2: Document and Cache Load
-    t_start = time()
-    cache, _, doc_text, doc_text_count, model, tokenizer = load_document_and_cache(temp_file_path)
-    t_end = time()
-    log.append(f"📄 Document & Cache Load Time: {t_end - t_start:.2f} s")
-    print(f"📄 Document & Cache Load Time: {t_end - t_start:.2f} s")
     # PART 3: Document Preview Display
-    t_start = time()
     with st.expander("📄 Document Preview"):
         preview = doc_text[:500] + "..." if len(doc_text) > 500 else doc_text
         st.text(preview)
-    t_end = time()
-    log.append(f"👀 Document Preview Display Time: {t_end - t_start:.2f} s")
-    print(f"👀 Document Preview Display Time: {t_end - t_start:.2f} s")
-    t_start = time()
     # PART 4: Show Basic Info
-    doc_size_kb = os.path.getsize(temp_file_path) / 1024
-    cache_size = os.path.getsize("temp_cache.pth") / 1024 if os.path.exists("temp_cache.pth") else "N/A"
-    t_end = time()
-    log.append(f"👀 doc_size_kb Preview Display Time: {t_end - t_start:.2f} s")
-    print(f"👀 doc_size_kb Preview Display Time: {t_end - t_start:.2f} s")
     st.info(
-        f"Document Chars: {len(doc_text)} | Size: {doc_size_kb:.2f} KB | "
-        f"Cache Size: {cache_size if cache_size == 'N/A' else f'{cache_size:.2f} KB'}"
     )
     # =========================
@@ -199,14 +195,14 @@ if uploaded_file:
             log.append("🚀 Query & Generation Steps:")
             # PART 4.1: Clone Cache
-            t_start = time()
             current_cache = clone_cache(cache)
-            t_end = time()
-            print(f"🔁 Clone Cache Time: {t_end - t_start:.2f} s")
-            log.append(f"🔁 Clone Cache Time: {t_end - t_start:.2f} s")
             # PART 4.2: Tokenize Prompt
-            t_start = time()
             model, tokenizer = load_model_and_tokenizer(doc_text_count)
             full_prompt = f"""
             <|user|>
@@ -215,14 +211,14 @@ if uploaded_file:
             """.strip()
             input_ids = tokenizer(full_prompt, return_tensors="pt").input_ids
             input_tokens_count += input_ids.shape[-1]
-            t_end = time()
-            print(f"✍️ Tokenization Time: {t_end - t_start:.2f} s")
-            log.append(f"✍️ Tokenization Time: {t_end - t_start:.2f} s")
             # PART 4.3: Generate Answer
-            t_start = time()
             output_ids = generate(model, input_ids, current_cache, max_new_tokens=4)
-            last_generation_time = time() - t_start
             print(f"💡 Generation Time: {last_generation_time:.2f} s")
             log.append(f"💡 Generation Time: {last_generation_time:.2f} s")
@@ -237,7 +233,7 @@ if uploaded_file:
             # Final Info Display
             st.info(
-                f"Document Chars: {len(doc_text)} | Size: {doc_size_kb:.2f} KB | "
                 f"Cache Clone Time: {log[-3].split(': ')[1]} | Generation Time: {last_generation_time:.2f} s"
             )

     return new_cache
 @st.cache_resource
+def load_document_and_cache(doc_text):
     try:
         t2 = time()
         doc_text_count = len(doc_text)
         model, tokenizer = load_model_and_tokenizer(doc_text_count)
         system_prompt = f"""
         cache, origin_len = get_kv_cache(model, tokenizer, system_prompt)
         t3 = time()
         print(f"{t3-t2}")
+        return cache, origin_len, doc_text_count, model, tokenizer
     except FileNotFoundError:
         st.error(f"Document file not found at {file_path}")
         return None, None, None, None, None, None
     log = []
     # PART 1: File Upload & Save
+    t_start1 = time()
+    doc_text=    uploaded_file.getvalue()
+    t_end1 = time()
+    log.append(f"📂 File Upload & Save Time: {t_end1 - t_start1:.2f} s")
+    print(f"📂 File Upload & Save Time: {t_end1 - t_start1:.2f} s")
     # PART 2: Document and Cache Load
+    t_start2 = time()
+    cache, _, doc_text, doc_text_count, model, tokenizer = load_document_and_cache(doc_text)
+    t_end2 = time()
+    log.append(f"📄 Document & Cache Load Time: {t_end2 - t_start2:.2f} s")
+    print(f"📄 Document & Cache Load Time: {t_end2 - t_start2:.2f} s")
     # PART 3: Document Preview Display
+    t_start3 = time()
     with st.expander("📄 Document Preview"):
         preview = doc_text[:500] + "..." if len(doc_text) > 500 else doc_text
         st.text(preview)
+    t_end3 = time()
+    log.append(f"👀 Document Preview Display Time: {t_end3 - t_start3:.2f} s")
+    print(f"👀 Document Preview Display Time: {t_end3 - t_start3:.2f} s")
+    t_start4 = time()
     # PART 4: Show Basic Info
+    #doc_size_kb = os.path.getsize(temp_file_path) / 1024
+    #cache_size = os.path.getsize("temp_cache.pth") / 1024 if os.path.exists("temp_cache.pth") else "N/A"
+    t_end4 = time()
+    log.append(f"👀 doc_size_kb Preview Display Time: {t_end4 - t_start4:.2f} s")
+    print(f"👀 doc_size_kb Preview Display Time: {t_end4 - t_start4:.2f} s")
     st.info(
+      #  f"Document Chars: {len(doc_text)} | Size: {doc_size_kb:.2f} KB | "
+       # f"Cache Size: {cache_size if cache_size == 'N/A' else f'{cache_size:.2f} KB'}"
     )
     # =========================
             log.append("🚀 Query & Generation Steps:")
             # PART 4.1: Clone Cache
+            t_start5 = time()
             current_cache = clone_cache(cache)
+            t_end5 = time()
+            print(f"🔁 Clone Cache Time: {t_end5 - t_start5:.2f} s")
+            log.append(f"🔁 Clone Cache Time: {t_end5 - t_start5:.2f} s")
             # PART 4.2: Tokenize Prompt
+            t_start6 = time()
             model, tokenizer = load_model_and_tokenizer(doc_text_count)
             full_prompt = f"""
             <|user|>
             """.strip()
             input_ids = tokenizer(full_prompt, return_tensors="pt").input_ids
             input_tokens_count += input_ids.shape[-1]
+            t_end6 = time()
+            print(f"✍️ Tokenization Time: {t_end6 - t_start6:.2f} s")
+            log.append(f"✍️ Tokenization Time: {t_end6 - t_start6:.2f} s")
             # PART 4.3: Generate Answer
+            t_start7 = time()
             output_ids = generate(model, input_ids, current_cache, max_new_tokens=4)
+            last_generation_time = time() - t_start7
             print(f"💡 Generation Time: {last_generation_time:.2f} s")
             log.append(f"💡 Generation Time: {last_generation_time:.2f} s")
             # Final Info Display
             st.info(
+             #   f"Document Chars: {len(doc_text)} | Size: {doc_size_kb:.2f} KB | "
                 f"Cache Clone Time: {log[-3].split(': ')[1]} | Generation Time: {last_generation_time:.2f} s"
             )