TETSU0701 committed
Commit 5fab04c · verified · 1 Parent(s): 5108765

Update app.py

Files changed (1)
  1. app.py +38 -40
app.py CHANGED
@@ -7,6 +7,9 @@ from Model import OmniPathWithInterTaskAttention
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import transformers
 import os
+from threading import Thread
+from transformers import TextIteratorStreamer
+
 
 # Force Gradio to use the English locale
 os.environ["GRADIO_LOCALE"] = "en"
@@ -53,8 +56,8 @@ def load_models():
     tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
     llm_model = AutoModelForCausalLM.from_pretrained(
         llm_model_name,
-        dtype="auto",
-        device_map="auto"
+        device_map="auto",
+        load_in_4bit=True
     )
 
     return classification_model, llm_model, tokenizer, label_mappings
@@ -105,20 +108,19 @@ def analyze_npy_file(npy_file):
         return None, f"An error occurred during processing: {str(e)}"
 
 def generate_response(message, chat_history, analysis_results):
-    """Generate response based on user message and analysis results"""
+    """Generate streamed LLM response"""
     if analysis_results is None:
-        return "Please upload an NPY file first to analyze the patient data.", chat_history
-
+        yield "Please upload an NPY file first to analyze the patient data.", chat_history
+        return
+
     pred_names = analysis_results["pred_names"]
     pred_scores = analysis_results["pred_scores"]
     patient_id = analysis_results["patient_id"]
-
-    # Build context from analysis results
+
     context = f"Patient {patient_id} analysis results:\n"
     for task, name in pred_names.items():
         context += f"- {task}: {name} (confidence: {pred_scores.get(task, 0.0):.3f})\n"
-
-    # Build prompt based on user message
+
     if "diagnosis" in message.lower() or "result" in message.lower():
         prompt = f"{context}\nBased on the above analysis results, provide a detailed diagnosis summary and interpretation."
     elif "treatment" in message.lower() or "therapy" in message.lower():
@@ -131,41 +133,37 @@ def generate_response(message, chat_history, analysis_results):
         prompt = f"{context}\nDescribe the histological characteristics and their significance."
     else:
         prompt = f"{context}\nUser question: {message}\nPlease provide a helpful response based on the analysis results."
-
-    try:
-        # Generate response using LLM
-        messages = [{"role": "user", "content": prompt}]
-        text = tokenizer.apply_chat_template(
-            messages,
-            tokenize=False,
-            add_generation_prompt=True,
-            enable_thinking=False
-        )
-
-        model_inputs = tokenizer([text], return_tensors="pt").to(llm_model.device)
-        generated_ids = llm_model.generate(
+
+    messages = [{"role": "user", "content": prompt}]
+    text = tokenizer.apply_chat_template(
+        messages, tokenize=False, add_generation_prompt=True, enable_thinking=False
+    )
+
+    model_inputs = tokenizer([text], return_tensors="pt").to(llm_model.device)
+
+    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+
+    thread = Thread(
+        target=lambda: llm_model.generate(
             **model_inputs,
-            max_new_tokens=2048,
+            max_new_tokens=1024,  # 🚀 smaller output budget for faster responses
            do_sample=True,
            temperature=0.7,
+            top_p=0.9,
+            streamer=streamer
        )
-
-        output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
-        try:
-            index = len(output_ids) - output_ids[::-1].index(151668)
-        except ValueError:
-            index = 0
-
-        response = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")
-
-        # Add to chat history
-        chat_history.append((message, response))
-        return "", chat_history
-
-    except Exception as e:
-        error_msg = f"Error generating response: {str(e)}"
-        chat_history.append((message, error_msg))
-        return "", chat_history
+    )
+    thread.start()
+
+    partial = ""
+    for new_text in streamer:
+        partial += new_text
+        # stream the partial reply to the UI in real time
+        yield "", chat_history + [(message, partial)]
+
+    # once generation finishes, write the final reply back into the history
+    chat_history.append((message, partial))
+    yield "", chat_history
 
 def upload_file(npy_file, chat_history, analysis_results):
     """Handle file upload and initial analysis"""
 
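For reference, here is a minimal, self-contained sketch of the streaming pattern the rewritten generate_response relies on: generate() blocks until decoding is done, so it runs on a worker thread while the caller iterates a TextIteratorStreamer and yields the growing text. The model id is a placeholder, and enable_thinking is omitted here because it is model-specific.

# Sketch: incremental decoding with TextIteratorStreamer and a background thread.
# The model id is a placeholder; any chat-style causal LM works the same way.
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "your-llm-name"  # placeholder
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

def stream_reply(prompt):
    """Yield the reply text incrementally as the model produces it."""
    text = tokenizer.apply_chat_template(
        [{"role": "user", "content": prompt}],
        tokenize=False,
        add_generation_prompt=True,
    )
    inputs = tokenizer([text], return_tensors="pt").to(model.device)
    # skip_prompt=True keeps the echoed prompt out of the streamed output
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    # generate() blocks until finished, so run it on a worker thread
    thread = Thread(
        target=model.generate,
        kwargs={**inputs, "streamer": streamer, "max_new_tokens": 256},
    )
    thread.start()
    partial = ""
    for chunk in streamer:   # the streamer yields decoded text pieces
        partial += chunk
        yield partial        # caller sees a growing snapshot, as the chat UI does
    thread.join()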