Spaces:

Bonosa2
/

notes

Sleeping

App Files Files Community

Bonosa2 committed 17 days ago

Commit

afa25f5

verified ·

1 Parent(s): bd86d31

Delete app.py

Browse files

Files changed (1) hide show

app.py +0 -285

app.py DELETED Viewed

@@ -1,285 +0,0 @@
-# app.py
-import os
-import traceback
-import pandas as pd
-import torch
-import gradio as gr
-from transformers import (
-    logging,
-    AutoProcessor,
-    AutoTokenizer,
-    AutoModelForImageTextToText
-)
-from sklearn.model_selection import train_test_split
-import gc
-# ─── Keep transformers quiet: only errors are reported ─────────────────────────
-logging.set_verbosity_error()
-# ─── Settings ─────────────────────────────────────────────────────────────────
-MODEL_ID = "google/gemma-3n-e2b-it"
-# The Hub access token must be provided through the environment; abort at
-# import time when it is absent or empty.
-HF_TOKEN = os.getenv("HF_TOKEN")
-if not HF_TOKEN:
-    raise RuntimeError("Missing HF_TOKEN in env vars – set it under Space Settings → Secrets")
-# ─── Cheap start-up work: only the processor and tokenizer are loaded here ─────
-# The model itself is loaded later, inside generate_and_export().
-processor = AutoProcessor.from_pretrained(
-    MODEL_ID,
-    token=HF_TOKEN,
-    trust_remote_code=True,
-)
-# NOTE(review): `tokenizer` is not referenced anywhere else in the visible code
-# (generation goes through `processor.tokenizer`) – confirm before removing.
-tokenizer = AutoTokenizer.from_pretrained(
-    MODEL_ID,
-    token=HF_TOKEN,
-    trust_remote_code=True,
-)
-# ─── Heavy work runs on button click ───────────────────────────────────────────
-def generate_and_export(num_notes: int = 20, test_size: int = 5):
-    """Generate synthetic doctor notes, convert them to SOAP and export the results.
-    The model is loaded on every call. ``num_notes`` doctor notes are generated
-    and each one is converted to a SOAP note; the pairs are split with
-    ``train_test_split`` (``test_size`` rows held out, ``random_state=42``);
-    every note is then converted a second time to obtain a "predicted" SOAP
-    note, and three files are written under ``outputs/``.
-    Args:
-        num_notes: Number of doctor notes to generate (default 20).
-        test_size: Number of notes held out for the test split (default 5).
-    Returns:
-        A 4-tuple ``(status_message, inference_tsv_path, eval_csv_path,
-        complete_dataset_csv_path)``. When an ``Exception`` is raised the
-        traceback is printed and ``("❌ Error: ...", None, None, None)`` is
-        returned instead of propagating the error.
-    """
-    # Bound before the try so the finally clause can always release it.
-    model = None
-    try:
-        # 1) Lazy‑load the full FP16 model
-        print("Loading model...")
-        model = AutoModelForImageTextToText.from_pretrained(
-            MODEL_ID,
-            trust_remote_code=True,
-            token=HF_TOKEN,
-            torch_dtype=torch.float16,
-            device_map="auto"
-        )
-        device = next(model.parameters()).device
-        print(f"Model loaded on device: {device}")
-        def free_memory() -> None:
-            """Collect garbage first, then hand cached CUDA memory back."""
-            gc.collect()
-            torch.cuda.empty_cache()
-        def chat_generate(system_text: str, user_text: str, *, max_new_tokens: int,
-                          temperature: float, repetition_penalty: float) -> str:
-            """Run one system+user chat turn and return only the generated text."""
-            inputs = processor.apply_chat_template(
-                [
-                    {"role": "system", "content": [{"type": "text", "text": system_text}]},
-                    {"role": "user", "content": [{"type": "text", "text": user_text}]}
-                ],
-                add_generation_prompt=True,
-                tokenize=True,
-                return_tensors="pt",
-                return_dict=True
-            ).to(device)
-            out = model.generate(
-                **inputs,
-                max_new_tokens=max_new_tokens,
-                do_sample=True,
-                top_p=0.9,
-                temperature=temperature,
-                repetition_penalty=repetition_penalty,
-                pad_token_id=processor.tokenizer.eos_token_id,
-                use_cache=False
-            )
-            # Decode only the tokens that come after the prompt.
-            prompt_len = inputs["input_ids"].shape[-1]
-            return processor.batch_decode(
-                out[:, prompt_len:], skip_special_tokens=True
-            )[0].strip()
-        # 2) Text→Doctor Note helper
-        def generate_doctor_note() -> str:
-            """Sample one free-text doctor's progress note from the model."""
-            prompt = """Generate a realistic, concise doctor's progress note for a single patient encounter.
-            Include patient symptoms, physical examination findings, and clinical observations.
-            Keep it brief and medical in nature.
-            Example format:
-            Patient presents with [symptoms]. Physical exam reveals [findings]. [Additional observations].
-            Doctor's Note:"""
-            return chat_generate(
-                "You are a medical assistant generating realistic patient encounter notes.",
-                prompt,
-                max_new_tokens=150,
-                temperature=0.8,
-                repetition_penalty=1.2,
-            )
-        # 3) Doctor Note→SOAP helper
-        def convert_to_soap(doctor_note: str) -> str:
-            """Ask the model to rewrite ``doctor_note`` as a SOAP-structured note."""
-            prompt = f"""Convert the following medical note into proper SOAP format.
-Medical Note: {doctor_note}
-Please structure your response exactly as follows:
-SUBJECTIVE:
-[Patient's reported symptoms, complaints, and history]
-OBJECTIVE:
-[Physical exam findings, vital signs, observable data]
-ASSESSMENT:
-[Clinical diagnosis, differential diagnosis, or impression]
-PLAN:
-[Treatment plan, medications, follow-up instructions]
-SOAP Note:"""
-            return chat_generate(
-                "You are a medical documentation assistant. Convert medical notes into structured SOAP format. Be concise and clinical.",
-                prompt,
-                max_new_tokens=250,
-                temperature=0.7,
-                repetition_penalty=1.3,
-            )
-        # 4) Generate the doctor notes + convert each one to SOAP
-        print("Generating doctor notes and SOAP conversions...")
-        docs, soaps = [], []
-        for i in range(1, num_notes + 1):
-            print(f"Generating note {i}/{num_notes}...")
-            doctor_note = generate_doctor_note()
-            docs.append(doctor_note)
-            soaps.append(convert_to_soap(doctor_note))
-            # Memory cleanup every 3 iterations
-            if i % 3 == 0:
-                free_memory()
-                print(f"Memory cleaned after note {i}")
-        print("All notes generated successfully!")
-        # 5) Split into train / test (15 / 5 with the default arguments)
-        df = pd.DataFrame({"doctor_note": docs, "soap_note": soaps})
-        train_df, test_df = train_test_split(df, test_size=test_size, random_state=42)
-        os.makedirs("outputs", exist_ok=True)
-        # 6) Generate predictions on train split → outputs/inference.tsv
-        print("Generating predictions for training set...")
-        train_preds = []
-        for idx, doctor_note in enumerate(train_df["doctor_note"]):
-            print(f"Predicting train {idx+1}/{len(train_df)}...")
-            train_preds.append(convert_to_soap(doctor_note))
-            if (idx + 1) % 3 == 0:
-                free_memory()
-        inf_df = train_df.reset_index(drop=True).copy()
-        inf_df["id"] = inf_df.index + 1
-        inf_df["ground_truth_soap"] = inf_df["soap_note"]
-        inf_df["predicted_soap"] = train_preds
-        inf_df[["id", "ground_truth_soap", "predicted_soap"]].to_csv(
-            "outputs/inference.tsv", sep="\t", index=False
-        )
-        print("Inference results saved!")
-        # 7) Generate predictions on test split → outputs/eval.csv
-        print("Generating predictions for test set...")
-        test_preds = []
-        for idx, doctor_note in enumerate(test_df["doctor_note"]):
-            print(f"Predicting test {idx+1}/{len(test_df)}...")
-            test_preds.append(convert_to_soap(doctor_note))
-            free_memory()
-        eval_df = pd.DataFrame({
-            "id": range(1, len(test_preds) + 1),
-            "predicted_soap": test_preds
-        })
-        eval_df.to_csv("outputs/eval.csv", index=False)
-        print("Evaluation results saved!")
-        # 8) Save complete dataset for reference
-        complete_df = pd.DataFrame({
-            "id": range(1, len(docs) + 1),
-            "doctor_note": docs,
-            "soap_note": soaps
-        })
-        complete_df.to_csv("outputs/complete_dataset.csv", index=False)
-        print("Complete dataset saved!")
-        # 9) Return status + file paths for download
-        return (
-            f"✅ Successfully generated {num_notes} notes!\n"
-            f"📊 Training set: {len(train_df)} notes\n"
-            f"🧪 Test set: {len(test_df)} notes\n"
-            f"💾 Files ready for download",
-            "outputs/inference.tsv",
-            "outputs/eval.csv",
-            "outputs/complete_dataset.csv"
-        )
-    except Exception as e:
-        traceback.print_exc()
-        return (f"❌ Error: {e}", None, None, None)
-    finally:
-        # 10) Cleanup model – done here so a failed run releases it as well.
-        # Rebinding (rather than `del`) is safe even if loading never succeeded.
-        model = None
-        gc.collect()
-        torch.cuda.empty_cache()
-        print("Model cleaned up!")
-# ─── User interface (Gradio Blocks) ────────────────────────────────────────────
-with gr.Blocks(title="SOAP Generator") as demo:
-    # Introductory text describing the SOAP sections and the pipeline steps.
-    gr.Markdown(value="""
-    # 🩺 Medical SOAP Note Generator
-    This app generates realistic doctor's notes and converts them to SOAP format:
-    - **S**ubjective: Patient's reported symptoms
-    - **O**bjective: Observable findings and exam results
-    - **A**ssessment: Clinical diagnosis/impression
-    - **P**lan: Treatment plan and follow-up
-    **Process:**
-    1. Generate 20 realistic doctor's progress notes
-    2. Convert each note to structured SOAP format
-    3. Split into 15 training + 5 test samples
-    4. Generate predictions and export files
-    """)
-    # Single trigger button for the whole pipeline.
-    with gr.Row():
-        btn = gr.Button(
-            value="🚀 Generate & Export SOAP Notes",
-            size="lg",
-            variant="primary",
-        )
-    # Read-only box that receives the status message.
-    with gr.Row():
-        status = gr.Textbox(
-            lines=5,
-            interactive=False,
-            label="📋 Generation Status",
-            placeholder="Click the button above to start generation...",
-        )
-    # One download slot per exported file, laid out side by side.
-    with gr.Row():
-        with gr.Column():
-            inf_file = gr.File(
-                label="📊 Download inference.tsv (Training Predictions)"
-            )
-        with gr.Column():
-            eval_file = gr.File(
-                label="🧪 Download eval.csv (Test Predictions)"
-            )
-        with gr.Column():
-            complete_file = gr.File(
-                label="💾 Download complete_dataset.csv (All Data)"
-            )
-    # The callback takes no inputs; its 4-tuple result fills the four outputs.
-    btn.click(
-        generate_and_export,
-        None,
-        [status, inf_file, eval_file, complete_file],
-    )
-    gr.Markdown(value="""
-    ### 📁 Output Files:
-    - **inference.tsv**: Training set with ground truth and predicted SOAP notes
-    - **eval.csv**: Test set predictions only
-    - **complete_dataset.csv**: All 20 generated doctor notes and SOAP conversions
-    """)
-# Start the app only when this file is executed directly.
-if __name__ == "__main__":
-    demo.launch()