Bonosa2 committed on
Commit
afa25f5
·
verified ·
1 Parent(s): bd86d31

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -285
app.py DELETED
@@ -1,285 +0,0 @@
1
- # app.py
2
-
3
- import os
4
- import traceback
5
- import pandas as pd
6
- import torch
7
- import gradio as gr
8
- from transformers import (
9
- logging,
10
- AutoProcessor,
11
- AutoTokenizer,
12
- AutoModelForImageTextToText
13
- )
14
- from sklearn.model_selection import train_test_split
15
- import gc
16
-
17
- # ─── Silence irrelevant warnings ───────────────────────────────────────────────
18
# Only errors from the transformers library are logged from here on.
logging.set_verbosity_error()

# ─── Configuration ────────────────────────────────────────────────────────────
# Access token for the model repository, read from the environment. Startup is
# aborted when it is missing so the problem surfaces before any download starts.
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    raise RuntimeError("Missing HF_TOKEN in env vars – set it under Space Settings → Secrets")
MODEL_ID = "google/gemma-3n-e2b-it"

# ─── Fast startup: load only processor & tokenizer ─────────────────────────────
# The model weights themselves are loaded later, inside generate_and_export().
processor = AutoProcessor.from_pretrained(
    MODEL_ID, trust_remote_code=True, token=HF_TOKEN
)
# NOTE(review): generation code in this file goes through `processor` (and
# `processor.tokenizer`); this standalone tokenizer looks unused — confirm
# before removing it.
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_ID, trust_remote_code=True, token=HF_TOKEN
)
33
-
34
- # ─── Heavy work runs on button click ───────────────────────────────────────────
35
# Prompts sent to the model. Kept as constants so the helpers below stay short.
_NOTE_SYSTEM_TEXT = "You are a medical assistant generating realistic patient encounter notes."
_NOTE_USER_PROMPT = """Generate a realistic, concise doctor's progress note for a single patient encounter.
Include patient symptoms, physical examination findings, and clinical observations.
Keep it brief and medical in nature.

Example format:
Patient presents with [symptoms]. Physical exam reveals [findings]. [Additional observations].

Doctor's Note:"""

_SOAP_SYSTEM_TEXT = "You are a medical documentation assistant. Convert medical notes into structured SOAP format. Be concise and clinical."
_SOAP_USER_TEMPLATE = """Convert the following medical note into proper SOAP format.

Medical Note: {doctor_note}

Please structure your response exactly as follows:

SUBJECTIVE:
[Patient's reported symptoms, complaints, and history]

OBJECTIVE:
[Physical exam findings, vital signs, observable data]

ASSESSMENT:
[Clinical diagnosis, differential diagnosis, or impression]

PLAN:
[Treatment plan, medications, follow-up instructions]

SOAP Note:"""


def _free_memory():
    """Run the garbage collector, then release cached CUDA memory."""
    # Collect first so tensors kept alive only by reference cycles are freed
    # before the CUDA allocator cache is emptied.
    gc.collect()
    torch.cuda.empty_cache()


def _chat_generate(model, device, system_text, user_text, *,
                   max_new_tokens, temperature, repetition_penalty):
    """Send one system + user chat turn to the model and return the decoded reply.

    Args:
        model: Loaded generation model exposing ``generate``.
        device: Device the tokenized inputs are moved to.
        system_text: Text of the system message.
        user_text: Text of the user message.
        max_new_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature.
        repetition_penalty: Repetition penalty passed to ``generate``.

    Returns:
        The generated text with the prompt removed and whitespace stripped.
    """
    messages = [
        {"role": "system", "content": [{"type": "text", "text": system_text}]},
        {"role": "user", "content": [{"type": "text", "text": user_text}]},
    ]
    inputs = processor.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_tensors="pt",
        return_dict=True,
    ).to(device)

    out = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_p=0.9,
        temperature=temperature,
        repetition_penalty=repetition_penalty,
        pad_token_id=processor.tokenizer.eos_token_id,
        use_cache=False,
    )
    # Only the tokens after the prompt are decoded.
    prompt_len = inputs["input_ids"].shape[-1]
    decoded = processor.batch_decode(out[:, prompt_len:], skip_special_tokens=True)
    return decoded[0].strip()


def _generate_doctor_note(model, device) -> str:
    """Ask the model for one synthetic doctor's progress note."""
    return _chat_generate(
        model, device, _NOTE_SYSTEM_TEXT, _NOTE_USER_PROMPT,
        max_new_tokens=150, temperature=0.8, repetition_penalty=1.2,
    )


def _convert_to_soap(model, device, doctor_note: str) -> str:
    """Ask the model to rewrite ``doctor_note`` in SOAP format."""
    return _chat_generate(
        model, device, _SOAP_SYSTEM_TEXT,
        _SOAP_USER_TEMPLATE.format(doctor_note=doctor_note),
        max_new_tokens=250, temperature=0.7, repetition_penalty=1.3,
    )


def _predict_soaps(model, device, notes, split_name, cleanup_every):
    """Convert every note in ``notes`` to SOAP, freeing memory every ``cleanup_every`` items."""
    total = len(notes)
    preds = []
    for done, note in enumerate(notes, start=1):
        print(f"Predicting {split_name} {done}/{total}...")
        preds.append(_convert_to_soap(model, device, note))
        if done % cleanup_every == 0:
            _free_memory()
    return preds


def generate_and_export(num_notes=20, test_size=5):
    """Generate synthetic doctor notes, convert them to SOAP and export the results.

    Loads the model on demand, generates ``num_notes`` doctor notes with a SOAP
    conversion for each, splits them into train/test sets, re-runs the SOAP
    conversion on both splits and writes three files under ``outputs/``.

    Args:
        num_notes: Number of doctor notes to generate (default 20).
        test_size: Number of notes held out for the test split (default 5).

    Returns:
        A 4-tuple ``(status, inference_path, eval_path, complete_path)``. On
        failure the status starts with "❌ Error:" and the three paths are None;
        exceptions are reported through the status instead of being raised.
    """
    model = None
    try:
        # Fail before the expensive model load if the split cannot work.
        if not 0 < test_size < num_notes:
            raise ValueError(
                f"test_size must be between 1 and num_notes - 1 "
                f"(got test_size={test_size}, num_notes={num_notes})"
            )

        # 1) Lazy-load the full FP16 model
        print("Loading model...")
        model = AutoModelForImageTextToText.from_pretrained(
            MODEL_ID,
            trust_remote_code=True,
            token=HF_TOKEN,
            torch_dtype=torch.float16,
            device_map="auto",
        )
        device = next(model.parameters()).device
        print(f"Model loaded on device: {device}")

        # 2) Generate doctor notes + convert each to SOAP
        print("Generating doctor notes and SOAP conversions...")
        docs, soaps = [], []
        for i in range(1, num_notes + 1):
            print(f"Generating note {i}/{num_notes}...")
            doctor_note = _generate_doctor_note(model, device)
            docs.append(doctor_note)
            soaps.append(_convert_to_soap(model, device, doctor_note))

            # Memory cleanup every 3 iterations
            if i % 3 == 0:
                _free_memory()
                print(f"Memory cleaned after note {i}")
        print("All notes generated successfully!")

        # 3) Split into train / test
        df = pd.DataFrame({"doctor_note": docs, "soap_note": soaps})
        train_df, test_df = train_test_split(df, test_size=test_size, random_state=42)

        os.makedirs("outputs", exist_ok=True)

        # 4) Generate predictions on train split → outputs/inference.tsv
        print("Generating predictions for training set...")
        train_preds = _predict_soaps(
            model, device, train_df["doctor_note"], "train", cleanup_every=3
        )

        inf_df = train_df.reset_index(drop=True).copy()
        inf_df["id"] = inf_df.index + 1
        inf_df["ground_truth_soap"] = inf_df["soap_note"]
        inf_df["predicted_soap"] = train_preds
        inf_df[["id", "ground_truth_soap", "predicted_soap"]].to_csv(
            "outputs/inference.tsv", sep="\t", index=False
        )
        print("Inference results saved!")

        # 5) Generate predictions on test split → outputs/eval.csv
        print("Generating predictions for test set...")
        test_preds = _predict_soaps(
            model, device, test_df["doctor_note"], "test", cleanup_every=1
        )

        eval_df = pd.DataFrame({
            "id": range(1, len(test_preds) + 1),
            "predicted_soap": test_preds,
        })
        eval_df.to_csv("outputs/eval.csv", index=False)
        print("Evaluation results saved!")

        # 6) Save complete dataset for reference
        complete_df = pd.DataFrame({
            "id": range(1, len(docs) + 1),
            "doctor_note": docs,
            "soap_note": soaps,
        })
        complete_df.to_csv("outputs/complete_dataset.csv", index=False)
        print("Complete dataset saved!")

        # 7) Return status + file paths for download
        return (
            f"✅ Successfully generated {num_notes} notes!\n"
            f"📊 Training set: {len(train_df)} notes\n"
            f"🧪 Test set: {len(test_df)} notes\n"
            f"💾 Files ready for download",
            "outputs/inference.tsv",
            "outputs/eval.csv",
            "outputs/complete_dataset.csv",
        )

    except Exception as e:
        traceback.print_exc()
        return (f"❌ Error: {e}", None, None, None)

    finally:
        # Release the model on every exit path, not only on success, so a
        # failed run does not keep the weights resident until the next click.
        if model is not None:
            model = None
            _free_memory()
            print("Model cleaned up!")
233
-
234
- # ─── Gradio UI ─────────────────────────────────────────────────────────────────
235
# Single-page UI: one button runs the whole pipeline; the status box and the
# three file widgets receive the 4-tuple returned by generate_and_export().
with gr.Blocks(title="SOAP Generator") as demo:
    gr.Markdown("""
    # 🩺 Medical SOAP Note Generator

    This app generates realistic doctor's notes and converts them to SOAP format:
    - **S**ubjective: Patient's reported symptoms
    - **O**bjective: Observable findings and exam results
    - **A**ssessment: Clinical diagnosis/impression
    - **P**lan: Treatment plan and follow-up

    **Process:**
    1. Generate 20 realistic doctor's progress notes
    2. Convert each note to structured SOAP format
    3. Split into 15 training + 5 test samples
    4. Generate predictions and export files
    """)

    with gr.Row():
        btn = gr.Button("🚀 Generate & Export SOAP Notes", variant="primary", size="lg")

    with gr.Row():
        status = gr.Textbox(
            label="📋 Generation Status",
            interactive=False,
            lines=5,
            placeholder="Click the button above to start generation...",
        )

    with gr.Row():
        with gr.Column():
            inf_file = gr.File(label="📊 Download inference.tsv (Training Predictions)")
        with gr.Column():
            eval_file = gr.File(label="🧪 Download eval.csv (Test Predictions)")
        with gr.Column():
            complete_file = gr.File(label="💾 Download complete_dataset.csv (All Data)")

    # Output order must match the tuple returned by generate_and_export():
    # (status text, inference.tsv path, eval.csv path, complete_dataset.csv path).
    btn.click(
        fn=generate_and_export,
        inputs=None,
        outputs=[status, inf_file, eval_file, complete_file],
    )

    gr.Markdown("""
    ### 📁 Output Files:
    - **inference.tsv**: Training set with ground truth and predicted SOAP notes
    - **eval.csv**: Test set predictions only
    - **complete_dataset.csv**: All 20 generated doctor notes and SOAP conversions
    """)

if __name__ == "__main__":
    demo.launch()