ashishninehertz committed on
Commit
bbf0437
·
1 Parent(s): 119610e

app application

Browse files
Files changed (3) hide show
  1. .gitignore +303 -0
  2. app.py +381 -0
  3. requirements.txt +68 -0
.gitignore ADDED
@@ -0,0 +1,303 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be added to the global gitignore or merged into this project gitignore. For a PyCharm
158
+ # project, it is recommended to ignore these files.
159
+ .idea/
160
+
161
+ # VS Code
162
+ .vscode/
163
+
164
+ # macOS
165
+ .DS_Store
166
+ .AppleDouble
167
+ .LSOverride
168
+
169
+ # Icon must end with two \r
170
+ Icon
171
+
172
+ # Thumbnails
173
+ ._*
174
+
175
+ # Files that might appear in the root of a volume
176
+ .DocumentRevisions-V100
177
+ .fseventsd
178
+ .Spotlight-V100
179
+ .TemporaryItems
180
+ .Trashes
181
+ .VolumeIcon.icns
182
+ .com.apple.timemachine.donotpresent
183
+
184
+ # Directories potentially created on remote AFP share
185
+ .AppleDB
186
+ .AppleDesktop
187
+ Network Trash Folder
188
+ Temporary Items
189
+ .apdisk
190
+
191
+ # Windows
192
+ Thumbs.db
193
+ Thumbs.db:encryptable
194
+ ehthumbs.db
195
+ ehthumbs_vista.db
196
+
197
+ # Dump file
198
+ *.stackdump
199
+
200
+ # Folder config file
201
+ [Dd]esktop.ini
202
+
203
+ # Recycle Bin used on file shares
204
+ $RECYCLE.BIN/
205
+
206
+ # Windows Installer files
207
+ *.cab
208
+ *.msi
209
+ *.msix
210
+ *.msm
211
+ *.msp
212
+
213
+ # Windows shortcuts
214
+ *.lnk
215
+
216
+ # Linux
217
+ *~
218
+
219
+ # temporary files which can be created if a process still has a handle open of a deleted file
220
+ .fuse_hidden*
221
+
222
+ # KDE directory preferences
223
+ .directory
224
+
225
+ # Linux trash folder which might appear on any partition or disk
226
+ .Trash-*
227
+
228
+ # .nfs files are created when an open file is removed but is still being accessed
229
+ .nfs*
230
+
231
+ # Project-specific ignores
232
+ # Model files and caches
233
+ models/
234
+ *.bin
235
+ *.safetensors
236
+ *.onnx
237
+ *.pb
238
+ *.tflite
239
+ *.pth
240
+ *.pt
241
+ *.h5
242
+ *.pkl
243
+ *.pickle
244
+
245
+ # Hugging Face cache
246
+ .cache/
247
+ transformers_cache/
248
+ huggingface_hub/
249
+
250
+ # Gradio specific
251
+ gradio_cached_examples/
252
+ flagged/
253
+
254
+ # Temporary files
255
+ temp/
256
+ tmp/
257
+ *.tmp
258
+ *.temp
259
+
260
+ # Log files
261
+ *.log
262
+ logs/
263
+
264
+ # Data files (if sensitive)
265
+ data/
266
+ datasets/
267
+ *.csv
268
+ *.json
269
+ *.xml
270
+ *.yaml
271
+ *.yml
272
+
273
+ # Media files (examples might be large)
274
+ examples/
275
+ assets/
276
+ media/
277
+ *.mp4
278
+ *.avi
279
+ *.mov
280
+ *.jpg
281
+ *.jpeg
282
+ *.png
283
+ *.gif
284
+ *.bmp
285
+ *.tiff
286
+ *.webp
287
+
288
+ # Configuration files with secrets
289
+ config.ini
290
+ secrets.json
291
+ .secrets
292
+ credentials.json
293
+
294
+ # Backup files
295
+ *.bak
296
+ *.backup
297
+ *.old
298
+
299
+ # Node.js (if using any frontend tools)
300
+ node_modules/
301
+ npm-debug.log*
302
+ yarn-debug.log*
303
+ yarn-error.log*
app.py ADDED
@@ -0,0 +1,381 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ import os
4
+ import re
5
+ import tempfile
6
+ from collections.abc import Iterator
7
+ from threading import Thread
8
+
9
+ import cv2
10
+ import gradio as gr
11
+ import spaces
12
+ import torch
13
+ from loguru import logger
14
+ from PIL import Image
15
+ from transformers import AutoProcessor, AutoModelForImageTextToText, TextIteratorStreamer
16
+
17
# Checkpoint to serve; override with the MODEL_ID environment variable.
model_id = os.getenv("MODEL_ID", "google/medgemma-4b-it")
processor = AutoProcessor.from_pretrained(model_id)
# Weights are requested in bfloat16 and placed with device_map="auto".
model = AutoModelForImageTextToText.from_pretrained(
    model_id, device_map="auto", torch_dtype=torch.bfloat16
)

# Cap on images per conversation (see validate_media_constraints) and on the
# number of frames sampled from a video (see downsample_video); override with
# the MAX_NUM_IMAGES environment variable.
MAX_NUM_IMAGES = int(os.getenv("MAX_NUM_IMAGES", "5"))
24
+
25
def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
    """Count the images and videos attached to a new message.

    A path is treated as a video when its extension is ``.mp4`` (matched
    case-insensitively, so ``CLIP.MP4`` is a video too); every other path is
    counted as an image.

    Args:
        paths: File paths attached to the incoming message.

    Returns:
        ``(image_count, video_count)``.
    """
    video_count = sum(1 for path in paths if path.lower().endswith(".mp4"))
    return len(paths) - video_count, video_count
34
+
35
def count_files_in_history(history: list[dict]) -> tuple[int, int]:
    """Count the images and videos already present in the chat history.

    Only user turns whose ``content`` is not a plain string are considered;
    the first element of such content is taken as the file path. A path is a
    video when it ends in ``.mp4`` (case-insensitive), otherwise an image.

    Args:
        history: Chat history as a list of ``{"role", "content"}`` dicts.

    Returns:
        ``(image_count, video_count)``.
    """
    image_count = 0
    video_count = 0
    for item in history:
        # Assistant turns and plain-text user turns carry no media.
        if item["role"] != "user" or isinstance(item["content"], str):
            continue
        if item["content"][0].lower().endswith(".mp4"):
            video_count += 1
        else:
            image_count += 1
    return image_count, video_count
46
+
47
def validate_media_constraints(message: dict, history: list[dict]) -> bool:
    """Check the media attached to ``message`` against the conversation limits.

    Counts from the new message and from ``history`` are combined. The first
    violated rule triggers a ``gr.Warning`` and the function returns ``False``;
    if no rule is violated it returns ``True``.

    Rules, in order: at most one video; no images alongside a video; no
    ``<image>`` tags alongside a video; at most ``MAX_NUM_IMAGES`` images;
    when ``<image>`` tags are used, their number must equal the number of
    images attached to the new message.
    """
    fresh_images, fresh_videos = count_files_in_new_message(message["files"])
    past_images, past_videos = count_files_in_history(history)
    total_images = past_images + fresh_images
    total_videos = past_videos + fresh_videos

    problem = None
    if total_videos > 1:
        problem = "Only one video is supported."
    elif total_videos == 1 and total_images > 0:
        problem = "Mixing images and videos is not allowed."
    elif total_videos == 1 and "<image>" in message["text"]:
        problem = "Using <image> tags with video files is not supported."
    elif total_videos == 0 and total_images > MAX_NUM_IMAGES:
        problem = f"You can upload up to {MAX_NUM_IMAGES} images."
    elif "<image>" in message["text"] and message["text"].count("<image>") != fresh_images:
        problem = "The number of <image> tags in the text does not match the number of images."

    if problem is None:
        return True
    gr.Warning(problem)
    return False
69
+
70
def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
    """Sample up to ``MAX_NUM_IMAGES`` evenly spaced frames from a video.

    Args:
        video_path: Path of the video file to read with OpenCV.

    Returns:
        A list of ``(frame, timestamp)`` pairs, where ``frame`` is an RGB PIL
        image and ``timestamp`` is the frame position in seconds rounded to
        two decimals. If the container reports no usable frame rate, the
        frame index is used as the timestamp instead of dividing by zero.
    """
    frames: list[tuple[Image.Image, float]] = []
    vidcap = cv2.VideoCapture(video_path)
    try:
        fps = vidcap.get(cv2.CAP_PROP_FPS)
        total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))

        frame_interval = max(total_frames // MAX_NUM_IMAGES, 1)
        stop = min(total_frames, MAX_NUM_IMAGES * frame_interval)

        for i in range(0, stop, frame_interval):
            if len(frames) >= MAX_NUM_IMAGES:
                break

            vidcap.set(cv2.CAP_PROP_POS_FRAMES, i)
            success, image = vidcap.read()
            if not success:
                continue

            # OpenCV decodes to BGR; PIL expects RGB.
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            # fps can be reported as 0 when the property is unavailable.
            timestamp = round(i / fps, 2) if fps > 0 else float(i)
            frames.append((Image.fromarray(image), timestamp))
    finally:
        # Always free the capture handle, even if decoding raised.
        vidcap.release()
    return frames
92
+
93
def process_video(video_path: str) -> list[dict]:
    """Turn a video into alternating text/image chat-content entries.

    Each frame returned by ``downsample_video`` is written to a PNG temp file
    (created with ``delete=False``, so it stays on disk) and contributes a
    ``"Frame <timestamp>:"`` text entry followed by an image entry whose
    ``url`` is that temp file's path.
    """
    content: list[dict] = []
    for pil_image, timestamp in downsample_video(video_path):
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
            pil_image.save(temp_file.name)
        content.extend(
            (
                {"type": "text", "text": f"Frame {timestamp}:"},
                {"type": "image", "url": temp_file.name},
            )
        )
    logger.debug(f"{content=}")
    return content
104
+
105
def process_interleaved_images(message: dict) -> list[dict]:
    """Build chat content from text containing ``<image>`` placeholders.

    The text is split on ``<image>``; each placeholder is replaced, in order,
    by the next path in ``message["files"]``. Non-blank text fragments are
    added stripped; blank or empty fragments are added unchanged.
    """
    logger.debug(f"{message['files']=}")
    parts = re.split(r"(<image>)", message["text"])
    logger.debug(f"{parts=}")

    content: list[dict] = []
    next_image = 0
    for part in parts:
        logger.debug(f"{part=}")
        if part != "<image>":
            stripped = part.strip()
            # Blank fragments fall back to the raw text, as in the original.
            content.append({"type": "text", "text": stripped if stripped else part})
            continue
        content.append({"type": "image", "url": message["files"][next_image]})
        logger.debug(f"file: {message['files'][next_image]}")
        next_image += 1
    logger.debug(f"{content=}")
    return content
124
+
125
def process_new_user_message(message: dict) -> list[dict]:
    """Convert the incoming multimodal message into chat-content entries.

    * No files: a single text entry.
    * First file is an ``.mp4`` (case-insensitive): the text entry followed by
      the sampled video frames from ``process_video``.
    * Text contains ``<image>`` tags: images are interleaved with the text by
      ``process_interleaved_images``.
    * Otherwise: the text entry followed by one image entry per file.
    """
    files = message["files"]
    if not files:
        return [{"type": "text", "text": message["text"]}]

    # Match the extension case-insensitively so "CLIP.MP4" takes the video path.
    if files[0].lower().endswith(".mp4"):
        return [{"type": "text", "text": message["text"]}, *process_video(files[0])]

    if "<image>" in message["text"]:
        return process_interleaved_images(message)

    return [
        {"type": "text", "text": message["text"]},
        *[{"type": "image", "url": path} for path in files],
    ]
139
+
140
def process_history(history: list[dict]) -> list[dict]:
    """Convert chat history into the message list used for the chat template.

    Consecutive user entries are merged into one user message: string content
    becomes a text entry, anything else becomes an image entry using its first
    element as the URL. The merged user message is emitted when the next
    assistant entry is reached, followed by that assistant entry as a single
    text item. User entries not followed by an assistant entry are not emitted.
    """
    messages: list[dict] = []
    pending_user: list[dict] = []
    for entry in history:
        payload = entry["content"]
        if entry["role"] != "assistant":
            if isinstance(payload, str):
                pending_user.append({"type": "text", "text": payload})
            else:
                pending_user.append({"type": "image", "url": payload[0]})
            continue

        if pending_user:
            messages.append({"role": "user", "content": pending_user})
            pending_user = []
        messages.append({"role": "assistant", "content": [{"type": "text", "text": payload}]})
    return messages
156
+
157
@spaces.GPU(duration=120)
def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tokens: int = 2048) -> Iterator[str]:
    """Stream the model's reply to ``message`` given the chat ``history``.

    Yields the accumulated response text after each streamed chunk. If the
    attached media fail ``validate_media_constraints``, a single empty string
    is yielded and generation is skipped.

    Args:
        message: Incoming multimodal message with ``text`` and ``files`` keys.
        history: Previous turns as ``{"role", "content"}`` dicts.
        system_prompt: Optional system instruction; omitted when empty.
        max_new_tokens: Passed to ``model.generate`` as ``max_new_tokens``.
    """
    if not validate_media_constraints(message, history):
        yield ""
        return

    system_turns = (
        [{"role": "system", "content": [{"type": "text", "text": system_prompt}]}]
        if system_prompt
        else []
    )
    messages = [
        *system_turns,
        *process_history(history),
        {"role": "user", "content": process_new_user_message(message)},
    ]

    encoded = processor.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    )
    inputs = encoded.to(device=model.device, dtype=torch.bfloat16)

    streamer = TextIteratorStreamer(processor, timeout=30.0, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        inputs,
        max_new_tokens=max_new_tokens,
        streamer=streamer,
        temperature=1.0,
        top_p=0.95,
        top_k=64,
        min_p=0.0,
    )
    # Generation runs in a background thread; this generator consumes the streamer.
    worker = Thread(target=model.generate, kwargs=generate_kwargs)
    worker.start()

    response = ""
    for chunk in streamer:
        response += chunk
        yield response
194
+
195
# Custom CSS for the UI, passed to gr.ChatInterface via `css=`.
# Defines colour variables for the default (:root) and `.dark` palettes, then
# styles chat bubbles, the textbox, buttons, the header and media previews.
# NOTE(review): the `.gr-*` selectors assume Gradio renders elements with these
# class names — confirm against the rendered DOM.
custom_css = """
:root {
--primary: #4f46e5;
--primary-dark: #4338ca;
--text: #1f2937;
--background: #f9fafb;
--chat-bg: #ffffff;
--user-bubble: #e0e7ff;
--bot-bubble: #f3f4f6;
--border: #e5e7eb;
}

.dark {
--primary: #6366f1;
--primary-dark: #4f46e5;
--text: #f3f4f6;
--background: #111827;
--chat-bg: #1f2937;
--user-bubble: #4338ca;
--bot-bubble: #374151;
--border: #4b5563;
}

body {
font-family: 'Inter', sans-serif;
}

.gr-chatbot {
background-color: var(--chat-bg);
border-radius: 12px;
border: 1px solid var(--border);
box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
}

.gr-chat-message {
padding: 16px 20px;
border-radius: 12px;
margin: 8px 0;
max-width: 80%;
}

.gr-chat-message-user {
background-color: var(--user-bubble);
margin-left: auto;
border-bottom-right-radius: 4px;
}

.gr-chat-message-bot {
background-color: var(--bot-bubble);
margin-right: auto;
border-bottom-left-radius: 4px;
}

.gr-textbox textarea {
min-height: 120px;
border-radius: 12px;
padding: 16px;
background-color: var(--background);
color: var(--text);
border: 1px solid var(--border);
}

.gr-button {
background-color: var(--primary) !important;
color: white !important;
border-radius: 8px !important;
padding: 10px 20px !important;
font-weight: 500 !important;
transition: all 0.2s !important;
}

.gr-button:hover {
background-color: var(--primary-dark) !important;
transform: translateY(-1px) !important;
}

.gr-button:active {
transform: translateY(0) !important;
}

.gr-interface {
max-width: 900px;
margin: 0 auto;
padding: 24px;
}

.gr-header {
text-align: center;
margin-bottom: 24px;
}

.gr-header h1 {
font-size: 2.5rem;
font-weight: 700;
color: var(--primary);
margin-bottom: 8px;
}

.gr-header p {
color: var(--text);
opacity: 0.8;
font-size: 1.1rem;
}

.gr-image-preview {
border-radius: 8px;
max-width: 100%;
max-height: 300px;
object-fit: contain;
}

.gr-video-preview {
border-radius: 8px;
max-width: 100%;
max-height: 300px;
}
"""
313
+
314
# Markdown blurb passed to gr.ChatInterface as `description`.
DESCRIPTION = """\
## Medical Vision-Language Assistant

This advanced AI assistant can understand and analyze medical images, videos, and text.
Upload images or a video along with your questions to get insights.

**Features:**
- Analyze medical images (X-rays, CT scans, etc.)
- Process video frames from medical videos
- Interleave images with text questions
- Customize system behavior with prompts
"""
326
+
327
# Multimodal chat UI: `run` receives the message dict, the history, and the
# two additional inputs (system prompt, max new tokens) in that order.
# NOTE(review): the avatar and example paths below live under assets/ and
# examples/, which the .gitignore added in this commit excludes — confirm the
# files are actually deployed with the app.
demo = gr.ChatInterface(
    fn=run,
    type="messages",
    chatbot=gr.Chatbot(
        type="messages",
        scale=1,
        allow_tags=["image"],
        bubble_full_width=False,
        avatar_images=(
            "assets/user.png",  # User avatar
            "assets/doctor.png",  # Bot avatar (replace with your own)
        ),
        render=False,  # Not rendered here; the component is handed to ChatInterface
    ),
    textbox=gr.MultimodalTextbox(
        file_types=["image", ".mp4"],
        file_count="multiple",
        autofocus=True,
        placeholder="Type your message or upload images/video...",
    ),
    multimodal=True,
    additional_inputs=[
        gr.Textbox(
            label="System Prompt",
            value="You are a helpful and knowledgeable medical expert. Provide accurate, detailed explanations in clear language.",
            info="Guide the assistant's behavior and expertise",
        ),
        gr.Slider(
            label="Response Length",
            minimum=100,
            maximum=4096,
            step=10,
            value=1024,
            info="Control how verbose the responses are",
        ),
    ],
    stop_btn=None,
    title="",
    description=DESCRIPTION,
    # With additional_inputs, each example row is [message, *additional input
    # values]. The message must therefore be a {"text", "files"} dict; a bare
    # second string would be taken as the System Prompt value, not as a file.
    examples=[
        [{"text": "What abnormalities do you see in this chest X-ray?", "files": ["examples/chest_xray.jpg"]}],
        [{"text": "Explain the key findings in this MRI scan.", "files": ["examples/brain_mri.jpg"]}],
        [{"text": "Describe the progression shown in this video.", "files": ["examples/heart_ultrasound.mp4"]}],
    ],
    cache_examples=False,
    css=custom_css,
    theme=gr.themes.Default(
        primary_hue="indigo",
        secondary_hue="gray",
        font=["Inter", "sans-serif"],
    ),
)

if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accelerate==1.8.1
2
+ aiofiles==24.1.0
3
+ annotated-types==0.7.0
4
+ anyio==4.9.0
5
+ certifi==2025.6.15
6
+ charset-normalizer==3.4.2
7
+ click==8.2.1
8
+ fastapi==0.115.14
9
+ ffmpy==0.6.0
10
+ filelock==3.18.0
11
+ fsspec==2025.5.1
12
+ gradio==5.35.0
13
+ gradio_client==1.10.4
14
+ groovy==0.1.2
15
+ h11==0.16.0
16
+ hf-xet==1.1.5
17
+ httpcore==1.0.9
18
+ httpx==0.28.1
19
+ huggingface-hub==0.33.1
20
+ idna==3.10
21
+ Jinja2==3.1.6
22
+ loguru==0.7.3
23
+ markdown-it-py==3.0.0
24
+ MarkupSafe==3.0.2
25
+ mdurl==0.1.2
26
+ mpmath==1.3.0
27
+ networkx==3.5
28
+ numpy==2.3.1
29
+ opencv-python==4.11.0.86
30
+ orjson==3.10.18
31
+ packaging==25.0
32
+ pandas==2.3.0
33
+ pillow==11.2.1
34
+ psutil==5.9.8
35
+ pydantic==2.11.7
36
+ pydantic_core==2.33.2
37
+ pydub==0.25.1
38
+ Pygments==2.19.2
39
+ python-dateutil==2.9.0.post0
40
+ python-multipart==0.0.20
41
+ pytz==2025.2
42
+ PyYAML==6.0.2
43
+ regex==2024.11.6
44
+ requests==2.32.4
45
+ rich==14.0.0
46
+ ruff==0.12.1
47
+ safehttpx==0.1.6
48
+ safetensors==0.5.3
49
+ semantic-version==2.10.0
50
+ setuptools==80.9.0
51
+ shellingham==1.5.4
52
+ six==1.17.0
53
+ sniffio==1.3.1
54
+ spaces==0.37.1
55
+ starlette==0.46.2
56
+ sympy==1.14.0
57
+ tokenizers==0.21.2
58
+ tomlkit==0.13.3
59
+ torch==2.7.1
60
+ tqdm==4.67.1
61
+ transformers==4.53.0
62
+ typer==0.16.0
63
+ typing-inspection==0.4.1
64
+ typing_extensions==4.14.0
65
+ tzdata==2025.2
66
+ urllib3==2.5.0
67
+ uvicorn==0.35.0
68
+ websockets==15.0.1