Spaces:
Runtime error
Runtime error
Commit
·
bbf0437
1
Parent(s):
119610e
app application
Browse files
- .gitignore +303 -0
- app.py +381 -0
- requirements.txt +68 -0
.gitignore
ADDED
|
@@ -0,0 +1,303 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Byte-compiled / optimized / DLL files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
|
| 6 |
+
# C extensions
|
| 7 |
+
*.so
|
| 8 |
+
|
| 9 |
+
# Distribution / packaging
|
| 10 |
+
.Python
|
| 11 |
+
build/
|
| 12 |
+
develop-eggs/
|
| 13 |
+
dist/
|
| 14 |
+
downloads/
|
| 15 |
+
eggs/
|
| 16 |
+
.eggs/
|
| 17 |
+
lib/
|
| 18 |
+
lib64/
|
| 19 |
+
parts/
|
| 20 |
+
sdist/
|
| 21 |
+
var/
|
| 22 |
+
wheels/
|
| 23 |
+
share/python-wheels/
|
| 24 |
+
*.egg-info/
|
| 25 |
+
.installed.cfg
|
| 26 |
+
*.egg
|
| 27 |
+
MANIFEST
|
| 28 |
+
|
| 29 |
+
# PyInstaller
|
| 30 |
+
# Usually these files are written by a python script from a template
|
| 31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 32 |
+
*.manifest
|
| 33 |
+
*.spec
|
| 34 |
+
|
| 35 |
+
# Installer logs
|
| 36 |
+
pip-log.txt
|
| 37 |
+
pip-delete-this-directory.txt
|
| 38 |
+
|
| 39 |
+
# Unit test / coverage reports
|
| 40 |
+
htmlcov/
|
| 41 |
+
.tox/
|
| 42 |
+
.nox/
|
| 43 |
+
.coverage
|
| 44 |
+
.coverage.*
|
| 45 |
+
.cache
|
| 46 |
+
nosetests.xml
|
| 47 |
+
coverage.xml
|
| 48 |
+
*.cover
|
| 49 |
+
*.py,cover
|
| 50 |
+
.hypothesis/
|
| 51 |
+
.pytest_cache/
|
| 52 |
+
cover/
|
| 53 |
+
|
| 54 |
+
# Translations
|
| 55 |
+
*.mo
|
| 56 |
+
*.pot
|
| 57 |
+
|
| 58 |
+
# Django stuff:
|
| 59 |
+
*.log
|
| 60 |
+
local_settings.py
|
| 61 |
+
db.sqlite3
|
| 62 |
+
db.sqlite3-journal
|
| 63 |
+
|
| 64 |
+
# Flask stuff:
|
| 65 |
+
instance/
|
| 66 |
+
.webassets-cache
|
| 67 |
+
|
| 68 |
+
# Scrapy stuff:
|
| 69 |
+
.scrapy
|
| 70 |
+
|
| 71 |
+
# Sphinx documentation
|
| 72 |
+
docs/_build/
|
| 73 |
+
|
| 74 |
+
# PyBuilder
|
| 75 |
+
.pybuilder/
|
| 76 |
+
target/
|
| 77 |
+
|
| 78 |
+
# Jupyter Notebook
|
| 79 |
+
.ipynb_checkpoints
|
| 80 |
+
|
| 81 |
+
# IPython
|
| 82 |
+
profile_default/
|
| 83 |
+
ipython_config.py
|
| 84 |
+
|
| 85 |
+
# pyenv
|
| 86 |
+
# For a library or package, you might want to ignore these files since the code is
|
| 87 |
+
# intended to run in multiple environments; otherwise, check them in:
|
| 88 |
+
# .python-version
|
| 89 |
+
|
| 90 |
+
# pipenv
|
| 91 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
| 92 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
| 93 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
| 94 |
+
# install all needed dependencies.
|
| 95 |
+
#Pipfile.lock
|
| 96 |
+
|
| 97 |
+
# poetry
|
| 98 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
| 99 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 100 |
+
# commonly ignored for libraries.
|
| 101 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
| 102 |
+
#poetry.lock
|
| 103 |
+
|
| 104 |
+
# pdm
|
| 105 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
| 106 |
+
#pdm.lock
|
| 107 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
| 108 |
+
# in version control.
|
| 109 |
+
# https://pdm.fming.dev/#use-with-ide
|
| 110 |
+
.pdm.toml
|
| 111 |
+
|
| 112 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
| 113 |
+
__pypackages__/
|
| 114 |
+
|
| 115 |
+
# Celery stuff
|
| 116 |
+
celerybeat-schedule
|
| 117 |
+
celerybeat.pid
|
| 118 |
+
|
| 119 |
+
# SageMath parsed files
|
| 120 |
+
*.sage.py
|
| 121 |
+
|
| 122 |
+
# Environments
|
| 123 |
+
.env
|
| 124 |
+
.venv
|
| 125 |
+
env/
|
| 126 |
+
venv/
|
| 127 |
+
ENV/
|
| 128 |
+
env.bak/
|
| 129 |
+
venv.bak/
|
| 130 |
+
|
| 131 |
+
# Spyder project settings
|
| 132 |
+
.spyderproject
|
| 133 |
+
.spyproject
|
| 134 |
+
|
| 135 |
+
# Rope project settings
|
| 136 |
+
.ropeproject
|
| 137 |
+
|
| 138 |
+
# mkdocs documentation
|
| 139 |
+
/site
|
| 140 |
+
|
| 141 |
+
# mypy
|
| 142 |
+
.mypy_cache/
|
| 143 |
+
.dmypy.json
|
| 144 |
+
dmypy.json
|
| 145 |
+
|
| 146 |
+
# Pyre type checker
|
| 147 |
+
.pyre/
|
| 148 |
+
|
| 149 |
+
# pytype static type analyzer
|
| 150 |
+
.pytype/
|
| 151 |
+
|
| 152 |
+
# Cython debug symbols
|
| 153 |
+
cython_debug/
|
| 154 |
+
|
| 155 |
+
# PyCharm
|
| 156 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
| 157 |
+
# be added to the global gitignore or merged into this project gitignore. For a PyCharm
|
| 158 |
+
# project, it is recommended to ignore these files.
|
| 159 |
+
.idea/
|
| 160 |
+
|
| 161 |
+
# VS Code
|
| 162 |
+
.vscode/
|
| 163 |
+
|
| 164 |
+
# macOS
|
| 165 |
+
.DS_Store
|
| 166 |
+
.AppleDouble
|
| 167 |
+
.LSOverride
|
| 168 |
+
|
| 169 |
+
# Icon must end with two \r
|
| 170 |
+
Icon
|
| 171 |
+
|
| 172 |
+
# Thumbnails
|
| 173 |
+
._*
|
| 174 |
+
|
| 175 |
+
# Files that might appear in the root of a volume
|
| 176 |
+
.DocumentRevisions-V100
|
| 177 |
+
.fseventsd
|
| 178 |
+
.Spotlight-V100
|
| 179 |
+
.TemporaryItems
|
| 180 |
+
.Trashes
|
| 181 |
+
.VolumeIcon.icns
|
| 182 |
+
.com.apple.timemachine.donotpresent
|
| 183 |
+
|
| 184 |
+
# Directories potentially created on remote AFP share
|
| 185 |
+
.AppleDB
|
| 186 |
+
.AppleDesktop
|
| 187 |
+
Network Trash Folder
|
| 188 |
+
Temporary Items
|
| 189 |
+
.apdisk
|
| 190 |
+
|
| 191 |
+
# Windows
|
| 192 |
+
Thumbs.db
|
| 193 |
+
Thumbs.db:encryptable
|
| 194 |
+
ehthumbs.db
|
| 195 |
+
ehthumbs_vista.db
|
| 196 |
+
|
| 197 |
+
# Dump file
|
| 198 |
+
*.stackdump
|
| 199 |
+
|
| 200 |
+
# Folder config file
|
| 201 |
+
[Dd]esktop.ini
|
| 202 |
+
|
| 203 |
+
# Recycle Bin used on file shares
|
| 204 |
+
$RECYCLE.BIN/
|
| 205 |
+
|
| 206 |
+
# Windows Installer files
|
| 207 |
+
*.cab
|
| 208 |
+
*.msi
|
| 209 |
+
*.msix
|
| 210 |
+
*.msm
|
| 211 |
+
*.msp
|
| 212 |
+
|
| 213 |
+
# Windows shortcuts
|
| 214 |
+
*.lnk
|
| 215 |
+
|
| 216 |
+
# Linux
|
| 217 |
+
*~
|
| 218 |
+
|
| 219 |
+
# temporary files which can be created if a process still has a handle open of a deleted file
|
| 220 |
+
.fuse_hidden*
|
| 221 |
+
|
| 222 |
+
# KDE directory preferences
|
| 223 |
+
.directory
|
| 224 |
+
|
| 225 |
+
# Linux trash folder which might appear on any partition or disk
|
| 226 |
+
.Trash-*
|
| 227 |
+
|
| 228 |
+
# .nfs files are created when an open file is removed but is still being accessed
|
| 229 |
+
.nfs*
|
| 230 |
+
|
| 231 |
+
# Project-specific ignores
|
| 232 |
+
# Model files and caches
|
| 233 |
+
models/
|
| 234 |
+
*.bin
|
| 235 |
+
*.safetensors
|
| 236 |
+
*.onnx
|
| 237 |
+
*.pb
|
| 238 |
+
*.tflite
|
| 239 |
+
*.pth
|
| 240 |
+
*.pt
|
| 241 |
+
*.h5
|
| 242 |
+
*.pkl
|
| 243 |
+
*.pickle
|
| 244 |
+
|
| 245 |
+
# Hugging Face cache
|
| 246 |
+
.cache/
|
| 247 |
+
transformers_cache/
|
| 248 |
+
huggingface_hub/
|
| 249 |
+
|
| 250 |
+
# Gradio specific
|
| 251 |
+
gradio_cached_examples/
|
| 252 |
+
flagged/
|
| 253 |
+
|
| 254 |
+
# Temporary files
|
| 255 |
+
temp/
|
| 256 |
+
tmp/
|
| 257 |
+
*.tmp
|
| 258 |
+
*.temp
|
| 259 |
+
|
| 260 |
+
# Log files
|
| 261 |
+
*.log
|
| 262 |
+
logs/
|
| 263 |
+
|
| 264 |
+
# Data files (if sensitive)
|
| 265 |
+
data/
|
| 266 |
+
datasets/
|
| 267 |
+
*.csv
|
| 268 |
+
*.json
|
| 269 |
+
*.xml
|
| 270 |
+
*.yaml
|
| 271 |
+
*.yml
|
| 272 |
+
|
| 273 |
+
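# Media files (examples might be large)
# NOTE: app.py references files under examples/ (*.jpg, *.mp4) and assets/ (*.png);
# the rules below keep those files out of the repository.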
|
| 274 |
+
examples/
|
| 275 |
+
assets/
|
| 276 |
+
media/
|
| 277 |
+
*.mp4
|
| 278 |
+
*.avi
|
| 279 |
+
*.mov
|
| 280 |
+
*.jpg
|
| 281 |
+
*.jpeg
|
| 282 |
+
*.png
|
| 283 |
+
*.gif
|
| 284 |
+
*.bmp
|
| 285 |
+
*.tiff
|
| 286 |
+
*.webp
|
| 287 |
+
|
| 288 |
+
# Configuration files with secrets
|
| 289 |
+
config.ini
|
| 290 |
+
secrets.json
|
| 291 |
+
.secrets
|
| 292 |
+
credentials.json
|
| 293 |
+
|
| 294 |
+
# Backup files
|
| 295 |
+
*.bak
|
| 296 |
+
*.backup
|
| 297 |
+
*.old
|
| 298 |
+
|
| 299 |
+
# Node.js (if using any frontend tools)
|
| 300 |
+
node_modules/
|
| 301 |
+
npm-debug.log*
|
| 302 |
+
yarn-debug.log*
|
| 303 |
+
yarn-error.log*
|
app.py
ADDED
|
@@ -0,0 +1,381 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import re
|
| 5 |
+
import tempfile
|
| 6 |
+
from collections.abc import Iterator
|
| 7 |
+
from threading import Thread
|
| 8 |
+
|
| 9 |
+
import cv2
|
| 10 |
+
import gradio as gr
|
| 11 |
+
import spaces
|
| 12 |
+
import torch
|
| 13 |
+
from loguru import logger
|
| 14 |
+
from PIL import Image
|
| 15 |
+
from transformers import AutoProcessor, AutoModelForImageTextToText, TextIteratorStreamer
|
| 16 |
+
|
| 17 |
+
# Checkpoint to load; the MODEL_ID environment variable overrides the default.
model_id = os.environ.get("MODEL_ID", "google/medgemma-4b-it")

# The processor and the model are both loaded once, at import time.
processor = AutoProcessor.from_pretrained(model_id)
model = AutoModelForImageTextToText.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)

# Upper bound on images per conversation; also the number of frames sampled
# from an uploaded video. Overridable through the MAX_NUM_IMAGES env variable.
MAX_NUM_IMAGES = int(os.environ.get("MAX_NUM_IMAGES", "5"))
|
| 24 |
+
|
| 25 |
+
def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
    """Count the images and videos among the files attached to a new message.

    A path is treated as a video when it ends with ".mp4" (case-sensitive);
    every other path is counted as an image.

    Args:
        paths: File paths attached to the incoming message.

    Returns:
        A ``(image_count, video_count)`` tuple.
    """
    video_flags = [candidate.endswith(".mp4") for candidate in paths]
    num_videos = sum(video_flags)
    num_images = len(video_flags) - num_videos
    return num_images, num_videos
|
| 34 |
+
|
| 35 |
+
def count_files_in_history(history: list[dict]) -> tuple[int, int]:
    """Count the images and videos that users attached earlier in the chat.

    Only entries whose role is "user" and whose content is not a plain string
    are considered; the first element of such a content value is taken as the
    file path. Paths ending with ".mp4" count as videos, all others as images.

    Args:
        history: Chat history entries, each with "role" and "content" keys.

    Returns:
        A ``(image_count, video_count)`` tuple.
    """
    media_paths = [
        entry["content"][0]
        for entry in history
        if entry["role"] == "user" and not isinstance(entry["content"], str)
    ]
    num_videos = sum(1 for media_path in media_paths if media_path.endswith(".mp4"))
    return len(media_paths) - num_videos, num_videos
|
| 46 |
+
|
| 47 |
+
def validate_media_constraints(message: dict, history: list[dict]) -> bool:
    """Check the media limits for a new message against the whole conversation.

    The rules, checked in order, are: at most one video overall; a video may
    not be combined with images or with "<image>" tags; without a video, at
    most MAX_NUM_IMAGES images overall; and, when "<image>" tags are used,
    their number must equal the number of images in the new message.
    A Gradio warning is raised for the first rule that is violated.

    Args:
        message: New message with "text" and "files" keys.
        history: Previous chat entries.

    Returns:
        True when every rule is satisfied, False otherwise.
    """
    added_images, added_videos = count_files_in_new_message(message["files"])
    prior_images, prior_videos = count_files_in_history(history)
    total_images = prior_images + added_images
    total_videos = prior_videos + added_videos

    if total_videos > 1:
        gr.Warning("Only one video is supported.")
        return False

    if total_videos == 1 and total_images > 0:
        gr.Warning("Mixing images and videos is not allowed.")
        return False

    if total_videos == 1 and "<image>" in message["text"]:
        gr.Warning("Using <image> tags with video files is not supported.")
        return False

    if total_videos == 0 and total_images > MAX_NUM_IMAGES:
        gr.Warning(f"You can upload up to {MAX_NUM_IMAGES} images.")
        return False

    # Tags are optional, but once present they must pair up one-to-one with
    # the images attached to this message (not with images from the history).
    tag_total = message["text"].count("<image>")
    if tag_total and tag_total != added_images:
        gr.Warning("The number of <image> tags in the text does not match the number of images.")
        return False

    return True
|
| 69 |
+
|
| 70 |
+
def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
    """Sample up to MAX_NUM_IMAGES evenly spaced frames from a video file.

    Args:
        video_path: Path of the video to read with OpenCV.

    Returns:
        A list of ``(frame, timestamp)`` pairs, where ``frame`` is an RGB PIL
        image and ``timestamp`` is the frame index divided by the reported
        frame rate, rounded to two decimals. When the container reports no
        usable frame rate the timestamp falls back to 0.0. Frames that fail
        to decode are skipped, so the list may be shorter than MAX_NUM_IMAGES
        (and is empty when the video cannot be read).
    """
    vidcap = cv2.VideoCapture(video_path)
    frames: list[tuple[Image.Image, float]] = []
    try:
        fps = vidcap.get(cv2.CAP_PROP_FPS)
        total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Step between sampled frames; at least 1 so short clips still work.
        frame_interval = max(total_frames // MAX_NUM_IMAGES, 1)
        # The stop bound caps the range at MAX_NUM_IMAGES indices.
        last_index = min(total_frames, MAX_NUM_IMAGES * frame_interval)

        for i in range(0, last_index, frame_interval):
            vidcap.set(cv2.CAP_PROP_POS_FRAMES, i)
            success, image = vidcap.read()
            if not success:
                continue

            # OpenCV decodes to BGR; PIL expects RGB.
            pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            # A reported FPS of 0 (or NaN) would otherwise raise
            # ZeroDivisionError or produce a meaningless timestamp.
            timestamp = round(i / fps, 2) if fps > 0 else 0.0
            frames.append((pil_image, timestamp))
    finally:
        # Always free the capture handle, even if decoding raised.
        vidcap.release()
    return frames
|
| 92 |
+
|
| 93 |
+
def process_video(video_path: str) -> list[dict]:
    """Turn a video into alternating text/image chat content entries.

    Each frame returned by ``downsample_video`` is written to its own PNG
    temporary file (created with ``delete=False``, so the file stays on disk
    after this function returns) and contributes two entries: a text entry
    "Frame <timestamp>:" followed by an image entry pointing at that file.

    Args:
        video_path: Path of the video to sample.

    Returns:
        The list of content dictionaries, in frame order.
    """
    content = []
    for pil_image, timestamp in downsample_video(video_path):
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as frame_file:
            frame_path = frame_file.name
            pil_image.save(frame_path)
            content.extend(
                (
                    {"type": "text", "text": f"Frame {timestamp}:"},
                    {"type": "image", "url": frame_path},
                )
            )
    logger.debug(f"{content=}")
    return content
|
| 104 |
+
|
| 105 |
+
def process_interleaved_images(message: dict) -> list[dict]:
    """Split message text on "<image>" tags and interleave the attached files.

    The n-th "<image>" tag is replaced by an image entry for the n-th file in
    ``message["files"]``. Text between tags becomes a text entry with
    surrounding whitespace stripped; a segment that is empty or whitespace-only
    is kept verbatim as a text entry.

    Args:
        message: Message with "text" and "files" keys.

    Returns:
        The ordered list of text and image content dictionaries.
    """
    logger.debug(f"{message['files']=}")
    parts = re.split(r"(<image>)", message["text"])
    logger.debug(f"{parts=}")

    content = []
    next_file = 0
    for part in parts:
        logger.debug(f"{part=}")
        if part == "<image>":
            image_path = message["files"][next_file]
            content.append({"type": "image", "url": image_path})
            logger.debug(f"file: {image_path}")
            next_file += 1
            continue
        # Non-blank segments are stripped; blank ones pass through unchanged.
        trimmed = part.strip()
        content.append({"type": "text", "text": trimmed if trimmed else part})
    logger.debug(f"{content=}")
    return content
|
| 124 |
+
|
| 125 |
+
def process_new_user_message(message: dict) -> list[dict]:
    """Convert the incoming multimodal message into chat-template content.

    * No files: a single text entry.
    * First file ends with ".mp4": the text entry followed by the sampled
      video frames from ``process_video`` (only the first file is used).
    * Text contains "<image>": delegated to ``process_interleaved_images``.
    * Otherwise: the text entry followed by one image entry per file.

    Args:
        message: Message with "text" and "files" keys.

    Returns:
        The list of content dictionaries for the new user turn.
    """
    attachments = message["files"]
    prompt = message["text"]

    if not attachments:
        return [{"type": "text", "text": prompt}]

    first_path = attachments[0]
    if first_path.endswith(".mp4"):
        return [{"type": "text", "text": prompt}, *process_video(first_path)]

    if "<image>" in prompt:
        return process_interleaved_images(message)

    image_entries = [{"type": "image", "url": path} for path in attachments]
    return [{"type": "text", "text": prompt}] + image_entries
|
| 139 |
+
|
| 140 |
+
def process_history(history: list[dict]) -> list[dict]:
    """Rebuild chat-template messages from the stored chat history.

    Consecutive non-assistant entries are merged into one user message: string
    content becomes a text entry, any other content contributes an image entry
    for its first element. The merged user message is emitted when the next
    assistant entry is reached, followed by that assistant entry as a text
    message. User entries after the last assistant entry are not emitted.

    Args:
        history: Chat history entries with "role" and "content" keys.

    Returns:
        The list of ``{"role", "content"}`` messages.
    """
    conversation = []
    pending_user_parts: list[dict] = []
    for turn in history:
        if turn["role"] != "assistant":
            payload = turn["content"]
            if isinstance(payload, str):
                entry = {"type": "text", "text": payload}
            else:
                entry = {"type": "image", "url": payload[0]}
            pending_user_parts.append(entry)
            continue

        # An assistant turn closes whatever user content was collected so far.
        if pending_user_parts:
            conversation.append({"role": "user", "content": pending_user_parts})
            pending_user_parts = []
        conversation.append(
            {"role": "assistant", "content": [{"type": "text", "text": turn["content"]}]}
        )
    return conversation
|
| 156 |
+
|
| 157 |
+
@spaces.GPU(duration=120)
def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tokens: int = 2048) -> Iterator[str]:
    """Stream the model's reply to a new multimodal chat message.

    The message is first checked with ``validate_media_constraints``; when it
    is rejected a single empty string is yielded and nothing is generated.
    Otherwise the optional system prompt, the processed history and the new
    user turn are passed through the processor's chat template, generation is
    started on a background thread, and the accumulated text is yielded each
    time the streamer delivers a new chunk.

    Args:
        message: New message with "text" and "files" keys.
        history: Previous chat entries.
        system_prompt: Optional system instruction; skipped when empty.
        max_new_tokens: Generation length limit passed to ``model.generate``.

    Yields:
        The reply generated so far (cumulative, not just the latest chunk).
    """
    if not validate_media_constraints(message, history):
        yield ""
        return

    system_turns = (
        [{"role": "system", "content": [{"type": "text", "text": system_prompt}]}]
        if system_prompt
        else []
    )
    conversation = [
        *system_turns,
        *process_history(history),
        {"role": "user", "content": process_new_user_message(message)},
    ]

    model_inputs = processor.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    )
    model_inputs = model_inputs.to(device=model.device, dtype=torch.bfloat16)

    streamer = TextIteratorStreamer(processor, timeout=30.0, skip_prompt=True, skip_special_tokens=True)
    generation_args = dict(
        model_inputs,
        max_new_tokens=max_new_tokens,
        streamer=streamer,
        temperature=1.0,
        top_p=0.95,
        top_k=64,
        min_p=0.0,
    )
    # generate() blocks until done, so it runs on a worker thread while this
    # generator drains the streamer.
    worker = Thread(target=model.generate, kwargs=generation_args)
    worker.start()

    reply_so_far = ""
    for chunk in streamer:
        reply_so_far += chunk
        yield reply_so_far
|
| 194 |
+
|
| 195 |
+
# Custom CSS for the UI
|
| 196 |
+
custom_css = """
|
| 197 |
+
:root {
|
| 198 |
+
--primary: #4f46e5;
|
| 199 |
+
--primary-dark: #4338ca;
|
| 200 |
+
--text: #1f2937;
|
| 201 |
+
--background: #f9fafb;
|
| 202 |
+
--chat-bg: #ffffff;
|
| 203 |
+
--user-bubble: #e0e7ff;
|
| 204 |
+
--bot-bubble: #f3f4f6;
|
| 205 |
+
--border: #e5e7eb;
|
| 206 |
+
}
|
| 207 |
+
|
| 208 |
+
.dark {
|
| 209 |
+
--primary: #6366f1;
|
| 210 |
+
--primary-dark: #4f46e5;
|
| 211 |
+
--text: #f3f4f6;
|
| 212 |
+
--background: #111827;
|
| 213 |
+
--chat-bg: #1f2937;
|
| 214 |
+
--user-bubble: #4338ca;
|
| 215 |
+
--bot-bubble: #374151;
|
| 216 |
+
--border: #4b5563;
|
| 217 |
+
}
|
| 218 |
+
|
| 219 |
+
body {
|
| 220 |
+
font-family: 'Inter', sans-serif;
|
| 221 |
+
}
|
| 222 |
+
|
| 223 |
+
.gr-chatbot {
|
| 224 |
+
background-color: var(--chat-bg);
|
| 225 |
+
border-radius: 12px;
|
| 226 |
+
border: 1px solid var(--border);
|
| 227 |
+
box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
|
| 228 |
+
}
|
| 229 |
+
|
| 230 |
+
.gr-chat-message {
|
| 231 |
+
padding: 16px 20px;
|
| 232 |
+
border-radius: 12px;
|
| 233 |
+
margin: 8px 0;
|
| 234 |
+
max-width: 80%;
|
| 235 |
+
}
|
| 236 |
+
|
| 237 |
+
.gr-chat-message-user {
|
| 238 |
+
background-color: var(--user-bubble);
|
| 239 |
+
margin-left: auto;
|
| 240 |
+
border-bottom-right-radius: 4px;
|
| 241 |
+
}
|
| 242 |
+
|
| 243 |
+
.gr-chat-message-bot {
|
| 244 |
+
background-color: var(--bot-bubble);
|
| 245 |
+
margin-right: auto;
|
| 246 |
+
border-bottom-left-radius: 4px;
|
| 247 |
+
}
|
| 248 |
+
|
| 249 |
+
.gr-textbox textarea {
|
| 250 |
+
min-height: 120px;
|
| 251 |
+
border-radius: 12px;
|
| 252 |
+
padding: 16px;
|
| 253 |
+
background-color: var(--background);
|
| 254 |
+
color: var(--text);
|
| 255 |
+
border: 1px solid var(--border);
|
| 256 |
+
}
|
| 257 |
+
|
| 258 |
+
.gr-button {
|
| 259 |
+
background-color: var(--primary) !important;
|
| 260 |
+
color: white !important;
|
| 261 |
+
border-radius: 8px !important;
|
| 262 |
+
padding: 10px 20px !important;
|
| 263 |
+
font-weight: 500 !important;
|
| 264 |
+
transition: all 0.2s !important;
|
| 265 |
+
}
|
| 266 |
+
|
| 267 |
+
.gr-button:hover {
|
| 268 |
+
background-color: var(--primary-dark) !important;
|
| 269 |
+
transform: translateY(-1px) !important;
|
| 270 |
+
}
|
| 271 |
+
|
| 272 |
+
.gr-button:active {
|
| 273 |
+
transform: translateY(0) !important;
|
| 274 |
+
}
|
| 275 |
+
|
| 276 |
+
.gr-interface {
|
| 277 |
+
max-width: 900px;
|
| 278 |
+
margin: 0 auto;
|
| 279 |
+
padding: 24px;
|
| 280 |
+
}
|
| 281 |
+
|
| 282 |
+
.gr-header {
|
| 283 |
+
text-align: center;
|
| 284 |
+
margin-bottom: 24px;
|
| 285 |
+
}
|
| 286 |
+
|
| 287 |
+
.gr-header h1 {
|
| 288 |
+
font-size: 2.5rem;
|
| 289 |
+
font-weight: 700;
|
| 290 |
+
color: var(--primary);
|
| 291 |
+
margin-bottom: 8px;
|
| 292 |
+
}
|
| 293 |
+
|
| 294 |
+
.gr-header p {
|
| 295 |
+
color: var(--text);
|
| 296 |
+
opacity: 0.8;
|
| 297 |
+
font-size: 1.1rem;
|
| 298 |
+
}
|
| 299 |
+
|
| 300 |
+
.gr-image-preview {
|
| 301 |
+
border-radius: 8px;
|
| 302 |
+
max-width: 100%;
|
| 303 |
+
max-height: 300px;
|
| 304 |
+
object-fit: contain;
|
| 305 |
+
}
|
| 306 |
+
|
| 307 |
+
.gr-video-preview {
|
| 308 |
+
border-radius: 8px;
|
| 309 |
+
max-width: 100%;
|
| 310 |
+
max-height: 300px;
|
| 311 |
+
}
|
| 312 |
+
"""
|
| 313 |
+
|
| 314 |
+
DESCRIPTION = """\
|
| 315 |
+
## Medical Vision-Language Assistant
|
| 316 |
+
|
| 317 |
+
This advanced AI assistant can understand and analyze medical images, videos, and text.
|
| 318 |
+
Upload images or a video along with your questions to get insights.
|
| 319 |
+
|
| 320 |
+
**Features:**
|
| 321 |
+
- Analyze medical images (X-rays, CT scans, etc.)
|
| 322 |
+
- Process video frames from medical videos
|
| 323 |
+
- Interleave images with text questions
|
| 324 |
+
- Customize system behavior with prompts
|
| 325 |
+
"""
|
| 326 |
+
|
| 327 |
+
# Avatar images are optional. They are only passed to the chatbot when both
# files exist on disk (assets/ is excluded by this repository's .gitignore).
# NOTE(review): presumably a missing avatar path breaks startup - confirm.
_AVATAR_PATHS = ("assets/user.png", "assets/doctor.png")  # (user, bot)
_AVATAR_IMAGES = _AVATAR_PATHS if all(os.path.isfile(path) for path in _AVATAR_PATHS) else None

# (prompt, media file) pairs offered as clickable examples.
_EXAMPLE_SPECS = [
    ("What abnormalities do you see in this chest X-ray?", "examples/chest_xray.jpg"),
    ("Explain the key findings in this MRI scan.", "examples/brain_mri.jpg"),
    ("Describe the progression shown in this video.", "examples/heart_ultrasound.mp4"),
]
# With multimodal=True an example message must be a {"text", "files"} dict, and
# because additional_inputs are configured every example must be a list whose
# first element is that message. Examples whose media file is not present are
# dropped so a missing file cannot break the interface.
_EXAMPLES = [
    [{"text": prompt, "files": [media_path]}]
    for prompt, media_path in _EXAMPLE_SPECS
    if os.path.isfile(media_path)
]

# Chat UI wiring: `run` is the streaming handler; the system prompt and the
# response-length slider are forwarded to it as additional inputs.
demo = gr.ChatInterface(
    fn=run,
    type="messages",
    chatbot=gr.Chatbot(
        type="messages",
        scale=1,
        allow_tags=["image"],
        bubble_full_width=False,
        avatar_images=_AVATAR_IMAGES,
        render=False,  # not rendered here; the instance is handed to ChatInterface
    ),
    textbox=gr.MultimodalTextbox(
        file_types=["image", ".mp4"],
        file_count="multiple",
        autofocus=True,
        placeholder="Type your message or upload images/video...",
    ),
    multimodal=True,
    additional_inputs=[
        gr.Textbox(
            label="System Prompt",
            value="You are a helpful and knowledgeable medical expert. Provide accurate, detailed explanations in clear language.",
            info="Guide the assistant's behavior and expertise",
        ),
        gr.Slider(
            label="Response Length",
            minimum=100,
            maximum=4096,
            step=10,
            value=1024,
            info="Control how verbose the responses are",
        ),
    ],
    stop_btn=None,
    title="",
    description=DESCRIPTION,
    examples=_EXAMPLES or None,
    cache_examples=False,
    css=custom_css,
    theme=gr.themes.Default(
        primary_hue="indigo",
        secondary_hue="gray",
        font=["Inter", "sans-serif"],
    ),
)

if __name__ == "__main__":
    demo.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
accelerate==1.8.1
|
| 2 |
+
aiofiles==24.1.0
|
| 3 |
+
annotated-types==0.7.0
|
| 4 |
+
anyio==4.9.0
|
| 5 |
+
certifi==2025.6.15
|
| 6 |
+
charset-normalizer==3.4.2
|
| 7 |
+
click==8.2.1
|
| 8 |
+
fastapi==0.115.14
|
| 9 |
+
ffmpy==0.6.0
|
| 10 |
+
filelock==3.18.0
|
| 11 |
+
fsspec==2025.5.1
|
| 12 |
+
gradio==5.35.0
|
| 13 |
+
gradio_client==1.10.4
|
| 14 |
+
groovy==0.1.2
|
| 15 |
+
h11==0.16.0
|
| 16 |
+
hf-xet==1.1.5
|
| 17 |
+
httpcore==1.0.9
|
| 18 |
+
httpx==0.28.1
|
| 19 |
+
huggingface-hub==0.33.1
|
| 20 |
+
idna==3.10
|
| 21 |
+
Jinja2==3.1.6
|
| 22 |
+
loguru==0.7.3
|
| 23 |
+
markdown-it-py==3.0.0
|
| 24 |
+
MarkupSafe==3.0.2
|
| 25 |
+
mdurl==0.1.2
|
| 26 |
+
mpmath==1.3.0
|
| 27 |
+
networkx==3.5
|
| 28 |
+
numpy==2.3.1
|
| 29 |
+
opencv-python==4.11.0.86
|
| 30 |
+
orjson==3.10.18
|
| 31 |
+
packaging==25.0
|
| 32 |
+
pandas==2.3.0
|
| 33 |
+
pillow==11.2.1
|
| 34 |
+
psutil==5.9.8
|
| 35 |
+
pydantic==2.11.7
|
| 36 |
+
pydantic_core==2.33.2
|
| 37 |
+
pydub==0.25.1
|
| 38 |
+
Pygments==2.19.2
|
| 39 |
+
python-dateutil==2.9.0.post0
|
| 40 |
+
python-multipart==0.0.20
|
| 41 |
+
pytz==2025.2
|
| 42 |
+
PyYAML==6.0.2
|
| 43 |
+
regex==2024.11.6
|
| 44 |
+
requests==2.32.4
|
| 45 |
+
rich==14.0.0
|
| 46 |
+
ruff==0.12.1
|
| 47 |
+
safehttpx==0.1.6
|
| 48 |
+
safetensors==0.5.3
|
| 49 |
+
semantic-version==2.10.0
|
| 50 |
+
setuptools==80.9.0
|
| 51 |
+
shellingham==1.5.4
|
| 52 |
+
six==1.17.0
|
| 53 |
+
sniffio==1.3.1
|
| 54 |
+
spaces==0.37.1
|
| 55 |
+
starlette==0.46.2
|
| 56 |
+
sympy==1.14.0
|
| 57 |
+
tokenizers==0.21.2
|
| 58 |
+
tomlkit==0.13.3
|
| 59 |
+
torch==2.7.1
|
| 60 |
+
tqdm==4.67.1
|
| 61 |
+
transformers==4.53.0
|
| 62 |
+
typer==0.16.0
|
| 63 |
+
typing-inspection==0.4.1
|
| 64 |
+
typing_extensions==4.14.0
|
| 65 |
+
tzdata==2025.2
|
| 66 |
+
urllib3==2.5.0
|
| 67 |
+
uvicorn==0.35.0
|
| 68 |
+
websockets==15.0.1
|