crosse712 committed
Commit 6bfe886 · 1 Parent(s): 37b127f

Add full deployment configuration with low-RAM optimization (3-8GB support)

Files changed (7)
  1. Dockerfile +61 -0
  2. README_HF.md +19 -0
  3. backend/Procfile +1 -0
  4. backend/README.md +28 -0
  5. backend/app.py +18 -0
  6. docker-compose.yml +39 -0
  7. fly.toml +48 -0
Dockerfile ADDED
@@ -0,0 +1,61 @@
+ # Multi-stage build for optimized image size
+ FROM python:3.9-slim as builder
+
+ WORKDIR /app
+
+ # Install build dependencies
+ RUN apt-get update && apt-get install -y \
+     gcc \
+     g++ \
+     git \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Copy and install Python dependencies
+ COPY backend/requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Production stage
+ FROM python:3.9-slim
+
+ WORKDIR /app
+
+ # Install runtime dependencies (curl is needed for the HEALTHCHECK below)
+ RUN apt-get update && apt-get install -y \
+     libgomp1 \
+     libglib2.0-0 \
+     libsm6 \
+     libxext6 \
+     libxrender1 \
+     curl \
+     wget \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Copy Python packages from builder
+ COPY --from=builder /usr/local/lib/python3.9/site-packages /usr/local/lib/python3.9/site-packages
+ COPY --from=builder /usr/local/bin /usr/local/bin
+
+ # Copy application code
+ COPY backend/ ./backend/
+
+ # Set environment variables for memory optimization
+ ENV PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512
+ ENV OMP_NUM_THREADS=4
+ ENV MKL_NUM_THREADS=4
+ ENV NUMEXPR_NUM_THREADS=4
+ ENV TOKENIZERS_PARALLELISM=false
+
+ # Enable extreme memory optimization
+ ENV USE_EXTREME_OPTIMIZATION=true
+ ENV MAX_MEMORY_GB=3
+
+ WORKDIR /app/backend
+
+ # Expose port
+ EXPOSE 8000
+
+ # Health check
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
+     CMD curl -f http://localhost:8000/ || exit 1
+
+ # Start the application with memory-limited configuration
+ CMD ["python", "-m", "uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "1"]
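
The ENV block above only sets the memory knobs; how they take effect depends on the backend code, which is not part of this commit. A minimal sketch, assuming the model is loaded through Hugging Face Transformers with bitsandbytes 4-bit quantization (the `load_model` helper and its defaults are hypothetical), of how `MAX_MEMORY_GB` and `USE_EXTREME_OPTIMIZATION` could be consumed:

```python
# Hypothetical sketch (not from this repo): translating the Dockerfile's
# memory env vars into Transformers loading limits.
import os

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

MAX_MEMORY_GB = int(os.environ.get("MAX_MEMORY_GB", "3"))
USE_EXTREME_OPTIMIZATION = os.environ.get("USE_EXTREME_OPTIMIZATION", "false").lower() == "true"


def load_model(model_id: str = "apple/FastVLM-7B"):
    # Cap how much RAM/VRAM accelerate may use so the container stays under its limit.
    max_memory = {"cpu": f"{MAX_MEMORY_GB}GiB"}
    if torch.cuda.is_available():
        max_memory[0] = f"{MAX_MEMORY_GB}GiB"

    # 4-bit quantization roughly quarters the weight footprint of a 7B model.
    quant_config = BitsAndBytesConfig(load_in_4bit=True) if USE_EXTREME_OPTIMIZATION else None

    return AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        max_memory=max_memory,
        quantization_config=quant_config,
        trust_remote_code=True,
    )
```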
README_HF.md ADDED
@@ -0,0 +1,19 @@
+ ---
+ title: FastVLM Screen Observer
+ emoji: 🖥️
+ colorFrom: blue
+ colorTo: purple
+ sdk: docker
+ sdk_version: "3.9"
+ app_port: 8000
+ pinned: false
+ models:
+   - apple/FastVLM-7B
+ suggested_hardware: t4-small
+ ---
+
+ # FastVLM Screen Observer
+
+ Real-time screen observation using FastVLM-7B with extreme memory optimization for 3-8GB RAM systems.
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
backend/Procfile ADDED
@@ -0,0 +1 @@
+ web: cd backend && python -m uvicorn app.main:app --host 0.0.0.0 --port $PORT --workers 1
backend/README.md ADDED
@@ -0,0 +1,28 @@
+ ---
+ title: FastVLM Screen Observer
+ emoji: 🖥️
+ colorFrom: blue
+ colorTo: green
+ sdk: docker
+ sdk_version: 3.9
+ app_file: app/main.py
+ pinned: false
+ models:
+   - apple/FastVLM-7B
+ ---
+
+ # FastVLM Screen Observer Backend
+
+ Real-time screen observation and analysis using Apple's FastVLM-7B model.
+
+ ## Requirements
+ - 14GB+ RAM for model weights
+ - GPU (CUDA or MPS) recommended
+ - Python 3.9+
+
+ ## API Endpoints
+ - `GET /` - Status check
+ - `POST /analyze` - Screen analysis
+ - `POST /demo` - Automation demo
+ - `GET /export` - Export logs
+ - `GET /logs/stream` - Stream logs via SSE
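
For reference, a minimal client sketch against the endpoints listed above; the `/analyze` payload fields and the response shapes are assumptions, not taken from `app/main.py`:

```python
# Hypothetical client for the endpoints listed in backend/README.md.
import json

import requests

BASE_URL = "http://localhost:8000"

# GET / - status check
print(requests.get(f"{BASE_URL}/").json())

# POST /analyze - trigger a screen analysis (payload fields are illustrative)
resp = requests.post(f"{BASE_URL}/analyze", json={"include_thumbnail": False})
print(json.dumps(resp.json(), indent=2))

# GET /logs/stream - consume the SSE log stream line by line
with requests.get(f"{BASE_URL}/logs/stream", stream=True) as stream:
    for line in stream.iter_lines():
        if line:
            print(line.decode("utf-8"))
```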
backend/app.py ADDED
@@ -0,0 +1,18 @@
+ """
+ Gradio wrapper for Hugging Face Spaces deployment
+ """
+ import gradio as gr
+ import uvicorn
+ from app.main import app
+
+ # Create a minimal Gradio UI and mount it onto the FastAPI app
+ def create_gradio_app():
+     # mount_gradio_app needs a Blocks instance; this one just shows a status page
+     with gr.Blocks() as demo:
+         gr.Markdown("FastVLM Screen Observer backend is running.")
+     # This allows the FastAPI routes to run alongside Gradio
+     return gr.mount_gradio_app(app, demo, path="/")
+
+ # For Hugging Face Spaces
+ if __name__ == "__main__":
+     uvicorn.run(create_gradio_app(), host="0.0.0.0", port=8000)
docker-compose.yml ADDED
@@ -0,0 +1,39 @@
+ version: '3.8'
+
+ services:
+   backend:
+     build: .
+     ports:
+       - "8000:8000"
+     environment:
+       - USE_EXTREME_OPTIMIZATION=true
+       - MAX_MEMORY_GB=3
+       - PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:256
+       - OMP_NUM_THREADS=2
+     volumes:
+       - model_cache:/root/.cache/huggingface
+       - ./logs:/app/backend/logs
+     deploy:
+       resources:
+         limits:
+           memory: 3G
+         reservations:
+           memory: 2G
+     restart: unless-stopped
+
+   frontend:
+     image: node:18-alpine
+     working_dir: /app
+     volumes:
+       - ./frontend:/app
+     command: sh -c "npm install && npm run build && npm run preview -- --host 0.0.0.0 --port 5173"
+     ports:
+       - "5173:5173"
+     environment:
+       - VITE_API_URL=http://localhost:8000
+     depends_on:
+       - backend
+
+ volumes:
+   model_cache:
+     driver: local
fly.toml ADDED
@@ -0,0 +1,48 @@
+ # Fly.io configuration for FastVLM with memory optimization
+ app = "fastvlm-screen-observer"
+ primary_region = "sjc"
+
+ [build]
+   dockerfile = "Dockerfile"
+
+ [env]
+   USE_EXTREME_OPTIMIZATION = "true"
+   MAX_MEMORY_GB = "3"
+   PORT = "8000"
+
+ [experimental]
+   allowed_public_ports = []
+   auto_rollback = true
+
+ [[services]]
+   http_checks = []
+   internal_port = 8000
+   processes = ["app"]
+   protocol = "tcp"
+   script_checks = []
+
+   [services.concurrency]
+     hard_limit = 25
+     soft_limit = 20
+     type = "connections"
+
+   [[services.ports]]
+     force_https = true
+     handlers = ["http"]
+     port = 80
+
+   [[services.ports]]
+     handlers = ["tls", "http"]
+     port = 443
+
+   [[services.tcp_checks]]
+     grace_period = "60s"
+     interval = "30s"
+     restart_limit = 0
+     timeout = "10s"
+
+ # Request 4GB RAM (minimum for 4-bit quantized model)
+ [[vm]]
+   cpu_kind = "shared"
+   cpus = 2
+   memory_mb = 4096