# syntax=docker/dockerfile:1.4
FROM python:3.10-slim AS source
ARG HF_API_TOKEN
ARG HF_TOKEN
ARG SRC_URL
# Ensure git and certificates are available for cloning
RUN apt-get update && apt-get install -y --no-install-recommends \
git ca-certificates && rm -rf /var/lib/apt/lists/*
# Clone the repository once in its own stage. Files will be moved to /repo
# Use a shallow clone to reduce time and bandwidth and make caching more stable
# This RUN reads a BuildKit secret at /run/secrets/HF_API_TOKEN (or /run/secrets/HF_TOKEN)
# and falls back to the HF_API_TOKEN/HF_TOKEN build arguments when no secret is mounted.
# It fails early with a clear message when no token is provided.
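# A minimal build invocation under these assumptions (BuildKit enabled; the secret
# file paths and image tag below are illustrative, not part of this repo):
#   DOCKER_BUILDKIT=1 docker build \
#     --secret id=HF_API_TOKEN,src=./hf_token.txt \
#     --secret id=SRC_URL,src=./src_url.txt \
#     -t fathomplayground .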
RUN --mount=type=secret,id=HF_API_TOKEN,required=false --mount=type=secret,id=HF_TOKEN,required=false --mount=type=secret,id=SRC_URL,required=false sh -c '\
if [ -f /run/secrets/HF_API_TOKEN ]; then TOKEN=$(cat /run/secrets/HF_API_TOKEN); \
elif [ -f /run/secrets/HF_TOKEN ]; then TOKEN=$(cat /run/secrets/HF_TOKEN); \
elif [ -n "$HF_API_TOKEN" ]; then TOKEN=$HF_API_TOKEN; \
elif [ -n "$HF_TOKEN" ]; then TOKEN=$HF_TOKEN; \
else echo "ERROR: HF token not provided (set BuildKit secret HF_API_TOKEN/HF_TOKEN or HF_API_TOKEN/HF_TOKEN env)"; exit 1; fi && \
# Attempt to clone directly into /repo. If the remote creates a single top-level
# directory, detect that and move its contents into /repo so /repo/frontend exists.
mkdir -p /repo && \
# Determine source URL: secret at /run/secrets/SRC_URL > ARG SRC_URL
if [ -f /run/secrets/SRC_URL ]; then SRC=$(cat /run/secrets/SRC_URL); \
elif [ -n "$SRC_URL" ]; then SRC=$SRC_URL; \
else echo "ERROR: SRC_URL not provided (set BuildKit secret SRC_URL or build-arg SRC_URL)"; exit 1; fi && \
echo "Cloning from $SRC" && \
# Normalize SRC: remove leading http(s):// if present, then insert token credentials
if echo "$SRC" | grep -qE '^https?://'; then \
NO_SCHEME=$(echo "$SRC" | sed -E 's#^https?://##'); \
else \
NO_SCHEME="$SRC"; \
fi && \
CLONE_URL="https://__token__:$TOKEN@$NO_SCHEME" && \
git clone --depth 1 "$CLONE_URL" /repo_tmp && \
echo "--- Debug: listing /repo_tmp (show hidden and nested) ---" && \
ls -la /repo_tmp || true && \
# If repo_tmp contains exactly one directory and no other files, move its contents up
set -- /repo_tmp/*; count=$#; if [ $count -eq 1 ] && [ -d "$1" ]; then \
echo "--- Single top-level dir detected: moving its contents into /repo ---" && \
mv "$1"/* "$1"/.??* /repo/ 2>/dev/null || true; \
else \
echo "--- Multiple entries detected: moving all into /repo ---" && \
mv /repo_tmp/* /repo/ 2>/dev/null || true; \
mv /repo_tmp/.[!.]* /repo/ 2>/dev/null || true; \
fi && \
rm -rf /repo/.git && rm -rf /repo_tmp'
# Verify the clone succeeded and /repo contains files; fail early with a helpful message
RUN [ -d /repo ] && [ -n "$(ls -A /repo)" ] || (echo "ERROR: clone failed or /repo is empty" && exit 1)
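# If the clone needs debugging, this stage can be built and inspected on its own
# (tag name illustrative):
#   docker build --target source \
#     --secret id=HF_API_TOKEN,src=./hf_token.txt \
#     --secret id=SRC_URL,src=./src_url.txt \
#     -t fathom-source .
#   docker run --rm fathom-source ls -la /repo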
# --- Stage 1: Build React frontend ---
FROM node:20-alpine AS frontend
WORKDIR /app/frontend
# Install dependencies (copied from the cloned source stage)
COPY --from=source /repo/frontend/package*.json ./
RUN npm ci
# Build frontend (source files copied from the cloned source stage)
COPY --from=source /repo/frontend/ ./
RUN npm run build
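# The frontend build can likewise be verified in isolation (same secrets as above,
# tag name illustrative):
#   docker build --target frontend \
#     --secret id=HF_API_TOKEN,src=./hf_token.txt \
#     --secret id=SRC_URL,src=./src_url.txt \
#     -t fathom-frontend .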
# --- Stage 2: Python backend (CPU only) ---
FROM python:3.10-slim AS backend
# Environment setup
ENV DEBIAN_FRONTEND=noninteractive \
PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 \
PIP_NO_CACHE_DIR=1 \
HF_HOME=/app/.cache/huggingface
# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
git curl && \
rm -rf /var/lib/apt/lists/*
# Create non-root user
RUN useradd -m appuser
# Create necessary directories and set permissions
RUN mkdir -p /app/.cache/huggingface \
&& mkdir -p /app/static \
&& chown -R appuser:appuser /app
# Switch to non-root user
USER appuser
WORKDIR /app
# Upgrade pip and install Python dependencies
COPY --from=source /repo/backend/requirements.txt /app/backend/requirements.txt
RUN python3 -m pip install --upgrade pip && \
python3 -m pip install -r /app/backend/requirements.txt
# Copy backend code
COPY --from=source /repo/backend/ /app/backend/
# Fathom-Search-4B files are now part of the backend app directory
# Copy frontend build to static directory
COPY --from=frontend /app/frontend/out/ /app/static/
# App-specific environment variables
ENV STATIC_DIR=/app/static \
MODEL_ID=FractalAIResearch/Fathom-R1-14B \
PIPELINE_TASK=text-generation \
QUANTIZE=auto \
PORT_SERPER_HOST=2221 \
HOST_SERPER_URL=http://0.0.0.0:2221 \
SERPER_URL=http://0.0.0.0:2221 \
PYTHONPATH=/app/backend/app:/app/backend \
MAX_OUTBOUND=256 \
JINA_CACHE_DIR=/app/.cache/jina_cache \
SERPER_CACHE_DIR=/app/.cache/serper_cache \
BOXED_WRAP_WIDTH=130 \
CRAWL4AI_EP=http://localhost:8080 \
CURL_CA_BUNDLE="" \
REQUESTS_CA_BUNDLE="" \
SSL_VERIFY=false
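# These defaults are baked into the image but can be overridden per container at
# run time; the model id below is a placeholder, not a recommendation:
#   docker run -e MODEL_ID=<some-hf-model-id> -e QUANTIZE=auto -p 7860:7860 fathomplayground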
# Create cache directories
RUN mkdir -p /app/.cache/jina_cache /app/.cache/serper_cache && \
chown -R appuser:appuser /app/.cache
# Optional: Healthcheck endpoint - check both services
HEALTHCHECK --interval=30s --timeout=10s --start-period=15s --retries=3 \
CMD curl -f http://localhost:7860/docs && curl -f http://localhost:2221/health || exit 1
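# Once the container is running, the reported health can be checked from the host, e.g.:
#   docker inspect --format '{{.State.Health.Status}}' <container-id>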
EXPOSE 7860 2221
# Create startup script with proper service management
RUN echo '#!/bin/bash\n\
set -e\n\
\n\
# Cleanup function\n\
cleanup() {\n\
echo "๐Ÿ›‘ Shutting down services..."\n\
if [ ! -z "$SERPER_PID" ] && kill -0 $SERPER_PID 2>/dev/null; then\n\
kill $SERPER_PID\n\
echo "โœ… Serper service stopped"\n\
fi\n\
if [ ! -z "$BACKEND_PID" ] && kill -0 $BACKEND_PID 2>/dev/null; then\n\
kill $BACKEND_PID\n\
echo "โœ… Backend service stopped"\n\
fi\n\
exit 0\n\
}\n\
\n\
# Set up signal handlers\n\
trap cleanup SIGTERM SIGINT\n\
\n\
echo "๐Ÿš€ Starting FathomPlayground on Hugging Face Spaces"\n\
echo "โœ… Environment variables configured:"\n\
echo " HF_MODEL_URL: configured"\n\
echo " HOST_SERPER_URL: configured"\n\
echo " PORT_SERPER_HOST: configured"\n\
echo " HF_API_TOKEN: SET"\n\
echo " SERPER_API_KEY: SET"\n\
echo " OPENAI_API_KEY: SET"\n\
echo " HF_TOKEN: SET"\n\
echo " SUMMARY_HF_MODEL_URL: configured"\n\
echo " CRAWL4AI_EP: configured"\n\
echo " JINA_API_KEY: SET"\n\
echo " JINA_CACHE_DIR: configured"\n\
echo " SERPER_CACHE_DIR: configured"\n\
\n\
echo "๐Ÿ” Starting Serper Host Server..."\n\
cd /app/backend/app\n\
python3 -m web_agents_5.sandbox_serper --port 2221 --workers 1 &\n\
SERPER_PID=$!\n\
echo "โœ… Serper service started"\n\
\n\
# Wait for Serper service to be ready\n\
echo "โณ Waiting for Serper service to be ready..."\n\
for i in {1..30}; do\n\
if curl -s http://localhost:2221/health > /dev/null 2>&1; then\n\
echo "โœ… Serper service is ready"\n\
break\n\
fi\n\
if [ $i -eq 30 ]; then\n\
echo "โŒ Serper service failed to start within 30 seconds"\n\
cleanup\n\
exit 1\n\
fi\n\
sleep 1\n\
done\n\
\n\
echo "๐Ÿš€ Starting Backend Service..."\n\
python3 -m uvicorn main:app --host 0.0.0.0 --port 7860 &\n\
BACKEND_PID=$!\n\
echo "โœ… Backend service started on port 7860 (PID: $BACKEND_PID)"\n\
\n\
# Monitor both services\n\
while true; do\n\
if ! kill -0 $SERPER_PID 2>/dev/null; then\n\
echo "โŒ Serper service died, restarting..."\n\
python3 -m web_agents_5.sandbox_serper --port 2221 --workers 1 &\n\
SERPER_PID=$!\n\
echo "โœ… Serper service restarted (PID: $SERPER_PID)"\n\
fi\n\
if ! kill -0 $BACKEND_PID 2>/dev/null; then\n\
echo "โŒ Backend service died, exiting..."\n\
cleanup\n\
exit 1\n\
fi\n\
sleep 5\n\
done' > /app/start.sh && \
chmod +x /app/start.sh && \
chown appuser:appuser /app/start.sh
ENTRYPOINT ["/app/start.sh"]
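# Example run command, publishing both service ports; the API keys are passed as
# plain -e flags purely for illustration:
#   docker run --rm -p 7860:7860 -p 2221:2221 \
#     -e HF_TOKEN=<token> -e SERPER_API_KEY=<key> -e JINA_API_KEY=<key> \
#     fathomplayground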