# syntax=docker/dockerfile:1.4
# --- Stage 0: fetch the application source into /repo ---
# Later stages only COPY --from=source out of /repo/frontend and /repo/backend.
FROM python:3.10-slim AS source

# Build-arg fallbacks, consulted only when the matching BuildKit secret is absent.
# NOTE(review): build args are recorded in the build history; prefer passing the
# token with `--secret id=HF_API_TOKEN,...` (or id=HF_TOKEN).
ARG HF_API_TOKEN
ARG HF_TOKEN
ARG SRC_URL

# git performs the clone; ca-certificates lets it verify the HTTPS remote.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        git \
    && rm -rf /var/lib/apt/lists/*

# Shallow-clone the repository once, in its own stage, and place its files in /repo.
# Token lookup order: secret HF_API_TOKEN, secret HF_TOKEN, build arg HF_API_TOKEN,
# build arg HF_TOKEN. Source URL lookup order: secret SRC_URL, build arg SRC_URL.
# The script runs under `set -eu`, so a failed clone aborts the build right here
# instead of being masked by a later `|| true`.
RUN --mount=type=secret,id=HF_API_TOKEN,required=false \
    --mount=type=secret,id=HF_TOKEN,required=false \
    --mount=type=secret,id=SRC_URL,required=false <<'EOF'
set -eu

# Resolve the access token; empty secret files are skipped (-s).
if [ -s /run/secrets/HF_API_TOKEN ]; then
    TOKEN=$(cat /run/secrets/HF_API_TOKEN)
elif [ -s /run/secrets/HF_TOKEN ]; then
    TOKEN=$(cat /run/secrets/HF_TOKEN)
elif [ -n "${HF_API_TOKEN:-}" ]; then
    TOKEN=$HF_API_TOKEN
elif [ -n "${HF_TOKEN:-}" ]; then
    TOKEN=$HF_TOKEN
else
    echo "ERROR: HF token not provided (set BuildKit secret HF_API_TOKEN/HF_TOKEN or HF_API_TOKEN/HF_TOKEN env)"
    exit 1
fi

# Resolve the repository URL.
if [ -s /run/secrets/SRC_URL ]; then
    SRC=$(cat /run/secrets/SRC_URL)
elif [ -n "${SRC_URL:-}" ]; then
    SRC=$SRC_URL
else
    echo "ERROR: SRC_URL not provided (set BuildKit secret SRC_URL or build-arg SRC_URL)"
    exit 1
fi
echo "Cloning from $SRC"

# Strip a leading http(s):// so the credentials can be spliced in after the scheme.
case "$SRC" in
    https://*) NO_SCHEME=${SRC#https://} ;;
    http://*)  NO_SCHEME=${SRC#http://} ;;
    *)         NO_SCHEME=$SRC ;;
esac

git clone --depth 1 "https://__token__:${TOKEN}@${NO_SCHEME}" /repo_tmp

# .git/config stores the tokenised remote URL; drop it before anything is
# copied so the credential never lands in /repo.
rm -rf /repo_tmp/.git

echo "--- Debug: listing /repo_tmp (show hidden and nested) ---"
ls -la /repo_tmp

# If the checkout has exactly one visible entry and it is a directory, use that
# directory's contents as the repository root so /repo/frontend exists.
mkdir -p /repo
set -- /repo_tmp/*
if [ "$#" -eq 1 ] && [ -d "$1" ]; then
    echo "--- Single top-level dir detected: moving its contents into /repo ---"
    CONTENT_ROOT=$1
else
    echo "--- Multiple entries detected: moving all into /repo ---"
    CONTENT_ROOT=/repo_tmp
fi

# "dir/." copies every entry of dir, dotfiles included, without relying on globs.
cp -a "$CONTENT_ROOT"/. /repo/
rm -rf /repo_tmp

# Fail early with a helpful message if nothing ended up in /repo.
if [ -z "$(ls -A /repo)" ]; then
    echo "ERROR: clone failed or /repo is empty"
    exit 1
fi
EOF
# --- Stage 1: Build React frontend ---
FROM node:20-alpine AS frontend
WORKDIR /app/frontend

# Copy only the manifests first so the dependency layer stays cached until
# package.json or package-lock.json changes.
COPY --from=source /repo/frontend/package.json /repo/frontend/package-lock.json ./

# `npm ci` installs exactly what package-lock.json pins and fails when the
# lockfile is missing or out of sync with package.json. `--frozen-lockfile` is
# a Yarn/pnpm flag that npm does not implement, so it enforced nothing.
RUN npm ci

# Build the frontend from the sources cloned in the `source` stage.
COPY --from=source /repo/frontend/ ./
RUN npm run build
# --- Stage 2: Python backend (CPU only) ---
FROM python:3.10-slim AS backend

# Runtime environment.
# pip runs as the non-root appuser below and should therefore install into
# /home/appuser/.local; its bin directory is put on PATH so console scripts
# from requirements.txt can be found.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    HF_HOME=/app/.cache/huggingface \
    PATH=/home/appuser/.local/bin:$PATH

# System dependencies. curl is used by the healthcheck and the startup script.
# NOTE(review): git is presumably needed by requirements.txt or at runtime; confirm.
# DEBIAN_FRONTEND is set only for this command so it does not leak into the
# runtime environment.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*

# Non-root user with a fixed UID so the runtime user is predictable.
RUN useradd --create-home --uid 1000 appuser

# Create the application directories up front and hand them to appuser.
RUN mkdir -p /app/.cache/huggingface /app/backend /app/static \
    && chown -R appuser:appuser /app

# Everything from here on runs as the non-root user.
USER appuser
WORKDIR /app

# Install Python dependencies before copying the code so this layer is reused
# until requirements.txt changes.
COPY --from=source --chown=appuser:appuser /repo/backend/requirements.txt /app/backend/requirements.txt
RUN python3 -m pip install --upgrade pip && \
    python3 -m pip install -r /app/backend/requirements.txt

# Copy backend code, owned by the runtime user.
COPY --from=source --chown=appuser:appuser /repo/backend/ /app/backend/
# Fathom-Search-4B files are now part of the backend app directory

# Copy the frontend build output into the static directory.
COPY --from=frontend --chown=appuser:appuser /app/frontend/out/ /app/static/
# --- Application configuration (runtime environment variables) ---

# Static assets, model selection and general tuning.
ENV STATIC_DIR=/app/static \
    MODEL_ID=FractalAIResearch/Fathom-R1-14B \
    PIPELINE_TASK=text-generation \
    QUANTIZE=auto \
    PYTHONPATH=/app/backend/app:/app/backend \
    MAX_OUTBOUND=256 \
    BOXED_WRAP_WIDTH=130

# Service endpoints (Serper host on port 2221, Crawl4AI on port 8080).
ENV PORT_SERPER_HOST=2221 \
    HOST_SERPER_URL=http://0.0.0.0:2221 \
    SERPER_URL=http://0.0.0.0:2221 \
    CRAWL4AI_EP=http://localhost:8080

# Cache locations under /app/.cache.
ENV JINA_CACHE_DIR=/app/.cache/jina_cache \
    SERPER_CACHE_DIR=/app/.cache/serper_cache

# NOTE(review): empty CA bundle variables together with SSL_VERIFY=false look
# intended to relax TLS certificate verification; confirm this is acceptable.
ENV CURL_CA_BUNDLE="" \
    REQUESTS_CA_BUNDLE="" \
    SSL_VERIFY=false
# Create the Jina and Serper cache directories. This step runs as appuser inside
# the appuser-owned /app/.cache, so the new directories need no extra chown.
RUN mkdir -p /app/.cache/jina_cache /app/.cache/serper_cache

# Healthcheck: healthy only when both the backend (7860) and the Serper host
# (2221) answer without an HTTP error. -fsS keeps curl quiet except for errors
# and -o /dev/null discards the response body, so the health log is not filled
# with the /docs page on every probe.
HEALTHCHECK --interval=30s --timeout=10s --start-period=15s --retries=3 \
    CMD curl -fsS -o /dev/null http://localhost:7860/docs \
     && curl -fsS -o /dev/null http://localhost:2221/health \
     || exit 1

EXPOSE 7860 2221
# Startup script: launches the Serper host service, waits for it to answer on
# /health, launches the backend, then supervises both. It restarts Serper if it
# dies and exits with status 1 if Serper never becomes ready or the backend dies.
# The script is written with a quoted heredoc so nothing is expanded at build
# time and the file content does not depend on how `echo` treats "\n".
# NOTE(review): the emoji in the log lines were restored from mis-encoded text;
# the rocket and stop-sign glyphs are a best guess - confirm against the original.
COPY --chown=appuser:appuser --chmod=755 <<'EOF' /app/start.sh
#!/bin/bash
set -e

# Stop whichever child services are still running. This function deliberately
# does not exit, so each caller chooses the exit status (previously it always
# exited 0, which turned every failure path into a "successful" exit).
cleanup() {
    echo "🛑 Shutting down services..."
    if [ -n "${SERPER_PID:-}" ] && kill -0 "$SERPER_PID" 2>/dev/null; then
        kill "$SERPER_PID" 2>/dev/null || true
        echo "✅ Serper service stopped"
    fi
    if [ -n "${BACKEND_PID:-}" ] && kill -0 "$BACKEND_PID" 2>/dev/null; then
        kill "$BACKEND_PID" 2>/dev/null || true
        echo "✅ Backend service stopped"
    fi
}

# A requested stop (SIGTERM/SIGINT) is a clean shutdown.
trap 'cleanup; exit 0' SIGTERM SIGINT

echo "🚀 Starting FathomPlayground on Hugging Face Spaces"
echo "✅ Environment variables configured:"
echo " HF_MODEL_URL: configured"
echo " HOST_SERPER_URL: configured"
echo " PORT_SERPER_HOST: configured"
echo " HF_API_TOKEN: SET"
echo " SERPER_API_KEY: SET"
echo " OPENAI_API_KEY: SET"
echo " HF_TOKEN: SET"
echo " SUMMARY_HF_MODEL_URL: configured"
echo " CRAWL4AI_EP: configured"
echo " JINA_API_KEY: SET"
echo " JINA_CACHE_DIR: configured"
echo " SERPER_CACHE_DIR: configured"

echo "🚀 Starting Serper Host Server..."
cd /app/backend/app
python3 -m web_agents_5.sandbox_serper --port 2221 --workers 1 &
SERPER_PID=$!
echo "✅ Serper service started"

# Wait for Serper service to be ready (one probe per second, 30 attempts).
echo "⏳ Waiting for Serper service to be ready..."
for i in {1..30}; do
    if curl -s http://localhost:2221/health > /dev/null 2>&1; then
        echo "✅ Serper service is ready"
        break
    fi
    if [ "$i" -eq 30 ]; then
        echo "❌ Serper service failed to start within 30 seconds"
        cleanup
        exit 1
    fi
    sleep 1
done

echo "🚀 Starting Backend Service..."
python3 -m uvicorn main:app --host 0.0.0.0 --port 7860 &
BACKEND_PID=$!
echo "✅ Backend service started on port 7860 (PID: $BACKEND_PID)"

# Monitor both services.
while true; do
    if ! kill -0 "$SERPER_PID" 2>/dev/null; then
        echo "❌ Serper service died, restarting..."
        python3 -m web_agents_5.sandbox_serper --port 2221 --workers 1 &
        SERPER_PID=$!
        echo "✅ Serper service restarted (PID: $SERPER_PID)"
    fi
    if ! kill -0 "$BACKEND_PID" 2>/dev/null; then
        echo "❌ Backend service died, exiting..."
        cleanup
        exit 1
    fi
    # Sleep in the background and wait on it so a trapped signal is handled
    # immediately instead of after the sleep finishes.
    sleep 5 &
    wait $! || true
done
EOF

ENTRYPOINT ["/app/start.sh"]