# crawlitall / Dockerfile
# Last change: "Update Dockerfile" (commit 5b68aa9, verified) by hellorahulk
# Base image: Python 3.10 on Debian slim.
# The Debian suite is pinned because the bare `-slim` tag follows whichever Debian
# release is current, while the apt package names installed later in this file
# (libasound2, libgtk-3-0, libatk1.0-0, ...) are the Debian 12 "bookworm" names.
FROM python:3.10-slim-bookworm
# Install the OS-level packages the rest of the build relies on.
#   - wget is used later in this file to fetch Google's repository signing key;
#     gnupg provides the OpenPGP tooling for apt key handling.
#   - build-essential supplies a compiler toolchain (presumably for Python packages
#     that build native extensions - confirm against requirements.txt).
#   - fontconfig, xvfb and the lib* packages look like the usual headless-browser
#     runtime set (NOTE(review): confirm none can be dropped).
# --no-install-recommends keeps optional packages out of the image. ca-certificates
# and xauth are listed explicitly because they would otherwise only be pulled in as
# recommendations (of wget and xvfb respectively): wget needs the CA bundle for
# HTTPS downloads and the xvfb-run wrapper needs xauth.
# update + install + list cleanup stay in one RUN so no stale or leftover apt index
# ends up in a layer. Packages are sorted alphabetically for easier diffs.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        fontconfig \
        gnupg \
        libasound2 \
        libatk1.0-0 \
        libgbm1 \
        libgtk-3-0 \
        libnss3 \
        libxrandr2 \
        libxshmfence1 \
        libxss1 \
        wget \
        xauth \
        xvfb \
    && rm -rf /var/lib/apt/lists/*
# Add the unprivileged account the application runs under.
# A home directory is created for it and the UID is fixed at 1000.
RUN useradd --create-home --uid 1000 crawler
# Install Google Chrome (stable channel) together with additional font packages.
#
# The repository signing key is saved to its own keyring file and bound to this one
# repository through `signed-by`, instead of being fed to `apt-key add`: apt-key is
# deprecated and makes the key trusted for every configured repository. Writing the
# download straight to a file also removes the `wget | ...` pipe, whose download
# failures plain /bin/sh would not propagate. apt accepts ASCII-armored keys when the
# keyring file name ends in `.asc`.
#
# The source list is written with `>` so the file holds exactly this one entry.
# The repository URI and list file name are unchanged; package authenticity is
# enforced by the signature check against the keyring above.
# Recommended packages are intentionally left enabled for this step so Chrome's
# optional runtime pieces are still pulled in.
RUN mkdir -p /etc/apt/keyrings \
    && wget -q -O /etc/apt/keyrings/google-linux-signing-key.asc \
        https://dl-ssl.google.com/linux/linux_signing_key.pub \
    && chmod 0644 /etc/apt/keyrings/google-linux-signing-key.asc \
    && echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/google-linux-signing-key.asc] http://dl.google.com/linux/chrome/deb/ stable main" \
        > /etc/apt/sources.list.d/google.list \
    && apt-get update \
    && apt-get install -y \
        fonts-freefont-ttf \
        fonts-ipafont-gothic \
        fonts-kacst \
        fonts-thai-tlwg \
        fonts-wqy-zenhei \
        google-chrome-stable \
    && rm -rf /var/lib/apt/lists/*
# Application directory; relative paths in later instructions resolve against it.
WORKDIR /app

# Pre-create the per-user cache/state directories (fontconfig cache, pip cache,
# crawl4ai data) and give the crawler account ownership of its home directory and
# of the application directory.
RUN mkdir -p \
        /home/crawler/.cache/fontconfig \
        /home/crawler/.cache/pip \
        /home/crawler/.crawl4ai \
    && chown -R crawler:crawler /home/crawler /app
# Runtime environment, declared in a single instruction.
#   PYTHONUNBUFFERED / PYTHONDONTWRITEBYTECODE - unbuffered output, no .pyc files
#   GRADIO_SERVER_NAME / GRADIO_SERVER_PORT    - bind address and port (0.0.0.0:7860)
#   HOME, XDG_CACHE_HOME, PIP_CACHE_DIR        - point at the crawler user's home/cache
#   FONTCONFIG_PATH                            - fontconfig configuration directory
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_SERVER_PORT=7860 \
    HOME=/home/crawler \
    XDG_CACHE_HOME=/home/crawler/.cache \
    PIP_CACHE_DIR=/home/crawler/.cache/pip \
    FONTCONFIG_PATH=/etc/fonts
# Install Python dependencies and the Playwright-managed Chromium while still root.
# Only requirements.txt is copied first, so this expensive layer is rebuilt only
# when the dependency list changes, not on every source edit.
#
# `playwright install --with-deps` also installs the browser's OS packages through
# apt, so the apt package lists are removed in this same layer to keep them out of
# the image.
#
# HOME and XDG_CACHE_HOME already point at /home/crawler when this step runs, so
# anything it writes there (presumably including the Playwright browser download -
# confirm) is created root-owned. Ownership is handed back to the crawler account
# so the runtime user can read and write its own home directory.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt \
    && playwright install --with-deps chromium \
    && rm -rf /root/.cache/* /var/lib/apt/lists/* \
    && chown -R crawler:crawler /home/crawler
# Everything from here on, including the running container, uses the
# unprivileged crawler account.
USER crawler

# Bring the build context into the working directory, owned by that account.
COPY --chown=crawler:crawler . ./

# Document the listening port (matches GRADIO_SERVER_PORT set in this file).
EXPOSE 7860/tcp

# Launch the app in exec form so Python runs as the container's main process.
CMD ["python", "app.py"]