Spaces:

lukestanley
/

ChillTranslator

Sleeping

ChillTranslator / runpod.dockerfile

Luke Stanley

Expose json typed LLM interface for RunPod

976ea17 8 months ago

1.25 kB

	# Base image: RunPod's base template, pinned to the 0.4.0-cuda11.8.0 tag.
	#   Dockerfile source -> https://github.com/runpod/containers/blob/main/official-templates/base/Dockerfile
	#   DockerHub tags    -> https://hub.docker.com/r/runpod/base/tags
	FROM runpod/base:0.4.0-cuda11.8.0

	# Set the HuggingFace cache directory (HF_HOME) to a path under /runpod-volume
	# so that RunPod's shared cache is used for efficiency.
	# NOTE(review): the base image may already set this value - confirm whether
	# this line is redundant.
	ENV HF_HOME="/runpod-volume/.cache/huggingface/"
	# Alternatively, pre-downloading models into the image may speed up start
	# times while increasing image size; that could be worth it for some use cases.

	# Upgrade pip itself, then install the RunPod SDK at a pinned version.
	# --no-cache-dir keeps pip's download cache out of the image layer.
	RUN python3.11 -m pip install --no-cache-dir --upgrade pip && \
		python3.11 -m pip install --no-cache-dir runpod==1.6.0

	# Python build tooling and runtime dependencies (one per line, sorted).
	# - pydantic-settings was previously listed twice (as pydantic-settings and
	#   pydantic_settings, which pip treats as the same package).
	# - llama-cpp-python is intentionally NOT installed here: it is built with
	#   CUDA from a fork in a later step using --force-reinstall, so installing
	#   the stock package first only added a redundant CPU-only source build.
	# --no-cache-dir keeps pip's download cache out of the image layer.
	RUN python3.11 -m pip install --no-cache-dir \
		cmake \
		hf_transfer \
		huggingface_hub \
		pydantic \
		pydantic-settings \
		pytest \
		scikit-build \
		setuptools

	# Build and install llama-cpp-python with CUDA support.
	# CMAKE_ARGS passes -DLLAMA_CUBLAS=on to the native build.
	# The package comes from a fork branch (named
	# expose_json_grammar_convert_function) rather than from PyPI;
	# --force-reinstall replaces any copy that is already installed.
	ENV CMAKE_ARGS="-DLLAMA_CUBLAS=on"
	RUN python3.11 -m pip install \
		--upgrade \
		--force-reinstall \
		--no-cache-dir \
		git+https://github.com/lukestanley/llama-cpp-python.git@expose_json_grammar_convert_function
	# Install jq.
	# - `&&` (not `;`) so a failed `apt-get update` aborts the build instead of
	#   silently installing from stale or missing package lists.
	# - --no-install-recommends avoids pulling in optional extras.
	# - The apt lists are removed in the same layer so they do not bloat the image.
	RUN apt-get update && \
		apt-get install -y --no-install-recommends jq && \
		rm -rf /var/lib/apt/lists/*
	# Copy the handler and its helper modules into the current working directory.
	# COPY is used instead of ADD: these are plain local files, so ADD's extra
	# behaviours (archive extraction, remote URLs) are not wanted.
	# NOTE(review): CMD launches /runpod_handler.py, so the working directory is
	# assumed to be / at this point - confirm against the base image.
	COPY runpod_handler.py chill.py utils.py promptObjects.py ./

	# Model selection is left disabled here; example values kept for reference:
	#ENV REPO_ID="TheBloke/phi-2-GGUF"
	#ENV MODEL_FILE="phi-2.Q2_K.gguf"
	# Default inference settings exposed to the container as environment variables.
	# NOTE(review): N_GPU_LAYERS=-1 presumably means "offload all layers to the
	# GPU" (llama.cpp convention) - confirm against the handler code.
	ENV N_GPU_LAYERS=-1 \
		CONTEXT_SIZE=2048
	# Start the RunPod handler. Exec (JSON-array) form runs python directly as
	# PID 1 instead of under `/bin/sh -c`, so it receives SIGTERM on container stop.
	# -u keeps stdout/stderr unbuffered so logs appear immediately.
	CMD ["python3.11", "-u", "/runpod_handler.py"]