version: '3'

services:
  h2o-llm:
    build:
      context: ./
      dockerfile: Dockerfile
      args:
        BUILDKIT_INLINE_CACHE: "0"
    image: h2o-llm
    shm_size: '64gb'
    command: generate.py --load_8bit True --base_model 'EleutherAI/gpt-j-6B'
    restart: unless-stopped
    volumes:
      - ./h2o-llm:/root/.cache  # Location where downloaded weights will be stored
      - ./.cache:/workspace/.cache
    ports:
      - 7860:7860
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [ gpu ]

volumes:
  h2o-llm:
    name: h2o-llm
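
# Usage sketch (not part of the original file): the commands below assume Docker
# Compose v2 and the NVIDIA Container Toolkit are installed, since the service
# reserves all NVIDIA GPUs above.
#   docker compose build
#   docker compose up -d
# The app is then reachable on http://localhost:7860 (the port mapped above).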