version: '3'

services:
  h2o-llm:
    build:
      context: ./
      dockerfile: Dockerfile
      args:
        BUILDKIT_INLINE_CACHE: "0"
    image: h2o-llm
    shm_size: '64gb'
    command: generate.py --load_8bit True --base_model 'EleutherAI/gpt-j-6B'
    restart: unless-stopped
    volumes:
      - ./h2o-llm:/root/.cache  # Location where downloaded weights will be stored
      - ./.cache:/workspace/.cache
    ports:
      - 7860:7860
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [ gpu ]

volumes:
  h2o-llm:
    name: h2o-llm
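
# Usage sketch (not part of the original file): the commands below assume Docker
# Compose v2 and the NVIDIA Container Toolkit are installed, since the service
# reserves all NVIDIA GPUs above.
#   docker compose build
#   docker compose up -d
# The app is then reachable on http://localhost:7860 (the port mapped above).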