# Deployment values for three components: h2ogpt, tgi and vllm.
# Only h2ogpt is enabled here; tgi and vllm are switched off.
# Keys set to null carry no value in this file.
# NOTE(review): the nesting below was reconstructed from a source whose
# line breaks and indentation were lost -- confirm it against the templates
# that consume these values (in particular the children of externalLLM).

nameOverride: ""
fullnameOverride: ""

h2ogpt:
  enabled: true
  stack:
    # -- Run h2oGPT and vLLM on same pod.
    enabled: false
  replicaCount: 1
  image:
    repository: gcr.io/vorvan/h2oai/h2ogpt-runtime
    pullPolicy: IfNotPresent
  initImage:
    repository: null
    tag: null
    pullPolicy: null
  storage:
    size: 128Gi
    class: null
    useEphemeral: true

  # Every external LLM provider below is disabled in this file.
  externalLLM:
    enabled: false
    secret: null
    modelLock: null
    openAIAzure:
      enabled: false
    openAI:
      # Canonical lowercase boolean, matching the other `enabled` flags.
      enabled: false
    replicate:
      enabled: false

  # -- Example configs to use when not using Model Lock and External LLM
  # overrideConfig:
  #   base_model: h2oai/h2ogpt-4096-llama2-7b-chat
  #   use_safetensors: True
  #   prompt_type: llama2
  #   save_dir: /workspace/save/
  #   use_gpu_id: False
  #   score_model: None
  #   max_max_new_tokens: 2048
  #   max_new_tokens: 1024
  overrideConfig: null

  service:
    type: NodePort
    webPort: 80
    gptPort: 8888
    webServiceAnnotations: {}

  updateStrategy:
    type: RollingUpdate

  podSecurityContext:
    runAsNonRoot: true
    runAsUser: 1000
    runAsGroup: 1000
    fsGroup: 1000

  securityContext:
    runAsNonRoot: true
    allowPrivilegeEscalation: false
    capabilities:
      drop:
        - ALL
    seccompProfile:
      type: RuntimeDefault

  resources: null
  nodeSelector: null
  tolerations: null

  env: {}

  podAnnotations: {}
  podLabels: {}
  autoscaling: {}

tgi:
  enabled: false
  replicaCount: 1
  image:
    repository: ghcr.io/huggingface/text-generation-inference
    # Quoted so the version is always read as a string.
    tag: "0.9.3"
    pullPolicy: IfNotPresent
  storage:
    size: 512Gi
    class: null
    useEphemeral: true
  overrideConfig: null
  hfSecret: null
  containerArgs: null

  service:
    type: ClusterIP
    port: 8080

  updateStrategy:
    type: RollingUpdate

  # No security contexts are set for tgi in this file.
  podSecurityContext: null
  securityContext: null

  resources: null
  nodeSelector: null
  tolerations: null

  env: {}

  podAnnotations: {}
  podLabels: {}
  autoscaling: {}

vllm:
  enabled: false
  replicaCount: 1
  image:
    repository: gcr.io/vorvan/h2oai/h2ogpt-runtime
    pullPolicy: IfNotPresent
  imagePullSecrets: null
  storage:
    size: 512Gi
    class: null
    useEphemeral: true
  overrideConfig: null
  # Flag/value pairs, one list item each. Numeric values are quoted so that
  # every item is a string (Kubernetes container args are a list of strings).
  # NOTE(review): presumably rendered into the container's args -- verify
  # against the consuming template.
  containerArgs:
    - "--model"
    - h2oai/h2ogpt-4096-llama2-7b-chat
    - "--tokenizer"
    - hf-internal-testing/llama-tokenizer
    - "--tensor-parallel-size"
    - "2"
    - "--seed"
    - "1234"
    - "--trust-remote-code"

  service:
    type: ClusterIP
    port: 5000

  updateStrategy:
    type: RollingUpdate

  podSecurityContext:
    runAsNonRoot: true
    runAsUser: 1000
    runAsGroup: 1000
    fsGroup: 1000

  securityContext:
    runAsNonRoot: true
    allowPrivilegeEscalation: false
    capabilities:
      drop:
        - ALL
    # NOTE(review): unset here, whereas h2ogpt.securityContext sets
    # seccompProfile.type to RuntimeDefault -- confirm this is intentional.
    seccompProfile: null

  env: {}

  resources: null
  nodeSelector: null
  tolerations: null

  podAnnotations: {}
  podLabels: {}
  autoscaling: {}