{{- /*
Render-time sanity checks on mutually dependent values.
  * TGI and vLLM are alternative inference backends: enabling both aborts the render.
  * The "stack" layout (vLLM running as a second container inside the h2oGPT pod)
    needs both vLLM and h2oGPT to be enabled.
*/}}
{{- if and .Values.vllm.enabled .Values.tgi.enabled }}
  {{- fail "Both TGI and vLLM cannot be enabled at the same time. Enable only one and try again" }}
{{- end }}
{{- if and .Values.h2ogpt.stack.enabled (not (and .Values.vllm.enabled .Values.h2ogpt.enabled)) }}
  {{- fail "If h2oGPT stack is enabled, both vLLM and h2oGPT should be enabled" }}
{{- end }}
---
{{- /*
h2oGPT Deployment.

Rendered only when .Values.h2ogpt.enabled is set. Depending on the values it
takes one of two shapes:
  * stack mode (.Values.h2ogpt.stack.enabled): the vLLM server runs as an extra
    container in the same pod and h2oGPT talks to it on localhost:5000;
  * split mode: an init container blocks pod start-up until the separately
    deployed TGI or vLLM inference service answers over HTTP.
*/}}
{{- if .Values.h2ogpt.enabled }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "h2ogpt.fullname" . }}
  labels:
    app: {{ include "h2ogpt.fullname" . }}
spec:
  # replicas is left out when autoscaling is enabled.
  {{- if not .Values.h2ogpt.autoscaling.enabled }}
  replicas: {{ .Values.h2ogpt.replicaCount }}
  {{- end }}
  selector:
    matchLabels:
      app: {{ include "h2ogpt.fullname" . }}
  {{- if .Values.h2ogpt.updateStrategy }}
  strategy: {{- toYaml .Values.h2ogpt.updateStrategy | nindent 4 }}
  {{- end }}
  template:
    metadata:
      {{- with .Values.h2ogpt.podAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        app: {{ include "h2ogpt.fullname" . }}
    spec:
      {{- with .Values.h2ogpt.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.h2ogpt.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      securityContext:
        {{- toYaml .Values.h2ogpt.podSecurityContext | nindent 8 }}
      # Soft preference: spread h2oGPT replicas across zones.
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchExpressions:
                    - key: app
                      operator: In
                      values:
                        - {{ include "h2ogpt.fullname" . }}
                # topology.kubernetes.io/zone supersedes the deprecated
                # failure-domain.beta.kubernetes.io/zone node label.
                topologyKey: topology.kubernetes.io/zone
      # Split mode only: wait for the external inference service before starting
      # h2oGPT. The TGI/vLLM guard at the top of this file ensures at most one of
      # the two initContainers sections below is rendered.
      {{- if and (.Values.tgi.enabled) (not .Values.h2ogpt.stack.enabled ) }}
      initContainers:
        - name: tgi-check
          securityContext:
            {{- toYaml .Values.h2ogpt.securityContext | nindent 12 }}
          image: "{{ .Values.h2ogpt.initImage.repository | default "busybox" }}:{{ .Values.h2ogpt.initImage.tag | default "1.36" }}"
          imagePullPolicy: {{ .Values.h2ogpt.initImage.pullPolicy | default "IfNotPresent"}}
          command: ["/bin/sh", "-c"]
          args:
            - >
              until wget -O- http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}/ >/dev/null 2>&1;
              do
                echo "Waiting for inference service to become ready...";
                sleep 5;
              done
      {{- end }}
      {{- if and (.Values.vllm.enabled) (not .Values.h2ogpt.stack.enabled ) }}
      initContainers:
        - name: vllm-check
          securityContext:
            {{- toYaml .Values.h2ogpt.securityContext | nindent 12 }}
          image: "{{ .Values.h2ogpt.initImage.repository | default "busybox" }}:{{ .Values.h2ogpt.initImage.tag | default "1.36" }}"
          imagePullPolicy: {{ .Values.h2ogpt.initImage.pullPolicy | default "IfNotPresent"}}
          command: ["/bin/sh", "-c"]
          args:
            - >
              until wget -O- http://{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}/v1/models >/dev/null 2>&1;
              do
                echo "Waiting for inference service to become ready...";
                sleep 5;
              done
      {{- end }}
      {{- with .Values.h2ogpt.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        # Stack mode: vLLM OpenAI-compatible server co-located in this pod.
        {{- if .Values.h2ogpt.stack.enabled }}
        - name: {{ include "h2ogpt.fullname" . }}-vllm-inference
          securityContext:
            {{- toYaml .Values.vllm.securityContext | nindent 12 }}
          image: "{{ .Values.vllm.image.repository }}:{{ .Values.vllm.image.tag | default .Chart.AppVersion }}"
          imagePullPolicy: {{ .Values.vllm.image.pullPolicy }}
          command: ["/h2ogpt_conda/vllm_env/bin/python3.10"]
          args:
            - "-m"
            - "vllm.entrypoints.openai.api_server"
            - "--port"
            - "5000"
            - "--host"
            - "0.0.0.0"
            - "--download-dir"
            - "/workspace/.cache/huggingface/hub"
            # quote escapes embedded quotes/backslashes in user-supplied args.
            {{- range $arg := .Values.vllm.containerArgs }}
            - {{ $arg | quote }}
            {{- end }}
          ports:
            - name: http
              containerPort: 5000
              protocol: TCP
          # The probe target is fixed here; .Values.vllm.*Probe only contributes
          # the remaining probe fields (merged in below httpGet).
          # NOTE(review): the probes hit "/" while h2oGPT polls /v1/models to
          # detect readiness - confirm "/" returns a success code on this server.
          {{- if .Values.vllm.livenessProbe }}
          livenessProbe:
            httpGet:
              path: /
              scheme: HTTP
              port: http
            {{- toYaml .Values.vllm.livenessProbe | nindent 12 }}
          {{- end }}
          {{- if .Values.vllm.readinessProbe }}
          readinessProbe:
            httpGet:
              path: /
              scheme: HTTP
              port: http
            {{- toYaml .Values.vllm.readinessProbe | nindent 12 }}
          {{- end }}
          resources:
            {{- toYaml .Values.vllm.resources | nindent 12 }}
          envFrom:
            - configMapRef:
                name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config
          env:
            - name: NCCL_IGNORE_DISABLED_P2P
              value: "1"
            {{- range $key, $value := .Values.vllm.env }}
            - name: {{ $key | quote }}
              value: {{ $value | quote }}
            {{- end }}
          volumeMounts:
            - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume
              mountPath: /workspace/.cache
              subPath: cache
            - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume
              mountPath: /dev/shm
              subPath: shm
        {{- end }}
        # h2oGPT application container.
        - name: {{ include "h2ogpt.fullname" . }}
          securityContext:
            {{- toYaml .Values.h2ogpt.securityContext | nindent 12 }}
          image: "{{ .Values.h2ogpt.image.repository }}:{{ .Values.h2ogpt.image.tag | default .Chart.AppVersion }}"
          imagePullPolicy: {{ .Values.h2ogpt.image.pullPolicy }}
          command: ["/bin/bash", "-c"]
          # In stack mode, wait until the sidecar vLLM server answers 200 on
          # /v1/models before launching generate.py. The script is a folded (>)
          # scalar: the loop body must stay indented deeper than the "while"
          # line so its line breaks survive folding (the statements carry no ";").
          {{- if .Values.h2ogpt.stack.enabled }}
          args:
            - >
              while [[ "$(curl --insecure -s -o /dev/null -w ''%{http_code}''
              http://localhost:5000/v1/models)" != "200" ]]; do
                echo "Waiting for inference service to become ready... (2sec)"
                sleep 2
              done

              python3 /workspace/generate.py
          {{- else }}
          args:
            - >
              python3 /workspace/generate.py
          {{- end }}
          ports:
            - name: http
              containerPort: 7860
              protocol: TCP
            - name: gpt
              containerPort: 8888
              protocol: TCP
          {{- if .Values.h2ogpt.livenessProbe }}
          livenessProbe:
            httpGet:
              path: /
              scheme: HTTP
              port: http
            {{- toYaml .Values.h2ogpt.livenessProbe | nindent 12 }}
          {{- end }}
          {{- if .Values.h2ogpt.readinessProbe }}
          readinessProbe:
            httpGet:
              path: /
              scheme: HTTP
              port: http
            {{- toYaml .Values.h2ogpt.readinessProbe | nindent 12 }}
          {{- end }}
          resources:
            {{- toYaml .Values.h2ogpt.resources | nindent 12 }}
          envFrom:
            - configMapRef:
                name: {{ include "h2ogpt.fullname" . }}-config
          env:
            # h2ogpt_inference_server points at whichever backend is deployed;
            # it is not set at all when an external LLM is used instead.
            {{- if and .Values.tgi.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }}
            - name: h2ogpt_inference_server
              value: "http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}"
            {{- end }}
            {{- if and .Values.vllm.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }}
            - name: h2ogpt_inference_server
              value: "vllm:{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}"
            {{- end }}
            {{- if and .Values.h2ogpt.stack.enabled (not .Values.h2ogpt.externalLLM.enabled) }}
            - name: h2ogpt_inference_server
              value: "vllm:localhost:5000"
            {{- end }}
            # quote escapes embedded quotes/backslashes in user-supplied values.
            {{- range $key, $value := .Values.h2ogpt.env }}
            - name: {{ $key | quote }}
              value: {{ $value | quote }}
            {{- end }}
            # External LLM credentials are read from the secret named by
            # .Values.h2ogpt.externalLLM.secret.
            {{- if and .Values.h2ogpt.externalLLM.openAIAzure.enabled .Values.h2ogpt.externalLLM.enabled }}
            - name: OPENAI_AZURE_KEY
              valueFrom:
                secretKeyRef:
                  name: {{ .Values.h2ogpt.externalLLM.secret }}
                  key: OPENAI_AZURE_KEY
            - name: OPENAI_AZURE_API_BASE
              valueFrom:
                secretKeyRef:
                  name: {{ .Values.h2ogpt.externalLLM.secret }}
                  key: OPENAI_AZURE_API_BASE
            {{- end }}
            {{- if and .Values.h2ogpt.externalLLM.openAI.enabled .Values.h2ogpt.externalLLM.enabled }}
            - name: OPENAI_API_KEY
              valueFrom:
                secretKeyRef:
                  name: {{ .Values.h2ogpt.externalLLM.secret }}
                  key: OPENAI_API_KEY
            {{- end }}
            {{- if and .Values.h2ogpt.externalLLM.replicate.enabled .Values.h2ogpt.externalLLM.enabled }}
            - name: REPLICATE_API_TOKEN
              valueFrom:
                secretKeyRef:
                  name: {{ .Values.h2ogpt.externalLLM.secret }}
                  key: REPLICATE_API_TOKEN
            {{- end }}
            {{- if .Values.h2ogpt.externalLLM.enabled }}
            - name: H2OGPT_MODEL_LOCK
              value: {{ toJson .Values.h2ogpt.externalLLM.modelLock | quote }}
            - name: H2OGPT_SCORE_MODEL
              value: "None"
            {{- end }}
          volumeMounts:
            - name: {{ include "h2ogpt.fullname" . }}-volume
              mountPath: /workspace/.cache
              subPath: cache
            - name: {{ include "h2ogpt.fullname" . }}-volume
              mountPath: /workspace/save
              subPath: save
      volumes:
        # Each volume is either a pre-created PVC (declared further down in this
        # file) or a generic ephemeral volume, depending on storage.useEphemeral.
        - name: {{ include "h2ogpt.fullname" . }}-volume
          {{- if not .Values.h2ogpt.storage.useEphemeral }}
          persistentVolumeClaim:
            claimName: {{ include "h2ogpt.fullname" . }}-volume
          {{- else }}
          ephemeral:
            volumeClaimTemplate:
              spec:
                accessModes:
                  - ReadWriteOnce
                resources:
                  requests:
                    storage: {{ .Values.h2ogpt.storage.size | quote }}
                storageClassName: {{ .Values.h2ogpt.storage.class }}
          {{- end }}
        {{- if .Values.h2ogpt.stack.enabled }}
        - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume
          {{- if not .Values.vllm.storage.useEphemeral }}
          persistentVolumeClaim:
            claimName: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume
          {{- else }}
          ephemeral:
            volumeClaimTemplate:
              spec:
                accessModes:
                  - ReadWriteOnce
                resources:
                  requests:
                    storage: {{ .Values.vllm.storage.size | quote }}
                storageClassName: {{ .Values.vllm.storage.class }}
          {{- end }}
        {{- end }}
{{- end }}
---
{{- /*
PersistentVolumeClaim backing the h2oGPT "-volume" volume.
Skipped when h2oGPT is disabled or when its storage is ephemeral (the claim is
then generated from the pod's volumeClaimTemplate instead).
*/}}
{{- if and .Values.h2ogpt.enabled (not .Values.h2ogpt.storage.useEphemeral) }}
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: {{ include "h2ogpt.fullname" . }}-volume
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: {{ .Values.h2ogpt.storage.size | quote }}
  storageClassName: {{ .Values.h2ogpt.storage.class }}
{{- end }}
---
{{- /*
TGI inference Deployment.

Rendered when .Values.tgi.enabled is set and the single-pod stack layout is off.
The container image's own entrypoint is used; .Values.tgi.containerArgs are
passed through as its arguments.
*/}}
{{- if and (.Values.tgi.enabled) (not .Values.h2ogpt.stack.enabled ) }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "h2ogpt.fullname" . }}-tgi-inference
  labels:
    app: {{ include "h2ogpt.fullname" . }}-tgi-inference
spec:
  # replicas is left out when autoscaling is enabled.
  {{- if not .Values.tgi.autoscaling.enabled }}
  replicas: {{ .Values.tgi.replicaCount }}
  {{- end }}
  selector:
    matchLabels:
      app: {{ include "h2ogpt.fullname" . }}-tgi-inference
  {{- if .Values.tgi.updateStrategy }}
  strategy: {{- toYaml .Values.tgi.updateStrategy | nindent 4 }}
  {{- end }}
  template:
    metadata:
      {{- with .Values.tgi.podAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        app: {{ include "h2ogpt.fullname" . }}-tgi-inference
    spec:
      {{- with .Values.tgi.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tgi.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      securityContext:
        {{- toYaml .Values.tgi.podSecurityContext | nindent 8 }}
      # NOTE(review): the selector below matches the h2oGPT pods' app label, not
      # this Deployment's own "-tgi-inference" label, so it steers TGI pods away
      # from zones running h2oGPT rather than spreading TGI replicas - confirm
      # this is intended.
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchExpressions:
                    - key: app
                      operator: In
                      values:
                        - {{ include "h2ogpt.fullname" . }}
                # topology.kubernetes.io/zone supersedes the deprecated
                # failure-domain.beta.kubernetes.io/zone node label.
                topologyKey: topology.kubernetes.io/zone
      {{- with .Values.tgi.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: {{ include "h2ogpt.fullname" . }}-tgi-inference
          securityContext:
            {{- toYaml .Values.tgi.securityContext | nindent 12 }}
          image: "{{ .Values.tgi.image.repository }}:{{ .Values.tgi.image.tag }}"
          imagePullPolicy: {{ .Values.tgi.image.pullPolicy }}
          command: []
          args:
            # quote escapes embedded quotes/backslashes in user-supplied args.
            {{- range $arg := .Values.tgi.containerArgs }}
            - {{ $arg | quote }}
            {{- end }}
          ports:
            - name: http
              containerPort: 80
              protocol: TCP
          # The probe target is fixed here; .Values.tgi.*Probe only contributes
          # the remaining probe fields (merged in below httpGet).
          {{- if .Values.tgi.livenessProbe }}
          livenessProbe:
            httpGet:
              path: /
              scheme: HTTP
              port: http
            {{- toYaml .Values.tgi.livenessProbe | nindent 12 }}
          {{- end }}
          {{- if .Values.tgi.readinessProbe }}
          readinessProbe:
            httpGet:
              path: /
              scheme: HTTP
              port: http
            {{- toYaml .Values.tgi.readinessProbe | nindent 12 }}
          {{- end }}
          resources:
            {{- toYaml .Values.tgi.resources | nindent 12 }}
          env:
            {{- range $key, $value := .Values.tgi.env }}
            - name: {{ $key | quote }}
              value: {{ $value | quote }}
            {{- end }}
          envFrom:
            - configMapRef:
                name: {{ include "h2ogpt.fullname" . }}-tgi-inference-config
            - secretRef:
                name: {{ .Values.tgi.hfSecret }}
          volumeMounts:
            - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume
              mountPath: /app/cache
              subPath: cache
            - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume
              mountPath: /data
              subPath: data
            - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume
              mountPath: /dev/shm
              subPath: shm
      volumes:
        # Either the pre-created PVC or a generic ephemeral volume, depending on
        # .Values.tgi.storage.useEphemeral. (A vLLM volume that used to be
        # declared here under "if stack.enabled" could never render, because
        # this whole document is skipped in stack mode; it has been dropped.)
        - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume
          {{- if not .Values.tgi.storage.useEphemeral }}
          persistentVolumeClaim:
            claimName: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume
          {{- else }}
          ephemeral:
            volumeClaimTemplate:
              spec:
                accessModes:
                  - ReadWriteOnce
                resources:
                  requests:
                    storage: {{ .Values.tgi.storage.size | quote }}
                storageClassName: {{ .Values.tgi.storage.class }}
          {{- end }}
{{- end }}
---
{{- /*
PersistentVolumeClaim backing the TGI "-tgi-inference-volume" volume.
Skipped when TGI is disabled or when its storage is ephemeral.
*/}}
{{- if and .Values.tgi.enabled (not .Values.tgi.storage.useEphemeral) }}
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: {{ .Values.tgi.storage.size | quote }}
  storageClassName: {{ .Values.tgi.storage.class }}
{{- end }}
---
{{- /*
Standalone vLLM inference Deployment.

Rendered when .Values.vllm.enabled is set and the single-pod stack layout is
off (in stack mode the same container is embedded in the h2oGPT pod instead).
Runs the vLLM OpenAI-compatible API server on port 5000.
*/}}
{{- if and (.Values.vllm.enabled) (not .Values.h2ogpt.stack.enabled ) }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "h2ogpt.fullname" . }}-vllm-inference
  labels:
    app: {{ include "h2ogpt.fullname" . }}-vllm-inference
spec:
  # replicas is left out when autoscaling is enabled.
  {{- if not .Values.vllm.autoscaling.enabled }}
  replicas: {{ .Values.vllm.replicaCount }}
  {{- end }}
  selector:
    matchLabels:
      app: {{ include "h2ogpt.fullname" . }}-vllm-inference
  {{- if .Values.vllm.updateStrategy }}
  strategy: {{- toYaml .Values.vllm.updateStrategy | nindent 4 }}
  {{- end }}
  template:
    metadata:
      {{- with .Values.vllm.podAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        app: {{ include "h2ogpt.fullname" . }}-vllm-inference
    spec:
      {{- with .Values.vllm.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.vllm.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      securityContext:
        {{- toYaml .Values.vllm.podSecurityContext | nindent 8 }}
      # NOTE(review): the selector below matches the h2oGPT pods' app label, not
      # this Deployment's own "-vllm-inference" label, so it steers vLLM pods
      # away from zones running h2oGPT rather than spreading vLLM replicas -
      # confirm this is intended.
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchExpressions:
                    - key: app
                      operator: In
                      values:
                        - {{ include "h2ogpt.fullname" . }}
                # topology.kubernetes.io/zone supersedes the deprecated
                # failure-domain.beta.kubernetes.io/zone node label.
                topologyKey: topology.kubernetes.io/zone
      {{- with .Values.vllm.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: {{ include "h2ogpt.fullname" . }}-vllm-inference
          securityContext:
            {{- toYaml .Values.vllm.securityContext | nindent 12 }}
          image: "{{ .Values.vllm.image.repository }}:{{ .Values.vllm.image.tag | default .Chart.AppVersion }}"
          imagePullPolicy: {{ .Values.vllm.image.pullPolicy }}
          command: ["/h2ogpt_conda/vllm_env/bin/python3.10"]
          args:
            - "-m"
            - "vllm.entrypoints.openai.api_server"
            - "--port"
            - "5000"
            - "--host"
            - "0.0.0.0"
            - "--download-dir"
            - "/workspace/.cache/huggingface/hub"
            # quote escapes embedded quotes/backslashes in user-supplied args.
            {{- range $arg := .Values.vllm.containerArgs }}
            - {{ $arg | quote }}
            {{- end }}
          ports:
            - name: http
              containerPort: 5000
              protocol: TCP
          # The probe target is fixed here; .Values.vllm.*Probe only contributes
          # the remaining probe fields (merged in below httpGet).
          # NOTE(review): the probes hit "/" while the h2oGPT pod polls
          # /v1/models to detect readiness - confirm "/" returns a success code
          # on this server.
          {{- if .Values.vllm.livenessProbe }}
          livenessProbe:
            httpGet:
              path: /
              scheme: HTTP
              port: http
            {{- toYaml .Values.vllm.livenessProbe | nindent 12 }}
          {{- end }}
          {{- if .Values.vllm.readinessProbe }}
          readinessProbe:
            httpGet:
              path: /
              scheme: HTTP
              port: http
            {{- toYaml .Values.vllm.readinessProbe | nindent 12 }}
          {{- end }}
          resources:
            {{- toYaml .Values.vllm.resources | nindent 12 }}
          envFrom:
            - configMapRef:
                name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config
          env:
            - name: NCCL_IGNORE_DISABLED_P2P
              value: "1"
            {{- range $key, $value := .Values.vllm.env }}
            - name: {{ $key | quote }}
              value: {{ $value | quote }}
            {{- end }}
          volumeMounts:
            - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume
              mountPath: /workspace/.cache
              subPath: cache
            - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume
              mountPath: /dev/shm
              subPath: shm
      volumes:
        # Either the pre-created PVC or a generic ephemeral volume, depending on
        # .Values.vllm.storage.useEphemeral.
        - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume
          {{- if not .Values.vllm.storage.useEphemeral }}
          persistentVolumeClaim:
            claimName: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume
          {{- else }}
          ephemeral:
            volumeClaimTemplate:
              spec:
                accessModes:
                  - ReadWriteOnce
                resources:
                  requests:
                    storage: {{ .Values.vllm.storage.size | quote }}
                storageClassName: {{ .Values.vllm.storage.class }}
          {{- end }}
{{- end }}
---
{{- /*
PersistentVolumeClaim backing the vLLM "-vllm-inference-volume" volume.
Rendered whenever vLLM is enabled with non-ephemeral storage; the condition
does not look at stack mode, so the claim exists both for the standalone vLLM
Deployment and for the vLLM container embedded in the h2oGPT pod.
*/}}
{{- if and .Values.vllm.enabled (not .Values.vllm.storage.useEphemeral) }}
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: {{ .Values.vllm.storage.size | quote }}
  storageClassName: {{ .Values.vllm.storage.class }}
{{- end }}