Spaces:

iblfe
/

test

Runtime error

App Files Files Community

test / helm /h2ogpt-chart /templates /deployment.yaml

iblfe

Upload folder using huggingface_hub

b585c7f verified about 1 year ago

raw

history blame

22.2 kB

	{{- /*
Chart-level sanity checks. Rendering is aborted with the messages below when
an unsupported combination of components is selected in the values.
*/}}
{{- $bothBackends := and .Values.tgi.enabled .Values.vllm.enabled }}
{{- if $bothBackends }}
{{- fail "Both TGI and vLLM cannot be enabled at the same time. Enable only one and try again" }}
{{- end }}
{{- /* Stack mode is only valid when vLLM and h2oGPT are switched on together. */}}
{{- if and .Values.h2ogpt.stack.enabled (or (not .Values.vllm.enabled) (not .Values.h2ogpt.enabled)) }}
{{- fail "If h2oGPT stack is enabled, both vLLM and h2oGPT should be enabled" }}
{{- end }}
	{{- /*
h2oGPT application Deployment (rendered when .Values.h2ogpt.enabled is set).

Layouts produced by this document:
  - stack mode (.Values.h2ogpt.stack.enabled): a vLLM container is added to
    the same pod and h2oGPT talks to it on localhost:5000;
  - otherwise: an init container polls the separate TGI or vLLM Service until
    it answers, and h2oGPT is pointed at that Service through the
    h2ogpt_inference_server environment variable;
  - external LLM (.Values.h2ogpt.externalLLM.enabled): no
    h2ogpt_inference_server is set; API credentials are read from the Secret
    named by .Values.h2ogpt.externalLLM.secret.
*/}}
---
{{- if .Values.h2ogpt.enabled }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "h2ogpt.fullname" . }}
  labels:
    app: {{ include "h2ogpt.fullname" . }}
spec:
  {{- /* replicas is left unset when autoscaling is enabled. */}}
  {{- if not .Values.h2ogpt.autoscaling.enabled }}
  replicas: {{ .Values.h2ogpt.replicaCount }}
  {{- end }}
  selector:
    matchLabels:
      app: {{ include "h2ogpt.fullname" . }}
  {{- if .Values.h2ogpt.updateStrategy }}
  strategy: {{- toYaml .Values.h2ogpt.updateStrategy | nindent 4 }}
  {{- end }}
  template:
    metadata:
      {{- with .Values.h2ogpt.podAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        app: {{ include "h2ogpt.fullname" . }}
    spec:
      {{- with .Values.h2ogpt.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.h2ogpt.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      securityContext:
        {{- toYaml .Values.h2ogpt.podSecurityContext | nindent 8 }}
      affinity:
        podAntiAffinity:
          # Soft preference: spread replicas of this Deployment across zones.
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchExpressions:
                    - key: app
                      operator: In
                      values:
                        - {{ include "h2ogpt.fullname" . }}
                # Stable replacement for the deprecated
                # failure-domain.beta.kubernetes.io/zone node label.
                topologyKey: topology.kubernetes.io/zone
      {{- /* The chart guards forbid enabling TGI and vLLM together, so at most one initContainers key is rendered. */}}
      {{- if and (.Values.tgi.enabled) (not .Values.h2ogpt.stack.enabled) }}
      initContainers:
        - name: tgi-check
          securityContext:
            {{- toYaml .Values.h2ogpt.securityContext | nindent 12 }}
          image: "{{ .Values.h2ogpt.initImage.repository | default "busybox" }}:{{ .Values.h2ogpt.initImage.tag | default "1.36" }}"
          imagePullPolicy: {{ .Values.h2ogpt.initImage.pullPolicy | default "IfNotPresent" }}
          command: ["/bin/sh", "-c"]
          args:
            # Folded scalar: the lines below are joined into one shell line,
            # which is why every statement ends with ';'.
            - >
              until wget -O- http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}/ >/dev/null 2>&1;
              do
              echo "Waiting for inference service to become ready...";
              sleep 5;
              done
      {{- end }}
      {{- if and (.Values.vllm.enabled) (not .Values.h2ogpt.stack.enabled) }}
      initContainers:
        - name: vllm-check
          securityContext:
            {{- toYaml .Values.h2ogpt.securityContext | nindent 12 }}
          image: "{{ .Values.h2ogpt.initImage.repository | default "busybox" }}:{{ .Values.h2ogpt.initImage.tag | default "1.36" }}"
          imagePullPolicy: {{ .Values.h2ogpt.initImage.pullPolicy | default "IfNotPresent" }}
          command: ["/bin/sh", "-c"]
          args:
            # Folded scalar: the lines below are joined into one shell line,
            # which is why every statement ends with ';'.
            - >
              until wget -O- http://{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}/v1/models >/dev/null 2>&1;
              do
              echo "Waiting for inference service to become ready...";
              sleep 5;
              done
      {{- end }}
      {{- with .Values.h2ogpt.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        {{- if .Values.h2ogpt.stack.enabled }}
        # Stack mode: vLLM runs as a second container in the h2oGPT pod.
        - name: {{ include "h2ogpt.fullname" . }}-vllm-inference
          securityContext:
            {{- toYaml .Values.vllm.securityContext | nindent 12 }}
          image: "{{ .Values.vllm.image.repository }}:{{ .Values.vllm.image.tag | default .Chart.AppVersion }}"
          imagePullPolicy: {{ .Values.vllm.image.pullPolicy }}
          command: ["/h2ogpt_conda/vllm_env/bin/python3.10"]
          args:
            - "-m"
            - "vllm.entrypoints.openai.api_server"
            - "--port"
            - "5000"
            - "--host"
            - "0.0.0.0"
            - "--download-dir"
            - "/workspace/.cache/huggingface/hub"
            {{- range $arg := .Values.vllm.containerArgs }}
            - {{ $arg | quote }}
            {{- end }}
          ports:
            - name: http
              containerPort: 5000
              protocol: TCP
          {{- /* NOTE(review): both probes GET "/" on the vLLM port; confirm that path returns a success status for the image in use. */}}
          {{- if .Values.vllm.livenessProbe }}
          livenessProbe:
            httpGet:
              path: /
              scheme: HTTP
              port: http
            {{- toYaml .Values.vllm.livenessProbe | nindent 12 }}
          {{- end }}
          {{- if .Values.vllm.readinessProbe }}
          readinessProbe:
            httpGet:
              path: /
              scheme: HTTP
              port: http
            {{- toYaml .Values.vllm.readinessProbe | nindent 12 }}
          {{- end }}
          resources:
            {{- toYaml .Values.vllm.resources | nindent 12 }}
          envFrom:
            - configMapRef:
                name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config
          env:
            - name: NCCL_IGNORE_DISABLED_P2P
              value: "1"
            {{- range $key, $value := .Values.vllm.env }}
            - name: {{ $key | quote }}
              value: {{ $value | quote }}
            {{- end }}
          volumeMounts:
            - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume
              mountPath: /workspace/.cache
              subPath: cache
            - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume
              mountPath: /dev/shm
              subPath: shm
        {{- end }}
        - name: {{ include "h2ogpt.fullname" . }}
          securityContext:
            {{- toYaml .Values.h2ogpt.securityContext | nindent 12 }}
          image: "{{ .Values.h2ogpt.image.repository }}:{{ .Values.h2ogpt.image.tag | default .Chart.AppVersion }}"
          imagePullPolicy: {{ .Values.h2ogpt.image.pullPolicy }}
          command: ["/bin/bash", "-c"]
          args:
            {{- if .Values.h2ogpt.stack.enabled }}
            # Literal scalar: newlines are kept, so the loop stays valid shell.
            # Wait for the vLLM sidecar before starting the application.
            - |
              while [[ "$(curl --insecure -s -o /dev/null -w '%{http_code}' http://localhost:5000/v1/models)" != "200" ]]; do
                echo "Waiting for inference service to become ready... (2sec)"
                sleep 2
              done

              python3 /workspace/generate.py
            {{- else }}
            - >
              python3 /workspace/generate.py
            {{- end }}
          ports:
            - name: http
              containerPort: 7860
              protocol: TCP
            - name: gpt
              containerPort: 8888
              protocol: TCP
          {{- if .Values.h2ogpt.livenessProbe }}
          livenessProbe:
            httpGet:
              path: /
              scheme: HTTP
              port: http
            {{- toYaml .Values.h2ogpt.livenessProbe | nindent 12 }}
          {{- end }}
          {{- if .Values.h2ogpt.readinessProbe }}
          readinessProbe:
            httpGet:
              path: /
              scheme: HTTP
              port: http
            {{- toYaml .Values.h2ogpt.readinessProbe | nindent 12 }}
          {{- end }}
          resources:
            {{- toYaml .Values.h2ogpt.resources | nindent 12 }}
          envFrom:
            - configMapRef:
                name: {{ include "h2ogpt.fullname" . }}-config
          env:
            {{- /* Exactly one of the three h2ogpt_inference_server entries applies; none is set for an external LLM. */}}
            {{- if and .Values.tgi.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled) }}
            - name: h2ogpt_inference_server
              value: "http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}"
            {{- end }}
            {{- if and .Values.vllm.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled) }}
            - name: h2ogpt_inference_server
              value: "vllm:{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}"
            {{- end }}
            {{- if and .Values.h2ogpt.stack.enabled (not .Values.h2ogpt.externalLLM.enabled) }}
            - name: h2ogpt_inference_server
              value: "vllm:localhost:5000"
            {{- end }}
            {{- /* quote escapes embedded quotes/backslashes that a hand-written "..." wrapper would not. */}}
            {{- range $key, $value := .Values.h2ogpt.env }}
            - name: {{ $key | quote }}
              value: {{ $value | quote }}
            {{- end }}
            {{- if and .Values.h2ogpt.externalLLM.openAIAzure.enabled .Values.h2ogpt.externalLLM.enabled }}
            - name: OPENAI_AZURE_KEY
              valueFrom:
                secretKeyRef:
                  name: {{ .Values.h2ogpt.externalLLM.secret }}
                  key: OPENAI_AZURE_KEY
            - name: OPENAI_AZURE_API_BASE
              valueFrom:
                secretKeyRef:
                  name: {{ .Values.h2ogpt.externalLLM.secret }}
                  key: OPENAI_AZURE_API_BASE
            {{- end }}
            {{- if and .Values.h2ogpt.externalLLM.openAI.enabled .Values.h2ogpt.externalLLM.enabled }}
            - name: OPENAI_API_KEY
              valueFrom:
                secretKeyRef:
                  name: {{ .Values.h2ogpt.externalLLM.secret }}
                  key: OPENAI_API_KEY
            {{- end }}
            {{- if and .Values.h2ogpt.externalLLM.replicate.enabled .Values.h2ogpt.externalLLM.enabled }}
            - name: REPLICATE_API_TOKEN
              valueFrom:
                secretKeyRef:
                  name: {{ .Values.h2ogpt.externalLLM.secret }}
                  key: REPLICATE_API_TOKEN
            {{- end }}
            {{- if .Values.h2ogpt.externalLLM.enabled }}
            - name: H2OGPT_MODEL_LOCK
              value: {{ toJson .Values.h2ogpt.externalLLM.modelLock | quote }}
            - name: H2OGPT_SCORE_MODEL
              value: "None"
            {{- end }}
          volumeMounts:
            - name: {{ include "h2ogpt.fullname" . }}-volume
              mountPath: /workspace/.cache
              subPath: cache
            - name: {{ include "h2ogpt.fullname" . }}-volume
              mountPath: /workspace/save
              subPath: save
      volumes:
        - name: {{ include "h2ogpt.fullname" . }}-volume
          {{- if not .Values.h2ogpt.storage.useEphemeral }}
          persistentVolumeClaim:
            claimName: {{ include "h2ogpt.fullname" . }}-volume
          {{- else }}
          ephemeral:
            volumeClaimTemplate:
              spec:
                accessModes:
                  - ReadWriteOnce
                resources:
                  requests:
                    storage: {{ .Values.h2ogpt.storage.size | quote }}
                storageClassName: {{ .Values.h2ogpt.storage.class }}
          {{- end }}
        {{- if .Values.h2ogpt.stack.enabled }}
        - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume
          {{- if not .Values.vllm.storage.useEphemeral }}
          persistentVolumeClaim:
            claimName: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume
          {{- else }}
          ephemeral:
            volumeClaimTemplate:
              spec:
                accessModes:
                  - ReadWriteOnce
                resources:
                  requests:
                    storage: {{ .Values.vllm.storage.size | quote }}
                storageClassName: {{ .Values.vllm.storage.class }}
          {{- end }}
        {{- end }}
{{- end }}
	{{- /*
PersistentVolumeClaim backing the h2oGPT pod's "-volume" volume. It is only
rendered when h2oGPT is enabled and its storage is not ephemeral; in the
ephemeral case the Deployment declares a volumeClaimTemplate instead.
*/}}
---
{{- if and (.Values.h2ogpt.enabled) (not .Values.h2ogpt.storage.useEphemeral) }}
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: {{ include "h2ogpt.fullname" . }}-volume
spec:
  accessModes:
    - ReadWriteOnce
  {{- /* Deliberately unquoted: an unset class renders as null rather than as the empty string "". */}}
  storageClassName: {{ .Values.h2ogpt.storage.class }}
  resources:
    requests:
      storage: {{ .Values.h2ogpt.storage.size | quote }}
{{- end }}

	{{- /*
Standalone TGI inference Deployment. Rendered when .Values.tgi.enabled is set
and stack mode is off. The h2oGPT pod reaches it through the
"<fullname>-tgi-inference" host name used in its init container and in
h2ogpt_inference_server.
*/}}
---
{{- if and (.Values.tgi.enabled) (not .Values.h2ogpt.stack.enabled) }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "h2ogpt.fullname" . }}-tgi-inference
  labels:
    app: {{ include "h2ogpt.fullname" . }}-tgi-inference
spec:
  {{- /* replicas is left unset when autoscaling is enabled. */}}
  {{- if not .Values.tgi.autoscaling.enabled }}
  replicas: {{ .Values.tgi.replicaCount }}
  {{- end }}
  selector:
    matchLabels:
      app: {{ include "h2ogpt.fullname" . }}-tgi-inference
  {{- if .Values.tgi.updateStrategy }}
  strategy: {{- toYaml .Values.tgi.updateStrategy | nindent 4 }}
  {{- end }}
  template:
    metadata:
      {{- with .Values.tgi.podAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        app: {{ include "h2ogpt.fullname" . }}-tgi-inference
    spec:
      {{- with .Values.tgi.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tgi.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      securityContext:
        {{- toYaml .Values.tgi.podSecurityContext | nindent 8 }}
      affinity:
        podAntiAffinity:
          # Soft preference: spread replicas of this Deployment across zones.
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchExpressions:
                    - key: app
                      operator: In
                      values:
                        # Must equal this pod template's own "app" label;
                        # the bare fullname selects the h2oGPT pods instead.
                        - {{ include "h2ogpt.fullname" . }}-tgi-inference
                # Stable replacement for the deprecated
                # failure-domain.beta.kubernetes.io/zone node label.
                topologyKey: topology.kubernetes.io/zone
      {{- with .Values.tgi.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: {{ include "h2ogpt.fullname" . }}-tgi-inference
          securityContext:
            {{- toYaml .Values.tgi.securityContext | nindent 12 }}
          image: "{{ .Values.tgi.image.repository }}:{{ .Values.tgi.image.tag }}"
          imagePullPolicy: {{ .Values.tgi.image.pullPolicy }}
          # Empty command: the image's own entrypoint is used.
          command: []
          args:
            {{- range $arg := .Values.tgi.containerArgs }}
            - {{ $arg | quote }}
            {{- end }}
          ports:
            - name: http
              containerPort: 80
              protocol: TCP
          {{- if .Values.tgi.livenessProbe }}
          livenessProbe:
            httpGet:
              path: /
              scheme: HTTP
              port: http
            {{- toYaml .Values.tgi.livenessProbe | nindent 12 }}
          {{- end }}
          {{- if .Values.tgi.readinessProbe }}
          readinessProbe:
            httpGet:
              path: /
              scheme: HTTP
              port: http
            {{- toYaml .Values.tgi.readinessProbe | nindent 12 }}
          {{- end }}
          resources:
            {{- toYaml .Values.tgi.resources | nindent 12 }}
          env:
            {{- /* quote escapes embedded quotes/backslashes that a hand-written "..." wrapper would not. */}}
            {{- range $key, $value := .Values.tgi.env }}
            - name: {{ $key | quote }}
              value: {{ $value | quote }}
            {{- end }}
          envFrom:
            - configMapRef:
                name: {{ include "h2ogpt.fullname" . }}-tgi-inference-config
            {{- /* Skip the secretRef entirely when no secret name is configured, instead of rendering an empty name. */}}
            {{- with .Values.tgi.hfSecret }}
            - secretRef:
                name: {{ . }}
            {{- end }}
          volumeMounts:
            - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume
              mountPath: /app/cache
              subPath: cache
            - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume
              mountPath: /data
              subPath: data
            - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume
              mountPath: /dev/shm
              subPath: shm
      volumes:
        - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume
          {{- if not .Values.tgi.storage.useEphemeral }}
          persistentVolumeClaim:
            claimName: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume
          {{- else }}
          ephemeral:
            volumeClaimTemplate:
              spec:
                accessModes:
                  - ReadWriteOnce
                resources:
                  requests:
                    storage: {{ .Values.tgi.storage.size | quote }}
                storageClassName: {{ .Values.tgi.storage.class }}
          {{- end }}
{{- end }}
	{{- /*
PersistentVolumeClaim backing the TGI pod's "-tgi-inference-volume" volume.
Only rendered when TGI is enabled and its storage is not ephemeral.
*/}}
---
{{- if and (.Values.tgi.enabled) (not .Values.tgi.storage.useEphemeral) }}
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume
spec:
  accessModes:
    - ReadWriteOnce
  {{- /* Deliberately unquoted: an unset class renders as null rather than as the empty string "". */}}
  storageClassName: {{ .Values.tgi.storage.class }}
  resources:
    requests:
      storage: {{ .Values.tgi.storage.size | quote }}
{{- end }}
	{{- /*
Standalone vLLM inference Deployment. Rendered when .Values.vllm.enabled is
set and stack mode is off; in stack mode the same container is embedded in
the h2oGPT pod instead. The server is started on port 5000 with its model
download directory under the mounted /workspace/.cache volume.
*/}}
---
{{- if and (.Values.vllm.enabled) (not .Values.h2ogpt.stack.enabled) }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "h2ogpt.fullname" . }}-vllm-inference
  labels:
    app: {{ include "h2ogpt.fullname" . }}-vllm-inference
spec:
  {{- /* replicas is left unset when autoscaling is enabled. */}}
  {{- if not .Values.vllm.autoscaling.enabled }}
  replicas: {{ .Values.vllm.replicaCount }}
  {{- end }}
  selector:
    matchLabels:
      app: {{ include "h2ogpt.fullname" . }}-vllm-inference
  {{- if .Values.vllm.updateStrategy }}
  strategy: {{- toYaml .Values.vllm.updateStrategy | nindent 4 }}
  {{- end }}
  template:
    metadata:
      {{- with .Values.vllm.podAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        app: {{ include "h2ogpt.fullname" . }}-vllm-inference
    spec:
      {{- with .Values.vllm.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.vllm.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      securityContext:
        {{- toYaml .Values.vllm.podSecurityContext | nindent 8 }}
      affinity:
        podAntiAffinity:
          # Soft preference: spread replicas of this Deployment across zones.
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchExpressions:
                    - key: app
                      operator: In
                      values:
                        # Must equal this pod template's own "app" label;
                        # the bare fullname selects the h2oGPT pods instead.
                        - {{ include "h2ogpt.fullname" . }}-vllm-inference
                # Stable replacement for the deprecated
                # failure-domain.beta.kubernetes.io/zone node label.
                topologyKey: topology.kubernetes.io/zone
      {{- with .Values.vllm.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: {{ include "h2ogpt.fullname" . }}-vllm-inference
          securityContext:
            {{- toYaml .Values.vllm.securityContext | nindent 12 }}
          image: "{{ .Values.vllm.image.repository }}:{{ .Values.vllm.image.tag | default .Chart.AppVersion }}"
          imagePullPolicy: {{ .Values.vllm.image.pullPolicy }}
          command: ["/h2ogpt_conda/vllm_env/bin/python3.10"]
          args:
            - "-m"
            - "vllm.entrypoints.openai.api_server"
            - "--port"
            - "5000"
            - "--host"
            - "0.0.0.0"
            - "--download-dir"
            - "/workspace/.cache/huggingface/hub"
            {{- /* User-supplied extra arguments are appended after the fixed ones. */}}
            {{- range $arg := .Values.vllm.containerArgs }}
            - {{ $arg | quote }}
            {{- end }}
          ports:
            - name: http
              containerPort: 5000
              protocol: TCP
          {{- /* NOTE(review): both probes GET "/" on the vLLM port; confirm that path returns a success status for the image in use. */}}
          {{- if .Values.vllm.livenessProbe }}
          livenessProbe:
            httpGet:
              path: /
              scheme: HTTP
              port: http
            {{- toYaml .Values.vllm.livenessProbe | nindent 12 }}
          {{- end }}
          {{- if .Values.vllm.readinessProbe }}
          readinessProbe:
            httpGet:
              path: /
              scheme: HTTP
              port: http
            {{- toYaml .Values.vllm.readinessProbe | nindent 12 }}
          {{- end }}
          resources:
            {{- toYaml .Values.vllm.resources | nindent 12 }}
          envFrom:
            - configMapRef:
                name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config
          env:
            - name: NCCL_IGNORE_DISABLED_P2P
              value: "1"
            {{- /* quote escapes embedded quotes/backslashes that a hand-written "..." wrapper would not. */}}
            {{- range $key, $value := .Values.vllm.env }}
            - name: {{ $key | quote }}
              value: {{ $value | quote }}
            {{- end }}
          volumeMounts:
            - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume
              mountPath: /workspace/.cache
              subPath: cache
            - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume
              mountPath: /dev/shm
              subPath: shm
      volumes:
        - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume
          {{- if not .Values.vllm.storage.useEphemeral }}
          persistentVolumeClaim:
            claimName: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume
          {{- else }}
          ephemeral:
            volumeClaimTemplate:
              spec:
                accessModes:
                  - ReadWriteOnce
                resources:
                  requests:
                    storage: {{ .Values.vllm.storage.size | quote }}
                storageClassName: {{ .Values.vllm.storage.class }}
          {{- end }}
{{- end }}
	{{- /*
PersistentVolumeClaim backing the "-vllm-inference-volume" volume. Rendered
whenever vLLM is enabled with non-ephemeral storage; the condition does not
look at stack mode, so the claim exists for both the standalone vLLM
Deployment and the vLLM container embedded in the h2oGPT pod.
*/}}
---
{{- if and (.Values.vllm.enabled) (not .Values.vllm.storage.useEphemeral) }}
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume
spec:
  accessModes:
    - ReadWriteOnce
  {{- /* Deliberately unquoted: an unset class renders as null rather than as the empty string "". */}}
  storageClassName: {{ .Values.vllm.storage.class }}
  resources:
    requests:
      storage: {{ .Values.vllm.storage.size | quote }}
{{- end }}