diff --git a/helm/h2ogpt-chart/Chart.yaml b/helm/h2ogpt-chart/Chart.yaml
index d90a7d69e..eeaf87fef 100644
--- a/helm/h2ogpt-chart/Chart.yaml
+++ b/helm/h2ogpt-chart/Chart.yaml
@@ -1,6 +1,6 @@
 apiVersion: v2
 name: h2ogpt
-description: A Helm chart for h2ogpt
+description: A Helm chart for h2oGPT
 
 # A chart can be either an 'application' or a 'library' chart.
 #
diff --git a/helm/h2ogpt-chart/README.md b/helm/h2ogpt-chart/README.md
new file mode 100644
index 000000000..a410aa342
--- /dev/null
+++ b/helm/h2ogpt-chart/README.md
@@ -0,0 +1,226 @@
+# h2ogpt
+
+![Version: 0.1.0-288](https://img.shields.io/badge/Version-0.1.0--288-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 0.1.0-288](https://img.shields.io/badge/AppVersion-0.1.0--288-informational?style=flat-square)
+
+A Helm chart for h2oGPT
+
+## Values
+
+| Key | Type | Default | Description |
+|-----|------|---------|-------------|
+| agents.autoscaling.enabled | bool | `false` |  |
+| agents.autoscaling.maxReplicas | int | `2` |  |
+| agents.autoscaling.minReplicas | int | `1` |  |
+| agents.autoscaling.targetCPU | int | `80` |  |
+| agents.autoscaling.targetMemory | string | `"32Gi"` |  |
+| agents.enabled | bool | `false` | Enable agents, this must be `false` if `h2ogpt.agents.enabled` is `true` |
+| agents.env | object | `{}` |  |
+| agents.extraVolumeMounts | list | `[]` | Extra volume mounts |
+| agents.extraVolumes | list | `[]` | Extra volumes, for more certs, mount under /etc/ssl/more-certs |
+| agents.image.pullPolicy | string | `"IfNotPresent"` |  |
+| agents.image.repository | string | `"gcr.io/vorvan/h2oai/h2ogpt-runtime"` |  |
+| agents.image.tag | string | `nil` |  |
+| agents.imagePullSecrets | string | `nil` |  |
+| agents.initImage.pullPolicy | string | `nil` |  |
+| agents.initImage.repository | string | `nil` |  |
+| agents.initImage.tag | string | `nil` |  |
+| agents.nodeSelector | string | `nil` |  |
+| agents.overrideConfig.agent_workers | int | `5` |  |
+| agents.overrideConfig.concurrency_count | int | `100` |  |
+| agents.overrideConfig.embedding_gpu_id | string | `"cpu"` |  |
+| agents.overrideConfig.enable_stt | bool | `false` |  |
+| agents.overrideConfig.enable_transcriptions | bool | `false` |  |
+| agents.overrideConfig.enable_tts | bool | `false` |  |
+| agents.overrideConfig.enforce_h2ogpt_api_key | bool | `true` |  |
+| agents.overrideConfig.enforce_h2ogpt_ui_key | bool | `false` |  |
+| agents.overrideConfig.hf_embedding_model | string | `"fake"` |  |
+| agents.overrideConfig.metadata_in_context | string | `""` |  |
+| agents.overrideConfig.num_async | int | `10` |  |
+| agents.overrideConfig.rotate_align_resize_image | bool | `false` |  |
+| agents.overrideConfig.score_model | string | `"None"` |  |
+| agents.overrideConfig.share | bool | `false` |  |
+| agents.overrideConfig.top_k_docs_max_show | int | `100` |  |
+| agents.overrideConfig.visible_hosts_tab | bool | `false` |  |
+| agents.overrideConfig.visible_login_tab | bool | `false` |  |
+| agents.overrideConfig.visible_models_tab | bool | `false` |  |
+| agents.overrideConfig.visible_system_tab | bool | `false` |  |
+| agents.podAffinity | string | `nil` | Set hostname and zone to true for pod affinity rules based on hostname and zone. |
+| agents.podAnnotations | object | `{}` |  |
+| agents.podLabels | object | `{}` |  |
+| agents.podSecurityContext.fsGroup | string | `nil` |  |
+| agents.podSecurityContext.runAsGroup | string | `nil` |  |
+| agents.podSecurityContext.runAsNonRoot | bool | `true` |  |
+| agents.podSecurityContext.runAsUser | string | `nil` |  |
+| agents.replicaCount | int | `1` |  |
+| agents.resources.limits.memory | string | `"64Gi"` |  |
+| agents.resources.requests.memory | string | `"32Gi"` |  |
+| agents.securityContext.allowPrivilegeEscalation | bool | `false` |  |
+| agents.securityContext.capabilities.drop[0] | string | `"ALL"` |  |
+| agents.securityContext.runAsNonRoot | bool | `true` |  |
+| agents.securityContext.seccompProfile.type | string | `"RuntimeDefault"` |  |
+| agents.service.agentsPort | int | `5004` |  |
+| agents.service.annotations | object | `{}` |  |
+| agents.service.type | string | `"NodePort"` |  |
+| agents.storage.class | string | `nil` |  |
+| agents.storage.size | string | `"128Gi"` |  |
+| agents.storage.useEphemeral | bool | `true` |  |
+| agents.tolerations | string | `nil` |  |
+| agents.updateStrategy.type | string | `"RollingUpdate"` |  |
+| caCertificates | string | `""` | CA certs |
+| fullnameOverride | string | `""` |  |
+| global.externalLLM.enabled | bool | `false` |  |
+| global.externalLLM.modelLock | string | `nil` |  |
+| global.externalLLM.openAI.enabled | bool | `false` |  |
+| global.externalLLM.openAIAzure.enabled | bool | `false` |  |
+| global.externalLLM.replicate.enabled | bool | `false` |  |
+| global.externalLLM.secret | string | `nil` | list of secrets for h2ogpt and agents env |
+| global.visionModels.enabled | bool | `false` | Enable vision models |
+| global.visionModels.rotateAlignResizeImage | bool | `false` |  |
+| global.visionModels.visibleModels | list | `[]` | Visible vision models, the vision model itslef needs to be set via modeLock or base_model. Ex: visibleModels: ['OpenGVLab/InternVL-Chat-V1-5'] |
+| h2ogpt.agents | object | `{"agent_workers":5,"enabled":false}` | Enable agents |
+| h2ogpt.agents.enabled | bool | `false` | Run agents with h2oGPT container |
+| h2ogpt.enabled | bool | `true` | Enable h2oGPT |
+| h2ogpt.env | object | `{}` |  |
+| h2ogpt.extraVolumeMounts | list | `[]` | Extra volume mounts |
+| h2ogpt.extraVolumes | list | `[]` | Extra volumes, for more certs, mount under /etc/ssl/more-certs |
+| h2ogpt.image.pullPolicy | string | `"IfNotPresent"` |  |
+| h2ogpt.image.repository | string | `"gcr.io/vorvan/h2oai/h2ogpt-runtime"` |  |
+| h2ogpt.image.tag | string | `nil` |  |
+| h2ogpt.imagePullSecrets | string | `nil` |  |
+| h2ogpt.initImage.pullPolicy | string | `nil` |  |
+| h2ogpt.initImage.repository | string | `nil` |  |
+| h2ogpt.initImage.tag | string | `nil` |  |
+| h2ogpt.nodeSelector | string | `nil` |  |
+| h2ogpt.overrideConfig.concurrency_count | int | `100` |  |
+| h2ogpt.overrideConfig.embedding_gpu_id | string | `"cpu"` |  |
+| h2ogpt.overrideConfig.enable_stt | bool | `false` |  |
+| h2ogpt.overrideConfig.enable_transcriptions | bool | `false` |  |
+| h2ogpt.overrideConfig.enable_tts | bool | `false` |  |
+| h2ogpt.overrideConfig.enforce_h2ogpt_api_key | bool | `true` |  |
+| h2ogpt.overrideConfig.enforce_h2ogpt_ui_key | bool | `false` |  |
+| h2ogpt.overrideConfig.hf_embedding_model | string | `"fake"` |  |
+| h2ogpt.overrideConfig.metadata_in_context | string | `""` |  |
+| h2ogpt.overrideConfig.num_async | int | `10` |  |
+| h2ogpt.overrideConfig.openai_server | bool | `true` |  |
+| h2ogpt.overrideConfig.openai_workers | int | `5` |  |
+| h2ogpt.overrideConfig.rotate_align_resize_image | bool | `false` |  |
+| h2ogpt.overrideConfig.score_model | string | `"None"` |  |
+| h2ogpt.overrideConfig.share | bool | `false` |  |
+| h2ogpt.overrideConfig.top_k_docs_max_show | int | `100` |  |
+| h2ogpt.overrideConfig.visible_hosts_tab | bool | `false` |  |
+| h2ogpt.overrideConfig.visible_login_tab | bool | `false` |  |
+| h2ogpt.overrideConfig.visible_models_tab | bool | `false` |  |
+| h2ogpt.overrideConfig.visible_system_tab | bool | `false` |  |
+| h2ogpt.podAffinity | string | `nil` | Set hostname and zone to true for pod affinity rules based on hostname and zone. |
+| h2ogpt.podAnnotations | object | `{}` |  |
+| h2ogpt.podLabels | object | `{}` |  |
+| h2ogpt.podSecurityContext.fsGroup | string | `nil` |  |
+| h2ogpt.podSecurityContext.runAsGroup | string | `nil` |  |
+| h2ogpt.podSecurityContext.runAsNonRoot | bool | `true` |  |
+| h2ogpt.podSecurityContext.runAsUser | string | `nil` |  |
+| h2ogpt.replicaCount | int | `1` |  |
+| h2ogpt.resources.limits.memory | string | `"64Gi"` |  |
+| h2ogpt.resources.requests.memory | string | `"32Gi"` |  |
+| h2ogpt.securityContext.allowPrivilegeEscalation | bool | `false` |  |
+| h2ogpt.securityContext.capabilities.drop[0] | string | `"ALL"` |  |
+| h2ogpt.securityContext.runAsNonRoot | bool | `true` |  |
+| h2ogpt.securityContext.seccompProfile.type | string | `"RuntimeDefault"` |  |
+| h2ogpt.service.agentsPort | int | `5004` |  |
+| h2ogpt.service.functionPort | int | `5002` |  |
+| h2ogpt.service.gptPort | int | `8888` |  |
+| h2ogpt.service.openaiPort | int | `5000` |  |
+| h2ogpt.service.type | string | `"NodePort"` |  |
+| h2ogpt.service.webPort | int | `80` |  |
+| h2ogpt.service.webServiceAnnotations | object | `{}` |  |
+| h2ogpt.storage.class | string | `nil` |  |
+| h2ogpt.storage.size | string | `"128Gi"` |  |
+| h2ogpt.storage.useEphemeral | bool | `true` |  |
+| h2ogpt.tolerations | string | `nil` |  |
+| h2ogpt.updateStrategy.type | string | `"RollingUpdate"` |  |
+| lmdeploy.containerArgs[0] | string | `"OpenGVLab/InternVL-Chat-V1-5"` |  |
+| lmdeploy.enabled | bool | `false` | Enable lmdeploy |
+| lmdeploy.env | object | `{}` |  |
+| lmdeploy.hfSecret | string | `nil` |  |
+| lmdeploy.image.pullPolicy | string | `"IfNotPresent"` |  |
+| lmdeploy.image.repository | string | `"gcr.io/vorvan/h2oai/h2oai-h2ogpt-lmdeploy"` |  |
+| lmdeploy.image.tag | string | `nil` |  |
+| lmdeploy.nodeSelector | string | `nil` |  |
+| lmdeploy.overrideConfig | string | `nil` |  |
+| lmdeploy.podAffinity | string | `nil` | Set hostname and zone to true for pod affinity rules based on hostname and zone. |
+| lmdeploy.podAnnotations | object | `{}` |  |
+| lmdeploy.podLabels | object | `{}` |  |
+| lmdeploy.podSecurityContext | string | `nil` |  |
+| lmdeploy.replicaCount | int | `1` |  |
+| lmdeploy.resources | string | `nil` |  |
+| lmdeploy.securityContext | string | `nil` |  |
+| lmdeploy.service.port | int | `23333` |  |
+| lmdeploy.service.type | string | `"ClusterIP"` |  |
+| lmdeploy.storage.class | string | `nil` |  |
+| lmdeploy.storage.size | string | `"512Gi"` |  |
+| lmdeploy.storage.useEphemeral | bool | `true` |  |
+| lmdeploy.tolerations | string | `nil` |  |
+| lmdeploy.updateStrategy.type | string | `"RollingUpdate"` |  |
+| nameOverride | string | `""` |  |
+| namespaceOverride | string | `""` |  |
+| tgi.containerArgs | string | `nil` |  |
+| tgi.enabled | bool | `false` | Enable tgi |
+| tgi.env | object | `{}` |  |
+| tgi.hfSecret | string | `nil` |  |
+| tgi.image.pullPolicy | string | `"IfNotPresent"` |  |
+| tgi.image.repository | string | `"ghcr.io/huggingface/text-generation-inference"` |  |
+| tgi.image.tag | string | `"0.9.3"` |  |
+| tgi.nodeSelector | string | `nil` |  |
+| tgi.overrideConfig | string | `nil` |  |
+| tgi.podAffinity | string | `nil` | Set hostname and zone to true for pod affinity rules based on hostname and zone. |
+| tgi.podAnnotations | object | `{}` |  |
+| tgi.podLabels | object | `{}` |  |
+| tgi.podSecurityContext | string | `nil` |  |
+| tgi.replicaCount | int | `1` |  |
+| tgi.resources | string | `nil` |  |
+| tgi.securityContext | string | `nil` |  |
+| tgi.service.port | int | `8080` |  |
+| tgi.service.type | string | `"ClusterIP"` |  |
+| tgi.storage.class | string | `nil` |  |
+| tgi.storage.size | string | `"512Gi"` |  |
+| tgi.storage.useEphemeral | bool | `true` |  |
+| tgi.tolerations | string | `nil` |  |
+| tgi.updateStrategy.type | string | `"RollingUpdate"` |  |
+| vllm.containerArgs[0] | string | `"--model"` |  |
+| vllm.containerArgs[1] | string | `"h2oai/h2ogpt-4096-llama2-7b-chat"` |  |
+| vllm.containerArgs[2] | string | `"--tokenizer"` |  |
+| vllm.containerArgs[3] | string | `"hf-internal-testing/llama-tokenizer"` |  |
+| vllm.containerArgs[4] | string | `"--tensor-parallel-size"` |  |
+| vllm.containerArgs[5] | int | `2` |  |
+| vllm.containerArgs[6] | string | `"--seed"` |  |
+| vllm.containerArgs[7] | int | `1234` |  |
+| vllm.containerArgs[8] | string | `"--trust-remote-code"` |  |
+| vllm.enabled | bool | `false` | Enable vllm |
+| vllm.env.DO_NOT_TRACK | string | `"1"` |  |
+| vllm.env.VLLM_NO_USAGE_STATS | string | `"1"` |  |
+| vllm.image.pullPolicy | string | `"IfNotPresent"` |  |
+| vllm.image.repository | string | `"vllm/vllm-openai"` |  |
+| vllm.image.tag | string | `"latest"` |  |
+| vllm.imagePullSecrets | string | `nil` |  |
+| vllm.nodeSelector | string | `nil` |  |
+| vllm.overrideConfig | string | `nil` |  |
+| vllm.podAffinity | string | `nil` | Set hostname and zone to true for pod affinity rules based on hostname and zone. |
+| vllm.podAnnotations | object | `{}` |  |
+| vllm.podLabels | object | `{}` |  |
+| vllm.podSecurityContext.fsGroup | string | `nil` |  |
+| vllm.podSecurityContext.runAsGroup | string | `nil` |  |
+| vllm.podSecurityContext.runAsNonRoot | bool | `true` |  |
+| vllm.podSecurityContext.runAsUser | string | `nil` |  |
+| vllm.replicaCount | int | `1` |  |
+| vllm.resources | string | `nil` |  |
+| vllm.securityContext.allowPrivilegeEscalation | bool | `false` |  |
+| vllm.securityContext.capabilities.drop[0] | string | `"ALL"` |  |
+| vllm.securityContext.runAsNonRoot | bool | `true` |  |
+| vllm.securityContext.seccompProfile | string | `nil` |  |
+| vllm.service.port | int | `5000` |  |
+| vllm.service.type | string | `"ClusterIP"` |  |
+| vllm.storage.class | string | `nil` |  |
+| vllm.storage.size | string | `"512Gi"` |  |
+| vllm.storage.useEphemeral | bool | `true` |  |
+| vllm.tolerations | string | `nil` |  |
+| vllm.updateStrategy.type | string | `"RollingUpdate"` |  |
+