From 54744f77b3059f24548da4c71386bc4e86a05a29 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Mon, 23 Mar 2026 11:58:23 +0000 Subject: [PATCH 01/10] docs(helm): Add helm readme generator for medcat-service --- helm-charts/medcat-service-helm/README.md | 91 +++++++++++++++++++++-- 1 file changed, 85 insertions(+), 6 deletions(-) diff --git a/helm-charts/medcat-service-helm/README.md b/helm-charts/medcat-service-helm/README.md index 95c7135..523bd77 100644 --- a/helm-charts/medcat-service-helm/README.md +++ b/helm-charts/medcat-service-helm/README.md @@ -1,6 +1,6 @@ # MedCAT Service Helm Chart -This Helm chart deploys the MedCAT service to a Kubernetes cluster. +A Helm chart to deploy CogStack medcat-service ## Installation @@ -17,7 +17,6 @@ kubectl port-forward svc/medcat-service-helm 5000:5000 Then navigate to http://localhost:5000 to try the service. You can also use http://localhost:5000/docs to view the REST APIs - ## Configuration To configure medcat service, create a values.yaml file and install with helm. @@ -30,7 +29,7 @@ You should specify a model pack to be used by the service. By default it will us There is a model pack already bundled into medcat service, and is the default in this chart. -This pack is only really used for testing, and has just a few concepts built in. +This pack is only really used for testing, and has just a few concepts built in. #### Recommended: Download Model on Startup @@ -75,7 +74,6 @@ extraInitContainers: ``` - ### DeID Mode The service can perform DeID of EHRs by swithcing to the following values @@ -87,7 +85,6 @@ env: DEID_REDACT: "true" ``` - ### GPU Support To run MedCAT Service with GPU acceleration, use the GPU-enabled image and set the pod runtime class accordingly. @@ -119,9 +116,91 @@ env: #### Test GPU support You can verify that the MedCAT Service pod has access to the GPU by executing `nvidia-smi` inside the pod. 
- ```sh kubectl exec -it -- nvidia-smi ``` You should see the NVIDIA GPU device listing if the GPU is properly accessible. + +## Maintainers + +| Name | Email | Url | +| ---- | ------ | --- | +| alhendrickson | | | + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| affinity | object | `{}` | | +| autoscaling.enabled | bool | `false` | | +| autoscaling.maxReplicas | int | `100` | | +| autoscaling.minReplicas | int | `1` | | +| autoscaling.targetCPUUtilizationPercentage | int | `80` | | +| env.APP_ENABLE_DEMO_UI | bool | `true` | | +| env.APP_ENABLE_METRICS | bool | `false` | | +| env.APP_ENABLE_TRACING | bool | `false` | | +| env.APP_MEDCAT_MODEL_PACK | string | `"/cat/models/examples/example-medcat-v2-model-pack.zip"` | | +| env.OTEL_EXPERIMENTAL_RESOURCE_DETECTORS | string | `"containerid,os"` | | +| env.OTEL_EXPORTER_OTLP_ENDPOINT | string | `"http://:4317"` | | +| env.OTEL_EXPORTER_OTLP_PROTOCOL | string | `"grpc"` | | +| env.OTEL_LOGS_EXPORTER | string | `"none"` | | +| env.OTEL_METRICS_EXPORTER | string | `"none"` | | +| env.OTEL_PYTHON_FASTAPI_EXCLUDED_URLS | string | `"/api/health,/metrics"` | | +| env.OTEL_RESOURCE_ATTRIBUTES | string | `"k8s.pod.uid=$(K8S_POD_UID),k8s.pod.name=$(K8S_POD_NAME),k8s.namespace.name=$(K8S_POD_NAMESPACE),k8s.node.name=$(K8S_NODE_NAME)"` | | +| env.OTEL_SERVICE_NAME | string | `"medcat-service"` | | +| env.OTEL_TRACES_EXPORTER | string | `"otlp"` | | +| env.SERVER_GUNICORN_MAX_REQUESTS | string | `"100000"` | | +| envValueFrom.K8S_NODE_NAME.fieldRef.fieldPath | string | `"spec.nodeName"` | | +| envValueFrom.K8S_POD_NAME.fieldRef.fieldPath | string | `"metadata.name"` | | +| envValueFrom.K8S_POD_NAMESPACE.fieldRef.fieldPath | string | `"metadata.namespace"` | | +| envValueFrom.K8S_POD_UID.fieldRef.fieldPath | string | `"metadata.uid"` | | +| extraInitContainers | list | `[]` | | +| extraManifests | list | `[]` | | +| fullnameOverride | string | `""` | | +| hostAliases | list | `[]` | 
| +| image.pullPolicy | string | `"IfNotPresent"` | | +| image.repository | string | `"cogstacksystems/medcat-service"` | | +| imagePullSecrets | list | `[]` | | +| ingress.annotations | object | `{}` | | +| ingress.className | string | `""` | | +| ingress.enabled | bool | `false` | | +| ingress.hosts[0].host | string | `"chart-example.local"` | | +| ingress.hosts[0].paths[0].path | string | `"/"` | | +| ingress.hosts[0].paths[0].pathType | string | `"ImplementationSpecific"` | | +| ingress.tls | list | `[]` | | +| livenessProbe.httpGet.path | string | `"/api/health/live"` | | +| livenessProbe.httpGet.port | string | `"http"` | | +| model | object | `{}` | | +| nameOverride | string | `""` | | +| networkPolicy.egress.egressRules | list | `[]` | | +| networkPolicy.egress.enabled | bool | `false` | | +| networkPolicy.enabled | bool | `true` | | +| nodeSelector | object | `{}` | | +| podAnnotations | object | `{}` | | +| podLabels | object | `{}` | | +| podSecurityContext | object | `{}` | | +| readinessProbe.httpGet.path | string | `"/api/health/ready"` | | +| readinessProbe.httpGet.port | string | `"http"` | | +| replicaCount | int | `1` | | +| resources | object | `{}` | | +| runtimeClassName | string | `""` | | +| securityContext | object | `{}` | | +| service.port | int | `5000` | | +| service.type | string | `"ClusterIP"` | | +| serviceAccount.annotations | object | `{}` | | +| serviceAccount.automount | bool | `true` | | +| serviceAccount.create | bool | `true` | | +| serviceAccount.name | string | `""` | | +| startupProbe.failureThreshold | int | `30` | | +| startupProbe.httpGet.path | string | `"/api/health/ready"` | | +| startupProbe.httpGet.port | string | `"http"` | | +| startupProbe.initialDelaySeconds | int | `2` | | +| startupProbe.periodSeconds | int | `10` | | +| tolerations | list | `[]` | | +| updateStrategy.type | string | `"RollingUpdate"` | | +| volumeMounts | list | `[]` | | +| volumes | list | `[]` | | + 
+---------------------------------------------- +Autogenerated from chart metadata using [helm-docs v1.14.2](https://github.com/norwoodj/helm-docs/releases/v1.14.2) \ No newline at end of file From 1c39b7699aa1861cd203237ba6e6a8d5f98c01b9 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Mon, 23 Mar 2026 11:59:03 +0000 Subject: [PATCH 02/10] docs(helm): Add helm readme generator for medcat-service --- .../medcat-service-helm/README.md.gotmpl | 135 ++++++++++++++++++ 1 file changed, 135 insertions(+) create mode 100644 helm-charts/medcat-service-helm/README.md.gotmpl diff --git a/helm-charts/medcat-service-helm/README.md.gotmpl b/helm-charts/medcat-service-helm/README.md.gotmpl new file mode 100644 index 0000000..80f754b --- /dev/null +++ b/helm-charts/medcat-service-helm/README.md.gotmpl @@ -0,0 +1,135 @@ +# MedCAT Service Helm Chart + +{{ template "chart.description" . }} + +{{ template "chart.homepageLine" . }} + +## Installation + +```sh +helm install medcat-service-helm oci://registry-1.docker.io/cogstacksystems/medcat-service-helm +``` + +## Usage +For local testing, by default you can port forward the service using this command: + +```sh +kubectl port-forward svc/medcat-service-helm 5000:5000 +``` + +Then navigate to http://localhost:5000 to try the service. You can also use http://localhost:5000/docs to view the REST APIs + + +## Configuration + +To configure medcat service, create a values.yaml file and install with helm. + +### Model Pack +You should specify a model pack to be used by the service. By default it will use a small bundled model, which can be used for testing + +--- +#### Default: Use the demo model pack + +There is a model pack already bundled into medcat service, and is the default in this chart. + +This pack is only really used for testing, and has just a few concepts built in. 
+ +#### Recommended: Download Model on Startup + +Enable MedCAT to download the model from a remote URL on container startup. + +Create a values file like `values-model-download.yaml` and set these values: +```yaml +model: + downloadUrl: "http://localhost:9000/models/my-model.zip" + name: my-model.zip +``` + +Use this if you prefer dynamic loading of models at runtime. + +#### Advanced: Create a custom volume and load a model into it + +The service can use a model pack if you want to set up your own download flow. For example, set up an initContainer pattern that downloads to a volume, then mount the volume yourself. + +1. Create a persistent volume and PVC in kubernetes following the official documentation. Alternatively, specify it in `values.extraManifests` and it will be created. + +2. Create a values file like the following, which mounts the volume, and defines a custom init container. + +```yaml +env: + APP_MEDCAT_MODEL_PACK: "/my/models/custom-model.zip" +volumeMounts: + - name: model-volume + mountPath: /my/models + +volumes: +- name: model-volume + persistentVolumeClaim: + claimName: my-custom-pvc +extraInitContainers: + - name: model-downloader + image: busybox:1.28 + # In this command, you can write custom code required to download a file. For example you could configure authentication. + command: ["sh", "-c", "wget -O /my/models/custom-model.zip http://example.com"] + volumeMounts: + - name: model-volume + mountPath: /my/models + +``` + + +### DeID Mode + +The service can perform DeID of EHRs by switching to the following values + +```yaml +env: + APP_MEDCAT_MODEL_PACK: "/cat/models/examples/example-deid-model-pack.zip" + DEID_MODE: "true" + DEID_REDACT: "true" +``` + + +### GPU Support + +To run MedCAT Service with GPU acceleration, use the GPU-enabled image and set the pod runtime class accordingly. 
+ +Note: GPU support is only used for deidentification. + +Create a values file like `values-gpu.yaml` with the following content: + +```yaml +image: + repository: ghcr.io/cogstack/medcat-service-gpu + +runtimeClassName: nvidia + +resources: + limits: + nvidia.com/gpu: 1 +env: + APP_CUDA_DEVICE_COUNT: 1 + APP_TORCH_THREADS: -1 + DEID_MODE: true +``` + +> To use GPU acceleration, your Kubernetes cluster should be configured with the NVIDIA GPU Operator or the following components: +> - [NVIDIA device plugin for Kubernetes](https://github.com/NVIDIA/k8s-device-plugin) +> - [NVIDIA GPU Feature Discovery](https://github.com/NVIDIA/gpu-feature-discovery) +> - The [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/) + +#### Test GPU support +You can verify that the MedCAT Service pod has access to the GPU by executing `nvidia-smi` inside the pod (replace `POD_NAME` with the name of your MedCAT Service pod). + + +```sh +kubectl exec -it POD_NAME -- nvidia-smi +``` + +You should see the NVIDIA GPU device listing if the GPU is properly accessible. + +{{ template "chart.requirementsSection" . }} + +{{ template "chart.valuesSection" . }} + +{{ template "helm-docs.versionFooter" . 
}} \ No newline at end of file From e1beb771e4b986f223c18f0dd4039fca3a1e51d8 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Mon, 23 Mar 2026 13:39:47 +0000 Subject: [PATCH 03/10] feat(helm): Create git precommit hook for helm docs --- .pre-commit-config.yaml | 8 +++ helm-charts/medcat-service-helm/README.md | 66 +++++++++------------ helm-charts/medcat-service-helm/values.yaml | 61 ++++++++++--------- 3 files changed, 70 insertions(+), 65 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..2ace04d --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,8 @@ +repos: + - repo: https://github.com/norwoodj/helm-docs + rev: v1.14.2 + hooks: + - id: helm-docs-container + args: + # Make the tool search for charts only under the `example-charts` directory + - --chart-search-root=helm-charts/medcat-service-helm \ No newline at end of file diff --git a/helm-charts/medcat-service-helm/README.md b/helm-charts/medcat-service-helm/README.md index 523bd77..e58869f 100644 --- a/helm-charts/medcat-service-helm/README.md +++ b/helm-charts/medcat-service-helm/README.md @@ -122,12 +122,6 @@ kubectl exec -it -- nvidia-smi You should see the NVIDIA GPU device listing if the GPU is properly accessible. -## Maintainers - -| Name | Email | Url | -| ---- | ------ | --- | -| alhendrickson | | | - ## Values | Key | Type | Default | Description | @@ -138,9 +132,9 @@ You should see the NVIDIA GPU device listing if the GPU is properly accessible. 
| autoscaling.minReplicas | int | `1` | | | autoscaling.targetCPUUtilizationPercentage | int | `80` | | | env.APP_ENABLE_DEMO_UI | bool | `true` | | -| env.APP_ENABLE_METRICS | bool | `false` | | +| env.APP_ENABLE_METRICS | bool | `false` | Observability Env Vars | | env.APP_ENABLE_TRACING | bool | `false` | | -| env.APP_MEDCAT_MODEL_PACK | string | `"/cat/models/examples/example-medcat-v2-model-pack.zip"` | | +| env.APP_MEDCAT_MODEL_PACK | string | `"/cat/models/examples/example-medcat-v2-model-pack.zip"` | This defines the Model Pack used by the medcat service Example (download on startup): uncomment `ENABLE_MODEL_DOWNLOAD` and the `MODEL_*` URLs below. Example (DeID mode): uncomment `DEID_MODE`/`DEID_REDACT` and use the DeID model pack referenced below. | | env.OTEL_EXPERIMENTAL_RESOURCE_DETECTORS | string | `"containerid,os"` | | | env.OTEL_EXPORTER_OTLP_ENDPOINT | string | `"http://:4317"` | | | env.OTEL_EXPORTER_OTLP_PROTOCOL | string | `"grpc"` | | @@ -150,18 +144,16 @@ You should see the NVIDIA GPU device listing if the GPU is properly accessible. | env.OTEL_RESOURCE_ATTRIBUTES | string | `"k8s.pod.uid=$(K8S_POD_UID),k8s.pod.name=$(K8S_POD_NAME),k8s.namespace.name=$(K8S_POD_NAMESPACE),k8s.node.name=$(K8S_NODE_NAME)"` | | | env.OTEL_SERVICE_NAME | string | `"medcat-service"` | | | env.OTEL_TRACES_EXPORTER | string | `"otlp"` | | -| env.SERVER_GUNICORN_MAX_REQUESTS | string | `"100000"` | | -| envValueFrom.K8S_NODE_NAME.fieldRef.fieldPath | string | `"spec.nodeName"` | | -| envValueFrom.K8S_POD_NAME.fieldRef.fieldPath | string | `"metadata.name"` | | -| envValueFrom.K8S_POD_NAMESPACE.fieldRef.fieldPath | string | `"metadata.namespace"` | | -| envValueFrom.K8S_POD_UID.fieldRef.fieldPath | string | `"metadata.uid"` | | -| extraInitContainers | list | `[]` | | -| extraManifests | list | `[]` | | +| env.SERVER_GUNICORN_MAX_REQUESTS | string | `"100000"` | Set SERVER_GUNICORN_MAX_REQUESTS to a high number instead of the default 1000. 
Trust k8s instead to restart pod when needed. Example (tuning): see the commented `SERVER_GUNICORN_EXTRA_ARGS` setting below. | +| envValueFrom | object | `{"K8S_NODE_NAME":{"fieldRef":{"fieldPath":"spec.nodeName"}},"K8S_POD_NAME":{"fieldRef":{"fieldPath":"metadata.name"}},"K8S_POD_NAMESPACE":{"fieldRef":{"fieldPath":"metadata.namespace"}},"K8S_POD_UID":{"fieldRef":{"fieldPath":"metadata.uid"}}}` | Allow setting env values from field/configmap/secret references. Defaults to include k8s details for observability. | +| extraInitContainers | list | `[]` | Additional init containers to run before the main container. Can be templated | +| extraManifests | list | `[]` | Additional manifests to deploy to kubernetes. Can be templated | | fullnameOverride | string | `""` | | -| hostAliases | list | `[]` | | -| image.pullPolicy | string | `"IfNotPresent"` | | -| image.repository | string | `"cogstacksystems/medcat-service"` | | -| imagePullSecrets | list | `[]` | | +| hostAliases | list | `[]` | Host aliases for the pod | +| image | object | `{"pullPolicy":"IfNotPresent","repository":"cogstacksystems/medcat-service"}` | This sets the container image more information can be found here: https://kubernetes.io/docs/concepts/containers/images/ | +| image.pullPolicy | string | `"IfNotPresent"` | This sets the pull policy for images. | +| image.repository | string | `"cogstacksystems/medcat-service"` | Image repository for the MedCAT service container | +| imagePullSecrets | list | `[]` | This is for the secrets for pulling an image from a private repository more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ | | ingress.annotations | object | `{}` | | | ingress.className | string | `""` | | | ingress.enabled | bool | `false` | | @@ -171,36 +163,36 @@ You should see the NVIDIA GPU device listing if the GPU is properly accessible. 
| ingress.tls | list | `[]` | | | livenessProbe.httpGet.path | string | `"/api/health/live"` | | | livenessProbe.httpGet.port | string | `"http"` | | -| model | object | `{}` | | -| nameOverride | string | `""` | | -| networkPolicy.egress.egressRules | list | `[]` | | -| networkPolicy.egress.enabled | bool | `false` | | -| networkPolicy.enabled | bool | `true` | | +| model | object | `{}` | Enable downloading of public models using wget on startup. Model will be downloaded to /models/ and used for APP_MEDCAT_MODEL_PACK Example: uncomment `model.downloadUrl` and `model.name` below to fetch a model pack at startup. | +| nameOverride | string | `""` | This is to override the chart name. | +| networkPolicy.egress.egressRules | list | `[]` | Append any custom egress rules following the standard format | +| networkPolicy.egress.enabled | bool | `false` | Choose to block egress by enabling it in the network policy | +| networkPolicy.enabled | bool | `true` | Choose to create a default network policy blocking all ingress other than to the service port. | | nodeSelector | object | `{}` | | -| podAnnotations | object | `{}` | | -| podLabels | object | `{}` | | +| podAnnotations | object | `{}` | This is for setting Kubernetes Annotations to a Pod. For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/ | +| podLabels | object | `{}` | This is for setting Kubernetes Labels to a Pod. 
For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ | | podSecurityContext | object | `{}` | | | readinessProbe.httpGet.path | string | `"/api/health/ready"` | | | readinessProbe.httpGet.port | string | `"http"` | | -| replicaCount | int | `1` | | +| replicaCount | int | `1` | This will set the replicaset count more information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/ | | resources | object | `{}` | | -| runtimeClassName | string | `""` | | +| runtimeClassName | string | `""` | Runtime class name for the pod (e.g., "nvidia" for GPU workloads) More information: https://kubernetes.io/docs/concepts/containers/runtime-class/ | | securityContext | object | `{}` | | -| service.port | int | `5000` | | -| service.type | string | `"ClusterIP"` | | -| serviceAccount.annotations | object | `{}` | | -| serviceAccount.automount | bool | `true` | | -| serviceAccount.create | bool | `true` | | -| serviceAccount.name | string | `""` | | +| service.port | int | `5000` | This sets the ports more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports | +| service.type | string | `"ClusterIP"` | This sets the service type more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types | +| serviceAccount.annotations | object | `{}` | Annotations to add to the service account | +| serviceAccount.automount | bool | `true` | Automatically mount a ServiceAccount's API credentials? | +| serviceAccount.create | bool | `true` | Specifies whether a service account should be created | +| serviceAccount.name | string | `""` | The name of the service account to use. 
If not set and create is true, a name is generated using the fullname template | | startupProbe.failureThreshold | int | `30` | | | startupProbe.httpGet.path | string | `"/api/health/ready"` | | | startupProbe.httpGet.port | string | `"http"` | | | startupProbe.initialDelaySeconds | int | `2` | | | startupProbe.periodSeconds | int | `10` | | | tolerations | list | `[]` | | -| updateStrategy.type | string | `"RollingUpdate"` | | -| volumeMounts | list | `[]` | | -| volumes | list | `[]` | | +| updateStrategy.type | string | `"RollingUpdate"` | Used for Kubernetes deployment .spec.strategy.type. Allowed values are "Recreate" or "RollingUpdate". | +| volumeMounts | list | `[]` | Additional volumeMounts on the output Deployment definition. | +| volumes | list | `[]` | Additional volumes on the output Deployment definition. | ---------------------------------------------- Autogenerated from chart metadata using [helm-docs v1.14.2](https://github.com/norwoodj/helm-docs/releases/v1.14.2) \ No newline at end of file diff --git a/helm-charts/medcat-service-helm/values.yaml b/helm-charts/medcat-service-helm/values.yaml index ffa29d5..c88e25d 100644 --- a/helm-charts/medcat-service-helm/values.yaml +++ b/helm-charts/medcat-service-helm/values.yaml @@ -2,14 +2,15 @@ # This is a YAML-formatted file. # Declare variables to be passed into your templates. 
-# This will set the replicaset count more information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/ +# -- This will set the replicaset count more information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/ replicaCount: 1 -# This sets the container image more information can be found here: https://kubernetes.io/docs/concepts/containers/images/ +# -- This sets the container image more information can be found here: https://kubernetes.io/docs/concepts/containers/images/ image: + # -- Image repository for the MedCAT service container repository: cogstacksystems/medcat-service # repository: cogstacksystems/medcat-service-gpu - # This sets the pull policy for images. + # -- This sets the pull policy for images. pullPolicy: IfNotPresent # Overrides the image tag whose default is the chart appVersion. # tag: "latest" @@ -17,7 +18,9 @@ image: env: APP_ENABLE_DEMO_UI: true - # This defines the Model Pack used by the medcat service + # -- This defines the Model Pack used by the medcat service + # Example (download on startup): uncomment `ENABLE_MODEL_DOWNLOAD` and the `MODEL_*` URLs below. + # Example (DeID mode): uncomment `DEID_MODE`/`DEID_REDACT` and use the DeID model pack referenced below. APP_MEDCAT_MODEL_PACK: "/cat/models/examples/example-medcat-v2-model-pack.zip" # Alternatively download a model on startup from a URL @@ -33,7 +36,8 @@ env: # DEID_MODE: true # DEID_REDACT: true - # Set SERVER_GUNICORN_MAX_REQUESTS to a high number instead of the default 1000. Trust k8s instead to restart pod when needed. + # -- Set SERVER_GUNICORN_MAX_REQUESTS to a high number instead of the default 1000. Trust k8s instead to restart pod when needed. + # Example (tuning): see the commented `SERVER_GUNICORN_EXTRA_ARGS` setting below. 
SERVER_GUNICORN_MAX_REQUESTS: "100000" # Recommended env vars to set to try to limit to 1 CPU for scaling @@ -50,7 +54,7 @@ env: # APP_CUDA_DEVICE_COUNT: "1" # APP_TORCH_THREADS: "-1" - # Observability Env Vars + # -- Observability Env Vars APP_ENABLE_METRICS: false APP_ENABLE_TRACING: false OTEL_EXPORTER_OTLP_ENDPOINT: "http://:4317" @@ -63,14 +67,15 @@ env: OTEL_LOGS_EXPORTER: "none" OTEL_PYTHON_FASTAPI_EXCLUDED_URLS: "/api/health,/metrics" -# Enable downloading of public models using wget on startup. Model will be downloaded to /models/ and used for APP_MEDCAT_MODEL_PACK +# -- Enable downloading of public models using wget on startup. Model will be downloaded to /models/ and used for APP_MEDCAT_MODEL_PACK +# Example: uncomment `model.downloadUrl` and `model.name` below to fetch a model pack at startup. model: {} # Public URL to download a model pack from # downloadUrl: "http://localhost:9000/models/my-model.zip" # Name of the model pack to save to. Will be stored at /models/ # name: my-model.zip -# Allow setting env values from field/configmap/secret references. Defaults to include k8s details f +# -- Allow setting env values from field/configmap/secret references. Defaults to include k8s details for observability. envValueFrom: K8S_NODE_NAME: fieldRef: @@ -85,14 +90,14 @@ envValueFrom: fieldRef: fieldPath: metadata.namespace -# This is for the secrets for pulling an image from a private repository more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ +# -- This is for the secrets for pulling an image from a private repository more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ imagePullSecrets: [] -# This is to override the chart name. +# -- This is to override the chart name. nameOverride: "" fullnameOverride: "" updateStrategy: - # Used for Kubernetes deployment .spec.strategy.type. 
Allowed values are "Recreate" or "RollingUpdate". + # -- Used for Kubernetes deployment .spec.strategy.type. Allowed values are "Recreate" or "RollingUpdate". type: RollingUpdate # RollingUpdate default options example # rollingUpdate: @@ -101,20 +106,20 @@ updateStrategy: # This section builds out the service account more information can be found here: https://kubernetes.io/docs/concepts/security/service-accounts/ serviceAccount: - # Specifies whether a service account should be created + # -- Specifies whether a service account should be created create: true - # Automatically mount a ServiceAccount's API credentials? + # -- Automatically mount a ServiceAccount's API credentials? automount: true - # Annotations to add to the service account + # -- Annotations to add to the service account annotations: {} - # The name of the service account to use. + # -- The name of the service account to use. # If not set and create is true, a name is generated using the fullname template name: "" -# This is for setting Kubernetes Annotations to a Pod. +# -- This is for setting Kubernetes Annotations to a Pod. # For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/ podAnnotations: {} -# This is for setting Kubernetes Labels to a Pod. +# -- This is for setting Kubernetes Labels to a Pod. 
# For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ podLabels: {} @@ -131,9 +136,9 @@ securityContext: {} # This is for setting up a service more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/ service: - # This sets the service type more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types + # -- This sets the service type more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types type: ClusterIP - # This sets the ports more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports + # -- This sets the ports more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports port: 5000 # This block is for setting up the ingress for more information can be found here: https://kubernetes.io/docs/concepts/services-networking/ingress/ @@ -195,20 +200,20 @@ autoscaling: targetCPUUtilizationPercentage: 80 # targetMemoryUtilizationPercentage: 80 -# Additional volumes on the output Deployment definition. +# -- Additional volumes on the output Deployment definition. volumes: [] # - name: foo # secret: # secretName: mysecret # optional: false -# Additional volumeMounts on the output Deployment definition. +# -- Additional volumeMounts on the output Deployment definition. 
volumeMounts: [] # - name: foo # mountPath: "/etc/foo" # readOnly: true -# Runtime class name for the pod (e.g., "nvidia" for GPU workloads) +# -- Runtime class name for the pod (e.g., "nvidia" for GPU workloads) # More information: https://kubernetes.io/docs/concepts/containers/runtime-class/ runtimeClassName: "" @@ -218,18 +223,18 @@ tolerations: [] affinity: {} -# Host aliases for the pod +# -- Host aliases for the pod hostAliases: [] # - ip: "127.0.0.1" # hostnames: # - "foo.local" networkPolicy: - # Choose to create a default network policy blocking all ingress other than to the service port. + # -- Choose to create a default network policy blocking all ingress other than to the service port. enabled: true egress: - # Choose to block egress by enabling it in the network policy + # -- Choose to block egress by enabling it in the network policy enabled: false - # Append any custom egress rules following the standard format + # -- Append any custom egress rules following the standard format egressRules: [] # # Example format # - to: @@ -239,8 +244,8 @@ networkPolicy: # ports: # - port: 5000 -# Additional init containers to run before the main container. Can be templated +# -- Additional init containers to run before the main container. Can be templated extraInitContainers: [] -# Additional manifests to deploy to kubernetes. Can be templated +# -- Additional manifests to deploy to kubernetes. 
Can be templated extraManifests: [] From 5e36f21fa2b9f87929d054685f3d8289b794f958 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Mon, 23 Mar 2026 13:40:40 +0000 Subject: [PATCH 04/10] feat(helm): Create git precommit hook for helm docs --- .pre-commit-config.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2ace04d..69e452f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,6 +3,7 @@ repos: rev: v1.14.2 hooks: - id: helm-docs-container + files: helm-charts/medcat-service-helm/* args: # Make the tool search for charts only under the `example-charts` directory - --chart-search-root=helm-charts/medcat-service-helm \ No newline at end of file From 7f928b0c1a0c863cbd29ba26191a2e00a04849ea Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Mon, 23 Mar 2026 13:41:05 +0000 Subject: [PATCH 05/10] feat(helm): Create git precommit hook for helm docs --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 69e452f..80841fd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,7 +3,7 @@ repos: rev: v1.14.2 hooks: - id: helm-docs-container - files: helm-charts/medcat-service-helm/* + files: helm-charts/medcat-service-helm/(.*) args: # Make the tool search for charts only under the `example-charts` directory - --chart-search-root=helm-charts/medcat-service-helm \ No newline at end of file From a5db93356ab995deee21958a4bb7d8ee504dcd64 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Mon, 23 Mar 2026 13:56:02 +0000 Subject: [PATCH 06/10] feat(helm): Create git precommit hook for helm docs --- .pre-commit-config.yaml | 2 +- helm-charts/medcat-service-helm/README.md | 2 +- helm-charts/medcat-service-helm/values.yaml | 3 +++ 3 files 
changed, 5 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 80841fd..890fa9a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,7 +3,7 @@ repos: rev: v1.14.2 hooks: - id: helm-docs-container - files: helm-charts/medcat-service-helm/(.*) + files: helm-charts/medcat-service-helm/.* args: # Make the tool search for charts only under the `example-charts` directory - --chart-search-root=helm-charts/medcat-service-helm \ No newline at end of file diff --git a/helm-charts/medcat-service-helm/README.md b/helm-charts/medcat-service-helm/README.md index e58869f..5447450 100644 --- a/helm-charts/medcat-service-helm/README.md +++ b/helm-charts/medcat-service-helm/README.md @@ -175,7 +175,7 @@ You should see the NVIDIA GPU device listing if the GPU is properly accessible. | readinessProbe.httpGet.path | string | `"/api/health/ready"` | | | readinessProbe.httpGet.port | string | `"http"` | | | replicaCount | int | `1` | This will set the replicaset count more information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/ | -| resources | object | `{}` | | +| resources | object | `{}` | Configure resources for the pod. 
More information can be found here: https://kubernetes.io/docs/concepts/containers/ Recommendation for a default production model is { requests: { cpu: 1, memory: 4Gi }, limits: { cpu: null , memory: 4Gi } } | | runtimeClassName | string | `""` | Runtime class name for the pod (e.g., "nvidia" for GPU workloads) More information: https://kubernetes.io/docs/concepts/containers/runtime-class/ | | securityContext | object | `{}` | | | service.port | int | `5000` | This sets the ports more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports | diff --git a/helm-charts/medcat-service-helm/values.yaml b/helm-charts/medcat-service-helm/values.yaml index c88e25d..c424ed3 100644 --- a/helm-charts/medcat-service-helm/values.yaml +++ b/helm-charts/medcat-service-helm/values.yaml @@ -163,6 +163,8 @@ ingress: # hosts: # - chart-example.local +# -- Configure resources for the pod. More information can be found here: https://kubernetes.io/docs/concepts/containers/ +# Recommendation for a default production model is { requests: { cpu: 1, memory: 4Gi }, limits: { cpu: null , memory: 4Gi } } resources: {} # We usually recommend not to specify default resources and to leave this as a conscious # choice for the user. This also increases chances charts run on environments with little @@ -228,6 +230,7 @@ hostAliases: [] # - ip: "127.0.0.1" # hostnames: # - "foo.local" + networkPolicy: # -- Choose to create a default network policy blocking all ingress other than to the service port. 
enabled: true From e0edd6475d627487b7a877dd6780f83442b07a93 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Mon, 23 Mar 2026 14:19:29 +0000 Subject: [PATCH 07/10] docs(helm): Add helm docs for medcat-trainer --- .pre-commit-config.yaml | 7 +- helm-charts/medcat-trainer-helm/README.md | 126 +++++++++++++++++- .../medcat-trainer-helm/README.md.gotmpl | 55 ++++++++ helm-charts/medcat-trainer-helm/values.yaml | 61 +++++---- 4 files changed, 217 insertions(+), 32 deletions(-) create mode 100644 helm-charts/medcat-trainer-helm/README.md.gotmpl diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 890fa9a..2271e72 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,4 +6,9 @@ repos: files: helm-charts/medcat-service-helm/.* args: # Make the tool search for charts only under the `example-charts` directory - - --chart-search-root=helm-charts/medcat-service-helm \ No newline at end of file + - --chart-search-root=helm-charts/medcat-service-helm + - id: helm-docs-container + files: helm-charts/medcat-trainer-helm/.* + args: + # Make the tool search for charts only under the `example-charts` directory + - --chart-search-root=helm-charts/medcat-trainer-helm \ No newline at end of file diff --git a/helm-charts/medcat-trainer-helm/README.md b/helm-charts/medcat-trainer-helm/README.md index 030b943..8aa48b8 100644 --- a/helm-charts/medcat-trainer-helm/README.md +++ b/helm-charts/medcat-trainer-helm/README.md @@ -9,7 +9,6 @@ By default the chart will: - Run a SOLR and Zookeeper cluster for the Concept DB - Run a Postgres database for persistence - ## Installation ```sh @@ -20,12 +19,11 @@ helm install my-medcat-trainer oci://registry-1.docker.io/cogstacksystems/medcat See these values for common configurations to change: -| Setting |description | -| -------- | -------- | -| `env` | Environment variables as defined in the [MedCAT Trainer 
docs](https://docs.cogstack.org/projects/medcat-trainer/en/latest/installation.html). | -|`medcatConfig`|MedCAT config file as described [here](https://github.com/CogStack/cogstack-nlp/blob/main/medcat-v2/medcat/config/config.py)| -| `env.CSRF_TRUSTED_ORIGINS` | The Host and Port to access the application on | - +| Setting | description | +| -------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------- | +| `env` | Environment variables as defined in the [MedCAT Trainer docs](https://docs.cogstack.org/projects/medcat-trainer/en/latest/installation.html). | +| `medcatConfig` | MedCAT config file as described [here](https://github.com/CogStack/cogstack-nlp/blob/main/medcat-v2/medcat/config/config.py) | +| `env.CSRF_TRUSTED_ORIGINS` | The Host and Port to access the application on | ### Use Sqlite instead of Postgres @@ -41,9 +39,123 @@ postgresql: ``` ## Missing features + These features are not yet existing but to be added in future: + - Use a pre existing postgres db - Use a pre existing SOLR instance - Migrate from supervisord to standalone deployment for background tasks for better scaling - Support SOLR authentication from medcat trainer - Support passing DB OPTIONS to medcat trainer for use in cloud environments + +## Requirements + +| Repository | Name | Version | +|------------|------|---------| +| oci://registry-1.docker.io/bitnamicharts | postgresql | 16.7.27 | +| oci://registry-1.docker.io/bitnamicharts | solr | 9.6.10 | + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| affinity | object | `{}` | | +| autoscaling.enabled | bool | `false` | | +| autoscaling.maxReplicas | int | `100` | | +| autoscaling.minReplicas | int | `1` | | +| autoscaling.targetCPUUtilizationPercentage | int | `80` | | +| env | object | 
`{"CSRF_TRUSTED_ORIGINS":"http://localhost:8080","DB_ENGINE":"postgresql","DB_PORT":"5432","DEBUG":"1","EMAIL_HOST":"mail.cogstack.org","EMAIL_PASS":"to-be-changed","EMAIL_PORT":"465","EMAIL_USER":"example@cogstack.org","ENV":"non-prod","LOAD_NUM_DOC_PAGES":"10","MAX_DATASET_SIZE":"10000","MAX_MEDCAT_MODELS":"2","OPENBLAS_NUM_THREADS":"1","RESUBMIT_ALL_ON_STARTUP":"0","UNIQUE_DOC_NAMES_IN_DATASETS":"True","VITE_USE_OIDC":"0"}` | Add any environment variables here that should be set in the medcat-trainer container | +| env.CSRF_TRUSTED_ORIGINS | string | `"http://localhost:8080"` | This sets the CSRF trusted origins for the medcat-trainer container. Change to allow access from other domains | +| envValueFrom | object | `{"K8S_NODE_NAME":{"fieldRef":{"fieldPath":"spec.nodeName"}},"K8S_POD_NAME":{"fieldRef":{"fieldPath":"metadata.name"}},"K8S_POD_NAMESPACE":{"fieldRef":{"fieldPath":"metadata.namespace"}},"K8S_POD_UID":{"fieldRef":{"fieldPath":"metadata.uid"}}}` | Allow setting env values from field/configmap/secret references @default -- Adds K8s downward API values for tracing | +| fullnameOverride | string | `""` | | +| hostAliases | list | `[]` | Host aliases for the pod | +| image.pullPolicy | string | `"IfNotPresent"` | This sets the pull policy for images. 
| +| image.repository | string | `"cogstacksystems/medcat-trainer"` | Image repository for the MedCAT service container | +| imagePullSecrets | list | `[]` | This is for the secrets for pulling an image from a private repository more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ | +| ingress.annotations | object | `{}` | | +| ingress.className | string | `""` | | +| ingress.enabled | bool | `false` | | +| ingress.hosts[0].host | string | `"chart-example.local"` | | +| ingress.hosts[0].paths[0].path | string | `"/"` | | +| ingress.hosts[0].paths[0].pathType | string | `"ImplementationSpecific"` | | +| ingress.tls | list | `[]` | | +| livenessProbe.failureThreshold | int | `30` | | +| livenessProbe.httpGet.path | string | `"/api/health/live/?format=json"` | | +| livenessProbe.httpGet.port | string | `"api"` | | +| medcatConfig | string | Default config for MedCAT Trainer | MedCAT config as described here: [MedCAT config](https://github.com/CogStack/cogstack-nlp/blob/main/medcat-v2/medcat/config/config.py) | +| nameOverride | string | `""` | This is to override the chart name. | +| nginx.livenessProbe.httpGet.path | string | `"/nginx/health/live"` | | +| nginx.livenessProbe.httpGet.port | string | `"http"` | | +| nginx.readinessProbe.httpGet.path | string | `"/nginx/health/live"` | | +| nginx.readinessProbe.httpGet.port | string | `"http"` | | +| nginxImage | object | `{"pullPolicy":"IfNotPresent","repository":"nginx","tag":"1.29.1"}` | This sets the container image for the nginx server more information can be found here: https://kubernetes.io/docs/concepts/containers/images/ | +| nginxImage.pullPolicy | string | `"IfNotPresent"` | This sets the pull policy for images. 
| +| nginxImage.repository | string | `"nginx"` | Image repository for the nginx server | +| nginxImage.tag | string | `"1.29.1"` | This sets the image tag for the nginx server | +| nginxUpdateStrategy.type | string | `"RollingUpdate"` | | +| nodeSelector | object | `{}` | | +| persistence.media.size | string | `"8Gi"` | | +| persistence.sqlite.backupDbSize | string | `"300Mi"` | | +| persistence.sqlite.size | string | `"100Mi"` | | +| persistence.static.size | string | `"100Mi"` | | +| persistence.storageClassName | string | `""` | | +| podAnnotations | object | `{}` | This is for setting Kubernetes Annotations to a Pod. For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/ | +| podLabels | object | `{}` | This is for setting Kubernetes Labels to a Pod. For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ | +| podSecurityContext | object | `{}` | | +| postgresql.auth.database | string | `"postgres"` | | +| postgresql.auth.password | string | `"postgres"` | | +| postgresql.auth.username | string | `"postgres"` | | +| postgresql.enabled | bool | `true` | | +| postgresql.image.repository | string | `"bitnamilegacy/postgresql"` | | +| postgresql.image.tag | string | `"17.6.0-debian-12-r4"` | | +| postgresql.primary.persistence.size | string | `"500Mi"` | | +| provisioning.config | object | Config to load example project from github | Provisioning Config Yaml contents. Can be templated See https://docs.cogstack.org/projects/medcat-trainer/en/latest/provisioning/ | +| provisioning.enabled | bool | `false` | Set to true to enable provisioning of projects and models on startup.. | +| provisioning.existingConfigMap | object | `{}` | Optional: Reference an existing configmap for the provisioning config. 
| +| readinessProbe.httpGet.path | string | `"/api/health/ready/?format=json"` | | +| readinessProbe.httpGet.port | string | `"api"` | | +| replicaCount | int | `1` | This will set the replicaset count more information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/ | +| resources | object | `{}` | Resources for the pod. More information can be found here: https://kubernetes.io/docs/concepts/containers/ Recommendation for a minimal production setup is { requests: { cpu: 2, memory: 2Gi }, limits: { cpu: null , memory: 4Gi } } | +| runtimeClassName | string | `""` | Runtime class name for the pod (e.g., "nvidia" for GPU workloads) | +| securityContext | object | `{}` | | +| service.apiPort | int | `8000` | | +| service.port | int | `8001` | This sets the ports more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports | +| service.type | string | `"ClusterIP"` | This sets the service type more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types | +| serviceAccount.annotations | object | `{}` | Annotations to add to the service account | +| serviceAccount.automount | bool | `true` | Automatically mount a ServiceAccount's API credentials? | +| serviceAccount.create | bool | `true` | Specifies whether a service account should be created | +| serviceAccount.name | string | `""` | The name of the service account to use. 
If not set and create is true, a name is generated using the fullname template | +| solr.auth.enabled | bool | `false` | | +| solr.collectionReplicas | int | `1` | | +| solr.collectionShards | int | `1` | | +| solr.image.repository | string | `"bitnamilegacy/solr"` | | +| solr.image.tag | string | `"9.9.0-debian-12-r1"` | | +| solr.persistence.size | string | `"1Gi"` | | +| solr.podLabels."app.kubernetes.io/component" | string | `"solr"` | | +| solr.podLabels."app.kubernetes.io/part-of" | string | `"cogstack"` | | +| solr.replicaCount | int | `1` | | +| solr.zookeeper.image.repository | string | `"bitnamilegacy/zookeeper"` | | +| solr.zookeeper.image.tag | string | `"3.9.3-debian-12-r22"` | | +| solr.zookeeper.persistence.size | string | `"1Gi"` | | +| solr.zookeeper.replicaCount | int | `1` | | +| startupProbe.failureThreshold | int | `30` | | +| startupProbe.httpGet.path | string | `"/api/health/startup/?format=json"` | | +| startupProbe.httpGet.port | string | `"api"` | | +| startupProbe.initialDelaySeconds | int | `15` | | +| startupProbe.periodSeconds | int | `10` | | +| tolerations | list | `[]` | | +| tracing.disabledInstrumentations | string | `"psycopg,sqlite3"` | | +| tracing.experimentalResourceDetectors | string | `"containerid,os"` | | +| tracing.otlp.enabled | bool | `false` | | +| tracing.otlp.grpc.enabled | bool | `false` | | +| tracing.otlp.grpc.endpoint | string | `"http://unused:4317"` | | +| tracing.otlp.http.enabled | bool | `false` | | +| tracing.otlp.http.endpoint | string | `"http://unused:4318"` | | +| tracing.resourceAttributes | object | Adds semantic k8s attributes for tracing | Resource attributes to add to the traces. Can be templated | +| tracing.serviceName | string | `"medcat-trainer"` | | +| updateStrategy.type | string | `"RollingUpdate"` | Used for Kubernetes deployment .spec.strategy.type. Allowed values are "Recreate" or "RollingUpdate". 
| +| volumeMounts | list | `[]` | Additional volumeMounts on the output Deployment definition. | +| volumes | list | `[]` | Additional volumes on the output Deployment definition. | + +---------------------------------------------- +Autogenerated from chart metadata using [helm-docs v1.14.2](https://github.com/norwoodj/helm-docs/releases/v1.14.2) diff --git a/helm-charts/medcat-trainer-helm/README.md.gotmpl b/helm-charts/medcat-trainer-helm/README.md.gotmpl new file mode 100644 index 0000000..4e304f5 --- /dev/null +++ b/helm-charts/medcat-trainer-helm/README.md.gotmpl @@ -0,0 +1,55 @@ +# MedCAT Trainer Helm Chart + +This Helm chart deploys MedCAT Trainer and infrastructure to a Kubernetes cluster. + +By default the chart will: + +- Run MedCAT Trainer Django server +- Run NGINX for static site hosting and routing +- Run a SOLR and Zookeeper cluster for the Concept DB +- Run a Postgres database for persistence + +## Installation + +```sh +helm install my-medcat-trainer oci://registry-1.docker.io/cogstacksystems/medcat-trainer-helm +``` + +## Configuration + +See these values for common configurations to change: + +| Setting | description | +| -------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------- | +| `env` | Environment variables as defined in the [MedCAT Trainer docs](https://docs.cogstack.org/projects/medcat-trainer/en/latest/installation.html). 
| +| `medcatConfig` | MedCAT config file as described [here](https://github.com/CogStack/cogstack-nlp/blob/main/medcat-v2/medcat/config/config.py) | +| `env.CSRF_TRUSTED_ORIGINS` | The Host and Port to access the application on | + +### Use Sqlite instead of Postgres + +Sqlite can be used for smaller single instance deployments + +Set these values: + +```yaml +DB_ENGINE: "sqlite3" + +postgresql: + enabled: false +``` + +## Missing features + +These features are not yet existing but to be added in future: + +- Use a pre existing postgres db +- Use a pre existing SOLR instance +- Migrate from supervisord to standalone deployment for background tasks for better scaling +- Support SOLR authentication from medcat trainer +- Support passing DB OPTIONS to medcat trainer for use in cloud environments + +{{ template "chart.requirementsSection" . }} + +{{ template "chart.valuesSection" . }} + +{{ template "helm-docs.versionFooter" . }} diff --git a/helm-charts/medcat-trainer-helm/values.yaml b/helm-charts/medcat-trainer-helm/values.yaml index 189935a..cff1570 100644 --- a/helm-charts/medcat-trainer-helm/values.yaml +++ b/helm-charts/medcat-trainer-helm/values.yaml @@ -2,23 +2,29 @@ # This is a YAML-formatted file. # Declare variables to be passed into your templates. -# This will set the replicaset count more information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/ +# -- This will set the replicaset count more information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/ replicaCount: 1 # This sets the container image more information can be found here: https://kubernetes.io/docs/concepts/containers/images/ image: + # -- Image repository for the MedCAT service container repository: cogstacksystems/medcat-trainer - # This sets the pull policy for images. + # -- This sets the pull policy for images. pullPolicy: IfNotPresent # Overrides the image tag whose default is the chart appVersion. 
# tag: "latest" +# -- This sets the container image for the nginx server more information can be found here: https://kubernetes.io/docs/concepts/containers/images/ nginxImage: + # -- Image repository for the nginx server repository: nginx + # -- This sets the pull policy for images. pullPolicy: IfNotPresent + # -- This sets the image tag for the nginx server tag: "1.29.1" -# Add any environment variables here that should be set in the medcat-trainer container +# -- Add any environment variables here that should be set in the medcat-trainer container env: + # -- This sets the CSRF trusted origins for the medcat-trainer container. Change to allow access from other domains CSRF_TRUSTED_ORIGINS: "http://localhost:8080" DEBUG: "1" EMAIL_HOST: "mail.cogstack.org" @@ -39,7 +45,8 @@ env: DB_ENGINE: "postgresql" DB_PORT: "5432" -# Allow setting env values from field/configmap/secret references. Defaults to include k8s details for observability. +# -- Allow setting env values from field/configmap/secret references +# @default -- Adds K8s downward API values for tracing envValueFrom: K8S_NODE_NAME: fieldRef: @@ -61,7 +68,8 @@ provisioning: existingConfigMap: {} # -- The name of an existing configmap. Contains a yaml definition under the key "provisioining.yaml" # name: "" - # -- Put yaml file contents here. Can be templated See https://docs.cogstack.org/projects/medcat-trainer/en/latest/provisioning/ + # -- (object) Provisioning Config Yaml contents. Can be templated See https://docs.cogstack.org/projects/medcat-trainer/en/latest/provisioning/ + # @default -- Config to load example project from github config: projects: - dataset: @@ -80,7 +88,8 @@ provisioning: tracing: # The name of the service in the tracing system serviceName: "medcat-trainer" - # Resource attributes to add to the traces. Can be templated + # -- Resource attributes to add to the traces. 
Can be templated + # @default -- Adds semantic k8s attributes for tracing resourceAttributes: k8s.pod.uid: "$(K8S_POD_UID)" k8s.pod.name: "$(K8S_POD_NAME)" @@ -98,12 +107,12 @@ tracing: # Set to true to enable grpc tracing enabled: false # The endpoint to send the traces to - endpoint: "http://:4317" + endpoint: "http://unused:4317" http: # Set to true to enable http tracing over http/protobug enabled: false # The endpoint to send the traces to - endpoint: "http://:4318" + endpoint: "http://unused:4318" postgresql: enabled: true @@ -135,7 +144,8 @@ persistence: backupDbSize: 300Mi storageClassName: "" -# MedCAT config as described here: https://github.com/CogStack/cogstack-nlp/blob/main/medcat-v2/medcat/config/config.py +# -- MedCAT config as described here: [MedCAT config](https://github.com/CogStack/cogstack-nlp/blob/main/medcat-v2/medcat/config/config.py) +# @default -- Default config for MedCAT Trainer medcatConfig: | cat.linking.optim = {'type': 'standard', 'lr': 0.1} cat.linking.filter_before_disamb = True @@ -179,14 +189,14 @@ solr: # Set size of the PVC for zookeper. Default is 8Gi size: 1Gi -# This is for the secrets for pulling an image from a private repository more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ +# -- This is for the secrets for pulling an image from a private repository more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ imagePullSecrets: [] -# This is to override the chart name. +# -- This is to override the chart name. nameOverride: "" fullnameOverride: "" updateStrategy: - # Used for Kubernetes deployment .spec.strategy.type. Allowed values are "Recreate" or "RollingUpdate". + # -- Used for Kubernetes deployment .spec.strategy.type. Allowed values are "Recreate" or "RollingUpdate". 
type: RollingUpdate nginxUpdateStrategy: @@ -195,20 +205,20 @@ nginxUpdateStrategy: # This section builds out the service account more information can be found here: https://kubernetes.io/docs/concepts/security/service-accounts/ serviceAccount: - # Specifies whether a service account should be created + # -- Specifies whether a service account should be created create: true - # Automatically mount a ServiceAccount's API credentials? + # -- Automatically mount a ServiceAccount's API credentials? automount: true - # Annotations to add to the service account + # -- Annotations to add to the service account annotations: {} - # The name of the service account to use. + # -- The name of the service account to use. # If not set and create is true, a name is generated using the fullname template name: "" -# This is for setting Kubernetes Annotations to a Pod. +# -- This is for setting Kubernetes Annotations to a Pod. # For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/ podAnnotations: {} -# This is for setting Kubernetes Labels to a Pod. +# -- This is for setting Kubernetes Labels to a Pod. 
# For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ podLabels: {} @@ -225,9 +235,9 @@ securityContext: {} # This is for setting up a service more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/ service: - # This sets the service type more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types + # -- This sets the service type more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types type: ClusterIP - # This sets the ports more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports + # -- This sets the ports more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports port: 8001 # Note medcat-trainer API is currently hardcoded on 8000 in the run.sh script apiPort: 8000 @@ -250,7 +260,10 @@ ingress: # hosts: # - chart-example.local +# -- Resources for the pod. More information can be found here: https://kubernetes.io/docs/concepts/containers/ +# Recommendation for a minimal production setup is { requests: { cpu: 2, memory: 2Gi }, limits: { cpu: null , memory: 4Gi } } resources: {} + # @default -- We usually recommend not to specify default resources and to leave this as a conscious choice for the user. This also increases chances charts run on environments with little resources, such as Minikube. If you do want to specify resources, uncomment the following lines, adjust them as necessary, and remove the curly braces after 'resources:'. # We usually recommend not to specify default resources and to leave this as a conscious # choice for the user. This also increases chances charts run on environments with little # resources, such as Minikube. 
If you do want to specify resources, uncomment the following @@ -298,14 +311,14 @@ autoscaling: targetCPUUtilizationPercentage: 80 # targetMemoryUtilizationPercentage: 80 -# Additional volumes on the output Deployment definition. +# -- Additional volumes on the output Deployment definition. volumes: [] # - name: foo # secret: # secretName: mysecret # optional: false -# Additional volumeMounts on the output Deployment definition. +# -- Additional volumeMounts on the output Deployment definition. volumeMounts: [] # - name: foo # mountPath: "/etc/foo" @@ -317,10 +330,10 @@ tolerations: [] affinity: {} -# Runtime class name for the pod (e.g., "nvidia" for GPU workloads) +# -- Runtime class name for the pod (e.g., "nvidia" for GPU workloads) runtimeClassName: "" -# Host aliases for the pod +# -- Host aliases for the pod hostAliases: [] # - ip: "127.0.0.1" # hostnames: From b27aa779ec85f48945214072a9ebf6355bdb0086 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Mon, 23 Mar 2026 14:42:17 +0000 Subject: [PATCH 08/10] docs(helm): Add helm-docs in README for cogstack CE --- helm-charts/cogstack-helm-ce/README.md | 55 +++++++++++++ helm-charts/cogstack-helm-ce/README.md.gotmpl | 80 +++++++++++++++++++ .../charts/jupyterhub/README.md | 54 +++++++++++++ .../charts/jupyterhub/README.md.gotmpl | 18 +++++ .../charts/jupyterhub/values.yaml | 15 +++- helm-charts/cogstack-helm-ce/values.yaml | 42 +++++++++- 6 files changed, 258 insertions(+), 6 deletions(-) create mode 100644 helm-charts/cogstack-helm-ce/README.md.gotmpl create mode 100644 helm-charts/cogstack-helm-ce/charts/jupyterhub/README.md create mode 100644 helm-charts/cogstack-helm-ce/charts/jupyterhub/README.md.gotmpl diff --git a/helm-charts/cogstack-helm-ce/README.md b/helm-charts/cogstack-helm-ce/README.md index 5002ace..7a8de49 100644 --- a/helm-charts/cogstack-helm-ce/README.md +++ b/helm-charts/cogstack-helm-ce/README.md @@ -72,3 +72,58 @@ If the namespace was 
created only for this release, remove it with: ```bash kubectl delete namespace cogstack ``` + +## Requirements + +| Repository | Name | Version | +|------------|------|---------| +| file://../medcat-service-helm | medcat-service(medcat-service-helm) | 0.0.1 | +| file://../medcat-service-helm | anoncat-service(medcat-service-helm) | 0.0.1 | +| file://../medcat-trainer-helm | medcat-trainer(medcat-trainer-helm) | 0.0.1 | +| file://charts/jupyterhub | cogstack-jupyterhub | 0.1.0 | +| https://opensearch-project.github.io/helm-charts/ | opensearch | 3.5.0 | +| https://opensearch-project.github.io/helm-charts/ | opensearch-dashboards | 3.5.0 | + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| anoncat-service.enabled | bool | `true` | Enable AnonCAT service deployment. | +| anoncat-service.env.APP_MEDCAT_MODEL_PACK | string | `"/cat/models/examples/example-deid-model-pack.zip"` | Model pack used by the MedCAT service when running in DeID mode. | +| anoncat-service.env.DEID_MODE | bool | `true` | Enable DeID mode. | +| anoncat-service.env.DEID_REDACT | bool | `false` | Enable redaction behaviour for DeID. | +| anoncat-service.image.repository | string | `"cogstacksystems/medcat-service"` | MedCAT service image repository for AnonCAT. | +| anoncat-service.image.tag | string | `"1.2.0"` | MedCAT service image tag used by AnonCAT. | +| anoncat-service.replicasCount | int | `1` | Number of AnonCAT (medcat-service in DeID mode) replicas. | +| cogstack-jupyterhub.enabled | bool | `true` | Enable JupyterHub (with hub and singleuser components). | +| cogstack-jupyterhub.jupyterhub.hub.config.Authenticator.admin_users | list | `["admin"]` | Allowed admin users for the dummy authenticator. | +| cogstack-jupyterhub.jupyterhub.hub.config.Authenticator.admin_users[0] | string | `"admin"` | Admin user entry for the dummy authenticator. 
| +| cogstack-jupyterhub.jupyterhub.hub.config.DummyAuthenticator.password | string | `"SuperSecret"` | Password for the dummy authenticator (do not use in production). | +| cogstack-jupyterhub.jupyterhub.hub.config.JupyterHub.authenticator_class | string | `"dummy"` | Authenticator class used by JupyterHub (dummy authenticator for demo/non-prod). | +| cogstack-jupyterhub.jupyterhub.hub.image.name | string | `"cogstacksystems/jupyter-hub"` | JupyterHub hub image name. | +| cogstack-jupyterhub.jupyterhub.hub.image.tag | string | `"2.2.2"` | JupyterHub hub image tag. | +| cogstack-jupyterhub.jupyterhub.singleuser.image.name | string | `"cogstacksystems/jupyter-singleuser"` | JupyterHub singleuser image name. | +| cogstack-jupyterhub.jupyterhub.singleuser.image.pullPolicy | string | `"IfNotPresent"` | JupyterHub singleuser image pull policy. | +| cogstack-jupyterhub.jupyterhub.singleuser.image.tag | string | `"2.2.2"` | JupyterHub singleuser image tag. | +| fullnameOverride | string | `""` | Fully override the chart fullname. | +| imagePullSecrets | list | `[]` | This is for the secrets for pulling an image from a private repository more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ | +| medcat-service.enabled | bool | `true` | Enable MedCAT service deployment. | +| medcat-service.image.repository | string | `"cogstacksystems/medcat-service"` | MedCAT service image repository. | +| medcat-service.image.tag | string | `"1.2.0"` | MedCAT service image tag. | +| medcat-service.replicasCount | int | `1` | Number of MedCAT service replicas. | +| medcat-trainer.enabled | bool | `true` | Enable MedCAT Trainer deployment. | +| medcat-trainer.env.CSRF_TRUSTED_ORIGINS | string | `"http://localhost:8080"` | CSRF trusted origins for the MedCAT Trainer frontend (set for your deployment/port-forward). 
| +| medcat-trainer.image.tag | string | `"latest@sha256:103215a7540ad614c32866f4cb00ddd91e7aff37cea9abc25dc226c577f9506d"` | MedCAT Trainer image tag (can be a digest-pinned tag). | +| medcat-trainer.provisioning.enabled | bool | `true` | Enable provisioning of projects and models on startup. | +| medcat-trainer.provisioning.existingConfigMap.name | string | `"cogstack-helm-ce-example-trainer-provisioining"` | Existing ConfigMap name containing the provisioning configuration. | +| nameOverride | string | `""` | This is to override the chart name. | +| opensearch-dashboards.enabled | bool | `true` | Deploy an opensearch-dashboards instance | +| opensearch.enabled | bool | `true` | Deploy an opensearch cluster | +| opensearch.extraEnvs | list | Sets the initial admin password for OpenSearch | Extra environment variables to pass to OpenSearch. | +| provisioning.enabled | bool | `true` | Enable provisioning for supporting services (e.g. OpenSearch templates/documents). | +| provisioning.opensearch.createExampleDocuments | bool | `true` | Create example documents in OpenSearch on startup. | +| provisioning.opensearch.createIndexTemplate | bool | `true` | Create the OpenSearch index template on startup. | +| provisioning.opensearchDashboards.createDashboards | bool | `true` | Create dashboards on startup. 
| + +---------------------------------------------- +Autogenerated from chart metadata using [helm-docs v1.14.2](https://github.com/norwoodj/helm-docs/releases/v1.14.2) \ No newline at end of file diff --git a/helm-charts/cogstack-helm-ce/README.md.gotmpl b/helm-charts/cogstack-helm-ce/README.md.gotmpl new file mode 100644 index 0000000..ba10b0d --- /dev/null +++ b/helm-charts/cogstack-helm-ce/README.md.gotmpl @@ -0,0 +1,80 @@ +# CogStack Helm Community Edition + +This is an all-in-one Helm chart that runs CogStack on Kubernetes + +## Overview + +This chart is an umbrella chart that deploys: + +| Component | Description | +|------------------|-------------| +| **MedCAT** | Medical Concept Annotation Tool — NER and linking for clinical text. | +| **AnonCAT** | De-identification service (MedCAT in DEID mode) for anonymising text. | +| **MedCAT Trainer** | Training and model management for MedCAT, with Solr and PostgreSQL. | + +## Prerequisites + +- Kubernetes cluster +- Helm 3+ + +## Installation + +```sh +helm install cogstack oci://registry-1.docker.io/cogstacksystems/cogstack-helm-ce +``` + +## Configuration +These are some values that are likely to need customization for your deployment: + +| Value | Default | Description | +|-------|---------|-------------| +| `medcat-service.replicasCount` | `1` | Number of MedCAT service replicas. | +| `anoncat-service.replicasCount` | `1` | Number of AnonCAT service replicas. | +| `anoncat-service.env.DEID_REDACT` | `false` | Redaction behaviour for de-identification. | +| `medcat-trainer.env.CSRF_TRUSTED_ORIGINS` | `"http://localhost:8080"` | CSRF trusted origins for MedCAT Trainer frontend (set correct value for your deployment, this default works for port forwarding). | + +Subcharts (MedCAT service, AnonCAT service, MedCAT Trainer) support additional options; see their respective charts under `../medcat-service-helm` and `../medcat-trainer-helm`. Pass them under the same keys as in this chart’s `values.yaml` (e.g.
`medcat-service.*`, `anoncat-service.*`, `medcat-trainer.*`). + +Example override file: + +```yaml +# my-values.yaml +medcat-service: + replicasCount: 2 +anoncat-service: + replicasCount: 1 + env: + APP_ENABLE_DEMO_UI: true + DEID_MODE: true +``` + +Install with overrides: + +```bash +helm install cogstack . -f my-values.yaml --namespace cogstack --create-namespace +``` + +## Dependencies + +The chart uses local subcharts via relative paths: + +- `medcat-service-helm` (as `medcat-service` and `anoncat-service`) +- `medcat-trainer-helm` + +## Uninstall + +```bash +helm uninstall cogstack --namespace cogstack +``` + +If the namespace was created only for this release, remove it with: + +```bash +kubectl delete namespace cogstack +``` + +{{ template "chart.requirementsSection" . }} + +{{ template "chart.valuesSection" . }} + +{{ template "helm-docs.versionFooter" . }} \ No newline at end of file diff --git a/helm-charts/cogstack-helm-ce/charts/jupyterhub/README.md b/helm-charts/cogstack-helm-ce/charts/jupyterhub/README.md new file mode 100644 index 0000000..4fd3cc8 --- /dev/null +++ b/helm-charts/cogstack-helm-ce/charts/jupyterhub/README.md @@ -0,0 +1,54 @@ +# cogstack-jupyterhub + +![Version: 0.1.0](https://img.shields.io/badge/Version-0.1.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: latest](https://img.shields.io/badge/AppVersion-latest-informational?style=flat-square) + +A Helm chart for CogStack Jupyter Hub using official JupyterHub chart + +## Requirements + +| Repository | Name | Version | +|------------|------|---------| +| https://hub.jupyter.org/helm-chart/ | jupyterhub | 4.3.2 | + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| jupyterhub.cull.enabled | bool | false, so pods stay running indefinitely | Enable culling of user pods | +| jupyterhub.hub.config.Authenticator.admin_users[0] | string |
`"admin"` | | +| jupyterhub.hub.config.DummyAuthenticator.password | string | `"SuperSecret"` | | +| jupyterhub.hub.config.JupyterHub.authenticator_class | string | `"dummy"` | | +| jupyterhub.hub.config.KubeSpawner.fs_gid | int | `0` | | +| jupyterhub.hub.config.KubeSpawner.http_timeout | int | `120` | | +| jupyterhub.hub.config.KubeSpawner.notebook_dir | string | `"/home/jovyan/work"` | | +| jupyterhub.hub.config.KubeSpawner.start_timeout | int | `300` | | +| jupyterhub.hub.config.KubeSpawner.uid | int | `0` | | +| jupyterhub.hub.config.Spawner.default_url | string | `"/lab/"` | | +| jupyterhub.hub.config.Spawner.notebook_dir | string | `"/home/jovyan/work"` | | +| jupyterhub.hub.extraConfig | object | `{"00-custom-spawner":"# Custom environment variables for user pods\nc.KubeSpawner.environment = {\n 'GRANT_SUDO': '1',\n 'CHOWN_HOME': 'yes',\n 'CHOWN_HOME_OPTS': '-R',\n}\n# Allow root in notebooks\nc.KubeSpawner.args = ['--allow-root']\n"}` | Extra hub configuration for custom spawner settings | +| jupyterhub.hub.extraConfig.00-log-level | string | `"c.Application.log_level = 'DEBUG'\n"` | | +| jupyterhub.hub.image.name | string | `"cogstacksystems/jupyter-hub"` | Image repository for the JupyterHub hub. | +| jupyterhub.hub.image.pullPolicy | string | `"IfNotPresent"` | Pull policy for the JupyterHub hub. | +| jupyterhub.hub.image.tag | string | `"2.2.2"` | Image tag for the JupyterHub hub. 
| +| jupyterhub.prePuller.continuous.enabled | bool | `false` | | +| jupyterhub.prePuller.hook | object | `{"enabled":false}` | Enable pulling the singleuser image before they are used to improve startup time | +| jupyterhub.proxy.service.type | string | `"ClusterIP"` | | +| jupyterhub.scheduling.userScheduler.replicas | int | `1` | | +| jupyterhub.singleuser.cmd[0] | string | `"jupyterhub-singleuser"` | | +| jupyterhub.singleuser.extraPodConfig | object | `{"securityContext":{"runAsGroup":0,"runAsUser":0}}` | Extra arguments passed to jupyterhub-singleuser | +| jupyterhub.singleuser.fsGid | int | `0` | | +| jupyterhub.singleuser.image.name | string | `"cogstacksystems/jupyter-singleuser"` | | +| jupyterhub.singleuser.image.pullPolicy | string | `"IfNotPresent"` | | +| jupyterhub.singleuser.image.tag | string | `"2.2.2"` | | +| jupyterhub.singleuser.lifecycleHooks.postStart | object | `{"exec":{"command":["sh","-c","if [ ! -f /home/jovyan/work/.notebooks_initialized ]; then\n echo \"First run - copying notebooks and medcat-scripts...\";\n cp -r /srv/jupyterhub/notebooks/* /home/jovyan/work/;\n cp -r /srv/jupyterhub/medcat-scripts /home/jovyan/work/;\n touch /home/jovyan/work/.notebooks_initialized;\n echo \"Notebooks initialized successfully\";\nelse\n echo \"Notebooks already initialized - skipping\";\nfi\n"]}}` | Lifecycle hook to copy notebooks from image to PVC on first run | +| jupyterhub.singleuser.networkPolicy.enabled | bool | `false` | | +| jupyterhub.singleuser.startTimeout | int | `600` | | +| jupyterhub.singleuser.storage.capacity | string | `"5Gi"` | Capacity of the storage for the user pods. 
| +| jupyterhub.singleuser.storage.extraVolumeMounts.jupyter-examples.mountPath | string | `"/home/jovyan/work/examples"` | | +| jupyterhub.singleuser.storage.extraVolumeMounts.jupyter-examples.name | string | `"jupyter-examples"` | | +| jupyterhub.singleuser.storage.extraVolumeMounts.jupyter-examples.readOnly | bool | `true` | | +| jupyterhub.singleuser.storage.extraVolumes | object | `{"jupyter-examples":{"configMap":{"name":"jupyter-examples"},"name":"jupyter-examples"}}` | NOTE: Prefer dictionary-form here to avoid Helm merge issues when this subchart is configured by a parent chart. | +| jupyterhub.singleuser.uid | int | `0` | | + +---------------------------------------------- +Autogenerated from chart metadata using [helm-docs v1.14.2](https://github.com/norwoodj/helm-docs/releases/v1.14.2) diff --git a/helm-charts/cogstack-helm-ce/charts/jupyterhub/README.md.gotmpl b/helm-charts/cogstack-helm-ce/charts/jupyterhub/README.md.gotmpl new file mode 100644 index 0000000..89e2dc3 --- /dev/null +++ b/helm-charts/cogstack-helm-ce/charts/jupyterhub/README.md.gotmpl @@ -0,0 +1,18 @@ +{{ template "chart.header" . }} +{{ template "chart.deprecationWarning" . }} + +{{ template "chart.badgesSection" . }} + +{{ template "chart.description" . }} + +{{ template "chart.homepageLine" . }} + +{{ template "chart.maintainersSection" . }} + +{{ template "chart.sourcesSection" . }} + +{{ template "chart.requirementsSection" . }} + +{{ template "chart.valuesSection" . }} + +{{ template "helm-docs.versionFooter" . }} diff --git a/helm-charts/cogstack-helm-ce/charts/jupyterhub/values.yaml b/helm-charts/cogstack-helm-ce/charts/jupyterhub/values.yaml index e2f03d1..53ae410 100644 --- a/helm-charts/cogstack-helm-ce/charts/jupyterhub/values.yaml +++ b/helm-charts/cogstack-helm-ce/charts/jupyterhub/values.yaml @@ -2,8 +2,11 @@ jupyterhub: hub: image: + # -- Image repository for the JupyterHub hub. name: cogstacksystems/jupyter-hub + # -- Image tag for the JupyterHub hub. 
tag: "2.2.2" + # -- Pull policy for the JupyterHub hub. pullPolicy: IfNotPresent extraConfig: 00-log-level: | @@ -28,7 +31,7 @@ jupyterhub: default_url: /lab/ notebook_dir: /home/jovyan/work - # Extra hub configuration for custom spawner settings + # -- Extra hub configuration for custom spawner settings extraConfig: 00-custom-spawner: | # Custom environment variables for user pods @@ -48,8 +51,8 @@ jupyterhub: name: cogstacksystems/jupyter-singleuser tag: "2.2.2" pullPolicy: IfNotPresent - # Lifecycle hook to copy notebooks from image to PVC on first run lifecycleHooks: + # -- Lifecycle hook to copy notebooks from image to PVC on first run postStart: exec: command: @@ -73,15 +76,16 @@ jupyterhub: fsGid: 0 cmd: ["jupyterhub-singleuser"] - # Extra arguments passed to jupyterhub-singleuser + # -- Extra arguments passed to jupyterhub-singleuser extraPodConfig: securityContext: runAsUser: 0 runAsGroup: 0 storage: + # -- Capacity of the storage for the user pods. capacity: 5Gi - # NOTE: Prefer dictionary-form here to avoid Helm merge issues + # -- NOTE: Prefer dictionary-form here to avoid Helm merge issues # when this subchart is configured by a parent chart. extraVolumes: jupyter-examples: @@ -94,11 +98,14 @@ jupyterhub: mountPath: /home/jovyan/work/examples readOnly: true cull: + # -- Enable culling of user pods + # @default -- false, so pods stay running indefinitely enabled: false scheduling: userScheduler: replicas: 1 prePuller: + # -- Enable pulling the singleuser image before they are used to improve startup time hook: enabled: false continuous: diff --git a/helm-charts/cogstack-helm-ce/values.yaml b/helm-charts/cogstack-helm-ce/values.yaml index 2f31def..ce48bf1 100644 --- a/helm-charts/cogstack-helm-ce/values.yaml +++ b/helm-charts/cogstack-helm-ce/values.yaml @@ -2,73 +2,111 @@ # This is a YAML-formatted file. # Declare variables to be passed into your templates. 
-# This is for the secrets for pulling an image from a private repository more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ +# -- This is for the secrets for pulling an image from a private repository more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ imagePullSecrets: [] -# This is to override the chart name. +# -- This is to override the chart name. nameOverride: "" +# -- Fully override the chart fullname. fullnameOverride: "" anoncat-service: + # -- Enable AnonCAT service deployment. + enabled: true + # -- Number of AnonCAT (medcat-service in DeID mode) replicas. replicasCount: 1 env: + # -- Model pack used by the MedCAT service when running in DeID mode. APP_MEDCAT_MODEL_PACK: "/cat/models/examples/example-deid-model-pack.zip" + # -- Enable DeID mode. DEID_MODE: true + # -- Enable redaction behaviour for DeID. DEID_REDACT: false image: + # -- MedCAT service image repository for AnonCAT. repository: cogstacksystems/medcat-service + # -- MedCAT service image tag used by AnonCAT. tag: "1.2.0" medcat-service: + # -- Enable MedCAT service deployment. + enabled: true + # -- Number of MedCAT service replicas. replicasCount: 1 image: + # -- MedCAT service image repository. repository: cogstacksystems/medcat-service + # -- MedCAT service image tag. tag: "1.2.0" medcat-trainer: + # -- Enable MedCAT Trainer deployment. + enabled: true image: + # -- MedCAT Trainer image tag (can be a digest-pinned tag). tag: latest@sha256:103215a7540ad614c32866f4cb00ddd91e7aff37cea9abc25dc226c577f9506d provisioning: + # -- Enable provisioning of projects and models on startup. enabled: true existingConfigMap: + # -- Existing ConfigMap name containing the provisioning configuration. name: "cogstack-helm-ce-example-trainer-provisioining" env: + # -- CSRF trusted origins for the MedCAT Trainer frontend (set for your deployment/port-forward). 
CSRF_TRUSTED_ORIGINS: "http://localhost:8080" provisioning: + # -- Enable provisioning for supporting services (e.g. OpenSearch templates/documents). enabled: true opensearch: + # -- Create the OpenSearch index template on startup. createIndexTemplate: true + # -- Create example documents in OpenSearch on startup. createExampleDocuments: true opensearchDashboards: + # -- Create dashboards on startup. createDashboards: true opensearch: + # -- Deploy an opensearch cluster enabled: true + # -- Extra environment variables to pass to OpenSearch. + # @default -- Sets the initial admin password for OpenSearch extraEnvs: - name: OPENSEARCH_INITIAL_ADMIN_PASSWORD value: "opensearch-312$A" opensearch-dashboards: + # -- Deploy an opensearch-dashboards instance enabled: true cogstack-jupyterhub: + # -- Enable JupyterHub (with hub and singleuser components). enabled: true jupyterhub: hub: image: + # -- JupyterHub hub image name. name: cogstacksystems/jupyter-hub + # -- JupyterHub hub image tag. tag: "2.2.2" config: JupyterHub: + # -- Authenticator class used by JupyterHub (dummy authenticator for demo/non-prod). authenticator_class: dummy # - Authenticator set to hardcoded user/password Authenticator: + # -- Allowed admin users for the dummy authenticator. admin_users: + # -- Admin user entry for the dummy authenticator. - admin DummyAuthenticator: + # -- Password for the dummy authenticator (do not use in production). password: "SuperSecret" singleuser: image: + # -- JupyterHub singleuser image name. name: cogstacksystems/jupyter-singleuser + # -- JupyterHub singleuser image tag. tag: "2.2.2" + # -- JupyterHub singleuser image pull policy. 
pullPolicy: IfNotPresent From 437bdab6acf5234ca0a459b7f0f52457405988be Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Mon, 23 Mar 2026 14:42:47 +0000 Subject: [PATCH 09/10] docs(helm): Add helm-docs in README for cogstack CE --- .pre-commit-config.yaml | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2271e72..0829e40 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,12 +3,7 @@ repos: rev: v1.14.2 hooks: - id: helm-docs-container - files: helm-charts/medcat-service-helm/.* + files: helm-charts/(.*)/values.yaml args: # Make the tool search for charts only under the `example-charts` directory - - --chart-search-root=helm-charts/medcat-service-helm - - id: helm-docs-container - files: helm-charts/medcat-trainer-helm/.* - args: - # Make the tool search for charts only under the `example-charts` directory - - --chart-search-root=helm-charts/medcat-trainer-helm \ No newline at end of file + - --chart-search-root=helm-charts From e84a8e9377e8812a3c267f797c38de7558c44959 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Mon, 23 Mar 2026 14:43:04 +0000 Subject: [PATCH 10/10] docs(helm): Add helm-docs in README for cogstack CE --- helm-charts/cogstack-helm-ce/charts/jupyterhub/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/helm-charts/cogstack-helm-ce/charts/jupyterhub/README.md b/helm-charts/cogstack-helm-ce/charts/jupyterhub/README.md index 4fd3cc8..1c4f551 100644 --- a/helm-charts/cogstack-helm-ce/charts/jupyterhub/README.md +++ b/helm-charts/cogstack-helm-ce/charts/jupyterhub/README.md @@ -14,7 +14,7 @@ A Helm chart for CogStack Jupyter Hub using official JupyterHub chart | Key | Type | Default | Description | |-----|------|---------|-------------| -| jupyterhub.cull.enabled | bool | false, so pods stay running indefinitely | Enable 
culling of user pods | +| jupyterhub.cull.enabled | bool | false, so pods stay running indefinitely | Enable culling of user pods | | jupyterhub.hub.config.Authenticator.admin_users[0] | string | `"admin"` | | | jupyterhub.hub.config.DummyAuthenticator.password | string | `"SuperSecret"` | | | jupyterhub.hub.config.JupyterHub.authenticator_class | string | `"dummy"` | |