diff --git a/.github/workflows/kubernetes-charts-build.yaml b/.github/workflows/kubernetes-charts-build.yaml index 88a372f..9c43a49 100644 --- a/.github/workflows/kubernetes-charts-build.yaml +++ b/.github/workflows/kubernetes-charts-build.yaml @@ -70,7 +70,9 @@ jobs: uses: actions/checkout@v5 - name: Package Helm Charts - run: helm package ./charts/medcat-service-helm --version $CHART_VERSION + run: | + helm package ./charts/medcat-service-helm --version $CHART_VERSION + helm package ./charts/medcat-trainer-helm --version $CHART_VERSION --dependency-update - name: Helm OCI login to Docker Hub run: helm registry login registry-1.docker.io -u ${{ secrets.DOCKERHUB_USERNAME }} -p ${{ secrets.DOCKERHUB_TOKEN }} @@ -78,3 +80,4 @@ jobs: - name: Push Helm Chart to Docker Hub OCI run: | helm push ./medcat-service-helm-${CHART_VERSION}.tgz oci://registry-1.docker.io/cogstacksystems + helm push ./medcat-trainer-helm-${CHART_VERSION}.tgz oci://registry-1.docker.io/cogstacksystems diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/.helmignore b/deployment/kubernetes/charts/medcat-trainer-helm/.helmignore new file mode 100644 index 0000000..0e8a0eb --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/Chart.lock b/deployment/kubernetes/charts/medcat-trainer-helm/Chart.lock new file mode 100644 index 0000000..36417b3 --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/Chart.lock @@ -0,0 +1,9 @@ +dependencies: +- name: solr + repository: oci://registry-1.docker.io/bitnamicharts + version: 9.6.10 +- name: postgresql + repository: oci://registry-1.docker.io/bitnamicharts + version: 16.7.27 +digest: sha256:a02db326b15b24d92e4c0787792803d7b224babd4f288e28a73d66a7d4506a70 +generated: "2025-09-10T11:29:31.705470067Z" diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/Chart.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/Chart.yaml new file mode 100644 index 0000000..9a74ccd --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/Chart.yaml @@ -0,0 +1,34 @@ +apiVersion: v2 +name: medcat-trainer-helm +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.0.1 + +# This is the version number of the application being deployed. 
This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. +appVersion: "latest" + +# Chart.yaml +dependencies: +- name: solr + version: "9.6.10" + repository: "oci://registry-1.docker.io/bitnamicharts" +- name: postgresql + version: 16.7.27 + repository: "oci://registry-1.docker.io/bitnamicharts" + condition: postgresql.enabled diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/README.md b/deployment/kubernetes/charts/medcat-trainer-helm/README.md new file mode 100644 index 0000000..030b943 --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/README.md @@ -0,0 +1,49 @@ +# MedCAT Trainer Helm Chart + +This Helm chart deploys MedCAT Trainer and infrastructure to a Kubernetes cluster. + +By default the chart will: + +- Run MedCAT Trainer Django server +- Run NGINX for static site hosting and routing +- Run a SOLR and Zookeeper cluster for the Concept DB +- Run a Postgres database for persistence + + +## Installation + +```sh +helm install my-medcat-trainer oci://registry-1.docker.io/cogstacksystems/medcat-trainer-helm +``` + +## Configuration + +See these values for common configurations to change: + +| Setting |description | +| -------- | -------- | +| `env` | Environment variables as defined in the [MedCAT Trainer docs](https://docs.cogstack.org/projects/medcat-trainer/en/latest/installation.html). 
|
+| `medcatConfig` | MedCAT config file as described [here](https://github.com/CogStack/cogstack-nlp/blob/main/medcat-v2/medcat/config/config.py) |
+| `env.CSRF_TRUSTED_ORIGINS` | The origin (scheme, host and port) the application is accessed on, e.g. `http://localhost:8080` |
+
+
+### Use SQLite instead of Postgres
+
+SQLite can be used for smaller, single-instance deployments.
+
+Set these values:
+
+```yaml
+DB_ENGINE: "sqlite3"
+
+postgresql:
+  enabled: false
+```
+
+## Missing features
+The following features are not yet supported but are planned:
+- Use a pre-existing Postgres database
+- Use a pre-existing Solr instance
+- Migrate from supervisord to standalone deployments for background tasks, for better scaling
+- Support Solr authentication from MedCAT Trainer
+- Support passing DB OPTIONS to MedCAT Trainer for use in cloud environments
diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/NOTES.txt b/deployment/kubernetes/charts/medcat-trainer-helm/templates/NOTES.txt
new file mode 100644
index 0000000..fcfca0d
--- /dev/null
+++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/NOTES.txt
@@ -0,0 +1,22 @@
+1. Get the application URL by running these commands:
+{{- if .Values.ingress.enabled }}
+{{- range $host := .Values.ingress.hosts }}
+  {{- range .paths }}
+  http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
+  {{- end }}
+{{- end }}
+{{- else if contains "NodePort" .Values.service.type }}
+  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "medcat-trainer-helm.fullname" . }}-nginx)
+  export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
+  echo http://$NODE_IP:$NODE_PORT
+{{- else if contains "LoadBalancer" .Values.service.type }}
+     NOTE: It may take a few minutes for the LoadBalancer IP to be available.
+           You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "medcat-trainer-helm.fullname" . }}-nginx'
+  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "medcat-trainer-helm.fullname" . }}-nginx --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
+  echo http://$SERVICE_IP:{{ .Values.service.port }}
+{{- else if contains "ClusterIP" .Values.service.type }}
+  export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "medcat-trainer-helm.name" . }},app.kubernetes.io/instance={{ .Release.Name }},app.kubernetes.io/component=nginx" -o jsonpath="{.items[0].metadata.name}")
+  export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
+  echo "Visit http://127.0.0.1:8080 to use your application"
+  kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
+{{- end }}
diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/_helpers.tpl b/deployment/kubernetes/charts/medcat-trainer-helm/templates/_helpers.tpl
new file mode 100644
index 0000000..c09de5f
--- /dev/null
+++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/_helpers.tpl
@@ -0,0 +1,95 @@
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "medcat-trainer-helm.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}} +{{- define "medcat-trainer-helm.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "medcat-trainer-helm.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "medcat-trainer-helm.labels" -}} +helm.sh/chart: {{ include "medcat-trainer-helm.chart" . }} +{{ include "medcat-trainer-helm.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +app.kubernetes.io/part-of: cogstack +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "medcat-trainer-helm.selectorLabels" -}} +app.kubernetes.io/name: {{ include "medcat-trainer-helm.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "medcat-trainer-helm.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "medcat-trainer-helm.fullname" .) 
.Values.serviceAccount.name }}
+{{- else }}
+{{- default "default" .Values.serviceAccount.name }}
+{{- end }}
+{{- end }}
+
+
+{{- /*
+Return the Solr host: .Values.solrHost when set, otherwise "<release name>-solr".
+Every branch starts with a left-trimming action so the result has no leading newline.
+TODO: Make Solr use the chart fullname instead of release name
+*/ -}}
+{{- define "medcat-trainer-helm.solrHost" -}}
+{{- if .Values.solrHost }}
+{{- .Values.solrHost }}
+{{- else }}
+{{- .Release.Name }}-solr
+{{- end }}
+{{- end }}
+
+{{- /*
+Return the Solr port: .Values.solrPort when set, otherwise .Values.solr.service.ports.http (no leading newline).
+*/ -}}
+{{- define "medcat-trainer-helm.solrPort" -}}
+{{- if .Values.solrPort }}
+{{- .Values.solrPort }}
+{{- else }}
+{{- .Values.solr.service.ports.http }}
+{{- end }}
+{{- end }}
+
+{{- /*
+Return the full Solr URL: "http://" followed by the solrHost and solrPort helpers joined with ":".
+*/ -}}
+{{- define "medcat-trainer-helm.solrURL" -}}
+http://{{ include "medcat-trainer-helm.solrHost" . }}:{{ include "medcat-trainer-helm.solrPort" . }}
+{{- end }}
\ No newline at end of file
diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/hpa.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/hpa.yaml
new file mode 100644
index 0000000..c1e4dfe
--- /dev/null
+++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/hpa.yaml
@@ -0,0 +1,32 @@
+{{- if .Values.autoscaling.enabled }}
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  name: {{ include "medcat-trainer-helm.fullname" . }}
+  labels:
+    {{- include "medcat-trainer-helm.labels" . | nindent 4 }}
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: {{ include "medcat-trainer-helm.fullname" .
}}-medcat-trainer  # must match the Deployment name in medcat-trainer-deployment.yaml
+  minReplicas: {{ .Values.autoscaling.minReplicas }}
+  maxReplicas: {{ .Values.autoscaling.maxReplicas }}
+  metrics:
+    {{- if .Values.autoscaling.targetCPUUtilizationPercentage }}
+    - type: Resource
+      resource:
+        name: cpu
+        target:
+          type: Utilization
+          averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
+    {{- end }}
+    {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }}
+    - type: Resource
+      resource:
+        name: memory
+        target:
+          type: Utilization
+          averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}
+    {{- end }}
+{{- end }}
diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/ingress.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/ingress.yaml
new file mode 100644
index 0000000..08b71eb
--- /dev/null
+++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/ingress.yaml
@@ -0,0 +1,43 @@
+{{- if .Values.ingress.enabled -}}
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: {{ include "medcat-trainer-helm.fullname" . }}
+  labels:
+    {{- include "medcat-trainer-helm.labels" . | nindent 4 }}
+  {{- with .Values.ingress.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+spec:
+  {{- with .Values.ingress.className }}
+  ingressClassName: {{ . }}
+  {{- end }}
+  {{- if .Values.ingress.tls }}
+  tls:
+    {{- range .Values.ingress.tls }}
+    - hosts:
+        {{- range .hosts }}
+        - {{ . | quote }}
+        {{- end }}
+      secretName: {{ .secretName }}
+    {{- end }}
+  {{- end }}
+  rules:
+    {{- range .Values.ingress.hosts }}
+    - host: {{ .host | quote }}
+      http:
+        paths:
+          {{- range .paths }}
+          - path: {{ .path }}
+            {{- with .pathType }}
+            pathType: {{ .
}}
+            {{- end }}
+            backend:
+              service:
+                name: {{ include "medcat-trainer-helm.fullname" $ }}-nginx  # the nginx Service from service.yaml
+                port:
+                  number: {{ $.Values.service.port }}
+          {{- end }}
+    {{- end }}
+{{- end }}
diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-trainer-configmap.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-trainer-configmap.yaml
new file mode 100644
index 0000000..a9eed8c
--- /dev/null
+++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-trainer-configmap.yaml
@@ -0,0 +1,41 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "medcat-trainer-helm.fullname" . }}-medcat-trainer-config
+  labels:
+    {{- include "medcat-trainer-helm.labels" . | nindent 4 }}
+    app.kubernetes.io/component: medcat-trainer
+data:
+  # This is mounted in the path for MEDCAT_CONFIG_FILE in the backend. Default to /home/configs/base.txt
+  medcat-base.txt: |
+{{ .Values.medcatConfig | indent 4 }}
+  supervisord.conf: |
+    [supervisord]
+    nodaemon=true
+    user=root
+    logfile=/var/log/supervisord.log
+    pidfile=/var/run/supervisord.pid
+
+    [program:medcattrainer]
+    command=sh -c "exec /home/scripts/run.sh 2>&1 | sed 's/^/[medcattrainer] /'"
+    stdout_logfile=/dev/stdout
+    stdout_logfile_maxbytes=0
+    stderr_logfile=/dev/stderr
+    stderr_logfile_maxbytes=0
+    autorestart=true
+
+    [program:bg-process]
+    command=sh -c "exec /home/scripts/run-bg-process.sh 2>&1 | sed 's/^/[bg-process] /'"
+    stdout_logfile=/dev/stdout
+    stdout_logfile_maxbytes=0
+    stderr_logfile=/dev/stderr
+    stderr_logfile_maxbytes=0
+    autorestart=true
+
+    [program:db-backup]
+    command=sh -c "exec cron -f -l 2 2>&1 | sed 's/^/[db-backup] /'"
+    stdout_logfile=/dev/stdout
+    stdout_logfile_maxbytes=0
+    stderr_logfile=/dev/stderr
+    stderr_logfile_maxbytes=0
+    autorestart=true
\ No newline at end of file
diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-trainer-deployment.yaml
b/deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-trainer-deployment.yaml new file mode 100644 index 0000000..03cf3cc --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-trainer-deployment.yaml @@ -0,0 +1,131 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "medcat-trainer-helm.fullname" . }}-medcat-trainer + labels: + {{- include "medcat-trainer-helm.labels" . | nindent 4 }} + app.kubernetes.io/component: medcat-trainer +spec: + {{- if not .Values.autoscaling.enabled }} + replicas: {{ .Values.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "medcat-trainer-helm.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: medcat-trainer + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "medcat-trainer-helm.labels" . | nindent 8 }} + app.kubernetes.io/component: medcat-trainer + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "medcat-trainer-helm.serviceAccountName" . }} + {{- with .Values.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: medcat-trainer + {{- with .Values.securityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: http + containerPort: 8000 + protocol: TCP + args: + - /usr/bin/supervisord + - -c + - /etc/supervisord.conf + envFrom: + - configMapRef: + name: {{ include "medcat-trainer-helm.fullname" . 
}}-medcat-trainer-env + {{- if .Values.postgresql.enabled }} + env: + - name: DB_HOST + value: {{ .Release.Name }}-postgresql + - name: DB_PASSWORD + valueFrom: + secretKeyRef: + name: {{ .Release.Name }}-postgresql + key: postgres-password + {{- end }} + {{- with .Values.livenessProbe }} + livenessProbe: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.readinessProbe }} + readinessProbe: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + volumeMounts: + - mountPath: /home/configs/base.txt + name: medcat-trainer-config + subPath: medcat-base.txt + - mountPath: /etc/supervisord.conf + name: medcat-trainer-config + subPath: supervisord.conf + - mountPath: /home/api/media + name: api-media + - mountPath: /home/api/static + name: api-static + {{- if eq .Values.DB_ENGINE "sqlite3" }} + - mountPath: /home/api/db + name: api-db + - mountPath: /home/api/db-backup + name: api-db-backup + {{- end }} + {{- with .Values.volumeMounts }} + {{- toYaml . | nindent 12 }} + {{- end }} + volumes: + - name: medcat-trainer-config + configMap: + name: {{ include "medcat-trainer-helm.fullname" . }}-medcat-trainer-config + - name: api-media + persistentVolumeClaim: + claimName: {{ include "medcat-trainer-helm.fullname" . }}-api-media + - name: api-static + persistentVolumeClaim: + claimName: {{ include "medcat-trainer-helm.fullname" . }}-api-static + {{- if eq .Values.DB_ENGINE "sqlite3" }} + - name: api-db + persistentVolumeClaim: + claimName: {{ include "medcat-trainer-helm.fullname" . }}-api-db + - name: api-db-backup + persistentVolumeClaim: + claimName: {{ include "medcat-trainer-helm.fullname" . }}-api-db-backup + {{- end }} + {{- with .Values.volumes }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-trainer-env-configmap.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-trainer-env-configmap.yaml new file mode 100644 index 0000000..3da9ada --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-trainer-env-configmap.yaml @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "medcat-trainer-helm.fullname" . }}-medcat-trainer-env + labels: + {{- include "medcat-trainer-helm.labels" . | nindent 4 }} + app.kubernetes.io/component: medcat-trainer +data: + CONCEPT_SEARCH_SERVICE_HOST: {{ include "medcat-trainer-helm.solrHost" . | quote }} + CONCEPT_SEARCH_SERVICE_PORT: {{ include "medcat-trainer-helm.solrPort" . | quote }} + MEDCAT_CONFIG_FILE: "/home/configs/base.txt" + DB_BACKUP_DIR: "/home/api/db-backup" + DB_DIR: "/home/api/db" + DB_PATH: "/home/api/db/db.sqlite3" +{{- range $key, $value := .Values.env }} + {{ $key }}: {{ $value | quote }} +{{- end }} \ No newline at end of file diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-configmap.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-configmap.yaml new file mode 100644 index 0000000..d705eaf --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-configmap.yaml @@ -0,0 +1,180 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "medcat-trainer-helm.fullname" . }}-nginx-config + labels: + {{- include "medcat-trainer-helm.labels" . | nindent 4 }} + app.kubernetes.io/component: nginx +data: + nginx.conf: | + # Configuration File - Nginx Server Configs + # http://nginx.org/en/docs/dirindex.html + + # Sets the worker threads to the number of CPU cores available in the system for best performance. + # Should be > the number of CPU cores. 
+ # Maximum number of connections = worker_processes * worker_connections + worker_processes auto; + + # Maximum number of open files per worker process. + # Should be > worker_connections. + worker_rlimit_nofile 8192; + + events { + # If you need more connections than this, you start optimizing your OS. + # That's probably the point at which you hire people who are smarter than you as this is *a lot* of requests. + # Should be < worker_rlimit_nofile. + worker_connections 8005; + } + + # Log errors and warnings to this file + # This is only used when you don't override it on a server{} level + #error_log logs/error.log warn; + + # The file storing the process ID of the main process + pid /var/run/nginx.pid; + + http { + + # Hide nginx version information. + server_tokens off; + + # Specify MIME types for files. + include mime.types; + default_type application/octet-stream; + + # Update charset_types to match updated mime.types. + # text/html is always included by charset module. + charset_types text/css text/plain text/vnd.wap.wml application/javascript application/json application/rss+xml application/xml; + + # Include $http_x_forwarded_for within default format used in log files + log_format main '$remote_addr - $remote_user [$time_local] "$request" ' + '$status $body_bytes_sent "$http_referer" ' + '"$http_user_agent" "$http_x_forwarded_for"'; + + # Log access to this file + # This is only used when you don't override it on a server{} level + #access_log logs/access.log main; + + # How long to allow each connection to stay idle. + # Longer values are better for each individual client, particularly for SSL, + # but means that worker connections are tied up longer. 
+ keepalive_timeout 3000s; + + # Timeouts + proxy_connect_timeout 3000; + proxy_send_timeout 3000; + proxy_read_timeout 3000; + send_timeout 3000; + + # increase client body size - Model packs can be over 3G.s + client_max_body_size 4000M; + # Speed up file transfers by using sendfile() to copy directly + # between descriptors rather than using read()/write(). + # For performance reasons, on FreeBSD systems w/ ZFS + # this option should be disabled as ZFS's ARC caches + # frequently used files in RAM by default. + sendfile on; + + # Don't send out partial frames; this increases throughput + # since TCP frames are filled up before being sent out. + tcp_nopush on; + + # Enable gzip compression. + gzip on; + + # Compression level (1-9). + # 5 is a perfect compromise between size and CPU usage, offering about + # 75% reduction for most ASCII files (almost identical to level 9). + gzip_comp_level 5; + + # Don't compress anything that's already small and unlikely to shrink much + # if at all (the default is 20 bytes, which is bad as that usually leads to + # larger files after gzipping). + gzip_min_length 256; + + # Compress data even for clients that are connecting to us via proxies, + # identified by the "Via" header (required for CloudFront). + gzip_proxied any; + + # Tell proxies to cache both the gzipped and regular version of a resource + # whenever the client's Accept-Encoding capabilities header varies; + # Avoids the issue where a non-gzip capable client (which is extremely rare + # today) would display gibberish if their proxy gave them the gzipped version. + gzip_vary on; + + # Compress all output labeled with one of the following MIME-types. 
+ gzip_types + application/atom+xml + application/javascript + application/json + application/ld+json + application/manifest+json + application/rss+xml + application/vnd.geo+json + application/vnd.ms-fontobject + application/x-font-ttf + application/x-web-app-manifest+json + application/xhtml+xml + application/xml + font/opentype + image/bmp + image/svg+xml + image/x-icon + text/cache-manifest + text/css + text/plain + text/vcard + text/vnd.rim.location.xloc + text/vtt + text/x-component + text/x-cross-domain-policy; + # text/html is always compressed by gzip module + + # This should be turned on if you are going to have pre-compressed copies (.gz) of + # static files available. If not it should be left off as it will cause extra I/O + # for the check. It is best if you enable this in a location{} block for + # a specific directory, or on an individual server{} level. + # gzip_static on; + + # Include files in the sites-enabled folder. server{} configuration files should be + # placed in the sites-available folder, and then the configuration should be enabled + # by creating a symlink to it in the sites-enabled folder. + # See doc/sites-enabled.md for more info. + include sites-enabled/*; + } + sitesenabled.medcattrainer: | + server { + listen {{ .Values.service.port }}; + server_name localhost; + charset utf-8; + large_client_header_buffers 4 32k; + + location /static { + alias /home/api/static; + } + + location /media { + alias /home/api/media; + } + + location /api/concepts/ { + proxy_pass {{ include "medcat-trainer-helm.solrURL" . }}/solr/; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + } + + location / { + proxy_pass http://{{ include "medcat-trainer-helm.fullname" . 
}}-medcat-trainer:8000; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + } + + location /nginx/health/live { + access_log off; + return 200 'OK'; + add_header Content-Type text/plain; + } + } + diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-deployment.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-deployment.yaml new file mode 100644 index 0000000..daecee0 --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-deployment.yaml @@ -0,0 +1,101 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "medcat-trainer-helm.fullname" . }}-nginx + labels: + {{- include "medcat-trainer-helm.labels" . | nindent 4 }} + app.kubernetes.io/component: nginx +spec: + {{- if not .Values.autoscaling.enabled }} + replicas: {{ .Values.nginxReplicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "medcat-trainer-helm.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: nginx + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "medcat-trainer-helm.labels" . | nindent 8 }} + app.kubernetes.io/component: nginx + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "medcat-trainer-helm.serviceAccountName" . }} + {{- with .Values.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: nginx + {{- with .Values.securityContext }} + securityContext: + {{- toYaml . 
| nindent 12 }} + {{- end }} + image: "{{ .Values.nginxImage.repository }}:{{ .Values.nginxImage.tag }}" + imagePullPolicy: {{ .Values.nginxImage.pullPolicy }} + ports: + - name: http + containerPort: {{ .Values.service.port }} + protocol: TCP + {{- with .Values.nginx.livenessProbe }} + livenessProbe: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.nginx.readinessProbe }} + readinessProbe: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + + volumeMounts: + - name: nginx-config + mountPath: /etc/nginx/nginx.conf + subPath: nginx.conf + - name: nginx-config + mountPath: /etc/nginx/sites-enabled/medcattrainer + subPath: sitesenabled.medcattrainer + - mountPath: /home/api/media + name: api-media + - mountPath: /home/api/static + name: api-static + {{- with .Values.volumeMounts }} + {{- toYaml . | nindent 12 }} + {{- end }} + volumes: + - name: nginx-config + configMap: + name: {{ include "medcat-trainer-helm.fullname" . }}-nginx-config + - name: api-media + persistentVolumeClaim: + claimName: {{ include "medcat-trainer-helm.fullname" . }}-api-media + - name: api-static + persistentVolumeClaim: + claimName: {{ include "medcat-trainer-helm.fullname" . }}-api-static + {{- with .Values.volumes }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . 
| nindent 8 }}
+      {{- end }}
diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/pvc.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/pvc.yaml
new file mode 100644
index 0000000..196e331
--- /dev/null
+++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/pvc.yaml
@@ -0,0 +1,81 @@
+# PersistentVolumeClaims for the MedCAT Trainer API: media, static site files
+# and, when DB_ENGINE is "sqlite3", the SQLite database and its backups.
+#
+# storageClassName is taken from persistence.media.storageClassName (the key
+# this template originally read) and falls back to persistence.storageClassName.
+# If both are empty the field is omitted from every claim.
+# NOTE(review): values.yaml lists storageClassName after the sqlite sizes -
+# confirm it sits directly under `persistence`. The SQLite claims are gated on
+# the top-level .Values.DB_ENGINE, not env.DB_ENGINE - confirm the intended key.
+{{- $storageClass := coalesce .Values.persistence.media.storageClassName .Values.persistence.storageClassName }}
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: {{ include "medcat-trainer-helm.fullname" . }}-api-media  # This stores the uploaded CDB and Vocab models
+  labels:
+    {{- include "medcat-trainer-helm.labels" . | nindent 4 }}
+    app.kubernetes.io/component: medcat-trainer
+spec:
+  {{- with $storageClass }}
+  storageClassName: {{ . | quote }}
+  {{- end }}
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: {{ .Values.persistence.media.size }}
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: {{ include "medcat-trainer-helm.fullname" . }}-api-static  # This stores the HTML for the website. Disk use was 72MB.
+  labels:
+    {{- include "medcat-trainer-helm.labels" . | nindent 4 }}
+    app.kubernetes.io/component: medcat-trainer
+spec:
+  {{- with $storageClass }}
+  storageClassName: {{ . | quote }}
+  {{- end }}
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: {{ .Values.persistence.static.size }}
+---
+{{- if eq .Values.DB_ENGINE "sqlite3" }}
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: {{ include "medcat-trainer-helm.fullname" . }}-api-db  # SQLiteDB
+  labels:
+    {{- include "medcat-trainer-helm.labels" . | nindent 4 }}
+    app.kubernetes.io/component: medcat-trainer
+spec:
+  {{- with $storageClass }}
+  storageClassName: {{ . | quote }}
+  {{- end }}
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: {{ .Values.persistence.sqlite.size }}
+{{- end }}
+---
+{{- if eq .Values.DB_ENGINE "sqlite3" }}
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: {{ include "medcat-trainer-helm.fullname" . }}-api-db-backup  # SQLiteDB
+  labels:
+    {{- include "medcat-trainer-helm.labels" . | nindent 4 }}
+    app.kubernetes.io/component: medcat-trainer
+spec:
+  {{- with $storageClass }}
+  storageClassName: {{ . | quote }}
+  {{- end }}
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: {{ .Values.persistence.sqlite.backupDbSize }}
+{{- end }}
diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/service.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/service.yaml
new file mode 100644
index 0000000..8151698
--- /dev/null
+++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/service.yaml
@@ -0,0 +1,37 @@
+# Service for the medcat-trainer component: fixed ClusterIP, port 8000 -> container port "http".
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "medcat-trainer-helm.fullname" . }}-medcat-trainer
+  labels:
+    {{- include "medcat-trainer-helm.labels" . | nindent 4 }}
+    app.kubernetes.io/component: medcat-trainer
+spec:
+  type: ClusterIP
+  ports:
+    - port: 8000
+      targetPort: http
+      protocol: TCP
+      name: http
+  selector:
+    {{- include "medcat-trainer-helm.selectorLabels" . | nindent 4 }}
+    app.kubernetes.io/component: medcat-trainer
+---
+# Service for the nginx component; type and port come from .Values.service.
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "medcat-trainer-helm.fullname" . }}-nginx
+  labels:
+    {{- include "medcat-trainer-helm.labels" . | nindent 4 }}
+    app.kubernetes.io/component: nginx
+spec:
+  type: {{ .Values.service.type }}
+  ports:
+    - port: {{ .Values.service.port }}
+      targetPort: http
+      protocol: TCP
+      name: http
+  selector:
+    {{- include "medcat-trainer-helm.selectorLabels" . | nindent 4 }}
+    app.kubernetes.io/component: nginx
diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/serviceaccount.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/serviceaccount.yaml
new file mode 100644
index 0000000..c7d642d
--- /dev/null
+++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/serviceaccount.yaml
@@ -0,0 +1,13 @@
+{{- if .Values.serviceAccount.create -}}
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: {{ include "medcat-trainer-helm.serviceAccountName" . }}
+  labels:
+    {{- include "medcat-trainer-helm.labels" . | nindent 4 }}
+  {{- with .Values.serviceAccount.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+automountServiceAccountToken: {{ .Values.serviceAccount.automount }}
+{{- end }}
diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/tests/test-connection.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/tests/test-connection.yaml
new file mode 100644
index 0000000..8a9f4cb
--- /dev/null
+++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/tests/test-connection.yaml
@@ -0,0 +1,16 @@
+apiVersion: v1
+kind: Pod
+metadata:
+  name: "{{ include "medcat-trainer-helm.fullname" . }}-test-connection"
+  labels:
+    {{- include "medcat-trainer-helm.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test
+spec:
+  containers:
+    - name: wget
+      image: busybox
+      command: ['wget']
+      # -U and its value are separate argv entries so wget gets the flag and the User-Agent string as distinct arguments.
+      args: ['{{ include "medcat-trainer-helm.fullname" . }}-nginx:{{ .Values.service.port }}/nginx/health/live', '-U', 'helm-test {{ .Chart.Name }}-v{{ .Chart.Version }}']
+  restartPolicy: Never
diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/values.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/values.yaml
new file mode 100644
index 0000000..845e579
--- /dev/null
+++ b/deployment/kubernetes/charts/medcat-trainer-helm/values.yaml
@@ -0,0 +1,234 @@
+# Default values for medcat-trainer-helm.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates. + +# This sets the replica count. More information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/ +replicaCount: 1 +nginxReplicaCount: 1 + +# This sets the container image. More information can be found here: https://kubernetes.io/docs/concepts/containers/images/ +image: + repository: cogstacksystems/medcat-trainer + # This sets the pull policy for images. + pullPolicy: Always + # Overrides the image tag whose default is the chart appVersion. + tag: "latest" +nginxImage: + repository: nginx + pullPolicy: IfNotPresent + tag: "1.29.1" + +# Add any environment variables here that should be set in the medcat-trainer container +env: + CSRF_TRUSTED_ORIGINS: "http://localhost:8080" + DEBUG: "1" + EMAIL_HOST: "mail.cogstack.org" + EMAIL_PASS: "to-be-changed" + EMAIL_PORT: "465" + EMAIL_USER: "example@cogstack.org" + ENV: "non-prod" + LOAD_EXAMPLES: "1" + LOAD_NUM_DOC_PAGES: "10" + MAX_DATASET_SIZE: "10000" + MAX_MEDCAT_MODELS: "2" + OPENBLAS_NUM_THREADS: "1" + RESUBMIT_ALL_ON_STARTUP: "0" + UNIQUE_DOC_NAMES_IN_DATASETS: "True" + + # TODO: Support custom DB overrides + # DB_ENGINE: "sqlite3" + DB_ENGINE: "postgresql" + DB_NAME: "postgres" + DB_USER: "postgres" + DB_PORT: "5432" + # DB_PASSWORD: "" + # DB_HOST: "" + +postgresql: + enabled: true + # TODO: Support custom DB overrides + # auth: + # - name for a custom database + # database: "my_trainer_db" + # username: "trainer_admin" + # password: "changeme_changeme" + primary: + persistence: + # Size of the PVC for the Postgres database + size: 500Mi + +persistence: + media: + # Size of the PVC for files like model packs and other media downloaded by MedCAT Trainer + size: 8Gi + static: + # Size of the PVC for the static HTML site + size: 100Mi + sqlite: + # Size of the PVC for the SQLite database + size: 100Mi + # Size of the PVC for the SQLite backups + backupDbSize: 300Mi + storageClassName: "" + +# MedCAT config as
described here: https://github.com/CogStack/cogstack-nlp/blob/main/medcat-v2/medcat/config/config.py +medcatConfig: | + cat.linking.optim = {'type': 'standard', 'lr': 0.1} + cat.linking.filter_before_disamb = True + # 20 - INFO; 10 - DEBUG + cat.general.log_level = 20 + # Recommended is to have this one negative + cat.linking.similarity_threshold = -5 + # And this one to be used as the real th + cat.linking.similarity_threshold_trainer = -5 + # Used for limiting the number of occ of a concept in a project + cat.general.cui_count_limit = 100000000 + # Is unlink full + cat.general.full_unlink = False + # use this spacy model + cat.general.spacy_model = 'en_core_web_md' + +solr: + replicaCount: 1 + collectionShards: 1 + collectionReplicas: 1 + zookeeper: + replicaCount: 1 + persistence: + size: 1Gi + persistence: + size: 1Gi + auth: + # TODO: support SOLR auth from medcat trainer API + enabled: false + podLabels: + app.kubernetes.io/component: solr + app.kubernetes.io/part-of: cogstack + +# Secrets for pulling an image from a private repository. More information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ +imagePullSecrets: [] +# This is to override the chart name. +nameOverride: "" +fullnameOverride: "" + +# This section configures the service account. More information can be found here: https://kubernetes.io/docs/concepts/security/service-accounts/ +serviceAccount: + # Specifies whether a service account should be created + create: true + # Automatically mount a ServiceAccount's API credentials? + automount: true + # Annotations to add to the service account + annotations: {} + # The name of the service account to use. + # If not set and create is true, a name is generated using the fullname template + name: "" + +# This is for setting Kubernetes Annotations to a Pod.
+# For more information, check out: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/ +podAnnotations: {} +# This is for setting Kubernetes Labels to a Pod. +# For more information, check out: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ +podLabels: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: {} + # capabilities: + # drop: + # - ALL + # readOnlyRootFilesystem: true + # runAsNonRoot: true + # runAsUser: 1000 + +# This sets up the service. More information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/ +service: + # This sets the service type. More information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types + type: ClusterIP + # This sets the port. More information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports + port: 8000 + +# This block sets up the ingress. More information can be found here: https://kubernetes.io/docs/concepts/services-networking/ingress/ +ingress: + enabled: false + className: "" + annotations: {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + hosts: + - host: chart-example.local + paths: + - path: / + pathType: ImplementationSpecific + tls: [] + # - secretName: chart-example-tls + # hosts: + # - chart-example.local + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+ # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +# This sets up the liveness and readiness probes. More information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/ +# TODO: Add liveness and readiness probes to the Django app +# https://github.com/CogStack/cogstack-nlp/pull/131/files +# livenessProbe: +# httpGet: +# path: /api/health/live/ +# port: http +# readinessProbe: +# httpGet: +# path: /api/health/ready/ +# port: http +# startupProbe: +# httpGet: +# path: /api/health/startup/ +# port: http +# failureThreshold: 30 +# periodSeconds: 10 + +nginx: + livenessProbe: + httpGet: + path: /nginx/health/live + port: http + readinessProbe: + httpGet: + path: /nginx/health/live + port: http + + +# This section sets up autoscaling. More information can be found here: https://kubernetes.io/docs/concepts/workloads/autoscaling/ +autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 100 + targetCPUUtilizationPercentage: 80 + # targetMemoryUtilizationPercentage: 80 + +# Additional volumes on the output Deployment definition. +volumes: [] +# - name: foo +# secret: +# secretName: mysecret +# optional: false + +# Additional volumeMounts on the output Deployment definition.
+volumeMounts: [] +# - name: foo +# mountPath: "/etc/foo" +# readOnly: true + +nodeSelector: {} + +tolerations: [] + +affinity: {} diff --git a/deployment/kubernetes/local_dev_startup.sh b/deployment/kubernetes/local_dev_startup.sh index b4999ee..6347bb1 100644 --- a/deployment/kubernetes/local_dev_startup.sh +++ b/deployment/kubernetes/local_dev_startup.sh @@ -16,4 +16,12 @@ helm test medcat-service --logs # Test with host header set for ingress routing # HOST_IP=10.211.112.82 -# curl --resolve chart-example.local:80:${HOST_IP} http://chart-example.local/api/info \ No newline at end of file +# curl --resolve chart-example.local:80:${HOST_IP} http://chart-example.local/api/info + +# Test medcat trainer +# kubectl port-forward svc/nginx 8000:8000 + +helm upgrade my-test ./medcat-trainer-helm --install --recreate-pods --wait --timeout 5m0s # Install if it doesn't already exist, else upgrade +# kubectl port-forward svc/medcat-trainer-solr 8983:8983 + +## helm install trainer-registry oci://registry-1.docker.io/cogstacksystems/medcat-trainer-helm --wait --timeout 5m0s diff --git a/docs/platform/deployment/helm/charts/_index.md b/docs/platform/deployment/helm/charts/_index.md index fe202c8..84810a0 100644 --- a/docs/platform/deployment/helm/charts/_index.md +++ b/docs/platform/deployment/helm/charts/_index.md @@ -6,10 +6,13 @@ The Helm charts for CogStack are published to Docker Hub, which is an OCI-compli - **MedCAT Service:** https://hub.docker.com/r/cogstacksystems/medcat-service-helm +- **MedCAT Trainer:** + https://hub.docker.com/r/cogstacksystems/medcat-trainer-helm ```{toctree} :maxdepth: 1 medcat-service-helm +medcat-trainer-helm ``` ## Chart Publishing diff --git a/docs/platform/deployment/helm/charts/medcat-trainer-helm.md b/docs/platform/deployment/helm/charts/medcat-trainer-helm.md new file mode 100644 index 0000000..030b943 --- /dev/null +++ b/docs/platform/deployment/helm/charts/medcat-trainer-helm.md @@ -0,0 +1,49 @@ +# MedCAT Trainer Helm Chart + +This
Helm chart deploys MedCAT Trainer and its supporting infrastructure to a Kubernetes cluster. + +By default the chart will: + +- Run the MedCAT Trainer Django server +- Run NGINX for static site hosting and routing +- Run a SOLR and Zookeeper cluster for the Concept DB +- Run a Postgres database for persistence + + +## Installation + +```sh +helm install my-medcat-trainer oci://registry-1.docker.io/cogstacksystems/medcat-trainer-helm +``` + +## Configuration + +See these values for common configurations to change: + +| Setting | Description | +| -------- | -------- | +| `env` | Environment variables as defined in the [MedCAT Trainer docs](https://docs.cogstack.org/projects/medcat-trainer/en/latest/installation.html). | +| `medcatConfig` | MedCAT config file as described [here](https://github.com/CogStack/cogstack-nlp/blob/main/medcat-v2/medcat/config/config.py). | +| `env.CSRF_TRUSTED_ORIGINS` | The host and port used to access the application. | + + +### Use SQLite instead of Postgres + +SQLite can be used for smaller, single-instance deployments. + +Set these values: + +```yaml +DB_ENGINE: "sqlite3" + +postgresql: + enabled: false +``` + +## Missing features +These features do not exist yet but are planned for the future: +- Use a pre-existing Postgres database +- Use a pre-existing SOLR instance +- Migrate from supervisord to a standalone deployment for background tasks, for better scaling +- Support SOLR authentication from MedCAT Trainer +- Support passing DB OPTIONS to MedCAT Trainer for use in cloud environments