From 32434f6cba98bde483da78fa148194a550dbdba0 Mon Sep 17 00:00:00 2001
From: JiriStipek <567776@muni.cz>
Date: Mon, 23 Feb 2026 19:09:13 +0100
Subject: [PATCH 1/3] feat: split RayService manifest into kustomize base, components and overlay

---
 kustomize/base/kustomization.yaml             |  5 ++
 kustomize/base/ray-service-base.yaml          | 58 ++++++++++++
 .../cpu-workers/cpu-workers-patch.yaml        | 65 ++++++++++++++
 .../components/cpu-workers/kustomization.yaml |  8 ++
 .../gpu-workers/gpu-workers-patch.yaml        | 68 ++++++++++++++
 .../components/gpu-workers/kustomization.yaml |  8 ++
 .../components/models/kustomization.yaml      |  8 ++
 kustomize/components/models/merge_models.py   | 51 +++++++++++
 .../models/models-definitions/episeg.yaml     | 29 ++++++
 .../models/models-definitions/heatmap.yaml    | 20 +++++
 .../models/models-definitions/prostate.yaml   | 36 ++++++++
 .../components/models/serve-config-patch.yaml | 89 +++++++++++++++++++
 kustomize/overlays/kustomization.yaml         | 10 +++
 13 files changed, 455 insertions(+)
 create mode 100644 kustomize/base/kustomization.yaml
 create mode 100644 kustomize/base/ray-service-base.yaml
 create mode 100644 kustomize/components/cpu-workers/cpu-workers-patch.yaml
 create mode 100644 kustomize/components/cpu-workers/kustomization.yaml
 create mode 100644 kustomize/components/gpu-workers/gpu-workers-patch.yaml
 create mode 100644 kustomize/components/gpu-workers/kustomization.yaml
 create mode 100644 kustomize/components/models/kustomization.yaml
 create mode 100644 kustomize/components/models/merge_models.py
 create mode 100644 kustomize/components/models/models-definitions/episeg.yaml
 create mode 100644 kustomize/components/models/models-definitions/heatmap.yaml
 create mode 100644 kustomize/components/models/models-definitions/prostate.yaml
 create mode 100644 kustomize/components/models/serve-config-patch.yaml
 create mode 100644 kustomize/overlays/kustomization.yaml

diff --git a/kustomize/base/kustomization.yaml b/kustomize/base/kustomization.yaml
new file mode 100644
index 0000000..f334bf6
--- /dev/null
+++ b/kustomize/base/kustomization.yaml
@@ -0,0 +1,5 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+resources:
+  - ray-service-base.yaml  # RayService skeleton; components fill in the serve config and worker groups
diff --git a/kustomize/base/ray-service-base.yaml b/kustomize/base/ray-service-base.yaml
new file mode 100644
index 0000000..a0a5fb8
--- /dev/null
+++ b/kustomize/base/ray-service-base.yaml
@@ -0,0 +1,58 @@
+apiVersion: ray.io/v1
+kind: RayService
+metadata:
+  name: rayservice-model-split  # the component patches in this change target this name
+spec:
+  serveConfigV2: ""  # placeholder; overwritten by components/models/serve-config-patch.yaml
+  rayClusterConfig:
+    rayVersion: 2.53.0  # same version as the rayproject/ray image tag of the head container
+    enableInTreeAutoscaling: true
+    autoscalerOptions:
+      idleTimeoutSeconds: 60
+      securityContext:
+        runAsUser: 1000
+        allowPrivilegeEscalation: false
+        capabilities:
+          drop: ["ALL"]
+
+    headGroupSpec:
+      rayStartParams:
+        num-cpus: "0"  # the head advertises zero CPUs to Ray
+        dashboard-host: "0.0.0.0"
+      template:
+        spec:
+          securityContext:
+            fsGroupChangePolicy: OnRootMismatch
+            runAsNonRoot: true
+            seccompProfile:
+              type: RuntimeDefault
+          containers:
+            - name: ray-head
+              image: rayproject/ray:2.53.0-py312
+              imagePullPolicy: Always
+              resources:
+                limits:
+                  cpu: 0  # NOTE(review): zero CPU limit/request on the head - confirm this is intended
+                  memory: 4Gi
+                requests:
+                  cpu: 0
+                  memory: 4Gi
+              env:
+                - name: HTTPS_PROXY
+                  value: http://proxy.ics.muni.cz:3128
+              ports:
+                - containerPort: 6379
+                  name: gcs-server
+                - containerPort: 8265
+                  name: dashboard
+                - containerPort: 10001
+                  name: client
+                - containerPort: 8000
+                  name: serve
+              securityContext:
+                runAsUser: 1000
+                allowPrivilegeEscalation: false
+                capabilities:
+                  drop: ["ALL"]
+
+    workerGroupSpecs: []  # left empty; the cpu-workers / gpu-workers components append groups
diff --git a/kustomize/components/cpu-workers/cpu-workers-patch.yaml b/kustomize/components/cpu-workers/cpu-workers-patch.yaml
new file mode 100644
index 0000000..5e31d2a
--- /dev/null
+++ b/kustomize/components/cpu-workers/cpu-workers-patch.yaml
@@ -0,0 +1,65 @@
+- op: add
+  path: /spec/rayClusterConfig/workerGroupSpecs/-  # JSON Patch: "-" appends after the last array element
+  value:
+    groupName: cpu-workers
+    replicas: 0  # no workers initially; the base RayService enables in-tree autoscaling
+    minReplicas: 0
+    maxReplicas: 2
+    template:
+      spec:
+        securityContext:
+          fsGroupChangePolicy: OnRootMismatch
+          runAsNonRoot: true
+          seccompProfile:
+            type: RuntimeDefault
+        containers:
+          - name: ray-worker
+            image: cerit.io/rationai/model-service:2.53.0
+            imagePullPolicy: Always
+            resources:
+              limits:
+                cpu: 8
+                memory: 16Gi
+              requests:
+                cpu: 8
+                memory: 16Gi
+            env:
+              - name: HTTPS_PROXY
+                value: http://proxy.ics.muni.cz:3128
+            securityContext:
+              allowPrivilegeEscalation: false
+              capabilities:
+                drop: ["ALL"]
+              runAsUser: 1000
+            lifecycle:
+              preStop:
+                exec:
+                  command: ["/bin/sh", "-c", "ray stop"]  # preStop hook: run "ray stop" before the container is terminated
+            volumeMounts:
+              - name: data
+                mountPath: /mnt/data
+              - name: public-data
+                mountPath: /mnt/data/Public  # nested under the "data" mount at /mnt/data
+              - name: projects
+                mountPath: /mnt/projects
+              - name: bioptic-tree
+                mountPath: /mnt/bioptic_tree
+              - name: trt-cache-volume  # NOTE(review): looks like a TensorRT cache on a CPU-only group - confirm it is needed
+                mountPath: /mnt/cache
+
+        volumes:
+          - name: data
+            persistentVolumeClaim:
+              claimName: data-ro
+          - name: public-data
+            persistentVolumeClaim:
+              claimName: rationai-data-ro-pvc-jobs
+          - name: projects
+            persistentVolumeClaim:
+              claimName: projects-rw
+          - name: bioptic-tree
+            persistentVolumeClaim:
+              claimName: bioptictree-ro
+          - name: trt-cache-volume
+            persistentVolumeClaim:
+              claimName: tensorrt-cache-pvc
diff --git a/kustomize/components/cpu-workers/kustomization.yaml b/kustomize/components/cpu-workers/kustomization.yaml
new file mode 100644
index 0000000..90d9f27
--- /dev/null
+++ b/kustomize/components/cpu-workers/kustomization.yaml
@@ -0,0 +1,8 @@
+apiVersion: kustomize.config.k8s.io/v1alpha1
+kind: Component
+
+patches:
+  - target:
+      kind: RayService
+      name: rayservice-model-split  # name of the RayService defined in base/ray-service-base.yaml
+    path: cpu-workers-patch.yaml  # JSON Patch (list of op/path/value entries)
diff --git a/kustomize/components/gpu-workers/gpu-workers-patch.yaml b/kustomize/components/gpu-workers/gpu-workers-patch.yaml
new file mode 100644
index 0000000..3fcb635
--- /dev/null
+++ b/kustomize/components/gpu-workers/gpu-workers-patch.yaml
@@ -0,0 +1,68 @@
+- op: add
+  path: /spec/rayClusterConfig/workerGroupSpecs/-  # JSON Patch: "-" appends after the last array element
+  value:
+    groupName: gpu-workers
+    replicas: 0  # no workers initially; the base RayService enables in-tree autoscaling
+    minReplicas: 0
+    maxReplicas: 2
+    template:
+      spec:
+        securityContext:
+          fsGroupChangePolicy: OnRootMismatch
+          runAsNonRoot: true
+          seccompProfile:
+            type: RuntimeDefault
+        nodeSelector:
+          nvidia.com/gpu.product: NVIDIA-A40  # only nodes carrying this label value are eligible
+        containers:
+          - name: ray-worker
+            image: cerit.io/rationai/model-service:2.53.0-gpu
+            imagePullPolicy: Always
+            resources:
+              limits:
+                cpu: 8
+                memory: 24Gi
+                nvidia.com/gpu: 1  # set under limits only; the requests block below does not repeat it
+              requests:
+                cpu: 8
+                memory: 24Gi
+            env:
+              - name: HTTPS_PROXY
+                value: http://proxy.ics.muni.cz:3128
+            securityContext:
+              allowPrivilegeEscalation: false
+              capabilities:
+                drop: ["ALL"]
+              runAsUser: 1000
+            lifecycle:
+              preStop:
+                exec:
+                  command: ["/bin/sh", "-c", "ray stop"]  # preStop hook: run "ray stop" before the container is terminated
+            volumeMounts:
+              - name: data
+                mountPath: /mnt/data
+              - name: public-data
+                mountPath: /mnt/data/Public  # nested under the "data" mount at /mnt/data
+              - name: projects
+                mountPath: /mnt/projects
+              - name: bioptic-tree
+                mountPath: /mnt/bioptic_tree
+              - name: trt-cache-volume
+                mountPath: /mnt/cache
+
+        volumes:
+          - name: data
+            persistentVolumeClaim:
+              claimName: data-ro
+          - name: public-data
+            persistentVolumeClaim:
+              claimName: rationai-data-ro-pvc-jobs
+          - name: projects
+            persistentVolumeClaim:
+              claimName: projects-rw
+          - name: bioptic-tree
+            persistentVolumeClaim:
+              claimName: bioptictree-ro
+          - name: trt-cache-volume
+            persistentVolumeClaim:
+              claimName: tensorrt-cache-pvc
diff --git a/kustomize/components/gpu-workers/kustomization.yaml b/kustomize/components/gpu-workers/kustomization.yaml
new file mode 100644
index 0000000..01dccb0
--- /dev/null
+++ b/kustomize/components/gpu-workers/kustomization.yaml
@@ -0,0 +1,8 @@
+apiVersion: kustomize.config.k8s.io/v1alpha1
+kind: Component
+
+patches:
+  - target:
+      kind: RayService
+      name: rayservice-model-split  # name of the RayService defined in base/ray-service-base.yaml
+    path: gpu-workers-patch.yaml  # JSON Patch (list of op/path/value entries)
diff --git a/kustomize/components/models/kustomization.yaml b/kustomize/components/models/kustomization.yaml
new file mode 100644
index 0000000..af5d5b9
--- /dev/null
+++ b/kustomize/components/models/kustomization.yaml
@@ -0,0 +1,8 @@
+apiVersion: kustomize.config.k8s.io/v1alpha1
+kind: Component
+
+patches:
+  - target:
+      kind: RayService
+      name: rayservice-model-split  # name of the RayService defined in base/ray-service-base.yaml
+    path: serve-config-patch.yaml  # written by merge_models.py in this directory
diff --git a/kustomize/components/models/merge_models.py b/kustomize/components/models/merge_models.py
new file mode 100644
index 0000000..cce1024
--- /dev/null
+++ b/kustomize/components/models/merge_models.py
@@ -0,0 +1,68 @@
+"""Merge models-definitions/*.yaml into the serve-config-patch.yaml RayService patch.
+
+Every definition file must contain a top-level ``applications`` list; the lists
+are concatenated in file-name order, dumped as one Ray Serve config document and
+embedded as a literal block scalar under ``spec.serveConfigV2``.
+"""
+
+import os
+
+import yaml
+
+
+script_dir = os.path.dirname(os.path.abspath(__file__))
+models_definitions_dir = os.path.join(script_dir, "models-definitions")
+output_file = os.path.join(script_dir, "serve-config-patch.yaml")
+
+# Sorted once so the merge order, and thus the generated file, is deterministic.
+model_files = sorted(
+    f for f in os.listdir(models_definitions_dir) if f.endswith(".yaml")
+)
+
+if not model_files:
+    raise RuntimeError(f"No model definition files found in {models_definitions_dir}")
+
+merged_applications = []
+
+for file_name in model_files:
+    file_path = os.path.join(models_definitions_dir, file_name)
+    # Explicit UTF-8: do not depend on the platform's default text encoding.
+    with open(file_path, encoding="utf-8") as f:
+        data = yaml.safe_load(f)
+    if not isinstance(data, dict) or "applications" not in data:
+        raise RuntimeError(f"File {file_name} is missing 'applications' key")
+    applications = data["applications"]
+    # list.extend() would raise a bare TypeError on null and would silently
+    # splice in the keys of a mapping, so insist on a real list here.
+    if not isinstance(applications, list):
+        raise RuntimeError(f"File {file_name}: 'applications' must be a list")
+    merged_applications.extend(applications)
+
+serve_config_str = yaml.dump({"applications": merged_applications}, sort_keys=False)
+
+
+class LiteralString(str):
+    """Marker type for strings that must be dumped as YAML literal blocks (``|``)."""
+
+
+def literal_presenter(dumper, data):
+    """Represent ``data`` as a YAML string scalar in literal block style."""
+    return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|")
+
+
+yaml.add_representer(LiteralString, literal_presenter)
+
+patch = {
+    "apiVersion": "ray.io/v1",
+    "kind": "RayService",
+    "metadata": {"name": "rayservice-model-split"},
+    "spec": {"serveConfigV2": LiteralString(serve_config_str)},
+}
+
+# newline="\n" keeps the generated file LF-only even when the script runs on Windows.
+with open(output_file, "w", encoding="utf-8", newline="\n") as f:
+    yaml.dump(patch, f, sort_keys=False)
+
+print(f"Generated {output_file} from {len(model_files)} model files:")
+for f in model_files:
+    print(f"  - {f}")
diff --git a/kustomize/components/models/models-definitions/episeg.yaml b/kustomize/components/models/models-definitions/episeg.yaml
new file mode 100644
index 0000000..59e93ff
--- /dev/null
+++ b/kustomize/components/models/models-definitions/episeg.yaml
@@ -0,0 +1,29 @@
+applications:
+  - name: episeg-1
+    import_path: models.semantic_segmentation:app
+    route_prefix: /episeg-1
+    runtime_env:
+      working_dir: https://gitlab.ics.muni.cz/rationai/infrastructure/model-service/-/archive/master/model-service-master.zip
+    deployments:
+      - name: SemanticSegmentation
+        max_ongoing_requests: 16
+        max_queued_requests: 64
+        autoscaling_config:
+          min_replicas: 0
+          max_replicas: 4
+          target_ongoing_requests: 4
+        ray_actor_options:
+          num_cpus: 12  # NOTE(review): both worker groups added in this change set cpu: 8 - confirm a 12-CPU replica can be placed
+          memory: 12884901888 # 12 GiB
+          runtime_env:
+            env_vars:
+              MLFLOW_TRACKING_URI: http://mlflow.rationai-mlflow:5000
+        user_config:
+          tile_size: 1024
+          mpp: 0.468
+          max_batch_size: 2
+          batch_wait_timeout_s: 0.5
+          intra_op_num_threads: 11
+          model:
+            _target_: providers.model_provider:mlflow
+            artifact_uri: mlflow-artifacts:/10/39f821ed5b964c71a603cc6db196f9fd/artifacts/checkpoints/epoch=19-step=32020/model.onnx/model.onnx  # NOTE(review): ends in model.onnx/model.onnx, prostate.yaml has a single segment - confirm
diff --git a/kustomize/components/models/models-definitions/heatmap.yaml b/kustomize/components/models/models-definitions/heatmap.yaml
new file mode 100644
index 0000000..7bc9f25
--- /dev/null
+++ b/kustomize/components/models/models-definitions/heatmap.yaml
@@ -0,0 +1,20 @@
+applications:
+  - name: heatmap-builder
+    import_path: builders.heatmap_builder:app
+    route_prefix: /heatmap-builder
+    runtime_env:
+      working_dir: https://gitlab.ics.muni.cz/rationai/infrastructure/model-service/-/archive/master/model-service-master.zip
+    deployments:
+      - name: HeatmapBuilder
+        max_ongoing_requests: 16
+        max_queued_requests: 64
+        autoscaling_config:
+          min_replicas: 0  # the deployment may scale down to zero replicas
+          max_replicas: 2
+          target_ongoing_requests: 2
+        ray_actor_options:
+          num_cpus: 4
+          memory: 12884901888 # 12 GiB
+        user_config:
+          num_threads: 4  # same value as num_cpus above
+          max_concurrent_tasks: 8
diff --git a/kustomize/components/models/models-definitions/prostate.yaml b/kustomize/components/models/models-definitions/prostate.yaml
new file mode 100644
index 0000000..1518397
--- /dev/null
+++ b/kustomize/components/models/models-definitions/prostate.yaml
@@ -0,0 +1,36 @@
+applications:
+  - name: prostate-classifier-1
+    import_path: models.binary_classifier:app
+    route_prefix: /prostate-classifier-1
+    runtime_env:
+      working_dir: https://gitlab.ics.muni.cz/rationai/infrastructure/model-service/-/archive/master/model-service-master.zip
+    deployments:
+      - name: BinaryClassifier
+        max_ongoing_requests: 64
+        max_queued_requests: 128
+        autoscaling_config:
+          min_replicas: 0
+          max_replicas: 4
+          target_ongoing_requests: 32
+        ray_actor_options:
+          num_cpus: 6
+          memory: 6442450944  # 6 GiB
+          runtime_env:
+            env_vars:
+              MLFLOW_TRACKING_URI: http://mlflow.rationai-mlflow:5000
+        user_config:
+          tile_size: 512
+          max_batch_size: 32
+          batch_wait_timeout_s: 0.5
+          mean:  # three values - presumably one per input channel; TODO confirm
+            - 228.5544
+            - 178.8584
+            - 219.8793
+          std:
+            - 27.8285
+            - 51.4639
+            - 26.4458
+          intra_op_num_threads: 5
+          model:
+            _target_: providers.model_provider:mlflow
+            artifact_uri: mlflow-artifacts:/65/aebc892f526047249b972f200bef4381/artifacts/checkpoints/epoch=0-step=6972/model.onnx
diff --git a/kustomize/components/models/serve-config-patch.yaml b/kustomize/components/models/serve-config-patch.yaml
new file mode 100644
index 0000000..177aabc
--- /dev/null
+++ b/kustomize/components/models/serve-config-patch.yaml
@@ -0,0 +1,89 @@
+apiVersion: ray.io/v1  # GENERATED by merge_models.py from models-definitions/*.yaml - edit those files, not this one
+kind: RayService
+metadata:
+  name: rayservice-model-split
+spec:
+  serveConfigV2: |
+    applications:
+    - name: episeg-1
+      import_path: models.semantic_segmentation:app
+      route_prefix: /episeg-1
+      runtime_env:
+        working_dir: https://gitlab.ics.muni.cz/rationai/infrastructure/model-service/-/archive/master/model-service-master.zip
+      deployments:
+      - name: SemanticSegmentation
+        max_ongoing_requests: 16
+        max_queued_requests: 64
+        autoscaling_config:
+          min_replicas: 0
+          max_replicas: 4
+          target_ongoing_requests: 4
+        ray_actor_options:
+          num_cpus: 12
+          memory: 12884901888
+          runtime_env:
+            env_vars:
+              MLFLOW_TRACKING_URI: http://mlflow.rationai-mlflow:5000
+        user_config:
+          tile_size: 1024
+          mpp: 0.468
+          max_batch_size: 2
+          batch_wait_timeout_s: 0.5
+          intra_op_num_threads: 11
+          model:
+            _target_: providers.model_provider:mlflow
+            artifact_uri: mlflow-artifacts:/10/39f821ed5b964c71a603cc6db196f9fd/artifacts/checkpoints/epoch=19-step=32020/model.onnx/model.onnx
+    - name: heatmap-builder
+      import_path: builders.heatmap_builder:app
+      route_prefix: /heatmap-builder
+      runtime_env:
+        working_dir: https://gitlab.ics.muni.cz/rationai/infrastructure/model-service/-/archive/master/model-service-master.zip
+      deployments:
+      - name: HeatmapBuilder
+        max_ongoing_requests: 16
+        max_queued_requests: 64
+        autoscaling_config:
+          min_replicas: 0
+          max_replicas: 2
+          target_ongoing_requests: 2
+        ray_actor_options:
+          num_cpus: 4
+          memory: 12884901888
+        user_config:
+          num_threads: 4
+          max_concurrent_tasks: 8
+    - name: prostate-classifier-1
+      import_path: models.binary_classifier:app
+      route_prefix: /prostate-classifier-1
+      runtime_env:
+        working_dir: https://gitlab.ics.muni.cz/rationai/infrastructure/model-service/-/archive/master/model-service-master.zip
+      deployments:
+      - name: BinaryClassifier
+        max_ongoing_requests: 64
+        max_queued_requests: 128
+        autoscaling_config:
+          min_replicas: 0
+          max_replicas: 4
+          target_ongoing_requests: 32
+        ray_actor_options:
+          num_cpus: 6
+          memory: 6442450944
+          runtime_env:
+            env_vars:
+              MLFLOW_TRACKING_URI: http://mlflow.rationai-mlflow:5000
+        user_config:
+          tile_size: 512
+          max_batch_size: 32
+          batch_wait_timeout_s: 0.5
+          mean:
+          - 228.5544
+          - 178.8584
+          - 219.8793
+          std:
+          - 27.8285
+          - 51.4639
+          - 26.4458
+          intra_op_num_threads: 5
+          model:
+            _target_: providers.model_provider:mlflow
+            artifact_uri: mlflow-artifacts:/65/aebc892f526047249b972f200bef4381/artifacts/checkpoints/epoch=0-step=6972/model.onnx
diff --git a/kustomize/overlays/kustomization.yaml b/kustomize/overlays/kustomization.yaml
new file mode 100644
index 0000000..37d8d2d
--- /dev/null
+++ b/kustomize/overlays/kustomization.yaml
@@ -0,0 +1,10 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+resources:
+  - ../base
+
+components:
+  - ../components/models  # sets spec.serveConfigV2
+  - ../components/cpu-workers  # appends the cpu-workers worker group
+  - ../components/gpu-workers  # appends the gpu-workers worker group

From 3f74fb26b4aecb2685a1d6df7356ac38e3db66c2 Mon Sep 17 00:00:00 2001
From: JiriStipek <567776@muni.cz>
Date: Mon, 23 Feb 2026 19:11:29 +0100
Subject: [PATCH 2/3] feat: add deploy script

---
 deploy.sh | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 deploy.sh

diff --git a/deploy.sh b/deploy.sh
new file mode 100644
index 0000000..4bbdb5a
--- /dev/null
+++ b/deploy.sh
@@ -0,0 +1,4 @@
+# deploy.ps1
+python kustomize/components/models/merge_models.py
+if ($LASTEXITCODE -ne 0) { exit 1 }
+kubectl apply -k kustomize/overlays -n rationai-jobs-ns
\ No newline at end of file

From df76f0ff7684f4e3cca4763799e4418e28527e2b Mon Sep 17 00:00:00 2001
From: JiriStipek <567776@muni.cz>
Date: Mon, 23 Feb 2026 19:11:48 +0100
Subject: [PATCH 3/3] fix: rewrite deploy script from PowerShell to bash

---
 deploy.sh | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/deploy.sh b/deploy.sh
index 4bbdb5a..559c21f 100644
--- a/deploy.sh
+++ b/deploy.sh
@@ -1,4 +1,10 @@
-# deploy.ps1
+#!/bin/bash
+# Regenerate serve-config-patch.yaml from the model definitions, then apply
+# the kustomize overlay to the rationai-jobs-ns namespace.
+set -euo pipefail
+
+# The paths below are relative to the repository root, where this script lives.
+cd "$(dirname "${BASH_SOURCE[0]}")"
+
 python kustomize/components/models/merge_models.py
-if ($LASTEXITCODE -ne 0) { exit 1 }
 kubectl apply -k kustomize/overlays -n rationai-jobs-ns
\ No newline at end of file