From 32434f6cba98bde483da78fa148194a550dbdba0 Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@muni.cz> Date: Mon, 23 Feb 2026 19:09:13 +0100 Subject: [PATCH 1/3] feat: add more smaller yamls --- kustomize/base/kustomization.yaml | 5 ++ kustomize/base/ray-service-base.yaml | 58 ++++++++++++ .../cpu-workers/cpu-workers-patch.yaml | 65 ++++++++++++++ .../components/cpu-workers/kustomization.yaml | 8 ++ .../gpu-workers/gpu-workers-patch.yaml | 68 ++++++++++++++ .../components/gpu-workers/kustomization.yaml | 8 ++ .../components/models/kustomization.yaml | 8 ++ kustomize/components/models/merge_models.py | 51 +++++++++++ .../models/models-definitions/episeg.yaml | 29 ++++++ .../models/models-definitions/heatmap.yaml | 20 +++++ .../models/models-definitions/prostate.yaml | 36 ++++++++ .../components/models/serve-config-patch.yaml | 89 +++++++++++++++++++ kustomize/overlays/kustomization.yaml | 10 +++ 13 files changed, 455 insertions(+) create mode 100644 kustomize/base/kustomization.yaml create mode 100644 kustomize/base/ray-service-base.yaml create mode 100644 kustomize/components/cpu-workers/cpu-workers-patch.yaml create mode 100644 kustomize/components/cpu-workers/kustomization.yaml create mode 100644 kustomize/components/gpu-workers/gpu-workers-patch.yaml create mode 100644 kustomize/components/gpu-workers/kustomization.yaml create mode 100644 kustomize/components/models/kustomization.yaml create mode 100644 kustomize/components/models/merge_models.py create mode 100644 kustomize/components/models/models-definitions/episeg.yaml create mode 100644 kustomize/components/models/models-definitions/heatmap.yaml create mode 100644 kustomize/components/models/models-definitions/prostate.yaml create mode 100644 kustomize/components/models/serve-config-patch.yaml create mode 100644 kustomize/overlays/kustomization.yaml diff --git a/kustomize/base/kustomization.yaml b/kustomize/base/kustomization.yaml new file mode 100644 index 0000000..f334bf6 --- /dev/null +++ 
b/kustomize/base/kustomization.yaml @@ -0,0 +1,5 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - ray-service-base.yaml diff --git a/kustomize/base/ray-service-base.yaml b/kustomize/base/ray-service-base.yaml new file mode 100644 index 0000000..a0a5fb8 --- /dev/null +++ b/kustomize/base/ray-service-base.yaml @@ -0,0 +1,58 @@ +apiVersion: ray.io/v1 +kind: RayService +metadata: + name: rayservice-model-split +spec: + serveConfigV2: "" + rayClusterConfig: + rayVersion: 2.53.0 + enableInTreeAutoscaling: true + autoscalerOptions: + idleTimeoutSeconds: 60 + securityContext: + runAsUser: 1000 + allowPrivilegeEscalation: false + capabilities: + drop: ["ALL"] + + headGroupSpec: + rayStartParams: + num-cpus: "0" + dashboard-host: "0.0.0.0" + template: + spec: + securityContext: + fsGroupChangePolicy: OnRootMismatch + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + containers: + - name: ray-head + image: rayproject/ray:2.53.0-py312 + imagePullPolicy: Always + resources: + limits: + cpu: 0 + memory: 4Gi + requests: + cpu: 0 + memory: 4Gi + env: + - name: HTTPS_PROXY + value: http://proxy.ics.muni.cz:3128 + ports: + - containerPort: 6379 + name: gcs-server + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + - containerPort: 8000 + name: serve + securityContext: + runAsUser: 1000 + allowPrivilegeEscalation: false + capabilities: + drop: ["ALL"] + + workerGroupSpecs: [] diff --git a/kustomize/components/cpu-workers/cpu-workers-patch.yaml b/kustomize/components/cpu-workers/cpu-workers-patch.yaml new file mode 100644 index 0000000..5e31d2a --- /dev/null +++ b/kustomize/components/cpu-workers/cpu-workers-patch.yaml @@ -0,0 +1,65 @@ +- op: add + path: /spec/rayClusterConfig/workerGroupSpecs/- + value: + groupName: cpu-workers + replicas: 0 + minReplicas: 0 + maxReplicas: 2 + template: + spec: + securityContext: + fsGroupChangePolicy: OnRootMismatch + runAsNonRoot: true + seccompProfile: + type: 
RuntimeDefault + containers: + - name: ray-worker + image: cerit.io/rationai/model-service:2.53.0 + imagePullPolicy: Always + resources: + limits: + cpu: 8 + memory: 16Gi + requests: + cpu: 8 + memory: 16Gi + env: + - name: HTTPS_PROXY + value: http://proxy.ics.muni.cz:3128 + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: ["ALL"] + runAsUser: 1000 + lifecycle: + preStop: + exec: + command: ["/bin/sh", "-c", "ray stop"] + volumeMounts: + - name: data + mountPath: /mnt/data + - name: public-data + mountPath: /mnt/data/Public + - name: projects + mountPath: /mnt/projects + - name: bioptic-tree + mountPath: /mnt/bioptic_tree + - name: trt-cache-volume + mountPath: /mnt/cache + + volumes: + - name: data + persistentVolumeClaim: + claimName: data-ro + - name: public-data + persistentVolumeClaim: + claimName: rationai-data-ro-pvc-jobs + - name: projects + persistentVolumeClaim: + claimName: projects-rw + - name: bioptic-tree + persistentVolumeClaim: + claimName: bioptictree-ro + - name: trt-cache-volume + persistentVolumeClaim: + claimName: tensorrt-cache-pvc diff --git a/kustomize/components/cpu-workers/kustomization.yaml b/kustomize/components/cpu-workers/kustomization.yaml new file mode 100644 index 0000000..90d9f27 --- /dev/null +++ b/kustomize/components/cpu-workers/kustomization.yaml @@ -0,0 +1,8 @@ +apiVersion: kustomize.config.k8s.io/v1alpha1 +kind: Component + +patches: + - target: + kind: RayService + name: rayservice-model-split + path: cpu-workers-patch.yaml diff --git a/kustomize/components/gpu-workers/gpu-workers-patch.yaml b/kustomize/components/gpu-workers/gpu-workers-patch.yaml new file mode 100644 index 0000000..3fcb635 --- /dev/null +++ b/kustomize/components/gpu-workers/gpu-workers-patch.yaml @@ -0,0 +1,68 @@ +- op: add + path: /spec/rayClusterConfig/workerGroupSpecs/- + value: + groupName: gpu-workers + replicas: 0 + minReplicas: 0 + maxReplicas: 2 + template: + spec: + securityContext: + fsGroupChangePolicy: OnRootMismatch 
+ runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + nodeSelector: + nvidia.com/gpu.product: NVIDIA-A40 + containers: + - name: ray-worker + image: cerit.io/rationai/model-service:2.53.0-gpu + imagePullPolicy: Always + resources: + limits: + cpu: 8 + memory: 24Gi + nvidia.com/gpu: 1 + requests: + cpu: 8 + memory: 24Gi + env: + - name: HTTPS_PROXY + value: http://proxy.ics.muni.cz:3128 + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: ["ALL"] + runAsUser: 1000 + lifecycle: + preStop: + exec: + command: ["/bin/sh", "-c", "ray stop"] + volumeMounts: + - name: data + mountPath: /mnt/data + - name: public-data + mountPath: /mnt/data/Public + - name: projects + mountPath: /mnt/projects + - name: bioptic-tree + mountPath: /mnt/bioptic_tree + - name: trt-cache-volume + mountPath: /mnt/cache + + volumes: + - name: data + persistentVolumeClaim: + claimName: data-ro + - name: public-data + persistentVolumeClaim: + claimName: rationai-data-ro-pvc-jobs + - name: projects + persistentVolumeClaim: + claimName: projects-rw + - name: bioptic-tree + persistentVolumeClaim: + claimName: bioptictree-ro + - name: trt-cache-volume + persistentVolumeClaim: + claimName: tensorrt-cache-pvc diff --git a/kustomize/components/gpu-workers/kustomization.yaml b/kustomize/components/gpu-workers/kustomization.yaml new file mode 100644 index 0000000..01dccb0 --- /dev/null +++ b/kustomize/components/gpu-workers/kustomization.yaml @@ -0,0 +1,8 @@ +apiVersion: kustomize.config.k8s.io/v1alpha1 +kind: Component + +patches: + - target: + kind: RayService + name: rayservice-model-split + path: gpu-workers-patch.yaml diff --git a/kustomize/components/models/kustomization.yaml b/kustomize/components/models/kustomization.yaml new file mode 100644 index 0000000..af5d5b9 --- /dev/null +++ b/kustomize/components/models/kustomization.yaml @@ -0,0 +1,8 @@ +apiVersion: kustomize.config.k8s.io/v1alpha1 +kind: Component + +patches: + - target: + kind: RayService + name: 
rayservice-model-split
+    path: serve-config-patch.yaml
diff --git a/kustomize/components/models/merge_models.py b/kustomize/components/models/merge_models.py
new file mode 100644
index 0000000..cce1024
--- /dev/null
+++ b/kustomize/components/models/merge_models.py
@@ -0,0 +1,114 @@
+"""Merge per-model Ray Serve definitions into one RayService patch.
+
+Every ``*.yaml`` file in the ``models-definitions`` directory next to this
+script must contain a top-level ``applications`` list.  The lists are
+concatenated in file-name order and written to ``serve-config-patch.yaml`` as
+the ``spec.serveConfigV2`` literal block of a RayService patch.
+"""
+
+import os
+
+import yaml
+
+
+script_dir = os.path.dirname(os.path.abspath(__file__))
+models_definitions_dir = os.path.join(script_dir, "models-definitions")
+output_file = os.path.join(script_dir, "serve-config-patch.yaml")
+
+
+class LiteralString(str):
+    """String that is emitted as a YAML literal block scalar (``|``)."""
+
+
+def literal_presenter(dumper, data):
+    """Represent *data* as a ``|`` block instead of a quoted one-line string."""
+    return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|")
+
+
+yaml.add_representer(LiteralString, literal_presenter)
+
+
+def load_applications(file_path):
+    """Return the ``applications`` list defined in one model definition file.
+
+    Raises:
+        RuntimeError: If the file has no ``applications`` key or its value is
+            not a list of mappings.
+    """
+    file_name = os.path.basename(file_path)
+    # Explicit encoding: the platform default is not UTF-8 on every OS.
+    with open(file_path, encoding="utf-8") as handle:
+        data = yaml.safe_load(handle)
+    if not isinstance(data, dict) or "applications" not in data:
+        raise RuntimeError(f"File {file_name} is missing 'applications' key")
+    applications = data["applications"]
+    if not isinstance(applications, list) or not all(
+        isinstance(app, dict) for app in applications
+    ):
+        raise RuntimeError(
+            f"File {file_name}: 'applications' must be a list of mappings"
+        )
+    return applications
+
+
+def merge_applications(file_paths):
+    """Concatenate the applications of *file_paths* in the given order.
+
+    Raises:
+        RuntimeError: If two applications share a ``name`` or ``route_prefix``;
+            Ray Serve requires both to be unique within one config.
+    """
+    merged = []
+    first_seen = {"name": {}, "route_prefix": {}}
+    for file_path in file_paths:
+        file_name = os.path.basename(file_path)
+        for app in load_applications(file_path):
+            for key, seen in first_seen.items():
+                value = app.get(key)
+                if not isinstance(value, str):
+                    continue  # absent or malformed; left for Ray Serve to judge
+                if value in seen:
+                    raise RuntimeError(
+                        f"Duplicate application {key} {value!r} in {file_name} "
+                        f"(first defined in {seen[value]})"
+                    )
+                seen[value] = file_name
+            merged.append(app)
+    return merged
+
+
+def build_patch(applications):
+    """Return the RayService patch embedding *applications* as serveConfigV2."""
+    serve_config_str = yaml.dump({"applications": applications}, sort_keys=False)
+    return {
+        "apiVersion": "ray.io/v1",
+        "kind": "RayService",
+        "metadata": {"name": "rayservice-model-split"},
+        "spec": {"serveConfigV2": LiteralString(serve_config_str)},
+    }
+
+
+def main():
+    """Regenerate ``serve-config-patch.yaml`` from the model definition files."""
+    model_files = sorted(
+        name for name in os.listdir(models_definitions_dir) if name.endswith(".yaml")
+    )
+    if not model_files:
+        raise RuntimeError(
+            f"No model definition files found in {models_definitions_dir}"
+        )
+
+    applications = merge_applications(
+        os.path.join(models_definitions_dir, name) for name in model_files
+    )
+
+    with open(output_file, "w", encoding="utf-8") as handle:
+        yaml.dump(build_patch(applications), handle, sort_keys=False)
+
+    print(f"Generated {output_file} from {len(model_files)} model files:")
+    for name in model_files:
+        print(f" - {name}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/kustomize/components/models/models-definitions/episeg.yaml b/kustomize/components/models/models-definitions/episeg.yaml
new file mode 100644
index 0000000..59e93ff
--- /dev/null
+++ 
b/kustomize/components/models/models-definitions/episeg.yaml
@@ -0,0 +1,29 @@
+applications:
+  - name: episeg-1
+    import_path: models.semantic_segmentation:app
+    route_prefix: /episeg-1
+    runtime_env:
+      working_dir: https://gitlab.ics.muni.cz/rationai/infrastructure/model-service/-/archive/master/model-service-master.zip  # NOTE(review): tracks the master branch rather than a pinned revision -- confirm intended
+    deployments:
+      - name: SemanticSegmentation
+        max_ongoing_requests: 16
+        max_queued_requests: 64
+        autoscaling_config:
+          min_replicas: 0
+          max_replicas: 4
+          target_ongoing_requests: 4
+        ray_actor_options:
+          num_cpus: 12  # NOTE(review): both worker groups in this patch series limit pods to cpu: 8 -- confirm a 12-CPU replica can be placed
+          memory: 12884901888 # 12 GiB
+          runtime_env:
+            env_vars:
+              MLFLOW_TRACKING_URI: http://mlflow.rationai-mlflow:5000
+        user_config:
+          tile_size: 1024
+          mpp: 0.468
+          max_batch_size: 2
+          batch_wait_timeout_s: 0.5
+          intra_op_num_threads: 11
+          model:
+            _target_: providers.model_provider:mlflow
+            artifact_uri: mlflow-artifacts:/10/39f821ed5b964c71a603cc6db196f9fd/artifacts/checkpoints/epoch=19-step=32020/model.onnx/model.onnx
diff --git a/kustomize/components/models/models-definitions/heatmap.yaml b/kustomize/components/models/models-definitions/heatmap.yaml
new file mode 100644
index 0000000..7bc9f25
--- /dev/null
+++ b/kustomize/components/models/models-definitions/heatmap.yaml
@@ -0,0 +1,20 @@
+applications:
+  - name: heatmap-builder
+    import_path: builders.heatmap_builder:app
+    route_prefix: /heatmap-builder
+    runtime_env:
+      working_dir: https://gitlab.ics.muni.cz/rationai/infrastructure/model-service/-/archive/master/model-service-master.zip  # NOTE(review): tracks the master branch rather than a pinned revision -- confirm intended
+    deployments:
+      - name: HeatmapBuilder
+        max_ongoing_requests: 16
+        max_queued_requests: 64
+        autoscaling_config:
+          min_replicas: 0
+          max_replicas: 2
+          target_ongoing_requests: 2
+        ray_actor_options:
+          num_cpus: 4
+          memory: 12884901888 # 12 GiB
+        user_config:
+          num_threads: 4
+          max_concurrent_tasks: 8
diff --git a/kustomize/components/models/models-definitions/prostate.yaml b/kustomize/components/models/models-definitions/prostate.yaml
new file mode 100644
index 0000000..1518397
--- /dev/null
+++ b/kustomize/components/models/models-definitions/prostate.yaml @@ -0,0 +1,36 @@ +applications: + - name: prostate-classifier-1 + import_path: models.binary_classifier:app + route_prefix: /prostate-classifier-1 + runtime_env: + working_dir: https://gitlab.ics.muni.cz/rationai/infrastructure/model-service/-/archive/master/model-service-master.zip + deployments: + - name: BinaryClassifier + max_ongoing_requests: 64 + max_queued_requests: 128 + autoscaling_config: + min_replicas: 0 + max_replicas: 4 + target_ongoing_requests: 32 + ray_actor_options: + num_cpus: 6 + memory: 6442450944 + runtime_env: + env_vars: + MLFLOW_TRACKING_URI: http://mlflow.rationai-mlflow:5000 + user_config: + tile_size: 512 + max_batch_size: 32 + batch_wait_timeout_s: 0.5 + mean: + - 228.5544 + - 178.8584 + - 219.8793 + std: + - 27.8285 + - 51.4639 + - 26.4458 + intra_op_num_threads: 5 + model: + _target_: providers.model_provider:mlflow + artifact_uri: mlflow-artifacts:/65/aebc892f526047249b972f200bef4381/artifacts/checkpoints/epoch=0-step=6972/model.onnx diff --git a/kustomize/components/models/serve-config-patch.yaml b/kustomize/components/models/serve-config-patch.yaml new file mode 100644 index 0000000..177aabc --- /dev/null +++ b/kustomize/components/models/serve-config-patch.yaml @@ -0,0 +1,89 @@ +apiVersion: ray.io/v1 +kind: RayService +metadata: + name: rayservice-model-split +spec: + serveConfigV2: | + applications: + - name: episeg-1 + import_path: models.semantic_segmentation:app + route_prefix: /episeg-1 + runtime_env: + working_dir: https://gitlab.ics.muni.cz/rationai/infrastructure/model-service/-/archive/master/model-service-master.zip + deployments: + - name: SemanticSegmentation + max_ongoing_requests: 16 + max_queued_requests: 64 + autoscaling_config: + min_replicas: 0 + max_replicas: 4 + target_ongoing_requests: 4 + ray_actor_options: + num_cpus: 12 + memory: 12884901888 + runtime_env: + env_vars: + MLFLOW_TRACKING_URI: http://mlflow.rationai-mlflow:5000 + user_config: + 
tile_size: 1024 + mpp: 0.468 + max_batch_size: 2 + batch_wait_timeout_s: 0.5 + intra_op_num_threads: 11 + model: + _target_: providers.model_provider:mlflow + artifact_uri: mlflow-artifacts:/10/39f821ed5b964c71a603cc6db196f9fd/artifacts/checkpoints/epoch=19-step=32020/model.onnx/model.onnx + - name: heatmap-builder + import_path: builders.heatmap_builder:app + route_prefix: /heatmap-builder + runtime_env: + working_dir: https://gitlab.ics.muni.cz/rationai/infrastructure/model-service/-/archive/master/model-service-master.zip + deployments: + - name: HeatmapBuilder + max_ongoing_requests: 16 + max_queued_requests: 64 + autoscaling_config: + min_replicas: 0 + max_replicas: 2 + target_ongoing_requests: 2 + ray_actor_options: + num_cpus: 4 + memory: 12884901888 + user_config: + num_threads: 4 + max_concurrent_tasks: 8 + - name: prostate-classifier-1 + import_path: models.binary_classifier:app + route_prefix: /prostate-classifier-1 + runtime_env: + working_dir: https://gitlab.ics.muni.cz/rationai/infrastructure/model-service/-/archive/master/model-service-master.zip + deployments: + - name: BinaryClassifier + max_ongoing_requests: 64 + max_queued_requests: 128 + autoscaling_config: + min_replicas: 0 + max_replicas: 4 + target_ongoing_requests: 32 + ray_actor_options: + num_cpus: 6 + memory: 6442450944 + runtime_env: + env_vars: + MLFLOW_TRACKING_URI: http://mlflow.rationai-mlflow:5000 + user_config: + tile_size: 512 + max_batch_size: 32 + batch_wait_timeout_s: 0.5 + mean: + - 228.5544 + - 178.8584 + - 219.8793 + std: + - 27.8285 + - 51.4639 + - 26.4458 + intra_op_num_threads: 5 + model: + _target_: providers.model_provider:mlflow + artifact_uri: mlflow-artifacts:/65/aebc892f526047249b972f200bef4381/artifacts/checkpoints/epoch=0-step=6972/model.onnx diff --git a/kustomize/overlays/kustomization.yaml b/kustomize/overlays/kustomization.yaml new file mode 100644 index 0000000..37d8d2d --- /dev/null +++ b/kustomize/overlays/kustomization.yaml @@ -0,0 +1,10 @@ +apiVersion: 
kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+resources:
+  - ../base
+
+components:
+  - ../components/models
+  - ../components/cpu-workers
+  - ../components/gpu-workers

From 3f74fb26b4aecb2685a1d6df7356ac38e3db66c2 Mon Sep 17 00:00:00 2001
From: JiriStipek <567776@muni.cz>
Date: Mon, 23 Feb 2026 19:11:29 +0100
Subject: [PATCH 2/3] feat: add deploy script

---
 deploy.sh | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100755 deploy.sh

diff --git a/deploy.sh b/deploy.sh
new file mode 100755
index 0000000..4bbdb5a
--- /dev/null
+++ b/deploy.sh
@@ -0,0 +1,4 @@
+# deploy.ps1
+python kustomize/components/models/merge_models.py
+if ($LASTEXITCODE -ne 0) { exit 1 }
+kubectl apply -k kustomize/overlays -n rationai-jobs-ns
\ No newline at end of file

From df76f0ff7684f4e3cca4763799e4418e28527e2b Mon Sep 17 00:00:00 2001
From: JiriStipek <567776@muni.cz>
Date: Mon, 23 Feb 2026 19:11:48 +0100
Subject: [PATCH 3/3] fix: convert deploy script from PowerShell to bash

---
 deploy.sh | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/deploy.sh b/deploy.sh
index 4bbdb5a..559c21f 100755
--- a/deploy.sh
+++ b/deploy.sh
@@ -1,4 +1,9 @@
-# deploy.ps1
+#!/bin/bash
+# Regenerate the merged Serve config, then apply the overlay to the cluster.
+set -euo pipefail
+
+# Resolve paths relative to this script so it works from any directory.
+cd "$(dirname "${BASH_SOURCE[0]}")"
+
 python kustomize/components/models/merge_models.py
-if ($LASTEXITCODE -ne 0) { exit 1 }
-kubectl apply -k kustomize/overlays -n rationai-jobs-ns
\ No newline at end of file
+kubectl apply -k kustomize/overlays -n rationai-jobs-ns