Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 31 additions & 2 deletions .github/workflows/build-deploy.yaml
Original file line number Diff line number Diff line change
@@ -1,13 +1,42 @@
name: build slurm-operator

on:
pull_request: []
pull_request: {}
push:
branches:
- main
workflow_dispatch:

jobs:
# Cross-build the operator image for linux/arm64 and push it to GHCR.
# The job-level condition skips the whole job on pull requests (nothing is
# pushed from a PR), so the individual steps need no event check of their own.
build-arm:
  if: (github.event_name != 'pull_request')
  runs-on: ubuntu-latest
  name: make and build arm
  steps:
    - name: Checkout Repository
      uses: actions/checkout@v4
    - uses: actions/setup-go@v3
      with:
        go-version: '^1.24'
    - name: GHCR Login
      uses: docker/login-action@v2
      with:
        registry: ghcr.io
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}

    # NOTE(review): building linux/arm64 on an x86_64 runner relies on QEMU
    # binfmt handlers being registered; confirm the runner image provides
    # them, or add docker/setup-qemu-action before this step.
    - name: Add custom buildx ARM builder
      run: |
        docker buildx create --name armbuilder
        docker buildx use armbuilder
        docker buildx inspect --bootstrap

    # `make arm-deploy` builds and pushes the arm64 image with buildx.
    - name: Deploy Container
      run: make arm-deploy

build:
runs-on: ubuntu-latest
strategy:
Expand All @@ -21,7 +50,7 @@ jobs:
uses: actions/checkout@v4
- uses: actions/setup-go@v3
with:
go-version: ^1.23
go-version: ^1.24
- name: GHCR Login
if: (github.event_name != 'pull_request')
uses: docker/login-action@v2
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/main.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: test slurm-operator

on:
pull_request: []
pull_request: {}

jobs:
formatting:
Expand All @@ -27,7 +27,7 @@ jobs:
- name: Setup Go
uses: actions/setup-go@v3
with:
go-version: ^1.23
go-version: ^1.24
- name: fmt check
run: make fmt

Expand Down Expand Up @@ -60,7 +60,7 @@ jobs:
- name: Setup Go
uses: actions/setup-go@v3
with:
go-version: ^1.23
go-version: ^1.24

- name: Start minikube
uses: medyagh/setup-minikube@697f2b7aaed5f70bf2a94ee21a4ec3dde7b12f92 # v0.0.9
Expand Down
70 changes: 68 additions & 2 deletions .github/workflows/slurm-containers.yaml
Original file line number Diff line number Diff line change
@@ -1,13 +1,74 @@
name: build slurm-containers

on:
pull_request: []
pull_request: {}
push:
branches:
- main
workflow_dispatch:

jobs:
# Cross-build the Ubuntu-based slurm image for linux/arm64 and push it to GHCR
# as <container>:ubuntu-arm.
# Every effective step only applies outside pull requests, so the condition is
# set once on the job instead of being repeated on each step.
build-arm-ubuntu:
  if: (github.event_name != 'pull_request')
  env:
    container: ghcr.io/converged-computing/slurm
  runs-on: ubuntu-latest
  name: build arm slurm ubuntu
  steps:
    - name: Checkout Repository
      uses: actions/checkout@v4
    # No Go toolchain is installed here: the job only runs docker buildx.
    - name: GHCR Login
      uses: docker/login-action@v2
      with:
        registry: ghcr.io
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}

    # NOTE(review): building linux/arm64 on an x86_64 runner relies on QEMU
    # binfmt handlers being registered; confirm the runner image provides
    # them, or add docker/setup-qemu-action before this step.
    - name: Add custom buildx ARM builder
      run: |
        docker buildx create --name armbuilder
        docker buildx use armbuilder
        docker buildx inspect --bootstrap

    - name: Build and Deploy Container
      run: >-
        docker buildx build
        -f docker/Dockerfile.ubuntu
        --build-arg ARCH=arm64
        --platform linux/arm64
        --push
        -t ${{ env.container }}:ubuntu-arm
        ./docker

# Cross-build the default slurm image for linux/arm64 and push it to GHCR
# as <container>:arm.
# Every effective step only applies outside pull requests, so the condition is
# set once on the job instead of being repeated on each step.
build-arm:
  if: (github.event_name != 'pull_request')
  env:
    container: ghcr.io/converged-computing/slurm
  runs-on: ubuntu-latest
  # Display name mirrors the sibling "build arm slurm ubuntu" job; this job
  # runs docker buildx directly and does not invoke make.
  name: build arm slurm
  steps:
    - name: Checkout Repository
      uses: actions/checkout@v4
    # No Go toolchain is installed here: the job only runs docker buildx.
    - name: GHCR Login
      uses: docker/login-action@v2
      with:
        registry: ghcr.io
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}

    # NOTE(review): building linux/arm64 on an x86_64 runner relies on QEMU
    # binfmt handlers being registered; confirm the runner image provides
    # them, or add docker/setup-qemu-action before this step.
    - name: Add custom buildx ARM builder
      run: |
        docker buildx create --name armbuilder
        docker buildx use armbuilder
        docker buildx inspect --bootstrap

    - name: Build and Deploy Container
      run: >-
        docker buildx build
        -f docker/Dockerfile
        --build-arg ARCH=arm64
        --platform linux/arm64
        --push
        -t ${{ env.container }}:arm
        ./docker


build:
env:
container: ghcr.io/converged-computing/slurm
Expand All @@ -27,6 +88,11 @@ jobs:
- name: Build Container
run: docker build -f docker/Dockerfile -t ${{ env.container }} ./docker

- name: Build Ubuntu Container
run: docker build -f docker/Dockerfile.ubuntu -t ${{ env.container }}:ubuntu ./docker

- name: Deploy Container
if: (github.event_name != 'pull_request')
run: docker push ${{ env.container }}
run: |
docker push ${{ env.container }}
docker push ${{ env.container }}:ubuntu
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Build the manager binary
FROM golang:1.23 as builder
FROM golang:1.24 AS builder
ARG TARGETOS
ARG TARGETARCH

Expand Down
20 changes: 19 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ endif

# Image URL to use all building/pushing image targets
IMG ?= ghcr.io/converged-computing/slurm-operator:latest
ARMIMG ?= ghcr.io/converged-computing/slurm-operator:arm

# Testing image (for development mostly)
DEVIMG ?= ghcr.io/converged-computing/slurm-operator:test
Expand Down Expand Up @@ -126,6 +127,17 @@ run: manifests generate fmt vet ## Run a controller from your host.
docker-build: test ## Build docker image with the manager.
docker build -t ${IMG} .

# Build (without pushing) the manager image for linux/arm64, tagged ${ARMIMG}.
# ARCH must be passed via --build-arg: a bare `ARCH=arm64` is read by buildx as
# a second positional (context) argument next to `.` and the build is rejected.
.PHONY: arm-build
arm-build: test ## Build the linux/arm64 docker image with the manager.
	docker buildx build --platform linux/arm64 --build-arg ARCH=arm64 -t ${ARMIMG} .

# Build and push the linux/arm64 manager image (${ARMIMG}), then point the
# kustomize manager config at that image and render the full install manifest
# to examples/dist/slurm-operator-arm.yaml.
.PHONY: arm-deploy
arm-deploy: manifests kustomize ## Build and push the linux/arm64 image and regenerate examples/dist/slurm-operator-arm.yaml.
	docker buildx build --platform linux/arm64 --build-arg ARCH=arm64 --push -t ${ARMIMG} .
	cd config/manager && $(KUSTOMIZE) edit set image controller=${ARMIMG}
	$(KUSTOMIZE) build config/default > examples/dist/slurm-operator-arm.yaml


.PHONY: docker-push
docker-push: ## Push docker image with the manager.
docker push ${IMG}
Expand Down Expand Up @@ -206,6 +218,12 @@ test-deploy: manifests kustomize
$(KUSTOMIZE) build config/default > examples/dist/slurm-operator-dev.yaml
sed -i 's/ imagePullPolicy: IfNotPresent/ imagePullPolicy: Always/' examples/dist/slurm-operator-dev.yaml


# Render the ARM install manifest only: set the controller image to ${ARMIMG}
# in config/manager and write the kustomize output to
# examples/dist/slurm-operator-arm.yaml. Nothing is built, pushed, or applied
# to a cluster by this target.
.PHONY: build-config-arm
build-config-arm: manifests kustomize ## Generate examples/dist/slurm-operator-arm.yaml for the ARM image (does not deploy).
	cd config/manager && $(KUSTOMIZE) edit set image controller=${ARMIMG}
	$(KUSTOMIZE) build config/default > examples/dist/slurm-operator-arm.yaml

.PHONY: test-deploy-recreate
test-deploy-recreate: test-deploy
kubectl delete -f ./examples/dist/slurm-operator-dev.yaml || echo "Already deleted"
Expand All @@ -217,7 +235,7 @@ list:

## Tool Versions
KUSTOMIZE_VERSION ?= v3.8.7
CONTROLLER_TOOLS_VERSION ?= v0.14.0
CONTROLLER_TOOLS_VERSION ?= v0.19.0

KUSTOMIZE_INSTALL_SCRIPT ?= "https://raw.githubusercontent.com/kubernetes-sigs/kustomize/master/hack/install_kustomize.sh"
.PHONY: kustomize
Expand Down
21 changes: 12 additions & 9 deletions api/v1alpha1/slurm_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ type SlurmSpec struct {
// +optional
SlurmVersion string `json:"slurmVersion,omitempty"`

// Size of the slurm (1 server + (N-1) nodes)
// Size is number of worker nodes
Size int32 `json:"size"`

// Interactive mode keeps the cluster running
Expand Down Expand Up @@ -168,6 +168,11 @@ type Node struct {
// +optional
WorkingDir string `json:"workingDir,omitempty"`

// Node specification. Leave empty for testing cluster
// This does not include hostlist (generated automatically)
// +optional
Nodespec string `json:"nodespec,omitempty"`

// PullAlways will always pull the container
// +optional
PullAlways bool `json:"pullAlways"`
Expand Down Expand Up @@ -225,8 +230,8 @@ func (s *Slurm) SelectorName() string {

// Validate the slurm
func (s *Slurm) Validate() bool {
if s.WorkerNodes() < 1 {
fmt.Printf("😥️ Slurm cluster must have at least one worker node, Size >= 2.\n")
if s.Spec.Size < 1 {
fmt.Printf("😥️ Slurm cluster must have 1 or more worker nodes.\n")
return false
}
// Ensure we have the default image set
Expand All @@ -238,6 +243,10 @@ func (s *Slurm) Validate() bool {
s.Spec.ClusterName = "linux"
}

// Default node spec
if s.Spec.Node.Nodespec == "" {
s.Spec.Node.Nodespec = "RealMemory=1000 CPUs=1 State=UNKNOWN"
}
// Along with a username and password
if s.Spec.Database.DatabaseName == "" {
s.Spec.Database.DatabaseName = "slurm_acct_db"
Expand All @@ -249,12 +258,6 @@ func (s *Slurm) Validate() bool {
return true
}

// WorkerNodes returns the number of worker nodes
// At this point we've already validated size is >= 1
func (s *Slurm) WorkerNodes() int32 {
return s.Spec.Size - 1
}

// WorkerNode returns the worker node (if defined) or falls back to the server
func (s *Slurm) WorkerNode() Node {

Expand Down
19 changes: 17 additions & 2 deletions config/crd/bases/flux-framework.org_slurms.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.14.0
controller-gen.kubebuilder.io/version: v0.19.0
name: slurms.flux-framework.org
spec:
group: flux-framework.org
Expand Down Expand Up @@ -65,6 +65,11 @@ spec:
default: ghcr.io/converged-computing/slurm
description: Image to use for slurm
type: string
nodespec:
description: |-
Node specification. Leave empty for testing cluster
This does not include hostlist (generated automatically)
type: string
ports:
description: |-
Ports to be exposed to other containers in the cluster
Expand Down Expand Up @@ -188,6 +193,11 @@ spec:
default: ghcr.io/converged-computing/slurm
description: Image to use for slurm
type: string
nodespec:
description: |-
Node specification. Leave empty for testing cluster
This does not include hostlist (generated automatically)
type: string
ports:
description: |-
Ports to be exposed to other containers in the cluster
Expand Down Expand Up @@ -234,7 +244,7 @@ spec:
description: Resources include limits and requests
type: object
size:
description: Size of the slurm (1 server + (N-1) nodes)
description: Size is number of worker nodes
format: int32
type: integer
slurmVersion:
Expand Down Expand Up @@ -266,6 +276,11 @@ spec:
default: ghcr.io/converged-computing/slurm
description: Image to use for slurm
type: string
nodespec:
description: |-
Node specification. Leave empty for testing cluster
This does not include hostlist (generated automatically)
type: string
ports:
description: |-
Ports to be exposed to other containers in the cluster
Expand Down
7 changes: 3 additions & 4 deletions config/default/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ namePrefix: slurm-operator-
#commonLabels:
# someName: someValue

bases:
resources:
- ../crd
- ../rbac
- ../manager
Expand All @@ -28,9 +28,8 @@ patchesStrategicMerge:
# Protect the /metrics endpoint by putting it behind auth.
# If you want your controller-manager to expose the /metrics
# endpoint w/o any authn/z, please comment the following line.
- manager_auth_proxy_patch.yaml


# we have removed this because the image is deprecated 3/2026
# - manager_auth_proxy_patch.yaml

# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in
# crd/kustomization.yaml
Expand Down
Loading
Loading