Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions pkg/constants/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,12 @@ const (
// MPISSHAuthorizedKeys is the file name for authorized keys.
MPISSHAuthorizedKeys string = "authorized_keys"

// MPISSHSecretPrivateKeyFileMode is the mode for the mounted MPI SSH private key (0600: OpenSSH-compatible; not group/other readable).
MPISSHSecretPrivateKeyFileMode int32 = 0600

// MPISSHSecretSharedSSHFileMode is the mode for the mounted MPI SSH public key and authorized_keys files (0644).
MPISSHSecretSharedSSHFileMode int32 = 0644

// MPIHostfileDir is the directory for the MPI hostfile.
MPIHostfileDir string = "/etc/mpi"

Expand Down
6 changes: 6 additions & 0 deletions pkg/runtime/core/trainingruntime_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1983,14 +1983,17 @@ test-job-node-0-1.test-job slots=8
{
Key: corev1.SSHAuthPrivateKey,
Path: constants.MPISSHPrivateKeyFile,
Mode: ptr.To(constants.MPISSHSecretPrivateKeyFileMode),
},
{
Key: constants.MPISSHPublicKey,
Path: constants.MPISSHPublicKeyFile,
Mode: ptr.To(constants.MPISSHSecretSharedSSHFileMode),
},
{
Key: constants.MPISSHPublicKey,
Path: constants.MPISSHAuthorizedKeys,
Mode: ptr.To(constants.MPISSHSecretSharedSSHFileMode),
},
},
},
Expand Down Expand Up @@ -2022,14 +2025,17 @@ test-job-node-0-1.test-job slots=8
{
Key: corev1.SSHAuthPrivateKey,
Path: constants.MPISSHPrivateKeyFile,
Mode: ptr.To(constants.MPISSHSecretPrivateKeyFileMode),
},
{
Key: constants.MPISSHPublicKey,
Path: constants.MPISSHPublicKeyFile,
Mode: ptr.To(constants.MPISSHSecretSharedSSHFileMode),
},
{
Key: constants.MPISSHPublicKey,
Path: constants.MPISSHAuthorizedKeys,
Mode: ptr.To(constants.MPISSHSecretSharedSSHFileMode),
},
},
},
Expand Down
42 changes: 30 additions & 12 deletions pkg/runtime/framework/core/framework_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -904,13 +904,16 @@ func TestRunComponentBuilderPlugins(t *testing.T) {
WithItems(
corev1ac.KeyToPath().
WithKey(corev1.SSHAuthPrivateKey).
WithPath(constants.MPISSHPrivateKeyFile),
WithPath(constants.MPISSHPrivateKeyFile).
WithMode(constants.MPISSHSecretPrivateKeyFileMode),
corev1ac.KeyToPath().
WithKey(constants.MPISSHPublicKey).
WithPath(constants.MPISSHPublicKeyFile),
WithPath(constants.MPISSHPublicKeyFile).
WithMode(constants.MPISSHSecretSharedSSHFileMode),
corev1ac.KeyToPath().
WithKey(constants.MPISSHPublicKey).
WithPath(constants.MPISSHAuthorizedKeys),
WithPath(constants.MPISSHAuthorizedKeys).
WithMode(constants.MPISSHSecretSharedSSHFileMode),
),
),
corev1ac.Volume().
Expand Down Expand Up @@ -978,13 +981,16 @@ func TestRunComponentBuilderPlugins(t *testing.T) {
WithItems(
corev1ac.KeyToPath().
WithKey(corev1.SSHAuthPrivateKey).
WithPath(constants.MPISSHPrivateKeyFile),
WithPath(constants.MPISSHPrivateKeyFile).
WithMode(constants.MPISSHSecretPrivateKeyFileMode),
corev1ac.KeyToPath().
WithKey(constants.MPISSHPublicKey).
WithPath(constants.MPISSHPublicKeyFile),
WithPath(constants.MPISSHPublicKeyFile).
WithMode(constants.MPISSHSecretSharedSSHFileMode),
corev1ac.KeyToPath().
WithKey(constants.MPISSHPublicKey).
WithPath(constants.MPISSHAuthorizedKeys),
WithPath(constants.MPISSHAuthorizedKeys).
WithMode(constants.MPISSHSecretSharedSSHFileMode),
),
),
),
Expand Down Expand Up @@ -1088,13 +1094,16 @@ func TestRunComponentBuilderPlugins(t *testing.T) {
WithItems(
corev1ac.KeyToPath().
WithKey(corev1.SSHAuthPrivateKey).
WithPath(constants.MPISSHPrivateKeyFile),
WithPath(constants.MPISSHPrivateKeyFile).
WithMode(constants.MPISSHSecretPrivateKeyFileMode),
corev1ac.KeyToPath().
WithKey(constants.MPISSHPublicKey).
WithPath(constants.MPISSHPublicKeyFile),
WithPath(constants.MPISSHPublicKeyFile).
WithMode(constants.MPISSHSecretSharedSSHFileMode),
corev1ac.KeyToPath().
WithKey(constants.MPISSHPublicKey).
WithPath(constants.MPISSHAuthorizedKeys),
WithPath(constants.MPISSHAuthorizedKeys).
WithMode(constants.MPISSHSecretSharedSSHFileMode),
),
),
*corev1ac.Volume().
Expand Down Expand Up @@ -1145,13 +1154,16 @@ func TestRunComponentBuilderPlugins(t *testing.T) {
WithItems(
corev1ac.KeyToPath().
WithKey(corev1.SSHAuthPrivateKey).
WithPath(constants.MPISSHPrivateKeyFile),
WithPath(constants.MPISSHPrivateKeyFile).
WithMode(constants.MPISSHSecretPrivateKeyFileMode),
corev1ac.KeyToPath().
WithKey(constants.MPISSHPublicKey).
WithPath(constants.MPISSHPublicKeyFile),
WithPath(constants.MPISSHPublicKeyFile).
WithMode(constants.MPISSHSecretSharedSSHFileMode),
corev1ac.KeyToPath().
WithKey(constants.MPISSHPublicKey).
WithPath(constants.MPISSHAuthorizedKeys),
WithPath(constants.MPISSHAuthorizedKeys).
WithMode(constants.MPISSHSecretSharedSSHFileMode),
),
),
},
Expand Down Expand Up @@ -1211,14 +1223,17 @@ func TestRunComponentBuilderPlugins(t *testing.T) {
{
Key: corev1.SSHAuthPrivateKey,
Path: constants.MPISSHPrivateKeyFile,
Mode: ptr.To(constants.MPISSHSecretPrivateKeyFileMode),
},
{
Key: constants.MPISSHPublicKey,
Path: constants.MPISSHPublicKeyFile,
Mode: ptr.To(constants.MPISSHSecretSharedSSHFileMode),
},
{
Key: constants.MPISSHPublicKey,
Path: constants.MPISSHAuthorizedKeys,
Mode: ptr.To(constants.MPISSHSecretSharedSSHFileMode),
},
},
},
Expand Down Expand Up @@ -1252,14 +1267,17 @@ func TestRunComponentBuilderPlugins(t *testing.T) {
{
Key: corev1.SSHAuthPrivateKey,
Path: constants.MPISSHPrivateKeyFile,
Mode: ptr.To(constants.MPISSHSecretPrivateKeyFileMode),
},
{
Key: constants.MPISSHPublicKey,
Path: constants.MPISSHPublicKeyFile,
Mode: ptr.To(constants.MPISSHSecretSharedSSHFileMode),
},
{
Key: constants.MPISSHPublicKey,
Path: constants.MPISSHAuthorizedKeys,
Mode: ptr.To(constants.MPISSHSecretSharedSSHFileMode),
},
},
},
Expand Down
30 changes: 17 additions & 13 deletions pkg/runtime/framework/plugins/mpi/mpi.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,19 +135,23 @@ func (m *MPI) EnforceMLPolicy(info *runtime.Info, trainJob *trainer.TrainJob) er
[]corev1ac.VolumeApplyConfiguration{
*corev1ac.Volume().
WithName(constants.MPISSHAuthVolumeName).
WithSecret(corev1ac.SecretVolumeSource().
WithSecretName(fmt.Sprintf("%s%s", trainJob.Name, constants.MPISSHAuthSecretSuffix)).
WithItems(
corev1ac.KeyToPath().
WithKey(corev1.SSHAuthPrivateKey).
WithPath(constants.MPISSHPrivateKeyFile),
corev1ac.KeyToPath().
WithKey(constants.MPISSHPublicKey).
WithPath(constants.MPISSHPublicKeyFile),
corev1ac.KeyToPath().
WithKey(constants.MPISSHPublicKey).
WithPath(constants.MPISSHAuthorizedKeys),
),
WithSecret(
corev1ac.SecretVolumeSource().
WithSecretName(fmt.Sprintf("%s%s", trainJob.Name, constants.MPISSHAuthSecretSuffix)).
WithItems(
corev1ac.KeyToPath().
WithKey(corev1.SSHAuthPrivateKey).
WithPath(constants.MPISSHPrivateKeyFile).
WithMode(constants.MPISSHSecretPrivateKeyFileMode),
corev1ac.KeyToPath().
WithKey(constants.MPISSHPublicKey).
WithPath(constants.MPISSHPublicKeyFile).
WithMode(constants.MPISSHSecretSharedSSHFileMode),
corev1ac.KeyToPath().
WithKey(constants.MPISSHPublicKey).
WithPath(constants.MPISSHAuthorizedKeys).
WithMode(constants.MPISSHSecretSharedSSHFileMode),
),
),
}...,
)
Expand Down
Loading