diff --git a/Makefile b/Makefile index 4a2e90a671..4b80437644 100644 --- a/Makefile +++ b/Makefile @@ -121,7 +121,7 @@ else endif # install-skopeo is purposely omitted from this target because it is only -# needed for a single test target (test-e2e-ocl). +# needed for the e2e-ocl test targets (test-e2e-ocl-1of2, test-e2e-ocl-2of2). install-tools: install-golangci-lint install-go-junit-report install-setup-envtest # Runs golangci-lint @@ -216,15 +216,11 @@ test-e2e-techpreview: install-go-junit-report test-e2e-single-node: install-go-junit-report set -o pipefail; go test -tags=$(GOTAGS) -failfast -timeout 120m -v$${WHAT:+ -run="$$WHAT"} ./test/e2e-single-node/ | ./hack/test-with-junit.sh $(@) -test-e2e-ocl: install-go-junit-report install-skopeo - # Temporarily include /tmp/skopeo/bin in our PATH variable so that the test suite can find skopeo. - set -o pipefail; PATH="$(PATH):/tmp/skopeo/bin" go test -tags=$(GOTAGS) -failfast -timeout 190m -v$${WHAT:+ -run="$$WHAT"} ./test/e2e-ocl/ | ./hack/test-with-junit.sh $(@) - test-e2e-ocl-1of2: install-go-junit-report install-skopeo set -o pipefail; PATH="$(PATH):/tmp/skopeo/bin" go test -tags=$(GOTAGS) -failfast -timeout 120m -v$${WHAT:+ -run="$$WHAT"} ./test/e2e-ocl-1of2/ ./test/e2e-ocl-shared/ | ./hack/test-with-junit.sh $(@) test-e2e-ocl-2of2: install-go-junit-report install-skopeo - set -o pipefail; PATH="$(PATH):/tmp/skopeo/bin" go test -tags=$(GOTAGS) -failfast -timeout 120m -v$${WHAT:+ -run="$$WHAT"} ./test/e2e-ocl-2of2/ ./test/e2e-ocl-shared/ | ./hack/test-with-junit.sh $(@) + set -o pipefail; PATH="$(PATH):/tmp/skopeo/bin" go test -tags=$(GOTAGS) -failfast -timeout 150m -v$${WHAT:+ -run="$$WHAT"} ./test/e2e-ocl-2of2/ ./test/e2e-ocl-shared/ | ./hack/test-with-junit.sh $(@) test-e2e-iri: install-go-junit-report set -o pipefail; go test -tags=$(GOTAGS) -failfast -timeout 120m -v$${WHAT:+ -run="$$WHAT"} ./test/e2e-iri/ | ./hack/test-with-junit.sh $(@) diff --git a/pkg/controller/build/reconciler.go b/pkg/controller/build/reconciler.go index 43b566a9fd..79d91de71e 100644 --- a/pkg/controller/build/reconciler.go +++ b/pkg/controller/build/reconciler.go @@ -191,6 +191,13 @@ func (b *buildReconciler) deleteMachineOSConfig(ctx context.Context, mosc *mcfgv } for _, mosb := range mosbList { + // Only delete MOSBs owned by this specific MOSC instance. A new MOSC + // with the same name (but different UID) may already exist and have + // created new MOSBs that match the same label selector. + if !metav1.IsControlledBy(mosb, mosc) { + klog.Infof("Skipping MachineOSBuild %s: not owned by deleted MachineOSConfig %s (UID %s)", mosb.Name, mosc.Name, mosc.UID) + continue + } if err := b.deleteMachineOSBuild(ctx, mosb); err != nil { return fmt.Errorf("could not delete MachineOSBuild %s for MachineOSConfig %s: %w", mosb.Name, mosc.Name, err) } diff --git a/test/e2e-ocl/Containerfile.cowsay b/test/e2e-ocl/Containerfile.cowsay deleted file mode 100644 index 9bd23fa342..0000000000 --- a/test/e2e-ocl/Containerfile.cowsay +++ /dev/null @@ -1,9 +0,0 @@ -FROM quay.io/centos/centos:stream9 AS centos -RUN dnf install -y epel-release - -FROM configs AS final -COPY --from=centos /etc/yum.repos.d /etc/yum.repos.d -COPY --from=centos /etc/pki/rpm-gpg/RPM-GPG-KEY-* /etc/pki/rpm-gpg/ -RUN sed -i 's/\$stream/9-stream/g' /etc/yum.repos.d/centos*.repo && \ - rpm-ostree install cowsay && \ - ostree container commit diff --git a/test/e2e-ocl/Containerfile.entitled b/test/e2e-ocl/Containerfile.entitled deleted file mode 100644 index 18e925ecc6..0000000000 --- a/test/e2e-ocl/Containerfile.entitled +++ /dev/null @@ -1,6 +0,0 @@ -FROM configs AS final - -RUN rm -rf /etc/rhsm-host && \ - rpm-ostree install buildah && \ - ln -s /run/secrets/rhsm /etc/rhsm-host && \ - ostree container commit diff --git a/test/e2e-ocl/Containerfile.okd-fcos b/test/e2e-ocl/Containerfile.okd-fcos deleted file mode 100644 index 34db40295f..0000000000 --- a/test/e2e-ocl/Containerfile.okd-fcos +++ /dev/null @@ -1,3 +0,0 @@ -FROM configs AS final -RUN rpm-ostree install cowsay && \ - ostree container commit diff --git a/test/e2e-ocl/Containerfile.simple b/test/e2e-ocl/Containerfile.simple deleted file mode 100644 index 2f0520b230..0000000000 --- a/test/e2e-ocl/Containerfile.simple +++ /dev/null @@ -1,3 +0,0 @@ -FROM configs AS final -RUN touch /etc/simple-test-file.txt && \ - ostree container commit diff --git a/test/e2e-ocl/Containerfile.yum-repos-d b/test/e2e-ocl/Containerfile.yum-repos-d deleted file mode 100644 index f6b9fd4d42..0000000000 --- a/test/e2e-ocl/Containerfile.yum-repos-d +++ /dev/null @@ -1,3 +0,0 @@ -FROM configs AS final -RUN rpm-ostree install buildah && \ - ostree container commit diff --git a/test/e2e-ocl/helpers_test.go b/test/e2e-ocl/helpers_test.go deleted file mode 100644 index f46511629a..0000000000 --- a/test/e2e-ocl/helpers_test.go +++ /dev/null @@ -1,1147 +0,0 @@ -package e2e_ocl_test - -import ( - "bytes" - "context" - "errors" - "fmt" - "io" - "io/ioutil" - "os" - "os/exec" - "path/filepath" - "slices" - "strings" - "testing" - "time" - - "github.com/containers/image/v5/docker" - "github.com/containers/image/v5/types" - ign3types "github.com/coreos/ignition/v2/config/v3_5/types" - "github.com/distribution/reference" - imagev1 "github.com/openshift/api/image/v1" - mcfgv1 "github.com/openshift/api/machineconfiguration/v1" - "github.com/openshift/machine-config-operator/pkg/controller/build/constants" - "github.com/openshift/machine-config-operator/pkg/controller/build/utils" - ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common" - "github.com/openshift/machine-config-operator/test/framework" - "github.com/openshift/machine-config-operator/test/helpers" - "github.com/stretchr/testify/require" - "golang.org/x/sync/errgroup" - batchv1 "k8s.io/api/batch/v1" - corev1 "k8s.io/api/core/v1" - k8serrors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - aggerrs "k8s.io/apimachinery/pkg/util/errors" - "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/client-go/util/retry" - "sigs.k8s.io/yaml" -) - -var ( - InspectMC = "inspect-mc" -) - -func applyMC(t *testing.T, cs *framework.ClientSet, mc *mcfgv1.MachineConfig) func() { - cleanupFunc := helpers.ApplyMC(t, cs, mc) - t.Logf("Created new MachineConfig %q", mc.Name) - - return makeIdempotentAndRegister(t, func() { - cleanupFunc() - t.Logf("Deleted MachineConfig %q", mc.Name) - }) -} - -func createMachineOSConfig(t *testing.T, cs *framework.ClientSet, mosc *mcfgv1.MachineOSConfig) func() { - helpers.SetMetadataOnObject(t, mosc) - - _, err := cs.MachineconfigurationV1Interface.MachineOSConfigs().Create(context.TODO(), mosc, metav1.CreateOptions{}) - require.NoError(t, err) - - t.Logf("Created MachineOSConfig %q", mosc.Name) - - return makeIdempotentAndRegister(t, func() { - require.NoError(t, cs.MachineconfigurationV1Interface.MachineOSConfigs().Delete(context.TODO(), mosc.Name, metav1.DeleteOptions{})) - t.Logf("Deleted MachineOSConfig %q", mosc.Name) - }) -} - -// Sets up the ImageStream in the desired namesspace. If in a different -// namespace than the MCO, it will create the namespace and clone the pull -// secret into the MCO namespace. Returns the name of the push secret used, the -// image pullspec, and an idempotent cleanup function. -func setupImageStream(t *testing.T, cs *framework.ClientSet, objMeta metav1.ObjectMeta) (string, string, func()) { - t.Helper() - - cleanups := helpers.NewCleanupFuncs() - - pushSecretName := "builder-push-secret-name" - - // If no namespace is provided, default to the MCO namespace. - if objMeta.Namespace == "" { - objMeta.Namespace = ctrlcommon.MCONamespace - } - - builderSAObjMeta := metav1.ObjectMeta{ - Namespace: objMeta.Namespace, - Name: "builder", - } - - // If we're told to use a different namespace than the MCO namespace, we need - // to do some additional steps. - if objMeta.Namespace != ctrlcommon.MCONamespace { - // Create the namespace. - cleanups.Add(createNamespace(t, cs, objMeta)) - - // Wait for the builder service account to exist within the new namespace. - require.NoError(t, waitForServiceAccountToExist(cs, builderSAObjMeta)) - } - - // Create the Imagestream. - pullspec, isCleanupFunc := createImagestream(t, cs, objMeta) - cleanups.Add(isCleanupFunc) - - // Create a long-lived image registry pull secret so that it will not get - // automatically rotated while the test is running. - opts := helpers.LongLivedSecretOpts{ - DeleteIfExists: true, - ServiceAccount: builderSAObjMeta, - Lifetime: "24h", - Secret: metav1.ObjectMeta{ - Namespace: ctrlcommon.MCONamespace, - Name: pushSecretName, - }, - } - - cleanups.Add(helpers.CreateLongLivedPullSecretForTest(context.TODO(), t, cs, opts)) - - return pushSecretName, pullspec, makeIdempotentAndRegister(t, cleanups.Run) -} - -// Creates a namespace. Returns an idempotent cleanup function. -func createNamespace(t *testing.T, cs *framework.ClientSet, objMeta metav1.ObjectMeta) func() { - ns := &corev1.Namespace{ - ObjectMeta: metav1.ObjectMeta{ - Name: objMeta.Namespace, - }, - } - - helpers.SetMetadataOnObject(t, ns) - - _, err := cs.CoreV1Interface.Namespaces().Create(context.TODO(), ns, metav1.CreateOptions{}) - require.NoError(t, err) - t.Logf("Created namespace %q", ns.Name) - - return makeIdempotentAndRegister(t, func() { - require.NoError(t, cs.CoreV1Interface.Namespaces().Delete(context.TODO(), ns.Name, metav1.DeleteOptions{})) - t.Logf("Deleted namespace %q", ns.Name) - }) -} - -// There may be a delay between the time a new namespace is created and its -// service accounts to be created. This will wait up to one minute for the -// specified service account to be created. -func waitForServiceAccountToExist(cs *framework.ClientSet, objMeta metav1.ObjectMeta) error { - return wait.PollImmediate(1*time.Second, 1*time.Minute, func() (bool, error) { - builderSA, err := cs.CoreV1Interface.ServiceAccounts(objMeta.Namespace).Get(context.TODO(), objMeta.Name, metav1.GetOptions{}) - if err != nil && !k8serrors.IsNotFound(err) { - return false, err - } - - return builderSA != nil, nil - }) -} - -// Creates an OpenShift ImageStream in the MCO namespace for the test and -// registers a cleanup function. Returns the pullspec with the latest tag for -// the newly-created ImageStream. -func createImagestream(t *testing.T, cs *framework.ClientSet, objMeta metav1.ObjectMeta) (string, func()) { - is := &imagev1.ImageStream{ - ObjectMeta: metav1.ObjectMeta{ - Name: objMeta.Name, - Namespace: objMeta.Namespace, - }, - } - - helpers.SetMetadataOnObject(t, is) - - created, err := cs.ImageV1Interface.ImageStreams(is.Namespace).Create(context.TODO(), is, metav1.CreateOptions{}) - require.NoError(t, err) - require.NotEmpty(t, created.Status.DockerImageRepository) - - pullspec := fmt.Sprintf("%s:latest", created.Status.DockerImageRepository) - t.Logf("Created ImageStream \"%s/%s\", has pullspec %q", is.Namespace, is.Name, pullspec) - - return pullspec, makeIdempotentAndRegister(t, func() { - require.NoError(t, cs.ImageV1Interface.ImageStreams(is.Namespace).Delete(context.TODO(), is.Name, metav1.DeleteOptions{})) - t.Logf("Deleted ImageStream \"%s/%s\"", is.Namespace, is.Name) - }) -} - -// Creates a given ConfigMap and registers a cleanup function to delete it. -func createConfigMap(t *testing.T, cs *framework.ClientSet, cm *corev1.ConfigMap) func() { - helpers.SetMetadataOnObject(t, cm) - - _, err := cs.CoreV1Interface.ConfigMaps(cm.Namespace).Create(context.TODO(), cm, metav1.CreateOptions{}) - require.NoError(t, err) - - t.Logf("Created ConfigMap \"%s/%s\"", cm.Namespace, cm.Name) - - return makeIdempotentAndRegister(t, func() { - require.NoError(t, cs.CoreV1Interface.ConfigMaps(cm.Namespace).Delete(context.TODO(), cm.Name, metav1.DeleteOptions{})) - t.Logf("Deleted ConfigMap \"%s/%s\"", cm.Namespace, cm.Name) - }) -} - -// Creates a given Secret and registers a cleanup function to delete it. -func createSecret(t *testing.T, cs *framework.ClientSet, secret *corev1.Secret) func() { - helpers.SetMetadataOnObject(t, secret) - - _, err := cs.CoreV1Interface.Secrets(secret.Namespace).Create(context.TODO(), secret, metav1.CreateOptions{}) - require.NoError(t, err) - - t.Logf("Created secret \"%s/%s\"", secret.Namespace, secret.Name) - - return makeIdempotentAndRegister(t, func() { - require.NoError(t, cs.CoreV1Interface.Secrets(ctrlcommon.MCONamespace).Delete(context.TODO(), secret.Name, metav1.DeleteOptions{})) - t.Logf("Deleted secret \"%s/%s\"", secret.Namespace, secret.Name) - }) -} - -// Computes the name of the currently-running MachineOSBuild given a MachineConfigPool and MachineOSConfig. -func getMachineOSBuildNameForPool(cs *framework.ClientSet, poolName, moscName string) (string, error) { - mcp, err := cs.MachineconfigurationV1Interface.MachineConfigPools().Get(context.TODO(), poolName, metav1.GetOptions{}) - if err != nil { - return "", err - } - - mosc, err := cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(context.TODO(), moscName, metav1.GetOptions{}) - if err != nil { - return "", err - } - - mosbs, err := cs.MachineconfigurationV1Interface.MachineOSBuilds().List(context.TODO(), metav1.ListOptions{ - LabelSelector: utils.MachineOSBuildSelector(mosc, mcp).String(), - }) - - if err != nil { - return "", err - } - - if len(mosbs.Items) == 1 { - return mosbs.Items[0].Name, nil - } - - if len(mosbs.Items) == 0 { - return "", fmt.Errorf("no MachineOSBuild found for MachineOSConfig %s, MachineConfigPool %s, rendered MachineConfig %s", mosc.Name, mcp.Name, mcp.Spec.Configuration.Name) - } - - return "", fmt.Errorf("found multiple MachineOSBuilds for MachineOSConfig %s, MachineConfigPool %s, rendered MachineConfig %s", mosc.Name, mcp.Name, mcp.Spec.Configuration.Name) -} - -// Waits for the target MachineConfigPool to reach a state defined in a supplied function. -func waitForPoolToReachState(t *testing.T, cs *framework.ClientSet, poolName string, condFunc func(*mcfgv1.MachineConfigPool) bool) { - err := wait.PollImmediate(1*time.Second, 10*time.Minute, func() (bool, error) { - mcp, err := cs.MachineconfigurationV1Interface.MachineConfigPools().Get(context.TODO(), poolName, metav1.GetOptions{}) - if err != nil { - return false, err - } - - return condFunc(mcp), nil - }) - - require.NoError(t, err, "MachineConfigPool %q did not reach desired state", poolName) -} - -// Registers a cleanup function, making it idempotent, and wiring up the skip -// cleanup checks which will cause cleanup to be skipped under certain -// conditions. -func makeIdempotentAndRegister(t *testing.T, cleanupFunc func()) func() { - cfg := helpers.IdempotentConfig{ - SkipAlways: skipCleanupAlways, - SkipOnlyOnFailure: skipCleanupOnlyAfterFailure, - } - - return helpers.MakeConfigurableIdempotentAndRegister(t, cfg, cleanupFunc) -} - -// Registers a cleanup function, making it idempotent and ensures that it will -// always be run, regardless of skip cleanup opts or whether we're in CI. -// -// Note: Use this wrapper only in cases where you want to ensure that a -// function is only called once despite there being multiple calls to the -// returned function. If there is only one call to the returned function -// anyway, use t.Cleanup() instead for clarity. -func makeIdempotentAndRegisterAlwaysRun(t *testing.T, cleanupFunc func()) func() { - return helpers.MakeIdempotentAndRegister(t, cleanupFunc) -} - -// TOOD: Refactor into smaller functions. -func cleanupEphemeralBuildObjects(t *testing.T, cs *framework.ClientSet) { - labelSelector := utils.OSBuildSelector().String() - - // Any secrets that get created by BuildController should have different - // label selectors since they're produced differently. - secretList, err := cs.CoreV1Interface.Secrets(ctrlcommon.MCONamespace).List(context.TODO(), metav1.ListOptions{ - LabelSelector: utils.CanonicalizedSecretSelector().String(), - }) - - require.NoError(t, err) - - cmList, err := cs.CoreV1Interface.ConfigMaps(ctrlcommon.MCONamespace).List(context.TODO(), metav1.ListOptions{ - LabelSelector: labelSelector, - }) - - require.NoError(t, err) - - jobList, err := cs.BatchV1Interface.Jobs(ctrlcommon.MCONamespace).List(context.TODO(), metav1.ListOptions{ - LabelSelector: labelSelector, - }) - - require.NoError(t, err) - - podList, err := cs.CoreV1Interface.Pods(ctrlcommon.MCONamespace).List(context.TODO(), metav1.ListOptions{ - LabelSelector: labelSelector, - }) - - require.NoError(t, err) - - mosbList, err := cs.MachineconfigurationV1Interface.MachineOSBuilds().List(context.TODO(), metav1.ListOptions{}) - require.NoError(t, err) - - moscList, err := cs.MachineconfigurationV1Interface.MachineOSConfigs().List(context.TODO(), metav1.ListOptions{}) - require.NoError(t, err) - - // Helper function to create a fresh timeout context for each resource verification. - // Each resource gets its own 3-minute timeout to prevent slow deletions from - // consuming the timeout budget of other resources. Use a 5-second poll interval - // to significantly reduce API call rate and avoid exhausting the rate limiter - // (~36 attempts per resource, vs 120 attempts with 1s interval). - newCleanupAssertion := func() *helpers.Assertions { - ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute) - t.Cleanup(cancel) - return helpers.AssertClientSet(t, cs).WithContext(ctx).WithPollInterval(5 * time.Second).Eventually() - } - - if len(secretList.Items) == 0 { - t.Logf("No build-time secrets to clean up") - } - - if len(cmList.Items) == 0 { - t.Logf("No ephemeral ConfigMaps to clean up") - } - - if len(jobList.Items) == 0 { - t.Logf("No ephemeral Jobs to clean up") - } - - if len(podList.Items) == 0 { - t.Logf("No build pods to clean up") - } - - if len(mosbList.Items) == 0 { - t.Logf("No MachineOSBuilds to clean up") - } - - if len(moscList.Items) == 0 { - t.Logf("No MachineOSConfigs to clean up") - } - - // Delete owners first (MachineOSConfigs, MachineOSBuilds) before their dependents. - // MachineOSBuilds have FinalizerDeleteDependents which blocks deletion until owned - // resources are gone. Deleting ConfigMaps/Secrets before their owners creates a race - // with Kubernetes GC that causes intermittent timeout failures. - for _, item := range moscList.Items { - t.Logf("Cleaning up MachineOSConfig %q", item.Name) - require.NoError(t, deleteObject(context.TODO(), t, &item, cs.MachineconfigurationV1Interface.MachineOSConfigs())) - newCleanupAssertion().MachineOSConfigDoesNotExist(&item) - } - - for _, item := range mosbList.Items { - t.Logf("Cleaning up MachineOSBuild %q", item.Name) - require.NoError(t, deleteObject(context.TODO(), t, &item, cs.MachineconfigurationV1Interface.MachineOSBuilds())) - newCleanupAssertion().MachineOSBuildDoesNotExist(&item) - - // Also clean up the digest ConfigMap - t.Logf("Cleaning up ephemeral digest ConfigMap %q", utils.GetDigestConfigMapName(&item)) - require.NoError(t, cleanupDigestConfigMap(t, cs, &item)) - newCleanupAssertion().ConfigMapDoesNotExist(utils.GetDigestConfigMapName(&item)) - } - - // Now delete Jobs and their dependent resources - for _, item := range jobList.Items { - jobUID := string(item.UID) - t.Logf("Cleaning up build job %q", item.Name) - bgDeletion := metav1.DeletePropagationBackground - require.NoError(t, deleteObjectWithOpts(context.TODO(), t, &item, cs.BatchV1Interface.Jobs(ctrlcommon.MCONamespace), metav1.DeleteOptions{ - PropagationPolicy: &bgDeletion, - })) - newCleanupAssertion().JobDoesNotExist(item.Name) - - // Delete any pods that were created by the job - pods, err := cs.CoreV1Interface.Pods(ctrlcommon.MCONamespace).List(context.TODO(), metav1.ListOptions{ - LabelSelector: "controller-uid=" + jobUID, - }) - require.NoError(t, err) - for _, pod := range pods.Items { - require.NoError(t, deleteObject(context.TODO(), t, &pod, cs.CoreV1Interface.Pods(ctrlcommon.MCONamespace))) - newCleanupAssertion().PodDoesNotExist(pod.Name) - } - } - - for _, item := range podList.Items { - t.Logf("Cleaning up build pod %q", item.Name) - require.NoError(t, deleteObject(context.TODO(), t, &item, cs.CoreV1Interface.Pods(ctrlcommon.MCONamespace))) - newCleanupAssertion().PodDoesNotExist(item.Name) - } - - // Clean up remaining ConfigMaps and Secrets after their owners are deleted - for _, item := range cmList.Items { - t.Logf("Cleaning up ephemeral ConfigMap %q", item.Name) - require.NoError(t, deleteObject(context.TODO(), t, &item, cs.CoreV1Interface.ConfigMaps(ctrlcommon.MCONamespace))) - newCleanupAssertion().ConfigMapDoesNotExist(item.Name) - } - - for _, item := range secretList.Items { - t.Logf("Cleaning up build-time Secret %s", item.Name) - require.NoError(t, deleteObject(context.TODO(), t, &item, cs.CoreV1Interface.Secrets(ctrlcommon.MCONamespace))) - newCleanupAssertion().SecretDoesNotExist(item.Name) - } - - // Clean up inspect MC if it exists - machineConfig, err := cs.MachineConfigs().Get(context.TODO(), InspectMC, metav1.GetOptions{}) - if err == nil { - t.Logf("Cleaning up MachineConfig %q", InspectMC) - require.NoError(t, deleteObject(context.TODO(), t, machineConfig, cs.MachineConfigs())) - newCleanupAssertion().MachineConfigDoesNotExist(machineConfig) - } -} - -type deleter interface { - Delete(context.Context, string, metav1.DeleteOptions) error -} - -type kubeObject interface { - runtime.Object - GetName() string -} - -func deleteObject(ctx context.Context, t *testing.T, obj kubeObject, deleter deleter) error { - return deleteObjectWithOpts(ctx, t, obj, deleter, metav1.DeleteOptions{}) -} - -func deleteObjectWithOpts(ctx context.Context, t *testing.T, obj kubeObject, deleter deleter, opts metav1.DeleteOptions) error { - kind, err := utils.GetKindForObject(obj) - if err != nil && kind == "" { - kind = "" - } - - err = deleter.Delete(ctx, obj.GetName(), opts) - - if err == nil { - t.Logf("Cleaned up %s %q", kind, obj.GetName()) - return nil - } - - if k8serrors.IsNotFound(err) { - t.Logf("%s %q already cleaned up", kind, obj.GetName()) - return nil - } - - return err -} - -func cleanupDigestConfigMap(t *testing.T, cs *framework.ClientSet, mosb *mcfgv1.MachineOSBuild) error { - cm, err := cs.CoreV1Interface.ConfigMaps(ctrlcommon.MCONamespace).Get(context.TODO(), utils.GetDigestConfigMapName(mosb), metav1.GetOptions{}) - if err == nil { - return deleteObject(context.TODO(), t, cm, cs.CoreV1Interface.ConfigMaps(ctrlcommon.MCONamespace)) - } - if k8serrors.IsNotFound(err) { - t.Logf("%s already cleaned up", utils.GetDigestConfigMapName(mosb)) - return nil - } - return err -} - -// Determines where to write the build logs in the event of a failure. -// ARTIFACT_DIR is a well-known env var provided by the OpenShift CI system. -// Writing to the path in this env var will ensure that any files written to -// that path end up in the OpenShift CI GCP bucket for later viewing. -// -// If this env var is not set, these files will be written to the current -// working directory. -func getBuildArtifactDir(t *testing.T) string { - artifactDir := os.Getenv("ARTIFACT_DIR") - if artifactDir != "" { - return artifactDir - } - - cwd, err := os.Getwd() - require.NoError(t, err) - return cwd -} - -// Writes any ephemeral build objects to disk as YAML files. -func writeBuildArtifactsToFiles(t *testing.T, cs *framework.ClientSet, poolName string) { - lo := metav1.ListOptions{ - LabelSelector: utils.OSBuildSelector().String(), - } - - archiveName := fmt.Sprintf("%s-build-artifacts.tar.gz", helpers.SanitizeTestName(t)) - - archive, err := helpers.NewArtifactArchive(t, archiveName) - require.NoError(t, err) - - err = aggerrs.NewAggregate([]error{ - writeConfigMapsToFile(t, cs, lo, archive.StagingDir()), - writeBuildPodsToFile(t, cs, lo, archive.StagingDir()), - writeMachineOSBuildsToFile(t, cs, archive.StagingDir()), - writeMachineOSConfigsToFile(t, cs, archive.StagingDir()), - }) - - require.NoError(t, err, "could not write build artifacts to files, got: %s", err) - - require.NoError(t, archive.WriteArchive(), "could not write archive") -} - -// Writes all MachineOSBuilds to a file. -func writeMachineOSBuildsToFile(t *testing.T, cs *framework.ClientSet, archiveDir string) error { - mosbList, err := cs.MachineconfigurationV1Interface.MachineOSBuilds().List(context.TODO(), metav1.ListOptions{}) - if err != nil { - return err - } - - if len(mosbList.Items) == 0 { - t.Logf("No MachineOSBuilds to write") - return nil - } - - return dumpObjectToYAMLFile(t, mosbList, filepath.Join(archiveDir, "machineosbuilds.yaml")) -} - -// Writes all MachineOSConfigs to a file. -func writeMachineOSConfigsToFile(t *testing.T, cs *framework.ClientSet, archiveDir string) error { - moscList, err := cs.MachineconfigurationV1Interface.MachineOSConfigs().List(context.TODO(), metav1.ListOptions{}) - if err != nil { - return err - } - - if len(moscList.Items) == 0 { - t.Logf("No MachineOSConfigs to write") - return nil - } - - return dumpObjectToYAMLFile(t, moscList, filepath.Join(archiveDir, "machineosconfigs.yaml")) -} - -// Writes all ConfigMaps that match the OS Build labels to files. -func writeConfigMapsToFile(t *testing.T, cs *framework.ClientSet, lo metav1.ListOptions, archiveDir string) error { - cmList, err := cs.CoreV1Interface.ConfigMaps(ctrlcommon.MCONamespace).List(context.TODO(), lo) - - if err != nil { - return err - } - - if len(cmList.Items) == 0 { - t.Logf("No ConfigMaps matching label selector %q found", lo.LabelSelector) - return nil - } - - return dumpObjectToYAMLFile(t, cmList, filepath.Join(archiveDir, "configmaps.yaml")) -} - -// Wrttes all pod specs that match the OS Build labels to files. -func writeBuildPodsToFile(t *testing.T, cs *framework.ClientSet, lo metav1.ListOptions, archiveDir string) error { - podList, err := cs.CoreV1Interface.Pods(ctrlcommon.MCONamespace).List(context.TODO(), lo) - if err != nil { - return err - } - - if len(podList.Items) == 0 { - t.Logf("No pods matching label selector %q found", lo.LabelSelector) - return nil - } - - return dumpObjectToYAMLFile(t, podList, filepath.Join(archiveDir, "pods.yaml")) -} - -// Dumps a struct to the provided filename in YAML format, creating any -// parent directories as needed. -func dumpObjectToYAMLFile(t *testing.T, obj interface{}, filename string) error { - if err := os.MkdirAll(filepath.Dir(filename), 0o755); err != nil { - return err - } - - out, err := yaml.Marshal(obj) - if err != nil { - return err - } - - return os.WriteFile(filename, out, 0o755) -} - -// Streams the logs from the Machine OS Builder pod containers to a set of -// files. This can provide a valuable window into how / why the e2e test suite -// failed. -func streamMachineOSBuilderPodLogsToFile(ctx context.Context, t *testing.T, cs *framework.ClientSet, dirPath string) error { - pods, err := cs.CoreV1Interface.Pods(ctrlcommon.MCONamespace).List(ctx, metav1.ListOptions{ - LabelSelector: "k8s-app=machine-os-builder", - }) - - require.NoError(t, err) - - mobPod := &pods.Items[0] - return streamPodContainerLogsToFile(ctx, t, cs, mobPod, dirPath) -} - -// Streams the logs for all of the containers running in the build pod. The pod -// logs can provide a valuable window into how / why a given build failed. -func streamBuildPodLogsToFile(ctx context.Context, t *testing.T, cs *framework.ClientSet, mosb *mcfgv1.MachineOSBuild, dirPath string) error { - jobName := mosb.Status.Builder.Job.Name - - pod, err := getPodFromJob(ctx, cs, jobName) - if err != nil { - return err - } - - return streamPodContainerLogsToFile(ctx, t, cs, pod, dirPath) -} - -// Returns a list of pods that match a given job name. -func listPodsForJob(ctx context.Context, cs *framework.ClientSet, jobName string) (*corev1.PodList, error) { - job, err := cs.BatchV1Interface.Jobs(ctrlcommon.MCONamespace).Get(ctx, jobName, metav1.GetOptions{}) - if err != nil { - return nil, fmt.Errorf("could not get job %s: %w", job, err) - } - - podList, err := cs.CoreV1Interface.Pods(ctrlcommon.MCONamespace).List(ctx, metav1.ListOptions{LabelSelector: fmt.Sprintf("controller-uid=%s", job.UID)}) - if err != nil { - return nil, fmt.Errorf("could not get pods with job label %s: %w", jobName, err) - } - - return podList, nil -} - -// Retrieves the currently running build pod for a given job name. -func getPodFromJob(ctx context.Context, cs *framework.ClientSet, jobName string) (*corev1.Pod, error) { - podList, err := listPodsForJob(ctx, cs, jobName) - if err != nil { - return nil, fmt.Errorf("could not list pods for job %s: %w", jobName, err) - } - - if podList != nil { - if len(podList.Items) == 1 { - return &podList.Items[0], nil - } - - // this is needed when we test the case for a new pod being created after deleting the existing one - // as sometimes it takes time for the old pod to be completely deleted - for _, pod := range podList.Items { - if isBuildPodRunning(&pod) { - return &pod, nil - } - } - } - - return nil, fmt.Errorf("no pod found for job %s", jobName) -} - -// Determines if a build pod is running by first examining the init container -// statuses and then the main container statuses. -func isBuildPodRunning(pod *corev1.Pod) bool { - for _, status := range pod.Status.InitContainerStatuses { - if status.State.Running != nil { - return true - } - } - - for _, status := range pod.Status.ContainerStatuses { - if status.State.Running != nil { - return true - } - } - - return false -} - -// getJobForMOSB returns the name of the job that was created for the given MOSB by comparing the job UID -// to the UID stored in the MOSB annotation -func getJobForMOSB(ctx context.Context, cs *framework.ClientSet, build *mcfgv1.MachineOSBuild) (string, error) { - jobName := "" - mosbJobUID := "" - - for mosbJobUID == "" { - mosb, err := cs.MachineconfigurationV1Interface.MachineOSBuilds().Get(ctx, build.Name, metav1.GetOptions{}) - if err != nil { - return jobName, fmt.Errorf("could not get MachineOSBuild %s: %w", build.Name, err) - } - if mosb.GetAnnotations()[constants.JobUIDAnnotationKey] != "" { - mosbJobUID = mosb.GetAnnotations()[constants.JobUIDAnnotationKey] - break - } - time.Sleep(1 * time.Second) - } - - for jobName == "" { - jobs, err := cs.BatchV1Interface.Jobs(ctrlcommon.MCONamespace).List(ctx, metav1.ListOptions{}) - if err != nil { - return jobName, fmt.Errorf("could not get list of jobs: %w", err) - } - for _, job := range jobs.Items { - if string(job.UID) == mosbJobUID { - jobName = job.Name - break - } - } - time.Sleep(1 * time.Second) - } - return jobName, nil -} - -// Attaches a follower to each of the containers within a given pod in order to -// stream their logs to disk for future debugging. -func streamPodContainerLogsToFile(ctx context.Context, t *testing.T, cs *framework.ClientSet, pod *corev1.Pod, dirPath string) error { - errGroup, egCtx := errgroup.WithContext(ctx) - - // Stream logs from init containers - for _, container := range pod.Spec.InitContainers { - container := container - pod := pod.DeepCopy() - - errGroup.Go(func() error { - return streamContainerLogToFile(egCtx, t, cs, pod, container, dirPath) - }) - } - - // Stream logs from regular containers - for _, container := range pod.Spec.Containers { - container := container - pod := pod.DeepCopy() - - // Because we follow the logs for each container in a build pod, this - // blocks the current Goroutine. So we run each log stream operation in a - // separate Goroutine to avoid blocking the main Goroutine. - errGroup.Go(func() error { - return streamContainerLogToFile(egCtx, t, cs, pod, container, dirPath) - }) - } - - // Only propagate errors that are not a context cancellation. - if err := errGroup.Wait(); err != nil && !errors.Is(err, context.Canceled) { - return err - } - - return nil -} - -// Streams the logs for a given container to a file. -func streamContainerLogToFile(ctx context.Context, t *testing.T, cs *framework.ClientSet, pod *corev1.Pod, container corev1.Container, dirPath string) error { - // Wait for the container to be ready to stream logs - for { - select { - case <-ctx.Done(): - return ctx.Err() - default: - logger, err := cs.CoreV1Interface.Pods(ctrlcommon.MCONamespace).GetLogs(pod.Name, &corev1.PodLogOptions{ - Container: container.Name, - Follow: true, - }).Stream(ctx) - - if err != nil { - // If the container is waiting to start (e.g., PodInitializing), wait and retry - if strings.Contains(err.Error(), "waiting to start") || strings.Contains(err.Error(), "PodInitializing") { - time.Sleep(5 * time.Second) - continue - } - return fmt.Errorf("could not get logs for container %s in pod %s: %w", container.Name, pod.Name, err) - } - defer logger.Close() - - filename := filepath.Join(dirPath, fmt.Sprintf("%s-%s-%s.log", t.Name(), pod.Name, container.Name)) - - file, err := os.Create(filename) - if err != nil { - return err - } - - defer file.Close() - - t.Logf("Streaming pod (%s) container (%s) logs to %s", pod.Name, container.Name, filename) - if _, err := io.Copy(file, logger); err != nil { - return fmt.Errorf("could not write pod logs to %s: %w", filename, err) - } - - return nil - } - } -} - -// Skips a given test if it is detected that the cluster is running OKD. We -// skip these tests because they're either irrelevant for OKD or would fail. -func skipOnOKD(t *testing.T) { - cs := framework.NewClientSet("") - - isOKD, err := helpers.IsOKDCluster(cs) - require.NoError(t, err) - - if isOKD { - t.Logf("OKD detected, skipping test %s", t.Name()) - t.Skip() - } -} - -func skipOnOCP(t *testing.T) { - cs := framework.NewClientSet("") - isOKD, err := helpers.IsOKDCluster(cs) - require.NoError(t, err) - - if !isOKD { - t.Logf("OCP detected, skipping test %s", t.Name()) - t.Skip() - } -} - -// Extracts the contents of a directory within a given container to a temporary -// directory. Next, it loads them into a bytes map keyed by filename. It does -// not handle nested directories, so use with caution. -func convertFilesFromContainerImageToBytesMap(t *testing.T, pullspec, containerFilepath string) map[string][]byte { - tempDir := t.TempDir() - - path := fmt.Sprintf("%s:%s", containerFilepath, tempDir) - cmd := exec.Command("oc", "image", "extract", pullspec, "--path", path) - t.Logf("Extracting files under %q from %q to %q; running %s", containerFilepath, pullspec, tempDir, cmd.String()) - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - require.NoError(t, cmd.Run()) - - out := map[string][]byte{} - - isCentosImage := strings.Contains(pullspec, "centos") - - err := filepath.Walk(tempDir, func(path string, info os.FileInfo, err error) error { - if err != nil { - return err - } - - if info.IsDir() { - return nil - } - - contents, err := ioutil.ReadFile(path) - if err != nil { - return err - } - - if isCentosImage { - contents = bytes.ReplaceAll(contents, []byte("$stream"), []byte("9-stream")) - } - - // Replace $stream with 9-stream in any of the Centos repo content we pulled. - out[filepath.Base(path)] = contents - return nil - }) - - require.NoError(t, err) - - return out -} - -// Skips the test if the entitlement secret is not present. -func skipIfEntitlementNotPresent(t *testing.T, cs *framework.ClientSet) { - - _, err := cs.CoreV1Interface.Secrets(constants.EtcPkiEntitlementSecretName).Get(context.TODO(), ctrlcommon.OpenshiftConfigManagedNamespace, metav1.GetOptions{}) - if k8serrors.IsNotFound(err) { - t.Logf("Secret %q not found in %q, skipping test", constants.EtcPkiEntitlementSecretName, ctrlcommon.OpenshiftConfigManagedNamespace) - t.Skip() - return - } - // No other errors are expected. - require.NoError(t, err) -} - -// Uses the centos stream 9 container and extracts the contents of both the -// /etc/yum.repos.d and /etc/pki/rpm-gpg directories and injects those into a -// ConfigMap and Secret, respectively. This is so that the build process will -// consume those objects as part of the build process, injecting them into the -// build context. -func injectYumRepos(t *testing.T, cs *framework.ClientSet) func() { - tempDir := t.TempDir() - - yumReposPath := filepath.Join(tempDir, "yum-repos-d") - require.NoError(t, os.MkdirAll(yumReposPath, 0o755)) - - centosPullspec := "quay.io/centos/centos:stream9" - yumReposContents := convertFilesFromContainerImageToBytesMap(t, centosPullspec, "/etc/yum.repos.d/") - rpmGpgContents := convertFilesFromContainerImageToBytesMap(t, centosPullspec, "/etc/pki/rpm-gpg/") - - configMapCleanupFunc := createConfigMap(t, cs, &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{ - Name: "etc-yum-repos-d", - Namespace: ctrlcommon.MCONamespace, - }, - // Note: Even though the BuildController retrieves this ConfigMap, it only - // does so to determine whether or not it is present. It does not look at - // its contents. For that reason, we can use the BinaryData field here - // because the Build Pod will use its contents the same regardless of - // whether its string data or binary data. - BinaryData: yumReposContents, - }) - - secretCleanupFunc := createSecret(t, cs, &corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - Name: "etc-pki-rpm-gpg", - Namespace: ctrlcommon.MCONamespace, - }, - Data: rpmGpgContents, - }) - - return makeIdempotentAndRegister(t, func() { - configMapCleanupFunc() - secretCleanupFunc() - }) -} - -func newMachineConfig(name, pool string) *mcfgv1.MachineConfig { - mode := 420 - testfiledata := fmt.Sprintf("data:,%s-%s", name, pool) - path := fmt.Sprintf("/etc/%s-%s", name, pool) - file := ign3types.File{ - Node: ign3types.Node{ - Path: path, - }, - FileEmbedded1: ign3types.FileEmbedded1{ - Contents: ign3types.Resource{ - Source: &testfiledata, - }, - Mode: &mode, - }, - } - - return helpers.NewMachineConfig(name, helpers.MCLabelForRole(pool), "", []ign3types.File{file}) -} - -// newMachineConfigWithExtensions returns the same base MC, but adds the given extensions to trigger an image rebuild -func newMachineConfigTriggersImageRebuild(name, pool string, exts []string) *mcfgv1.MachineConfig { - mc := newMachineConfig(name, pool) - mc.Spec.Extensions = append(mc.Spec.Extensions, exts...) - return mc -} - -// newMachineConfigWithKernelType returns the same base MC, but adds the given kernel -func newMachineConfigWithKernelType(name, pool, kernelType string) *mcfgv1.MachineConfig { - mc := newMachineConfig(name, pool) - mc.Spec.KernelType = kernelType - return mc -} - -func compareKernelType(t *testing.T, foundKernel, requiredKernelType string) bool { - switch requiredKernelType { - case ctrlcommon.KernelTypeDefault: - return !strings.Contains(foundKernel, "rt") && !strings.Contains(foundKernel, "64k") - case ctrlcommon.KernelTypeRealtime: - return strings.Contains(foundKernel, "rt") - case ctrlcommon.KernelType64kPages: - return strings.Contains(foundKernel, "64k") - default: - t.Logf("Unsupported kernel type requested in MC %s", requiredKernelType) - return false - } -} - -// Gets an override image pullspec for TestGracefulBuildFailureRecovery. We -// override this option to produce a faster test failure since the image we -// select will both be smaller than the OS image as well as not contain the -// required binaries such as Ignition, rpm-ostree, etc. -// -// This will use the MCO's image from the machine-config-operator-images -// ConfigMap if it is found to be a digested image. Otherwise, it will get the -// digested image pullspec for registry.fedoraproject.org/fedora:latest. The -// reason why we look up that image is because it does not require a pull -// secret in order to get its digest, which the BaseOSImagePullspec field -// requires. -func getImagePullspecForFailureTest(ctx context.Context, cs *framework.ClientSet) (string, error) { - images, err := ctrlcommon.GetImagesConfig(ctx, cs.GetKubeclient()) - if err != nil { - return "", err - } - - parsed, err := docker.ParseReference("//" + images.MachineConfigOperator) - if err != nil { - return "", err - } - - switch parsed.DockerReference().(type) { - case reference.Digested: - return images.MachineConfigOperator, nil - case reference.Tagged: - return resolveTaggedPullspecToDigestedPullspec(ctx, "registry.fedoraproject.org/fedora:latest") - default: - return "", fmt.Errorf("unknown image reference spec %q", images.MachineConfigOperator) - } -} - -func getBadContainerFileForFailureTest() []mcfgv1.MachineOSContainerfile { - return []mcfgv1.MachineOSContainerfile{{ - ContainerfileArch: mcfgv1.NoArch, - Content: "THIS IS A BAD CONTAINERFILE", - }} -} - -// Talks to an image registry to get the digested image pullspec for the -// supplied image pullspec. Note: Only supports public image registries. This -// is the same as doing: -// $ skopeo inspect docker://image-pullspec | jq '.Digest' -func resolveTaggedPullspecToDigestedPullspec(ctx context.Context, pullspec string) (string, error) { - sysCtx := &types.SystemContext{} - - tagged, err := docker.ParseReference("//" + pullspec) - if err != nil { - return "", err - } - - digest, err := docker.GetDigest(ctx, sysCtx, tagged) - if err != nil { - return "", err - } - - canonical, err := reference.WithDigest(reference.TrimNamed(tagged.DockerReference()), digest) - if err != nil { - return "", err - } - - return canonical.String(), nil -} - -// TODO: Deduplicate this definition from machine-config-operator/devex/internal/pkg/rollout/rollout.go -// Having "internal" in the module path prevents us from reusing it here since -// it is internal to the devex directory. -func setDeploymentReplicas(t *testing.T, cs *framework.ClientSet, deployment metav1.ObjectMeta, replicas int32) error { - return retry.RetryOnConflict(retry.DefaultBackoff, func() error { - t.Logf("Setting replicas for %s/%s to %d", deployment.Namespace, deployment.Name, replicas) - scale, err := cs.AppsV1Interface.Deployments(deployment.Namespace).GetScale(context.TODO(), deployment.Name, metav1.GetOptions{}) - if err != nil { - return err - } - - scale.Spec.Replicas = replicas - - _, err = cs.AppsV1Interface.Deployments(deployment.Namespace).UpdateScale(context.TODO(), deployment.Name, scale, metav1.UpdateOptions{}) - return err - }) -} - -// Scales down the machine-os-builder, machine-config-opreator, and -// cluster-version-operator deployments. Registers and returns an idempotent -// function that will scale the deployments back to their original values. -func scaleDownDeployments(t *testing.T, cs *framework.ClientSet) func() { - deployments := []metav1.ObjectMeta{ - // Scale down the cluster-version-operator since it could set the desired - // replicas for the MCO to 1. - { - Name: "cluster-version-operator", - Namespace: "openshift-cluster-version", - }, - // Scale down the machine-config-operator since it could set the desired - // replicas for the build controller to 1. - { - Name: "machine-config-operator", - Namespace: ctrlcommon.MCONamespace, - }, - // Scale down the machine-os-builder since we want to simulate its pod - // being rescheduled. - { - Name: "machine-os-builder", - Namespace: ctrlcommon.MCONamespace, - }, - } - - restoreFuncs := []func(){} - - for _, deployment := range deployments { - restoreFuncs = append(restoreFuncs, scaleDownDeployment(t, cs, deployment)) - } - - return helpers.MakeIdempotentAndRegister(t, func() { - // Restore the deployments in the reverse order by which we disabled them. - // Not really necessary, but we want to ensure that the machine-os-builder - // deployment starts back up as soon as possible. - slices.Reverse(restoreFuncs) - - for _, restoreFunc := range restoreFuncs { - restoreFunc() - } - }) -} - -// Scales down a given deployment unless that deployment is already set to zero -// replicas, in which case it no-ops. Registers and returns an idempotent -// restoral function that will revert the deployment back to its original -// setting. -func scaleDownDeployment(t *testing.T, cs *framework.ClientSet, deployment metav1.ObjectMeta) func() { - ctx := context.TODO() - - originalDeployment, err := cs.AppsV1Interface.Deployments(deployment.Namespace).Get(ctx, deployment.Name, metav1.GetOptions{}) - require.NoError(t, err) - - originalReplicas := *originalDeployment.Spec.Replicas - - // We check if the original replica count is zero. This is because it is very - // common for a dev sandbox cluster to at least have the CVO disabled. - if originalReplicas == 0 { - t.Logf("Original replica count for deployment %s/%s set to 0, skipping scale down", deployment.Namespace, deployment.Name) - - return helpers.MakeIdempotentAndRegister(t, func() { - t.Logf("Original replica count for deployment %s/%s set to 0, skipping restore", deployment.Namespace, deployment.Name) - }) - } - - require.NoError(t, setDeploymentReplicas(t, cs, deployment, 0)) - - return helpers.MakeIdempotentAndRegister(t, func() { - require.NoError(t, setDeploymentReplicas(t, cs, deployment, originalReplicas)) - }) -} - -// forceMachineOSBuildToFail() repeatedly deletes the build pod associated -// with the given MachineOSBuild so that the job will fail. -func forceMachineOSBuildToFail(ctx context.Context, t *testing.T, cs *framework.ClientSet, mosb *mcfgv1.MachineOSBuild) error { - start := time.Now() - - jobName, err := getJobForMOSB(ctx, cs, mosb) - if err != nil { - return fmt.Errorf("could not identify job for MachineOSBuild %s: %w", mosb.Name, err) - } - - t.Logf("Found job %s for MachineOSBuild %s, will delete pods belonging to this job to cause build failure", jobName, mosb.Name) - - return wait.PollImmediate(1*time.Second, 5*time.Minute, func() (bool, error) { - job, err := cs.BatchV1Interface.Jobs(ctrlcommon.MCONamespace).Get(ctx, jobName, metav1.GetOptions{}) - if err != nil { - return false, fmt.Errorf("could not get job %s for MachineOSBuild %s: %w", jobName, mosb.Name, err) - } - - for _, condition := range job.Status.Conditions { - if condition.Reason == batchv1.JobReasonBackoffLimitExceeded && condition.Status == corev1.ConditionTrue { - t.Logf("Job %s has indicated failure after %s", jobName, time.Since(start)) - return true, nil - } - } - - podList, err := listPodsForJob(ctx, cs, jobName) - if err != nil { - return false, fmt.Errorf("could not list pods for job %s: %w", jobName, err) - } - - for _, pod := range podList.Items { - if pod.DeletionTimestamp == nil { - t.Logf("Deleting pod %s belonging to job %s", pod.Name, jobName) - if err := cs.CoreV1Interface.Pods(ctrlcommon.MCONamespace).Delete(ctx, pod.Name, metav1.DeleteOptions{}); err != nil { - return false, fmt.Errorf("could not delete pod %s: %w", pod.Name, err) - } - } - } - - return false, nil - }) -} diff --git a/test/e2e-ocl/imagepruner_test.go b/test/e2e-ocl/imagepruner_test.go deleted file mode 100644 index f1f89abe0a..0000000000 --- a/test/e2e-ocl/imagepruner_test.go +++ /dev/null @@ -1,878 +0,0 @@ -package e2e_ocl_test - -import ( - "context" - "crypto/sha256" - "errors" - "flag" - "fmt" - "net/http" - "os" - "os/exec" - "path/filepath" - "strings" - "testing" - "time" - - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/wait" - - "github.com/containers/image/v5/docker" - "github.com/containers/image/v5/types" - "github.com/davecgh/go-spew/spew" - "github.com/distribution/reference" - "github.com/opencontainers/go-digest" - mcfgv1 "github.com/openshift/api/machineconfiguration/v1" - - "github.com/openshift/machine-config-operator/pkg/controller/build/imagepruner" - "github.com/openshift/machine-config-operator/pkg/controller/build/utils" - ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common" - "github.com/openshift/machine-config-operator/pkg/secrets" - "github.com/openshift/machine-config-operator/test/framework" - "github.com/openshift/machine-config-operator/test/helpers" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -// Used by TestImagePruner only when flags are passed. -var realImageRegistrySecretPath string -var realImagePullspec string - -func init() { - flag.StringVar(&realImageRegistrySecretPath, "image-registry-secret", "", "Path to image registry creds for real test") - flag.StringVar(&realImagePullspec, "image-pullspec", "", "Path to image for real test") -} - -// This test does the following: -// - Creates an empty (scratch) image and uploads it to the specified registry using skopeo. -// - Tests that the ImagePruner can inspect the image. -// - Tests that the ImagePruner cna delete the image. -// - Tests that the image has been deleted. -// -// To run this test, one needs the following: -// - Admin-level creds to an image repository such as Quay.io. -// - A pull secret on disk with the creds for that image repository. -// - The image repository must exist. -// -// The test can be run with the following incantation. -// $ go test -tags='containers_image_openpgp exclude_graphdriver_devicemapper exclude_graphdriver_btrfs containers_image_ostree_stub' -v -count=1 -image-registry-secret /path/to/image/creds/on/disk -image-pullspec quay.io/org/repo:tag -func TestImagePruner(t *testing.T) { - t.Parallel() - - skipIfUnableToRun(t) - - ctx, cancel := context.WithCancel(context.Background()) - t.Cleanup(cancel) - - require.NoError(t, createAndPushScratchImage(ctx, t, realImagePullspec, realImageRegistrySecretPath, "")) - - ip, k8sSecret, err := setupImagePrunerForTest(realImageRegistrySecretPath) - require.NoError(t, err) - - t.Logf("Inspecting %s using ImagePruner", realImagePullspec) - - inspect, digest, err := ip.InspectImage(ctx, realImagePullspec, k8sSecret, &mcfgv1.ControllerConfig{}) - assert.NoError(t, err) - assert.NotNil(t, inspect) - assert.NotNil(t, digest) - - t.Logf("Deleting image %s using ImagePruner", realImagePullspec) - - assert.NoError(t, ip.DeleteImage(ctx, realImagePullspec, k8sSecret, &mcfgv1.ControllerConfig{})) - - t.Logf("Inspecting %s again using ImagePruner; expecting an error this time", realImagePullspec) - _, _, err = ip.InspectImage(ctx, realImagePullspec, k8sSecret, &mcfgv1.ControllerConfig{}) - assert.Error(t, err) - assert.True(t, imagepruner.IsImageNotFoundErr(err)) - assert.False(t, imagepruner.IsAccessDeniedErr(err)) - assert.True(t, imagepruner.IsTolerableDeleteErr(err)) -} - -// This test sets up an ImageStream and exposes the internal image registry -// which backs it. It then creates a scratch image and pushes it to the -// internal image registry and performs a series of tests against it in order -// to validate the returned errors. -func TestImagePrunerOnCluster(t *testing.T) { - t.Parallel() - - ctx, cancel := context.WithTimeout(context.Background(), time.Minute) - t.Cleanup(cancel) - - canContinue, err := canTestOnInClusterRegistry(ctx, "") - if err != nil { - t.Skip("") - } - - if !canContinue { - t.Skip("") - } - - cs := framework.NewClientSet("") - - // Allow the internal image regsistry to be accessed from outside of the cluster. - externalRegistryHostname, err := helpers.ExposeClusterImageRegistry(ctx, cs) - require.NoError(t, err) - - t.Cleanup(func() { - require.NoError(t, helpers.UnexposeClusterImageRegistry(ctx, cs)) - }) - - // Set up the imagestream for this test. - pushSecretName, pullspec, cleanupFunc := setupImageStream(t, cs, metav1.ObjectMeta{Name: "imagepruner", Namespace: ctrlcommon.MCONamespace}) - t.Cleanup(cleanupFunc) - - // Parse the internal registry hostname from the image pullspec. - parsed, err := reference.ParseNamed(pullspec) - require.NoError(t, err) - internalRegistryHostname := reference.Domain(parsed) - - // Retrieve the long-lived image pull secret and add the external registry hostname to it. - secret, err := cs.CoreV1Interface.Secrets(ctrlcommon.MCONamespace).Get(ctx, pushSecretName, metav1.GetOptions{}) - require.NoError(t, err) - secretPath := filepath.Join(t.TempDir(), "config.json") - secret, err = addExternalRegistryHostnameToSecret(internalRegistryHostname, externalRegistryHostname, secret, secretPath) - require.NoError(t, err) - - // Replace the internal registry hostname with the external image registry hostname. - pullspec = strings.ReplaceAll(pullspec, internalRegistryHostname, externalRegistryHostname) - - // Retrieve the ingress cert so that we do not have to disable SSL verification. - ingressCert, err := cs.CoreV1Interface.Secrets("openshift-ingress").Get(ctx, "router-certs-default", metav1.GetOptions{}) - require.NoError(t, err) - certsDir := filepath.Join(t.TempDir()) - require.NoError(t, os.WriteFile(filepath.Join(certsDir, externalRegistryHostname+".crt"), ingressCert.Data["tls.crt"], 0o644)) - - // Wait for the route to finish setting up. We can determine that the route - // setup is complete when we get an image not found error when inspecting a - // nonexistent image. - err = wait.PollImmediate(time.Second, time.Minute, func() (bool, error) { - imgPruner := imagepruner.NewImageInspectorDeleter() - sysCtx := &types.SystemContext{DockerCertPath: certsDir, AuthFilePath: secretPath} - - _, _, err = imgPruner.ImageInspect(ctx, sysCtx, pullspec) - - // If we get an image not found error, that means the route is set up - // because we were able to authenticate to the image registry and make a - // query for a nonexistent image. - if imagepruner.IsImageNotFoundErr(err) { - return true, nil - } - - // If this is an HTTP 503 error, that means the route has not finished - // being set up, so we need to try again. - var unexpectedHTTPError docker.UnexpectedHTTPStatusError - if errors.As(err, &unexpectedHTTPError) && unexpectedHTTPError.StatusCode == http.StatusServiceUnavailable { - return false, nil - } - - // We were unable to identify this error, so return it. - return false, fmt.Errorf("unknown registry error when polling: %w", err) - }) - - require.NoError(t, err, unwrapAll(err)) - - // Now we can run our test cases. All test cases use the - // ImageInspectorDeleter directly since we need to have a bit more control - // over the SystemContext given that we're running out-of-cluster. - t.Run("Inspect without creds", func(t *testing.T) { - t.Parallel() - - imgPruner := imagepruner.NewImageInspectorDeleter() - sysCtx := &types.SystemContext{DockerCertPath: certsDir} - - _, _, err = imgPruner.ImageInspect(ctx, sysCtx, pullspec) - assert.Error(t, err) - assert.True(t, imagepruner.IsAccessDeniedErr(err), "expected access denied err: %s", unwrapAll(err)) - }) - - t.Run("Inspect nonexistent image digest with creds", func(t *testing.T) { - t.Parallel() - - imgPruner := imagepruner.NewImageInspectorDeleter() - sysCtx := &types.SystemContext{DockerCertPath: certsDir, AuthFilePath: secretPath} - - // Use a fake digest in the image pullspec. - fakeDigestedPullspec, err := replaceTagWithDigestOnPullspec(pullspec, "fake-hash") - require.NoError(t, err) - - _, _, err = imgPruner.ImageInspect(ctx, sysCtx, fakeDigestedPullspec) - assert.Error(t, err) - assert.True(t, imagepruner.IsImageNotFoundErr(err), "expected image not found err: %s", unwrapAll(err)) - }) - - t.Run("Inspect nonexistent image tag with creds", func(t *testing.T) { - t.Parallel() - - imgPruner := imagepruner.NewImageInspectorDeleter() - sysCtx := &types.SystemContext{DockerCertPath: certsDir, AuthFilePath: secretPath} - - fakeRepoPullspec, err := replaceRepoOnPullspec(pullspec, "fake-repo") - require.NoError(t, err) - - _, _, err = imgPruner.ImageInspect(ctx, sysCtx, fakeRepoPullspec) - assert.Error(t, err) - assert.True(t, imagepruner.IsImageNotFoundErr(err), "expected image not found err: %s", unwrapAll(err)) - }) - - t.Run("Inspect nonexistent image repo with creds", func(t *testing.T) { - t.Parallel() - - imgPruner := imagepruner.NewImageInspectorDeleter() - sysCtx := &types.SystemContext{DockerCertPath: certsDir, AuthFilePath: secretPath} - - fakeTagPullspec, err := replaceTagOnPullspec(pullspec, "fake-tag") - require.NoError(t, err) - - _, _, err = imgPruner.ImageInspect(ctx, sysCtx, fakeTagPullspec) - assert.Error(t, err) - assert.True(t, imagepruner.IsImageNotFoundErr(err), "expected image not found err: %s", unwrapAll(err)) - }) - - t.Run("Push image and inspect", func(t *testing.T) { - t.Parallel() - - require.NoError(t, createAndPushScratchImage(ctx, t, pullspec, secretPath, certsDir)) - - imgPruner := imagepruner.NewImageInspectorDeleter() - sysCtx := &types.SystemContext{DockerCertPath: certsDir, AuthFilePath: secretPath} - - _, _, err := imgPruner.ImageInspect(ctx, sysCtx, pullspec) - assert.NoError(t, err) - - // The long-lived pull secret does not confer the ability to delete images - // from the registry, so this is expected to be access denied. - err = imgPruner.DeleteImage(ctx, sysCtx, pullspec) - assert.Error(t, err) - assert.True(t, imagepruner.IsAccessDeniedErr(err), "expected access denied err: %s", unwrapAll(err)) - }) -} - -// This test attempts to make real requests to image registries that one may -// not have the appropriate credentials to run. The general idea here is to -// ensure that our error detection code functions as it should. -// -// Each test case represents a given image registry as well as a commonly-used -// image that is readily available there. For each test case, we do the -// following: -// 1. Attempt to inspect the image; this should succeed in all cases. -// 2. Get the image digest from the inspected image, mutate it so that it does -// not exist, then attempt to inspect that pullspec. -// 3. Change the tag on the image pullspec to a known nonexistent tag, then -// attempt to insect that pullspec. -// 4. Change the repo on the image pullspect o a known nonexistent repo, then -// attempt to inspect that pullspec. -// 5. Perform the same operations described above for deletion. -// 6. Ensure that the errors returned (if any) match what we expect. -func TestImagePrunerErrors(t *testing.T) { - t.Parallel() - - ctx, cancel := context.WithTimeout(context.Background(), time.Minute) - t.Cleanup(cancel) - - type expectedErr struct { - accessDenied bool - imageNotFound bool - } - - type operation struct { - existingImage expectedErr - nonexistentRepo expectedErr - nonexistentTag expectedErr - nonexistentDigest expectedErr - } - - type testCase struct { - name string - pullspec string - deletion operation - inspect operation - } - - testCases := []testCase{ - { - name: "Quay.io", - pullspec: "quay.io/skopeo/stable:latest", - deletion: operation{ - existingImage: expectedErr{ - accessDenied: true, - }, - nonexistentRepo: expectedErr{ - accessDenied: true, - }, - nonexistentTag: expectedErr{ - imageNotFound: true, - }, - nonexistentDigest: expectedErr{ - imageNotFound: true, - }, - }, - inspect: operation{ - nonexistentRepo: expectedErr{ - accessDenied: true, - }, - nonexistentTag: expectedErr{ - imageNotFound: true, - }, - nonexistentDigest: expectedErr{ - imageNotFound: true, - }, - }, - }, - { - name: "Docker.io", - pullspec: "docker.io/library/python:latest", - deletion: operation{ - existingImage: expectedErr{ - accessDenied: true, - }, - nonexistentRepo: expectedErr{ - accessDenied: true, - }, - nonexistentTag: expectedErr{ - imageNotFound: true, - }, - nonexistentDigest: expectedErr{ - imageNotFound: true, - }, - }, - inspect: operation{ - nonexistentRepo: expectedErr{ - accessDenied: true, - // Docker.io behavior varies - accept either error type - imageNotFound: true, - }, - nonexistentTag: expectedErr{ - imageNotFound: true, - }, - nonexistentDigest: expectedErr{ - imageNotFound: true, - }, - }, - }, - { - name: "Fedora Registry", - pullspec: "registry.fedoraproject.org/fedora:latest", - deletion: operation{ - existingImage: expectedErr{ - accessDenied: true, - }, - nonexistentRepo: expectedErr{ - imageNotFound: true, - }, - nonexistentTag: expectedErr{ - imageNotFound: true, - }, - nonexistentDigest: expectedErr{ - imageNotFound: true, - }, - }, - inspect: operation{ - nonexistentRepo: expectedErr{ - imageNotFound: true, - }, - nonexistentTag: expectedErr{ - imageNotFound: true, - }, - nonexistentDigest: expectedErr{ - imageNotFound: true, - }, - }, - }, - { - name: "GitHub image registry", - pullspec: "ghcr.io/open-webui/open-webui:latest", - deletion: operation{ - existingImage: expectedErr{ - accessDenied: true, - }, - nonexistentRepo: expectedErr{ - accessDenied: true, - }, - nonexistentTag: expectedErr{ - accessDenied: true, - // GitHub registry behavior varies - accept either error type - imageNotFound: true, - }, - nonexistentDigest: expectedErr{ - accessDenied: true, - }, - }, - inspect: operation{ - nonexistentRepo: expectedErr{ - accessDenied: true, - }, - nonexistentTag: expectedErr{ - imageNotFound: true, - }, - nonexistentDigest: expectedErr{ - imageNotFound: true, - }, - }, - }, - { - name: "Google image registry", - pullspec: "gcr.io/google.com/cloudsdktool/google-cloud-cli:stable", - deletion: operation{ - existingImage: expectedErr{ - accessDenied: true, - }, - nonexistentRepo: expectedErr{ - imageNotFound: true, - }, - nonexistentTag: expectedErr{ - imageNotFound: true, - }, - nonexistentDigest: expectedErr{ - imageNotFound: true, - }, - }, - inspect: operation{ - nonexistentRepo: expectedErr{ - imageNotFound: true, - }, - nonexistentTag: expectedErr{ - imageNotFound: true, - }, - nonexistentDigest: expectedErr{ - imageNotFound: true, - }, - }, - }, - } - - // Runs the imagepruner inspect function against the given pullspec and - // checks that errors are what we expect them to be. The error and digest are - // returned for additional assertions and use elsewhere. - inspectTestFunc := func(t *testing.T, pullspec string, expected expectedErr) (*digest.Digest, error) { - t.Helper() - - ip, k8sSecret, err := setupImagePrunerForTestWithEmptyCreds(t) - require.NoError(t, err) - - _, imgDigest, err := ip.InspectImage(ctx, pullspec, k8sSecret, &mcfgv1.ControllerConfig{}) - if !expected.imageNotFound && !expected.accessDenied { - require.NoError(t, err) - } else if expected.imageNotFound && expected.accessDenied { - // Both flags set means accept either error type (registries may vary) - isImageNotFound := imagepruner.IsImageNotFoundErr(err) - isAccessDenied := imagepruner.IsAccessDeniedErr(err) - assert.True(t, isImageNotFound || isAccessDenied, "expected either imageNotFound or accessDenied error, got: %v", err) - } else { - assert.Equal(t, expected.imageNotFound, imagepruner.IsImageNotFoundErr(err), "image not found error should be %v", !expected.imageNotFound) - assert.Equal(t, expected.accessDenied, imagepruner.IsAccessDeniedErr(err), "access denied error should be %v", !expected.accessDenied) - } - - return imgDigest, err - } - - // Runs the imagepruner delete function against the given pullspec and checks - // that errors are what we expect them to be. The error is returned for - // additional assertions. - deleteTestFunc := func(t *testing.T, pullspec string, expected expectedErr) error { - t.Helper() - - ip, k8sSecret, err := setupImagePrunerForTestWithEmptyCreds(t) - require.NoError(t, err) - - err = ip.DeleteImage(ctx, pullspec, k8sSecret, &mcfgv1.ControllerConfig{}) - // We should always get an error back for this test because we do not have - // permissions to delete images. - assert.Error(t, err) - if expected.imageNotFound && expected.accessDenied { - // Both flags set means accept either error type (registries may vary) - isImageNotFound := imagepruner.IsImageNotFoundErr(err) - isAccessDenied := imagepruner.IsAccessDeniedErr(err) - assert.True(t, isImageNotFound || isAccessDenied, "expected either imageNotFound or accessDenied error, got: %v", err) - } else { - assert.Equal(t, expected.imageNotFound, imagepruner.IsImageNotFoundErr(err), "image not found error should be %v", !expected.imageNotFound) - assert.Equal(t, expected.accessDenied, imagepruner.IsAccessDeniedErr(err), "access denied error should be %v", !expected.accessDenied) - } - assert.True(t, imagepruner.IsTolerableDeleteErr(err)) - - return err - } - - for _, testCase := range testCases { - testCase := testCase - t.Run(testCase.name, func(t *testing.T) { - // Because each test case targets a different image registry, we can run - // them in parallel. However, each subtest must be run sequentially in - // order to ensure that we don't run into any rate limiting. - t.Parallel() - - // This should always be successful, hence why the expected outcome is - // not wired up to the individual test cases. - _, inspectExistingErr := inspectTestFunc(t, testCase.pullspec, expectedErr{}) - require.NoError(t, inspectExistingErr) - - // Use a fake digest in the image pullspec. - fakeDigestedPullspec, err := replaceTagWithDigestOnPullspec(testCase.pullspec, "fake-hash") - require.NoError(t, err) - - // Replace the tag on the tagged pullspec with a tag we know will not be there. - fakeTagPullspec, err := replaceTagOnPullspec(testCase.pullspec, "fake-tag") - require.NoError(t, err) - - // Replace the repo on the pullspec with a repo we know will not be there. - fakeRepoPullspec, err := replaceRepoOnPullspec(testCase.pullspec, "fake-repo") - require.NoError(t, err) - - t.Run("Inspect", func(t *testing.T) { - t.Run("Existing image", func(t *testing.T) { - assert.NoError(t, inspectExistingErr) - }) - - t.Run("Nonexistent image digest", func(t *testing.T) { - _, err = inspectTestFunc(t, fakeDigestedPullspec, testCase.inspect.nonexistentDigest) - assert.Error(t, err) - }) - - t.Run("Nonexistent image tag", func(t *testing.T) { - _, err = inspectTestFunc(t, fakeTagPullspec, testCase.inspect.nonexistentTag) - assert.Error(t, err) - }) - - t.Run("Nonexistent image repo", func(t *testing.T) { - _, err = inspectTestFunc(t, fakeRepoPullspec, testCase.inspect.nonexistentRepo) - assert.Error(t, err) - }) - }) - - t.Run("Delete", func(t *testing.T) { - // This should always return an error because we're not authenticated - // against any of the image registries used here. - t.Run("Existing image", func(t *testing.T) { - assert.Error(t, deleteTestFunc(t, testCase.pullspec, testCase.deletion.existingImage)) - }) - - t.Run("Nonexistent image digest", func(t *testing.T) { - assert.Error(t, deleteTestFunc(t, fakeDigestedPullspec, testCase.deletion.nonexistentDigest)) - }) - - t.Run("Nonexistent image tag", func(t *testing.T) { - assert.Error(t, deleteTestFunc(t, fakeTagPullspec, testCase.deletion.nonexistentTag)) - }) - - t.Run("Nonexistent image repo", func(t *testing.T) { - assert.Error(t, deleteTestFunc(t, fakeRepoPullspec, testCase.deletion.nonexistentRepo)) - }) - }) - }) - } -} - -// Tests that the image pullspec mutation functions work as they should. -func TestImagePrunerHelpers(t *testing.T) { - t.Parallel() - - testCases := []struct { - name string - pullspec string - expectedRepoReplacement string - expectedTagReplacement string - expectedDigestReplacement string - }{ - { - name: "Simple tagged", - pullspec: "registry.hostname.com/org/repo:tag", - expectedRepoReplacement: "registry.hostname.com/org/notrealgoaway:tag", - expectedTagReplacement: "registry.hostname.com/org/repo:notrealgoaway", - expectedDigestReplacement: "registry.hostname.com/org/repo@sha256:86dec32bc7f325bef814f17689b40c8c04017f3ead2bb600dbda09a26da27a7a", - }, - { - name: "Simple digested", - pullspec: "registry.hostname.com/org/repo@sha256:4bc453b53cb3d914b45f4b250294236adba2c0e09ff6f03793949e7e39fd4cc1", - expectedRepoReplacement: "registry.hostname.com/org/notrealgoaway@sha256:4bc453b53cb3d914b45f4b250294236adba2c0e09ff6f03793949e7e39fd4cc1", - expectedTagReplacement: "registry.hostname.com/org/repo@sha256:86dec32bc7f325bef814f17689b40c8c04017f3ead2bb600dbda09a26da27a7a", - expectedDigestReplacement: "registry.hostname.com/org/repo@sha256:86dec32bc7f325bef814f17689b40c8c04017f3ead2bb600dbda09a26da27a7a", - }, - } - - for _, testCase := range testCases { - t.Run(testCase.name, func(t *testing.T) { - t.Run("Repo", func(t *testing.T) { - replacedRepo, err := replaceRepoOnPullspec(testCase.pullspec, "notrealgoaway") - assert.NoError(t, err) - assert.Equal(t, testCase.expectedRepoReplacement, replacedRepo) - }) - - t.Run("Tag", func(t *testing.T) { - replacedTag, err := replaceTagOnPullspec(testCase.pullspec, "notrealgoaway") - assert.NoError(t, err) - assert.Equal(t, testCase.expectedTagReplacement, replacedTag) - }) - - t.Run("Digest", func(t *testing.T) { - replacedDigest, err := replaceTagWithDigestOnPullspec(testCase.pullspec, "notrealgoaway") - assert.NoError(t, err) - assert.Equal(t, testCase.expectedDigestReplacement, replacedDigest) - }) - }) - } -} - -// Replaces the repo portion of the given pullspec with 'notrealgoaway'. -// Handles both tagged and digested pullspecs. -func replaceRepoOnPullspec(pullspec, fakeRepoName string) (string, error) { - named, err := reference.ParseNamed(pullspec) - if err != nil { - return "", err - } - - splitChar := "/" - - path := reference.Path(named) - if strings.Contains(path, splitChar) { - split := strings.Split(path, splitChar) - split[len(split)-1] = fakeRepoName - path = strings.Join(split, splitChar) - } else { - path = fakeRepoName - } - - // Validate our final pullspec before returning to catch any errors. - getParsedPullspec := func(p string) (string, error) { - parsed, err := reference.ParseNamed(p) - if err != nil { - return "", fmt.Errorf("could not parse generated pullspec %s: %w", p, err) - } - - return parsed.String(), nil - } - - if tagged, ok := named.(reference.NamedTagged); ok { - return getParsedPullspec(fmt.Sprintf("%s/%s:%s", reference.Domain(named), path, tagged.Tag())) - } - - digested, digestedOK := named.(reference.Digested) - canonical, canonicalOK := named.(reference.Canonical) - - digest := "" - if digestedOK { - digest = digested.Digest().String() - } - - if canonicalOK { - digest = canonical.Digest().String() - } - - if digest != "" { - return getParsedPullspec(fmt.Sprintf("%s/%s@%s", reference.Domain(named), path, digest)) - } - - return pullspec, fmt.Errorf("don't know what to do with this pullspec") -} - -// Replaces the tag portion of the given pullspec with 'notrealgoaway'. If a -// digested pullspec is given, we use the fake tag to create a SHA256 sum instead. -func replaceTagOnPullspec(pullspec, fakeTag string) (string, error) { - named, err := reference.ParseNamed(pullspec) - if err != nil { - return "", err - } - - if _, ok := named.(reference.NamedTagged); ok { - taggedRef, err := reference.WithTag(named, fakeTag) - if err != nil { - return "", err - } - - return taggedRef.String(), nil - } - - _, digestedOK := named.(reference.Digested) - _, canonicalOK := named.(reference.Canonical) - - if (digestedOK || canonicalOK) || (digestedOK && canonicalOK) { - return replaceTagWithDigestOnPullspec(pullspec, fakeTag) - } - - return "", fmt.Errorf("don't know what to do with this pullspec") -} - -// Replaces the tag on a given pullspec with a sha256 representation of the -// provided string. -func replaceTagWithDigestOnPullspec(pullspec, fakeDigestContent string) (string, error) { - hasher := sha256.New() - hasher.Write([]byte(fakeDigestContent)) - fakeDigest := fmt.Sprintf("sha256:%x", hasher.Sum(nil)) - - return utils.ParseImagePullspec(pullspec, fakeDigest) -} - -// Determines if a given test which depends on real creds can be run. -func skipIfUnableToRun(t *testing.T) { - if realImageRegistrySecretPath != "" && realImagePullspec != "" { - t.Logf("Test suite invoked with -image-registry-secret %q and -image-pullspec %q, will perform full image registry test", realImageRegistrySecretPath, realImagePullspec) - } else { - t.Skip("-image-registry-secret and -image-pullspec flags unset") - } -} - -// Creates a new imagepruner instance with empty creds. -func setupImagePrunerForTestWithEmptyCreds(t *testing.T) (imagepruner.ImagePruner, *corev1.Secret, error) { - // Write an "empty" creds file since we don't actually need creds for this test. - authfilePath := filepath.Join(t.TempDir(), "authfile.json") - if err := os.WriteFile(authfilePath, []byte(`{"auths":{}}`), 0o755); err != nil { - return nil, nil, err - } - - return setupImagePrunerForTest(authfilePath) -} - -// Creates a new imagepruner with populated creds from the given path. -func setupImagePrunerForTest(credPath string) (imagepruner.ImagePruner, *corev1.Secret, error) { - secretBytes, err := os.ReadFile(credPath) - if err != nil { - return nil, nil, err - } - - is, err := secrets.NewImageRegistrySecret(secretBytes) - if err != nil { - return nil, nil, err - } - - k8sSecret, err := is.K8sSecret(corev1.SecretTypeDockerConfigJson) - if err != nil { - return nil, nil, err - } - - return imagepruner.NewImagePruner(), k8sSecret, nil -} - -// Unwraps all of the errors in a given error chain and calls spew.Sdump() on -// each one to get rich type information for debugging purporses. -func unwrapAll(err error) string { - // The function should handle a nil error gracefully. - if err == nil { - return "" - } - - out := []string{} - - // Loop indefinitely until an error can no longer be unwrapped. - for { - // Attempt to unwrap the current error. - unwrapped := errors.Unwrap(err) - - out = append(out, spew.Sdump(err)) - - // If unwrapped is nil, we've reached the innermost error. - if unwrapped == nil { - return strings.Join(out, "\n") - } - - // If unwrapped is not nil, continue the loop with the newly unwrapped error. - err = unwrapped - } -} - -// Adds the external image registry hostname to the given secret and writes it -// to the given path as a DockerConfigJSON secret. -func addExternalRegistryHostnameToSecret(internalRegistryHostname, externalRegistryHostname string, secret *corev1.Secret, secretPath string) (*corev1.Secret, error) { - is, err := secrets.NewImageRegistrySecret(secret) - if err != nil { - return nil, err - } - - dockerconfigJSON := is.DockerConfigJSON() - if _, ok := dockerconfigJSON.Auths[internalRegistryHostname]; !ok { - return nil, fmt.Errorf("secret %s missing internal registry hostname %s", secret.Name, internalRegistryHostname) - } - - dockerconfigJSON.Auths[externalRegistryHostname] = dockerconfigJSON.Auths[internalRegistryHostname] - - is, err = secrets.NewImageRegistrySecret(dockerconfigJSON) - if err != nil { - return nil, err - } - - k8sSecret, err := is.K8sSecret(corev1.SecretTypeDockerConfigJson) - if err != nil { - return nil, err - } - - out := secret.DeepCopy() - out.Data = k8sSecret.Data - - secretBytes, err := is.JSONBytes(corev1.SecretTypeDockerConfigJson) - if err != nil { - return nil, err - } - - if err := os.WriteFile(secretPath, secretBytes, 0o644); err != nil { - return nil, err - } - - return out, nil -} - -// Determines if a test can target the internal cluster image registry. -func canTestOnInClusterRegistry(ctx context.Context, kubeconfig string) (bool, error) { - cs, err := framework.NewClientSetOrError(kubeconfig) - if err != nil { - return false, fmt.Errorf("could not get clientset: %w", err) - } - - cv, err := cs.ConfigV1Interface.ClusterVersions().List(ctx, metav1.ListOptions{}) - if err != nil { - return false, fmt.Errorf("could not list clusterversions: %w", err) - } - - for _, cv := range cv.Items { - for _, capability := range cv.Status.Capabilities.EnabledCapabilities { - if capability == "ImageRegistry" { - return true, nil - } - } - } - - return false, nil -} - -// Skopeo requires that a policy.json file be present. Usually, this file is -// placed in /etc/containers/policy.json when Skopeo is installed. Because we -// must install skopeo from source in CI, this file is missing. So what we do -// in this scenario is write our own policy.json file to a temp directory -// instead. The temp directory is managed by the Go test suite and will be -// removed after the test is finished. -func writePolicyFile(t *testing.T) (string, error) { - policyPath := filepath.Join(t.TempDir(), "policy.json") - - // Compacted contents of https://github.com/containers/skopeo/blob/main/default-policy.json - policyJSONBytes := []byte(`{"default":[{"type":"insecureAcceptAnything"}],"transports":{"docker-daemon":{"":[{"type":"insecureAcceptAnything"}]}}}`) - - return policyPath, os.WriteFile(policyPath, policyJSONBytes, 0o755) -} - -// Creates an empty scratch image and pushes it to the given pullspec using the -// provided secret path. Accepts an optional certsDir parameter which is -// particularly useful for pushing internal image registries which have -// self-signed certificates. -func createAndPushScratchImage(ctx context.Context, t *testing.T, pullspec, secretPath, certsDir string) error { - tmpDir := t.TempDir() - - srcImage := filepath.Join(tmpDir, helpers.ImageTarballFilename) - - if err := helpers.CreateScratchImageTarball(tmpDir); err != nil { - return err - } - - policyPath, err := writePolicyFile(t) - if err != nil { - return fmt.Errorf("could not write policy.json file: %w", err) - } - - cmd := exec.Command("skopeo", "--policy", policyPath, "copy", "--dest-authfile", secretPath, "tarball://"+srcImage, "docker://"+pullspec) - if certsDir != "" { - cmd = exec.Command("skopeo", "--policy", policyPath, "copy", "--dest-cert-dir", certsDir, "--dest-authfile", secretPath, "tarball://"+srcImage, "docker://"+pullspec) - } - - t.Logf("Copying %s to %s using skopeo", srcImage, pullspec) - t.Logf("%v", cmd.String()) - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - return cmd.Run() -} diff --git a/test/e2e-ocl/layered_image_scaleup_test.go b/test/e2e-ocl/layered_image_scaleup_test.go deleted file mode 100644 index cd05427a89..0000000000 --- a/test/e2e-ocl/layered_image_scaleup_test.go +++ /dev/null @@ -1,131 +0,0 @@ -package e2e_ocl_test - -import ( - "context" - "testing" - "time" - - machineclientset "github.com/openshift/client-go/machine/clientset/versioned" - "github.com/openshift/machine-config-operator/pkg/daemon/constants" - "github.com/openshift/machine-config-operator/test/framework" - "github.com/openshift/machine-config-operator/test/helpers" - "github.com/stretchr/testify/require" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/wait" -) - -// TestLayeredImageServingDuringNodeScaleUp tests the 1-reboot vs 2-reboot behavior when scaling up nodes -// while a layered image build is in progress or has completed. -// -// This test verifies that: -// - When UpdatedMachineCount == 0, new scaled nodes get base image (2-reboot path) -// - After at least one node has the layered image (UpdatedMachineCount > 0), new scaled nodes -// get the layered image during bootstrap (1-reboot path) for external registries -// - Node annotations reflect the correct image -// -// Test flow: -// 1. Create a layered MCP with one existing node -// 2. Create a MachineOSConfig and wait for build to complete -// 3. Wait for the first node to adopt the layered image (UpdatedMachineCount > 0) -// 4. Scale up a MachineSet to add new nodes -// 5. Verify new nodes get the layered image during bootstrap -// 6. Verify node annotations contain the correct image -func TestLayeredImageServingDuringNodeScaleUp(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - t.Cleanup(cancel) - - cs := framework.NewClientSet("") - - // Step 1: Build the layered image (this creates the MCP and MOSC) - t.Logf("Creating layered image build for pool %q", layeredMCPName) - imagePullspec, _ := runOnClusterLayeringTest(t, onClusterLayeringTestOpts{ - poolName: layeredMCPName, - customDockerfiles: map[string]string{ - layeredMCPName: cowsayDockerfile, - }, - }) - - t.Logf("Layered image build completed, image pullspec: %s", imagePullspec) - - // Step 2: Get a random worker node and label it to opt into the layered pool - t.Logf("Selecting a random worker node to opt into pool %q", layeredMCPName) - existingNode := helpers.GetRandomNode(t, cs, "worker") - t.Logf("Selected node %s to opt into layered pool", existingNode.Name) - - // Label the node to add it to the layered pool - unlabelFunc := makeIdempotentAndRegisterAlwaysRun(t, helpers.LabelNode(t, cs, existingNode, helpers.MCPNameToRole(layeredMCPName))) - - // Step 3: Wait for the existing node to adopt the layered image - t.Logf("Waiting for existing node %s to adopt layered image", existingNode.Name) - helpers.WaitForNodeImageChange(t, cs, existingNode, imagePullspec) - helpers.AssertNodeBootedIntoImage(t, cs, existingNode, imagePullspec) - t.Logf("Node %s is booted into layered image %q", existingNode.Name, imagePullspec) - - // Wait for the pool's UpdatedMachineCount to be > 0 - t.Logf("Waiting for pool %q to have UpdatedMachineCount > 0", layeredMCPName) - require.NoError(t, wait.PollUntilContextTimeout(ctx, 2*time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) { - mcp, err := cs.MachineConfigPools().Get(ctx, layeredMCPName, metav1.GetOptions{}) - if err != nil { - return false, err - } - t.Logf("Pool %q has UpdatedMachineCount=%d, waiting for > 0", layeredMCPName, mcp.Status.UpdatedMachineCount) - return mcp.Status.UpdatedMachineCount > 0, nil - })) - - t.Logf("Pool %q now has UpdatedMachineCount > 0, proceeding to scale up", layeredMCPName) - - // Step 4: Scale up a MachineSet to add new nodes - // Get a MachineSet from the worker pool (not the layered pool since we just created it) - machineclient := machineclientset.NewForConfigOrDie(cs.GetRestConfig()) - machinesets, err := machineclient.MachineV1beta1().MachineSets("openshift-machine-api").List(ctx, metav1.ListOptions{}) - require.NoError(t, err) - require.NotEmpty(t, machinesets.Items, "No MachineSets found in cluster") - - // Use the first MachineSet for scaling - machineset := machinesets.Items[0] - originalReplicas := *machineset.Spec.Replicas - desiredReplicas := originalReplicas + 1 - - t.Logf("Scaling MachineSet %q from %d to %d replicas", machineset.Name, originalReplicas, desiredReplicas) - - // Scale up the MachineSet and wait for new nodes to be ready - newNodes, cleanupFunc := helpers.ScaleMachineSetAndWaitForNodesToBeReady(t, cs, machineset.Name, desiredReplicas) - t.Cleanup(cleanupFunc) - - require.NotEmpty(t, newNodes, "No new nodes were created during scale-up") - newNode := newNodes[0] - t.Logf("New node %s has been created and is ready", newNode.Name) - - // Label the new node to add it to the layered pool - newNodeUnlabelFunc := makeIdempotentAndRegisterAlwaysRun(t, helpers.LabelNode(t, cs, *newNode, helpers.MCPNameToRole(layeredMCPName))) - - // Step 5: Verify the new node gets the layered image - t.Logf("Waiting for new node %s to adopt layered image during bootstrap", newNode.Name) - helpers.WaitForNodeImageChange(t, cs, *newNode, imagePullspec) - helpers.AssertNodeBootedIntoImage(t, cs, *newNode, imagePullspec) - t.Logf("New node %s successfully booted into layered image %q", newNode.Name, imagePullspec) - - // Step 6: Verify node annotations contain the correct image - t.Logf("Verifying node annotations for new node %s", newNode.Name) - refreshedNode, err := cs.CoreV1Interface.Nodes().Get(ctx, newNode.Name, metav1.GetOptions{}) - require.NoError(t, err) - - currentImage := refreshedNode.Annotations[constants.CurrentImageAnnotationKey] - desiredImage := refreshedNode.Annotations[constants.DesiredImageAnnotationKey] - - require.Equal(t, imagePullspec, currentImage, "Current image annotation should match layered image") - require.Equal(t, imagePullspec, desiredImage, "Desired image annotation should match layered image") - t.Logf("Node annotations verified: current=%s, desired=%s", currentImage, desiredImage) - - // Cleanup: Remove the label from the new node and wait for it to revert - t.Logf("Cleaning up: removing label from new node %s", newNode.Name) - newNodeUnlabelFunc() - assertNodeRevertsToNonLayered(t, cs, *newNode) - - // Cleanup: Remove the label from the existing node and wait for it to revert - t.Logf("Cleaning up: removing label from existing node %s", existingNode.Name) - unlabelFunc() - assertNodeRevertsToNonLayered(t, cs, existingNode) - - t.Logf("Test completed successfully!") -} diff --git a/test/e2e-ocl/main_test.go b/test/e2e-ocl/main_test.go deleted file mode 100644 index e6f51b52b0..0000000000 --- a/test/e2e-ocl/main_test.go +++ /dev/null @@ -1,16 +0,0 @@ -package e2e_ocl_test - -import ( - "flag" - "os" - "testing" - - "k8s.io/klog/v2" -) - -func TestMain(m *testing.M) { - flag.Parse() - klog.Infof("-skip-cleanup: %v", skipCleanupAlways) - klog.Infof("-skip-cleanup-on-failure: %v", skipCleanupOnlyAfterFailure) - os.Exit(m.Run()) -} diff --git a/test/e2e-ocl/onclusterlayering_test.go b/test/e2e-ocl/onclusterlayering_test.go deleted file mode 100644 index 5c16195057..0000000000 --- a/test/e2e-ocl/onclusterlayering_test.go +++ /dev/null @@ -1,1753 +0,0 @@ -package e2e_ocl_test - -import ( - "context" - _ "embed" - "errors" - "flag" - "fmt" - "os" - "os/exec" - "path/filepath" - goruntime "runtime" - "strings" - "testing" - "time" - - batchv1 "k8s.io/api/batch/v1" - corev1 "k8s.io/api/core/v1" - - ign3types "github.com/coreos/ignition/v2/config/v3_5/types" - mcfgv1 "github.com/openshift/api/machineconfiguration/v1" - - "github.com/openshift/machine-config-operator/pkg/apihelpers" - daemonconsts "github.com/openshift/machine-config-operator/pkg/daemon/constants" - "github.com/openshift/machine-config-operator/pkg/daemon/runtimeassets" - "github.com/openshift/machine-config-operator/test/framework" - "github.com/openshift/machine-config-operator/test/helpers" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - - "github.com/openshift/machine-config-operator/pkg/controller/build/buildrequest" - "github.com/openshift/machine-config-operator/pkg/controller/build/constants" - "github.com/openshift/machine-config-operator/pkg/controller/build/imagebuilder" - "github.com/openshift/machine-config-operator/pkg/controller/build/utils" - ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common" - k8serrors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/util/wait" -) - -const ( - // The MachineConfigPool to create for the tests. - layeredMCPName string = "layered" - - // The MachineConfig names to create for the tests. - mcNameUsbguard string = "inspect-usbguard" - - // The name of the global pull secret copy to use for the tests. - globalPullSecretCloneName string = "global-pull-secret-copy" -) - -var ( - // Provides a Containerfile that installs cowsayusing the Centos Stream 9 - // EPEL repository to do so without requiring any entitlements. - //go:embed Containerfile.cowsay - cowsayDockerfile string - - // Provides a Containerfile that installs Buildah from the default RHCOS RPM - // repositories. If the installation succeeds, the entitlement certificate is - // working. - //go:embed Containerfile.entitled - entitledDockerfile string - - // Provides a Containerfile that works similarly to the cowsay Dockerfile - // with the exception that the /etc/yum.repos.d and /etc/pki/rpm-gpg key - // content is mounted into the build context by the BuildController. - //go:embed Containerfile.yum-repos-d - yumReposDockerfile string - - //go:embed Containerfile.okd-fcos - okdFcosDockerfile string - - //go:embed Containerfile.simple - simpleDockerfile string -) - -var skipCleanupAlways bool -var skipCleanupOnlyAfterFailure bool - -func init() { - // Skips running the cleanup functions. Useful for debugging tests. - flag.BoolVar(&skipCleanupAlways, "skip-cleanup", false, "Skips running cleanups regardless of outcome") - // Skips running the cleanup function only when the test fails. - flag.BoolVar(&skipCleanupOnlyAfterFailure, "skip-cleanup-on-failure", false, "Skips running cleanups only after failure") -} - -// Holds elements common for each on-cluster build tests. -type onClusterLayeringTestOpts struct { - // Which image builder type to use for the test. - imageBuilderType mcfgv1.MachineOSImageBuilderType - - // The custom Dockerfiles to use for the test. This is a map of MachineConfigPool name to Dockerfile content. - customDockerfiles map[string]string - - // What node should be targeted for the test. - targetNode *corev1.Node - - // What MachineConfigPool name to use for the test. - poolName string - - // Use RHEL entitlements - entitlementRequired bool - - // Inject YUM repo information from a Centos 9 stream container - useYumRepos bool - - // Apply the following MachineConfigs before beginning the build. - machineConfigs []*mcfgv1.MachineConfig -} - -func TestOnClusterLayeringOnOKD(t *testing.T) { - skipOnOCP(t) - - runOnClusterLayeringTest(t, onClusterLayeringTestOpts{ - poolName: layeredMCPName, - customDockerfiles: map[string]string{ - layeredMCPName: okdFcosDockerfile, - }, - }) -} - -// Tests that an on-cluster build can be performed with the Custom Pod Builder. -func TestOnClusterLayering(t *testing.T) { - _, firstMosb := runOnClusterLayeringTest(t, onClusterLayeringTestOpts{ - poolName: layeredMCPName, - customDockerfiles: map[string]string{ - layeredMCPName: cowsayDockerfile, - }, - }) - - assert.NotEqual(t, string(firstMosb.UID), "") - - // Test rebuild annotation works - ctx, cancel := context.WithCancel(context.Background()) - t.Cleanup(cancel) - - t.Logf("Applying rebuild annotation (%q) to MachineOSConfig (%q) to cause a rebuild", constants.RebuildMachineOSConfigAnnotationKey, layeredMCPName) - - cs := framework.NewClientSet("") - - mosc, err := cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, layeredMCPName, metav1.GetOptions{}) - require.NoError(t, err) - - helpers.SetRebuildAnnotationOnMachineOSConfig(ctx, t, cs.GetMcfgclient(), mosc) - - // Use the UID of the previous MOSB to ensure it is deleted as the rebuild will trigger a MOSB with the same name - t.Logf("Waiting for the previous MachineOSBuild with UID %q to be deleted", firstMosb.UID) - waitForMOSBToBeDeleted(t, cs, firstMosb) - - // Wait for the build to start - secondMosb := waitForBuildToStartForPoolAndConfig(t, cs, layeredMCPName, mosc.Name) - assert.NotEqual(t, firstMosb.UID, secondMosb.UID) -} - -// Tests that an on-cluster build can be performed and that the resulting image -// is rolled out to an opted-in node. -func TestOnClusterBuildRollsOutImage(t *testing.T) { - requiredKernelType := ctrlcommon.KernelTypeRealtime - if goruntime.GOARCH == "arm64" { - requiredKernelType = ctrlcommon.KernelType64kPages - } - - imagePullspec, _ := runOnClusterLayeringTest(t, onClusterLayeringTestOpts{ - poolName: layeredMCPName, - customDockerfiles: map[string]string{ - layeredMCPName: cowsayDockerfile, - }, - machineConfigs: []*mcfgv1.MachineConfig{ - newMachineConfigWithKernelType(fmt.Sprintf("%s-kernel-machineconfig", requiredKernelType), layeredMCPName, requiredKernelType), - }, - }) - - cs := framework.NewClientSet("") - node := helpers.GetRandomNode(t, cs, "worker") - - unlabelFunc := makeIdempotentAndRegisterAlwaysRun(t, helpers.LabelNode(t, cs, node, helpers.MCPNameToRole(layeredMCPName))) - helpers.WaitForNodeImageChange(t, cs, node, imagePullspec) - - helpers.AssertNodeBootedIntoImage(t, cs, node, imagePullspec) - t.Logf("Node %s is booted into image %q", node.Name, imagePullspec) - t.Log(helpers.ExecCmdOnNode(t, cs, node, "chroot", "/rootfs", "cowsay", "Moo!")) - - // Check that the booted image has the requested kernel - foundKernel := helpers.ExecCmdOnNode(t, cs, node, "chroot", "/rootfs", "uname", "-r") - t.Logf("Node %s running kernel: %s", node.Name, foundKernel) - if !compareKernelType(t, foundKernel, requiredKernelType) { - t.Fatalf("Kernel type requested %s, got %s", requiredKernelType, foundKernel) - } - - unlabelFunc() - - assertNodeRevertsToNonLayered(t, cs, node) - - // Check that the reverted image has the default kernel. - requiredKernelType = ctrlcommon.KernelTypeDefault - foundKernel = helpers.ExecCmdOnNode(t, cs, node, "chroot", "/rootfs", "uname", "-r") - t.Logf("Node %s running kernel: %s", node.Name, foundKernel) - if !compareKernelType(t, foundKernel, requiredKernelType) { - t.Fatalf("Kernel type requested %s, got %s", requiredKernelType, foundKernel) - } -} - -func TestMissingImageIsRebuilt(t *testing.T) { - cs := framework.NewClientSet("") - - ctx, cancel := context.WithCancel(context.Background()) - t.Cleanup(cancel) - - kubeassert := helpers.AssertClientSet(t, cs).WithContext(ctx) - - firstImagePullspec, firstMOSB := runOnClusterLayeringTest(t, onClusterLayeringTestOpts{ - poolName: layeredMCPName, - customDockerfiles: map[string]string{ - layeredMCPName: cowsayDockerfile, - }, - }) - - moscName := layeredMCPName - t.Logf("Waiting for MachineOSConfig %q to have a new pullspec", moscName) - waitForMOSCToGetNewPullspec(ctx, t, cs, moscName, firstImagePullspec) - - // Create a MC to create another MOSB - testMC := newMachineConfigTriggersImageRebuild(mcNameUsbguard, layeredMCPName, []string{"usbguard"}) - t.Logf("Creating MachineConfig %q", testMC.Name) - firstMC, err := cs.MachineConfigs().Create(ctx, testMC, metav1.CreateOptions{}) - require.NoError(t, err) - t.Logf("Created MachineConfig %q", firstMC.Name) - kubeassert.MachineConfigExists(firstMC) - - // Wait for the build to start - t.Logf("Waiting for 2nd build to start...") - secondMOSBName := waitForMOSCToUpdateCurrentMOSB(ctx, t, cs, moscName, firstMOSB.Name) - secondMOSB, err := cs.GetMcfgclient().MachineconfigurationV1().MachineOSBuilds().Get(ctx, secondMOSBName, metav1.GetOptions{}) - require.NoError(t, err) - secondMOSB = waitForBuildToStart(t, cs, secondMOSB) - t.Logf("MachineOSBuild %q has started", secondMOSB.Name) - - // Wait for the build to finish - t.Logf("Waiting for 2nd build completion...") - secondFinishedBuild := waitForBuildToComplete(t, cs, secondMOSB) - secondImagePullspec := string(secondFinishedBuild.Status.DigestedImagePushSpec) - t.Logf("MachineOSBuild %q has completed and produced image: %s", secondFinishedBuild.Name, secondImagePullspec) - waitForMOSCToGetNewPullspec(ctx, t, cs, moscName, secondImagePullspec) - - // Delete the first image- simulating image deletion - t.Logf("Deleting image %q", firstImagePullspec) - istName := fmt.Sprintf("os-image:%s", firstMOSB.Name) - err = cs.ImageStreamTags(ctrlcommon.MCONamespace).Delete(ctx, istName, metav1.DeleteOptions{}) - require.NoError(t, err) - kubeassert.ImageDoesNotExist(istName) - t.Logf("Deleted image %q", firstImagePullspec) - - // Delete the first MC - t.Logf("Deleting MachineConfig %q to retrigger build", firstMC.Name) - err = cs.MachineConfigs().Delete(ctx, firstMC.Name, metav1.DeleteOptions{}) - require.NoError(t, err) - kubeassert.MachineConfigDoesNotExist(firstMC) - t.Logf("Deleted MachineConfig %q", firstMC.Name) - - // Wait for the build to start - t.Logf("Waiting for 3rd build (rebuild of image1) to start...") - thirdMOSBName := waitForMOSCToUpdateCurrentMOSB(ctx, t, cs, moscName, secondMOSB.Name) - thirdMOSB, err := cs.GetMcfgclient().MachineconfigurationV1().MachineOSBuilds().Get(ctx, thirdMOSBName, metav1.GetOptions{}) - require.NoError(t, err) - thirdMOSB = waitForBuildToStart(t, cs, thirdMOSB) - t.Logf("MachineOSBuild %q has started (rebuild of image1)", thirdMOSB.Name) - - // Wait for the build to finish - t.Logf("Waiting for 3rd build completion...") - thirdFinishedBuild := waitForBuildToComplete(t, cs, thirdMOSB) - thirdImagePullspec := string(thirdFinishedBuild.Status.DigestedImagePushSpec) - t.Logf("MachineOSBuild %q has completed and produced image: %s", thirdFinishedBuild.Name, thirdImagePullspec) - waitForMOSCToGetNewPullspec(ctx, t, cs, moscName, thirdImagePullspec) - - // Apply the MC again - t.Logf("Re‐applying the same MachineConfig %q to confirm no new build for image2", testMC.Name) - secondMC, err := cs.MachineConfigs().Create(ctx, testMC, metav1.CreateOptions{}) - require.NoError(t, err) - kubeassert.MachineConfigExists(secondMC) - t.Logf("Created MachineConfig %q", secondMC.Name) - - // waitForMOSCToGetNewPullspec(ctx, t, cs, moscName, secondImagePullspec) - - t.Logf("Waiting for recycled USBGuard MOSB %q to finish (or to prove there is none)", secondMOSB.Name) - secondMOSB, err = cs.GetMcfgclient().MachineconfigurationV1().MachineOSBuilds().Get(ctx, secondMOSB.Name, metav1.GetOptions{}) - require.NoError(t, err) - secondMOSB = waitForBuildToComplete(t, cs, secondMOSB) - t.Logf("MOSB %q is now complete (reused image)", secondMOSB.Name) - - t.Logf("Deleting MachineOSBuild %q (MOSB3) to test pruning of image1", thirdMOSB.Name) - err = cs.MachineconfigurationV1Interface.MachineOSBuilds().Delete(ctx, thirdMOSB.Name, metav1.DeleteOptions{}) - require.NoError(t, err) - kubeassert.MachineOSBuildDoesNotExist(thirdMOSB) - t.Logf("Deleted MachineOSBuild %q", thirdMOSB.Name) - - deletedIst := fmt.Sprintf("os-image:%s", thirdMOSBName) - kubeassert.ImageDoesNotExist(deletedIst) - t.Logf("ImageStreamTag %q has been pruned", deletedIst) - - t.Logf("Deleting MachineConfig %q for cleanup", secondMC.Name) - err = cs.MachineConfigs().Delete(ctx, secondMC.Name, metav1.DeleteOptions{}) - require.NoError(t, err) - kubeassert.MachineConfigDoesNotExist(secondMC) - t.Logf("Deleted MachineConfig %q", secondMC.Name) -} - -func assertNodeRevertsToNonLayered(t *testing.T, cs *framework.ClientSet, node corev1.Node) { - workerMCName := helpers.GetMcName(t, cs, "worker") - workerMC, err := cs.MachineConfigs().Get(context.TODO(), workerMCName, metav1.GetOptions{}) - require.NoError(t, err) - - helpers.WaitForNodeConfigAndImageChange(t, cs, node, workerMCName, "") - - helpers.AssertNodeBootedIntoImage(t, cs, node, workerMC.Spec.OSImageURL) - t.Logf("Node %s has reverted to OS image %q", node.Name, workerMC.Spec.OSImageURL) - - helpers.AssertFileNotOnNode(t, cs, node, filepath.Join("/etc/systemd/system", runtimeassets.RevertServiceName)) - helpers.AssertFileNotOnNode(t, cs, node, runtimeassets.RevertServiceMachineConfigFile) -} - -// This test extracts the /etc/yum.repos.d and /etc/pki/rpm-gpg content from a -// Centos Stream 9 image and injects them into the MCO namespace. It then -// performs a build with the expectation that these artifacts will be used, -// simulating a build where someone has added this content; usually a Red Hat -// Satellite user. -func TestYumReposBuilds(t *testing.T) { - // Skipping this test as it is having a package conflict issue unrelated to MCO - t.Skip() - runOnClusterLayeringTest(t, onClusterLayeringTestOpts{ - poolName: layeredMCPName, - customDockerfiles: map[string]string{ - layeredMCPName: yumReposDockerfile, - }, - useYumRepos: true, - }) -} - -// Then performs an on-cluster layering build which should consume the -// etc-pki-entitlement certificates. -func TestEntitledBuilds(t *testing.T) { - skipOnOKD(t) - - runOnClusterLayeringTest(t, onClusterLayeringTestOpts{ - poolName: layeredMCPName, - customDockerfiles: map[string]string{ - layeredMCPName: entitledDockerfile, - }, - entitlementRequired: true, - }) -} - -// This test verifies that if a change is made to a given MachineOSConfig, that -// any in-progress builds are terminated and that only the latest change is -// being built. -func TestMachineOSConfigChangeRestartsBuild(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - t.Cleanup(cancel) - - cs := framework.NewClientSet("") - - mosc := prepareForOnClusterLayeringTest(t, cs, onClusterLayeringTestOpts{ - poolName: layeredMCPName, - customDockerfiles: map[string]string{ - layeredMCPName: cowsayDockerfile, - }, - }) - - createMachineOSConfig(t, cs, mosc) - - mcp, err := cs.MachineconfigurationV1Interface.MachineConfigPools().Get(ctx, layeredMCPName, metav1.GetOptions{}) - require.NoError(t, err) - - mc, err := cs.MachineconfigurationV1Interface.MachineConfigs().Get(ctx, mcp.Spec.Configuration.Name, metav1.GetOptions{}) - require.NoError(t, err) - - firstMosb := buildrequest.NewMachineOSBuildOrDie(buildrequest.MachineOSBuildOpts{ - MachineConfig: mc, - MachineOSConfig: mosc, - MachineConfigPool: mcp, - }) - - // First, we get a MachineOSBuild started as usual. - waitForBuildToStart(t, cs, firstMosb) - - // Next, we update the Containerfile. - t.Logf("Initial build has started, updating Containerfile...") - - apiMosc := helpers.SetContainerfileContentsOnMachineOSConfig(ctx, t, cs.GetMcfgclient(), mosc, "FROM configs AS final\nRUN echo 'hello' > /etc/hello") - - moscChangeMosb := buildrequest.NewMachineOSBuildOrDie(buildrequest.MachineOSBuildOpts{ - MachineConfig: mc, - MachineOSConfig: apiMosc, - MachineConfigPool: mcp, - }) - - kubeassert := helpers.AssertClientSet(t, cs).WithContext(ctx) - - assertBuildObjectsAreCreated(t, kubeassert, firstMosb) - - t.Logf("Containerfile is updated, waiting for new build %s to start", moscChangeMosb.Name) - - // Wait for the second build to start. - waitForBuildToStart(t, cs, moscChangeMosb) - - t.Logf("Waiting for initial MachineOSBuild %s to be deleted", firstMosb.Name) - // Wait for the first build to be deleted. - waitForBuildToBeDeleted(t, cs, firstMosb) - - // Ensure that the second build still exists. - _, err = cs.MachineconfigurationV1Interface.MachineOSBuilds().Get(context.TODO(), moscChangeMosb.Name, metav1.GetOptions{}) - require.NoError(t, err) -} - -// This test verifies that a change to the MachineConfigPool, such as the -// presence of a new rendered MachineConfig, will halt the currently running -// build, replacing it with a new build instead. -func TestMachineConfigPoolChangeRestartsBuild(t *testing.T) { - cs := framework.NewClientSet("") - - mosc := prepareForOnClusterLayeringTest(t, cs, onClusterLayeringTestOpts{ - poolName: layeredMCPName, - customDockerfiles: map[string]string{ - layeredMCPName: cowsayDockerfile, - }, - }) - - createMachineOSConfig(t, cs, mosc) - - // Wait for the first build to start. - firstMosb := waitForBuildToStartForPoolAndConfig(t, cs, layeredMCPName, mosc.Name) - - // Once the first build has started, we create a new MachineConfig, wait for - // the rendered config to appear, then we check that a new MachineOSBuild has - // started for that new MachineConfig. - mcName := "new-machineconfig" - mc := newMachineConfigTriggersImageRebuild(mcName, layeredMCPName, []string{"usbguard"}) - applyMC(t, cs, mc) - - _, err := helpers.WaitForRenderedConfig(t, cs, layeredMCPName, mcName) - require.NoError(t, err) - - // We wait for the first build to be deleted. - waitForBuildToBeDeleted(t, cs, firstMosb) - - // Next, we wait for the new build to be started. - secondMosb := waitForBuildToStartForPoolAndConfig(t, cs, layeredMCPName, mosc.Name) - - _, err = cs.MachineconfigurationV1Interface.MachineOSBuilds().Get(context.TODO(), secondMosb.Name, metav1.GetOptions{}) - require.NoError(t, err) -} - -// This test starts a build that it then forces to fail by deleting the build -// pods until the job itself fails. After failure, it edits the -// MachineOSConfig with the expectation that the failed build and its will be -// deleted and a new build will start in its place. -func TestGracefulBuildFailureRecovery(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - t.Cleanup(cancel) - - cs := framework.NewClientSet("") - - mosc := prepareForOnClusterLayeringTest(t, cs, onClusterLayeringTestOpts{ - poolName: layeredMCPName, - customDockerfiles: map[string]string{ - layeredMCPName: cowsayDockerfile, - }, - }) - - createMachineOSConfig(t, cs, mosc) - - // Wait for the build to start. - firstMosb := waitForBuildToStartForPoolAndConfig(t, cs, layeredMCPName, mosc.Name) - - t.Logf("Waiting for MachineOSBuild %s to fail", firstMosb.Name) - - // Repeatedly delete the build pod until the job fails to cause a failure. - // Otherwise, it takes a very long time for the job to actually fail. - require.NoError(t, forceMachineOSBuildToFail(ctx, t, cs, firstMosb)) - - // Wait for the build to fail. - kubeassert := helpers.AssertClientSet(t, cs).WithContext(ctx) - kubeassert.Eventually().MachineOSBuildIsFailure(firstMosb) - - // Clear the overridden image pullspec. - apiMosc, err := cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, mosc.Name, metav1.GetOptions{}) - require.NoError(t, err) - - apiMosc.Spec.Containerfile = []mcfgv1.MachineOSContainerfile{} - - updatedMosc, err := cs.MachineconfigurationV1Interface.MachineOSConfigs().Update(ctx, apiMosc, metav1.UpdateOptions{}) - require.NoError(t, err) - - mcp, err := cs.MachineconfigurationV1Interface.MachineConfigPools().Get(ctx, layeredMCPName, metav1.GetOptions{}) - require.NoError(t, err) - - mc, err := cs.MachineconfigurationV1Interface.MachineConfigs().Get(ctx, mcp.Spec.Configuration.Name, metav1.GetOptions{}) - require.NoError(t, err) - - // Compute the new MachineOSBuild image name. - moscChangeMosb := buildrequest.NewMachineOSBuildOrDie(buildrequest.MachineOSBuildOpts{ - MachineConfig: mc, - MachineOSConfig: updatedMosc, - MachineConfigPool: mcp, - }) - - // Wait for the second build to start. - secondMosb := waitForBuildToStart(t, cs, moscChangeMosb) - - // Ensure that the first build is eventually cleaned up. - kubeassert.Eventually().MachineOSBuildDoesNotExist(firstMosb) - assertBuildObjectsAreDeleted(t, kubeassert.Eventually(), firstMosb) - - // Ensure that the second build is still running. - kubeassert.MachineOSBuildExists(secondMosb) - assertBuildObjectsAreCreated(t, kubeassert, secondMosb) -} - -// This test validates that when a running builder is deleted, the -// MachineOSBuild associated with it goes into an interrupted status. -func TestDeletedBuilderInterruptsMachineOSBuild(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - t.Cleanup(cancel) - - cs := framework.NewClientSet("") - - poolName := layeredMCPName - - mosc := prepareForOnClusterLayeringTest(t, cs, onClusterLayeringTestOpts{ - poolName: poolName, - customDockerfiles: map[string]string{ - layeredMCPName: cowsayDockerfile, - }, - }) - - // Create our MachineOSConfig and ensure that it is deleted after the test is - // finished. - createMachineOSConfig(t, cs, mosc) - - // Wait for the build to start - startedBuild := waitForBuildToStartForPoolAndConfig(t, cs, poolName, mosc.Name) - t.Logf("MachineOSBuild %q has started", startedBuild.Name) - - pod, err := getPodFromJob(ctx, cs, utils.GetBuildJobName(startedBuild)) - require.NoError(t, err) - - // Delete the builder - bgDeletion := metav1.DeletePropagationBackground - err = cs.BatchV1Interface.Jobs(ctrlcommon.MCONamespace).Delete(ctx, utils.GetBuildJobName(startedBuild), metav1.DeleteOptions{PropagationPolicy: &bgDeletion}) - require.NoError(t, err) - - // Wait for the build to be interrupted. - kubeassert := helpers.AssertClientSet(t, cs).WithContext(ctx).Eventually() - waitForBuildToBeInterrupted(t, cs, startedBuild) - // Ensure that the pod and job are deleted - kubeassert.Eventually().JobDoesNotExist(utils.GetBuildJobName(startedBuild)) - kubeassert.Eventually().PodDoesNotExist(pod.Name) -} - -// This test validates that when a running build pod is deleted, the -// Job associated with the MachineOSBuild creates a new pod and the -// MachineOSBuild still reports its state as building. -func TestDeletedPodDoesNotInterruptMachineOSBuild(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - t.Cleanup(cancel) - - cs := framework.NewClientSet("") - - poolName := layeredMCPName - - mosc := prepareForOnClusterLayeringTest(t, cs, onClusterLayeringTestOpts{ - poolName: poolName, - customDockerfiles: map[string]string{ - layeredMCPName: cowsayDockerfile, - }, - }) - - // Create our MachineOSConfig and ensure that it is deleted after the test is - // finished. - createMachineOSConfig(t, cs, mosc) - - // Wait for the build to start - startedBuild := waitForBuildToStartForPoolAndConfig(t, cs, poolName, mosc.Name) - t.Logf("MachineOSBuild %q has started", startedBuild.Name) - - // Get the pod created by the build Job - pod, err := getPodFromJob(ctx, cs, utils.GetBuildJobName(startedBuild)) - require.NoError(t, err) - - // Delete the pod - err = cs.CoreV1Interface.Pods(ctrlcommon.MCONamespace).Delete(ctx, pod.Name, metav1.DeleteOptions{}) - require.NoError(t, err) - - // Wait a few seconds to ensure that a new pod is created - time.Sleep(time.Second * 5) - - // Ensure the build is still running - kubeassert := helpers.AssertClientSet(t, cs).WithContext(ctx).Eventually() - kubeassert.MachineOSBuildIsRunning(startedBuild) - - // Check that a new pod was created - podNew, err := getPodFromJob(ctx, cs, utils.GetBuildJobName(startedBuild)) - require.NoError(t, err) - assert.NotEqual(t, podNew, pod) -} - -// This test validates that when a running MachineOSBuild is deleted that it will be recreated. -func TestDeletedTransientMachineOSBuildIsRecreated(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - t.Cleanup(cancel) - - cs := framework.NewClientSet("") - - poolName := layeredMCPName - - mosc := prepareForOnClusterLayeringTest(t, cs, onClusterLayeringTestOpts{ - poolName: poolName, - customDockerfiles: map[string]string{ - layeredMCPName: cowsayDockerfile, - }, - }) - - // Create our MachineOSConfig and ensure that it is deleted after the test is - // finished. - createMachineOSConfig(t, cs, mosc) - - // Wait for the build to start - firstMosb := waitForBuildToStartForPoolAndConfig(t, cs, poolName, mosc.Name) - - firstJob, err := cs.BatchV1Interface.Jobs(ctrlcommon.MCONamespace).Get(ctx, utils.GetBuildJobName(firstMosb), metav1.GetOptions{}) - require.NoError(t, err) - - // Delete the MachineOSBuild. - err = cs.MachineconfigurationV1Interface.MachineOSBuilds().Delete(ctx, firstMosb.Name, metav1.DeleteOptions{}) - require.NoError(t, err) - - t.Logf("MachineOSBuild %q deleted", firstMosb.Name) - - // Wait a few seconds for the MachineOSBuild deletion to complete. - time.Sleep(time.Second * 5) - // Ensure that the Job is deleted as this might take some time - kubeassert := helpers.AssertClientSet(t, cs).WithContext(ctx).Eventually() - kubeassert.Eventually().JobDoesNotExist(firstJob.Name) - - // Wait for a new MachineOSBuild to start in its place. - secondMosb := waitForBuildToStartForPoolAndConfig(t, cs, poolName, mosc.Name) - - secondJob, err := cs.BatchV1Interface.Jobs(ctrlcommon.MCONamespace).Get(ctx, utils.GetBuildJobName(secondMosb), metav1.GetOptions{}) - require.NoError(t, err) - - assert.Equal(t, firstMosb.Name, secondMosb.Name) - assert.NotEqual(t, firstMosb.UID, secondMosb.UID) - - assert.Equal(t, firstJob.Name, secondJob.Name) - assert.NotEqual(t, firstJob.UID, secondJob.UID) -} - -// This test verifies that if the rebuild annotation is added to a given MachineOSConfig, that -// the build is restarted -func TestRebuildAnnotationRestartsBuild(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - t.Cleanup(cancel) - - cs := framework.NewClientSet("") - - mosc := prepareForOnClusterLayeringTest(t, cs, onClusterLayeringTestOpts{ - poolName: layeredMCPName, - customDockerfiles: map[string]string{ - layeredMCPName: cowsayDockerfile, - }, - }) - - createMachineOSConfig(t, cs, mosc) - - // First, we get a MachineOSBuild started as usual. - firstMosb := waitForBuildToStartForPoolAndConfig(t, cs, layeredMCPName, mosc.Name) - - assert.NotEqual(t, string(firstMosb.UID), "") - - kubeassert := helpers.AssertClientSet(t, cs).WithContext(ctx) - assertBuildObjectsAreCreated(t, kubeassert, firstMosb) - - firstJob, err := cs.BatchV1Interface.Jobs(ctrlcommon.MCONamespace).Get(ctx, utils.GetBuildJobName(firstMosb), metav1.GetOptions{}) - require.NoError(t, err) - - pod, err := getPodFromJob(ctx, cs, utils.GetBuildJobName(firstMosb)) - require.NoError(t, err) - t.Logf("Initial build has started, delete the job to interrupt the build...") - // Delete the builder - bgDeletion := metav1.DeletePropagationBackground - err = cs.BatchV1Interface.Jobs(ctrlcommon.MCONamespace).Delete(ctx, utils.GetBuildJobName(firstMosb), metav1.DeleteOptions{PropagationPolicy: &bgDeletion}) - require.NoError(t, err) - - // Wait for the build to be interrupted. - waitForBuildToBeInterrupted(t, cs, firstMosb) - - // Wait for the job and pod to be deleted. - kubeassert.Eventually().JobDoesNotExist(utils.GetBuildJobName(firstMosb)) - kubeassert.Eventually().PodDoesNotExist(pod.Name) - - t.Logf("Add rebuild annotation to the MOSC...") - helpers.SetRebuildAnnotationOnMachineOSConfig(ctx, t, cs.GetMcfgclient(), mosc) - - // Wait for the MOSB to be deleted - t.Logf("Waiting for MachineOSBuild with UID %s to be deleted", firstMosb.UID) - waitForMOSBToBeDeleted(t, cs, firstMosb) - - t.Logf("Annotation is updated, waiting for new build %s to start", firstMosb.Name) - // Wait for the build to start. - secondMosb := waitForBuildToStart(t, cs, firstMosb) - - secondJob, err := cs.BatchV1Interface.Jobs(ctrlcommon.MCONamespace).Get(ctx, utils.GetBuildJobName(secondMosb), metav1.GetOptions{}) - require.NoError(t, err) - - // Ensure that the names are the same, but that the first and second - // MachineOSBuilds have different UIDs. - assert.Equal(t, firstMosb.Name, secondMosb.Name) - assert.NotEqual(t, firstMosb.UID, secondMosb.UID) - - // Ensure that the build jobs have also changed. - assert.Equal(t, firstJob.Name, secondJob.Name) - assert.NotEqual(t, firstJob.UID, secondJob.UID) -} - -func assertBuildObjectsAreCreated(t *testing.T, kubeassert *helpers.Assertions, mosb *mcfgv1.MachineOSBuild) { - t.Helper() - - kubeassert.JobExists(utils.GetBuildJobName(mosb)) - kubeassert.ConfigMapExists(utils.GetContainerfileConfigMapName(mosb)) - kubeassert.ConfigMapExists(utils.GetMCConfigMapName(mosb)) - kubeassert.ConfigMapExists(utils.GetEtcPolicyConfigMapName(mosb)) - kubeassert.ConfigMapExists(utils.GetEtcRegistriesConfigMapName(mosb)) - kubeassert.SecretExists(utils.GetBasePullSecretName(mosb)) - kubeassert.SecretExists(utils.GetFinalPushSecretName(mosb)) - - // Check that ownerReferences are set as well - kubeassert.ConfigMapHasOwnerSet(utils.GetContainerfileConfigMapName(mosb)) - kubeassert.ConfigMapHasOwnerSet(utils.GetMCConfigMapName(mosb)) - kubeassert.ConfigMapHasOwnerSet(utils.GetEtcPolicyConfigMapName(mosb)) - kubeassert.ConfigMapHasOwnerSet(utils.GetEtcRegistriesConfigMapName(mosb)) - kubeassert.SecretHasOwnerSet(utils.GetBasePullSecretName(mosb)) - kubeassert.SecretHasOwnerSet(utils.GetFinalPushSecretName(mosb)) -} - -func assertBuildObjectsAreDeleted(t *testing.T, kubeassert *helpers.Assertions, mosb *mcfgv1.MachineOSBuild) { - t.Helper() - - kubeassert.JobDoesNotExist(utils.GetBuildJobName(mosb)) - kubeassert.ConfigMapDoesNotExist(utils.GetContainerfileConfigMapName(mosb)) - kubeassert.ConfigMapDoesNotExist(utils.GetMCConfigMapName(mosb)) - kubeassert.ConfigMapDoesNotExist(utils.GetEtcPolicyConfigMapName(mosb)) - kubeassert.ConfigMapDoesNotExist(utils.GetEtcRegistriesConfigMapName(mosb)) - kubeassert.SecretDoesNotExist(utils.GetBasePullSecretName(mosb)) - kubeassert.SecretDoesNotExist(utils.GetFinalPushSecretName(mosb)) -} - -// Sets up and performs an on-cluster build for a given set of parameters. -// Returns the built image pullspec for later consumption. -func runOnClusterLayeringTest(t *testing.T, testOpts onClusterLayeringTestOpts) (string, *mcfgv1.MachineOSBuild) { - ctx, cancel := context.WithCancel(context.Background()) - t.Cleanup(cancel) - - cs := framework.NewClientSet("") - - imageBuilder := testOpts.imageBuilderType - if testOpts.imageBuilderType == "" { - imageBuilder = mcfgv1.JobBuilder - } - - t.Logf("Running with ImageBuilder type: %s", imageBuilder) - - mosc := prepareForOnClusterLayeringTest(t, cs, testOpts) - - // Create our MachineOSConfig. - createMachineOSConfig(t, cs, mosc) - - // Create a child context for the machine-os-builder pod log streamer. We - // create it here because we want the cancellation to run before the - // MachineOSConfig object is removed. - mobPodStreamerCtx, mobPodStreamerCancel := context.WithCancel(ctx) - t.Cleanup(mobPodStreamerCancel) - - // Wait for the build to start - startedBuild := waitForBuildToStartForPoolAndConfig(t, cs, testOpts.poolName, mosc.Name) - t.Logf("MachineOSBuild %q has started", startedBuild.Name) - - t.Logf("Waiting for build completion...") - - // Create a child context for the build pod log streamer. This is so we can - // cancel it independently of the parent context or the context for the - // machine-os-build pod watcher (which has its own separate context). - buildPodStreamerCtx, buildPodStreamerCancel := context.WithCancel(ctx) - - // We wire this to both t.Cleanup() as well as defer because we want to - // cancel this context either at the end of this function or when the test - // fails, whichever comes first. - buildPodWatcherShutdown := makeIdempotentAndRegisterAlwaysRun(t, buildPodStreamerCancel) - defer buildPodWatcherShutdown() - - dirPath, err := helpers.GetBuildArtifactDir(t) - require.NoError(t, err) - - podLogsDirPath := filepath.Join(dirPath, "pod-logs") - require.NoError(t, os.MkdirAll(podLogsDirPath, 0o755)) - - // In the event of a test failure, we want to dump all of the build artifacts - // to files for easy reference later. - t.Cleanup(func() { - if t.Failed() { - writeBuildArtifactsToFiles(t, cs, testOpts.poolName) - } - }) - - // The pod log collection blocks the main Goroutine since we follow the logs - // for each container in the build pod. So they must run in a separate - // Goroutine so that the rest of the test can continue. - go func() { - err := streamBuildPodLogsToFile(buildPodStreamerCtx, t, cs, startedBuild, podLogsDirPath) - if err != nil && !errors.Is(err, context.Canceled) { - t.Logf("Warning: failed to stream build pod logs: %v", err) - } - }() - - // We also want to collect logs from the machine-os-builder pod since they - // can provide a valuable window in how / why a test failed. As mentioned - // above, we need to run this in a separate Goroutine so that the test is not - // blocked. - go func() { - err := streamMachineOSBuilderPodLogsToFile(mobPodStreamerCtx, t, cs, podLogsDirPath) - if err != nil && !errors.Is(err, context.Canceled) { - t.Logf("Warning: failed to stream machine-os-builder pod logs: %v", err) - } - }() - - // Wait for the build to complete. - finishedBuild := waitForBuildToComplete(t, cs, startedBuild) - - t.Logf("MachineOSBuild %q has completed and produced image: %s", finishedBuild.Name, finishedBuild.Status.DigestedImagePushSpec) - - require.NoError(t, archiveBuildPodLogs(t, podLogsDirPath)) - - return string(finishedBuild.Status.DigestedImagePushSpec), startedBuild -} - -func archiveBuildPodLogs(t *testing.T, podLogsDirPath string) error { - archiveName := fmt.Sprintf("%s-pod-logs.tar.gz", helpers.SanitizeTestName(t)) - - archive, err := helpers.NewArtifactArchive(t, archiveName) - if err != nil { - return err - } - - cmd := exec.Command("mv", podLogsDirPath, archive.StagingDir()) - output, err := cmd.CombinedOutput() - if err != nil { - t.Log(string(output)) - return err - } - - return archive.WriteArchive() -} - -// Waits for the build to start and returns the started MachineOSBuild object. -func waitForBuildToStartForPoolAndConfig(t *testing.T, cs *framework.ClientSet, poolName, moscName string) *mcfgv1.MachineOSBuild { - t.Helper() - - var mosbName string - - require.NoError(t, wait.PollImmediate(2*time.Second, 3*time.Minute, func() (bool, error) { - // Get the name for the MachineOSBuild based upon the MachineConfigPool and MachineOSConfig state. - name, err := getMachineOSBuildNameForPool(cs, poolName, moscName) - if err != nil { - return false, nil - } - - mosbName = name - return true, nil - })) - - // Create a "dummy" MachineOSBuild object with just the name field set so - // that waitForMachineOSBuildToReachState() can use it. - mosb := &mcfgv1.MachineOSBuild{ - ObjectMeta: metav1.ObjectMeta{ - Name: mosbName, - }, - } - - return waitForBuildToStart(t, cs, mosb) -} - -// Waits for a MachineOSBuild to start building. -func waitForBuildToStart(t *testing.T, cs *framework.ClientSet, build *mcfgv1.MachineOSBuild) *mcfgv1.MachineOSBuild { - t.Helper() - - t.Logf("Waiting for MachineOSBuild %s to start", build.Name) - - ctx, cancel := context.WithTimeout(context.Background(), time.Minute*5) - defer cancel() - - start := time.Now() - - kubeassert := helpers.AssertClientSet(t, cs).WithContext(ctx) - kubeassert.Eventually().MachineOSBuildExists(build) - t.Logf("MachineOSBuild %s created after %s", build.Name, time.Since(start)) - kubeassert.Eventually().MachineOSBuildIsRunning(build) - t.Logf("MachineOSBuild %s running after %s", build.Name, time.Since(start)) - - // Get the job for the MOSB created by comparing the job UID with the MOSB annotation - buildJobName, err := getJobForMOSB(ctx, cs, build) - require.NoError(t, err) - kubeassert.Eventually().JobExists(buildJobName) - t.Logf("Build job %s created after %s", buildJobName, time.Since(start)) - // Get the pod created by the job - buildPod, err := getPodFromJob(ctx, cs, buildJobName) - require.NoError(t, err) - kubeassert.Eventually().PodIsRunning(buildPod.Name) - t.Logf("Build pod %s running after %s", buildPod.Name, time.Since(start)) - kubeassert.Eventually().PodHasOwnerSet(buildPod.Name) - t.Logf("Build pod %s has owner set after %s", buildPod.Name, time.Since(start)) - - mosb, err := cs.MachineconfigurationV1Interface.MachineOSBuilds().Get(ctx, build.Name, metav1.GetOptions{}) - require.NoError(t, err) - - assertBuildObjectsAreCreated(t, kubeassert.Eventually(), mosb) - t.Logf("Build objects created after %s", time.Since(start)) - - return mosb -} - -// Waits for a MachineOSBuild with a specific UID to be deleted. -func waitForMOSBToBeDeleted(t *testing.T, cs *framework.ClientSet, mosb *mcfgv1.MachineOSBuild) { - t.Helper() - - ctx, cancel := context.WithTimeout(context.Background(), time.Minute*5) - defer cancel() - - start := time.Now() - - // If the given MachineOSBuild does not have a UID, e.g., from the - // NewMachineOSBuildFromAPIOrDie() helper, then we query the API server to - // find it. - if mosb.UID == "" { - t.Logf("No UID provided for MachineOSBuild %s, querying API for UID", mosb.Name) - // Get the MOSB from the API to get the UID - apiMosb, err := cs.MachineconfigurationV1Interface.MachineOSBuilds().Get(context.Background(), mosb.Name, metav1.GetOptions{}) - require.NoError(t, err) - - if k8serrors.IsNotFound(err) { - t.Logf("MachineOSBuild %s is not found, must have already been deleted", mosb.Name) - return - } - - require.NoError(t, err) - - mosb = apiMosb - } - - mosbName := mosb.Name - mosbUID := mosb.UID - - t.Logf("Waiting for MachineOSBuild %s with UID %s to be deleted", mosbName, mosbUID) - - // Assert does not adequately handle the case where the object is deleted. - // See https://issues.redhat.com/browse/OCPBUGS-63048 for details. - err := wait.PollImmediate(time.Second, time.Minute*5, func() (bool, error) { - mosbs, err := cs.MachineconfigurationV1Interface.MachineOSBuilds().List(ctx, metav1.ListOptions{}) - if err != nil { - return false, err - } - - for _, mosb := range mosbs.Items { - // If we find a MachineOSBuild with the same name and UID, then we know - // it has not been deleted yet. - if mosb.Name == mosbName && mosb.UID == mosbUID { - return false, nil - } - } - - return true, nil - }) - - t.Logf("MachineOSBuild %s with UID %s deleted after %s", mosb.Name, mosb.UID, time.Since(start)) - - require.NoError(t, err, "MachineOSBuild %s with UID %s not deleted after %s", mosb.Name, mosb.UID, time.Since(start)) -} - -// Waits for a MachineOSBuild to be deleted. This is different than -// waitForMOSBToBeDeleted since it then asserts that all of the objects -// associated with the MOSB are deleted. -func waitForBuildToBeDeleted(t *testing.T, cs *framework.ClientSet, build *mcfgv1.MachineOSBuild) { - t.Helper() - - ctx, cancel := context.WithTimeout(context.Background(), time.Minute*5) - defer cancel() - - kubeassert := helpers.AssertClientSet(t, cs).WithContext(ctx) - - t.Logf("Waiting for MachineOSBuild %s to be deleted", build.Name) - - start := time.Now() - - waitForMOSBToBeDeleted(t, cs, build) - - assertBuildObjectsAreDeleted(t, kubeassert.Eventually(), build) - t.Logf("Build objects deleted after %s", time.Since(start)) -} - -// Waits for the given MachineOSBuild to complete and returns the completed -// MachineOSBuild object. -func waitForBuildToComplete(t *testing.T, cs *framework.ClientSet, startedBuild *mcfgv1.MachineOSBuild) *mcfgv1.MachineOSBuild { - t.Helper() - - t.Logf("Waiting for MachineOSBuild %s to complete", startedBuild.Name) - - ctx, cancel := context.WithTimeout(context.Background(), time.Minute*20) - defer cancel() - - start := time.Now() - - kubeassert := helpers.AssertClientSet(t, cs).WithContext(ctx) - kubeassert.Eventually().MachineOSBuildIsSuccessful(startedBuild) - t.Logf("MachineOSBuild %s successful after %s", startedBuild.Name, time.Since(start)) - assertBuildObjectsAreDeleted(t, kubeassert.Eventually(), startedBuild) - t.Logf("Build objects deleted after %s", time.Since(start)) - - mosb, err := cs.MachineconfigurationV1Interface.MachineOSBuilds().Get(ctx, startedBuild.Name, metav1.GetOptions{}) - require.NoError(t, err) - - return mosb -} - -func waitForBuildToBeInterrupted(t *testing.T, cs *framework.ClientSet, startedBuild *mcfgv1.MachineOSBuild) *mcfgv1.MachineOSBuild { - t.Helper() - - t.Logf("Waiting for MachineOSBuild %s to be interrupted", startedBuild.Name) - - ctx, cancel := context.WithTimeout(context.Background(), time.Minute*5) - defer cancel() - - start := time.Now() - - kubeassert := helpers.AssertClientSet(t, cs).WithContext(ctx) - kubeassert.Eventually().MachineOSBuildIsInterrupted(startedBuild) - t.Logf("MachineOSBuild %s interrupted after %s", startedBuild.Name, time.Since(start)) - - mosb, err := cs.MachineconfigurationV1Interface.MachineOSBuilds().Get(ctx, startedBuild.Name, metav1.GetOptions{}) - require.NoError(t, err) - - return mosb -} - -// Prepares for an on-cluster build test by performing the following: -// - Gets the Docker Builder secret name from the MCO namespace. -// - Creates the imagestream to use for the test. -// - Clones the global pull secret into the MCO namespace. -// - If requested, clones the RHEL entitlement secret into the MCO namespace. -// - Creates the on-cluster-build-config ConfigMap. -// - Creates the target MachineConfigPool and waits for it to get a rendered config. -// - Creates the on-cluster-build-custom-dockerfile ConfigMap. -// -// Each of the object creation steps registers an idempotent cleanup function -// that will delete the object at the end of the test. -// -// Returns a MachineOSConfig object for the caller to create to begin the build -// process. -func prepareForOnClusterLayeringTest(t *testing.T, cs *framework.ClientSet, testOpts onClusterLayeringTestOpts) *mcfgv1.MachineOSConfig { - // If the test requires RHEL entitlements, ensure they are present - // in the test cluster. If not found, the test is skipped. - if testOpts.entitlementRequired { - skipIfEntitlementNotPresent(t, cs) - } - - // If the test requires /etc/yum.repos.d and /etc/pki/rpm-gpg, pull a Centos - // Stream 9 container image and populate them from there. This is intended to - // emulate the Red Hat Satellite enablement process, but does not actually - // require any Red Hat Satellite creds to work. - if testOpts.useYumRepos { - injectYumRepos(t, cs) - } - - // Register ephemeral object cleanup function. - makeIdempotentAndRegister(t, func() { - cleanupEphemeralBuildObjects(t, cs) - }) - - imagestreamObjMeta := metav1.ObjectMeta{ - Name: "os-image", - } - - pushSecretName, finalPullspec, _ := setupImageStream(t, cs, imagestreamObjMeta) - - if testOpts.targetNode != nil { - makeIdempotentAndRegister(t, helpers.CreatePoolWithNode(t, cs, testOpts.poolName, *testOpts.targetNode)) - } else { - makeIdempotentAndRegister(t, helpers.CreateMCP(t, cs, testOpts.poolName)) - } - - mcNames := []string{"00-worker"} - if len(testOpts.machineConfigs) > 0 { - for _, mc := range testOpts.machineConfigs { - makeIdempotentAndRegister(t, helpers.ApplyMC(t, cs, mc)) - mcNames = append(mcNames, mc.Name) - } - } - - _, err := helpers.WaitForRenderedConfigs(t, cs, testOpts.poolName, mcNames...) - require.NoError(t, err) - - mosc := &mcfgv1.MachineOSConfig{ - ObjectMeta: metav1.ObjectMeta{ - Name: testOpts.poolName, - }, - Spec: mcfgv1.MachineOSConfigSpec{ - MachineConfigPool: mcfgv1.MachineConfigPoolReference{ - Name: testOpts.poolName, - }, - RenderedImagePushSecret: mcfgv1.ImageSecretObjectReference{ - Name: pushSecretName, - }, - RenderedImagePushSpec: mcfgv1.ImageTagFormat(finalPullspec), - ImageBuilder: mcfgv1.MachineOSImageBuilder{ - ImageBuilderType: mcfgv1.JobBuilder, - }, - Containerfile: []mcfgv1.MachineOSContainerfile{ - { - ContainerfileArch: mcfgv1.NoArch, - Content: testOpts.customDockerfiles[testOpts.poolName], - }, - }, - }, - } - - helpers.SetMetadataOnObject(t, mosc) - - return mosc -} - -func TestSSHKeyAndPasswordForOSBuilder(t *testing.T) { - t.Skip() - - cs := framework.NewClientSet("") - - // label random node from pool, get the node - unlabelFunc := helpers.LabelRandomNodeFromPool(t, cs, "worker", "node-role.kubernetes.io/layered") - osNode := helpers.GetSingleNodeByRole(t, cs, layeredMCPName) - - // prepare for on cluster build test - prepareForOnClusterLayeringTest(t, cs, onClusterLayeringTestOpts{ - poolName: layeredMCPName, - customDockerfiles: map[string]string{}, - }) - - // Set up Ignition config with the desired SSH key and password - testIgnConfig := ctrlcommon.NewIgnConfig() - sshKeyContent := "testsshkey11" - passwordHash := "testpassword11" - - // retreive initial etc/shadow contents - initialEtcShadowContents := helpers.ExecCmdOnNode(t, cs, osNode, "grep", "^core:", "/rootfs/etc/shadow") - - testIgnConfig.Passwd.Users = []ign3types.PasswdUser{ - { - Name: "core", - SSHAuthorizedKeys: []ign3types.SSHAuthorizedKey{ign3types.SSHAuthorizedKey(sshKeyContent)}, - PasswordHash: &passwordHash, - }, - } - - testConfig := &mcfgv1.MachineConfig{ - ObjectMeta: metav1.ObjectMeta{ - Name: "99-test-ssh-and-password", - Labels: helpers.MCLabelForRole(layeredMCPName), - }, - Spec: mcfgv1.MachineConfigSpec{ - Config: runtime.RawExtension{ - Raw: helpers.MarshalOrDie(testIgnConfig), - }, - }, - } - - helpers.SetMetadataOnObject(t, testConfig) - - // Create the MachineConfig and wait for the configuration to be applied - mcCleanupFunc := applyMC(t, cs, testConfig) - - // wait for rendered config to finish creating - renderedConfig, err := helpers.WaitForRenderedConfig(t, cs, layeredMCPName, testConfig.Name) - require.Nil(t, err) - t.Logf("Finished rendering config") - - // wait for mcp to complete updating - err = helpers.WaitForPoolComplete(t, cs, layeredMCPName, renderedConfig) - require.Nil(t, err) - t.Logf("Pool completed updating") - - // Validate the SSH key and password - osNode = helpers.GetSingleNodeByRole(t, cs, layeredMCPName) // Re-fetch node with updated configurations - - foundSSHKey := helpers.ExecCmdOnNode(t, cs, osNode, "cat", "/rootfs/home/core/.ssh/authorized_keys.d/ignition") - if !strings.Contains(foundSSHKey, sshKeyContent) { - t.Fatalf("updated ssh key not found, got %s", foundSSHKey) - } - t.Logf("updated ssh hash found, got %s", foundSSHKey) - - currentEtcShadowContents := helpers.ExecCmdOnNode(t, cs, osNode, "grep", "^core:", "/rootfs/etc/shadow") - if currentEtcShadowContents == initialEtcShadowContents { - t.Fatalf("updated password hash not found in /etc/shadow, got %s", currentEtcShadowContents) - } - t.Logf("updated password hash found in /etc/shadow, got %s", currentEtcShadowContents) - - t.Logf("Node %s has correct SSH Key and Password Hash", osNode.Name) - - // Clean-up: Delete the applied MachineConfig and ensure configurations are rolled back - - t.Cleanup(func() { - unlabelFunc() - mcCleanupFunc() - }) -} - -// This test starts a build and then immediately scales down the -// machine-os-builder deployment until the underlying build job has completed. -// The rationale behind this test is so that if the machine-os-builder pod gets -// rescheduled onto a different node while a build is occurring that the -// MachineOSBuild object will eventually be reconciled, even if the build -// completed during the rescheduling operation. -func TestControllerEventuallyReconciles(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - t.Cleanup(cancel) - - cs := framework.NewClientSet("") - - poolName := layeredMCPName - - mosc := prepareForOnClusterLayeringTest(t, cs, onClusterLayeringTestOpts{ - poolName: poolName, - customDockerfiles: map[string]string{ - layeredMCPName: cowsayDockerfile, - }, - }) - - mcp, err := cs.MachineconfigurationV1Interface.MachineConfigPools().Get(ctx, poolName, metav1.GetOptions{}) - require.NoError(t, err) - - createMachineOSConfig(t, cs, mosc) - - mc, err := cs.MachineconfigurationV1Interface.MachineConfigs().Get(ctx, mcp.Spec.Configuration.Name, metav1.GetOptions{}) - require.NoError(t, err) - - mosb := buildrequest.NewMachineOSBuildOrDie(buildrequest.MachineOSBuildOpts{ - MachineConfig: mc, - MachineOSConfig: mosc, - MachineConfigPool: mcp, - }) - - // Wait for the MachineOSBuild to exist. - kubeassert := helpers.AssertClientSet(t, cs).WithContext(ctx).Eventually() - kubeassert.MachineOSBuildExists(mosb) - jobName, err := getJobForMOSB(ctx, cs, mosb) - require.NoError(t, err) - kubeassert.JobExists(jobName) - assertBuildObjectsAreCreated(t, kubeassert, mosb) - - t.Logf("MachineOSBuild %q exists, stopping machine-os-builder", mosb.Name) - - // As soon as the MachineOSBuild exists, scale down the machine-os-builder - // deployment and any other deployments which may inadvertantly cause its - // replica count to increase. This is done to simulate the machine-os-builder - // pod being scheduled onto a different node. - restoreDeployments := scaleDownDeployments(t, cs) - - // Wait for the job to start running. - waitForJobToReachMOSBCondition(ctx, t, cs, jobName, mcfgv1.MachineOSBuilding) - - t.Logf("Job %s has started running, starting machine-os-builder", jobName) - - // Restore the deployments. - restoreDeployments() - - // Ensure that the MachineOSBuild object eventually gets updated. - kubeassert.MachineOSBuildIsRunning(mosb) - - t.Logf("MachineOSBuild %s is now running, stopping machine-os-builder", mosb.Name) - - // Stop the deployments again. - restoreDeployments = scaleDownDeployments(t, cs) - - // Wait for the job to complete. - waitForJobToReachMOSBCondition(ctx, t, cs, jobName, mcfgv1.MachineOSBuildSucceeded) - - t.Logf("Job %q finished, starting machine-os-builder", jobName) - - // Restore the deployments again. - restoreDeployments() - - // At this point, the machine-os-builder is running, so we wait for the build - // itself to complete and be updated. - mosb = waitForBuildToComplete(t, cs, mosb) - - // Wait until the MachineOSConfig gets the digested pullspec from the MachineOSBuild. - require.NoError(t, wait.PollImmediate(1*time.Second, 5*time.Minute, func() (bool, error) { - mosc, err := cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, mosc.Name, metav1.GetOptions{}) - if err != nil { - return false, err - } - - return mosc.Status.CurrentImagePullSpec != "" && mosc.Status.CurrentImagePullSpec == mosb.Status.DigestedImagePushSpec, nil - })) -} - -func waitForMOSCToGetNewPullspec(ctx context.Context, t *testing.T, cs *framework.ClientSet, moscName, pullspec string) { - require.NoError(t, wait.PollImmediate(1*time.Second, 5*time.Minute, func() (bool, error) { - mosc, err := cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, moscName, metav1.GetOptions{}) - if err != nil { - return false, err - } - - return mosc.Status.CurrentImagePullSpec != "" && string(mosc.Status.CurrentImagePullSpec) == pullspec, nil - })) -} - -func waitForMOSCToUpdateCurrentMOSB(ctx context.Context, t *testing.T, cs *framework.ClientSet, moscName, mosbName string) string { - var currentMOSB string - require.NoError(t, wait.PollImmediate(1*time.Second, 5*time.Minute, func() (bool, error) { - mosc, err := cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, moscName, metav1.GetOptions{}) - if err != nil { - return false, err - } - - currentMOSB = mosc.GetAnnotations()[constants.CurrentMachineOSBuildAnnotationKey] - return currentMOSB != mosbName, nil - - })) - return currentMOSB -} - -// Waits for a job object to reach a given state. -// TOOD: Add this to the Asserts helper struct. -func waitForJobToReachCondition(ctx context.Context, t *testing.T, cs *framework.ClientSet, jobName string, condFunc func(*batchv1.Job) (bool, error)) { - require.NoError(t, wait.PollImmediate(1*time.Second, 20*time.Minute, func() (bool, error) { - job, err := cs.BatchV1Interface.Jobs(ctrlcommon.MCONamespace).Get(ctx, jobName, metav1.GetOptions{}) - if err != nil { - return false, err - } - - return condFunc(job) - })) -} - -// Waits for a job object to be mapped to a given MachineOSBuild state. Will always fail the test if the job reaches a failed state unexpectedly. -func waitForJobToReachMOSBCondition(ctx context.Context, t *testing.T, cs *framework.ClientSet, jobName string, expectedCondition mcfgv1.BuildProgress) { - waitForJobToReachCondition(ctx, t, cs, jobName, func(job *batchv1.Job) (bool, error) { - buildprogress, _ := imagebuilder.MapJobStatusToBuildStatus(job) - if buildprogress == mcfgv1.MachineOSBuildFailed && expectedCondition != mcfgv1.MachineOSBuildFailed { - return false, fmt.Errorf("job %q failed unexpectedly", jobName) - } - - return expectedCondition == buildprogress, nil - }) -} - -// waitForImageBuildDegradedCondition waits for the ImageBuildDegraded condition to reach the expected state -func waitForImageBuildDegradedCondition(ctx context.Context, t *testing.T, cs *framework.ClientSet, poolName string, expectedStatus corev1.ConditionStatus) *mcfgv1.MachineConfigPoolCondition { - t.Helper() - - var condition *mcfgv1.MachineConfigPoolCondition - require.NoError(t, wait.PollImmediate(1*time.Second, 2*time.Minute, func() (bool, error) { - mcp, err := cs.MachineconfigurationV1Interface.MachineConfigPools().Get(ctx, poolName, metav1.GetOptions{}) - if err != nil { - return false, err - } - - condition = apihelpers.GetMachineConfigPoolCondition(mcp.Status, mcfgv1.MachineConfigPoolImageBuildDegraded) - if condition == nil { - return false, nil - } - - return condition.Status == expectedStatus, nil - })) - - return condition -} - -// TestImageBuildDegradedOnFailureAndClearedOnBuildStart tests that the -// ImageBuildDegraded condition is set to True when a MachineOSBuild fails, and -// is set to False when a MachineOSBuild is started after a previous failure. -// Previously, this test waited until the build was completed before verifying -// that the state was no longer degraded. -func TestImageBuildDegradedOnFailureAndClearedOnBuildStart(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - t.Cleanup(cancel) - - cs := framework.NewClientSet("") - - mosc := prepareForOnClusterLayeringTest(t, cs, onClusterLayeringTestOpts{ - poolName: layeredMCPName, - customDockerfiles: map[string]string{ - layeredMCPName: cowsayDockerfile, - }, - }) - - // First, add a bad containerfile to cause a build failure. However, we will - // actually delete the build pod to force the failure to happen faster. - t.Logf("Adding a bad containerfile for MachineOSConfig %s to cause a build failure", mosc.Name) - mosc.Spec.Containerfile = getBadContainerFileForFailureTest() - - createMachineOSConfig(t, cs, mosc) - - // Wait for the build to start and fail - firstMosb := waitForBuildToStartForPoolAndConfig(t, cs, layeredMCPName, mosc.Name) - t.Logf("Waiting for MachineOSBuild %s to fail", firstMosb.Name) - - // Force the build to fail faster by repeatedly deleting the build pods until - // the job reflects a failure status. - require.NoError(t, forceMachineOSBuildToFail(ctx, t, cs, firstMosb)) - - kubeassert := helpers.AssertClientSet(t, cs).WithContext(ctx) - kubeassert.Eventually().MachineOSBuildIsFailure(firstMosb) - - // Wait for and verify ImageBuildDegraded condition is set to True - degradedCondition := waitForImageBuildDegradedCondition(ctx, t, cs, layeredMCPName, corev1.ConditionTrue) - require.NotNil(t, degradedCondition, "ImageBuildDegraded condition should be present") - assert.Equal(t, string(mcfgv1.MachineConfigPoolBuildFailed), degradedCondition.Reason, "ImageBuildDegraded reason should be BuildFailed") - assert.Contains(t, degradedCondition.Message, fmt.Sprintf("Failed to build OS image for pool %s", layeredMCPName), "ImageBuildDegraded message should contain pool name") - assert.Contains(t, degradedCondition.Message, firstMosb.Name, "ImageBuildDegraded message should contain MachineOSBuild name") - - t.Logf("ImageBuildDegraded condition correctly set to True with message: %s", degradedCondition.Message) - - // Now fix the MachineOSConfig with a good containerfile - apiMosc, err := cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, mosc.Name, metav1.GetOptions{}) - require.NoError(t, err) - - apiMosc.Spec.Containerfile = []mcfgv1.MachineOSContainerfile{ - { - ContainerfileArch: mcfgv1.NoArch, - Content: cowsayDockerfile, - }, - } - - updatedMosc, err := cs.MachineconfigurationV1Interface.MachineOSConfigs().Update(ctx, apiMosc, metav1.UpdateOptions{}) - require.NoError(t, err) - - t.Logf("Fixed containerfile, waiting for new build to start") - - mcp, err := cs.MachineconfigurationV1Interface.MachineConfigPools().Get(ctx, layeredMCPName, metav1.GetOptions{}) - require.NoError(t, err) - - // Compute the new MachineOSBuild name - mc, err := cs.MachineconfigurationV1Interface.MachineConfigs().Get(ctx, mcp.Spec.Configuration.Name, metav1.GetOptions{}) - require.NoError(t, err) - - moscChangeMosb := buildrequest.NewMachineOSBuildOrDie(buildrequest.MachineOSBuildOpts{ - MachineConfig: mc, - MachineOSConfig: updatedMosc, - MachineConfigPool: mcp, - }) - - // Wait for the second build to start - secondMosb := waitForBuildToStart(t, cs, moscChangeMosb) - t.Logf("Second build started successfully: %s", secondMosb.Name) - - // Wait for and verify ImageBuildDegraded condition is False after the new build starts. - // The condition should be cleared when the build starts. - degradedCondition = waitForImageBuildDegradedCondition(ctx, t, cs, layeredMCPName, corev1.ConditionFalse) - require.NotNil(t, degradedCondition, "ImageBuildDegraded condition should still be present") - assert.Equal(t, string(mcfgv1.MachineConfigPoolBuilding), degradedCondition.Reason, "ImageBuildDegraded reason should be Building") - t.Logf("ImageBuildDegraded condition correctly cleared to False when build started with message: %s", degradedCondition.Message) - - // Wait for the second build to complete successfully - finishedBuild := waitForBuildToComplete(t, cs, secondMosb) - t.Logf("Second build completed successfully: %s", finishedBuild.Name) - - // Wait for the MachineOSConfig to get the new pullspec, which indicates full reconciliation - waitForMOSCToGetNewPullspec(ctx, t, cs, mosc.Name, string(finishedBuild.Status.DigestedImagePushSpec)) - - // Wait for and verify ImageBuildDegraded condition is False with reason BuildSucceeded - degradedCondition = waitForImageBuildDegradedCondition(ctx, t, cs, layeredMCPName, corev1.ConditionFalse) - require.NotNil(t, degradedCondition, "ImageBuildDegraded condition should still be present") - assert.Equal(t, string(mcfgv1.MachineConfigPoolBuildSuccess), degradedCondition.Reason, "ImageBuildDegraded reason should be BuildSuccess") - t.Logf("ImageBuildDegraded condition correctly set to False when build succeeded with message: %s", degradedCondition.Message) - - // Verify MCP status is correct after successful build and full reconciliation - successMcp, err := cs.MachineconfigurationV1Interface.MachineConfigPools().Get(ctx, layeredMCPName, metav1.GetOptions{}) - require.NoError(t, err) - - // After successful build completion and full reconciliation, MCP should show: - // Updated=True, Updating=False, Degraded=False, ImageBuildDegraded=False - kubeassert.Eventually().MachineConfigPoolReachesState(successMcp, func(mcp *mcfgv1.MachineConfigPool, err error) (bool, error) { - if err != nil { - return false, err - } - // Return false (keep polling) if conditions don't match expected state - if !apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdated) || - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdating) || - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolDegraded) || - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolImageBuildDegraded) { - return false, nil - } - - // Return true when expected state is reached - t.Logf("MCP status after successful build - Updated: %v, Updating: %v, Degraded: %v, ImageBuildDegraded: %v", - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdated), - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdating), - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolDegraded), - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolImageBuildDegraded)) - - return true, nil - }, "MCP should reach correct state after successful build (Updated=True, Updating=False, Degraded=False, ImageBuildDegraded=False)") - - // Now trigger another build to test MCP status transitions when a new build starts - t.Logf("Triggering a third build to test MCP status transitions") - - // Modify the containerfile slightly to trigger a new build - apiMosc, err = cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, mosc.Name, metav1.GetOptions{}) - require.NoError(t, err) - - // Add a comment to the containerfile to change it and trigger a new build - modifiedDockerfile := cowsayDockerfile + "\n# Comment to trigger new build" - apiMosc.Spec.Containerfile = []mcfgv1.MachineOSContainerfile{ - { - ContainerfileArch: mcfgv1.NoArch, - Content: modifiedDockerfile, - }, - } - - updatedMosc, err = cs.MachineconfigurationV1Interface.MachineOSConfigs().Update(ctx, apiMosc, metav1.UpdateOptions{}) - require.NoError(t, err) - - t.Logf("Modified containerfile, waiting for third build to start") - - // Get the updated MCP to compute the new build - mcp, err = cs.MachineconfigurationV1Interface.MachineConfigPools().Get(ctx, layeredMCPName, metav1.GetOptions{}) - require.NoError(t, err) - - //Get the updated MC to compute the new build - mc, err = cs.MachineconfigurationV1Interface.MachineConfigs().Get(ctx, mcp.Spec.Configuration.Name, metav1.GetOptions{}) - require.NoError(t, err) - - // Compute the new MachineOSBuild name for the third build - thirdMoscMosb := buildrequest.NewMachineOSBuildOrDie(buildrequest.MachineOSBuildOpts{ - MachineConfig: mc, - MachineOSConfig: updatedMosc, - MachineConfigPool: mcp, - }) - - // Wait for the third build to start - thirdMosb := waitForBuildToStart(t, cs, thirdMoscMosb) - t.Logf("Third build started: %s", thirdMosb.Name) - - // Verify MCP status during active build: - // Updated=False, Updating=True, Degraded=False, ImageBuildDegraded=False - buildingMcp, err := cs.MachineconfigurationV1Interface.MachineConfigPools().Get(ctx, layeredMCPName, metav1.GetOptions{}) - require.NoError(t, err) - - kubeassert.Eventually().MachineConfigPoolReachesState(buildingMcp, func(mcp *mcfgv1.MachineConfigPool, err error) (bool, error) { - if err != nil { - return false, err - } - // During build, MCP should show: Updated=False, Updating=True - if apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdated) || - !apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdating) || - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolDegraded) || - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolImageBuildDegraded) { - return false, nil - } - - t.Logf("MCP status during active build - Updated: %v, Updating: %v, Degraded: %v, ImageBuildDegraded: %v", - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdated), - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdating), - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolDegraded), - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolImageBuildDegraded)) - - return true, nil - }, "MCP should reach correct state during active build (Updated=False, Updating=True, Degraded=False, ImageBuildDegraded=False)") - - // Wait for the third build to complete successfully - finalBuild := waitForBuildToComplete(t, cs, thirdMosb) - t.Logf("Third build completed successfully: %s", finalBuild.Name) - - // Wait for the MachineOSConfig to get the new pullspec, which indicates full reconciliation - waitForMOSCToGetNewPullspec(ctx, t, cs, mosc.Name, string(finalBuild.Status.DigestedImagePushSpec)) - - // Final verification: MCP status should return to: - // Updated=True, Updating=False, Degraded=False, ImageBuildDegraded=False - finalMcp, err := cs.MachineconfigurationV1Interface.MachineConfigPools().Get(ctx, layeredMCPName, metav1.GetOptions{}) - require.NoError(t, err) - - kubeassert.Eventually().MachineConfigPoolReachesState(finalMcp, func(mcp *mcfgv1.MachineConfigPool, err error) (bool, error) { - if err != nil { - return false, err - } - // Return false (keep polling) if conditions don't match expected state - if !apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdated) || - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdating) || - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolDegraded) || - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolImageBuildDegraded) { - return false, nil - } - - // Return true when expected state is reached - t.Logf("Final MCP status after third build completion - Updated: %v, Updating: %v, Degraded: %v, ImageBuildDegraded: %v", - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdated), - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdating), - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolDegraded), - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolImageBuildDegraded)) - - return true, nil - }, "MCP should return to correct state after final build completion (Updated=True, Updating=False, Degraded=False, ImageBuildDegraded=False)") - - t.Logf("All MCP status transitions verified successfully across build failure, success, and subsequent new build") -} - -// TestCurrentMachineOSBuildAnnotationHandling tests that the node controller correctly uses the -// current-machine-os-build annotation on the MachineOSConfig to select the correct MOSB when -// multiple MOSBs exist for the same rendered MachineConfig. This can happen during rapid updates -// or when a rebuild annotation is applied. -func TestCurrentMachineOSBuildAnnotationHandling(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - t.Cleanup(cancel) - - cs := framework.NewClientSet("") - - // Setup: Create initial layered pool and build - mosc := prepareForOnClusterLayeringTest(t, cs, onClusterLayeringTestOpts{ - poolName: layeredMCPName, - customDockerfiles: map[string]string{ - layeredMCPName: simpleDockerfile, - }, - }) - - createMachineOSConfig(t, cs, mosc) - - // Wait for the first build to complete - firstMosb := waitForBuildToStartForPoolAndConfig(t, cs, layeredMCPName, mosc.Name) - t.Logf("First MachineOSBuild %q has started", firstMosb.Name) - - firstFinishedBuild := waitForBuildToComplete(t, cs, firstMosb) - firstImagePullspec := string(firstFinishedBuild.Status.DigestedImagePushSpec) - t.Logf("First MachineOSBuild %q completed with image: %s", firstFinishedBuild.Name, firstImagePullspec) - - // Verify the MOSC has the current-machine-os-build annotation set to the first build - apiMosc, err := cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, mosc.Name, metav1.GetOptions{}) - require.NoError(t, err) - currentBuildAnnotation := apiMosc.GetAnnotations()[constants.CurrentMachineOSBuildAnnotationKey] - assert.Equal(t, firstFinishedBuild.Name, currentBuildAnnotation, - "MOSC should have current-machine-os-build annotation pointing to first build") - t.Logf("Verified MOSC has current-machine-os-build annotation: %s", currentBuildAnnotation) - - // Trigger a second build by editing the MOSC (e.g., updating the containerfile) - // This does NOT create a new rendered MC, which is the scenario we're testing - t.Logf("Updating MachineOSConfig containerfile to trigger second build without new rendered MC") - apiMosc, err = cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, mosc.Name, metav1.GetOptions{}) - require.NoError(t, err) - - // Update the containerfile to trigger a rebuild - apiMosc.Spec.Containerfile = []mcfgv1.MachineOSContainerfile{ - { - ContainerfileArch: mcfgv1.NoArch, - Content: simpleDockerfile + "\nRUN echo 'test annotation handling' > /etc/test-annotation", - }, - } - apiMosc, err = cs.MachineconfigurationV1Interface.MachineOSConfigs().Update(ctx, apiMosc, metav1.UpdateOptions{}) - require.NoError(t, err) - t.Logf("Updated MachineOSConfig %q containerfile", apiMosc.Name) - - // Wait for the second build to start - t.Logf("Waiting for second build to start...") - secondMosbName := waitForMOSCToUpdateCurrentMOSB(ctx, t, cs, mosc.Name, firstMosb.Name) - secondMosb, err := cs.GetMcfgclient().MachineconfigurationV1().MachineOSBuilds().Get(ctx, secondMosbName, metav1.GetOptions{}) - require.NoError(t, err) - secondMosb = waitForBuildToStart(t, cs, secondMosb) - t.Logf("Second MachineOSBuild %q has started", secondMosb.Name) - - // At this point, both MOSBs exist: - // - firstMosb is completed (with original containerfile) - // - secondMosb is building (with updated containerfile, but SAME rendered MC) - // This is the critical scenario: multiple MOSBs for the same rendered MachineConfig - - // Verify that the MOSC annotation now points to the second build - apiMosc, err = cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, mosc.Name, metav1.GetOptions{}) - require.NoError(t, err) - currentBuildAnnotation = apiMosc.GetAnnotations()[constants.CurrentMachineOSBuildAnnotationKey] - assert.Equal(t, secondMosb.Name, currentBuildAnnotation, - "MOSC should have current-machine-os-build annotation pointing to second build") - t.Logf("Verified MOSC annotation updated to second build: %s", currentBuildAnnotation) - - // List all MOSBs to confirm both exist - allMosbs, err := cs.MachineconfigurationV1Interface.MachineOSBuilds().List(ctx, metav1.ListOptions{}) - require.NoError(t, err) - - mosbNames := []string{} - for _, mosb := range allMosbs.Items { - if mosb.Spec.MachineOSConfig.Name == mosc.Name { - mosbNames = append(mosbNames, mosb.Name) - } - } - t.Logf("Found %d MOSBs for MachineOSConfig %q: %v", len(mosbNames), mosc.Name, mosbNames) - assert.GreaterOrEqual(t, len(mosbNames), 2, "Should have at least 2 MOSBs at this point") - - // The critical test: The node controller should use the MOSB specified by the annotation - // (secondMosb) even though firstMosb also exists and matches the MOSC name. - // This is implicitly tested by the fact that the pool status should reflect the second build. - // We verify this by checking the pool is waiting for the second build, not using the first. - - t.Logf("Verifying that pool targets the correct (second) build based on annotation") - // The pool should be waiting for the second build to complete, not using the first completed build - // We can verify this by checking that the pool doesn't have the first image in its status - - // Wait for the second build to complete - t.Logf("Waiting for second build to complete...") - secondFinishedBuild := waitForBuildToComplete(t, cs, secondMosb) - secondImagePullspec := string(secondFinishedBuild.Status.DigestedImagePushSpec) - t.Logf("Second MachineOSBuild %q completed with image: %s", secondFinishedBuild.Name, secondImagePullspec) - - // Verify the images are different (proving we built a new image, not reusing the old one) - assert.NotEqual(t, firstImagePullspec, secondImagePullspec, - "First and second builds should produce different images") - - // Verify that the MOSC status reflects the second build's image - waitForMOSCToGetNewPullspec(ctx, t, cs, mosc.Name, secondImagePullspec) - apiMosc, err = cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, mosc.Name, metav1.GetOptions{}) - require.NoError(t, err) - assert.Equal(t, mcfgv1.ImageDigestFormat(secondImagePullspec), apiMosc.Status.CurrentImagePullSpec, - "MOSC status should have the second build's image pullspec") - - // The critical test: Verify the node controller uses the annotation to select the correct MOSB - // Add a node to the pool and verify it gets the SECOND build's image, not the first - t.Logf("Adding node to pool to verify node controller uses annotation-based MOSB selection") - node := helpers.GetRandomNode(t, cs, "worker") - - unlabelFunc := makeIdempotentAndRegisterAlwaysRun(t, helpers.LabelNode(t, cs, node, helpers.MCPNameToRole(layeredMCPName))) - defer unlabelFunc() - - // Wait for the node controller to update the node's desiredImage annotation - // The node controller should use the annotation on the MOSC to select the second MOSB - // and therefore set the desiredImage to the second build's image, NOT the first - t.Logf("Waiting for node %s to have desiredImage set to second build's image", node.Name) - helpers.WaitForNodeImageChange(t, cs, node, secondImagePullspec) - - // Verify the node's desiredImage annotation matches the second build - updatedNode, err := cs.CoreV1Interface.Nodes().Get(ctx, node.Name, metav1.GetOptions{}) - require.NoError(t, err) - desiredImage := updatedNode.Annotations[daemonconsts.DesiredImageAnnotationKey] - assert.Equal(t, secondImagePullspec, desiredImage, - "Node controller should use annotation to select second build, not first build") - t.Logf("Node controller correctly selected second build based on annotation") - - // Also verify it's NOT the first build's image - assert.NotEqual(t, firstImagePullspec, desiredImage, - "Node should NOT have first build's image (would indicate annotation was ignored)") - - t.Logf("Successfully verified that node controller uses annotation-based build selection") -} diff --git a/test/helpers/assertions.go b/test/helpers/assertions.go index d2e222b940..9e9e6175f7 100644 --- a/test/helpers/assertions.go +++ b/test/helpers/assertions.go @@ -674,6 +674,9 @@ func (a *Assertions) machineOSBuildHasConditionTrue(mosb *mcfgv1.MachineOSBuild, stateFunc := func(apiMosb *mcfgv1.MachineOSBuild, err error) (bool, error) { if err != nil { + if a.poll && k8serrors.IsNotFound(err) { + return false, nil + } return false, err }