diff --git a/internal/operator-controller/controllers/boxcutter_reconcile_steps.go b/internal/operator-controller/controllers/boxcutter_reconcile_steps.go index 63b8c7ddb..d7a1afd37 100644 --- a/internal/operator-controller/controllers/boxcutter_reconcile_steps.go +++ b/internal/operator-controller/controllers/boxcutter_reconcile_steps.go @@ -62,10 +62,11 @@ func (d *BoxcutterRevisionStatesGetter) GetRevisionStates(ctx context.Context, e // is fairly decoupled from this code where we get the annotations back out. We may want to co-locate // the set/get logic a bit better to make it more maintainable and less likely to get out of sync. rm := &RevisionMetadata{ - RevisionName: rev.Name, - Package: rev.Annotations[labels.PackageNameKey], - Image: rev.Annotations[labels.BundleReferenceKey], - Conditions: rev.Status.Conditions, + RevisionName: rev.Name, + Package: rev.Annotations[labels.PackageNameKey], + Image: rev.Annotations[labels.BundleReferenceKey], + ResolutionDigest: rev.Annotations[labels.ResolutionDigestKey], + Conditions: rev.Status.Conditions, BundleMetadata: ocv1.BundleMetadata{ Name: rev.Annotations[labels.BundleNameKey], Version: rev.Annotations[labels.BundleVersionKey], @@ -100,10 +101,11 @@ func ApplyBundleWithBoxcutter(apply func(ctx context.Context, contentFS fs.FS, e return func(ctx context.Context, state *reconcileState, ext *ocv1.ClusterExtension) (*ctrl.Result, error) { l := log.FromContext(ctx) revisionAnnotations := map[string]string{ - labels.BundleNameKey: state.resolvedRevisionMetadata.Name, - labels.PackageNameKey: state.resolvedRevisionMetadata.Package, - labels.BundleVersionKey: state.resolvedRevisionMetadata.Version, - labels.BundleReferenceKey: state.resolvedRevisionMetadata.Image, + labels.BundleNameKey: state.resolvedRevisionMetadata.Name, + labels.PackageNameKey: state.resolvedRevisionMetadata.Package, + labels.BundleVersionKey: state.resolvedRevisionMetadata.Version, + labels.BundleReferenceKey: state.resolvedRevisionMetadata.Image, + labels.ResolutionDigestKey: state.resolvedRevisionMetadata.ResolutionDigest, } objLbls := map[string]string{ labels.OwnerKindKey: ocv1.ClusterExtensionKind, diff --git a/internal/operator-controller/controllers/clusterextension_controller.go b/internal/operator-controller/controllers/clusterextension_controller.go index 22b676851..14b946bca 100644 --- a/internal/operator-controller/controllers/clusterextension_controller.go +++ b/internal/operator-controller/controllers/clusterextension_controller.go @@ -499,9 +499,10 @@ func clusterExtensionRequestsForCatalog(c client.Reader, logger logr.Logger) crh } type RevisionMetadata struct { - RevisionName string - Package string - Image string + RevisionName string + Package string + Image string + ResolutionDigest string ocv1.BundleMetadata Conditions []metav1.Condition } @@ -535,8 +536,9 @@ func (d *HelmRevisionStatesGetter) GetRevisionStates(ctx context.Context, ext *o for _, rel := range relhis { if rel.Info != nil && rel.Info.Status == release.StatusDeployed { rs.Installed = &RevisionMetadata{ - Package: rel.Labels[labels.PackageNameKey], - Image: rel.Labels[labels.BundleReferenceKey], + Package: rel.Labels[labels.PackageNameKey], + Image: rel.Labels[labels.BundleReferenceKey], + ResolutionDigest: rel.Labels[labels.ResolutionDigestKey], BundleMetadata: ocv1.BundleMetadata{ Name: rel.Labels[labels.BundleNameKey], Version: rel.Labels[labels.BundleVersionKey], diff --git a/internal/operator-controller/controllers/clusterextension_controller_test.go b/internal/operator-controller/controllers/clusterextension_controller_test.go index b86c50076..cab617011 100644 --- a/internal/operator-controller/controllers/clusterextension_controller_test.go +++ b/internal/operator-controller/controllers/clusterextension_controller_test.go @@ -785,7 +785,12 @@ func TestClusterExtensionBoxcutterApplierFailsDoesNotLeakDeprecationErrors(t *te // the same inputs the runtime would see. d.RevisionStatesGetter = &MockRevisionStatesGetter{ RevisionStates: &controllers.RevisionStates{ - RollingOut: []*controllers.RevisionMetadata{{}}, + RollingOut: []*controllers.RevisionMetadata{{ + // Different digest from current spec - ensures we call the resolver + // (This simulates a rolling-out revision from a previous reconcile) + ResolutionDigest: "different-digest-from-previous-reconcile", + BundleMetadata: ocv1.BundleMetadata{Version: "1.0.0"}, + }}, }, } d.Resolver = resolve.Func(func(ctx context.Context, ext *ocv1.ClusterExtension, installedBundle *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) { @@ -845,21 +850,18 @@ func TestClusterExtensionBoxcutterApplierFailsDoesNotLeakDeprecationErrors(t *te deprecatedCond := apimeta.FindStatusCondition(clusterExtension.Status.Conditions, ocv1.TypeDeprecated) require.NotNil(t, deprecatedCond) - require.Equal(t, metav1.ConditionUnknown, deprecatedCond.Status, "no catalog data during rollout, so Unknown") - require.Equal(t, ocv1.ReasonDeprecationStatusUnknown, deprecatedCond.Reason) - require.Equal(t, "deprecation status unknown: catalog data unavailable", deprecatedCond.Message) + require.Equal(t, metav1.ConditionFalse, deprecatedCond.Status, "catalog data available from resolution, not deprecated") + require.Equal(t, ocv1.ReasonNotDeprecated, deprecatedCond.Reason) packageCond := apimeta.FindStatusCondition(clusterExtension.Status.Conditions, ocv1.TypePackageDeprecated) require.NotNil(t, packageCond) - require.Equal(t, metav1.ConditionUnknown, packageCond.Status, "no catalog data during rollout, so Unknown") - require.Equal(t, ocv1.ReasonDeprecationStatusUnknown, packageCond.Reason) - require.Equal(t, "deprecation status unknown: catalog data unavailable", packageCond.Message) + require.Equal(t, metav1.ConditionFalse, packageCond.Status, "catalog data available from resolution, package not deprecated") + require.Equal(t, ocv1.ReasonNotDeprecated, packageCond.Reason) channelCond := apimeta.FindStatusCondition(clusterExtension.Status.Conditions, ocv1.TypeChannelDeprecated) require.NotNil(t, channelCond) - require.Equal(t, metav1.ConditionUnknown, channelCond.Status, "no catalog data during rollout, so Unknown") - require.Equal(t, ocv1.ReasonDeprecationStatusUnknown, channelCond.Reason) - require.Equal(t, "deprecation status unknown: catalog data unavailable", channelCond.Message) + require.Equal(t, metav1.ConditionFalse, channelCond.Status, "catalog data available from resolution, channel not deprecated") + require.Equal(t, ocv1.ReasonNotDeprecated, channelCond.Reason) bundleCond := apimeta.FindStatusCondition(clusterExtension.Status.Conditions, ocv1.TypeBundleDeprecated) require.NotNil(t, bundleCond) diff --git a/internal/operator-controller/controllers/clusterextension_reconcile_steps.go b/internal/operator-controller/controllers/clusterextension_reconcile_steps.go index 48507a2b7..2f71443eb 100644 --- a/internal/operator-controller/controllers/clusterextension_reconcile_steps.go +++ b/internal/operator-controller/controllers/clusterextension_reconcile_steps.go @@ -18,8 +18,12 @@ package controllers import ( "context" + "crypto/sha256" + "encoding/base64" + "encoding/json" "errors" "fmt" + "slices" apierrors "k8s.io/apimachinery/pkg/api/errors" apimeta "k8s.io/apimachinery/pkg/api/meta" @@ -37,6 +41,58 @@ import ( imageutil "github.com/operator-framework/operator-controller/internal/shared/util/image" ) +// resolutionInputs captures the catalog filter fields that affect bundle resolution. +// Changes to any of these fields require re-resolution. +type resolutionInputs struct { + PackageName string `json:"packageName"` + Version string `json:"version"` + Channels []string `json:"channels,omitempty"` + Selector string `json:"selector,omitempty"` + UpgradeConstraintPolicy string `json:"upgradeConstraintPolicy,omitempty"` +} + +// calculateResolutionDigest computes a SHA256 hash of the catalog filter inputs +// that affect bundle resolution. This digest enables detection of spec changes that +// require re-resolution versus when a rolling-out revision can be reused. +// +// The digest is base64 URL-safe encoded (43 characters) to fit within Kubernetes +// label value limits (63 characters max) when stored in Helm release metadata. +func calculateResolutionDigest(catalog *ocv1.CatalogFilter) (string, error) { + if catalog == nil { + return "", nil + } + + // Sort channels for deterministic hashing + channels := slices.Clone(catalog.Channels) + slices.Sort(channels) + + inputs := resolutionInputs{ + PackageName: catalog.PackageName, + Version: catalog.Version, + Channels: channels, + UpgradeConstraintPolicy: string(catalog.UpgradeConstraintPolicy), + } + + // Convert selector to canonical string representation for deterministic hashing + if catalog.Selector != nil { + selector, err := metav1.LabelSelectorAsSelector(catalog.Selector) + if err != nil { + return "", fmt.Errorf("converting selector: %w", err) + } + inputs.Selector = selector.String() + } + + // Marshal to JSON for consistent hashing + inputsBytes, err := json.Marshal(inputs) + if err != nil { + return "", fmt.Errorf("marshaling resolution inputs: %w", err) + } + + // Compute SHA256 hash and encode as base64 URL-safe (43 chars, fits in label value limit) + hash := sha256.Sum256(inputsBytes) + return base64.RawURLEncoding.EncodeToString(hash[:]), nil +} + func HandleFinalizers(f finalizer.Finalizer) ReconcileStepFunc { return func(ctx context.Context, state *reconcileState, ext *ocv1.ClusterExtension) (*ctrl.Result, error) { l := log.FromContext(ctx) @@ -140,15 +196,32 @@ func ResolveBundle(r resolve.Resolver, c client.Client) ReconcileStepFunc { return func(ctx context.Context, state *reconcileState, ext *ocv1.ClusterExtension) (*ctrl.Result, error) { l := log.FromContext(ctx) - // If already rolling out, use existing revision and set deprecation to Unknown (no catalog check) + // If already rolling out, check if the latest rolling-out revision matches the current spec. + // If spec has changed (e.g., version, channels, selector, upgradeConstraintPolicy), we need + // to resolve a new bundle instead of reusing the rolling-out revision. + // + // Note: RollingOut slice is sorted oldest→newest, so use the last element (most recent). + // This avoids re-resolving when a newer revision already matches the spec, even if an + // older revision is still stuck rolling out. if len(state.revisionStates.RollingOut) > 0 { - installedBundleName := "" - if state.revisionStates.Installed != nil { - installedBundleName = state.revisionStates.Installed.Name + latestRollingOutRevision := state.revisionStates.RollingOut[len(state.revisionStates.RollingOut)-1] + + // Calculate digest of current spec's catalog filter + currentDigest, err := calculateResolutionDigest(ext.Spec.Source.Catalog) + if err != nil { + l.Error(err, "failed to calculate resolution digest from spec") + // On digest calculation error, fall through to re-resolve for safety + } else if currentDigest == latestRollingOutRevision.ResolutionDigest { + // Resolution inputs haven't changed - reuse the rolling-out revision + installedBundleName := "" + if state.revisionStates.Installed != nil { + installedBundleName = state.revisionStates.Installed.Name + } + SetDeprecationStatus(ext, installedBundleName, nil, false) + state.resolvedRevisionMetadata = latestRollingOutRevision + return nil, nil } - SetDeprecationStatus(ext, installedBundleName, nil, false) - state.resolvedRevisionMetadata = state.revisionStates.RollingOut[0] - return nil, nil + // Resolution inputs changed - fall through to resolve new bundle from catalog } // Resolve a new bundle from the catalog @@ -188,9 +261,17 @@ func ResolveBundle(r resolve.Resolver, c client.Client) ReconcileStepFunc { return handleResolutionError(ctx, c, state, ext, err) } + // Calculate digest of the resolution inputs for change detection + digest, err := calculateResolutionDigest(ext.Spec.Source.Catalog) + if err != nil { + l.Error(err, "failed to calculate resolution digest, continuing without it") + digest = "" + } + state.resolvedRevisionMetadata = &RevisionMetadata{ - Package: resolvedBundle.Package, - Image: resolvedBundle.Image, + Package: resolvedBundle.Package, + Image: resolvedBundle.Image, + ResolutionDigest: digest, // TODO: Right now, operator-controller only supports registry+v1 bundles and has no concept // of a "release" field. If/when we add a release field concept or a new bundle format // we need to re-evaluate use of `AsLegacyRegistryV1Version` so that we avoid propagating @@ -409,10 +490,11 @@ func ApplyBundle(a Applier) ReconcileStepFunc { return func(ctx context.Context, state *reconcileState, ext *ocv1.ClusterExtension) (*ctrl.Result, error) { l := log.FromContext(ctx) revisionAnnotations := map[string]string{ - labels.BundleNameKey: state.resolvedRevisionMetadata.Name, - labels.PackageNameKey: state.resolvedRevisionMetadata.Package, - labels.BundleVersionKey: state.resolvedRevisionMetadata.Version, - labels.BundleReferenceKey: state.resolvedRevisionMetadata.Image, + labels.BundleNameKey: state.resolvedRevisionMetadata.Name, + labels.PackageNameKey: state.resolvedRevisionMetadata.Package, + labels.BundleVersionKey: state.resolvedRevisionMetadata.Version, + labels.BundleReferenceKey: state.resolvedRevisionMetadata.Image, + labels.ResolutionDigestKey: state.resolvedRevisionMetadata.ResolutionDigest, } objLbls := map[string]string{ labels.OwnerKindKey: ocv1.ClusterExtensionKind, diff --git a/internal/operator-controller/controllers/clusterobjectset_controller.go b/internal/operator-controller/controllers/clusterobjectset_controller.go index 055f962ce..eb31e578c 100644 --- a/internal/operator-controller/controllers/clusterobjectset_controller.go +++ b/internal/operator-controller/controllers/clusterobjectset_controller.go @@ -31,7 +31,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" - "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/predicate" @@ -81,7 +80,10 @@ func (c *ClusterObjectSetReconciler) Reconcile(ctx context.Context, req ctrl.Req reconciledRev := existingRev.DeepCopy() res, reconcileErr := c.reconcile(ctx, reconciledRev) - if pd := existingRev.Spec.ProgressDeadlineMinutes; pd > 0 { + // Progress deadline enforcement only applies to Active revisions, not Archived ones. + // Archived revisions may need to retry teardown operations and should not have their + // Progressing condition or requeue behavior overridden by the deadline check. + if pd := existingRev.Spec.ProgressDeadlineMinutes; pd > 0 && reconciledRev.Spec.LifecycleState != ocv1.ClusterObjectSetLifecycleStateArchived { cnd := meta.FindStatusCondition(reconciledRev.Status.Conditions, ocv1.ClusterObjectSetTypeProgressing) isStillProgressing := cnd != nil && cnd.Status == metav1.ConditionTrue && cnd.Reason != ocv1.ReasonSucceeded succeeded := meta.IsStatusConditionTrue(reconciledRev.Status.Conditions, ocv1.ClusterObjectSetTypeSucceeded) @@ -344,29 +346,12 @@ type Sourcoser interface { } func (c *ClusterObjectSetReconciler) SetupWithManager(mgr ctrl.Manager) error { - skipProgressDeadlineExceededPredicate := predicate.Funcs{ - UpdateFunc: func(e event.UpdateEvent) bool { - rev, ok := e.ObjectNew.(*ocv1.ClusterObjectSet) - if !ok { - return true - } - // allow deletions to happen - if !rev.DeletionTimestamp.IsZero() { - return true - } - if cnd := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterObjectSetTypeProgressing); cnd != nil && cnd.Status == metav1.ConditionFalse && cnd.Reason == ocv1.ReasonProgressDeadlineExceeded { - return false - } - return true - }, - } c.Clock = clock.RealClock{} return ctrl.NewControllerManagedBy(mgr). For( &ocv1.ClusterObjectSet{}, builder.WithPredicates( predicate.ResourceVersionChangedPredicate{}, - skipProgressDeadlineExceededPredicate, ), ). WatchesRawSource( @@ -684,7 +669,33 @@ func setRetryingConditions(cos *ocv1.ClusterObjectSet, message string) { } } +// markAsProgressing sets the Progressing condition to True with the given reason. +// +// Once ProgressDeadlineExceeded has been set for the current generation, this function +// blocks only the RollingOut reason to prevent a reconcile loop for Active revisions. +// The loop would occur when reconcile() sees in-transition objects and sets +// Progressing=True/RollingOut, then the progress deadline check immediately resets it +// to ProgressDeadlineExceeded, causing a status-only update that triggers another reconcile. +// +// Archived revisions are exempt because they skip progress deadline enforcement entirely, +// so their status updates won't be overridden. +// +// Other reasons like Succeeded and Retrying are always allowed because: +// - Succeeded represents terminal success (rollout eventually completed) +// - Retrying represents transient errors that need to be visible in status +// +// If the generation changed since ProgressDeadlineExceeded was recorded, the guard +// allows all updates through so the condition reflects the new generation. func markAsProgressing(cos *ocv1.ClusterObjectSet, reason, message string) { + if cnd := meta.FindStatusCondition(cos.Status.Conditions, ocv1.ClusterObjectSetTypeProgressing); cnd != nil && + cnd.Status == metav1.ConditionFalse && cnd.Reason == ocv1.ReasonProgressDeadlineExceeded && + cnd.ObservedGeneration == cos.Generation && + reason == ocv1.ReasonRollingOut && + cos.Spec.LifecycleState != ocv1.ClusterObjectSetLifecycleStateArchived { + log.Log.V(1).Info("skipping markAsProgressing: ProgressDeadlineExceeded is sticky for RollingOut on Active revisions", + "requestedReason", reason, "revision", cos.Name, "lifecycleState", cos.Spec.LifecycleState) + return + } meta.SetStatusCondition(&cos.Status.Conditions, metav1.Condition{ Type: ocv1.ClusterObjectSetTypeProgressing, Status: metav1.ConditionTrue, diff --git a/internal/operator-controller/controllers/clusterobjectset_controller_internal_test.go b/internal/operator-controller/controllers/clusterobjectset_controller_internal_test.go index 15300f63c..7fcd44d8b 100644 --- a/internal/operator-controller/controllers/clusterobjectset_controller_internal_test.go +++ b/internal/operator-controller/controllers/clusterobjectset_controller_internal_test.go @@ -6,6 +6,7 @@ import ( "time" "github.com/stretchr/testify/require" + "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" @@ -235,3 +236,113 @@ func (m *mockTrackingCacheInternal) Watch(ctx context.Context, user client.Objec func (m *mockTrackingCacheInternal) Source(h handler.EventHandler, predicates ...predicate.Predicate) source.Source { return nil } + +func Test_markAsProgressing_doesNotOverwriteProgressDeadlineExceeded(t *testing.T) { + for _, tc := range []struct { + name string + generation int64 + existingConditions []metav1.Condition + reason string + expectProgressing metav1.ConditionStatus + expectReason string + }{ + { + name: "sets Progressing when no condition exists", + generation: 1, + existingConditions: nil, + reason: ocv1.ReasonRollingOut, + expectProgressing: metav1.ConditionTrue, + expectReason: ocv1.ReasonRollingOut, + }, + { + name: "sets Progressing when currently RollingOut", + generation: 1, + existingConditions: []metav1.Condition{ + { + Type: ocv1.ClusterObjectSetTypeProgressing, + Status: metav1.ConditionTrue, + Reason: ocv1.ReasonRollingOut, + ObservedGeneration: 1, + }, + }, + reason: ocv1.ReasonSucceeded, + expectProgressing: metav1.ConditionTrue, + expectReason: ocv1.ReasonSucceeded, + }, + { + name: "does not overwrite ProgressDeadlineExceeded with RollingOut (same generation)", + generation: 1, + existingConditions: []metav1.Condition{ + { + Type: ocv1.ClusterObjectSetTypeProgressing, + Status: metav1.ConditionFalse, + Reason: ocv1.ReasonProgressDeadlineExceeded, + ObservedGeneration: 1, + }, + }, + reason: ocv1.ReasonRollingOut, + expectProgressing: metav1.ConditionFalse, + expectReason: ocv1.ReasonProgressDeadlineExceeded, + }, + { + name: "allows Succeeded to overwrite ProgressDeadlineExceeded", + generation: 1, + existingConditions: []metav1.Condition{ + { + Type: ocv1.ClusterObjectSetTypeProgressing, + Status: metav1.ConditionFalse, + Reason: ocv1.ReasonProgressDeadlineExceeded, + ObservedGeneration: 1, + }, + }, + reason: ocv1.ReasonSucceeded, + expectProgressing: metav1.ConditionTrue, + expectReason: ocv1.ReasonSucceeded, + }, + { + name: "allows Retrying to overwrite ProgressDeadlineExceeded (same generation)", + generation: 1, + existingConditions: []metav1.Condition{ + { + Type: ocv1.ClusterObjectSetTypeProgressing, + Status: metav1.ConditionFalse, + Reason: ocv1.ReasonProgressDeadlineExceeded, + ObservedGeneration: 1, + }, + }, + reason: ocv1.ClusterObjectSetReasonRetrying, + expectProgressing: metav1.ConditionTrue, + expectReason: ocv1.ClusterObjectSetReasonRetrying, + }, + { + name: "allows RollingOut when generation changed since ProgressDeadlineExceeded", + generation: 2, + existingConditions: []metav1.Condition{ + { + Type: ocv1.ClusterObjectSetTypeProgressing, + Status: metav1.ConditionFalse, + Reason: ocv1.ReasonProgressDeadlineExceeded, + ObservedGeneration: 1, + }, + }, + reason: ocv1.ReasonRollingOut, + expectProgressing: metav1.ConditionTrue, + expectReason: ocv1.ReasonRollingOut, + }, + } { + t.Run(tc.name, func(t *testing.T) { + cos := &ocv1.ClusterObjectSet{ + ObjectMeta: metav1.ObjectMeta{Generation: tc.generation}, + Status: ocv1.ClusterObjectSetStatus{ + Conditions: tc.existingConditions, + }, + } + markAsProgressing(cos, tc.reason, "test message") + + cnd := meta.FindStatusCondition(cos.Status.Conditions, ocv1.ClusterObjectSetTypeProgressing) + require.NotNil(t, cnd) + require.Equal(t, tc.expectProgressing, cnd.Status) + require.Equal(t, tc.expectReason, cnd.Reason) + }) + } +} diff --git a/internal/operator-controller/controllers/clusterobjectset_controller_test.go b/internal/operator-controller/controllers/clusterobjectset_controller_test.go index ccfb9755e..435c6615e 100644 --- a/internal/operator-controller/controllers/clusterobjectset_controller_test.go +++ b/internal/operator-controller/controllers/clusterobjectset_controller_test.go @@ -1070,6 +1070,190 @@ func Test_ClusterObjectSetReconciler_Reconcile_ProgressDeadline(t *testing.T) { } } +// Test_ClusterObjectSetReconciler_Reconcile_ArchivalAfterProgressDeadlineExceeded verifies that +// a COS with ProgressDeadlineExceeded can still be archived. It simulates the real scenario: +// the COS starts as Active with ProgressDeadlineExceeded, then a spec patch sets +// lifecycleState to Archived (as a succeeding revision would do), and a subsequent +// reconcile processes the archival. +func Test_ClusterObjectSetReconciler_Reconcile_ArchivalAfterProgressDeadlineExceeded(t *testing.T) { + const clusterObjectSetName = "test-ext-1" + + testScheme := newScheme(t) + require.NoError(t, corev1.AddToScheme(testScheme)) + + ext := newTestClusterExtension() + rev1 := newTestClusterObjectSet(t, clusterObjectSetName, ext, testScheme) + rev1.Spec.ProgressDeadlineMinutes = 1 + rev1.CreationTimestamp = metav1.NewTime(time.Now().Add(-2 * time.Minute)) + rev1.Finalizers = []string{"olm.operatorframework.io/teardown"} + meta.SetStatusCondition(&rev1.Status.Conditions, metav1.Condition{ + Type: ocv1.ClusterObjectSetTypeProgressing, + Status: metav1.ConditionFalse, + Reason: ocv1.ReasonProgressDeadlineExceeded, + Message: "Revision has not rolled out for 1 minute(s).", + ObservedGeneration: rev1.Generation, + }) + + testClient := fake.NewClientBuilder(). + WithScheme(testScheme). + WithStatusSubresource(&ocv1.ClusterObjectSet{}). + WithObjects(rev1, ext). + Build() + + // Simulate the patch that a succeeding revision would apply. + patch := []byte(`{"spec":{"lifecycleState":"Archived"}}`) + require.NoError(t, testClient.Patch(t.Context(), + &ocv1.ClusterObjectSet{ObjectMeta: metav1.ObjectMeta{Name: clusterObjectSetName}}, + client.RawPatch(types.MergePatchType, patch), + )) + + mockEngine := &mockRevisionEngine{ + teardown: func(ctx context.Context, rev machinerytypes.Revision, opts ...machinerytypes.RevisionTeardownOption) (machinery.RevisionTeardownResult, error) { + return &mockRevisionTeardownResult{isComplete: true}, nil + }, + } + + result, err := (&controllers.ClusterObjectSetReconciler{ + Client: testClient, + RevisionEngineFactory: &mockRevisionEngineFactory{engine: mockEngine}, + TrackingCache: &mockTrackingCache{client: testClient}, + Clock: clock.RealClock{}, + }).Reconcile(t.Context(), ctrl.Request{ + NamespacedName: types.NamespacedName{Name: clusterObjectSetName}, + }) + require.NoError(t, err) + require.Equal(t, ctrl.Result{}, result) + + rev := &ocv1.ClusterObjectSet{} + require.NoError(t, testClient.Get(t.Context(), client.ObjectKey{Name: clusterObjectSetName}, rev)) + + cond := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterObjectSetTypeProgressing) + require.NotNil(t, cond) + require.Equal(t, metav1.ConditionFalse, cond.Status) + require.Equal(t, ocv1.ClusterObjectSetReasonArchived, cond.Reason) + + cond = meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterObjectSetTypeAvailable) + require.NotNil(t, cond) + require.Equal(t, metav1.ConditionUnknown, cond.Status) + require.Equal(t, ocv1.ClusterObjectSetReasonArchived, cond.Reason) +} + +// Test_ClusterObjectSetReconciler_Reconcile_ProgressDeadlineExceeded_StaysSticky verifies that +// once ProgressDeadlineExceeded is set, it is not overwritten when the reconciler runs again +// and sees objects still in transition. The Progressing condition should stay at +// ProgressDeadlineExceeded instead of being set back to RollingOut. +func Test_ClusterObjectSetReconciler_Reconcile_ProgressDeadlineExceeded_StaysSticky(t *testing.T) { + const clusterObjectSetName = "test-ext-1" + + testScheme := newScheme(t) + require.NoError(t, corev1.AddToScheme(testScheme)) + + ext := newTestClusterExtension() + rev1 := newTestClusterObjectSet(t, clusterObjectSetName, ext, testScheme) + rev1.Spec.ProgressDeadlineMinutes = 1 + rev1.CreationTimestamp = metav1.NewTime(time.Now().Add(-5 * time.Minute)) + meta.SetStatusCondition(&rev1.Status.Conditions, metav1.Condition{ + Type: ocv1.ClusterObjectSetTypeProgressing, + Status: metav1.ConditionFalse, + Reason: ocv1.ReasonProgressDeadlineExceeded, + Message: "Revision has not rolled out for 1 minute(s).", + ObservedGeneration: rev1.Generation, + }) + + testClient := fake.NewClientBuilder(). + WithScheme(testScheme). + WithStatusSubresource(&ocv1.ClusterObjectSet{}). + WithObjects(rev1, ext). + Build() + + mockEngine := &mockRevisionEngine{ + reconcile: func(ctx context.Context, rev machinerytypes.Revision, opts ...machinerytypes.RevisionReconcileOption) (machinery.RevisionResult, error) { + return &mockRevisionResult{inTransition: true}, nil + }, + } + + result, err := (&controllers.ClusterObjectSetReconciler{ + Client: testClient, + RevisionEngineFactory: &mockRevisionEngineFactory{engine: mockEngine}, + TrackingCache: &mockTrackingCache{client: testClient}, + Clock: clock.RealClock{}, + }).Reconcile(t.Context(), ctrl.Request{ + NamespacedName: types.NamespacedName{Name: clusterObjectSetName}, + }) + require.NoError(t, err) + require.Equal(t, ctrl.Result{}, result) + + rev := &ocv1.ClusterObjectSet{} + require.NoError(t, testClient.Get(t.Context(), client.ObjectKey{Name: clusterObjectSetName}, rev)) + + cond := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterObjectSetTypeProgressing) + require.NotNil(t, cond) + require.Equal(t, metav1.ConditionFalse, cond.Status, "Progressing should stay False") + require.Equal(t, ocv1.ReasonProgressDeadlineExceeded, cond.Reason, "Reason should stay ProgressDeadlineExceeded") +} + +// Test_ClusterObjectSetReconciler_Reconcile_ProgressDeadlineExceeded_SucceededOverrides verifies +// that if a revision's objects eventually complete rolling out after the deadline was exceeded, +// the COS can still transition to Succeeded. ProgressDeadlineExceeded should not prevent a +// revision from completing when its objects become ready. +func Test_ClusterObjectSetReconciler_Reconcile_ProgressDeadlineExceeded_SucceededOverrides(t *testing.T) { + const clusterObjectSetName = "test-ext-1" + + testScheme := newScheme(t) + require.NoError(t, corev1.AddToScheme(testScheme)) + + ext := newTestClusterExtension() + rev1 := newTestClusterObjectSet(t, clusterObjectSetName, ext, testScheme) + rev1.Spec.ProgressDeadlineMinutes = 1 + rev1.CreationTimestamp = metav1.NewTime(time.Now().Add(-5 * time.Minute)) + meta.SetStatusCondition(&rev1.Status.Conditions, metav1.Condition{ + Type: ocv1.ClusterObjectSetTypeProgressing, + Status: metav1.ConditionFalse, + Reason: ocv1.ReasonProgressDeadlineExceeded, + Message: "Revision has not rolled out for 1 minute(s).", + ObservedGeneration: rev1.Generation, + }) + + testClient := fake.NewClientBuilder(). + WithScheme(testScheme). + WithStatusSubresource(&ocv1.ClusterObjectSet{}). + WithObjects(rev1, ext). + Build() + + mockEngine := &mockRevisionEngine{ + reconcile: func(ctx context.Context, rev machinerytypes.Revision, opts ...machinerytypes.RevisionReconcileOption) (machinery.RevisionResult, error) { + return &mockRevisionResult{isComplete: true}, nil + }, + } + + result, err := (&controllers.ClusterObjectSetReconciler{ + Client: testClient, + RevisionEngineFactory: &mockRevisionEngineFactory{engine: mockEngine}, + TrackingCache: &mockTrackingCache{client: testClient}, + Clock: clock.RealClock{}, + }).Reconcile(t.Context(), ctrl.Request{ + NamespacedName: types.NamespacedName{Name: clusterObjectSetName}, + }) + require.NoError(t, err) + require.Equal(t, ctrl.Result{}, result) + + rev := &ocv1.ClusterObjectSet{} + require.NoError(t, testClient.Get(t.Context(), client.ObjectKey{Name: clusterObjectSetName}, rev)) + + cond := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterObjectSetTypeProgressing) + require.NotNil(t, cond) + require.Equal(t, metav1.ConditionTrue, cond.Status, "Progressing should transition to True") + require.Equal(t, ocv1.ReasonSucceeded, cond.Reason, "Reason should be Succeeded") + + cond = meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterObjectSetTypeAvailable) + require.NotNil(t, cond) + require.Equal(t, metav1.ConditionTrue, cond.Status) + + cond = meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterObjectSetTypeSucceeded) + require.NotNil(t, cond) + require.Equal(t, metav1.ConditionTrue, cond.Status) +} + func newTestClusterExtension() *ocv1.ClusterExtension { return &ocv1.ClusterExtension{ ObjectMeta: metav1.ObjectMeta{ diff --git a/internal/operator-controller/labels/labels.go b/internal/operator-controller/labels/labels.go index 805c34c3b..05f293e76 100644 --- a/internal/operator-controller/labels/labels.go +++ b/internal/operator-controller/labels/labels.go @@ -57,4 +57,9 @@ const ( // that were created during migration from Helm releases. This label is used // to distinguish migrated revisions from those created by normal Boxcutter operation. MigratedFromHelmKey = "olm.operatorframework.io/migrated-from-helm" + + // ResolutionDigestKey stores a base64-encoded SHA256 hash of the catalog filter inputs + // (packageName, version, channels, selector, upgradeConstraintPolicy) used during bundle resolution. + // The controller compares this digest to detect spec changes that require re-resolution. + ResolutionDigestKey = "olm.operatorframework.io/resolution-digest" ) diff --git a/test/e2e/features/update.feature b/test/e2e/features/update.feature index ab9408365..ea06f198d 100644 --- a/test/e2e/features/update.feature +++ b/test/e2e/features/update.feature @@ -325,3 +325,38 @@ Feature: Update ClusterExtension And ClusterObjectSet "${NAME}-2" reports Progressing as True with Reason RollingOut And ClusterObjectSet "${NAME}-2" reports Available as False with Reason ProbeFailure + @BoxcutterRuntime + @ProgressDeadline + Scenario: Old revision with ProgressDeadlineExceeded is archived when a new revision succeeds + Given min value for ClusterExtension .spec.progressDeadlineMinutes is set to 1 + And min value for ClusterObjectSet .spec.progressDeadlineMinutes is set to 1 + When ClusterExtension is applied + """ + apiVersion: olm.operatorframework.io/v1 + kind: ClusterExtension + metadata: + name: ${NAME} + spec: + namespace: ${TEST_NAMESPACE} + progressDeadlineMinutes: 1 + serviceAccount: + name: olm-sa + source: + sourceType: Catalog + catalog: + packageName: test + version: 1.0.3 + upgradeConstraintPolicy: SelfCertified + selector: + matchLabels: + "olm.operatorframework.io/metadata.name": test-catalog + """ + Then ClusterObjectSet "${NAME}-1" reports Progressing as False with Reason ProgressDeadlineExceeded + When ClusterExtension is updated to version "1.0.0" + Then resource "clusterobjectset/${NAME}-2" exists + And ClusterExtension has been reconciled the latest generation + And ClusterObjectSet "${NAME}-1" is archived + And ClusterExtension is rolled out + And ClusterExtension is available + And ClusterObjectSet "${NAME}-2" reports Progressing as True with Reason Succeeded +