Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,16 @@ run:
allow-parallel-runners: true
linters:
enable:
- dupword
- durationcheck
- errorlint
- fatcontext
- gocyclo
- misspell
- nilerr
- noctx
- unparam
- usestdlibvars
settings:
gocyclo:
min-complexity: 30
Expand Down
11 changes: 8 additions & 3 deletions availability-prober/availability_prober.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,13 @@ func NewStartCommand() *cobra.Command {
}
}

check(log, url, time.Second, time.Second, opts.requiredAPIsParsed, opts.waitForInfrastructureResource, opts.waitForClusterRolebinding, opts.waitForLabeledPodsGone, discoveryClient, kubeClient)
check(cmd.Context(), log, url, time.Second, time.Second, opts.requiredAPIsParsed, opts.waitForInfrastructureResource, opts.waitForClusterRolebinding, opts.waitForLabeledPodsGone, discoveryClient, kubeClient)
}

return cmd
}

func check(log logr.Logger, target *url.URL, requestTimeout time.Duration, sleepTime time.Duration, requiredAPIs []schema.GroupVersionKind, waitForInfrastructureResource bool, waitForClusterRolebinding, waitForLabeledPodsGone string, discoveryClient discovery.DiscoveryInterface, kubeClient crclient.Client) {
func check(ctx context.Context, log logr.Logger, target *url.URL, requestTimeout time.Duration, sleepTime time.Duration, requiredAPIs []schema.GroupVersionKind, waitForInfrastructureResource bool, waitForClusterRolebinding, waitForLabeledPodsGone string, discoveryClient discovery.DiscoveryInterface, kubeClient crclient.Client) {
log = log.WithValues("sleepTime", sleepTime.String())
client := &http.Client{
Timeout: requestTimeout,
Expand All @@ -118,7 +118,12 @@ func check(log logr.Logger, target *url.URL, requestTimeout time.Duration, sleep
},
}
for ; ; time.Sleep(sleepTime) {
response, err := client.Get(target.String())
req, err := http.NewRequestWithContext(ctx, http.MethodGet, target.String(), nil)
if err != nil {
log.Error(err, "Failed to create request, retrying...")
continue
}
response, err := client.Do(req)
if err != nil {
Comment on lines 120 to 127
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Handle context cancellation to stop the probe loop.

Line 120 currently retries forever. Once ctx is canceled, Do(req) will keep failing and this loop never exits.

Suggested fix
 	for ; ; time.Sleep(sleepTime) {
+		select {
+		case <-ctx.Done():
+			log.Info("probe canceled, exiting", "reason", ctx.Err())
+			return
+		default:
+		}
 		req, err := http.NewRequestWithContext(ctx, http.MethodGet, target.String(), nil)
 		if err != nil {
 			log.Error(err, "Failed to create request, retrying...")
 			continue
 		}
 		response, err := client.Do(req)
 		if err != nil {
+			if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
+				log.Info("probe canceled, exiting", "reason", err)
+				return
+			}
 			log.Error(err, "Request failed, retrying...")
 			continue
 		}

As per coding guidelines "Do not leak goroutines — ensure they exit cleanly".

📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
for ; ; time.Sleep(sleepTime) {
response, err := client.Get(target.String())
req, err := http.NewRequestWithContext(ctx, http.MethodGet, target.String(), nil)
if err != nil {
log.Error(err, "Failed to create request, retrying...")
continue
}
response, err := client.Do(req)
if err != nil {
for ; ; time.Sleep(sleepTime) {
select {
case <-ctx.Done():
log.Info("probe canceled, exiting", "reason", ctx.Err())
return
default:
}
req, err := http.NewRequestWithContext(ctx, http.MethodGet, target.String(), nil)
if err != nil {
log.Error(err, "Failed to create request, retrying...")
continue
}
response, err := client.Do(req)
if err != nil {
if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
log.Info("probe canceled, exiting", "reason", err)
return
}
log.Error(err, "Request failed, retrying...")
continue
}
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@availability-prober/availability_prober.go` around lines 120 - 127, The
infinite retry loop around creating and sending the request (the for ; ;
time.Sleep(sleepTime) loop that calls http.NewRequestWithContext(ctx, ...) and
client.Do(req)) doesn't stop when ctx is canceled; update the loop to observe
ctx.Done() and exit cleanly: before sleeping or retrying check if ctx.Err() !=
nil (or select on ctx.Done()) and break/return when canceled, and also after a
failed client.Do(req) check ctx.Err() and stop rather than continuing; ensure
this change touches the loop that constructs req and calls client.Do so the
goroutine can exit when the provided context is canceled.

log.Error(err, "Request failed, retrying...")
continue
Expand Down
2 changes: 1 addition & 1 deletion cmd/bastion/aws/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ func (o *CreateBastionOpts) Run(ctx context.Context, logger logr.Logger) (string
var err error
sshPublicKey, err = os.ReadFile(o.SSHKeyFile)
if err != nil {
return "", "", fmt.Errorf("cannot read SSH public key from %s: %v", o.SSHKeyFile, err)
return "", "", fmt.Errorf("cannot read SSH public key from %s: %w", o.SSHKeyFile, err)
}
}

Expand Down
4 changes: 2 additions & 2 deletions cmd/cluster/core/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -1235,7 +1235,7 @@ func validateMgmtClusterAndNodePoolCPUArchitectures(ctx context.Context, opts *R
if !validMultiArchImage {
mgmtClusterCPUArch, err := hyperutil.GetMgmtClusterCPUArch(kc)
if err != nil {
return fmt.Errorf("failed to check mgmt cluster CPU arch: %v", err)
return fmt.Errorf("failed to check mgmt cluster CPU arch: %w", err)
}

if !strings.EqualFold(mgmtClusterCPUArch, opts.Arch) {
Expand All @@ -1253,7 +1253,7 @@ func validateMgmtClusterAndNodePoolCPUArchitectures(ctx context.Context, opts *R
func validateVersion(ctx context.Context, versionCLI string, client crclient.Client) error {
_, operatorVersion, err := supportedversion.GetSupportedOCPVersions(ctx, "hypershift", client, nil)
if err != nil {
return fmt.Errorf("failed to get supported OCP versions: %v", err)
return fmt.Errorf("failed to get supported OCP versions: %w", err)
}
if operatorVersion != versionCLI {
return fmt.Errorf("version mismatch detected, CLI: %s, Operator: %s", versionCLI, operatorVersion)
Expand Down
2 changes: 1 addition & 1 deletion cmd/cluster/powervs/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ var _ core.Platform = (*CreateOptions)(nil)
func NewCreateCommand(opts *core.RawCreateOptions) *cobra.Command {
cmd := &cobra.Command{
Use: "powervs",
Short: "Creates basic functional HostedCluster resources on PowerVS PowerVS",
Short: "Creates basic functional HostedCluster resources on PowerVS",
SilenceUsage: true,
}

Expand Down
10 changes: 5 additions & 5 deletions cmd/infra/aws/iam.go
Original file line number Diff line number Diff line change
Expand Up @@ -891,7 +891,7 @@ func (o *CreateIAMOptions) CreateOIDCResources(ctx context.Context, iamClient aw
// Create a single shared role with all policies
sharedRoleARN, err := o.CreateSharedOIDCRole(ctx, iamClient, bindings, providerARN, providerName, logger)
if err != nil {
return nil, fmt.Errorf("failed to create shared OIDC role: %v", err)
return nil, fmt.Errorf("failed to create shared OIDC role: %w", err)
}
// Set all role ARNs to the shared role ARN
for into := range bindings {
Expand All @@ -903,7 +903,7 @@ func (o *CreateIAMOptions) CreateOIDCResources(ctx context.Context, iamClient aw
trustPolicy := oidcTrustPolicy(providerARN, providerName, binding.serviceAccounts...)
arn, err := o.CreateOIDCRole(ctx, iamClient, binding, trustPolicy, logger)
if err != nil {
return nil, fmt.Errorf("failed to create OIDC Role %q: with trust policy %s and permission policy %s: %v", binding.name, trustPolicy, binding.policy, err)
return nil, fmt.Errorf("failed to create OIDC Role %q: with trust policy %s and permission policy %s: %w", binding.name, trustPolicy, binding.policy, err)
}
*into = arn
}
Expand Down Expand Up @@ -966,7 +966,7 @@ func (o *CreateIAMOptions) CreateOIDCResources(ctx context.Context, iamClient aw
]
}`, ingressPolicyStatement, ccmPolicyStatement)),
}); err != nil {
return nil, fmt.Errorf("failed to create role policy %q: with permission policy %s: %v", ingressRoleName, ingressPolicyStatement, err)
return nil, fmt.Errorf("failed to create role policy %q: with permission policy %s: %w", ingressRoleName, ingressPolicyStatement, err)
}
logger.Info("Added inline shared policy to ROSA Managed Role", "role", ingressRoleName)
} else {
Expand All @@ -979,7 +979,7 @@ func (o *CreateIAMOptions) CreateOIDCResources(ctx context.Context, iamClient aw
"Statement": [%s]
}`, ingressPolicyStatement)),
}); err != nil {
return nil, fmt.Errorf("failed to create role policy %q: with permission policy %s: %v", ingressRoleName, ingressPolicyStatement, err)
return nil, fmt.Errorf("failed to create role policy %q: with permission policy %s: %w", ingressRoleName, ingressPolicyStatement, err)
}
logger.Info("Added inline policy to ROSA Ingress Managed Role", "role", ingressRoleName)

Expand All @@ -992,7 +992,7 @@ func (o *CreateIAMOptions) CreateOIDCResources(ctx context.Context, iamClient aw
"Statement": [%s]
}`, ccmPolicyStatement)),
}); err != nil {
return nil, fmt.Errorf("failed to create role policy %q: with permission policy %s: %v", ccmRoleName, ccmPolicyStatement, err)
return nil, fmt.Errorf("failed to create role policy %q: with permission policy %s: %w", ccmRoleName, ccmPolicyStatement, err)
}
logger.Info("Added inline policy to ROSA Cloud Controller Manager Managed Role", "role", ccmRoleName)
}
Expand Down
2 changes: 1 addition & 1 deletion cmd/infra/aws/iam_policies.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ func APIsByDelegatedServices() (ServicesByDelegate, error) {
for _, binding := range bindings {
p := policy{}
if err := json.Unmarshal([]byte(binding.policy), &p); err != nil {
return nil, fmt.Errorf("error unmarshalling delegate policy for %q: %v", binding.name, err)
return nil, fmt.Errorf("error unmarshalling delegate policy for %q: %w", binding.name, err)
}
delegate := EndpointsByService{}
for i, statement := range p.Statement {
Expand Down
9 changes: 6 additions & 3 deletions cmd/infra/aws/route53.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,10 @@ func (o *DestroyInfraOptions) CleanupPublicZone(ctx context.Context, client awsa
name := o.BaseDomain
id, err := LookupZone(ctx, client, name, false)
if err != nil {
return nil
if strings.Contains(err.Error(), "not found") {
return nil
}
return fmt.Errorf("failed to lookup public hosted zone %s: %w", name, err)
}
recordName := fmt.Sprintf("*.apps.%s.%s", o.Name, o.BaseDomain)
err = deleteRecord(ctx, client, id, recordName)
Expand Down Expand Up @@ -239,12 +242,12 @@ func setSOAMinimum(ctx context.Context, client awsapi.ROUTE53API, id, name strin
func deleteZone(ctx context.Context, id string, client awsapi.ROUTE53API, logger logr.Logger) error {
err := deleteRecords(ctx, client, id, logger)
if err != nil {
return fmt.Errorf("failed to delete hosted zone records: %v", err)
return fmt.Errorf("failed to delete hosted zone records: %w", err)
}
if _, err = client.DeleteHostedZone(ctx, &route53.DeleteHostedZoneInput{
Id: aws.String(id),
}); err != nil {
return fmt.Errorf("failed to delete hosted zone: %v", err)
return fmt.Errorf("failed to delete hosted zone: %w", err)
}
return nil
}
Expand Down
83 changes: 67 additions & 16 deletions cmd/infra/aws/route53_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -317,10 +317,14 @@ func TestCreatePrivateZone(t *testing.T) {

func TestCleanupPublicZone(t *testing.T) {
tests := []struct {
name string
setupMock func(*awsapi.MockROUTE53API)
expectError bool
errorContains string
name string
redact bool
setupMock func(*awsapi.MockROUTE53API)
expectError bool
errorContains string
useCtx func() context.Context
wantLogRedacted bool
wantLogSubstring string
}{
{
name: "When zone and wildcard record exist it should delete the record and return nil",
Expand All @@ -340,6 +344,27 @@ func TestCleanupPublicZone(t *testing.T) {
Return(&route53.ChangeResourceRecordSetsOutput{}, nil)
},
},
{
name: "When zone and wildcard record exist with redact enabled it should delete the record and redact the domain in logs",
redact: true,
setupMock: func(m *awsapi.MockROUTE53API) {
m.EXPECT().ListHostedZones(gomock.Any(), gomock.Any(), gomock.Any()).
Return(publicZonePage("PUBZONE", testBaseDomain), nil)
m.EXPECT().ListResourceRecordSets(gomock.Any(), gomock.Any(), gomock.Any()).
Return(&route53.ListResourceRecordSetsOutput{
ResourceRecordSets: []route53types.ResourceRecordSet{
{
Name: aws.String("*.apps." + testCluster + "." + testBaseDomain + "."),
Type: route53types.RRTypeA,
},
},
}, nil)
m.EXPECT().ChangeResourceRecordSets(gomock.Any(), gomock.Any(), gomock.Any()).
Return(&route53.ChangeResourceRecordSetsOutput{}, nil)
},
wantLogRedacted: true,
wantLogSubstring: "[REDACTED]",
},
Comment thread
coderabbitai[bot] marked this conversation as resolved.
{
name: "When the zone is not found it should return nil as a no-op",
setupMock: func(m *awsapi.MockROUTE53API) {
Expand All @@ -358,6 +383,16 @@ func TestCleanupPublicZone(t *testing.T) {
}, nil)
},
},
{
name: "When LookupZone fails with a non-not-found error it should return a wrapped error",
useCtx: cancelledCtx,
setupMock: func(m *awsapi.MockROUTE53API) {
m.EXPECT().ListHostedZones(gomock.Any(), gomock.Any(), gomock.Any()).
Return(nil, errors.New("throttling exception"))
},
expectError: true,
errorContains: "failed to lookup public hosted zone",
},
{
name: "When ChangeResourceRecordSets fails with a non-404 error it should return the error",
setupMock: func(m *awsapi.MockROUTE53API) {
Expand All @@ -382,28 +417,44 @@ func TestCleanupPublicZone(t *testing.T) {

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
g := NewWithT(t)
ctrl := gomock.NewController(t)
mockR53 := awsapi.NewMockROUTE53API(ctrl)
tt.setupMock(mockR53)

ctx := t.Context()
if tt.useCtx != nil {
ctx = tt.useCtx()
}

var logOutput strings.Builder
logger := logr.Discard()
if tt.wantLogRedacted {
logger = funcr.New(func(prefix, args string) {
logOutput.WriteString(args)
}, funcr.Options{})
}

o := &DestroyInfraOptions{
BaseDomain: testBaseDomain,
Name: testCluster,
Log: logr.Discard(),
BaseDomain: testBaseDomain,
Name: testCluster,
RedactBaseDomain: tt.redact,
Log: logger,
}
err := o.CleanupPublicZone(context.Background(), mockR53)
err := o.CleanupPublicZone(ctx, mockR53)

if tt.expectError {
if err == nil {
t.Fatal("expected error, got nil")
}
if tt.errorContains != "" && !strings.Contains(err.Error(), tt.errorContains) {
t.Errorf("expected error containing %q, got: %v", tt.errorContains, err)
g.Expect(err).To(HaveOccurred())
if tt.errorContains != "" {
g.Expect(err).To(MatchError(ContainSubstring(tt.errorContains)))
}
} else {
if err != nil {
t.Errorf("expected no error, got: %v", err)
}
g.Expect(err).ToNot(HaveOccurred())
}

if tt.wantLogRedacted {
g.Expect(logOutput.String()).To(ContainSubstring(tt.wantLogSubstring))
g.Expect(logOutput.String()).ToNot(ContainSubstring(testBaseDomain))
}
})
}
Expand Down
3 changes: 2 additions & 1 deletion cmd/infra/aws/util/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ import (
)

func IsErrorRetryable(err error) bool {
if aggregate, isAggregate := err.(utilerrors.Aggregate); isAggregate {
var aggregate utilerrors.Aggregate
if errors.As(err, &aggregate) {
if len(aggregate.Errors()) == 1 {
err = aggregate.Errors()[0]
} else {
Expand Down
76 changes: 76 additions & 0 deletions cmd/infra/aws/util/errors_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package util

import (
"errors"
"fmt"
"testing"

. "github.com/onsi/gomega"

"github.com/aws/aws-sdk-go-v2/config"

utilerrors "k8s.io/apimachinery/pkg/util/errors"
)

func TestIsErrorRetryable(t *testing.T) {
t.Parallel()
tests := []struct {
name string
err error
expected bool
}{
{
name: "When error is a generic error it should be retryable",
err: errors.New("some transient error"),
expected: true,
},
{
name: "When error is a credential load error it should not be retryable",
err: config.SharedConfigLoadError{},
expected: false,
},
{
name: "When error is a wrapped credential load error it should not be retryable",
err: fmt.Errorf("loading config: %w", config.SharedConfigLoadError{}),
expected: false,
},
{
name: "When aggregate has single generic error it should be retryable",
err: utilerrors.NewAggregate([]error{errors.New("transient")}),
expected: true,
},
{
name: "When aggregate has single credential load error it should not be retryable",
err: utilerrors.NewAggregate([]error{config.SharedConfigLoadError{}}),
expected: false,
},
{
name: "When aggregate has only credential load errors it should not be retryable",
err: utilerrors.NewAggregate([]error{
config.SharedConfigLoadError{},
config.SharedConfigLoadError{},
}),
expected: false,
},
{
name: "When aggregate has mixed errors it should be retryable",
err: utilerrors.NewAggregate([]error{
config.SharedConfigLoadError{},
errors.New("some other error"),
}),
expected: true,
},
{
name: "When wrapped aggregate has single generic error it should be retryable",
err: fmt.Errorf("operation failed: %w", utilerrors.NewAggregate([]error{errors.New("transient")})),
expected: true,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
g := NewWithT(t)
g.Expect(IsErrorRetryable(tt.err)).To(Equal(tt.expected))
})
}
}
3 changes: 2 additions & 1 deletion cmd/infra/azure/create_iam.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"encoding/json"
"errors"
"fmt"
"net/http"
"os"

"github.com/openshift/hypershift/cmd/log"
Expand Down Expand Up @@ -198,7 +199,7 @@ func (o *CreateIAMOptions) ensureResourceGroup(ctx context.Context, l logr.Logge
}

var respErr *azcore.ResponseError
if !errors.As(err, &respErr) || respErr.StatusCode != 404 {
if !errors.As(err, &respErr) || respErr.StatusCode != http.StatusNotFound {
return fmt.Errorf("failed to check resource group %q: %w", o.ResourceGroupName, err)
}

Expand Down
Loading