From 8df52afeee1fd392d45df93679c3c9472e6b8835 Mon Sep 17 00:00:00 2001 From: Arthur Bellal Date: Mon, 26 May 2025 17:37:59 +0200 Subject: [PATCH 1/4] (fleet) collect journalctl logs on error --- .../installer/packages/datadog_agent_linux.go | 14 +++++++--- .../packages/service/systemd/systemd.go | 11 ++++++++ .../installer/setup/common/services_nix.go | 27 +++++++++---------- 3 files changed, 34 insertions(+), 18 deletions(-) diff --git a/pkg/fleet/installer/packages/datadog_agent_linux.go b/pkg/fleet/installer/packages/datadog_agent_linux.go index b750a8327ad4..97764b781834 100644 --- a/pkg/fleet/installer/packages/datadog_agent_linux.go +++ b/pkg/fleet/installer/packages/datadog_agent_linux.go @@ -481,11 +481,11 @@ func (s *datadogAgentService) StopStable(ctx HookContext) error { } switch service.GetServiceManagerType() { case service.SystemdType: - return systemd.StopUnits(ctx, s.SystemdUnitsStable...) + return systemd.StopUnits(ctx, s.SystemdMainUnitStable) case service.UpstartType: - return upstart.StopAll(ctx, s.UpstartServices...) + return upstart.StopAll(ctx, reverseStringSlice(s.UpstartServices)...) case service.SysvinitType: - return sysvinit.StopAll(ctx, s.SysvinitServices...) + return sysvinit.StopAll(ctx, reverseStringSlice(s.SysvinitServices)...) default: return fmt.Errorf("unsupported service manager") } @@ -659,3 +659,11 @@ func writeEmbeddedUnit(dir string, unit string, content []byte) error { } return nil } + +func reverseStringSlice(slice []string) []string { + reversed := make([]string, len(slice)) + for i := range slice { + reversed[i] = slice[len(slice)-1-i] + } + return reversed +} diff --git a/pkg/fleet/installer/packages/service/systemd/systemd.go b/pkg/fleet/installer/packages/service/systemd/systemd.go index 0efd423c29a5..f77054cc0103 100644 --- a/pkg/fleet/installer/packages/service/systemd/systemd.go +++ b/pkg/fleet/installer/packages/service/systemd/systemd.go @@ -16,6 +16,7 @@ import ( "os/exec" "path/filepath" "syscall" + "time" "github.com/DataDog/datadog-agent/pkg/fleet/installer/telemetry" "github.com/DataDog/datadog-agent/pkg/util/log" @@ -142,3 +143,13 @@ func IsRunning() (running bool, err error) { } return true, nil } + +// JournaldLogs returns the logs for a given unit since a given time +func JournaldLogs(ctx context.Context, unit string, since time.Time) (string, error) { + journalctlCmd := exec.CommandContext(ctx, "journalctl", "--unit", unit, "-e", "--no-pager", "--since", since.Format(time.RFC3339)) + stdout, err := journalctlCmd.Output() + if err != nil { + return "", err + } + return string(stdout), nil +} diff --git a/pkg/fleet/installer/setup/common/services_nix.go b/pkg/fleet/installer/setup/common/services_nix.go index 1e493ce1efc1..1e635b70e603 100644 --- a/pkg/fleet/installer/setup/common/services_nix.go +++ b/pkg/fleet/installer/setup/common/services_nix.go @@ -8,32 +8,29 @@ package common import ( - "bytes" "fmt" - "os/exec" + "time" + + "github.com/DataDog/datadog-agent/pkg/fleet/installer/packages/service/systemd" + "github.com/DataDog/datadog-agent/pkg/fleet/installer/telemetry" ) // restartServices restarts the services that need to be restarted after a package upgrade or // an install script re-run; because the configuration may have changed. func (s *Setup) restartServices(pkgs []packageWithVersion) error { + t := time.Now() + span, ctx := telemetry.StartSpanFromContext(s.Ctx, "restartServices") for _, pkg := range pkgs { switch pkg.name { case DatadogAgentPackage: - if err := restartService("datadog-agent.service"); err != nil { - return err + err := systemd.RestartUnit(ctx, "datadog-agent.service") + if err != nil { + logs, logsErr := systemd.JournaldLogs(ctx, "datadog-agent.service", t) + span.SetTag("journald_logs", logs) + span.SetTag("journald_logs_err", logsErr) + return fmt.Errorf("failed to restart datadog-agent.service: %w", err) } } } return nil } - -func restartService(unit string) error { - cmd := exec.Command("systemctl", "restart", unit) - stderr := bytes.Buffer{} - cmd.Stderr = &stderr - err := cmd.Run() - if err != nil { - return fmt.Errorf("failed to restart %s (%s): %s", unit, err.Error(), stderr.String()) - } - return nil -} From 505d1a5b36787ab53d198fcfd3aa3ee5556f194a Mon Sep 17 00:00:00 2001 From: Arthur Bellal Date: Mon, 26 May 2025 17:40:58 +0200 Subject: [PATCH 2/4] nice eng --- pkg/fleet/installer/packages/datadog_agent_linux.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/fleet/installer/packages/datadog_agent_linux.go b/pkg/fleet/installer/packages/datadog_agent_linux.go index 97764b781834..cfb4297ce0c1 100644 --- a/pkg/fleet/installer/packages/datadog_agent_linux.go +++ b/pkg/fleet/installer/packages/datadog_agent_linux.go @@ -10,6 +10,7 @@ import ( "fmt" "os" "path/filepath" + "slices" "strings" "github.com/DataDog/datadog-agent/pkg/fleet/installer/installinfo" @@ -662,8 +663,7 @@ func writeEmbeddedUnit(dir string, unit string, content []byte) error { func reverseStringSlice(slice []string) []string { reversed := make([]string, len(slice)) - for i := range slice { - reversed[i] = slice[len(slice)-1-i] - } + copy(reversed, slice) + slices.Reverse(reversed) return reversed } From 817c0dfe76128c9f5c80d53684b335bbcb0ddf46 Mon Sep 17 00:00:00 2001 From: Arthur Bellal Date: Mon, 26 May 2025 17:44:15 +0200 Subject: [PATCH 3/4] raphael's _COMM=systemd --- pkg/fleet/installer/packages/service/systemd/systemd.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/fleet/installer/packages/service/systemd/systemd.go b/pkg/fleet/installer/packages/service/systemd/systemd.go index f77054cc0103..912bdbea999b 100644 --- a/pkg/fleet/installer/packages/service/systemd/systemd.go +++ b/pkg/fleet/installer/packages/service/systemd/systemd.go @@ -146,7 +146,7 @@ func IsRunning() (running bool, err error) { // JournaldLogs returns the logs for a given unit since a given time func JournaldLogs(ctx context.Context, unit string, since time.Time) (string, error) { - journalctlCmd := exec.CommandContext(ctx, "journalctl", "--unit", unit, "-e", "--no-pager", "--since", since.Format(time.RFC3339)) + journalctlCmd := exec.CommandContext(ctx, "journalctl", "_COMM=systemd", "--unit", unit, "-e", "--no-pager", "--since", since.Format(time.RFC3339)) stdout, err := journalctlCmd.Output() if err != nil { return "", err From b96e517bbecc6c968d89e339e1e4f86bd1f153b8 Mon Sep 17 00:00:00 2001 From: Arthur Bellal Date: Tue, 27 May 2025 13:30:00 +0200 Subject: [PATCH 4/4] test fix attempt --- pkg/fleet/installer/packages/datadog_agent_linux.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/fleet/installer/packages/datadog_agent_linux.go b/pkg/fleet/installer/packages/datadog_agent_linux.go index cfb4297ce0c1..0ef5912965fa 100644 --- a/pkg/fleet/installer/packages/datadog_agent_linux.go +++ b/pkg/fleet/installer/packages/datadog_agent_linux.go @@ -482,7 +482,7 @@ func (s *datadogAgentService) StopStable(ctx HookContext) error { } switch service.GetServiceManagerType() { case service.SystemdType: - return systemd.StopUnits(ctx, s.SystemdMainUnitStable) + return systemd.StopUnits(ctx, reverseStringSlice(s.SystemdUnitsStable)...) case service.UpstartType: return upstart.StopAll(ctx, reverseStringSlice(s.UpstartServices)...) case service.SysvinitType: