Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,30 @@ Errors that should stop the command are **returned**, not logged — they bubble

The handler and the `--log` flag (`debug|warn|none`, default `warn`) live in [`cmds/dutctl/clilog.go`](cmds/dutctl/clilog.go).

### Agent and server logging

`dutagent` (and `dutserver`) are service daemons, so they log differently from the client: structured records via [`internal/log`](internal/log) (built on `log/slog`), to **stderr**, at the full set of levels. The base logger is installed in `start()` (`slog.SetDefault`); the `-log` flag sets the level (`debug|info|warn|error`, default `info`) and `-log-json` switches the text handler for a JSON one. Human/TTY output is `2006/01/02 15:04:05 LEVEL [scope] message key=value` (color only on a terminal); JSON emits `scope` as an attribute. Never log to stdout.

**Obtain the logger from the context.** Code retrieves it with `log.FromContext(ctx)`. At a component boundary, the caller sets the scope and any shared attributes *before* handing control on, so each component logs only its own concern:

- `ctx = log.WithScope(ctx, "session")` — set the component scope (a flat label; a new scope replaces the old one).
- `ctx = log.With(ctx, "device", dev)` — add structured attributes that descend with the context.

Module `Init`/`Deinit` and `Run` all receive a context carrying the `module`-scoped logger — a good place to log applied defaults from missing config, and the external hosts/tools/devices the module talks to. Where there genuinely is no context — process bootstrap, the `Session` methods — use `slog.Default()` or a logger frozen into the struct at construction (see `Locker`, `session.log`). Scopes in `dutagent`: `agent` (bootstrap/lifecycle, the default), `rpc`, `session`, `locker`, `module`. Scopes in `dutserver`: `server` (bootstrap/lifecycle, the default), `rpc`, `relay` (with directional `relay downstream`/`relay upstream` for the two forwarders of the relayed `Run` stream), and `registry` (the device→agent map).

Module logging is for the **admin/operator** (the person who wrote the config and wired the DUTs), not a re-narration of what the module already prints to the client. Worth logging: external interactions with their effective parameters (tool + command, host:port, device + settings), and defaults a module applies for missing config. `Init`/`Deinit` should not log lifecycle markers — the framework already logs those.

Always log through a logger **instance** (`log.FromContext(ctx).Info(...)`, `l.Warn(...)`), never the package-level `slog.Info`/`slog.Error`/… — those are rejected repo-wide by `forbidigo`.

Levels:

- `Error` — the agent failed at something it was asked to do; an operator likely must act.
- `Warn` — a handled or tolerated anomaly (e.g. a malformed client message, an admin force-unlock).
- `Info` — normal operational milestones; a few lines per request (request received/finished, running a module, listening).
- `Debug` — internal tracing, off by default (per-message traffic, worker start/stop, module init/deinit).

An error returned up the call stack is logged once where it becomes terminal (the RPC handler), not at every hop. Module bodies should not log lifecycle markers (`"Run called"`); the framework logs the transition and sets the scope.

### Documentation Style Guide

- Use Markdown for documentation
Expand Down
77 changes: 55 additions & 22 deletions cmds/dutagent/dutagent.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
"flag"
"fmt"
"io"
"log"
"log/slog"
"net/http"
"os"
"os/signal"
Expand All @@ -23,6 +23,7 @@
"connectrpc.com/connect"
"github.com/BlindspotSoftware/dutctl/internal/buildinfo"
"github.com/BlindspotSoftware/dutctl/internal/dutagent"
"github.com/BlindspotSoftware/dutctl/internal/log"
"github.com/BlindspotSoftware/dutctl/pkg/dut"
"github.com/BlindspotSoftware/dutctl/protobuf/gen/dutctl/v1/dutctlv1connect"
"gopkg.in/yaml.v3"
Expand All @@ -37,6 +38,8 @@
dryRunInfo = `Only run the initialization phase of the modules, not start the (includes validation of the configuration)`
serverInfo = `Optional DUT Server address and port to register with in the format: address:port`
versionFlagInfo = `Print version information and exit`
logLevelInfo = `Log level: debug, info, warn, or error`
logJSONInfo = `Emit logs as JSON instead of human-readable text`
)

func newAgent(stdout io.Writer, exitFunc func(int), args []string) *agent {
Expand All @@ -52,6 +55,8 @@
fs.BoolVar(&agt.dryRun, "dry-run", false, dryRunInfo)
fs.StringVar(&agt.server, "server", "", serverInfo)
fs.BoolVar(&agt.versionFlag, "v", false, versionFlagInfo)
fs.StringVar(&agt.logLevel, "log", "info", logLevelInfo)
fs.BoolVar(&agt.logJSON, "log-json", false, logJSONInfo)
//nolint:errcheck // flag.Parse always returns no error because of flag.ExitOnError
fs.Parse(args[1:])

Expand All @@ -70,6 +75,8 @@
checkConfig bool
dryRun bool
server string
logLevel string
logJSON bool

// state
config config
Expand All @@ -94,10 +101,13 @@
// otherwise with provided exitCode.
func (agt *agent) cleanup(code exitCode) {
if agt.modulesNeedDeinit {
err := dutagent.Deinit(agt.config.Devices)
// TODO(ctx): this background context is the shutdown seam. A later change
// can make it a context.WithTimeout to bound module Deinit, or derive it
// from a shutdown-signal context. It flows into every module's Deinit.
err := dutagent.Deinit(context.Background(), agt.config.Devices)
if err != nil {
printInitErr(err)
log.Print("System might be in an UNKNOWN STATE !!!")
slog.Default().Error("system might be in an UNKNOWN STATE")
agt.exit(1)
}
}
Expand All @@ -113,7 +123,7 @@

go func() {
sig := <-c
log.Printf("Captured signal: %v", sig)
slog.Default().Info("captured signal", "signal", sig)
agt.cleanup(exit0)
}()
}
Expand All @@ -132,24 +142,28 @@
return nil
}

func (agt *agent) initModules() error {
func (agt *agent) initModules(ctx context.Context) error {
agt.modulesNeedDeinit = true

return dutagent.Init(agt.config.Devices)
return dutagent.Init(ctx, agt.config.Devices)
}

// printInitErr extracts and pretty-prints the details of a dutagent.ModuleInitErr
// if err is of this type, otherwise it just prints err.
func printInitErr(err error) {
l := slog.Default()

var initerr *dutagent.ModuleInitError
if errors.As(err, &initerr) {
for _, item := range initerr.Errs {
devstr := fmt.Sprintf("dev:%q cmd:%q module:%q", item.Dev, item.Cmd, item.Mod.Config.Name)
log.Printf("init %s failed with:\n%v\n", devstr, item.Err)
l.Error("module init failed",
"device", item.Dev, "command", item.Cmd, "module", item.Mod.Config.Name, "err", item.Err)
}

return
}

log.Print(err)
l.Error("module initialization error", "err", err)
}

// readHeaderTimeout bounds how long the server waits to read request headers.
Expand Down Expand Up @@ -177,24 +191,30 @@
srv.Protocols.SetHTTP1(true)
srv.Protocols.SetUnencryptedHTTP2(true)

slog.Default().Info("rpc service listening", "addr", agt.address)

return srv.ListenAndServe()
}

func (agt *agent) registerWithServer() error {
log.Printf("Registering with server %q", agt.server)
l := slog.Default()
l.Info("registering with server", "server", agt.server)

client := spawnClient(agt.server)
req := connect.NewRequest(&pb.RegisterRequest{
Devices: agt.config.Devices.Names(),
Address: agt.address,
})

// TODO(ctx): background context — registration has no deadline or
// cancellation. A later change can bound it (context.WithTimeout) or tie it
// to a startup/shutdown context.
_, err := client.Register(context.Background(), req)
if err != nil {
return fmt.Errorf("registering with server %q failed: %w", agt.server, err)
}

log.Printf("Successfully registered with server %q", agt.server)
l.Info("successfully registered with server", "server", agt.server)

return nil
}
Expand All @@ -204,7 +224,7 @@
//
//nolint:ireturn
func spawnClient(agendURL string) dutctlv1connect.RelayServiceClient {
log.Printf("Spawning new client for agent %q", agendURL)
slog.Default().Info("spawning new client for agent", "url", agendURL)

return dutctlv1connect.NewRelayServiceClient(
// Instead of http.DefaultClient, use the HTTP/2 protocol without TLS
Expand Down Expand Up @@ -233,8 +253,15 @@
// start orchestrates the dutagent execution.
//
//nolint:cyclop
func (agt *agent) start() {

Check failure on line 256 in cmds/dutagent/dutagent.go

View workflow job for this annotation

GitHub Actions / Lint

Function 'start' is too long (61 > 60) (funlen)
log.SetOutput(agt.stdout)
// Install the process-wide structured logger. Service diagnostics go to
// stderr (stdout is reserved for program output such as the version banner).
// The default is scoped "agent"; request handlers replace the scope as
// control enters their subsystem. See package internal/log.
base := log.New(os.Stderr, log.ParseLevel(agt.logLevel), agt.logJSON)
slog.SetDefault(log.Scope(base, "agent"))

l := slog.Default()

if agt.versionFlag {
agt.printVersion()
Expand All @@ -246,7 +273,7 @@
// to do a graceful shutdown
defer func() {
if r := recover(); r != nil {
log.Printf("Recovered from panic: %v", r)
l.Error("recovered from panic", "panic", r)
agt.cleanup(exit1)
}
}()
Expand All @@ -256,26 +283,32 @@
err := agt.loadConfig()
if agt.checkConfig {
if err != nil {
log.Printf("Bad configuration: %v", err)
l.Error("bad configuration", "err", err)
agt.cleanup(exit0)
}

log.Print("Configuration is valid")
l.Info("configuration is valid")
agt.cleanup(exit0)
} else if err != nil {
log.Printf("Loading config failed: %v", err)
l.Error("loading config failed", "err", err)
agt.cleanup(exit1)
}

err = agt.initModules()
// initCtx is the agent-lifetime context for module initialization. Today it
// is a plain background context; it is the single seam where a later change
// can attach a startup deadline (context.WithTimeout) or wire in
// cancellation. It flows into every module's Init via internal/log. TODO(ctx).
initCtx := context.Background()

err = agt.initModules(initCtx)
if agt.dryRun {
if err != nil {
printInitErr(err)
log.Print("Initialization FAILED - Dry run finished")
l.Info("initialization failed - dry run finished")
agt.cleanup(exit0)
}

log.Print("Initialization SUCCESSFUL - Dry run finished")
l.Info("initialization successful - dry run finished")
agt.cleanup(exit0)
} else if err != nil {
printInitErr(err)
Expand All @@ -285,13 +318,13 @@
if agt.server != "" {
err := agt.registerWithServer()
if err != nil {
log.Printf("Registering with server %q failed: %v", agt.server, err)
l.Error("registering with server failed", "server", agt.server, "err", err)
agt.cleanup(exit1)
}
}

err = agt.startRPCService()
log.Printf("internal RPC handler error: %v", err)
l.Error("internal RPC handler error", "err", err)
agt.cleanup(exit1)
}

Expand Down
Loading
Loading