From 4f3942af9d0efcd1b4db37eba466e1a8e3b9438f Mon Sep 17 00:00:00 2001 From: Rodrigo Campos Date: Thu, 18 Jun 2026 18:20:24 +0200 Subject: [PATCH 1/3] go.mod: Update cgroups dep to 0.0.7 and fix tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This new version includes fixes for eBPF fd leaks: https://github.com/opencontainers/cgroups/pull/60 Let's update to it. However, the Stats() method was added in the new release to the interface, so tests were failing with: # github.com/opencontainers/runc/libcontainer [github.com/opencontainers/runc/libcontainer.test] ./container_linux_test.go:125:18: cannot use &mockCgroupManager{…} (value of type *mockCgroupManager) as cgroups.Manager value in struct literal: *mockCgroupManager does not implement cgroups.Manager (missing method Stats) Let's add the method to the mock too. In the future, probably the mock should come from the cgroups module. Signed-off-by: Rodrigo Campos --- go.mod | 2 +- go.sum | 4 +- libcontainer/container_linux_test.go | 4 + .../cgroups/.golangci-extra.yml | 8 + .../opencontainers/cgroups/CODEOWNERS | 2 +- .../opencontainers/cgroups/cgroups.go | 3 + .../opencontainers/cgroups/config_linux.go | 76 ++++---- .../opencontainers/cgroups/config_rdma.go | 4 +- .../cgroups/devices/config/device.go | 4 +- .../cgroups/devices/ebpf_linux.go | 47 +++-- .../opencontainers/cgroups/devices/v2.go | 2 +- .../opencontainers/cgroups/fs/blkio.go | 5 + .../opencontainers/cgroups/fs/cpu.go | 5 + .../opencontainers/cgroups/fs/cpuacct.go | 7 +- .../opencontainers/cgroups/fs/cpuset.go | 7 +- .../opencontainers/cgroups/fs/devices.go | 6 + .../opencontainers/cgroups/fs/freezer.go | 6 + .../opencontainers/cgroups/fs/fs.go | 22 ++- .../opencontainers/cgroups/fs/hugetlb.go | 5 + .../opencontainers/cgroups/fs/memory.go | 5 + .../opencontainers/cgroups/fs/name.go | 6 + .../opencontainers/cgroups/fs/net_cls.go | 6 + .../opencontainers/cgroups/fs/net_prio.go | 6 + 
.../opencontainers/cgroups/fs/perf_event.go | 6 + .../opencontainers/cgroups/fs/pids.go | 5 + .../opencontainers/cgroups/fs/rdma.go | 5 + .../opencontainers/cgroups/fs2/create.go | 5 +- .../opencontainers/cgroups/fs2/fs2.go | 87 +++++++--- .../opencontainers/cgroups/fscommon/utils.go | 3 +- .../opencontainers/cgroups/stats.go | 164 ++++++++++-------- .../opencontainers/cgroups/systemd/common.go | 2 +- .../opencontainers/cgroups/systemd/cpuset.go | 2 +- .../opencontainers/cgroups/systemd/v1.go | 22 ++- .../opencontainers/cgroups/systemd/v2.go | 5 + .../opencontainers/cgroups/utils.go | 2 +- .../opencontainers/cgroups/v1_utils.go | 2 +- vendor/modules.txt | 4 +- 37 files changed, 375 insertions(+), 181 deletions(-) diff --git a/go.mod b/go.mod index a006470fd2b..22dd6094a83 100644 --- a/go.mod +++ b/go.mod @@ -16,7 +16,7 @@ require ( github.com/moby/sys/user v0.4.0 github.com/moby/sys/userns v0.1.0 github.com/mrunalp/fileutils v0.5.1 - github.com/opencontainers/cgroups v0.0.6 + github.com/opencontainers/cgroups v0.0.7 github.com/opencontainers/runtime-spec v1.3.0 github.com/opencontainers/selinux v1.15.1 github.com/seccomp/libseccomp-golang v0.11.1 diff --git a/go.sum b/go.sum index a550e0cf34d..2641725ad6d 100644 --- a/go.sum +++ b/go.sum @@ -46,8 +46,8 @@ github.com/moby/sys/userns v0.1.0 h1:tVLXkFOxVu9A64/yh59slHVv9ahO9UIev4JZusOLG/g github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcYfDHOl28= github.com/mrunalp/fileutils v0.5.1 h1:F+S7ZlNKnrwHfSwdlgNSkKo67ReVf8o9fel6C3dkm/Q= github.com/mrunalp/fileutils v0.5.1/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ= -github.com/opencontainers/cgroups v0.0.6 h1:tfZFWTIIGaUUFImTyuTg+Mr5x8XRiSdZESgEBW7UxuI= -github.com/opencontainers/cgroups v0.0.6/go.mod h1:oWVzJsKK0gG9SCRBfTpnn16WcGEqDI8PAcpMGbqWxcs= +github.com/opencontainers/cgroups v0.0.7 h1:FqhggFhigAMgKKwy39jweWX3h7Ha6VBF/qNR6Sso3oc= +github.com/opencontainers/cgroups v0.0.7/go.mod h1:hPBRvnBhLZueEN0eJyozMeM3HeFGYlZW9KnO//px6G4= 
github.com/opencontainers/runtime-spec v1.3.0 h1:YZupQUdctfhpZy3TM39nN9Ika5CBWT5diQ8ibYCRkxg= github.com/opencontainers/runtime-spec v1.3.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/selinux v1.15.1 h1:ERxeh5caJvCzNAKdI8WQbJmB1LDTn4BuaAg8wihLBpA= diff --git a/libcontainer/container_linux_test.go b/libcontainer/container_linux_test.go index 0d0dc4451d3..cce320e53e8 100644 --- a/libcontainer/container_linux_test.go +++ b/libcontainer/container_linux_test.go @@ -28,6 +28,10 @@ func (m *mockCgroupManager) GetStats() (*cgroups.Stats, error) { return nil, nil } +func (m *mockCgroupManager) Stats(_ *cgroups.StatsOptions) (*cgroups.Stats, error) { + return nil, nil +} + func (m *mockCgroupManager) Apply(pid int) error { return nil } diff --git a/vendor/github.com/opencontainers/cgroups/.golangci-extra.yml b/vendor/github.com/opencontainers/cgroups/.golangci-extra.yml index b98dba1ba15..ce687ce5afe 100644 --- a/vendor/github.com/opencontainers/cgroups/.golangci-extra.yml +++ b/vendor/github.com/opencontainers/cgroups/.golangci-extra.yml @@ -19,3 +19,11 @@ linters: - -QF1008 # https://staticcheck.dev/docs/checks/#QF1008 Omit embedded fields from selector expression. exclusions: generated: strict + rules: + # Legacy names we can't change without breaking compatibility. 
+ - path: stats.go + text: "(type|struct field) (CpuUsage|CpuStats) should be " + - path: config_linux.go + text: "struct field (CpuShares|CpuQuota|CpuBurst|CpuPeriod|CpuRtRuntime|CpuRtPeriod|CpuWeight) should be " + - path: devices/config/device.go + text: "struct field (Uid|Gid) should be " diff --git a/vendor/github.com/opencontainers/cgroups/CODEOWNERS b/vendor/github.com/opencontainers/cgroups/CODEOWNERS index 7201e35ac92..255c5d40e48 100644 --- a/vendor/github.com/opencontainers/cgroups/CODEOWNERS +++ b/vendor/github.com/opencontainers/cgroups/CODEOWNERS @@ -1 +1 @@ -* @maintainer1 @maintainer2 @maintainer3 +* @opencontainers/cgroups-maintainers diff --git a/vendor/github.com/opencontainers/cgroups/cgroups.go b/vendor/github.com/opencontainers/cgroups/cgroups.go index 5a97bd36b71..35c4e25442e 100644 --- a/vendor/github.com/opencontainers/cgroups/cgroups.go +++ b/vendor/github.com/opencontainers/cgroups/cgroups.go @@ -44,6 +44,9 @@ type Manager interface { // GetStats returns cgroups statistics. GetStats() (*Stats, error) + // Stats returns statistics for specified controllers. + Stats(opts *StatsOptions) (*Stats, error) + // Freeze sets the freezer cgroup to the specified state. Freeze(state FreezerState) error diff --git a/vendor/github.com/opencontainers/cgroups/config_linux.go b/vendor/github.com/opencontainers/cgroups/config_linux.go index 3d29d938bf0..8b5e5058a93 100644 --- a/vendor/github.com/opencontainers/cgroups/config_linux.go +++ b/vendor/github.com/opencontainers/cgroups/config_linux.go @@ -16,23 +16,23 @@ const ( // Cgroup holds properties of a cgroup on Linux. type Cgroup struct { // Name specifies the name of the cgroup - Name string `json:"name,omitempty"` + Name string `json:"name,omitzero"` // Parent specifies the name of parent of cgroup or slice - Parent string `json:"parent,omitempty"` + Parent string `json:"parent,omitzero"` // Path specifies the path to cgroups that are created and/or joined by the container. 
// The path is assumed to be relative to the host system cgroup mountpoint. - Path string `json:"path,omitempty"` + Path string `json:"path,omitzero"` // ScopePrefix describes prefix for the scope name. - ScopePrefix string `json:"scope_prefix,omitempty"` + ScopePrefix string `json:"scope_prefix,omitzero"` // Resources contains various cgroups settings to apply. *Resources // Systemd tells if systemd should be used to manage cgroups. - Systemd bool `json:"Systemd,omitempty"` + Systemd bool `json:"Systemd,omitzero"` // SystemdProps are any additional properties for systemd, // derived from org.systemd.property.xxx annotations. @@ -40,108 +40,108 @@ type Cgroup struct { SystemdProps []systemdDbus.Property `json:"-"` // Rootless tells if rootless cgroups should be used. - Rootless bool `json:"Rootless,omitempty"` + Rootless bool `json:"Rootless,omitzero"` // The host UID that should own the cgroup, or nil to accept // the default ownership. This should only be set when the // cgroupfs is to be mounted read/write. // Not all cgroup manager implementations support changing // the ownership. - OwnerUID *int `json:"owner_uid,omitempty"` + OwnerUID *int `json:"owner_uid,omitzero"` } type Resources struct { // Devices is the set of access rules for devices in the container. - Devices []*devices.Rule `json:"devices,omitempty"` + Devices []*devices.Rule `json:"devices,omitzero"` // Memory limit (in bytes). - Memory int64 `json:"memory,omitempty"` + Memory int64 `json:"memory,omitzero"` // Memory reservation or soft_limit (in bytes). - MemoryReservation int64 `json:"memory_reservation,omitempty"` + MemoryReservation int64 `json:"memory_reservation,omitzero"` // Total memory usage (memory+swap); use -1 for unlimited swap. - MemorySwap int64 `json:"memory_swap,omitempty"` + MemorySwap int64 `json:"memory_swap,omitzero"` // CPU shares (relative weight vs. other containers). 
- CpuShares uint64 `json:"cpu_shares,omitempty"` //nolint:revive // Suppress "var-naming: struct field CpuShares should be CPUShares". + CpuShares uint64 `json:"cpu_shares,omitzero"` // CPU hardcap limit (in usecs). Allowed cpu time in a given period. - CpuQuota int64 `json:"cpu_quota,omitempty"` //nolint:revive // Suppress "var-naming: struct field CpuQuota should be CPUQuota". + CpuQuota int64 `json:"cpu_quota,omitzero"` // CPU hardcap burst limit (in usecs). Allowed accumulated cpu time additionally for burst in a given period. - CpuBurst *uint64 `json:"cpu_burst,omitempty"` //nolint:revive // Suppress "var-naming: struct field CpuBurst should be CPUBurst". + CpuBurst *uint64 `json:"cpu_burst,omitzero"` // CPU period to be used for hardcapping (in usecs). 0 to use system default. - CpuPeriod uint64 `json:"cpu_period,omitempty"` //nolint:revive // Suppress "var-naming: struct field CpuPeriod should be CPUPeriod". + CpuPeriod uint64 `json:"cpu_period,omitzero"` // How many time CPU will use in realtime scheduling (in usecs). - CpuRtRuntime int64 `json:"cpu_rt_quota,omitempty"` //nolint:revive // Suppress "var-naming: struct field CpuRtRuntime should be CPURtRuntime". + CpuRtRuntime int64 `json:"cpu_rt_quota,omitzero"` // CPU period to be used for realtime scheduling (in usecs). - CpuRtPeriod uint64 `json:"cpu_rt_period,omitempty"` //nolint:revive // Suppress "var-naming: struct field CpuQuota should be CPUQuota". + CpuRtPeriod uint64 `json:"cpu_rt_period,omitzero"` // Cpuset CPUs to use. - CpusetCpus string `json:"cpuset_cpus,omitempty"` + CpusetCpus string `json:"cpuset_cpus,omitzero"` // Cpuset memory nodes to use. - CpusetMems string `json:"cpuset_mems,omitempty"` + CpusetMems string `json:"cpuset_mems,omitzero"` // Cgroup's SCHED_IDLE value. - CPUIdle *int64 `json:"cpu_idle,omitempty"` + CPUIdle *int64 `json:"cpu_idle,omitzero"` // Process limit; set < `0' to disable limit. `nil` means "keep current limit". 
- PidsLimit *int64 `json:"pids_limit,omitempty"` + PidsLimit *int64 `json:"pids_limit,omitzero"` // Specifies per cgroup weight, range is from 10 to 1000. - BlkioWeight uint16 `json:"blkio_weight,omitempty"` + BlkioWeight uint16 `json:"blkio_weight,omitzero"` // Tasks' weight in the given cgroup while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only. - BlkioLeafWeight uint16 `json:"blkio_leaf_weight,omitempty"` + BlkioLeafWeight uint16 `json:"blkio_leaf_weight,omitzero"` // Weight per cgroup per device, can override BlkioWeight. - BlkioWeightDevice []*WeightDevice `json:"blkio_weight_device,omitempty"` + BlkioWeightDevice []*WeightDevice `json:"blkio_weight_device,omitzero"` // IO read rate limit per cgroup per device, bytes per second. - BlkioThrottleReadBpsDevice []*ThrottleDevice `json:"blkio_throttle_read_bps_device,omitempty"` + BlkioThrottleReadBpsDevice []*ThrottleDevice `json:"blkio_throttle_read_bps_device,omitzero"` // IO write rate limit per cgroup per device, bytes per second. - BlkioThrottleWriteBpsDevice []*ThrottleDevice `json:"blkio_throttle_write_bps_device,omitempty"` + BlkioThrottleWriteBpsDevice []*ThrottleDevice `json:"blkio_throttle_write_bps_device,omitzero"` // IO read rate limit per cgroup per device, IO per second. - BlkioThrottleReadIOPSDevice []*ThrottleDevice `json:"blkio_throttle_read_iops_device,omitempty"` + BlkioThrottleReadIOPSDevice []*ThrottleDevice `json:"blkio_throttle_read_iops_device,omitzero"` // IO write rate limit per cgroup per device, IO per second. - BlkioThrottleWriteIOPSDevice []*ThrottleDevice `json:"blkio_throttle_write_iops_device,omitempty"` + BlkioThrottleWriteIOPSDevice []*ThrottleDevice `json:"blkio_throttle_write_iops_device,omitzero"` // Freeze value for the process. - Freezer FreezerState `json:"freezer,omitempty"` + Freezer FreezerState `json:"freezer,omitzero"` // Hugetlb limit (in bytes). 
- HugetlbLimit []*HugepageLimit `json:"hugetlb_limit,omitempty"` + HugetlbLimit []*HugepageLimit `json:"hugetlb_limit,omitzero"` // Whether to disable OOM killer. - OomKillDisable bool `json:"oom_kill_disable,omitempty"` + OomKillDisable bool `json:"oom_kill_disable,omitzero"` // Tuning swappiness behaviour per cgroup. - MemorySwappiness *uint64 `json:"memory_swappiness,omitempty"` + MemorySwappiness *uint64 `json:"memory_swappiness,omitzero"` // Set priority of network traffic for container. - NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap,omitempty"` + NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap,omitzero"` // Set class identifier for container's network packets. - NetClsClassid uint32 `json:"net_cls_classid_u,omitempty"` + NetClsClassid uint32 `json:"net_cls_classid_u,omitzero"` // Rdma resource restriction configuration. - Rdma map[string]LinuxRdma `json:"rdma,omitempty"` + Rdma map[string]LinuxRdma `json:"rdma,omitzero"` // Used on cgroups v2: // CpuWeight sets a proportional bandwidth limit. - CpuWeight uint64 `json:"cpu_weight,omitempty"` //nolint:revive // Suppress "var-naming: struct field CpuWeight should be CPUWeight". + CpuWeight uint64 `json:"cpu_weight,omitzero"` // Unified is cgroupv2-only key-value map. - Unified map[string]string `json:"unified,omitempty"` + Unified map[string]string `json:"unified,omitzero"` // SkipDevices allows to skip configuring device permissions. // Used by e.g. kubelet while creating a parent cgroup (kubepods) @@ -165,5 +165,5 @@ type Resources struct { // MemoryCheckBeforeUpdate is a flag for cgroup v2 managers to check // if the new memory limits (Memory and MemorySwap) being set are lower // than the current memory usage, and reject if so. 
- MemoryCheckBeforeUpdate bool `json:"memory_check_before_update,omitempty"` + MemoryCheckBeforeUpdate bool `json:"memory_check_before_update,omitzero"` } diff --git a/vendor/github.com/opencontainers/cgroups/config_rdma.go b/vendor/github.com/opencontainers/cgroups/config_rdma.go index a0bd54f04f5..a5c4adab76a 100644 --- a/vendor/github.com/opencontainers/cgroups/config_rdma.go +++ b/vendor/github.com/opencontainers/cgroups/config_rdma.go @@ -3,7 +3,7 @@ package cgroups // LinuxRdma for Linux cgroup 'rdma' resource management (Linux 4.11) type LinuxRdma struct { // Maximum number of HCA handles that can be opened. Default is "no limit". - HcaHandles *uint32 `json:"hca_handles,omitempty"` + HcaHandles *uint32 `json:"hca_handles,omitzero"` // Maximum number of HCA objects that can be created. Default is "no limit". - HcaObjects *uint32 `json:"hca_objects,omitempty"` + HcaObjects *uint32 `json:"hca_objects,omitzero"` } diff --git a/vendor/github.com/opencontainers/cgroups/devices/config/device.go b/vendor/github.com/opencontainers/cgroups/devices/config/device.go index 295575cbfa7..b36a3e818bc 100644 --- a/vendor/github.com/opencontainers/cgroups/devices/config/device.go +++ b/vendor/github.com/opencontainers/cgroups/devices/config/device.go @@ -20,10 +20,10 @@ type Device struct { FileMode os.FileMode `json:"file_mode"` // Uid of the device. - Uid uint32 `json:"uid,omitempty"` //nolint:revive // Suppress "var-naming: struct field Uid should be UID". + Uid uint32 `json:"uid,omitzero"` // Gid of the device. - Gid uint32 `json:"gid,omitempty"` //nolint:revive // Suppress "var-naming: struct field Gid should be GID". + Gid uint32 `json:"gid,omitzero"` } // Permissions is a cgroupv1-style string to represent device access. 
It diff --git a/vendor/github.com/opencontainers/cgroups/devices/ebpf_linux.go b/vendor/github.com/opencontainers/cgroups/devices/ebpf_linux.go index 6a41aff6e1a..cfc36f77a9e 100644 --- a/vendor/github.com/opencontainers/cgroups/devices/ebpf_linux.go +++ b/vendor/github.com/opencontainers/cgroups/devices/ebpf_linux.go @@ -15,11 +15,7 @@ import ( "golang.org/x/sys/unix" ) -func nilCloser() error { - return nil -} - -func findAttachedCgroupDeviceFilters(dirFd int) ([]*ebpf.Program, error) { +func findAttachedCgroupDeviceFilters(dirFd int) (_ []*ebpf.Program, retErr error) { type bpfAttrQuery struct { TargetFd uint32 AttachType uint32 @@ -58,8 +54,17 @@ func findAttachedCgroupDeviceFilters(dirFd int) ([]*ebpf.Program, error) { } // Convert the ids to program handles. + // On error we don't return the programs slice, so close the fds stored there. progIds = progIds[:size] programs := make([]*ebpf.Program, 0, len(progIds)) + defer func() { + if retErr != nil { + for _, p := range programs { + p.Close() + } + } + }() + for _, progId := range progIds { program, err := ebpf.NewProgramFromID(ebpf.ProgramID(progId)) if err != nil { @@ -154,7 +159,7 @@ func haveBpfProgReplace() bool { // Requires the system to be running in cgroup2 unified-mode with kernel >= 4.15 . // // https://github.com/torvalds/linux/commit/ebc614f687369f9df99828572b1d85a7c2de3d92 -func loadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFd int) (func() error, error) { +func loadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFd int) error { // Increase `ulimit -l` limit to avoid BPF_PROG_LOAD error (#2167). // This limit is not inherited into the container. memlockLimit := &unix.Rlimit{ @@ -166,8 +171,14 @@ func loadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFd // Get the list of existing programs. 
oldProgs, err := findAttachedCgroupDeviceFilters(dirFd) if err != nil { - return nilCloser, err + return err } + defer func() { + for _, p := range oldProgs { + p.Close() + } + }() + useReplaceProg := haveBpfProgReplace() && len(oldProgs) == 1 // Generate new program. @@ -178,8 +189,9 @@ func loadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFd } prog, err := ebpf.NewProgram(spec) if err != nil { - return nilCloser, err + return err } + defer prog.Close() // If there is only one old program, we can just replace it directly. @@ -195,20 +207,7 @@ func loadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFd } err = link.RawAttachProgram(attachProgramOptions) if err != nil { - return nilCloser, fmt.Errorf("failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI): %w", err) - } - closer := func() error { - err = link.RawDetachProgram(link.RawDetachProgramOptions{ - Target: dirFd, - Program: prog, - Attach: ebpf.AttachCGroupDevice, - }) - if err != nil { - return fmt.Errorf("failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE): %w", err) - } - // TODO: Should we attach the old filters back in this case? Otherwise - // we fail-open on a security feature, which is a bit scary. 
- return nil + return fmt.Errorf("failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI): %w", err) } if !useReplaceProg { logLevel := logrus.DebugLevel @@ -248,9 +247,9 @@ func loadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFd Attach: ebpf.AttachCGroupDevice, }) if err != nil { - return closer, fmt.Errorf("failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE) on old filter program: %w", err) + return fmt.Errorf("failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE) on old filter program: %w", err) } } } - return closer, nil + return nil } diff --git a/vendor/github.com/opencontainers/cgroups/devices/v2.go b/vendor/github.com/opencontainers/cgroups/devices/v2.go index d54298f7e76..508f3ddc574 100644 --- a/vendor/github.com/opencontainers/cgroups/devices/v2.go +++ b/vendor/github.com/opencontainers/cgroups/devices/v2.go @@ -64,7 +64,7 @@ func setV2(dirPath string, r *cgroups.Resources) error { return fmt.Errorf("cannot get dir FD for %s", dirPath) } defer unix.Close(dirFD) - if _, err := loadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil { + if err := loadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil { if !canSkipEBPFError(r) { return err } diff --git a/vendor/github.com/opencontainers/cgroups/fs/blkio.go b/vendor/github.com/opencontainers/cgroups/fs/blkio.go index f3c4c5cf816..e5b2d314e33 100644 --- a/vendor/github.com/opencontainers/cgroups/fs/blkio.go +++ b/vendor/github.com/opencontainers/cgroups/fs/blkio.go @@ -19,6 +19,11 @@ func (s *BlkioGroup) Name() string { return "blkio" } +// ID returns the controller ID for blkio subsystem. 
+func (s *BlkioGroup) ID() cgroups.Controller { + return cgroups.IO +} + func (s *BlkioGroup) Apply(path string, _ *cgroups.Resources, pid int) error { return apply(path, pid) } diff --git a/vendor/github.com/opencontainers/cgroups/fs/cpu.go b/vendor/github.com/opencontainers/cgroups/fs/cpu.go index 3e05788a3f6..84f9f74407d 100644 --- a/vendor/github.com/opencontainers/cgroups/fs/cpu.go +++ b/vendor/github.com/opencontainers/cgroups/fs/cpu.go @@ -18,6 +18,11 @@ func (s *CpuGroup) Name() string { return "cpu" } +// ID returns the controller ID for CPU subsystem. +func (s *CpuGroup) ID() cgroups.Controller { + return cgroups.CPU +} + func (s *CpuGroup) Apply(path string, r *cgroups.Resources, pid int) error { if err := os.MkdirAll(path, 0o755); err != nil { return err diff --git a/vendor/github.com/opencontainers/cgroups/fs/cpuacct.go b/vendor/github.com/opencontainers/cgroups/fs/cpuacct.go index bde25b07594..5a1be7564e4 100644 --- a/vendor/github.com/opencontainers/cgroups/fs/cpuacct.go +++ b/vendor/github.com/opencontainers/cgroups/fs/cpuacct.go @@ -26,6 +26,11 @@ func (s *CpuacctGroup) Name() string { return "cpuacct" } +// ID returns the controller ID for cpuacct subsystem. 
+func (s *CpuacctGroup) ID() cgroups.Controller { + return cgroups.CPU +} + func (s *CpuacctGroup) Apply(path string, _ *cgroups.Resources, pid int) error { return apply(path, pid) } @@ -105,7 +110,7 @@ func getPercpuUsage(path string) ([]uint64, error) { if err != nil { return percpuUsage, err } - for _, value := range strings.Fields(data) { + for value := range strings.FieldsSeq(data) { value, err := strconv.ParseUint(value, 10, 64) if err != nil { return percpuUsage, &parseError{Path: path, File: file, Err: err} diff --git a/vendor/github.com/opencontainers/cgroups/fs/cpuset.go b/vendor/github.com/opencontainers/cgroups/fs/cpuset.go index ef6ff7da303..327d5e75075 100644 --- a/vendor/github.com/opencontainers/cgroups/fs/cpuset.go +++ b/vendor/github.com/opencontainers/cgroups/fs/cpuset.go @@ -54,6 +54,11 @@ func (s *CpusetGroup) Name() string { return "cpuset" } +// ID returns the controller ID for cpuset subsystem. +func (s *CpusetGroup) ID() cgroups.Controller { + return cgroups.CPUSet +} + func (s *CpusetGroup) Apply(path string, r *cgroups.Resources, pid int) error { return s.ApplyDir(path, r, pid) } @@ -82,7 +87,7 @@ func getCpusetStat(path string, file string) ([]uint16, error) { return extracted, &parseError{Path: path, File: file, Err: errors.New("empty file")} } - for _, s := range strings.Split(fileContent, ",") { + for s := range strings.SplitSeq(fileContent, ",") { fromStr, toStr, ok := strings.Cut(s, "-") if ok { from, err := strconv.ParseUint(fromStr, 10, 16) diff --git a/vendor/github.com/opencontainers/cgroups/fs/devices.go b/vendor/github.com/opencontainers/cgroups/fs/devices.go index 26483ecb7dd..5eee641572b 100644 --- a/vendor/github.com/opencontainers/cgroups/fs/devices.go +++ b/vendor/github.com/opencontainers/cgroups/fs/devices.go @@ -10,6 +10,12 @@ func (s *DevicesGroup) Name() string { return "devices" } +// ID returns the controller ID for devices subsystem. +// Returns 0 as devices is not a cgroups.Controller. 
+func (s *DevicesGroup) ID() cgroups.Controller { + return 0 +} + func (s *DevicesGroup) Apply(path string, r *cgroups.Resources, pid int) error { if r.SkipDevices { return nil diff --git a/vendor/github.com/opencontainers/cgroups/fs/freezer.go b/vendor/github.com/opencontainers/cgroups/fs/freezer.go index fe0f0dde482..3edc7a81aba 100644 --- a/vendor/github.com/opencontainers/cgroups/fs/freezer.go +++ b/vendor/github.com/opencontainers/cgroups/fs/freezer.go @@ -18,6 +18,12 @@ func (s *FreezerGroup) Name() string { return "freezer" } +// ID returns the controller ID for freezer subsystem. +// Returns 0 as freezer is not a cgroups.Controller. +func (s *FreezerGroup) ID() cgroups.Controller { + return 0 +} + func (s *FreezerGroup) Apply(path string, _ *cgroups.Resources, pid int) error { return apply(path, pid) } diff --git a/vendor/github.com/opencontainers/cgroups/fs/fs.go b/vendor/github.com/opencontainers/cgroups/fs/fs.go index 625931193ec..879400cda35 100644 --- a/vendor/github.com/opencontainers/cgroups/fs/fs.go +++ b/vendor/github.com/opencontainers/cgroups/fs/fs.go @@ -29,7 +29,7 @@ var subsystems = []subsystem{ &FreezerGroup{}, &RdmaGroup{}, &NameGroup{GroupName: "name=systemd", Join: true}, - &NameGroup{GroupName: "misc", Join: true}, + &NameGroup{GroupName: "misc", Join: true, GroupID: cgroups.Misc}, } var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist") @@ -45,6 +45,8 @@ func init() { type subsystem interface { // Name returns the name of the subsystem. Name() string + // ID returns the controller ID for filtering. + ID() cgroups.Controller // GetStats fills in the stats for the subsystem. GetStats(path string, stats *cgroups.Stats) error // Apply creates and joins a cgroup, adding pid into it. Some @@ -181,14 +183,32 @@ func (m *Manager) Path(subsys string) string { } func (m *Manager) GetStats() (*cgroups.Stats, error) { + return m.Stats(nil) +} + +// Stats returns cgroup statistics for the specified controllers. 
+func (m *Manager) Stats(opts *cgroups.StatsOptions) (*cgroups.Stats, error) { m.mu.Lock() defer m.mu.Unlock() + + // Default: query all controllers + controllers := cgroups.AllControllers + if opts != nil && opts.Controllers != 0 { + controllers = opts.Controllers + } + stats := cgroups.NewStats() for _, sys := range subsystems { path := m.paths[sys.Name()] if path == "" { continue } + + // Filter based on controller type + if sys.ID()&controllers == 0 { + continue + } + if err := sys.GetStats(path, stats); err != nil { return nil, err } diff --git a/vendor/github.com/opencontainers/cgroups/fs/hugetlb.go b/vendor/github.com/opencontainers/cgroups/fs/hugetlb.go index 698fd691e10..39b8f80dfef 100644 --- a/vendor/github.com/opencontainers/cgroups/fs/hugetlb.go +++ b/vendor/github.com/opencontainers/cgroups/fs/hugetlb.go @@ -15,6 +15,11 @@ func (s *HugetlbGroup) Name() string { return "hugetlb" } +// ID returns the controller ID for hugetlb subsystem. +func (s *HugetlbGroup) ID() cgroups.Controller { + return cgroups.HugeTLB +} + func (s *HugetlbGroup) Apply(path string, _ *cgroups.Resources, pid int) error { return apply(path, pid) } diff --git a/vendor/github.com/opencontainers/cgroups/fs/memory.go b/vendor/github.com/opencontainers/cgroups/fs/memory.go index d92f2322beb..0250819172a 100644 --- a/vendor/github.com/opencontainers/cgroups/fs/memory.go +++ b/vendor/github.com/opencontainers/cgroups/fs/memory.go @@ -29,6 +29,11 @@ func (s *MemoryGroup) Name() string { return "memory" } +// ID returns the controller ID for memory subsystem. 
+func (s *MemoryGroup) ID() cgroups.Controller { + return cgroups.Memory +} + func (s *MemoryGroup) Apply(path string, _ *cgroups.Resources, pid int) error { return apply(path, pid) } diff --git a/vendor/github.com/opencontainers/cgroups/fs/name.go b/vendor/github.com/opencontainers/cgroups/fs/name.go index 28643519b58..47c60229319 100644 --- a/vendor/github.com/opencontainers/cgroups/fs/name.go +++ b/vendor/github.com/opencontainers/cgroups/fs/name.go @@ -7,12 +7,18 @@ import ( type NameGroup struct { GroupName string Join bool + GroupID cgroups.Controller } func (s *NameGroup) Name() string { return s.GroupName } +// ID returns the controller ID for named subsystem. +func (s *NameGroup) ID() cgroups.Controller { + return s.GroupID +} + func (s *NameGroup) Apply(path string, _ *cgroups.Resources, pid int) error { if s.Join { // Ignore errors if the named cgroup does not exist. diff --git a/vendor/github.com/opencontainers/cgroups/fs/net_cls.go b/vendor/github.com/opencontainers/cgroups/fs/net_cls.go index 2bd6c5ab218..c61bded0c45 100644 --- a/vendor/github.com/opencontainers/cgroups/fs/net_cls.go +++ b/vendor/github.com/opencontainers/cgroups/fs/net_cls.go @@ -12,6 +12,12 @@ func (s *NetClsGroup) Name() string { return "net_cls" } +// ID returns the controller ID for net_cls subsystem. +// Returns 0 as net_cls is not a cgroups.Controller. +func (s *NetClsGroup) ID() cgroups.Controller { + return 0 +} + func (s *NetClsGroup) Apply(path string, _ *cgroups.Resources, pid int) error { return apply(path, pid) } diff --git a/vendor/github.com/opencontainers/cgroups/fs/net_prio.go b/vendor/github.com/opencontainers/cgroups/fs/net_prio.go index b51682b6da0..228eb341995 100644 --- a/vendor/github.com/opencontainers/cgroups/fs/net_prio.go +++ b/vendor/github.com/opencontainers/cgroups/fs/net_prio.go @@ -10,6 +10,12 @@ func (s *NetPrioGroup) Name() string { return "net_prio" } +// ID returns the controller ID for net_prio subsystem. 
+// Returns 0 as net_prio is not a cgroups.Controller. +func (s *NetPrioGroup) ID() cgroups.Controller { + return 0 +} + func (s *NetPrioGroup) Apply(path string, _ *cgroups.Resources, pid int) error { return apply(path, pid) } diff --git a/vendor/github.com/opencontainers/cgroups/fs/perf_event.go b/vendor/github.com/opencontainers/cgroups/fs/perf_event.go index 929c412a3a7..ba6f44878fe 100644 --- a/vendor/github.com/opencontainers/cgroups/fs/perf_event.go +++ b/vendor/github.com/opencontainers/cgroups/fs/perf_event.go @@ -10,6 +10,12 @@ func (s *PerfEventGroup) Name() string { return "perf_event" } +// ID returns the controller ID for perf_event subsystem. +// Returns 0 as perf_event is not a cgroups.Controller. +func (s *PerfEventGroup) ID() cgroups.Controller { + return 0 +} + func (s *PerfEventGroup) Apply(path string, _ *cgroups.Resources, pid int) error { return apply(path, pid) } diff --git a/vendor/github.com/opencontainers/cgroups/fs/pids.go b/vendor/github.com/opencontainers/cgroups/fs/pids.go index 36bd339af82..fa5b1c8fd2e 100644 --- a/vendor/github.com/opencontainers/cgroups/fs/pids.go +++ b/vendor/github.com/opencontainers/cgroups/fs/pids.go @@ -14,6 +14,11 @@ func (s *PidsGroup) Name() string { return "pids" } +// ID returns the controller ID for pids subsystem. +func (s *PidsGroup) ID() cgroups.Controller { + return cgroups.Pids +} + func (s *PidsGroup) Apply(path string, _ *cgroups.Resources, pid int) error { return apply(path, pid) } diff --git a/vendor/github.com/opencontainers/cgroups/fs/rdma.go b/vendor/github.com/opencontainers/cgroups/fs/rdma.go index 4b175365f27..8cc436c6f9b 100644 --- a/vendor/github.com/opencontainers/cgroups/fs/rdma.go +++ b/vendor/github.com/opencontainers/cgroups/fs/rdma.go @@ -11,6 +11,11 @@ func (s *RdmaGroup) Name() string { return "rdma" } +// ID returns the controller ID for rdma subsystem. 
+func (s *RdmaGroup) ID() cgroups.Controller { + return cgroups.RDMA +} + func (s *RdmaGroup) Apply(path string, _ *cgroups.Resources, pid int) error { return apply(path, pid) } diff --git a/vendor/github.com/opencontainers/cgroups/fs2/create.go b/vendor/github.com/opencontainers/cgroups/fs2/create.go index 565ca883079..6be11c24739 100644 --- a/vendor/github.com/opencontainers/cgroups/fs2/create.go +++ b/vendor/github.com/opencontainers/cgroups/fs2/create.go @@ -28,7 +28,7 @@ func needAnyControllers(r *cgroups.Resources) (bool, error) { return false, err } avail := make(map[string]struct{}) - for _, ctr := range strings.Fields(content) { + for ctr := range strings.FieldsSeq(content) { avail[ctr] = struct{}{} } @@ -137,8 +137,7 @@ func CreateCgroupPath(path string, c *cgroups.Cgroup) (Err error) { if i < len(elements)-1 { if err := cgroups.WriteFile(current, cgStCtlFile, res); err != nil { // try write one by one - allCtrs := strings.Split(res, " ") - for _, ctr := range allCtrs { + for ctr := range strings.SplitSeq(res, " ") { _ = cgroups.WriteFile(current, cgStCtlFile, ctr) } } diff --git a/vendor/github.com/opencontainers/cgroups/fs2/fs2.go b/vendor/github.com/opencontainers/cgroups/fs2/fs2.go index 356d087985c..46819a8f4d9 100644 --- a/vendor/github.com/opencontainers/cgroups/fs2/fs2.go +++ b/vendor/github.com/opencontainers/cgroups/fs2/fs2.go @@ -105,50 +105,85 @@ func (m *Manager) GetAllPids() ([]int, error) { } func (m *Manager) GetStats() (*cgroups.Stats, error) { - var errs []error + return m.Stats(nil) +} + +// Stats returns cgroup statistics for the specified controllers. 
+func (m *Manager) Stats(opts *cgroups.StatsOptions) (*cgroups.Stats, error) { + // Default: query all controllers + controllers := cgroups.AllControllers + if opts != nil && opts.Controllers != 0 { + controllers = opts.Controllers + } + var errs []error + var err error st := cgroups.NewStats() // pids (since kernel 4.5) - if err := statPids(m.dirPath, st); err != nil { - errs = append(errs, err) + if controllers&cgroups.Pids != 0 { + if err = statPids(m.dirPath, st); err != nil { + errs = append(errs, err) + } } + // memory (since kernel 4.5) - if err := statMemory(m.dirPath, st); err != nil && !os.IsNotExist(err) { - errs = append(errs, err) + if controllers&cgroups.Memory != 0 { + if err = statMemory(m.dirPath, st); err != nil && !os.IsNotExist(err) { + errs = append(errs, err) + } + + if st.MemoryStats.PSI, err = statPSI(m.dirPath, "memory.pressure"); err != nil { + errs = append(errs, err) + } } + // io (since kernel 4.5) - if err := statIo(m.dirPath, st); err != nil && !os.IsNotExist(err) { - errs = append(errs, err) + if controllers&cgroups.IO != 0 { + if err = statIo(m.dirPath, st); err != nil && !os.IsNotExist(err) { + errs = append(errs, err) + } + + if st.BlkioStats.PSI, err = statPSI(m.dirPath, "io.pressure"); err != nil { + errs = append(errs, err) + } } + // cpu (since kernel 4.15) // Note cpu.stat is available even if the controller is not enabled. - if err := statCpu(m.dirPath, st); err != nil && !os.IsNotExist(err) { - errs = append(errs, err) - } - // PSI (since kernel 4.20). 
- var err error - if st.CpuStats.PSI, err = statPSI(m.dirPath, "cpu.pressure"); err != nil { - errs = append(errs, err) - } - if st.MemoryStats.PSI, err = statPSI(m.dirPath, "memory.pressure"); err != nil { - errs = append(errs, err) - } - if st.BlkioStats.PSI, err = statPSI(m.dirPath, "io.pressure"); err != nil { - errs = append(errs, err) + if controllers&cgroups.CPU != 0 { + if err = statCpu(m.dirPath, st); err != nil && !os.IsNotExist(err) { + errs = append(errs, err) + } + + // PSI (since kernel 4.20) + if st.CpuStats.PSI, err = statPSI(m.dirPath, "cpu.pressure"); err != nil { + errs = append(errs, err) + } + } + // hugetlb (since kernel 5.6) - if err := statHugeTlb(m.dirPath, st); err != nil && !os.IsNotExist(err) { - errs = append(errs, err) + if controllers&cgroups.HugeTLB != 0 { + if err := statHugeTlb(m.dirPath, st); err != nil && !os.IsNotExist(err) { + errs = append(errs, err) + } } + // rdma (since kernel 4.11) - if err := fscommon.RdmaGetStats(m.dirPath, st); err != nil && !os.IsNotExist(err) { - errs = append(errs, err) + if controllers&cgroups.RDMA != 0 { + if err := fscommon.RdmaGetStats(m.dirPath, st); err != nil && !os.IsNotExist(err) { + errs = append(errs, err) + } } + // misc (since kernel 5.13) - if err := statMisc(m.dirPath, st); err != nil && !os.IsNotExist(err) { - errs = append(errs, err) + if controllers&cgroups.Misc != 0 { + if err := statMisc(m.dirPath, st); err != nil && !os.IsNotExist(err) { + errs = append(errs, err) + } } + if len(errs) > 0 && !m.config.Rootless { return st, fmt.Errorf("error while statting cgroup v2: %+v", errs) } diff --git a/vendor/github.com/opencontainers/cgroups/fscommon/utils.go b/vendor/github.com/opencontainers/cgroups/fscommon/utils.go index d8f8dfc0237..a8b32aa95ab 100644 --- a/vendor/github.com/opencontainers/cgroups/fscommon/utils.go +++ b/vendor/github.com/opencontainers/cgroups/fscommon/utils.go @@ -82,8 +82,7 @@ func GetValueByKey(path, file, key string) (uint64, error) { } key += " " - lines := 
strings.Split(content, "\n") - for _, line := range lines { + for line := range strings.SplitSeq(content, "\n") { v, ok := strings.CutPrefix(line, key) if ok { val, err := ParseUint(v, 10, 64) diff --git a/vendor/github.com/opencontainers/cgroups/stats.go b/vendor/github.com/opencontainers/cgroups/stats.go index 01701333ab3..d7654ffd1ed 100644 --- a/vendor/github.com/opencontainers/cgroups/stats.go +++ b/vendor/github.com/opencontainers/cgroups/stats.go @@ -2,19 +2,19 @@ package cgroups type ThrottlingData struct { // Number of periods with throttling active - Periods uint64 `json:"periods,omitempty"` + Periods uint64 `json:"periods,omitzero"` // Number of periods when the container hit its throttling limit. - ThrottledPeriods uint64 `json:"throttled_periods,omitempty"` + ThrottledPeriods uint64 `json:"throttled_periods,omitzero"` // Aggregate time the container was throttled for in nanoseconds. - ThrottledTime uint64 `json:"throttled_time,omitempty"` + ThrottledTime uint64 `json:"throttled_time,omitzero"` } type BurstData struct { // Number of periods bandwidth burst occurs - BurstsPeriods uint64 `json:"bursts_periods,omitempty"` + BurstsPeriods uint64 `json:"bursts_periods,omitzero"` // Cumulative wall-time that any cpus has used above quota in respective periods // Units: nanoseconds. - BurstTime uint64 `json:"burst_time,omitempty"` + BurstTime uint64 `json:"burst_time,omitzero"` } // CpuUsage denotes the usage of a CPU. @@ -22,10 +22,10 @@ type BurstData struct { type CpuUsage struct { // Total CPU time consumed. // Units: nanoseconds. - TotalUsage uint64 `json:"total_usage,omitempty"` + TotalUsage uint64 `json:"total_usage,omitzero"` // Total CPU time consumed per core. // Units: nanoseconds. - PercpuUsage []uint64 `json:"percpu_usage,omitempty"` + PercpuUsage []uint64 `json:"percpu_usage,omitzero"` // CPU time consumed per core in kernel mode // Units: nanoseconds. 
PercpuUsageInKernelmode []uint64 `json:"percpu_usage_in_kernelmode"` @@ -48,26 +48,26 @@ type PSIData struct { } type PSIStats struct { - Some PSIData `json:"some,omitempty"` - Full PSIData `json:"full,omitempty"` + Some PSIData `json:"some,omitzero"` + Full PSIData `json:"full,omitzero"` } type CpuStats struct { - CpuUsage CpuUsage `json:"cpu_usage,omitempty"` - ThrottlingData ThrottlingData `json:"throttling_data,omitempty"` - PSI *PSIStats `json:"psi,omitempty"` - BurstData BurstData `json:"burst_data,omitempty"` + CpuUsage CpuUsage `json:"cpu_usage,omitzero"` + ThrottlingData ThrottlingData `json:"throttling_data,omitzero"` + PSI *PSIStats `json:"psi,omitzero"` + BurstData BurstData `json:"burst_data,omitzero"` } type CPUSetStats struct { // List of the physical numbers of the CPUs on which processes // in that cpuset are allowed to execute - CPUs []uint16 `json:"cpus,omitempty"` + CPUs []uint16 `json:"cpus,omitzero"` // cpu_exclusive flag CPUExclusive uint64 `json:"cpu_exclusive"` // List of memory nodes on which processes in that cpuset // are allowed to allocate memory - Mems []uint16 `json:"mems,omitempty"` + Mems []uint16 `json:"mems,omitzero"` // mem_hardwall flag MemHardwall uint64 `json:"mem_hardwall"` // mem_exclusive flag @@ -87,122 +87,122 @@ type CPUSetStats struct { } type MemoryData struct { - Usage uint64 `json:"usage,omitempty"` - MaxUsage uint64 `json:"max_usage,omitempty"` + Usage uint64 `json:"usage,omitzero"` + MaxUsage uint64 `json:"max_usage,omitzero"` Failcnt uint64 `json:"failcnt"` Limit uint64 `json:"limit"` } type MemoryStats struct { // memory used for cache - Cache uint64 `json:"cache,omitempty"` + Cache uint64 `json:"cache,omitzero"` // usage of memory - Usage MemoryData `json:"usage,omitempty"` + Usage MemoryData `json:"usage,omitzero"` // usage of memory + swap - SwapUsage MemoryData `json:"swap_usage,omitempty"` + SwapUsage MemoryData `json:"swap_usage,omitzero"` // usage of swap only - SwapOnlyUsage MemoryData 
`json:"swap_only_usage,omitempty"` + SwapOnlyUsage MemoryData `json:"swap_only_usage,omitzero"` // usage of kernel memory - KernelUsage MemoryData `json:"kernel_usage,omitempty"` + KernelUsage MemoryData `json:"kernel_usage,omitzero"` // usage of kernel TCP memory - KernelTCPUsage MemoryData `json:"kernel_tcp_usage,omitempty"` + KernelTCPUsage MemoryData `json:"kernel_tcp_usage,omitzero"` // usage of memory pages by NUMA node // see chapter 5.6 of memory controller documentation - PageUsageByNUMA PageUsageByNUMA `json:"page_usage_by_numa,omitempty"` + PageUsageByNUMA PageUsageByNUMA `json:"page_usage_by_numa,omitzero"` // if true, memory usage is accounted for throughout a hierarchy of cgroups. UseHierarchy bool `json:"use_hierarchy"` - Stats map[string]uint64 `json:"stats,omitempty"` - PSI *PSIStats `json:"psi,omitempty"` + Stats map[string]uint64 `json:"stats,omitzero"` + PSI *PSIStats `json:"psi,omitzero"` } type PageUsageByNUMA struct { // Embedding is used as types can't be recursive. 
PageUsageByNUMAInner - Hierarchical PageUsageByNUMAInner `json:"hierarchical,omitempty"` + Hierarchical PageUsageByNUMAInner `json:"hierarchical,omitzero"` } type PageUsageByNUMAInner struct { - Total PageStats `json:"total,omitempty"` - File PageStats `json:"file,omitempty"` - Anon PageStats `json:"anon,omitempty"` - Unevictable PageStats `json:"unevictable,omitempty"` + Total PageStats `json:"total,omitzero"` + File PageStats `json:"file,omitzero"` + Anon PageStats `json:"anon,omitzero"` + Unevictable PageStats `json:"unevictable,omitzero"` } type PageStats struct { - Total uint64 `json:"total,omitempty"` - Nodes map[uint8]uint64 `json:"nodes,omitempty"` + Total uint64 `json:"total,omitzero"` + Nodes map[uint8]uint64 `json:"nodes,omitzero"` } type PidsStats struct { // number of pids in the cgroup - Current uint64 `json:"current,omitempty"` + Current uint64 `json:"current,omitzero"` // active pids hard limit - Limit uint64 `json:"limit,omitempty"` + Limit uint64 `json:"limit,omitzero"` } type BlkioStatEntry struct { - Major uint64 `json:"major,omitempty"` - Minor uint64 `json:"minor,omitempty"` - Op string `json:"op,omitempty"` - Value uint64 `json:"value,omitempty"` + Major uint64 `json:"major,omitzero"` + Minor uint64 `json:"minor,omitzero"` + Op string `json:"op,omitzero"` + Value uint64 `json:"value,omitzero"` } type BlkioStats struct { // number of bytes transferred to and from the block device - IoServiceBytesRecursive []BlkioStatEntry `json:"io_service_bytes_recursive,omitempty"` - IoServicedRecursive []BlkioStatEntry `json:"io_serviced_recursive,omitempty"` - IoQueuedRecursive []BlkioStatEntry `json:"io_queue_recursive,omitempty"` - IoServiceTimeRecursive []BlkioStatEntry `json:"io_service_time_recursive,omitempty"` - IoWaitTimeRecursive []BlkioStatEntry `json:"io_wait_time_recursive,omitempty"` - IoMergedRecursive []BlkioStatEntry `json:"io_merged_recursive,omitempty"` - IoTimeRecursive []BlkioStatEntry `json:"io_time_recursive,omitempty"` - 
SectorsRecursive []BlkioStatEntry `json:"sectors_recursive,omitempty"` - PSI *PSIStats `json:"psi,omitempty"` - IoCostUsage []BlkioStatEntry `json:"io_cost_usage,omitempty"` - IoCostWait []BlkioStatEntry `json:"io_cost_wait,omitempty"` - IoCostIndebt []BlkioStatEntry `json:"io_cost_indebt,omitempty"` - IoCostIndelay []BlkioStatEntry `json:"io_cost_indelay,omitempty"` + IoServiceBytesRecursive []BlkioStatEntry `json:"io_service_bytes_recursive,omitzero"` + IoServicedRecursive []BlkioStatEntry `json:"io_serviced_recursive,omitzero"` + IoQueuedRecursive []BlkioStatEntry `json:"io_queue_recursive,omitzero"` + IoServiceTimeRecursive []BlkioStatEntry `json:"io_service_time_recursive,omitzero"` + IoWaitTimeRecursive []BlkioStatEntry `json:"io_wait_time_recursive,omitzero"` + IoMergedRecursive []BlkioStatEntry `json:"io_merged_recursive,omitzero"` + IoTimeRecursive []BlkioStatEntry `json:"io_time_recursive,omitzero"` + SectorsRecursive []BlkioStatEntry `json:"sectors_recursive,omitzero"` + PSI *PSIStats `json:"psi,omitzero"` + IoCostUsage []BlkioStatEntry `json:"io_cost_usage,omitzero"` + IoCostWait []BlkioStatEntry `json:"io_cost_wait,omitzero"` + IoCostIndebt []BlkioStatEntry `json:"io_cost_indebt,omitzero"` + IoCostIndelay []BlkioStatEntry `json:"io_cost_indelay,omitzero"` } type HugetlbStats struct { // current res_counter usage for hugetlb - Usage uint64 `json:"usage,omitempty"` + Usage uint64 `json:"usage,omitzero"` // maximum usage ever recorded. - MaxUsage uint64 `json:"max_usage,omitempty"` + MaxUsage uint64 `json:"max_usage,omitzero"` // number of times hugetlb usage allocation failure. 
Failcnt uint64 `json:"failcnt"` } type RdmaEntry struct { - Device string `json:"device,omitempty"` - HcaHandles uint32 `json:"hca_handles,omitempty"` - HcaObjects uint32 `json:"hca_objects,omitempty"` + Device string `json:"device,omitzero"` + HcaHandles uint32 `json:"hca_handles,omitzero"` + HcaObjects uint32 `json:"hca_objects,omitzero"` } type RdmaStats struct { - RdmaLimit []RdmaEntry `json:"rdma_limit,omitempty"` - RdmaCurrent []RdmaEntry `json:"rdma_current,omitempty"` + RdmaLimit []RdmaEntry `json:"rdma_limit,omitzero"` + RdmaCurrent []RdmaEntry `json:"rdma_current,omitzero"` } type MiscStats struct { // current resource usage for a key in misc - Usage uint64 `json:"usage,omitempty"` + Usage uint64 `json:"usage,omitzero"` // number of times the resource usage was about to go over the max boundary - Events uint64 `json:"events,omitempty"` + Events uint64 `json:"events,omitzero"` } type Stats struct { - CpuStats CpuStats `json:"cpu_stats,omitempty"` - CPUSetStats CPUSetStats `json:"cpuset_stats,omitempty"` - MemoryStats MemoryStats `json:"memory_stats,omitempty"` - PidsStats PidsStats `json:"pids_stats,omitempty"` - BlkioStats BlkioStats `json:"blkio_stats,omitempty"` + CpuStats CpuStats `json:"cpu_stats,omitzero"` + CPUSetStats CPUSetStats `json:"cpuset_stats,omitzero"` + MemoryStats MemoryStats `json:"memory_stats,omitzero"` + PidsStats PidsStats `json:"pids_stats,omitzero"` + BlkioStats BlkioStats `json:"blkio_stats,omitzero"` // the map is in the format "size of hugepage: stats of the hugepage" - HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"` - RdmaStats RdmaStats `json:"rdma_stats,omitempty"` + HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitzero"` + RdmaStats RdmaStats `json:"rdma_stats,omitzero"` // the map is in the format "misc resource name: stats of the key" - MiscStats map[string]MiscStats `json:"misc_stats,omitempty"` + MiscStats map[string]MiscStats `json:"misc_stats,omitzero"` } func NewStats() *Stats { @@ 
-211,3 +211,29 @@ func NewStats() *Stats { miscStats := make(map[string]MiscStats) return &Stats{MemoryStats: memoryStats, HugetlbStats: hugetlbStats, MiscStats: miscStats} } + +// Controller represents a cgroup controller type for stats collection. +type Controller int + +// Controller types for cgroup stats collection. +const ( + CPU Controller = 1 << iota + Memory + Pids + IO + HugeTLB + RDMA + Misc + CPUSet // v1 only +) + +// AllControllers is a bitmask of all available controllers. +const AllControllers = CPU | Memory | Pids | IO | HugeTLB | RDMA | Misc | CPUSet + +// StatsOptions specifies which controllers to retrieve statistics for. +type StatsOptions struct { + // Controllers is a bitmask of Controller values. + // If 0, all available controllers are queried (default behavior). + // Use Controller constants like: CPU | Memory | Pids + Controllers Controller +} diff --git a/vendor/github.com/opencontainers/cgroups/systemd/common.go b/vendor/github.com/opencontainers/cgroups/systemd/common.go index 537defbf2d1..42083d0f06d 100644 --- a/vendor/github.com/opencontainers/cgroups/systemd/common.go +++ b/vendor/github.com/opencontainers/cgroups/systemd/common.go @@ -77,7 +77,7 @@ func ExpandSlice(slice string) (string, error) { if sliceName == "-" { return "/", nil } - for _, component := range strings.Split(sliceName, "-") { + for component := range strings.SplitSeq(sliceName, "-") { // test--a.slice isn't permitted, nor is -test.slice. 
if component == "" { return "", fmt.Errorf("invalid slice name: %s", slice) diff --git a/vendor/github.com/opencontainers/cgroups/systemd/cpuset.go b/vendor/github.com/opencontainers/cgroups/systemd/cpuset.go index c6f5642dcd2..f2603620415 100644 --- a/vendor/github.com/opencontainers/cgroups/systemd/cpuset.go +++ b/vendor/github.com/opencontainers/cgroups/systemd/cpuset.go @@ -14,7 +14,7 @@ import ( func RangeToBits(str string) ([]byte, error) { bits := new(big.Int) - for _, r := range strings.Split(str, ",") { + for r := range strings.SplitSeq(str, ",") { // allow extra spaces around r = strings.TrimSpace(r) // allow empty elements (extra commas) diff --git a/vendor/github.com/opencontainers/cgroups/systemd/v1.go b/vendor/github.com/opencontainers/cgroups/systemd/v1.go index 96e69bb8608..3c3fb4cb604 100644 --- a/vendor/github.com/opencontainers/cgroups/systemd/v1.go +++ b/vendor/github.com/opencontainers/cgroups/systemd/v1.go @@ -46,6 +46,8 @@ func NewLegacyManager(cg *cgroups.Cgroup, paths map[string]string) (*LegacyManag type subsystem interface { // Name returns the name of the subsystem. Name() string + // ID returns the controller ID for filtering. + ID() cgroups.Controller // GetStats returns the stats, as 'stats', corresponding to the cgroup under 'path'. GetStats(path string, stats *cgroups.Stats) error // Set sets cgroup resource limits. @@ -69,7 +71,7 @@ var legacySubsystems = []subsystem{ &fs.NetClsGroup{}, &fs.NameGroup{GroupName: "name=systemd"}, &fs.RdmaGroup{}, - &fs.NameGroup{GroupName: "misc"}, + &fs.NameGroup{GroupName: "misc", GroupID: cgroups.Misc}, } func genV1ResourcesProperties(r *cgroups.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) { @@ -339,14 +341,32 @@ func (m *LegacyManager) GetAllPids() ([]int, error) { } func (m *LegacyManager) GetStats() (*cgroups.Stats, error) { + return m.Stats(nil) +} + +// Stats returns cgroup statistics for the specified controllers. 
+func (m *LegacyManager) Stats(opts *cgroups.StatsOptions) (*cgroups.Stats, error) { m.mu.Lock() defer m.mu.Unlock() + + // Default: query all controllers (same as original GetStats behavior) + controllers := cgroups.AllControllers + if opts != nil && opts.Controllers != 0 { + controllers = opts.Controllers + } + stats := cgroups.NewStats() for _, sys := range legacySubsystems { path := m.paths[sys.Name()] if path == "" { continue } + + // Filter based on controller type + if sys.ID()&controllers == 0 { + continue + } + if err := sys.GetStats(path, stats); err != nil { return nil, err } diff --git a/vendor/github.com/opencontainers/cgroups/systemd/v2.go b/vendor/github.com/opencontainers/cgroups/systemd/v2.go index f76c93e8444..cb71e9eac3b 100644 --- a/vendor/github.com/opencontainers/cgroups/systemd/v2.go +++ b/vendor/github.com/opencontainers/cgroups/systemd/v2.go @@ -497,6 +497,11 @@ func (m *UnifiedManager) GetStats() (*cgroups.Stats, error) { return m.fsMgr.GetStats() } +// Stats returns cgroup statistics for the specified controllers. 
+func (m *UnifiedManager) Stats(opts *cgroups.StatsOptions) (*cgroups.Stats, error) { + return m.fsMgr.Stats(opts) +} + func (m *UnifiedManager) Set(r *cgroups.Resources) error { if r == nil { return nil diff --git a/vendor/github.com/opencontainers/cgroups/utils.go b/vendor/github.com/opencontainers/cgroups/utils.go index 95b3310ab6e..469475cc8a6 100644 --- a/vendor/github.com/opencontainers/cgroups/utils.go +++ b/vendor/github.com/opencontainers/cgroups/utils.go @@ -207,7 +207,7 @@ func parseCgroupFromReader(r io.Reader) (map[string]string, error) { return nil, fmt.Errorf("invalid cgroup entry: must contain at least two colons: %v", text) } - for _, subs := range strings.Split(parts[1], ",") { + for subs := range strings.SplitSeq(parts[1], ",") { cgroups[subs] = parts[2] } } diff --git a/vendor/github.com/opencontainers/cgroups/v1_utils.go b/vendor/github.com/opencontainers/cgroups/v1_utils.go index 19b8af1344b..11025b2ed01 100644 --- a/vendor/github.com/opencontainers/cgroups/v1_utils.go +++ b/vendor/github.com/opencontainers/cgroups/v1_utils.go @@ -170,7 +170,7 @@ func getCgroupMountsHelper(ss map[string]bool, mounts []*mountinfo.Info, all boo Mountpoint: mi.Mountpoint, Root: mi.Root, } - for _, opt := range strings.Split(mi.VFSOptions, ",") { + for opt := range strings.SplitSeq(mi.VFSOptions, ",") { seen, known := ss[opt] if !known || (!all && seen) { continue diff --git a/vendor/modules.txt b/vendor/modules.txt index b2ae77b2580..550edcf0aec 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -71,8 +71,8 @@ github.com/moby/sys/userns # github.com/mrunalp/fileutils v0.5.1 ## explicit; go 1.13 github.com/mrunalp/fileutils -# github.com/opencontainers/cgroups v0.0.6 -## explicit; go 1.23.0 +# github.com/opencontainers/cgroups v0.0.7 +## explicit; go 1.24.0 github.com/opencontainers/cgroups github.com/opencontainers/cgroups/devices github.com/opencontainers/cgroups/devices/config From d11e5e88d8e6a7fa4fcf1d1159b11d309592fc45 Mon Sep 17 00:00:00 2001 From: 
Rodrigo Campos Date: Thu, 18 Jun 2026 18:21:55 +0200 Subject: [PATCH 2/3] libct: Don't exclude BPF fds from leaks Now the cgroups dependency has been upgraded and it doesn't cause these leaks anymore. Let's remove the exclude. Signed-off-by: Rodrigo Campos --- libcontainer/integration/exec_test.go | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/libcontainer/integration/exec_test.go b/libcontainer/integration/exec_test.go index c1c69a65f85..e553cb40d99 100644 --- a/libcontainer/integration/exec_test.go +++ b/libcontainer/integration/exec_test.go @@ -1748,10 +1748,6 @@ func testFdLeaks(t *testing.T, systemd bool) { } // Show the extra opened files. - excludedPaths := []string{ - "anon_inode:bpf-prog", // FIXME: see https://github.com/opencontainers/runc/issues/2366#issuecomment-776411392 - } - count := 0 procSelfFd, closer, err := pathrs.ProcThreadSelfOpen("fd/", unix.O_DIRECTORY|unix.O_CLOEXEC) @@ -1767,12 +1763,6 @@ next_fd: } } dst, _ := linux.Readlinkat(procSelfFd, fd1) - for _, ex := range excludedPaths { - if ex == dst { - continue next_fd - } - } - count++ t.Logf("extra fd %s -> %s", fd1, dst) } From 54294e5e97bf46f3159f9c79b247b52f4becfe35 Mon Sep 17 00:00:00 2001 From: Rodrigo Campos Date: Fri, 19 Jun 2026 12:16:02 +0200 Subject: [PATCH 3/3] CHANGELOG: Mention eBPF fd leaks are fixed now Signed-off-by: Rodrigo Campos --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4929c9f7ba4..4faa2506af6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed ### - The poststart hooks are now executed after starting the user-specified process, fixing a runtime-spec conformance issue. (#4347, #5186) +- Some long-standing file-descriptor leaks on the eBPF devices cgroups were + fixed. (#5322) ### Added ### - `runc version` and `runc features` now provide version information about