From b32e6582b8757570424a56a7ff8701016579a4df Mon Sep 17 00:00:00 2001 From: houyuxi Date: Fri, 15 May 2026 14:30:36 +0800 Subject: [PATCH 01/12] refactor(server): split server package into multiple files Signed-off-by: houyuxi --- internal/server/allocate.go | 189 ++++++++++++++++ internal/server/register.go | 152 +++++++++++++ internal/server/server.go | 423 ------------------------------------ internal/server/util.go | 117 ++++++++++ 4 files changed, 458 insertions(+), 423 deletions(-) create mode 100644 internal/server/allocate.go create mode 100644 internal/server/register.go create mode 100644 internal/server/util.go diff --git a/internal/server/allocate.go b/internal/server/allocate.go new file mode 100644 index 0000000..5ae43cf --- /dev/null +++ b/internal/server/allocate.go @@ -0,0 +1,189 @@ +package server + +import ( + "encoding/json" + "fmt" + "strconv" + + v1 "k8s.io/api/core/v1" + "k8s.io/klog/v2" + "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" + + "github.com/Project-HAMi/HAMi/pkg/device" + "github.com/Project-HAMi/HAMi/pkg/device-plugin/nvidiadevice/nvinternal/plugin" + "github.com/Project-HAMi/HAMi/pkg/util" +) + +// buildContainerAllocateResponse builds the allocate response for a single container. 
+func (ps *PluginServer) buildContainerAllocateResponse(pod *v1.Pod, containerDevs device.ContainerDevices, rtInfoLookup map[string]RuntimeInfo) (*v1beta1.ContainerAllocateResponse, error) { + resp := &v1beta1.ContainerAllocateResponse{} + + var ( + IDs []int32 + memories []*int64 + cores []*int32 + ascendVNPUSpec string + ) + + for _, dev := range containerDevs { + d := ps.mgr.GetDeviceByUUID(dev.UUID) + if d == nil { + return nil, fmt.Errorf("unknown uuid: %s", dev.UUID) + } + IDs = append(IDs, d.PhyID) + + if info, ok := rtInfoLookup[dev.UUID]; ok { + if ascendVNPUSpec == "" && info.Temp != "" { + ascendVNPUSpec = info.Temp + } + if info.Memory != nil { + memories = append(memories, info.Memory) + } + if info.Core != nil { + cores = append(cores, info.Core) + } + } + } + + if len(IDs) == 0 { + return nil, fmt.Errorf("annotation %s value invalid", ps.allocAnno) + } + ascendVisibleDevices := fmt.Sprintf("%d", IDs[0]) + for i := 1; i < len(IDs); i++ { + ascendVisibleDevices = fmt.Sprintf("%s,%d", ascendVisibleDevices, IDs[i]) + } + resp.Envs = make(map[string]string) + resp.Envs["ASCEND_VISIBLE_DEVICES"] = ascendVisibleDevices + + vnpuMode := pod.Annotations[VNPUModeAnnotation] + klog.V(4).Infof("Pod %s vnpu mode: %s", pod.Name, vnpuMode) + if vnpuMode == VNPUModeHamiCore { + // 1. Handle volume mount injection + var mounts []*v1beta1.Mount + // A.Huawei driver and SMI toolchain (Read-Only) + driverPaths := []string{ + "/usr/local/bin/npu-smi", + "/etc/ascend_install.info", + "/usr/local/Ascend/driver/lib64/driver", + "/usr/local/Ascend/driver/version.info", + } + for _, p := range driverPaths { + mounts = append(mounts, &v1beta1.Mount{HostPath: p, ContainerPath: p, ReadOnly: true}) + } + + mounts = append(mounts, &v1beta1.Mount{ + HostPath: "/usr/local/hami-vnpu-core", + ContainerPath: "/hami-vnpu-core", + ReadOnly: true, + }) + // B. Inject HAMi library path by mounting /etc/ld.so.preload. 
+ mounts = append(mounts, &v1beta1.Mount{ + HostPath: "/usr/local/hami-vnpu-core/ld.so.preload", // Template file on host + ContainerPath: "/etc/ld.so.preload", // Overwrites the target file in container + ReadOnly: true, + }) + + // C. Shared directory for HAMi compute resource partitioning (Read/Write) + mounts = append(mounts, &v1beta1.Mount{ + HostPath: "/usr/local/hami-shared-region", + ContainerPath: "/hami-shared-region", + ReadOnly: false, + }) + resp.Mounts = mounts + + // Set NPU_MEM_QUOTA + if len(memories) > 0 && memories[0] != nil { + resp.Envs["NPU_MEM_QUOTA"] = strconv.FormatInt(*memories[0], 10) + klog.V(4).InfoS("Memory quota set", "value", *memories[0]) + } + + // Set NPU_PRIORITY + if len(cores) > 0 && cores[0] != nil { + resp.Envs["NPU_PRIORITY"] = strconv.FormatInt(int64(*cores[0]), 10) + klog.V(4).InfoS("Core priority set", "value", *cores[0]) + } + + // Set GLOBAL_SHM_PATH based on the first device ID. + resp.Envs["NPU_GLOBAL_SHM_PATH"] = fmt.Sprintf("/hami-shared-region/%d_global_registry", IDs[0]) + klog.V(5).Infof("Create %d_global_registry", IDs[0]) + } else { + if ascendVNPUSpec != "" { + resp.Envs["ASCEND_VNPU_SPECS"] = ascendVNPUSpec + } + } + return resp, nil +} + +// popNextContainerDevices finds and erases the first non-empty containerDevices +// from podSingleDev. It mutates podSingleDev in place. +func (ps *PluginServer) popNextContainerDevices(podSingleDev device.PodSingleDevice) (device.ContainerDevices, error) { + for i, ctrDevs := range podSingleDev { + if len(ctrDevs) > 0 { + podSingleDev[i] = device.ContainerDevices{} + return ctrDevs, nil + } + } + return nil, fmt.Errorf("no pending device allocation found") +} + +// decodeDeviceAnnotations decodes the pod's device allocation annotation +// (registered as hami.io/-devices-to-allocate in InRequestDevices) +// into a PodSingleDevice. 
+func (ps *PluginServer) decodeDeviceAnnotations(pod *v1.Pod) (device.PodSingleDevice, error) { + pdevices, err := device.DecodePodDevices(device.InRequestDevices, pod.Annotations) + if err != nil { + return nil, err + } + pd, ok := pdevices[ps.commonWord] + if !ok { + return nil, fmt.Errorf("device %s not found in pod annotations", ps.commonWord) + } + return pd, nil +} + +// buildRuntimeInfoLookup builds a UUID-to-RuntimeInfo lookup from the pod's allocAnno annotation. +func (ps *PluginServer) buildRuntimeInfoLookup(pod *v1.Pod) (map[string]RuntimeInfo, error) { + anno, ok := pod.Annotations[ps.allocAnno] + if !ok { + return nil, fmt.Errorf("annotation %s not set", ps.allocAnno) + } + var rtInfo []RuntimeInfo + if err := json.Unmarshal([]byte(anno), &rtInfo); err != nil { + return nil, fmt.Errorf("annotation %s value %s invalid: %w", ps.allocAnno, anno, err) + } + lookup := make(map[string]RuntimeInfo, len(rtInfo)) + for _, info := range rtInfo { + if info.UUID != "" { + lookup[info.UUID] = info + } + } + return lookup, nil +} + +// patchErasedAnnotation patches the pod's device annotation with the given +// podSingleDev. It also updates pod.Annotations in place. +func (ps *PluginServer) patchErasedAnnotation(pod *v1.Pod, podSingleDev device.PodSingleDevice) error { + klog.V(5).Infof("After erase annotation, remaining devices: %v", podSingleDev) + newAnnoValue := device.EncodePodSingleDevice(podSingleDev) + newAnnos := map[string]string{ + ps.toAllocDeviceAnno: newAnnoValue, + } + if err := util.PatchPodAnnotations(pod, newAnnos); err != nil { + return err + } + pod.Annotations[ps.toAllocDeviceAnno] = newAnnoValue + return nil +} + +// podAllocationTrySuccess checks if all containers of this pod have been +// allocated. If so, it sets bind-phase to "success" and releases the node +// lock; otherwise it returns without setting bind-phase or releasing the lock, +// waiting for the next Allocate call. 
+func (ps *PluginServer) podAllocationTrySuccess(pod *v1.Pod) { + plugin.PodAllocationTrySuccess(ps.nodeName, ps.commonWord, NodeLockAscend, pod) +} + +// podAllocationFailed sets bind-phase to "failed" and releases the node lock. +func (ps *PluginServer) podAllocationFailed(pod *v1.Pod) { + plugin.PodAllocationFailed(ps.nodeName, pod, NodeLockAscend) +} diff --git a/internal/server/register.go b/internal/server/register.go new file mode 100644 index 0000000..e3acb0c --- /dev/null +++ b/internal/server/register.go @@ -0,0 +1,152 @@ +package server + +import ( + "context" + "fmt" + "net" + "path" + "strings" + "time" + + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" + "k8s.io/klog/v2" + "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" + + "github.com/Project-HAMi/HAMi/pkg/device" + "github.com/Project-HAMi/HAMi/pkg/util" +) + +func (ps *PluginServer) watchAndRegister() { + timer := time.After(1 * time.Second) + for { + select { + case <-ps.stopCh: + klog.Infof("stop watch and register") + return + case <-timer: + } + unhealthy := ps.mgr.GetUnHealthIDs() + if len(unhealthy) > 0 { + if err := ps.mgr.UpdateDevice(); err != nil { + klog.Errorf("update device error: %v", err) + timer = time.After(5 * time.Second) + continue + } + ps.healthCh <- unhealthy[0] + } + err := ps.registerHAMi() + if err != nil { + klog.Errorf("register HAMi error: %v", err) + timer = time.After(5 * time.Second) + } else { + klog.V(3).Infof("register HAMi success") + timer = time.After(30 * time.Second) + } + } +} + +func (ps *PluginServer) registerHAMi() error { + devs := ps.mgr.GetDevices() + apiDevices := make([]*device.DeviceInfo, 0, len(devs)) + // hami currently believes that the index starts from 0 and is continuous. 
+ for i, dev := range devs { + device := &device.DeviceInfo{ + Index: uint(i), + ID: dev.UUID, + Count: int32(ps.mgr.VDeviceCount()), + Devmem: int32(dev.Memory), + Devcore: dev.AICore, + Type: ps.mgr.CommonWord(), + Numa: 0, + Health: dev.Health, + } + if strings.HasPrefix(device.Type, Ascend910Prefix) { + NetworkID, err := ps.getDeviceNetworkID(i, device.Type) + if err != nil { + return fmt.Errorf("get networkID error: %w", err) + } + device.CustomInfo = map[string]any{ + "NetworkID": NetworkID, + } + } + apiDevices = append(apiDevices, device) + } + annos := make(map[string]string) + annos[ps.registerAnno] = device.MarshalNodeDevices(apiDevices) + annos[ps.handshakeAnno] = "Reported_" + time.Now().Add(time.Duration(*reportTimeOffset)*time.Second).Format("2006.01.02 15:04:05") + + if ps.mgr.IsHamiVnpuCore() { + annos[VNPUNodeSelectorAnnotation] = "true" + klog.V(4).Infof("Node %s has HamiVnpuCore enabled, patching annotation %s: true", ps.nodeName, VNPUNodeSelectorAnnotation) + } else { + annos[VNPUNodeSelectorAnnotation] = "false" + } + + node, err := util.GetNode(ps.nodeName) + if err != nil { + return fmt.Errorf("get node %s error: %w", ps.nodeName, err) + } + err = util.PatchNodeAnnotations(node, annos) + if err != nil { + return fmt.Errorf("patch node %s annotations error: %w", ps.nodeName, err) + } + klog.V(5).Infof("patch node %s annotations: %v", ps.nodeName, annos) + return nil +} + +func (ps *PluginServer) getDeviceNetworkID(idx int, deviceType string) (int, error) { + // For Ascend910C devices, all modules (dies) are interconnected via HCCS + if deviceType == Ascend910CType { + return 0, nil + } + + if idx > 3 { + return 1, nil + } + + return 0, nil +} + +func (ps *PluginServer) registerKubelet() error { + conn, err := ps.dial(v1beta1.KubeletSocket, 5*time.Second) + if err != nil { + return err + } + defer func(conn *grpc.ClientConn) { + _ = conn.Close() + }(conn) + client := v1beta1.NewRegistrationClient(conn) + reqt := &v1beta1.RegisterRequest{ + 
Version: v1beta1.Version, + Endpoint: path.Base(ps.socket), + ResourceName: ps.mgr.ResourceName(), + Options: &v1beta1.DevicePluginOptions{ + GetPreferredAllocationAvailable: false, + }, + } + + _, err = client.Register(context.Background(), reqt) + if err != nil { + return err + } + return nil +} + +func (ps *PluginServer) dial(unixSocketPath string, timeout time.Duration) (*grpc.ClientConn, error) { + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + c, err := grpc.DialContext(ctx, unixSocketPath, + grpc.WithTransportCredentials(insecure.NewCredentials()), + grpc.WithBlock(), + grpc.WithContextDialer(func(ctx2 context.Context, addr string) (net.Conn, error) { + var d net.Dialer + return d.DialContext(ctx2, "unix", addr) + }), + ) + + if err != nil { + return nil, err + } + return c, nil +} diff --git a/internal/server/server.go b/internal/server/server.go index 4fff85a..b05bce3 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -18,31 +18,20 @@ package server import ( "context" - "crypto/sha256" - "encoding/hex" - "encoding/json" - "errors" "flag" "fmt" - "io" "net" "os" "path" - "path/filepath" - "strconv" - "strings" - "syscall" "time" "google.golang.org/grpc" - "google.golang.org/grpc/credentials/insecure" v1 "k8s.io/api/core/v1" "k8s.io/klog/v2" "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" // "github.com/Project-HAMi/HAMi/pkg/device/ascend" "github.com/Project-HAMi/HAMi/pkg/device" - "github.com/Project-HAMi/HAMi/pkg/device-plugin/nvidiadevice/nvinternal/plugin" "github.com/Project-HAMi/HAMi/pkg/util" "github.com/Project-HAMi/ascend-device-plugin/internal/manager" ) @@ -105,110 +94,6 @@ func NewPluginServer(mgr *manager.AscendManager, nodeName string, checkIdleVNPUI return server, nil } -// fileSHA256 calculates the SHA256 checksum of the specified file -func fileSHA256(path string) (string, error) { - f, err := os.Open(path) - if err != nil { - return "", err - } - defer f.Close() - - h := 
sha256.New() - if _, err := io.Copy(h, f); err != nil { - return "", err - } - return hex.EncodeToString(h.Sum(nil)), nil -} - -// Automatically creates directories, sets permissions, and copies core files on the host -func prepareHostResources() error { - klog.Info("Starting host resource preparation for HAMi vNPU core...") - - // 1. Create shared memory directory - sharedRegionPath := "/usr/local/hami-shared-region" - if err := os.MkdirAll(sharedRegionPath, 0777); err != nil { - if !os.IsExist(err) { - return fmt.Errorf("failed to create %s: %w", sharedRegionPath, err) - } - } - if err := os.Chmod(sharedRegionPath, 0777); err != nil { - return fmt.Errorf("failed to chmod %s: %w", sharedRegionPath, err) - } - klog.Infof("Successfully prepared directory: %s", sharedRegionPath) - - // 2. Prepare /usr/local/hami-vnpu-core/ directory - targetDir := "/usr/local/hami-vnpu-core" - if err := os.MkdirAll(targetDir, 0775); err != nil { - return fmt.Errorf("failed to create %s: %w", targetDir, err) - } - - // Specify the in-container assets directory (can be overridden via environment variable, default follows standard DevicePlugin convention) - assetsDir := os.Getenv("HAMI_VNPU_ASSETS_PATH") - if assetsDir == "" { - assetsDir = "/usr/local/hami-vnpu-core-assets" - } - - // Define files to copy: source path in container -> target path on host - filesToCopy := map[string]string{ - "limiter": filepath.Join(targetDir, "limiter"), - "libvnpu.so": filepath.Join(targetDir, "libvnpu.so"), - "ld.so.preload": filepath.Join(targetDir, "ld.so.preload"), - } - - for srcName, destPath := range filesToCopy { - srcPath := filepath.Join(assetsDir, srcName) - - // File already exists, skip if content is consistent - if _, err := os.Stat(destPath); err == nil { - srcSum, err1 := fileSHA256(srcPath) - dstSum, err2 := fileSHA256(destPath) - - if err1 == nil && err2 == nil && srcSum == dstSum { - klog.Infof("✓ %s already up-to-date, skipping", destPath) - continue - } - } - - if err := 
copyFile(srcPath, destPath); err != nil { - if errors.Is(err, syscall.ETXTBSY) { - klog.Warningf("⚠ %s is in use by running process, keeping existing version (safe)", destPath) - continue - } - return fmt.Errorf("failed to copy %s: %w", destPath, err) - } - klog.Infof("✓ Copied %s -> %s", srcPath, destPath) - } - - klog.Info("Host resource preparation completed successfully.") - return nil -} - -// A standard file copy implementation that preserves the original file permissions -func copyFile(src, dst string) error { - srcFile, err := os.Open(src) - if err != nil { - return err - } - defer srcFile.Close() - - dstFile, err := os.Create(dst) - if err != nil { - return err - } - defer dstFile.Close() - - if _, err = io.Copy(dstFile, srcFile); err != nil { - return err - } - - // Sync source file permissions (ensure the limiter binary retains executable permission) - srcInfo, err := srcFile.Stat() - if err != nil { - return err - } - return os.Chmod(dst, srcInfo.Mode()) -} - func (ps *PluginServer) Start() error { // Automatically prepare host environment when the plugin starts if err := prepareHostResources(); err != nil { @@ -265,24 +150,6 @@ func (ps *PluginServer) CleanupIdleVNPUs() error { return ps.mgr.CleanupIdleVNPUs() } -func (ps *PluginServer) dial(unixSocketPath string, timeout time.Duration) (*grpc.ClientConn, error) { - ctx, cancel := context.WithTimeout(context.Background(), timeout) - defer cancel() - c, err := grpc.DialContext(ctx, unixSocketPath, - grpc.WithTransportCredentials(insecure.NewCredentials()), - grpc.WithBlock(), - grpc.WithContextDialer(func(ctx2 context.Context, addr string) (net.Conn, error) { - var d net.Dialer - return d.DialContext(ctx2, "unix", addr) - }), - ) - - if err != nil { - return nil, err - } - return c, nil -} - func (ps *PluginServer) serve() error { _ = os.Remove(ps.socket) sock, err := net.Listen("unix", ps.socket) @@ -336,283 +203,6 @@ func (ps *PluginServer) serve() error { return nil } -func (ps *PluginServer) 
registerKubelet() error { - conn, err := ps.dial(v1beta1.KubeletSocket, 5*time.Second) - if err != nil { - return fmt.Errorf("failed to dial kubelet socket: %w", err) - } - defer func(conn *grpc.ClientConn) { - _ = conn.Close() - }(conn) - client := v1beta1.NewRegistrationClient(conn) - reqt := &v1beta1.RegisterRequest{ - Version: v1beta1.Version, - Endpoint: path.Base(ps.socket), - ResourceName: ps.mgr.ResourceName(), - Options: &v1beta1.DevicePluginOptions{ - GetPreferredAllocationAvailable: false, - }, - } - - _, err = client.Register(context.Background(), reqt) - if err != nil { - return fmt.Errorf("failed to register device plugin with kubelet: %w", err) - } - return nil -} - -func (ps *PluginServer) getDeviceNetworkID(idx int, deviceType string) (int, error) { - // For Ascend910C devices, all modules (dies) are interconnected via HCCS - if deviceType == Ascend910CType { - return 0, nil - } - - if idx > 3 { - return 1, nil - } - - return 0, nil -} - -func (ps *PluginServer) registerHAMi() error { - devs := ps.mgr.GetDevices() - apiDevices := make([]*device.DeviceInfo, 0, len(devs)) - // hami currently believes that the index starts from 0 and is continuous. 
- for i, dev := range devs { - device := &device.DeviceInfo{ - Index: uint(i), - ID: dev.UUID, - Count: int32(ps.mgr.VDeviceCount()), - Devmem: int32(dev.Memory), - Devcore: dev.AICore, - Type: ps.mgr.CommonWord(), - Numa: 0, - Health: dev.Health, - } - if strings.HasPrefix(device.Type, Ascend910Prefix) { - NetworkID, err := ps.getDeviceNetworkID(i, device.Type) - if err != nil { - return fmt.Errorf("get networkID error: %w", err) - } - device.CustomInfo = map[string]any{ - "NetworkID": NetworkID, - } - } - apiDevices = append(apiDevices, device) - } - annos := make(map[string]string) - annos[ps.registerAnno] = device.MarshalNodeDevices(apiDevices) - annos[ps.handshakeAnno] = "Reported_" + time.Now().Add(time.Duration(*reportTimeOffset)*time.Second).Format("2006.01.02 15:04:05") - - if ps.mgr.IsHamiVnpuCore() { - annos[VNPUNodeSelectorAnnotation] = "true" - klog.V(4).Infof("Node %s has HamiVnpuCore enabled, patching annotation %s: true", ps.nodeName, VNPUNodeSelectorAnnotation) - } else { - annos[VNPUNodeSelectorAnnotation] = "false" - } - - node, err := util.GetNode(ps.nodeName) - if err != nil { - return fmt.Errorf("get node %s error: %w", ps.nodeName, err) - } - err = util.PatchNodeAnnotations(node, annos) - if err != nil { - return fmt.Errorf("patch node %s annotations error: %w", ps.nodeName, err) - } - klog.V(5).Infof("patch node %s annotations: %v", ps.nodeName, annos) - return nil -} - -func (ps *PluginServer) watchAndRegister() { - timer := time.After(1 * time.Second) - for { - select { - case <-ps.stopCh: - klog.Infof("stop watch and register") - return - case <-timer: - } - unhealthy := ps.mgr.GetUnHealthIDs() - if len(unhealthy) > 0 { - if err := ps.mgr.UpdateDevice(); err != nil { - klog.Errorf("update device error: %v", err) - timer = time.After(5 * time.Second) - continue - } - ps.healthCh <- unhealthy[0] - } - err := ps.registerHAMi() - if err != nil { - klog.Errorf("register HAMi error: %v", err) - timer = time.After(5 * time.Second) - } else { - 
klog.V(3).Infof("register HAMi success") - timer = time.After(30 * time.Second) - } - } -} - -// buildContainerAllocateResponse builds the allocate response for a single container. -func (ps *PluginServer) buildContainerAllocateResponse(pod *v1.Pod, containerDevs device.ContainerDevices, rtInfoLookup map[string]RuntimeInfo) (*v1beta1.ContainerAllocateResponse, error) { - resp := &v1beta1.ContainerAllocateResponse{} - - var ( - IDs []int32 - memories []*int64 - cores []*int32 - ascendVNPUSpec string - ) - - for _, dev := range containerDevs { - d := ps.mgr.GetDeviceByUUID(dev.UUID) - if d == nil { - return nil, fmt.Errorf("unknown uuid: %s", dev.UUID) - } - IDs = append(IDs, d.PhyID) - - if info, ok := rtInfoLookup[dev.UUID]; ok { - if ascendVNPUSpec == "" && info.Temp != "" { - ascendVNPUSpec = info.Temp - } - if info.Memory != nil { - memories = append(memories, info.Memory) - } - if info.Core != nil { - cores = append(cores, info.Core) - } - } - } - - if len(IDs) == 0 { - return nil, fmt.Errorf("annotation %s value invalid", ps.allocAnno) - } - ascendVisibleDevices := fmt.Sprintf("%d", IDs[0]) - for i := 1; i < len(IDs); i++ { - ascendVisibleDevices = fmt.Sprintf("%s,%d", ascendVisibleDevices, IDs[i]) - } - resp.Envs = make(map[string]string) - resp.Envs["ASCEND_VISIBLE_DEVICES"] = ascendVisibleDevices - - vnpuMode := pod.Annotations[VNPUModeAnnotation] - klog.V(4).Infof("Pod %s vnpu mode: %s", pod.Name, vnpuMode) - if vnpuMode == VNPUModeHamiCore { - // 1. 
Handle volume mount injection - var mounts []*v1beta1.Mount - // A.Huawei driver and SMI toolchain (Read-Only) - driverPaths := []string{ - "/usr/local/bin/npu-smi", - "/etc/ascend_install.info", - "/usr/local/Ascend/driver/lib64/driver", - "/usr/local/Ascend/driver/version.info", - } - for _, p := range driverPaths { - mounts = append(mounts, &v1beta1.Mount{HostPath: p, ContainerPath: p, ReadOnly: true}) - } - - mounts = append(mounts, &v1beta1.Mount{ - HostPath: "/usr/local/hami-vnpu-core", - ContainerPath: "/hami-vnpu-core", - ReadOnly: true, - }) - // B. Inject HAMi library path by mounting /etc/ld.so.preload. - mounts = append(mounts, &v1beta1.Mount{ - HostPath: "/usr/local/hami-vnpu-core/ld.so.preload", // Template file on host - ContainerPath: "/etc/ld.so.preload", // Overwrites the target file in container - ReadOnly: true, - }) - - // C. Shared directory for HAMi compute resource partitioning (Read/Write) - mounts = append(mounts, &v1beta1.Mount{ - HostPath: "/usr/local/hami-shared-region", - ContainerPath: "/hami-shared-region", - ReadOnly: false, - }) - resp.Mounts = mounts - - // Set NPU_MEM_QUOTA - if len(memories) > 0 && memories[0] != nil { - resp.Envs["NPU_MEM_QUOTA"] = strconv.FormatInt(*memories[0], 10) - klog.V(4).InfoS("Memory quota set", "value", *memories[0]) - } - - // Set NPU_PRIORITY - if len(cores) > 0 && cores[0] != nil { - resp.Envs["NPU_PRIORITY"] = strconv.FormatInt(int64(*cores[0]), 10) - klog.V(4).InfoS("Core priority set", "value", *cores[0]) - } - - // Set GLOBAL_SHM_PATH based on the first device ID. - resp.Envs["NPU_GLOBAL_SHM_PATH"] = fmt.Sprintf("/hami-shared-region/%d_global_registry", IDs[0]) - klog.V(5).Infof("Create %d_global_registry", IDs[0]) - } else { - if ascendVNPUSpec != "" { - resp.Envs["ASCEND_VNPU_SPECS"] = ascendVNPUSpec - } - } - return resp, nil -} - -// popNextContainerDevices finds and erases the first non-empty containerDevices -// from podSingleDev. It mutates podSingleDev in place. 
-func (ps *PluginServer) popNextContainerDevices(podSingleDev device.PodSingleDevice) (device.ContainerDevices, error) { - for i, ctrDevs := range podSingleDev { - if len(ctrDevs) > 0 { - podSingleDev[i] = device.ContainerDevices{} - return ctrDevs, nil - } - } - return nil, fmt.Errorf("no pending device allocation found") -} - -// decodeDeviceAnnotations decodes the pod's device allocation annotation -// (registered as hami.io/-devices-to-allocate in InRequestDevices) -// into a PodSingleDevice. -func (ps *PluginServer) decodeDeviceAnnotations(pod *v1.Pod) (device.PodSingleDevice, error) { - pdevices, err := device.DecodePodDevices(device.InRequestDevices, pod.Annotations) - if err != nil { - return nil, err - } - pd, ok := pdevices[ps.commonWord] - if !ok { - return nil, fmt.Errorf("device %s not found in pod annotations", ps.commonWord) - } - return pd, nil -} - -// buildRuntimeInfoLookup builds a UUID-to-RuntimeInfo lookup from the pod's allocAnno annotation. -func (ps *PluginServer) buildRuntimeInfoLookup(pod *v1.Pod) (map[string]RuntimeInfo, error) { - anno, ok := pod.Annotations[ps.allocAnno] - if !ok { - return nil, fmt.Errorf("annotation %s not set", ps.allocAnno) - } - var rtInfo []RuntimeInfo - if err := json.Unmarshal([]byte(anno), &rtInfo); err != nil { - return nil, fmt.Errorf("annotation %s value %s invalid: %w", ps.allocAnno, anno, err) - } - lookup := make(map[string]RuntimeInfo, len(rtInfo)) - for _, info := range rtInfo { - if info.UUID != "" { - lookup[info.UUID] = info - } - } - return lookup, nil -} - -// patchErasedAnnotation patches the pod's device annotation with the given -// podSingleDev. It also updates pod.Annotations in place. 
-func (ps *PluginServer) patchErasedAnnotation(pod *v1.Pod, podSingleDev device.PodSingleDevice) error { - klog.V(5).Infof("After erase annotation, remaining devices: %v", podSingleDev) - newAnnoValue := device.EncodePodSingleDevice(podSingleDev) - newAnnos := map[string]string{ - ps.toAllocDeviceAnno: newAnnoValue, - } - if err := util.PatchPodAnnotations(pod, newAnnos); err != nil { - return err - } - pod.Annotations[ps.toAllocDeviceAnno] = newAnnoValue - return nil -} - func (ps *PluginServer) apiDevices() []*v1beta1.Device { devs := ps.mgr.GetDevices() devices := make([]*v1beta1.Device, 0, len(devs)) @@ -723,16 +313,3 @@ func (ps *PluginServer) Allocate(ctx context.Context, reqs *v1beta1.AllocateRequ func (ps *PluginServer) PreStartContainer(context.Context, *v1beta1.PreStartContainerRequest) (*v1beta1.PreStartContainerResponse, error) { return &v1beta1.PreStartContainerResponse{}, nil } - -// podAllocationTrySuccess checks if all containers of this pod have been -// allocated. If so, it sets bind-phase to "success" and releases the node -// lock; otherwise it returns without setting bind-phase or releasing the lock, -// waiting for the next Allocate call. -func (ps *PluginServer) podAllocationTrySuccess(pod *v1.Pod) { - plugin.PodAllocationTrySuccess(ps.nodeName, ps.commonWord, NodeLockAscend, pod) -} - -// podAllocationFailed sets bind-phase to "failed" and releases the node lock. 
-func (ps *PluginServer) podAllocationFailed(pod *v1.Pod) { - plugin.PodAllocationFailed(ps.nodeName, pod, NodeLockAscend) -} diff --git a/internal/server/util.go b/internal/server/util.go new file mode 100644 index 0000000..7ac63ab --- /dev/null +++ b/internal/server/util.go @@ -0,0 +1,117 @@ +package server + +import ( + "crypto/sha256" + "encoding/hex" + "fmt" + "io" + "os" + "path/filepath" + "strings" + + "k8s.io/klog/v2" +) + +// fileSHA256 calculates the SHA256 checksum of the specified file +func fileSHA256(path string) (string, error) { + f, err := os.Open(path) + if err != nil { + return "", err + } + defer f.Close() + + h := sha256.New() + if _, err := io.Copy(h, f); err != nil { + return "", err + } + return hex.EncodeToString(h.Sum(nil)), nil +} + +// A standard file copy implementation that preserves the original file permissions +func copyFile(src, dst string) error { + srcFile, err := os.Open(src) + if err != nil { + return err + } + defer srcFile.Close() + + dstFile, err := os.Create(dst) + if err != nil { + return err + } + defer dstFile.Close() + + if _, err = io.Copy(dstFile, srcFile); err != nil { + return err + } + + // Sync source file permissions (ensure the limiter binary retains executable permission) + srcInfo, err := srcFile.Stat() + if err != nil { + return err + } + return os.Chmod(dst, srcInfo.Mode()) +} + +// Automatically creates directories, sets permissions, and copies core files on the host +func prepareHostResources() error { + klog.Info("Starting host resource preparation for HAMi vNPU core...") + + // 1. 
Create shared memory directory + sharedRegionPath := "/usr/local/hami-shared-region" + if err := os.MkdirAll(sharedRegionPath, 0777); err != nil { + if !os.IsExist(err) { + return fmt.Errorf("failed to create %s: %w", sharedRegionPath, err) + } + } + if err := os.Chmod(sharedRegionPath, 0777); err != nil { + return fmt.Errorf("failed to chmod %s: %w", sharedRegionPath, err) + } + klog.Infof("Successfully prepared directory: %s", sharedRegionPath) + + // 2. Prepare /usr/local/hami-vnpu-core/ directory + targetDir := "/usr/local/hami-vnpu-core" + if err := os.MkdirAll(targetDir, 0775); err != nil { + return fmt.Errorf("failed to create %s: %w", targetDir, err) + } + + // Specify the in-container assets directory (can be overridden via environment variable, default follows standard DevicePlugin convention) + assetsDir := os.Getenv("HAMI_VNPU_ASSETS_PATH") + if assetsDir == "" { + assetsDir = "/usr/local/hami-vnpu-core-assets" + } + + // Define files to copy: source path in container -> target path on host + filesToCopy := map[string]string{ + "limiter": filepath.Join(targetDir, "limiter"), + "libvnpu.so": filepath.Join(targetDir, "libvnpu.so"), + "ld.so.preload": filepath.Join(targetDir, "ld.so.preload"), + } + + for srcName, destPath := range filesToCopy { + srcPath := filepath.Join(assetsDir, srcName) + + // File already exists, skip if content is consistent + if _, err := os.Stat(destPath); err == nil { + srcSum, err1 := fileSHA256(srcPath) + dstSum, err2 := fileSHA256(destPath) + + if err1 == nil && err2 == nil && srcSum == dstSum { + klog.Infof("✓ %s already up-to-date, skipping", destPath) + continue + } + } + + if err := copyFile(srcPath, destPath); err != nil { + if strings.Contains(err.Error(), "text file busy") { + klog.Warningf("⚠ %s is in use by running process, keeping existing version (safe)", destPath) + continue + } + return fmt.Errorf("failed to copy %s: %w", destPath, err) + } + klog.Infof("✓ Copied %s -> %s", srcPath, destPath) + } + + 
klog.Info("Host resource preparation completed successfully.") + return nil +} From f4f8c9996117432d212a31494e2efe1954df1624 Mon Sep 17 00:00:00 2001 From: houyuxi Date: Mon, 11 May 2026 16:45:57 +0800 Subject: [PATCH 02/12] refactor: extract `Manager` interface for testing Signed-off-by: houyuxi --- internal/manager/manager.go | 14 ++++++++++++++ internal/server/server.go | 4 ++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/internal/manager/manager.go b/internal/manager/manager.go index b3af1ec..2ad422f 100644 --- a/internal/manager/manager.go +++ b/internal/manager/manager.go @@ -39,6 +39,20 @@ type Device struct { Health bool } +// Manager defines the interface that PluginServer depends on. +// AscendManager implements this interface. +type Manager interface { + CommonWord() string + ResourceName() string + VDeviceCount() int + UpdateDevice() error + GetDevices() []*Device + GetDeviceByUUID(UUID string) *Device + GetUnHealthIDs() []int32 + CleanupIdleVNPUs() error + IsHamiVnpuCore() bool +} + type AscendManager struct { mu sync.RWMutex mgr *devmanager.DeviceManager diff --git a/internal/server/server.go b/internal/server/server.go index b05bce3..da07524 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -59,7 +59,7 @@ type PluginServer struct { allocAnno string toAllocDeviceAnno string grpcServer *grpc.Server - mgr *manager.AscendManager + mgr manager.Manager socket string stopCh chan interface{} healthCh chan int32 @@ -73,7 +73,7 @@ type RuntimeInfo struct { Core *int32 `json:"core,omitempty"` } -func NewPluginServer(mgr *manager.AscendManager, nodeName string, checkIdleVNPUInterval int) (*PluginServer, error) { +func NewPluginServer(mgr manager.Manager, nodeName string, checkIdleVNPUInterval int) (*PluginServer, error) { commonWord := mgr.CommonWord() server := &PluginServer{ commonWord: commonWord, From 4499fd236c6f8214ae63a183dafbe51e1a82648b Mon Sep 17 00:00:00 2001 From: houyuxi Date: Mon, 11 May 2026 16:47:00 
+0800 Subject: [PATCH 03/12] test(server): implement fake manager for testing Signed-off-by: houyuxi --- internal/server/fake_manager_test.go | 99 ++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 internal/server/fake_manager_test.go diff --git a/internal/server/fake_manager_test.go b/internal/server/fake_manager_test.go new file mode 100644 index 0000000..c5f2a2b --- /dev/null +++ b/internal/server/fake_manager_test.go @@ -0,0 +1,99 @@ +/* + * Copyright 2024 The HAMi Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package server + +import ( + "github.com/Project-HAMi/ascend-device-plugin/internal/manager" +) + +// FakeManager implements manager.Manager for testing. +// Each method delegates to the corresponding Func field if set; +// otherwise it returns a zero value. 
+type FakeManager struct {
+	CommonWordFunc       func() string                     // stub behind CommonWord
+	ResourceNameFunc     func() string                     // stub behind ResourceName
+	VDeviceCountFunc     func() int                        // stub behind VDeviceCount
+	UpdateDeviceFunc     func() error                      // stub behind UpdateDevice
+	GetDevicesFunc       func() []*manager.Device          // stub behind GetDevices
+	GetDeviceByUUIDFunc  func(UUID string) *manager.Device // stub behind GetDeviceByUUID
+	GetUnHealthIDsFunc   func() []int32                    // stub behind GetUnHealthIDs
+	CleanupIdleVNPUsFunc func() error                      // stub behind CleanupIdleVNPUs
+	IsHamiVnpuCoreFunc   func() bool                       // stub behind IsHamiVnpuCore
+}
+
+func (fm *FakeManager) CommonWord() string {
+	if fm.CommonWordFunc == nil {
+		return "" // no stub installed: zero value
+	}
+	return fm.CommonWordFunc()
+}
+
+func (fm *FakeManager) ResourceName() string {
+	if fm.ResourceNameFunc == nil {
+		return "" // no stub installed: zero value
+	}
+	return fm.ResourceNameFunc()
+}
+
+func (fm *FakeManager) VDeviceCount() int {
+	if fm.VDeviceCountFunc == nil {
+		return 0 // no stub installed: zero value
+	}
+	return fm.VDeviceCountFunc()
+}
+
+func (fm *FakeManager) UpdateDevice() error {
+	if fm.UpdateDeviceFunc == nil {
+		return nil // no stub installed: report success
+	}
+	return fm.UpdateDeviceFunc()
+}
+
+func (fm *FakeManager) GetDevices() []*manager.Device {
+	if fm.GetDevicesFunc == nil {
+		return nil // no stub installed: no devices
+	}
+	return fm.GetDevicesFunc()
+}
+
+func (fm *FakeManager) GetDeviceByUUID(UUID string) *manager.Device {
+	if fm.GetDeviceByUUIDFunc == nil {
+		return nil // no stub installed: every UUID is unknown
+	}
+	return fm.GetDeviceByUUIDFunc(UUID)
+}
+
+func (fm *FakeManager) GetUnHealthIDs() []int32 {
+	if fm.GetUnHealthIDsFunc == nil {
+		return nil // no stub installed: nothing unhealthy
+	}
+	return fm.GetUnHealthIDsFunc()
+}
+
+func (fm *FakeManager) CleanupIdleVNPUs() error {
+	if fm.CleanupIdleVNPUsFunc == nil {
+		return nil // no stub installed: report success
+	}
+	return fm.CleanupIdleVNPUsFunc()
+}
+
+func (fm *FakeManager) IsHamiVnpuCore() bool {
+	if fm.IsHamiVnpuCoreFunc == nil {
+		return false // no stub installed: zero value
+	}
+	return fm.IsHamiVnpuCoreFunc()
+}
From 6844ff6092227e7a7bc9488bd7349035eacaa008 Mon Sep 17 00:00:00 2001
From: houyuxi
Date: Fri, 22 May 2026 15:15:16 +0800
Subject: [PATCH 04/12] test(server): add multiple tests

Signed-off-by: houyuxi
---
 go.mod                           |   20 +-
 go.sum                           |   65 ++
 internal/server/register_test.go |  494 ++++++++++++
 internal/server/server_test.go   |  764 ++++++++++++++++++
 internal/server/util_test.go     | 1300
++++++++++++++++++++++++++++++ 5 files changed, 2642 insertions(+), 1 deletion(-) create mode 100644 internal/server/register_test.go create mode 100644 internal/server/server_test.go create mode 100644 internal/server/util_test.go diff --git a/go.mod b/go.mod index 767178e..1d66db0 100644 --- a/go.mod +++ b/go.mod @@ -10,12 +10,19 @@ require ( huawei.com/npu-exporter v0.0.0-00010101000000-000000000000 k8s.io/api v0.33.0 k8s.io/apimachinery v0.33.0 + k8s.io/client-go v0.33.0 k8s.io/klog/v2 v2.130.1 k8s.io/kubelet v0.31.3 ) require ( + github.com/NVIDIA/go-gpuallocator v0.6.0 // indirect + github.com/NVIDIA/go-nvlib v0.7.4 // indirect + github.com/NVIDIA/go-nvml v0.12.9-0 // indirect + github.com/NVIDIA/k8s-device-plugin v0.17.3 // indirect + github.com/NVIDIA/nvidia-container-toolkit v1.18.0-rc.2 // indirect github.com/ccoveille/go-safecast v1.6.1 // indirect + github.com/cpuguy83/go-md2man/v2 v2.0.7 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/emicklei/go-restful/v3 v3.11.3 // indirect github.com/fxamacker/cbor/v2 v2.7.0 // indirect @@ -27,6 +34,7 @@ require ( github.com/google/gnostic-models v0.6.9 // indirect github.com/google/go-cmp v0.7.0 // indirect github.com/google/uuid v1.6.0 // indirect + github.com/imdario/mergo v0.3.16 // indirect github.com/influxdata/telegraf v1.26.3 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect @@ -34,10 +42,18 @@ require ( github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/opencontainers/runtime-spec v1.2.1 // indirect + github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626 // indirect github.com/pkg/errors v0.9.1 // indirect + github.com/russross/blackfriday/v2 v2.1.0 // indirect + github.com/sirupsen/logrus v1.9.3 // indirect 
github.com/smartystreets/goconvey v1.7.2 // indirect github.com/spf13/pflag v1.0.7 // indirect + github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 // indirect + github.com/urfave/cli/v2 v2.27.7 // indirect github.com/x448/float16 v0.8.4 // indirect + github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect + golang.org/x/mod v0.27.0 // indirect golang.org/x/net v0.43.0 // indirect golang.org/x/oauth2 v0.30.0 // indirect golang.org/x/sys v0.35.0 // indirect @@ -48,14 +64,16 @@ require ( google.golang.org/protobuf v1.36.8 // indirect gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - k8s.io/client-go v0.33.0 // indirect k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff // indirect k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 // indirect sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect sigs.k8s.io/randfill v1.0.0 // indirect sigs.k8s.io/structured-merge-diff/v4 v4.6.0 // indirect sigs.k8s.io/yaml v1.4.0 // indirect + tags.cncf.io/container-device-interface v1.0.1 // indirect + tags.cncf.io/container-device-interface/specs-go v1.0.0 // indirect ) replace ( diff --git a/go.sum b/go.sum index d53050b..29780f3 100644 --- a/go.sum +++ b/go.sum @@ -1,9 +1,23 @@ +github.com/NVIDIA/go-gpuallocator v0.6.0 h1:2PA2swx59gJYREPkZNTGtyCP6Pnz3WEgnYsXlRkyvkk= +github.com/NVIDIA/go-gpuallocator v0.6.0/go.mod h1:c+Yspg+/QxWOmoSQeuI48Z/7nS+mMPtxyj1NYUTwewY= +github.com/NVIDIA/go-nvlib v0.7.4 h1:qnXK8qhm45YfxalhZ76XwKdAMmxz1GIgzE0e/Hhhshs= +github.com/NVIDIA/go-nvlib v0.7.4/go.mod h1:i95Je7GinMy/+BDs++DAdbPmT2TubjNP8i8joC7DD7I= +github.com/NVIDIA/go-nvml v0.12.9-0 h1:e344UK8ZkeMeeLkdQtRhmXRxNf+u532LDZPGMtkdus0= +github.com/NVIDIA/go-nvml v0.12.9-0/go.mod h1:+KNA7c7gIBH7SKSJ1ntlwkfN80zdx8ovl4hrK3LmPt4= +github.com/NVIDIA/k8s-device-plugin v0.17.3 h1:s6fN2/WqF0e6dxMfcjMXrN38fStLHkZJ3+9ErDY6DRw= 
+github.com/NVIDIA/k8s-device-plugin v0.17.3/go.mod h1:pRr/ZiwAqgP3XGiQOcA2rSM0jbSDpQEPnnNyxwDFz1w= +github.com/NVIDIA/nvidia-container-toolkit v1.18.0-rc.2 h1:vjEJpEpiGhGqca+JK9p0RqmCIBZPnOpPyT1nPj4nsEM= +github.com/NVIDIA/nvidia-container-toolkit v1.18.0-rc.2/go.mod h1:z0KTZNkSOiQx8u4SEo1iq0aldqdnwruLHPXHdMd3B+k= github.com/Project-HAMi/HAMi v0.0.0-20250901013025-61c6cbe7d480 h1:2rV+Gpy2+1fDOpQBPPXE3YG6nwfaO8DZjyCH+ARAmMY= github.com/Project-HAMi/HAMi v0.0.0-20250901013025-61c6cbe7d480/go.mod h1:KgE6IKrLJBAp6YrToFRFLDXHXctsZ6wXvNHMWY6ZbBU= github.com/agiledragon/gomonkey/v2 v2.8.0 h1:u2K2nNGyk0ippzklz1CWalllEB9ptD+DtSXeCX5O000= github.com/agiledragon/gomonkey/v2 v2.8.0/go.mod h1:ap1AmDzcVOAz1YpeJ3TCzIgstoaWLA6jbbgxfB4w2iY= +github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= +github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= github.com/ccoveille/go-safecast v1.6.1 h1:Nb9WMDR8PqhnKCVs2sCB+OqhohwO5qaXtCviZkIff5Q= github.com/ccoveille/go-safecast v1.6.1/go.mod h1:QqwNjxQ7DAqY0C721OIO9InMk9zCwcsO7tnRuHytad8= +github.com/cpuguy83/go-md2man/v2 v2.0.7 h1:zbFlGlXEAKlwXpmvle3d8Oe3YnkKIK4xSRTd3sHPnBo= +github.com/cpuguy83/go-md2man/v2 v2.0.7/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= @@ -38,10 +52,18 @@ github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8= github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= +github.com/google/uuid 
v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1 h1:EGx4pi6eqNxGaHF6qqu48+N2wcFQ5qg5FXgOdqsJ5d8= github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= +github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= +github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I= +github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= +github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= +github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= +github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= +github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= github.com/influxdata/telegraf v1.26.3 h1:wawD3VTdnPDbHnJ1RBGgCf0YB7vlxREZ70rvEepHdGs= github.com/influxdata/telegraf v1.26.3/go.mod h1:w+VUZ4NRDzfhRmhEdBbbNZBNT7E8qRkLiL73j/pD0ug= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= @@ -58,17 +80,27 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/mndrix/tap-go v0.0.0-20171203230836-629fa407e90b/go.mod h1:pzzDgJWZ34fGzaAZGFW22KVZDfyrYW+QABMrWnJBnSs= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd 
h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/onsi/ginkgo/v2 v2.23.4 h1:ktYTpKJAVZnDT4VjxSbiBenUjmlL/5QkBEocaWXiQus= github.com/onsi/ginkgo/v2 v2.23.4/go.mod h1:Bt66ApGPBFzHyR+JO10Zbt0Gsp4uWxu5mIOTusL46e8= github.com/onsi/gomega v1.38.0 h1:c/WX+w8SLAinvuKKQFh77WEucCnPk4j2OTUr7lt7BeY= github.com/onsi/gomega v1.38.0/go.mod h1:OcXcwId0b9QsE7Y49u+BTrL4IdKOBOKnD6VQNTJEB6o= +github.com/opencontainers/runtime-spec v1.0.3-0.20220825212826-86290f6a00fb/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= +github.com/opencontainers/runtime-spec v1.2.1 h1:S4k4ryNgEpxW1dzyqffOmhI1BHYcjzU8lpJfSlR0xww= +github.com/opencontainers/runtime-spec v1.2.1/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= +github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626 h1:DmNGcqH3WDbV5k8OJ+esPWbqUOX5rMLR2PMvziDMJi0= +github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626/go.mod h1:BRHJJd0E+cx42OybVYSgUvZmU0B8P9gZuRXlZUP7TKI= +github.com/opencontainers/selinux v1.9.1/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI= +github.com/opencontainers/selinux v1.11.1 h1:nHFvthhM0qY8/m+vfhJylliSshm8G1jJ2jDMcgULaH8= +github.com/opencontainers/selinux v1.11.1/go.mod h1:E5dMC3VPuVvVHDYmi78qvhJp8+M586T4DlDRYpFkyec= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod 
h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= @@ -76,6 +108,11 @@ github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRI github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= +github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= +github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= +github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/smartystreets/assertions v1.2.0 h1:42S6lae5dvLc7BrLu/0ugRtcFVjoJNMC/N3yZFZkDFs= github.com/smartystreets/assertions v1.2.0/go.mod h1:tcbTF8ujkAEcZ8TElKY+i30BzYlVhC/LOxJk7iOWnoo= github.com/smartystreets/goconvey v1.7.2 h1:9RBaZCeXEQ3UselpuwUQHltGVXvdwm6cv1hgR6gDIPg= @@ -85,11 +122,27 @@ github.com/spf13/pflag v1.0.7/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 h1:kdXcSzyDtseVEc4yCz2qF8ZrQvIDBJLl4S1c3GCXmoI= +github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= +github.com/urfave/cli v1.19.1/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= +github.com/urfave/cli/v2 v2.27.7 h1:bH59vdhbjLv3LAvIu6gd0usJHgoTTPhCFib8qqOwXYU= +github.com/urfave/cli/v2 v2.27.7/go.mod h1:CyNAG/xg+iAOg0N4MPGZqVmv2rCoP267496AOXUZjA4= github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= +github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= +github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo= +github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= +github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHovont7NscjpAxXsDA8S8BMYve8Y5+7cuRE7R0= +github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ= +github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17UxZ74= +github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y= +github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4= +github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= go.opentelemetry.io/auto/sdk v1.1.0 
h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= @@ -111,6 +164,8 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.27.0 h1:kb+q2PyFnEADO2IEF935ehFUXlWiNjJWtRNgBLSfbxQ= +golang.org/x/mod v0.27.0/go.mod h1:rWI627Fq0DEoudcK+MBkNkCe0EetEaDSwJJkCcjpazc= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= @@ -125,7 +180,10 @@ golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/term v0.34.0 h1:O/2T7POpk0ZZ7MAzMeWFSg6S5IpWd/RXDlM9hgM3DR4= @@ -162,6 +220,9 @@ 
gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSP gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gotest.tools/v3 v3.5.2 h1:7koQfIKdy+I8UTetycgUqXWSDwpgv193Ka+qRsmBY8Q= @@ -189,3 +250,7 @@ sigs.k8s.io/structured-merge-diff/v4 v4.6.0 h1:IUA9nvMmnKWcj5jl84xn+T5MnlZKThmUW sigs.k8s.io/structured-merge-diff/v4 v4.6.0/go.mod h1:dDy58f92j70zLsuZVuUX5Wp9vtxXpaZnkPGWeqDfCps= sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= +tags.cncf.io/container-device-interface v1.0.1 h1:KqQDr4vIlxwfYh0Ed/uJGVgX+CHAkahrgabg6Q8GYxc= +tags.cncf.io/container-device-interface v1.0.1/go.mod h1:JojJIOeW3hNbcnOH2q0NrWNha/JuHoDZcmYxAZwb2i0= +tags.cncf.io/container-device-interface/specs-go v1.0.0 h1:8gLw29hH1ZQP9K1YtAzpvkHCjjyIxHZYzBAvlQ+0vD8= +tags.cncf.io/container-device-interface/specs-go v1.0.0/go.mod h1:u86hoFWqnh3hWz3esofRFKbI261bUlvUfLKGrDhJkgQ= diff --git a/internal/server/register_test.go b/internal/server/register_test.go new file mode 100644 index 0000000..86854a3 --- /dev/null +++ b/internal/server/register_test.go @@ -0,0 +1,494 @@ +/* + * Copyright 2024 The HAMi Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package server
+
+import (
+	"context"
+	"strings"
+	"testing"
+	"time"
+
+	v1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
+	"github.com/Project-HAMi/HAMi/pkg/device"
+	"github.com/Project-HAMi/HAMi/pkg/util/client"
+	"github.com/Project-HAMi/ascend-device-plugin/internal/manager"
+)
+
+func TestGetDeviceNetworkID(t *testing.T) {
+	t.Parallel()
+
+	type getDeviceNetworkIDArgs struct {
+		idx        int
+		deviceType string
+	}
+
+	tests := []struct {
+		name    string
+		args    getDeviceNetworkIDArgs
+		want    int
+		wantErr bool
+	}{
+		{name: "Ascend910A_idx0_non910C", args: getDeviceNetworkIDArgs{idx: 0, deviceType: "Ascend910A"}, want: 0},
+		{name: "Ascend910A_idx5_non910C", args: getDeviceNetworkIDArgs{idx: 5, deviceType: "Ascend910A"}, want: 1},
+		{name: "Ascend910B_idx0", args: getDeviceNetworkIDArgs{idx: 0, deviceType: "Ascend910B"}, want: 0},
+		{name: "Ascend910B_idx3_boundary", args: getDeviceNetworkIDArgs{idx: 3, deviceType: "Ascend910B"}, want: 0},
+		{name: "Ascend910B_idx4", args: getDeviceNetworkIDArgs{idx: 4, deviceType: "Ascend910B"}, want: 1},
+		{name: "Ascend910B_idx100_large", args: getDeviceNetworkIDArgs{idx: 100, deviceType: "Ascend910B"}, want: 1},
+		{name: "Ascend910C_idx0", args: getDeviceNetworkIDArgs{idx: 0, deviceType: "Ascend910C"}, want: 0},
+		{name: "Ascend910C_idx4_still0", args: getDeviceNetworkIDArgs{idx: 4, deviceType: "Ascend910C"}, want: 0},
+		{name: "Ascend910C_idx100_still0", args: getDeviceNetworkIDArgs{idx: 100, deviceType: "Ascend910C"}, want: 0},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+			ps := &PluginServer{}
+			got, err := ps.getDeviceNetworkID(tc.args.idx, tc.args.deviceType)
+			if (err != nil) != tc.wantErr {
+				t.Fatalf("getDeviceNetworkID(%d, %q) error = %v, wantErr %v", tc.args.idx, tc.args.deviceType, err, tc.wantErr)
+			}
+			if got != tc.want {
+				t.Fatalf("getDeviceNetworkID(%d, %q) = %d, want %d", tc.args.idx, tc.args.deviceType, got, tc.want)
+			}
+		})
+	}
+}
+
+func TestRegisterHAMi(t *testing.T) {
+	// Deliberately not parallel: every subtest swaps the package-global
+	// client.KubeClient via setupFakeClient, which must not race with other tests.
+	type registerHAMiArgs struct {
+		nodeName      string
+		registerAnno  string
+		handshakeAnno string
+		mgr           *FakeManager
+		nodes         []*v1.Node
+	}
+
+	type registerHAMiWant struct {
+		deviceCount     int // exact number of devices expected in the register annotation (0 = none)
+		deviceCheck     func(t *testing.T, devs []*device.DeviceInfo)
+		annotationCheck func(t *testing.T, annos map[string]string)
+	}
+
+	tests := []struct {
+		name    string
+		args    registerHAMiArgs
+		want    registerHAMiWant
+		wantErr string
+	}{
+		{
+			name: "NodeNotFound",
+			args: registerHAMiArgs{
+				nodeName:      "missing-node",
+				registerAnno:  "hami.io/node-register-Ascend910",
+				handshakeAnno: "hami.io/node-handshake-Ascend910",
+				mgr: &FakeManager{
+					GetDevicesFunc:   func() []*manager.Device { return nil },
+					VDeviceCountFunc: func() int { return 1 },
+					CommonWordFunc:   func() string { return "Ascend910" },
+				},
+				nodes: nil,
+			},
+			wantErr: "get node",
+		},
+		{
+			name: "SingleDevice",
+			args: registerHAMiArgs{
+				nodeName:      "test-node",
+				registerAnno:  "hami.io/node-register-Ascend910",
+				handshakeAnno: "hami.io/node-handshake-Ascend910",
+				mgr: &FakeManager{
+					GetDevicesFunc: func() []*manager.Device {
+						return []*manager.Device{
+							{UUID: "uuid1", Memory: 32768, AICore: 30, Health: true},
+						}
+					},
+					VDeviceCountFunc: func() int { return 4 },
+					CommonWordFunc:   func() string { return "Ascend910" },
+				},
+				nodes: []*v1.Node{
+					{ObjectMeta: metav1.ObjectMeta{Name: "test-node", Annotations: map[string]string{}}},
+				},
+			},
+			want: registerHAMiWant{
+				deviceCount: 1,
+				deviceCheck: func(t *testing.T, devs []*device.DeviceInfo) {
+					t.Helper()
+					d := devs[0]
+					if d.ID != "uuid1" {
+						t.Fatalf("device ID = %q, want uuid1", d.ID)
+					}
+					if d.Index != 0 {
+						t.Fatalf("device Index = %d, want 0", d.Index)
+					}
+					if d.Count != 4 {
+						t.Fatalf("device Count = %d, want 4", d.Count)
+					}
+					if d.Devmem != 32768 {
+						t.Fatalf("device Devmem = %d, want 32768", d.Devmem)
+					}
+					if d.Devcore != 30 {
+						t.Fatalf("device Devcore = %d, want 30", d.Devcore)
+					}
+					if d.Type != "Ascend910" {
+						t.Fatalf("device Type = %q, want Ascend910", d.Type)
+					}
+					if !d.Health {
+						t.Fatal("device Health = false, want true")
+					}
+				},
+				annotationCheck: func(t *testing.T, annos map[string]string) {
+					t.Helper()
+					hs := annos["hami.io/node-handshake-Ascend910"]
+					if !strings.HasPrefix(hs, "Reported_") {
+						t.Fatalf("handshakeAnno = %q, want prefix 'Reported_'", hs)
+					}
+				},
+			},
+		},
+		{
+			name: "MultiDevice",
+			args: registerHAMiArgs{
+				nodeName:      "test-node",
+				registerAnno:  "hami.io/node-register-Ascend910C",
+				handshakeAnno: "hami.io/node-handshake-Ascend910C",
+				mgr: &FakeManager{
+					GetDevicesFunc: func() []*manager.Device {
+						return []*manager.Device{
+							{UUID: "uuid1", Memory: 65536, AICore: 60, Health: true},
+							{UUID: "uuid2", Memory: 65536, AICore: 60, Health: false},
+						}
+					},
+					VDeviceCountFunc: func() int { return 2 },
+					CommonWordFunc:   func() string { return "Ascend910C" },
+				},
+				nodes: []*v1.Node{
+					{ObjectMeta: metav1.ObjectMeta{Name: "test-node", Annotations: map[string]string{}}},
+				},
+			},
+			want: registerHAMiWant{
+				deviceCount: 2,
+				deviceCheck: func(t *testing.T, devs []*device.DeviceInfo) {
+					t.Helper()
+					if devs[0].Index != 0 || devs[1].Index != 1 {
+						t.Fatalf("device indices = %d, %d, want 0, 1", devs[0].Index, devs[1].Index)
+					}
+					if !devs[0].Health {
+						t.Fatal("device[0] Health = false, want true")
+					}
+					if devs[1].Health {
+						t.Fatal("device[1] Health = true, want false")
+					}
+					if devs[0].Type != "Ascend910C" {
+						t.Fatalf("device[0] Type = %q, want Ascend910C", devs[0].Type)
+					}
+				},
+			},
+		},
+		{
+			name: "EmptyDevices",
+			args: registerHAMiArgs{
+				nodeName:      "test-node",
+				registerAnno:  "hami.io/node-register-Ascend910",
+				handshakeAnno: "hami.io/node-handshake-Ascend910",
+				mgr: &FakeManager{
+					GetDevicesFunc:   func() []*manager.Device { return nil },
+					VDeviceCountFunc: func() int { return 1 },
+					CommonWordFunc:   func() string { return "Ascend910" },
+				},
+				nodes: []*v1.Node{
+					{ObjectMeta: metav1.ObjectMeta{Name: "test-node", Annotations: map[string]string{}}},
+				},
+			},
+			want: registerHAMiWant{
+				deviceCount: 0,
+			},
+		},
+		{
+			name: "NetworkID_Ascend910B_LowIdx",
+			args: registerHAMiArgs{
+				nodeName:      "test-node",
+				registerAnno:  "hami.io/node-register-Ascend910B",
+				handshakeAnno: "hami.io/node-handshake-Ascend910B",
+				mgr: &FakeManager{
+					GetDevicesFunc: func() []*manager.Device {
+						return []*manager.Device{{UUID: "uuid1", Memory: 32768, AICore: 30, Health: true}}
+					},
+					VDeviceCountFunc:   func() int { return 1 },
+					CommonWordFunc:     func() string { return "Ascend910B" },
+					IsHamiVnpuCoreFunc: func() bool { return false },
+				},
+				nodes: []*v1.Node{
+					{ObjectMeta: metav1.ObjectMeta{Name: "test-node", Annotations: map[string]string{}}},
+				},
+			},
+			want: registerHAMiWant{
+				deviceCount: 1,
+				deviceCheck: func(t *testing.T, devs []*device.DeviceInfo) {
+					t.Helper()
+					ci, ok := devs[0].CustomInfo["NetworkID"]
+					if !ok {
+						t.Fatal("expected CustomInfo to contain NetworkID for Ascend910B device")
+					}
+					if int(ci.(float64)) != 0 {
+						t.Fatalf("NetworkID = %d, want 0 for idx=0", int(ci.(float64)))
+					}
+				},
+			},
+		},
+		{
+			name: "NetworkID_Ascend910B_HighIdx",
+			args: registerHAMiArgs{
+				nodeName:      "test-node",
+				registerAnno:  "hami.io/node-register-Ascend910B",
+				handshakeAnno: "hami.io/node-handshake-Ascend910B",
+				mgr: &FakeManager{
+					GetDevicesFunc: func() []*manager.Device {
+						return []*manager.Device{
+							{UUID: "uuid0", Memory: 32768, AICore: 30, Health: true},
+							{UUID: "uuid1", Memory: 32768, AICore: 30, Health: true},
+							{UUID: "uuid2", Memory: 32768, AICore: 30, Health: true},
+							{UUID: "uuid3", Memory: 32768, AICore: 30, Health: true},
+							{UUID: "uuid4", Memory: 32768, AICore: 30, Health: true},
+						}
+					},
+					VDeviceCountFunc:   func() int { return 1 },
+					CommonWordFunc:     func() string { return "Ascend910B" },
+					IsHamiVnpuCoreFunc: func() bool { return false },
+				},
+				nodes: []*v1.Node{
+					{ObjectMeta: metav1.ObjectMeta{Name: "test-node", Annotations: map[string]string{}}},
+				},
+			},
+			want: registerHAMiWant{
+				deviceCount: 5,
+				deviceCheck: func(t *testing.T, devs []*device.DeviceInfo) {
+					t.Helper()
+					for i, d := range devs {
+						ci, ok := d.CustomInfo["NetworkID"]
+						if !ok {
+							t.Fatalf("device[%d] missing NetworkID", i)
+						}
+						netID := int(ci.(float64))
+						if i < 4 && netID != 0 {
+							t.Fatalf("device[%d] NetworkID = %d, want 0", i, netID)
+						}
+						if i == 4 && netID != 1 {
+							t.Fatalf("device[4] NetworkID = %d, want 1", netID)
+						}
+					}
+				},
+			},
+		},
+		{
+			name: "NetworkID_Ascend910C_AlwaysZero",
+			args: registerHAMiArgs{
+				nodeName:      "test-node",
+				registerAnno:  "hami.io/node-register-Ascend910C",
+				handshakeAnno: "hami.io/node-handshake-Ascend910C",
+				mgr: &FakeManager{
+					GetDevicesFunc: func() []*manager.Device {
+						return []*manager.Device{
+							{UUID: "uuid0", Memory: 65536, AICore: 60, Health: true},
+							{UUID: "uuid5", Memory: 65536, AICore: 60, Health: true},
+						}
+					},
+					VDeviceCountFunc:   func() int { return 1 },
+					CommonWordFunc:     func() string { return "Ascend910C" },
+					IsHamiVnpuCoreFunc: func() bool { return false },
+				},
+				nodes: []*v1.Node{
+					{ObjectMeta: metav1.ObjectMeta{Name: "test-node", Annotations: map[string]string{}}},
+				},
+			},
+			want: registerHAMiWant{
+				deviceCount: 2,
+				deviceCheck: func(t *testing.T, devs []*device.DeviceInfo) {
+					t.Helper()
+					for i, d := range devs {
+						ci, ok := d.CustomInfo["NetworkID"]
+						if !ok {
+							t.Fatalf("device[%d] missing NetworkID", i)
+						}
+						if int(ci.(float64)) != 0 {
+							t.Fatalf("device[%d] NetworkID = %d, want 0 (Ascend910C always 0)", i, int(ci.(float64)))
+						}
+					}
+				},
+			},
+		},
+		{
+			name: "NonAscend910_NoCustomInfo",
+			args: registerHAMiArgs{
+				nodeName:      "test-node",
+				registerAnno:  "hami.io/node-register-Ascend310P",
+				handshakeAnno: "hami.io/node-handshake-Ascend310P",
+				mgr: &FakeManager{
+					GetDevicesFunc: func() []*manager.Device {
+						return []*manager.Device{{UUID: "uuid1", Memory: 16384, AICore: 15, Health: true}}
+					},
+					VDeviceCountFunc:   func() int { return 1 },
+					CommonWordFunc:     func() string { return "Ascend310P" },
+					IsHamiVnpuCoreFunc: func() bool { return false },
+				},
+				nodes: []*v1.Node{
+					{ObjectMeta: metav1.ObjectMeta{Name: "test-node", Annotations: map[string]string{}}},
+				},
+			},
+			want: registerHAMiWant{
+				deviceCount: 1,
+				deviceCheck: func(t *testing.T, devs []*device.DeviceInfo) {
+					t.Helper()
+					if len(devs[0].CustomInfo) != 0 {
+						t.Fatalf("expected no CustomInfo for non-Ascend910 device, got %v", devs[0].CustomInfo)
+					}
+				},
+			},
+		},
+		{
+			name: "IsHamiVnpuCore_True",
+			args: registerHAMiArgs{
+				nodeName:      "test-node",
+				registerAnno:  "hami.io/node-register-Ascend910",
+				handshakeAnno: "hami.io/node-handshake-Ascend910",
+				mgr: &FakeManager{
+					GetDevicesFunc:     func() []*manager.Device { return nil },
+					VDeviceCountFunc:   func() int { return 1 },
+					CommonWordFunc:     func() string { return "Ascend910" },
+					IsHamiVnpuCoreFunc: func() bool { return true },
+				},
+				nodes: []*v1.Node{
+					{ObjectMeta: metav1.ObjectMeta{Name: "test-node", Annotations: map[string]string{}}},
+				},
+			},
+			want: registerHAMiWant{
+				annotationCheck: func(t *testing.T, annos map[string]string) {
+					t.Helper()
+					if annos[VNPUNodeSelectorAnnotation] != "true" {
+						t.Fatalf("VNPUNodeSelectorAnnotation = %q, want 'true'", annos[VNPUNodeSelectorAnnotation])
+					}
+				},
+			},
+		},
+		{
+			name: "IsHamiVnpuCore_False",
+			args: registerHAMiArgs{
+				nodeName:      "test-node",
+				registerAnno:  "hami.io/node-register-Ascend910",
+				handshakeAnno: "hami.io/node-handshake-Ascend910",
+				mgr: &FakeManager{
+					GetDevicesFunc:     func() []*manager.Device { return nil },
+					VDeviceCountFunc:   func() int { return 1 },
+					CommonWordFunc:     func() string { return "Ascend910" },
+					IsHamiVnpuCoreFunc: func() bool { return false },
+				},
+				nodes: []*v1.Node{
+					{ObjectMeta: metav1.ObjectMeta{Name: "test-node", Annotations: map[string]string{}}},
+				},
+			},
+			want: registerHAMiWant{
+				annotationCheck: func(t *testing.T, annos map[string]string) {
+					t.Helper()
+					if annos[VNPUNodeSelectorAnnotation] != "false" {
+						t.Fatalf("VNPUNodeSelectorAnnotation = %q, want 'false'", annos[VNPUNodeSelectorAnnotation])
+					}
+				},
+			},
+		},
+		{
+			name: "HandshakeAnnotationFormat",
+			args: registerHAMiArgs{
+				nodeName:      "test-node",
+				registerAnno:  "hami.io/node-register-Ascend910",
+				handshakeAnno: "hami.io/node-handshake-Ascend910",
+				mgr: &FakeManager{
+					GetDevicesFunc:     func() []*manager.Device { return nil },
+					VDeviceCountFunc:   func() int { return 1 },
+					CommonWordFunc:     func() string { return "Ascend910" },
+					IsHamiVnpuCoreFunc: func() bool { return false },
+				},
+				nodes: []*v1.Node{
+					{ObjectMeta: metav1.ObjectMeta{Name: "test-node", Annotations: map[string]string{}}},
+				},
+			},
+			want: registerHAMiWant{
+				annotationCheck: func(t *testing.T, annos map[string]string) {
+					t.Helper()
+					hs := annos["hami.io/node-handshake-Ascend910"]
+					if !strings.HasPrefix(hs, "Reported_") {
+						t.Fatalf("handshake annotation = %q, want prefix 'Reported_'", hs)
+					}
+					timeStr := strings.TrimPrefix(hs, "Reported_")
+					if _, err := time.Parse("2006.01.02 15:04:05", timeStr); err != nil {
+						t.Fatalf("handshake time %q does not match expected format: %v", timeStr, err)
+					}
+				},
+			},
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			ps := &PluginServer{
+				nodeName:      tc.args.nodeName,
+				registerAnno:  tc.args.registerAnno,
+				handshakeAnno: tc.args.handshakeAnno,
+				mgr:           tc.args.mgr,
+			}
+			cleanup := setupFakeClient(nil, tc.args.nodes)
+			defer cleanup()
+
+			err := ps.registerHAMi()
+
+			if tc.wantErr != "" {
+				if err == nil {
+					t.Fatal("expected error, got nil")
+				}
+				if !strings.Contains(err.Error(), tc.wantErr) {
+					t.Fatalf("error should contain %q, got: %v", tc.wantErr, err)
+				}
+				return
+			}
+			if err != nil {
+				t.Fatalf("unexpected error: %v", err)
+			}
+
+			updated, err := client.KubeClient.CoreV1().Nodes().Get(context.Background(), tc.args.nodeName, metav1.GetOptions{})
+			if err != nil {
+				t.Fatalf("failed to get updated node: %v", err)
+			}
+
+			devs, err := device.UnMarshalNodeDevices(updated.Annotations[tc.args.registerAnno])
+			if err != nil {
+				t.Fatalf("failed to unmarshal node devices: %v", err)
+			}
+
+			if len(devs) != tc.want.deviceCount { // always compared, so a wanted count of 0 (EmptyDevices) is enforced too
+				t.Fatalf("expected %d devices, got %d", tc.want.deviceCount, len(devs))
+			}
+			if tc.want.deviceCheck != nil {
+				tc.want.deviceCheck(t, devs)
+			}
+			if tc.want.annotationCheck != nil {
+				tc.want.annotationCheck(t, updated.Annotations)
+			}
+		})
+	}
+}
diff --git a/internal/server/server_test.go b/internal/server/server_test.go
new file mode 100644
index 0000000..c65783d
--- /dev/null
+++ b/internal/server/server_test.go
@@ -0,0 +1,764 @@
+/*
+ * Copyright 2024 The HAMi Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package server + +import ( + "context" + "encoding/json" + "fmt" + "strings" + "testing" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/fake" + "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" + + "github.com/Project-HAMi/HAMi/pkg/device" + "github.com/Project-HAMi/HAMi/pkg/device/ascend" + "github.com/Project-HAMi/HAMi/pkg/util" + "github.com/Project-HAMi/HAMi/pkg/util/client" + "github.com/Project-HAMi/HAMi/pkg/util/nodelock" + "github.com/Project-HAMi/ascend-device-plugin/internal/manager" +) + +const testCommonWord = "Ascend910" + +// CleanupFunc is the return type of test setup helpers that modify global state. +// Callers typically pass the returned function to t.Cleanup. +type CleanupFunc func() + +// cd is a test helper that constructs a device.ContainerDevice. +func cd(uuid, typ string, usedmem, usedcores int32) device.ContainerDevice { + return device.ContainerDevice{ + UUID: uuid, + Type: typ, + Usedmem: usedmem, + Usedcores: usedcores, + } +} + +// setupInRequestDevices registers the test commonWord in device.InRequestDevices +// and DevicesToHandle, and returns a cleanup function to restore the original state. +func setupInRequestDevices(commonWord string) CleanupFunc { + origAnno := device.InRequestDevices[commonWord] + device.InRequestDevices[commonWord] = fmt.Sprintf("hami.io/%s-devices-to-allocate", commonWord) + + origDevicesToHandle := device.DevicesToHandle + device.DevicesToHandle = append(device.DevicesToHandle, commonWord) + + return func() { + delete(device.InRequestDevices, commonWord) + if origAnno != "" { + device.InRequestDevices[commonWord] = origAnno + } + device.DevicesToHandle = origDevicesToHandle + } +} + +// setupFakeClient sets client.KubeClient to a fake clientset pre-loaded with +// the given pods and nodes, and returns a cleanup function to restore the +// original client. 
+func setupFakeClient(pods []*v1.Pod, nodes []*v1.Node) CleanupFunc { + orig := client.KubeClient + fc := fake.NewSimpleClientset() + for _, p := range pods { + _, _ = fc.CoreV1().Pods(p.Namespace).Create(context.Background(), p, metav1.CreateOptions{}) + } + for _, n := range nodes { + _, _ = fc.CoreV1().Nodes().Create(context.Background(), n, metav1.CreateOptions{}) + } + client.KubeClient = fc + return func() { client.KubeClient = orig } +} + +// setupAllocateEnv creates a fake clientset with a node (with nodelock annotation +// pointing to the pod) and a pod with the specified number of containers, returning +// both along with a cleanup function. +func setupAllocateEnv(nodeName, podName, podNamespace string, numContainers int, podAnnotations map[string]string) (*v1.Node, *v1.Pod, CleanupFunc) { + lockValue := fmt.Sprintf("2024-01-01T00:00:00Z,%s,%s", podNamespace, podName) + node := &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: nodeName, + Annotations: map[string]string{ + nodelock.NodeLockKey: lockValue, + }, + }, + } + containers := make([]v1.Container, numContainers) + for i := range containers { + containers[i].Name = fmt.Sprintf("ctr-%d", i) + } + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: podName, + Namespace: podNamespace, + Annotations: podAnnotations, + }, + Spec: v1.PodSpec{ + NodeName: nodeName, + Containers: containers, + }, + } + cleanup := setupFakeClient([]*v1.Pod{pod}, []*v1.Node{node}) + return node, pod, cleanup +} + +// ============================================================================ +// Allocate tests +// ============================================================================ + +// composeCleanup combines multiple CleanupFuncs into one that runs in reverse order. 
+func composeCleanup(fns ...CleanupFunc) CleanupFunc { + return func() { + for i := len(fns) - 1; i >= 0; i-- { + fns[i]() + } + } +} + +func TestAllocate(t *testing.T) { + type allocateArgs struct { + ps *PluginServer + reqs *v1beta1.AllocateRequest + } + + type allocateWant struct { + containerResponses []*v1beta1.ContainerAllocateResponse + nodeLockReleased bool + } + + tests := []struct { + name string + args allocateArgs + want allocateWant + wantErr string + setup func() CleanupFunc + }{ + { + name: "GetPendingPodFails", + args: allocateArgs{ + ps: &PluginServer{ + commonWord: testCommonWord, + nodeName: "missing-node", + toAllocDeviceAnno: "hami.io/Ascend910-devices-to-allocate", + allocAnno: "huawei.com/Ascend910", + mgr: &FakeManager{}, + }, + reqs: &v1beta1.AllocateRequest{ + ContainerRequests: []*v1beta1.ContainerAllocateRequest{ + {DevicesIDs: []string{"uuid1-0"}}, + }, + }, + }, + wantErr: "get pending pod", + setup: func() CleanupFunc { return setupFakeClient(nil, nil) }, + }, + { + name: "SingleContainerSingleDevice", + args: allocateArgs{ + ps: &PluginServer{ + commonWord: testCommonWord, + nodeName: "test-node", + toAllocDeviceAnno: "hami.io/Ascend910-devices-to-allocate", + allocAnno: "huawei.com/Ascend910", + mgr: &FakeManager{ + GetDeviceByUUIDFunc: func(uuid string) *manager.Device { + return &manager.Device{UUID: "uuid1", PhyID: 3} + }, + }, + }, + reqs: &v1beta1.AllocateRequest{ + ContainerRequests: []*v1beta1.ContainerAllocateRequest{ + {DevicesIDs: []string{"uuid1-0"}}, + }, + }, + }, + want: allocateWant{ + containerResponses: []*v1beta1.ContainerAllocateResponse{ + {Envs: map[string]string{"ASCEND_VISIBLE_DEVICES": "3", "ASCEND_VNPU_SPECS": "vir01"}}, + }, + nodeLockReleased: true, + }, + setup: func() CleanupFunc { + c1 := setupInRequestDevices("Ascend910") + toAllocAnno := "hami.io/Ascend910-devices-to-allocate" + allocAnno := "huawei.com/Ascend910" + containerDevs := device.EncodePodSingleDevice(device.PodSingleDevice{ + {cd("uuid1", 
"Ascend910", 1024, 4)}, + }) + rtInfo := []ascend.RuntimeInfo{{UUID: "uuid1", Temp: "vir01"}} + rtData, _ := json.Marshal(rtInfo) + _, _, c2 := setupAllocateEnv("test-node", "test-pod", "default", 1, map[string]string{ + toAllocAnno: containerDevs, + allocAnno: string(rtData), + util.BindTimeAnnotations: "2024-01-01T00:00:00Z", + util.DeviceBindPhase: util.DeviceBindAllocating, + "hami.io/Ascend910-devices-allocated": containerDevs, + }) + return composeCleanup(c2, c1) + }, + }, + { + name: "DeviceNumberMismatch", + args: allocateArgs{ + ps: &PluginServer{ + commonWord: testCommonWord, + nodeName: "test-node", + toAllocDeviceAnno: "hami.io/Ascend910-devices-to-allocate", + allocAnno: "huawei.com/Ascend910", + mgr: &FakeManager{ + GetDeviceByUUIDFunc: func(uuid string) *manager.Device { + return &manager.Device{UUID: uuid, PhyID: 0} + }, + }, + }, + reqs: &v1beta1.AllocateRequest{ + ContainerRequests: []*v1beta1.ContainerAllocateRequest{ + {DevicesIDs: []string{"uuid1-0"}}, + }, + }, + }, + wantErr: "device number not matched", + setup: func() CleanupFunc { + c1 := setupInRequestDevices("Ascend910") + toAllocAnno := "hami.io/Ascend910-devices-to-allocate" + allocAnno := "huawei.com/Ascend910" + containerDevs := device.EncodePodSingleDevice(device.PodSingleDevice{ + {cd("uuid1", "Ascend910", 1024, 4), cd("uuid2", "Ascend910", 2048, 8)}, + }) + rtInfo := []ascend.RuntimeInfo{ + {UUID: "uuid1", Temp: "vir01"}, + {UUID: "uuid2", Temp: "vir01"}, + } + rtData, _ := json.Marshal(rtInfo) + _, _, c2 := setupAllocateEnv("test-node", "test-pod", "default", 1, map[string]string{ + toAllocAnno: containerDevs, + allocAnno: string(rtData), + util.BindTimeAnnotations: "2024-01-01T00:00:00Z", + util.DeviceBindPhase: util.DeviceBindAllocating, + }) + return composeCleanup(c2, c1) + }, + }, + { + name: "MultiContainer", + args: allocateArgs{ + ps: &PluginServer{ + commonWord: testCommonWord, + nodeName: "test-node", + toAllocDeviceAnno: "hami.io/Ascend910-devices-to-allocate", + 
allocAnno: "huawei.com/Ascend910", + mgr: &FakeManager{ + GetDeviceByUUIDFunc: func(uuid string) *manager.Device { + switch uuid { + case "uuid1": + return &manager.Device{UUID: "uuid1", PhyID: 0} + case "uuid2": + return &manager.Device{UUID: "uuid2", PhyID: 1} + default: + return nil + } + }, + }, + }, + reqs: &v1beta1.AllocateRequest{ + ContainerRequests: []*v1beta1.ContainerAllocateRequest{ + {DevicesIDs: []string{"uuid1-0"}}, + {DevicesIDs: []string{"uuid2-0"}}, + }, + }, + }, + want: allocateWant{ + containerResponses: []*v1beta1.ContainerAllocateResponse{ + {Envs: map[string]string{"ASCEND_VISIBLE_DEVICES": "0", "ASCEND_VNPU_SPECS": "vir01"}}, + {Envs: map[string]string{"ASCEND_VISIBLE_DEVICES": "1", "ASCEND_VNPU_SPECS": "vir02"}}, + }, + }, + setup: func() CleanupFunc { + c1 := setupInRequestDevices("Ascend910") + toAllocAnno := "hami.io/Ascend910-devices-to-allocate" + allocAnno := "huawei.com/Ascend910" + containerDevs := device.EncodePodSingleDevice(device.PodSingleDevice{ + {cd("uuid1", "Ascend910", 1024, 4)}, + {cd("uuid2", "Ascend910", 2048, 8)}, + }) + rtInfo := []ascend.RuntimeInfo{ + {UUID: "uuid1", Temp: "vir01"}, + {UUID: "uuid2", Temp: "vir02"}, + } + rtData, _ := json.Marshal(rtInfo) + _, _, c2 := setupAllocateEnv("test-node", "test-pod", "default", 2, map[string]string{ + toAllocAnno: containerDevs, + allocAnno: string(rtData), + util.BindTimeAnnotations: "2024-01-01T00:00:00Z", + util.DeviceBindPhase: util.DeviceBindAllocating, + }) + return composeCleanup(c2, c1) + }, + }, + { + name: "BuildResponseError", + args: allocateArgs{ + ps: &PluginServer{ + commonWord: testCommonWord, + nodeName: "test-node", + toAllocDeviceAnno: "hami.io/Ascend910-devices-to-allocate", + allocAnno: "huawei.com/Ascend910", + mgr: &FakeManager{}, + }, + reqs: &v1beta1.AllocateRequest{ + ContainerRequests: []*v1beta1.ContainerAllocateRequest{ + {DevicesIDs: []string{"unknown-uuid-0"}}, + }, + }, + }, + wantErr: "unknown uuid", + setup: func() CleanupFunc { + c1 := 
setupInRequestDevices("Ascend910") + toAllocAnno := "hami.io/Ascend910-devices-to-allocate" + allocAnno := "huawei.com/Ascend910" + containerDevs := device.EncodePodSingleDevice(device.PodSingleDevice{ + {cd("unknown-uuid", "Ascend910", 1024, 4)}, + }) + rtInfo := []ascend.RuntimeInfo{{UUID: "unknown-uuid", Temp: "vir01"}} + rtData, _ := json.Marshal(rtInfo) + _, _, c2 := setupAllocateEnv("test-node", "test-pod", "default", 1, map[string]string{ + toAllocAnno: containerDevs, + allocAnno: string(rtData), + util.BindTimeAnnotations: "2024-01-01T00:00:00Z", + util.DeviceBindPhase: util.DeviceBindAllocating, + }) + return composeCleanup(c2, c1) + }, + }, + { + name: "NodeLockReleasedOnError", + args: allocateArgs{ + ps: &PluginServer{ + commonWord: testCommonWord, + nodeName: "test-node", + toAllocDeviceAnno: "hami.io/Ascend910-devices-to-allocate", + allocAnno: "huawei.com/Ascend910", + mgr: &FakeManager{}, + }, + reqs: &v1beta1.AllocateRequest{ + ContainerRequests: []*v1beta1.ContainerAllocateRequest{ + {DevicesIDs: []string{"uuid1-0"}}, + }, + }, + }, + want: allocateWant{ + nodeLockReleased: true, + }, + wantErr: "annotation", + setup: func() CleanupFunc { + c1 := setupInRequestDevices("Ascend910") + toAllocAnno := "hami.io/Ascend910-devices-to-allocate" + containerDevs := device.EncodePodSingleDevice(device.PodSingleDevice{ + {cd("uuid1", "Ascend910", 1024, 4)}, + }) + _, _, c2 := setupAllocateEnv("test-node", "test-pod", "default", 1, map[string]string{ + toAllocAnno: containerDevs, + util.BindTimeAnnotations: "2024-01-01T00:00:00Z", + util.DeviceBindPhase: util.DeviceBindAllocating, + }) + return composeCleanup(c2, c1) + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Cleanup(tc.setup()) + + resp, err := tc.args.ps.Allocate(context.Background(), tc.args.reqs) + + if tc.wantErr != "" { + if err == nil { + t.Fatal("expected error, got nil") + } + if !strings.Contains(err.Error(), tc.wantErr) { + t.Fatalf("error should contain 
%q, got: %v", tc.wantErr, err) + } + } else { + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + } + + // Check container responses + if tc.want.containerResponses != nil && resp != nil { + if len(resp.ContainerResponses) != len(tc.want.containerResponses) { + t.Fatalf("expected %d container responses, got %d", len(tc.want.containerResponses), len(resp.ContainerResponses)) + } + for i, wantCR := range tc.want.containerResponses { + gotCR := resp.ContainerResponses[i] + for k, wantVal := range wantCR.Envs { + if gotCR.Envs[k] != wantVal { + t.Fatalf("container[%d] env[%q] = %q, want %q", i, k, gotCR.Envs[k], wantVal) + } + } + } + } + + // Check node lock state + if tc.want.nodeLockReleased && tc.args.ps.nodeName != "missing-node" { + updatedNode, nErr := client.KubeClient.CoreV1().Nodes().Get(context.Background(), tc.args.ps.nodeName, metav1.GetOptions{}) + if nErr != nil { + t.Fatalf("failed to get node: %v", nErr) + } + if _, hasLock := updatedNode.Annotations[nodelock.NodeLockKey]; hasLock { + t.Fatal("node lock should have been released") + } + } + }) + } +} + +// ============================================================================ +// NewPluginServer tests +// ============================================================================ + +func TestNewPluginServer(t *testing.T) { + t.Parallel() + + type newPluginServerArgs struct { + commonWord string + nodeName string + } + + type newPluginServerWant struct { + registerAnno string + handshakeAnno string + allocAnno string + toAllocAnno string + } + + tests := []struct { + name string + args newPluginServerArgs + want newPluginServerWant + wantErr bool + }{ + { + name: "Ascend910A", + args: newPluginServerArgs{commonWord: "Ascend910A", nodeName: "test-node"}, + want: newPluginServerWant{ + registerAnno: "hami.io/node-register-Ascend910A", + handshakeAnno: "hami.io/node-handshake-Ascend910A", + allocAnno: "huawei.com/Ascend910A", + toAllocAnno: "hami.io/Ascend910A-devices-to-allocate", + }, 
+ }, + { + name: "Ascend910B2", + args: newPluginServerArgs{commonWord: "Ascend910B2", nodeName: "test-node"}, + want: newPluginServerWant{ + registerAnno: "hami.io/node-register-Ascend910B2", + handshakeAnno: "hami.io/node-handshake-Ascend910B2", + allocAnno: "huawei.com/Ascend910B2", + toAllocAnno: "hami.io/Ascend910B2-devices-to-allocate", + }, + }, + { + name: "Ascend910B3", + args: newPluginServerArgs{commonWord: "Ascend910B3", nodeName: "test-node"}, + want: newPluginServerWant{ + registerAnno: "hami.io/node-register-Ascend910B3", + handshakeAnno: "hami.io/node-handshake-Ascend910B3", + allocAnno: "huawei.com/Ascend910B3", + toAllocAnno: "hami.io/Ascend910B3-devices-to-allocate", + }, + }, + { + name: "Ascend910B4-1", + args: newPluginServerArgs{commonWord: "Ascend910B4-1", nodeName: "test-node"}, + want: newPluginServerWant{ + registerAnno: "hami.io/node-register-Ascend910B4-1", + handshakeAnno: "hami.io/node-handshake-Ascend910B4-1", + allocAnno: "huawei.com/Ascend910B4-1", + toAllocAnno: "hami.io/Ascend910B4-1-devices-to-allocate", + }, + }, + { + name: "Ascend910B4", + args: newPluginServerArgs{commonWord: "Ascend910B4", nodeName: "test-node"}, + want: newPluginServerWant{ + registerAnno: "hami.io/node-register-Ascend910B4", + handshakeAnno: "hami.io/node-handshake-Ascend910B4", + allocAnno: "huawei.com/Ascend910B4", + toAllocAnno: "hami.io/Ascend910B4-devices-to-allocate", + }, + }, + { + name: "Ascend310P", + args: newPluginServerArgs{commonWord: "Ascend310P", nodeName: "test-node"}, + want: newPluginServerWant{ + registerAnno: "hami.io/node-register-Ascend310P", + handshakeAnno: "hami.io/node-handshake-Ascend310P", + allocAnno: "huawei.com/Ascend310P", + toAllocAnno: "hami.io/Ascend310P-devices-to-allocate", + }, + }, + { + name: "Ascend910C", + args: newPluginServerArgs{commonWord: "Ascend910C", nodeName: "test-node"}, + want: newPluginServerWant{ + registerAnno: "hami.io/node-register-Ascend910C", + handshakeAnno: "hami.io/node-handshake-Ascend910C", 
+ allocAnno: "huawei.com/Ascend910C", + toAllocAnno: "hami.io/Ascend910C-devices-to-allocate", + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + mgr := &FakeManager{CommonWordFunc: func() string { return tc.args.commonWord }} + ps, err := NewPluginServer(mgr, tc.args.nodeName, 60) + if (err != nil) != tc.wantErr { + t.Fatalf("NewPluginServer() error = %v, wantErr %v", err, tc.wantErr) + } + if ps.registerAnno != tc.want.registerAnno { + t.Errorf("registerAnno = %q, want %q", ps.registerAnno, tc.want.registerAnno) + } + if ps.handshakeAnno != tc.want.handshakeAnno { + t.Errorf("handshakeAnno = %q, want %q", ps.handshakeAnno, tc.want.handshakeAnno) + } + if ps.allocAnno != tc.want.allocAnno { + t.Errorf("allocAnno = %q, want %q", ps.allocAnno, tc.want.allocAnno) + } + if ps.toAllocDeviceAnno != tc.want.toAllocAnno { + t.Errorf("toAllocDeviceAnno = %q, want %q", ps.toAllocDeviceAnno, tc.want.toAllocAnno) + } + }) + } +} + +func TestNewPluginServer_RegistersInRequestDevices(t *testing.T) { + commonWord := "Ascend910" + origVal := device.InRequestDevices[commonWord] + delete(device.InRequestDevices, commonWord) + defer func() { + if origVal != "" { + device.InRequestDevices[commonWord] = origVal + } else { + delete(device.InRequestDevices, commonWord) + } + }() + + mgr := &FakeManager{ + CommonWordFunc: func() string { return commonWord }, + } + ps, err := NewPluginServer(mgr, "test-node", 60) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + got, ok := device.InRequestDevices[commonWord] + if !ok { + t.Fatal("InRequestDevices[commonWord] not registered") + } + want := ps.toAllocDeviceAnno + if got != want { + t.Errorf("InRequestDevices[%q] = %q, want %q", commonWord, got, want) + } +} + +// ============================================================================ +// apiDevices tests +// ============================================================================ + +func TestApiDevices(t *testing.T) { 
+ type apiDevicesArgs struct { + mgr *FakeManager + } + + tests := []struct { + name string + args apiDevicesArgs + want []*v1beta1.Device + }{ + { + name: "EmptyDeviceList", + args: apiDevicesArgs{ + mgr: &FakeManager{ + GetDevicesFunc: func() []*manager.Device { return nil }, + VDeviceCountFunc: func() int { return 1 }, + }, + }, + want: nil, + }, + { + name: "SingleDeviceVCount1", + args: apiDevicesArgs{ + mgr: &FakeManager{ + GetDevicesFunc: func() []*manager.Device { + return []*manager.Device{{UUID: "uuid1", Health: true}} + }, + VDeviceCountFunc: func() int { return 1 }, + }, + }, + want: []*v1beta1.Device{ + {ID: "uuid1-0", Health: v1beta1.Healthy}, + }, + }, + { + name: "SingleDeviceVCount3", + args: apiDevicesArgs{ + mgr: &FakeManager{ + GetDevicesFunc: func() []*manager.Device { + return []*manager.Device{{UUID: "uuid1", Health: true}} + }, + VDeviceCountFunc: func() int { return 3 }, + }, + }, + want: []*v1beta1.Device{ + {ID: "uuid1-0", Health: v1beta1.Healthy}, + {ID: "uuid1-1", Health: v1beta1.Healthy}, + {ID: "uuid1-2", Health: v1beta1.Healthy}, + }, + }, + { + name: "UnhealthyDevice", + args: apiDevicesArgs{ + mgr: &FakeManager{ + GetDevicesFunc: func() []*manager.Device { + return []*manager.Device{{UUID: "uuid1", Health: false}} + }, + VDeviceCountFunc: func() int { return 1 }, + }, + }, + want: []*v1beta1.Device{ + {ID: "uuid1-0", Health: v1beta1.Unhealthy}, + }, + }, + { + name: "MultipleDevicesDifferentHealth", + args: apiDevicesArgs{ + mgr: &FakeManager{ + GetDevicesFunc: func() []*manager.Device { + return []*manager.Device{ + {UUID: "uuid1", Health: true}, + {UUID: "uuid2", Health: false}, + } + }, + VDeviceCountFunc: func() int { return 2 }, + }, + }, + want: []*v1beta1.Device{ + {ID: "uuid1-0", Health: v1beta1.Healthy}, + {ID: "uuid1-1", Health: v1beta1.Healthy}, + {ID: "uuid2-0", Health: v1beta1.Unhealthy}, + {ID: "uuid2-1", Health: v1beta1.Unhealthy}, + }, + }, + { + name: "VDeviceCountZero", + args: apiDevicesArgs{ + mgr: &FakeManager{ 
+ GetDevicesFunc: func() []*manager.Device { + return []*manager.Device{{UUID: "uuid1", Health: true}} + }, + VDeviceCountFunc: func() int { return 0 }, + }, + }, + want: nil, + }, + { + name: "EmptySliceDeviceList", + args: apiDevicesArgs{ + mgr: &FakeManager{ + GetDevicesFunc: func() []*manager.Device { return []*manager.Device{} }, + VDeviceCountFunc: func() int { return 1 }, + }, + }, + want: nil, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + ps := &PluginServer{mgr: tc.args.mgr} + got := ps.apiDevices() + + if len(got) != len(tc.want) { + t.Fatalf("expected %d devices, got %d", len(tc.want), len(got)) + } + for i, wantDev := range tc.want { + if got[i].ID != wantDev.ID { + t.Errorf("device[%d].ID = %q, want %q", i, got[i].ID, wantDev.ID) + } + if got[i].Health != wantDev.Health { + t.Errorf("device[%d].Health = %v, want %v", i, got[i].Health, wantDev.Health) + } + } + }) + } +} + +// ============================================================================ +// CleanupIdleVNPUs tests +// ============================================================================ + +func TestCleanupIdleVNPUs(t *testing.T) { + type cleanupIdleVNPUsArgs struct { + mgr *FakeManager + } + + tests := []struct { + name string + args cleanupIdleVNPUsArgs + wantErr bool + }{ + { + name: "DelegatesToManager", + args: cleanupIdleVNPUsArgs{ + mgr: &FakeManager{ + CleanupIdleVNPUsFunc: func() error { return nil }, + }, + }, + }, + { + name: "ReturnsManagerError", + args: cleanupIdleVNPUsArgs{ + mgr: &FakeManager{ + CleanupIdleVNPUsFunc: func() error { return fmt.Errorf("cleanup failed") }, + }, + }, + wantErr: true, + }, + { + name: "NilFuncReturnsNil", + args: cleanupIdleVNPUsArgs{ + mgr: &FakeManager{}, + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + ps := &PluginServer{mgr: tc.args.mgr} + err := ps.CleanupIdleVNPUs() + if (err != nil) != tc.wantErr { + t.Fatalf("CleanupIdleVNPUs() error = %v, wantErr %v", err, 
tc.wantErr) + } + }) + } +} diff --git a/internal/server/util_test.go b/internal/server/util_test.go new file mode 100644 index 0000000..b845422 --- /dev/null +++ b/internal/server/util_test.go @@ -0,0 +1,1300 @@ +package server + +import ( + "encoding/json" + "errors" + "fmt" + "os" + "path/filepath" + "strings" + "testing" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" + + "github.com/Project-HAMi/HAMi/pkg/device" + "github.com/Project-HAMi/ascend-device-plugin/internal/manager" +) + +// newTestPluginServer creates a PluginServer with sensible defaults for testing. +func newTestPluginServer(allocAnno, toAllocAnno string) *PluginServer { + return &PluginServer{ + mgr: &FakeManager{}, + commonWord: testCommonWord, + allocAnno: allocAnno, + toAllocDeviceAnno: toAllocAnno, + } +} + +// ============================================================================ +// fileSHA256 tests +// ============================================================================ + +func TestFileSHA256(t *testing.T) { + t.Parallel() + + type fileSHA256Args struct { + path string + } + + tests := []struct { + name string + args fileSHA256Args + want string + wantErr bool + }{ + { + name: "NonExistentFile", + args: fileSHA256Args{path: "/nonexistent/path/file.txt"}, + wantErr: true, + }, + { + name: "EmptyFile", + args: func() fileSHA256Args { + dir := t.TempDir() + f := filepath.Join(dir, "empty.txt") + _ = os.WriteFile(f, []byte{}, 0644) + return fileSHA256Args{path: f} + }(), + want: "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + }, + { + name: "KnownContent", + args: func() fileSHA256Args { + dir := t.TempDir() + f := filepath.Join(dir, "hello.txt") + _ = os.WriteFile(f, []byte("hello world"), 0644) + return fileSHA256Args{path: f} + }(), + want: "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t 
*testing.T) { + t.Parallel() + got, err := fileSHA256(tc.args.path) + if (err != nil) != tc.wantErr { + t.Fatalf("fileSHA256() error = %v, wantErr %v", err, tc.wantErr) + } + if got != tc.want { + t.Fatalf("fileSHA256() = %q, want %q", got, tc.want) + } + }) + } +} + +// ============================================================================ +// copyFile tests +// ============================================================================ + +func TestCopyFile(t *testing.T) { + t.Parallel() + + type copyFileArgs struct { + src string + dst string + } + + type copyFileWant struct { + content string + checkPerms bool + } + + tests := []struct { + name string + args copyFileArgs + want copyFileWant + wantErr bool + }{ + { + name: "NonExistentSource", + args: copyFileArgs{src: "/nonexistent/src.txt", dst: filepath.Join(t.TempDir(), "dst.txt")}, + wantErr: true, + }, + { + name: "ContentPreserved", + args: func() copyFileArgs { + dir := t.TempDir() + src := filepath.Join(dir, "src.txt") + dst := filepath.Join(dir, "dst.txt") + _ = os.WriteFile(src, []byte("test content for copy"), 0755) + return copyFileArgs{src: src, dst: dst} + }(), + want: copyFileWant{content: "test content for copy"}, + }, + { + name: "PermissionsPreserved", + args: func() copyFileArgs { + dir := t.TempDir() + src := filepath.Join(dir, "src.txt") + dst := filepath.Join(dir, "dst.txt") + _ = os.WriteFile(src, []byte("x"), 0755) + return copyFileArgs{src: src, dst: dst} + }(), + want: copyFileWant{checkPerms: true}, + }, + { + name: "OverwritesExisting", + args: func() copyFileArgs { + dir := t.TempDir() + src := filepath.Join(dir, "src.txt") + dst := filepath.Join(dir, "dst.txt") + _ = os.WriteFile(src, []byte("new content"), 0644) + _ = os.WriteFile(dst, []byte("old content"), 0644) + return copyFileArgs{src: src, dst: dst} + }(), + want: copyFileWant{content: "new content"}, + }, + { + name: "DestinationDirectoryNotExist", + args: func() copyFileArgs { + dir := t.TempDir() + src := 
filepath.Join(dir, "src.txt") + dst := filepath.Join(dir, "nonexistent", "dst.txt") + _ = os.WriteFile(src, []byte("x"), 0644) + return copyFileArgs{src: src, dst: dst} + }(), + wantErr: true, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + err := copyFile(tc.args.src, tc.args.dst) + if (err != nil) != tc.wantErr { + t.Fatalf("copyFile() error = %v, wantErr %v", err, tc.wantErr) + } + if tc.wantErr { + return + } + if tc.want.content != "" { + got, err := os.ReadFile(tc.args.dst) + if err != nil { + t.Fatalf("failed to read dst: %v", err) + } + if string(got) != tc.want.content { + t.Fatalf("dst content = %q, want %q", got, tc.want.content) + } + } + if tc.want.checkPerms { + srcInfo, _ := os.Stat(tc.args.src) + dstInfo, _ := os.Stat(tc.args.dst) + if srcInfo.Mode() != dstInfo.Mode() { + t.Fatalf("dst mode = %v, want %v", dstInfo.Mode(), srcInfo.Mode()) + } + } + }) + } +} + +// ============================================================================ +// buildContainerAllocateResponse tests +// ============================================================================ + +func TestBuildContainerAllocateResponse(t *testing.T) { + const allocAnno = "huawei.com/Ascend910" + + type buildContainerAllocateResponseArgs struct { + pod *v1.Pod + containerDevs device.ContainerDevices + rtInfoLookup map[string]RuntimeInfo + } + + type buildContainerAllocateResponseWant struct { + envs map[string]string + mounts []*v1beta1.Mount + } + + tests := []struct { + name string + setup func() (*PluginServer, CleanupFunc) + args buildContainerAllocateResponseArgs + want buildContainerAllocateResponseWant + wantErr string + }{ + { + name: "SingleDeviceNonHamiCore", + setup: func() (*PluginServer, CleanupFunc) { + return &PluginServer{ + mgr: &FakeManager{ + GetDeviceByUUIDFunc: func(uuid string) *manager.Device { + return &manager.Device{UUID: "uuid1", PhyID: 3} + }, + }, + allocAnno: allocAnno, + }, func() {} + }, + args: 
buildContainerAllocateResponseArgs{ + pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{}}}, + containerDevs: device.ContainerDevices{cd("uuid1", "Ascend910", 1024, 4)}, + rtInfoLookup: map[string]RuntimeInfo{ + "uuid1": {UUID: "uuid1", Temp: "vir01"}, + }, + }, + want: buildContainerAllocateResponseWant{ + envs: map[string]string{ + "ASCEND_VISIBLE_DEVICES": "3", + "ASCEND_VNPU_SPECS": "vir01", + }, + }, + }, + { + name: "MultipleDevices", + setup: func() (*PluginServer, CleanupFunc) { + return &PluginServer{ + mgr: &FakeManager{ + GetDeviceByUUIDFunc: func(uuid string) *manager.Device { + switch uuid { + case "uuid1": + return &manager.Device{UUID: "uuid1", PhyID: 0} + case "uuid2": + return &manager.Device{UUID: "uuid2", PhyID: 1} + default: + return nil + } + }, + }, + allocAnno: allocAnno, + }, func() {} + }, + args: buildContainerAllocateResponseArgs{ + pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{}}}, + containerDevs: device.ContainerDevices{cd("uuid1", "Ascend910", 1024, 4), cd("uuid2", "Ascend910", 2048, 8)}, + rtInfoLookup: map[string]RuntimeInfo{ + "uuid1": {UUID: "uuid1", Temp: "vir01"}, + "uuid2": {UUID: "uuid2", Temp: "vir01"}, + }, + }, + want: buildContainerAllocateResponseWant{ + envs: map[string]string{ + "ASCEND_VISIBLE_DEVICES": "0,1", + "ASCEND_VNPU_SPECS": "vir01", + }, + }, + }, + { + name: "UnknownUUID", + setup: func() (*PluginServer, CleanupFunc) { + return &PluginServer{ + mgr: &FakeManager{}, + allocAnno: allocAnno, + }, func() {} + }, + args: buildContainerAllocateResponseArgs{ + pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{}}}, + containerDevs: device.ContainerDevices{cd("unknown-uuid", "Ascend910", 1024, 4)}, + }, + wantErr: "unknown uuid", + }, + { + name: "EmptyContainerDevs", + setup: func() (*PluginServer, CleanupFunc) { + return &PluginServer{ + mgr: &FakeManager{}, + allocAnno: allocAnno, + }, func() {} + }, + args: 
buildContainerAllocateResponseArgs{ + pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{}}}, + containerDevs: device.ContainerDevices{}, + }, + wantErr: "annotation", + }, + { + name: "HamiCoreMode_Mounts", + setup: func() (*PluginServer, CleanupFunc) { + return &PluginServer{ + mgr: &FakeManager{ + GetDeviceByUUIDFunc: func(uuid string) *manager.Device { + return &manager.Device{UUID: "uuid1", PhyID: 3} + }, + }, + allocAnno: allocAnno, + }, func() {} + }, + args: buildContainerAllocateResponseArgs{ + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + VNPUModeAnnotation: VNPUModeHamiCore, + }, + }, + }, + containerDevs: device.ContainerDevices{cd("uuid1", "Ascend910", 1024, 4)}, + rtInfoLookup: func() map[string]RuntimeInfo { + mem := int64(16384) + core := int32(4) + return map[string]RuntimeInfo{ + "uuid1": {UUID: "uuid1", Temp: "vir01", Memory: &mem, Core: &core}, + } + }(), + }, + want: buildContainerAllocateResponseWant{ + envs: map[string]string{ + "ASCEND_VISIBLE_DEVICES": "3", + "NPU_MEM_QUOTA": "16384", + "NPU_PRIORITY": "4", + "NPU_GLOBAL_SHM_PATH": "/hami-shared-region/3_global_registry", + }, + mounts: []*v1beta1.Mount{ + {HostPath: "/usr/local/bin/npu-smi", ContainerPath: "/usr/local/bin/npu-smi", ReadOnly: true}, + {HostPath: "/etc/ascend_install.info", ContainerPath: "/etc/ascend_install.info", ReadOnly: true}, + {HostPath: "/usr/local/Ascend/driver/lib64/driver", ContainerPath: "/usr/local/Ascend/driver/lib64/driver", ReadOnly: true}, + {HostPath: "/usr/local/Ascend/driver/version.info", ContainerPath: "/usr/local/Ascend/driver/version.info", ReadOnly: true}, + {HostPath: "/usr/local/hami-vnpu-core", ContainerPath: "/hami-vnpu-core", ReadOnly: true}, + {HostPath: "/usr/local/hami-vnpu-core/ld.so.preload", ContainerPath: "/etc/ld.so.preload", ReadOnly: true}, + {HostPath: "/usr/local/hami-shared-region", ContainerPath: "/hami-shared-region", ReadOnly: false}, + }, + }, + }, + { + name: 
"HamiCoreMode_NilMemoryCore", + setup: func() (*PluginServer, CleanupFunc) { + return &PluginServer{ + mgr: &FakeManager{ + GetDeviceByUUIDFunc: func(uuid string) *manager.Device { + return &manager.Device{UUID: "uuid1", PhyID: 3} + }, + }, + allocAnno: allocAnno, + }, func() {} + }, + args: buildContainerAllocateResponseArgs{ + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{VNPUModeAnnotation: VNPUModeHamiCore}, + }, + }, + containerDevs: device.ContainerDevices{cd("uuid1", "Ascend910", 1024, 4)}, + rtInfoLookup: map[string]RuntimeInfo{ + "uuid1": {UUID: "uuid1", Temp: "vir01", Memory: nil, Core: nil}, + }, + }, + want: buildContainerAllocateResponseWant{ + envs: map[string]string{ + "ASCEND_VISIBLE_DEVICES": "3", + "NPU_GLOBAL_SHM_PATH": "/hami-shared-region/3_global_registry", + }, + }, + }, + { + name: "HamiCoreMode_MemoryAndCoreSet", + setup: func() (*PluginServer, CleanupFunc) { + return &PluginServer{ + mgr: &FakeManager{ + GetDeviceByUUIDFunc: func(uuid string) *manager.Device { + return &manager.Device{UUID: "uuid1", PhyID: 5} + }, + }, + allocAnno: allocAnno, + }, func() {} + }, + args: buildContainerAllocateResponseArgs{ + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{VNPUModeAnnotation: VNPUModeHamiCore}, + }, + }, + containerDevs: device.ContainerDevices{cd("uuid1", "Ascend910", 1024, 4)}, + rtInfoLookup: func() map[string]RuntimeInfo { + mem := int64(8192) + core := int32(2) + return map[string]RuntimeInfo{ + "uuid1": {UUID: "uuid1", Temp: "vir01", Memory: &mem, Core: &core}, + } + }(), + }, + want: buildContainerAllocateResponseWant{ + envs: map[string]string{ + "NPU_MEM_QUOTA": "8192", + "NPU_PRIORITY": "2", + }, + }, + }, + { + name: "NonHamiCore_EmptyTemp", + setup: func() (*PluginServer, CleanupFunc) { + return &PluginServer{ + mgr: &FakeManager{ + GetDeviceByUUIDFunc: func(uuid string) *manager.Device { + return &manager.Device{UUID: "uuid1", PhyID: 5} + }, + }, + allocAnno: 
allocAnno, + }, func() {} + }, + args: buildContainerAllocateResponseArgs{ + pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{}}}, + containerDevs: device.ContainerDevices{cd("uuid1", "Ascend910", 1024, 4)}, + rtInfoLookup: map[string]RuntimeInfo{ + "uuid1": {UUID: "uuid1", Temp: ""}, + }, + }, + want: buildContainerAllocateResponseWant{ + envs: map[string]string{ + "ASCEND_VISIBLE_DEVICES": "5", + }, + }, + }, + { + name: "NonHamiCore_NonEmptyTemp", + setup: func() (*PluginServer, CleanupFunc) { + return &PluginServer{ + mgr: &FakeManager{ + GetDeviceByUUIDFunc: func(uuid string) *manager.Device { + return &manager.Device{UUID: "uuid1", PhyID: 5} + }, + }, + allocAnno: allocAnno, + }, func() {} + }, + args: buildContainerAllocateResponseArgs{ + pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{}}}, + containerDevs: device.ContainerDevices{cd("uuid1", "Ascend910", 1024, 4)}, + rtInfoLookup: map[string]RuntimeInfo{ + "uuid1": {UUID: "uuid1", Temp: "vir02"}, + }, + }, + want: buildContainerAllocateResponseWant{ + envs: map[string]string{ + "ASCEND_VISIBLE_DEVICES": "5", + "ASCEND_VNPU_SPECS": "vir02", + }, + }, + }, + { + name: "UUIDNotInLookup", + setup: func() (*PluginServer, CleanupFunc) { + return &PluginServer{ + mgr: &FakeManager{ + GetDeviceByUUIDFunc: func(uuid string) *manager.Device { + return &manager.Device{UUID: "uuid1", PhyID: 3} + }, + }, + allocAnno: allocAnno, + }, func() {} + }, + args: buildContainerAllocateResponseArgs{ + pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{}}}, + containerDevs: device.ContainerDevices{cd("uuid1", "Ascend910", 1024, 4)}, + rtInfoLookup: map[string]RuntimeInfo{}, + }, + want: buildContainerAllocateResponseWant{ + envs: map[string]string{ + "ASCEND_VISIBLE_DEVICES": "3", + }, + }, + }, + { + name: "FirstTempUsedWhenMultipleDevices", + setup: func() (*PluginServer, CleanupFunc) { + return &PluginServer{ + mgr: &FakeManager{ + GetDeviceByUUIDFunc: 
func(uuid string) *manager.Device { + switch uuid { + case "uuid1": + return &manager.Device{UUID: "uuid1", PhyID: 0} + case "uuid2": + return &manager.Device{UUID: "uuid2", PhyID: 1} + default: + return nil + } + }, + }, + allocAnno: allocAnno, + }, func() {} + }, + args: buildContainerAllocateResponseArgs{ + pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{}}}, + containerDevs: device.ContainerDevices{cd("uuid1", "Ascend910", 1024, 4), cd("uuid2", "Ascend910", 2048, 8)}, + rtInfoLookup: map[string]RuntimeInfo{ + "uuid1": {UUID: "uuid1", Temp: "vir01"}, + "uuid2": {UUID: "uuid2", Temp: "vir02"}, + }, + }, + want: buildContainerAllocateResponseWant{ + envs: map[string]string{ + "ASCEND_VISIBLE_DEVICES": "0,1", + "ASCEND_VNPU_SPECS": "vir01", + }, + }, + }, + { + name: "HamiCoreMultiDevice", + setup: func() (*PluginServer, CleanupFunc) { + return &PluginServer{ + mgr: &FakeManager{ + GetDeviceByUUIDFunc: func(uuid string) *manager.Device { + switch uuid { + case "uuid1": + return &manager.Device{UUID: "uuid1", PhyID: 0} + case "uuid2": + return &manager.Device{UUID: "uuid2", PhyID: 1} + default: + return nil + } + }, + }, + allocAnno: allocAnno, + }, func() {} + }, + args: buildContainerAllocateResponseArgs{ + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{VNPUModeAnnotation: VNPUModeHamiCore}, + }, + }, + containerDevs: device.ContainerDevices{cd("uuid1", "Ascend910", 1024, 4), cd("uuid2", "Ascend910", 2048, 8)}, + rtInfoLookup: func() map[string]RuntimeInfo { + mem := int64(32768) + core := int32(8) + return map[string]RuntimeInfo{ + "uuid1": {UUID: "uuid1", Temp: "vir01", Memory: &mem, Core: &core}, + "uuid2": {UUID: "uuid2", Temp: "vir02", Memory: nil, Core: nil}, + } + }(), + }, + want: buildContainerAllocateResponseWant{ + envs: map[string]string{ + "ASCEND_VISIBLE_DEVICES": "0,1", + "NPU_MEM_QUOTA": "32768", + "NPU_PRIORITY": "8", + "NPU_GLOBAL_SHM_PATH": "/hami-shared-region/0_global_registry", + }, + 
}, + }, + { + name: "ErrorIncludesAllocAnno", + setup: func() (*PluginServer, CleanupFunc) { + return &PluginServer{ + mgr: &FakeManager{}, + allocAnno: allocAnno, + }, func() {} + }, + args: buildContainerAllocateResponseArgs{ + pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{}}}, + containerDevs: device.ContainerDevices{}, + }, + wantErr: "huawei.com/Ascend910", + }, + { + name: "ResponseStructFields", + setup: func() (*PluginServer, CleanupFunc) { + return &PluginServer{ + mgr: &FakeManager{ + GetDeviceByUUIDFunc: func(uuid string) *manager.Device { + return &manager.Device{UUID: "uuid1", PhyID: 7} + }, + }, + allocAnno: allocAnno, + }, func() {} + }, + args: buildContainerAllocateResponseArgs{ + pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{}}}, + containerDevs: device.ContainerDevices{cd("uuid1", "Ascend910", 1024, 4)}, + rtInfoLookup: map[string]RuntimeInfo{ + "uuid1": {UUID: "uuid1", Temp: "vir01"}, + }, + }, + want: buildContainerAllocateResponseWant{ + envs: map[string]string{ + "ASCEND_VISIBLE_DEVICES": "7", + "ASCEND_VNPU_SPECS": "vir01", + }, + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + ps, cleanup := tc.setup() + t.Cleanup(cleanup) + + resp, err := ps.buildContainerAllocateResponse(tc.args.pod, tc.args.containerDevs, tc.args.rtInfoLookup) + + if tc.wantErr != "" { + if err == nil { + t.Fatal("expected error, got nil") + } + if !strings.Contains(err.Error(), tc.wantErr) { + t.Fatalf("error should contain %q, got: %v", tc.wantErr, err) + } + return + } + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + // Check envs + for k, wantVal := range tc.want.envs { + if resp.Envs[k] != wantVal { + t.Fatalf("env[%q] = %q, want %q", k, resp.Envs[k], wantVal) + } + } + + // Check that unwanted envs are absent + if _, ok := resp.Envs["ASCEND_VNPU_SPECS"]; ok && tc.want.envs["ASCEND_VNPU_SPECS"] == "" { + // Only fail if the test doesn't expect 
ASCEND_VNPU_SPECS + if _, wantSet := tc.want.envs["ASCEND_VNPU_SPECS"]; !wantSet { + t.Fatal("ASCEND_VNPU_SPECS should not be set") + } + } + + // Check mounts + if tc.want.mounts != nil { + if len(resp.Mounts) != len(tc.want.mounts) { + t.Fatalf("expected %d mounts, got %d", len(tc.want.mounts), len(resp.Mounts)) + } + for i, wantMount := range tc.want.mounts { + if resp.Mounts[i].HostPath != wantMount.HostPath { + t.Errorf("mount[%d].HostPath = %q, want %q", i, resp.Mounts[i].HostPath, wantMount.HostPath) + } + if resp.Mounts[i].ContainerPath != wantMount.ContainerPath { + t.Errorf("mount[%d].ContainerPath = %q, want %q", i, resp.Mounts[i].ContainerPath, wantMount.ContainerPath) + } + if resp.Mounts[i].ReadOnly != wantMount.ReadOnly { + t.Errorf("mount[%d].ReadOnly = %v, want %v", i, resp.Mounts[i].ReadOnly, wantMount.ReadOnly) + } + } + } + + // Non-hami-core mode: Mounts and Devices should be nil + if tc.want.mounts == nil && tc.args.pod.Annotations[VNPUModeAnnotation] != VNPUModeHamiCore { + if resp.Mounts != nil { + t.Fatal("resp.Mounts should be nil in non-hami-core mode") + } + if resp.Devices != nil { + t.Fatal("resp.Devices should be nil") + } + } + }) + } +} + +// ============================================================================ +// popNextContainerDevices tests +// ============================================================================ + +func TestPopNextContainerDevices(t *testing.T) { + ps := newTestPluginServer("huawei.com/Ascend910", "hami.io/Ascend910-devices-to-allocate") + + type popNextContainerDevicesWant struct { + firstUUID string + mutatedFirst bool + remaining int + } + + tests := []struct { + name string + podSingleDev device.PodSingleDevice + want popNextContainerDevicesWant + wantErr string + }{ + { + name: "EmptyPodSingleDevice", + podSingleDev: device.PodSingleDevice{}, + wantErr: "no pending device allocation found", + }, + { + name: "AllContainersEmpty", + podSingleDev: device.PodSingleDevice{{}, {}, {}}, + wantErr:
"no pending device allocation found", + }, + { + name: "FirstContainerHasDevices", + podSingleDev: device.PodSingleDevice{ + {cd("uuid1", "Ascend910", 1024, 4)}, + {cd("uuid2", "Ascend910", 2048, 8)}, + }, + want: popNextContainerDevicesWant{ + firstUUID: "uuid1", + mutatedFirst: true, + remaining: 1, + }, + }, + { + name: "SecondContainerHasDevices", + podSingleDev: device.PodSingleDevice{ + {}, + {cd("uuid2", "Ascend910", 2048, 8)}, + {cd("uuid3", "Ascend910", 512, 2)}, + }, + want: popNextContainerDevicesWant{ + firstUUID: "uuid2", + }, + }, + { + name: "MutationErasesFirstNonEmpty", + podSingleDev: device.PodSingleDevice{ + {cd("uuid1", "Ascend910", 1024, 4)}, + {cd("uuid2", "Ascend910", 2048, 8)}, + }, + want: popNextContainerDevicesWant{ + firstUUID: "uuid1", + mutatedFirst: true, + remaining: 1, + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got, err := ps.popNextContainerDevices(tc.podSingleDev) + + if tc.wantErr != "" { + if err == nil { + t.Fatal("expected error, got nil") + } + if !strings.Contains(err.Error(), tc.wantErr) { + t.Fatalf("error should contain %q, got: %v", tc.wantErr, err) + } + return + } + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if tc.want.firstUUID != "" { + if len(got) == 0 || got[0].UUID != tc.want.firstUUID { + t.Fatalf("popped devices = %+v, want first UUID %q", got, tc.want.firstUUID) + } + } + + if tc.want.mutatedFirst && len(tc.podSingleDev) > 0 { + if len(tc.podSingleDev[0]) != 0 { + t.Fatalf("first container should be erased after pop, got %d devices", len(tc.podSingleDev[0])) + } + } + + if tc.want.remaining > 0 && len(tc.podSingleDev) > 1 { + if len(tc.podSingleDev[1]) != tc.want.remaining { + t.Fatalf("second container should still have %d device(s), got %d", tc.want.remaining, len(tc.podSingleDev[1])) + } + } + }) + } +} + +// ============================================================================ +// buildRuntimeInfoLookup tests +//
============================================================================ + +func TestBuildRuntimeInfoLookup(t *testing.T) { + const allocAnno = "huawei.com/Ascend910" + + type buildRuntimeInfoLookupWant struct { + lookup map[string]RuntimeInfo + } + + tests := []struct { + name string + pod *v1.Pod + want buildRuntimeInfoLookupWant + wantErr string + }{ + { + name: "AnnotationNotSet", + pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{}}}, + wantErr: "not set", + }, + { + name: "InvalidJSON", + pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + allocAnno: "not-json", + }, + }, + }, + wantErr: "invalid", + }, + { + name: "Normal", + pod: func() *v1.Pod { + mem := int64(16384) + core := int32(4) + rtInfo := []RuntimeInfo{ + {UUID: "uuid1", Temp: "vir01", Memory: &mem, Core: &core}, + {UUID: "uuid2", Temp: "vir02"}, + } + data, _ := json.Marshal(rtInfo) + return &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + allocAnno: string(data), + }, + }, + } + }(), + want: buildRuntimeInfoLookupWant{ + lookup: map[string]RuntimeInfo{ + "uuid1": {UUID: "uuid1", Temp: "vir01"}, + "uuid2": {UUID: "uuid2", Temp: "vir02"}, + }, + }, + }, + { + name: "EmptyUUIDSkipped", + pod: func() *v1.Pod { + rtInfo := []RuntimeInfo{ + {UUID: "", Temp: "vir01"}, + {UUID: "uuid2", Temp: "vir02"}, + } + data, _ := json.Marshal(rtInfo) + return &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + allocAnno: string(data), + }, + }, + } + }(), + want: buildRuntimeInfoLookupWant{ + lookup: map[string]RuntimeInfo{ + "uuid2": {UUID: "uuid2", Temp: "vir02"}, + }, + }, + }, + { + name: "MultipleEntries", + pod: func() *v1.Pod { + rtInfo := []RuntimeInfo{ + {UUID: "uuid1", Temp: "vir01"}, + {UUID: "uuid2", Temp: "vir02"}, + {UUID: "uuid3", Temp: "vir03"}, + } + data, _ := json.Marshal(rtInfo) + return &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + allocAnno: 
string(data), + }, + }, + } + }(), + want: buildRuntimeInfoLookupWant{ + lookup: map[string]RuntimeInfo{ + "uuid1": {UUID: "uuid1", Temp: "vir01"}, + "uuid2": {UUID: "uuid2", Temp: "vir02"}, + "uuid3": {UUID: "uuid3", Temp: "vir03"}, + }, + }, + }, + } + + ps := &PluginServer{allocAnno: allocAnno} + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got, err := ps.buildRuntimeInfoLookup(tc.pod) + + if tc.wantErr != "" { + if err == nil { + t.Fatal("expected error, got nil") + } + if !strings.Contains(err.Error(), tc.wantErr) { + t.Fatalf("error should contain %q, got: %v", tc.wantErr, err) + } + // For InvalidJSON, verify %w wrapping + if tc.name == "InvalidJSON" { + var jsonErr *json.SyntaxError + if !errors.As(err, &jsonErr) { + t.Fatalf("expected wrapped json.SyntaxError, got: %v", err) + } + } + return + } + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if len(got) != len(tc.want.lookup) { + t.Fatalf("expected %d entries, got %d", len(tc.want.lookup), len(got)) + } + for uuid, wantInfo := range tc.want.lookup { + gotInfo, ok := got[uuid] + if !ok { + t.Fatalf("expected UUID %q in lookup", uuid) + } + if gotInfo.Temp != wantInfo.Temp { + t.Fatalf("lookup[%q].Temp = %q, want %q", uuid, gotInfo.Temp, wantInfo.Temp) + } + } + }) + } +} + +// ============================================================================ +// decodeDeviceAnnotations tests +// ============================================================================ + +func TestDecodeDeviceAnnotations(t *testing.T) { + type decodeDeviceAnnotationsWant struct { + nonEmptyContainers int + } + + tests := []struct { + name string + pod *v1.Pod + want decodeDeviceAnnotationsWant + wantErr string + setup func() CleanupFunc + }{ + { + name: "AnnotationNotPresent", + pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{}}}, + wantErr: "not found in pod annotations", + setup: func() CleanupFunc { return setupInRequestDevices(testCommonWord) }, + }, + { + 
name: "ValidAnnotation", + pod: func() *v1.Pod { + input := device.EncodePodSingleDevice(device.PodSingleDevice{ + {cd("uuid1", "Ascend910", 1024, 4)}, + }) + return &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + "hami.io/Ascend910-devices-to-allocate": input, + }, + }, + } + }(), + want: decodeDeviceAnnotationsWant{nonEmptyContainers: 1}, + setup: func() CleanupFunc { return setupInRequestDevices(testCommonWord) }, + }, + { + name: "MultiContainer", + pod: func() *v1.Pod { + input := device.EncodePodSingleDevice(device.PodSingleDevice{ + {cd("uuid1", "Ascend910", 1024, 4)}, + {cd("uuid2", "Ascend910", 2048, 8)}, + }) + return &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + "hami.io/Ascend910-devices-to-allocate": input, + }, + }, + } + }(), + want: decodeDeviceAnnotationsWant{nonEmptyContainers: 2}, + setup: func() CleanupFunc { return setupInRequestDevices(testCommonWord) }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + if tc.setup != nil { + t.Cleanup(tc.setup()) + } + + ps := newTestPluginServer("huawei.com/Ascend910", "hami.io/Ascend910-devices-to-allocate") + got, err := ps.decodeDeviceAnnotations(tc.pod) + + if tc.wantErr != "" { + if err == nil { + t.Fatal("expected error, got nil") + } + if !strings.Contains(err.Error(), tc.wantErr) { + t.Fatalf("error should contain %q, got: %v", tc.wantErr, err) + } + return + } + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + nonEmpty := 0 + for _, ctrDevs := range got { + if len(ctrDevs) > 0 { + nonEmpty++ + } + } + if nonEmpty != tc.want.nonEmptyContainers { + t.Fatalf("expected %d non-empty containers, got %d", tc.want.nonEmptyContainers, nonEmpty) + } + }) + } +} + +// ============================================================================ +// patchErasedAnnotation tests +// ============================================================================ + +func TestPatchErasedAnnotation(t *testing.T) { + 
type patchErasedAnnotationWant struct { + annotationChanged bool + nonEmptyAfter int + } + + tests := []struct { + name string + pod *v1.Pod + want patchErasedAnnotationWant + wantErr bool + }{ + { + name: "PatchesPodAnnotation", + pod: func() *v1.Pod { + toAllocAnno := "hami.io/Ascend910-devices-to-allocate" + input := device.EncodePodSingleDevice(device.PodSingleDevice{ + {cd("uuid1", "Ascend910", 1024, 4)}, + {cd("uuid2", "Ascend910", 2048, 8)}, + }) + return &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + Annotations: map[string]string{toAllocAnno: input}, + }, + } + }(), + want: patchErasedAnnotationWant{ + annotationChanged: true, + nonEmptyAfter: 1, + }, + }, + { + name: "UpdatesInMemoryAnnotations", + pod: func() *v1.Pod { + toAllocAnno := "hami.io/Ascend910-devices-to-allocate" + input := device.EncodePodSingleDevice(device.PodSingleDevice{ + {cd("uuid1", "Ascend910", 1024, 4)}, + }) + return &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + Annotations: map[string]string{toAllocAnno: input}, + }, + } + }(), + want: patchErasedAnnotationWant{ + annotationChanged: true, + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Cleanup(setupInRequestDevices(testCommonWord)) + t.Cleanup(setupFakeClient([]*v1.Pod{tc.pod}, nil)) + + ps := &PluginServer{ + commonWord: testCommonWord, + toAllocDeviceAnno: "hami.io/Ascend910-devices-to-allocate", + } + podSingleDev, _ := ps.decodeDeviceAnnotations(tc.pod) + ps.popNextContainerDevices(podSingleDev) + + origValue := tc.pod.Annotations[ps.toAllocDeviceAnno] + + err := ps.patchErasedAnnotation(tc.pod, podSingleDev) + + if (err != nil) != tc.wantErr { + t.Fatalf("patchErasedAnnotation() error = %v, wantErr %v", err, tc.wantErr) + } + if tc.wantErr { + return + } + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if tc.want.annotationChanged { + if tc.pod.Annotations[ps.toAllocDeviceAnno] == 
origValue { + t.Fatal("pod.Annotations should have been updated in place") + } + } + + if tc.want.nonEmptyAfter > 0 { + got, err := ps.decodeDeviceAnnotations(tc.pod) + if err != nil { + t.Fatalf("failed to decode patched annotation: %v", err) + } + nonEmpty := 0 + for _, ctrDevs := range got { + if len(ctrDevs) > 0 { + nonEmpty++ + } + } + if nonEmpty != tc.want.nonEmptyAfter { + t.Fatalf("expected %d non-empty container(s) after erase, got %d", tc.want.nonEmptyAfter, nonEmpty) + } + } + }) + } +} + +// ============================================================================ +// Integration: popNextContainerDevices after decode +// ============================================================================ + +func TestPopNextContainerDevices_AfterDecode(t *testing.T) { + cleanup := setupInRequestDevices(testCommonWord) + defer cleanup() + + ps := newTestPluginServer("huawei.com/Ascend910", "hami.io/Ascend910-devices-to-allocate") + input := device.EncodePodSingleDevice(device.PodSingleDevice{ + {}, + {cd("uuid1", "Ascend910", 1024, 4)}, + {cd("uuid2", "Ascend910", 2048, 8)}, + }) + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + "hami.io/Ascend910-devices-to-allocate": input, + }, + }, + } + + podSingleDev, err := ps.decodeDeviceAnnotations(pod) + if err != nil { + t.Fatalf("unexpected error decoding: %v", err) + } + + got, err := ps.popNextContainerDevices(podSingleDev) + if err != nil { + t.Fatalf("unexpected error popping: %v", err) + } + if got[0].UUID != "uuid1" { + t.Fatalf("device UUID = %q, want uuid1", got[0].UUID) + } + + // Pop again should return uuid2 + got2, err := ps.popNextContainerDevices(podSingleDev) + if err != nil { + t.Fatalf("unexpected error on second pop: %v", err) + } + if got2[0].UUID != "uuid2" { + t.Fatalf("device UUID = %q, want uuid2", got2[0].UUID) + } + + // Third pop should fail + _, err = ps.popNextContainerDevices(podSingleDev) + if err == nil { + t.Fatal("expected error on third pop, 
got nil") + } +} + +// ============================================================================ +// Benchmarks +// ============================================================================ + +func BenchmarkBuildRuntimeInfoLookup(b *testing.B) { + ps := newTestPluginServer("huawei.com/Ascend910", "hami.io/Ascend910-devices-to-allocate") + + sizes := []int{1, 8, 64} + for _, n := range sizes { + mem := int64(32768) + core := int32(10) + rtInfo := make([]RuntimeInfo, n) + for i := range rtInfo { + rtInfo[i] = RuntimeInfo{UUID: fmt.Sprintf("uuid-%d", i), Temp: fmt.Sprintf("vir%02d", i), Memory: &mem, Core: &core} + } + data, _ := json.Marshal(rtInfo) + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{"huawei.com/Ascend910": string(data)}, + }, + } + + b.Run(fmt.Sprintf("entries=%d", n), func(b *testing.B) { + for b.Loop() { + _, err := ps.buildRuntimeInfoLookup(pod) + if err != nil { + b.Fatal(err) + } + } + }) + } +} + +func BenchmarkBuildContainerAllocateResponse(b *testing.B) { + b.Run("SingleDevice", func(b *testing.B) { + ps := &PluginServer{ + mgr: &FakeManager{ + GetDeviceByUUIDFunc: func(uuid string) *manager.Device { + return &manager.Device{UUID: uuid, PhyID: 3} + }, + }, + allocAnno: "huawei.com/Ascend910", + } + rtInfoLookup := map[string]RuntimeInfo{ + "uuid1": {UUID: "uuid1", Temp: "vir01"}, + } + pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{}}} + containerDevs := device.ContainerDevices{cd("uuid1", "Ascend910", 1024, 4)} + + for b.Loop() { + _, err := ps.buildContainerAllocateResponse(pod, containerDevs, rtInfoLookup) + if err != nil { + b.Fatal(err) + } + } + }) + + b.Run("MultiDevice_8", func(b *testing.B) { + ps := &PluginServer{ + mgr: &FakeManager{ + GetDeviceByUUIDFunc: func(uuid string) *manager.Device { + return &manager.Device{UUID: uuid, PhyID: 0} + }, + }, + allocAnno: "huawei.com/Ascend910", + } + const n = 8 + rtInfoLookup := make(map[string]RuntimeInfo, n) + 
containerDevs := make(device.ContainerDevices, n) + for i := range n { + uuid := fmt.Sprintf("uuid%d", i) + rtInfoLookup[uuid] = RuntimeInfo{UUID: uuid, Temp: "vir01"} + containerDevs[i] = cd(uuid, "Ascend910", 1024, 4) + } + pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{}}} + + for b.Loop() { + _, err := ps.buildContainerAllocateResponse(pod, containerDevs, rtInfoLookup) + if err != nil { + b.Fatal(err) + } + } + }) + + b.Run("HamiCore_8Devices", func(b *testing.B) { + ps := &PluginServer{ + mgr: &FakeManager{ + GetDeviceByUUIDFunc: func(uuid string) *manager.Device { + return &manager.Device{UUID: uuid, PhyID: 0} + }, + }, + allocAnno: "huawei.com/Ascend910", + } + const n = 8 + mem := int64(32768) + core := int32(10) + rtInfoLookup := make(map[string]RuntimeInfo, n) + containerDevs := make(device.ContainerDevices, n) + for i := range n { + uuid := fmt.Sprintf("uuid%d", i) + rtInfoLookup[uuid] = RuntimeInfo{UUID: uuid, Temp: "vir01", Memory: &mem, Core: &core} + containerDevs[i] = cd(uuid, "Ascend910", 1024, 4) + } + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{VNPUModeAnnotation: VNPUModeHamiCore}, + }, + } + + for b.Loop() { + _, err := ps.buildContainerAllocateResponse(pod, containerDevs, rtInfoLookup) + if err != nil { + b.Fatal(err) + } + } + }) +} From 7f2d8a505ed3dc97ca19e1e0b3d71ef32af966d3 Mon Sep 17 00:00:00 2001 From: houyuxi Date: Fri, 22 May 2026 15:17:17 +0800 Subject: [PATCH 05/12] ci: add test jobs Signed-off-by: houyuxi --- .github/workflows/ci.yml | 217 ++++++++++++++++++++++----------------- Makefile | 5 +- 2 files changed, 125 insertions(+), 97 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e008b19..4626d79 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,9 +7,9 @@ on: push: branches: [ "main" ] tags: - - v[0-9]+.[0-9]+.[0-9]+.[0-9]+ - - v[0-9]+.[0-9]+.[0-9]+ - - v[0-9]+.[0-9]+ + - v[0-9]+.[0-9]+.[0-9]+.[0-9]+ + - 
v[0-9]+.[0-9]+.[0-9]+ + - v[0-9]+.[0-9]+ pull_request: branches: [ "main" ] @@ -21,114 +21,139 @@ jobs: name: lint runs-on: ubuntu-22.04 steps: - - name: checkout code - uses: actions/checkout@v6 - with: - submodules: recursive - - name: install Go - uses: actions/setup-go@v6 - with: - go-version: ${{ env.GO_VERSION }} - - name: go tidy - env: - GOTOOLCHAIN: auto - run: | - find mind-cluster/component -maxdepth 1 -type d ! -name "npu-exporter" ! -name "ascend-common" ! -name "component" -exec rm -rf {} \; - go mod download github.com/Project-HAMi/HAMi - go get github.com/Project-HAMi/ascend-device-plugin/internal/server - go get huawei.com/npu-exporter - go get huawei.com/npu-exporter/utils/logger@v0.0.0-00010101000000-000000000000 - go mod tidy - - name: golangci-lint - uses: golangci/golangci-lint-action@v6 - env: - GOTOOLCHAIN: auto - with: - version: v1.64.5 - install-mode: binary - args: --timeout=10m + - name: checkout code + uses: actions/checkout@v6 + with: + submodules: recursive + - name: install Go + uses: actions/setup-go@v6 + with: + go-version: ${{ env.GO_VERSION }} + - name: go tidy + env: + GOTOOLCHAIN: auto + run: | + find mind-cluster/component -maxdepth 1 -type d ! -name "npu-exporter" ! -name "ascend-common" ! 
-name "component" -exec rm -rf {} \; + go mod download github.com/Project-HAMi/HAMi + go get github.com/Project-HAMi/ascend-device-plugin/internal/server + go get huawei.com/npu-exporter + go get huawei.com/npu-exporter/utils/logger@v0.0.0-00010101000000-000000000000 + go mod tidy + - name: golangci-lint + uses: golangci/golangci-lint-action@v6 + env: + GOTOOLCHAIN: auto + with: + version: v1.64.5 + install-mode: binary + args: --timeout=10m + test: + name: Unit test + runs-on: ubuntu-22.04 + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + submodules: recursive + - name: Install Go + uses: actions/setup-go@v6 + with: + go-version: ${{ env.GO_VERSION }} + - name: Go tidy + run: make tidy + - name: Run tests + run: make test + - name: Upload coverage to Codecov + if: ${{ github.repository == 'Project-HAMi/ascend-device-plugin' }} + uses: codecov/codecov-action@v4 + with: + token: ${{ secrets.CODECOV_TOKEN }} + files: ./_output/coverage/coverage.out + flags: unittests + fail_ci_if_error: false + verbose: true build-vnpu: runs-on: ubuntu-22.04-arm container: image: ghcr.io/dsfans2014/ascend-device-plugin/ascend-lib:0.2 steps: - - uses: actions/checkout@v4 - with: - submodules: recursive - - name: Install Rust - uses: dtolnay/rust-toolchain@stable - - name: Cache cargo - uses: Swatinem/rust-cache@v2 - - name: Build - run: cargo build --release - working-directory: ./libvnpu - - name: Prepare artifacts for upload - run: | - mkdir -p ./artifacts - cp libvnpu/target/release/limiter ./artifacts/ - cp libvnpu/target/release/libvnpu.so ./artifacts/ - echo "/hami-vnpu-core/libvnpu.so" > ./artifacts/ld.so.preload - - name: upload artifacts - uses: actions/upload-artifact@v4 - with: - name: build-artifacts - if-no-files-found: error - path: ./artifacts/* + - uses: actions/checkout@v4 + with: + submodules: recursive + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + - name: Cache cargo + uses: Swatinem/rust-cache@v2 + - name: Build + run: cargo 
build --release + working-directory: ./libvnpu + - name: Prepare artifacts for upload + run: | + mkdir -p ./artifacts + cp libvnpu/target/release/limiter ./artifacts/ + cp libvnpu/target/release/libvnpu.so ./artifacts/ + echo "/hami-vnpu-core/libvnpu.so" > ./artifacts/ld.so.preload + - name: upload artifacts + uses: actions/upload-artifact@v4 + with: + name: build-artifacts + if-no-files-found: error + path: ./artifacts/* build: env: IMAGE_NAME: ${{ secrets.IMAGE_NAME || 'projecthami/ascend-device-plugin' }} runs-on: ubuntu-latest - needs: ["build-vnpu"] + needs: [ "build-vnpu", "test" ] steps: - - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.head.sha }} - submodules: true - - - name: Get branch names. - id: branch-names - uses: tj-actions/branch-names@v8 + - uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.head.sha }} + submodules: true + + - name: Get branch names. + id: branch-names + uses: tj-actions/branch-names@v8 - - name: Download Artifacts - uses: actions/download-artifact@v4 - with: - name: build-artifacts - path: ./lib/hami-vnpu-core/ + - name: Download Artifacts + uses: actions/download-artifact@v4 + with: + name: build-artifacts + path: ./lib/hami-vnpu-core/ - - name: Docker Login - if: ${{ github.event_name == 'push' && (startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/main') }} - uses: docker/login-action@v3.6.0 - with: - username: ${{ secrets.DOCKERHUB_TOKEN }} - password: ${{ secrets.DOCKERHUB_PASSWD }} + - name: Docker Login + if: ${{ github.event_name == 'push' && (startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/main') }} + uses: docker/login-action@v3.6.0 + with: + username: ${{ secrets.DOCKERHUB_TOKEN }} + password: ${{ secrets.DOCKERHUB_PASSWD }} - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 + - name: Set up Docker Buildx + 
uses: docker/setup-buildx-action@v3 - - name: Extract tag name - id: tag - run: | - if [[ "${{ github.ref }}" == refs/tags/* ]]; then - TAG_NAME="${GITHUB_REF#refs/tags/}" - echo "VERSION=${TAG_NAME}" >> $GITHUB_OUTPUT - echo "Extracted version: ${VERSION}" - else - echo "VERSION=latest" >> $GITHUB_OUTPUT - fi + - name: Extract tag name + id: tag + run: | + if [[ "${{ github.ref }}" == refs/tags/* ]]; then + TAG_NAME="${GITHUB_REF#refs/tags/}" + echo "VERSION=${TAG_NAME#v}" >> $GITHUB_OUTPUT + echo "Extracted version: ${TAG_NAME#v}" + else + echo "VERSION=latest" >> $GITHUB_OUTPUT + fi - - name: Build and push - uses: docker/build-push-action@v6 - with: - context: . - platforms: linux/amd64,linux/arm64 - push: ${{ github.event_name == 'push' && (startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/main') }} - build-args: | - BASE_IMAGE=ubuntu:20.04 - GO_VERSION=${{ env.GO_VERSION }} - VERSION=${{ steps.branch-names.outputs.current_branch || steps.branch-names.outputs.tag }}-${{ github.sha }} - tags: ${{ env.IMAGE_NAME }}:${{ steps.tag.outputs.VERSION }} + - name: Build and push + uses: docker/build-push-action@v6 + with: + context: . + platforms: linux/amd64,linux/arm64 + push: ${{ github.event_name == 'push' && (startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/main') }} + build-args: | + BASE_IMAGE=ubuntu:20.04 + GO_VERSION=${{ env.GO_VERSION }} + VERSION=${{ steps.branch-names.outputs.current_branch || steps.branch-names.outputs.tag }}-${{ github.sha }} + tags: ${{ env.IMAGE_NAME }}:${{ steps.tag.outputs.VERSION }} diff --git a/Makefile b/Makefile index 1aed2dd..4ebabe3 100644 --- a/Makefile +++ b/Makefile @@ -8,6 +8,9 @@ all: ascend-device-plugin tidy: $(GO) mod tidy +test: + $(GO) test -v -coverprofile=_output/coverage/coverage.out ./...
+ docker: docker build \ --build-arg BASE_IMAGE=ubuntu:20.04 \ @@ -24,4 +27,4 @@ ascend-device-plugin: clean: rm -rf ./ascend-device-plugin -.PHONY: all clean \ No newline at end of file +.PHONY: all tidy test lint clean \ No newline at end of file From e704e9aa9b638e26b4c82ed2677d82094af5270f Mon Sep 17 00:00:00 2001 From: houyuxi Date: Fri, 22 May 2026 16:03:10 +0800 Subject: [PATCH 06/12] fix: use devmanager.DeviceInterface Signed-off-by: houyuxi --- internal/manager/manager.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/manager/manager.go b/internal/manager/manager.go index 2ad422f..d5fdf01 100644 --- a/internal/manager/manager.go +++ b/internal/manager/manager.go @@ -55,7 +55,7 @@ type Manager interface { type AscendManager struct { mu sync.RWMutex - mgr *devmanager.DeviceManager + mgr devmanager.DeviceInterface config internal.VNPUConfig globalConfig internal.Config devs []*Device From 1170c3ecdf7119f918512307d8c3b6bd7efb1db0 Mon Sep 17 00:00:00 2001 From: houyuxi Date: Fri, 22 May 2026 16:03:20 +0800 Subject: [PATCH 07/12] ci: only test internal package Signed-off-by: houyuxi --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4ebabe3..de79791 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,7 @@ tidy: $(GO) mod tidy test: - $(GO) test -v -coverprofile=_output/coverage/coverage.out ./... + $(GO) test -v -coverprofile=_output/coverage/coverage.out ./internal/... 
docker: docker build \ From 40eccb292ab98e55e2facfd863a72fec17194600 Mon Sep 17 00:00:00 2001 From: houyuxi Date: Fri, 22 May 2026 16:09:33 +0800 Subject: [PATCH 08/12] ci: remove tidy in test job Signed-off-by: houyuxi --- .github/workflows/ci.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4626d79..2317fc8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -59,8 +59,6 @@ jobs: uses: actions/setup-go@v6 with: go-version: ${{ env.GO_VERSION }} - - name: Go tidy - run: make tidy - name: Run tests run: make test - name: Upload coverage to Codecov From d293ca2fd718cd0a51180a424cab9b27a9d5cfad Mon Sep 17 00:00:00 2001 From: houyuxi Date: Fri, 22 May 2026 16:14:38 +0800 Subject: [PATCH 09/12] ci: remove coverage.out Signed-off-by: houyuxi --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index de79791..1551fa4 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,7 @@ tidy: $(GO) mod tidy test: - $(GO) test -v -coverprofile=_output/coverage/coverage.out ./internal/... + $(GO) test -v ./internal/... 
docker: docker build \
From 11c0365da74e17109bdc908c7a50205235f3902a Mon Sep 17 00:00:00 2001
From: houyuxi
Date: Tue, 26 May 2026 12:44:53 +0800
Subject: [PATCH 10/12] fix(server): replace deprecated `grpc.DialContext` with `grpc.NewClient`

NewClient differs from DialContext in three ways that matter here: it
never connects on its own, it cannot block, and it resolves scheme-less
targets with the "dns" resolver instead of "passthrough". Handle all
three so dial keeps its previous contract (a READY connection or an
error within the timeout).

Signed-off-by: houyuxi
---
 internal/server/register.go | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/internal/server/register.go b/internal/server/register.go
index e3acb0c..c86565f 100644
--- a/internal/server/register.go
+++ b/internal/server/register.go
@@ -9,6 +9,7 @@ import (
 	"time"
 
 	"google.golang.org/grpc"
+	"google.golang.org/grpc/connectivity"
 	"google.golang.org/grpc/credentials/insecure"
 	"k8s.io/klog/v2"
 	"k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
@@ -136,17 +137,31 @@ func (ps *PluginServer) registerKubelet() error {
 func (ps *PluginServer) dial(unixSocketPath string, timeout time.Duration) (*grpc.ClientConn, error) {
 	ctx, cancel := context.WithTimeout(context.Background(), timeout)
 	defer cancel()
-	c, err := grpc.DialContext(ctx, unixSocketPath,
+	// NewClient defaults scheme-less targets to the "dns" resolver, whereas
+	// DialContext used "passthrough"; keep passthrough so the raw socket path
+	// reaches the unix dialer below unchanged.
+	c, err := grpc.NewClient("passthrough:///"+unixSocketPath,
 		grpc.WithTransportCredentials(insecure.NewCredentials()),
-		grpc.WithBlock(),
 		grpc.WithContextDialer(func(ctx2 context.Context, addr string) (net.Conn, error) {
 			var d net.Dialer
 			return d.DialContext(ctx2, "unix", addr)
 		}),
 	)
 	if err != nil {
 		return nil, err
 	}
+	// NewClient is lazy and non-blocking; drive the connection and wait for
+	// READY (or the timeout) to match the original WithBlock behaviour.
+	for state := c.GetState(); state != connectivity.Ready; state = c.GetState() {
+		if state == connectivity.Idle {
+			c.Connect()
+		}
+		if !c.WaitForStateChange(ctx, state) {
+			_ = c.Close()
+			return nil, ctx.Err()
+		}
+	}
+
 	return c, nil
 }
 
From c8c82395382c3343f2ffea0061a265a47e6d2e19 Mon Sep 17 00:00:00 2001 From: houyuxi Date: Tue, 26 May 2026 12:45:11 +0800 Subject: [PATCH 11/12] fix: lint issues Signed-off-by: houyuxi --- internal/server/util_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/server/util_test.go b/internal/server/util_test.go index b845422..825c8b4 100644 --- a/internal/server/util_test.go +++ b/internal/server/util_test.go @@ -1089,7 +1089,7 @@ func TestPatchErasedAnnotation(t *testing.T) { toAllocDeviceAnno: "hami.io/Ascend910-devices-to-allocate", } podSingleDev, _ := ps.decodeDeviceAnnotations(tc.pod) - ps.popNextContainerDevices(podSingleDev) + _, _ = ps.popNextContainerDevices(podSingleDev) origValue := tc.pod.Annotations[ps.toAllocDeviceAnno] From 58062ec3f626862534c186433260ad0913b49322 Mon Sep 17 00:00:00 2001 From: houyuxi Date: Tue, 26 May 2026 12:48:59 +0800 Subject: [PATCH 12/12] ci: bump go version from `1.22.5` to `1.24.6` Signed-off-by: houyuxi --- .github/workflows/ci.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2317fc8..8ea560c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,7 +14,7 @@ on: branches: [ "main" ] env: - GO_VERSION: "1.22.5" + GO_VERSION: "1.24.6" jobs: lint: @@ -47,6 +47,7 @@ jobs: version: v1.64.5 install-mode: binary args: --timeout=10m + test: name: Unit test runs-on: ubuntu-22.04