Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 107 additions & 0 deletions gnmi_server/platform_system_health_cli_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"github.com/agiledragon/gomonkey/v2"
pb "github.com/openconfig/gnmi/proto/gnmi"
sccommon "github.com/sonic-net/sonic-gnmi/show_client/common"
"github.com/sonic-net/sonic-gnmi/show_client/helpers"

"golang.org/x/net/context"
"google.golang.org/grpc"
Expand Down Expand Up @@ -768,3 +769,109 @@ func mockSystemWithIgnoreConfig(t *testing.T) *gomonkey.Patches {

return patches
}

// TestGetShowSystemHealthSysreadyStatus exercises the
// SHOW/system-health/sysready-status path through a gNMI Get against a local
// test server. It covers three cases: all services ready, some services down,
// and no sysready data loaded at all.
func TestGetShowSystemHealthSysreadyStatus(t *testing.T) {
	s := createServer(t, ServerPort)
	go runServer(t, s)
	defer s.ForceStop()
	defer ResetDataSetsAndMappings(t)

	tlsConfig := &tls.Config{InsecureSkipVerify: true}
	opts := []grpc.DialOption{grpc.WithTransportCredentials(credentials.NewTLS(tlsConfig))}

	conn, err := grpc.Dial(TargetAddr, opts...)
	if err != nil {
		t.Fatalf("Dialing to %q failed: %v", TargetAddr, err)
	}
	defer conn.Close()

	gClient := pb.NewGNMIClient(conn)
	ctx, cancel := context.WithTimeout(context.Background(), QueryTimeout*time.Second)
	defer cancel()

	ResetDataSetsAndMappings(t)

	// Every case queries the same path, so it is spelled out once.
	const sysreadyPath = `
		elem: <name: "system-health" >
		elem: <name: "sysready-status" >
	`

	// mustMarshal encodes the expected response and aborts the test if
	// encoding fails, rather than silently comparing against a nil payload.
	mustMarshal := func(expected helpers.SysreadyStatus) []byte {
		t.Helper()
		jsonData, err := json.Marshal(expected)
		if err != nil {
			t.Fatalf("marshaling expected response: %v", err)
		}
		return jsonData
	}

	tests := []struct {
		desc        string
		pathTarget  string
		textPbPath  string
		wantRetCode codes.Code
		wantRespVal interface{}
		valTest     bool
		testInit    func()
	}{
		{
			desc:        "query SHOW system-health sysready-status - system ready",
			pathTarget:  "SHOW",
			textPbPath:  sysreadyPath,
			wantRetCode: codes.OK,
			wantRespVal: mustMarshal(helpers.SysreadyStatus{
				SystemStatus: "System is ready",
				Services: []helpers.SysreadyService{
					{ServiceName: "bgp", ServiceStatus: "OK", AppReadyStatus: "OK", DownReason: "-"},
					{ServiceName: "swss", ServiceStatus: "OK", AppReadyStatus: "OK", DownReason: "-"},
					{ServiceName: "syncd", ServiceStatus: "OK", AppReadyStatus: "OK", DownReason: "-"},
				},
			}),
			valTest: true,
			testInit: func() {
				AddDataSet(t, StateDbNum, "../testdata/SYSREADY_STATUS.txt")
			},
		},
		{
			desc:        "query SHOW system-health sysready-status - system not ready",
			pathTarget:  "SHOW",
			textPbPath:  sysreadyPath,
			wantRetCode: codes.OK,
			wantRespVal: mustMarshal(helpers.SysreadyStatus{
				SystemStatus: "System is not ready - one or more services are not up",
				Services: []helpers.SysreadyService{
					{ServiceName: "bgp", ServiceStatus: "OK", AppReadyStatus: "OK", DownReason: "-"},
					{ServiceName: "swss", ServiceStatus: "OK", AppReadyStatus: "Down", DownReason: "orchagent is not responsive"},
					{ServiceName: "syncd", ServiceStatus: "Down", AppReadyStatus: "Down", DownReason: "syncd service not running"},
				},
			}),
			valTest: true,
			testInit: func() {
				// Drop the previous case's data before loading the not-ready fixture.
				FlushDataSet(t, StateDbNum)
				AddDataSet(t, StateDbNum, "../testdata/SYSREADY_STATUS_NOT_READY.txt")
			},
		},
		{
			desc:        "query SHOW system-health sysready-status - no data",
			pathTarget:  "SHOW",
			textPbPath:  sysreadyPath,
			wantRetCode: codes.NotFound,
			valTest:     false,
			testInit: func() {
				// An empty state DB is the "no data" condition under test.
				FlushDataSet(t, StateDbNum)
			},
		},
	}

	for _, test := range tests {
		if test.testInit != nil {
			test.testInit()
		}
		t.Run(test.desc, func(t *testing.T) {
			runTestGet(t, ctx, gClient, test.pathTarget, test.textPbPath, test.wantRetCode, test.wantRespVal, test.valTest)
		})
	}
}
88 changes: 88 additions & 0 deletions show_client/helpers/system_health_helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,39 @@ import (
"sort"
"strings"

log "github.com/golang/glog"
natural "github.com/maruel/natural"
"github.com/sonic-net/sonic-gnmi/show_client/common"
"github.com/sonic-net/sonic-gnmi/show_client/helpers/health_checker"
)

// Redis table/field constants for sysready-status.
// The table and key names are passed, together with common.StateDb, to
// common.GetMapFromQueries; the field names are looked up in the returned maps.
const (
// Table, key and field that hold the overall system ready state.
sysreadyTable = "SYSTEM_READY"
sysreadyKey = "SYSTEM_STATE"
sysreadyStatusField = "Status"

// Table whose entries are read as one row per service.
serviceStatusTable = "ALL_SERVICE_STATUS"

// Fields read from each service entry.
serviceStatusField = "service_status"
appReadyStatusField = "app_ready_status"
failReasonField = "fail_reason"
)

// SysreadyStatus is the top-level JSON response for sysready-status commands.
// It pairs the overall system status message with the per-service rows.
type SysreadyStatus struct {
// SystemStatus is the human-readable system ready message.
SystemStatus string `json:"system_status"`
// Services holds the per-service rows; the "services" key is omitted from
// the JSON when the slice is empty.
Services []SysreadyService `json:"services,omitempty"`
}

// SysreadyService represents one row in the sysready-status service table.
// All fields are plain strings and are always present in the JSON output.
type SysreadyService struct {
// ServiceName identifies the service the row describes.
ServiceName string `json:"service_name"`
// ServiceStatus is the reported status of the service itself.
ServiceStatus string `json:"service_status"`
// AppReadyStatus is the reported application-ready status of the service.
AppReadyStatus string `json:"app_ready_status"`
// DownReason is the reported failure reason, exposed as "down_reason".
DownReason string `json:"down_reason"`
}

// SystemHealthSummary represents the output structure for show system-health summary.
type SystemHealthSummary struct {
StatusLed string `json:"system_status_led"`
Expand Down Expand Up @@ -222,3 +252,61 @@ func DisplayIgnoreList(manager *health_checker.HealthCheckerManager) []HealthLis
})
return entries
}

// GetSysreadyStatus queries STATE_DB for the system ready state and returns
// the status message to display.
//
// It returns "System is ready" when the Status field of
// SYSTEM_READY|SYSTEM_STATE equals "UP" (compared case-insensitively). Any
// other value, including the "" default used when the field cannot be
// resolved, yields "System is not ready - one or more services are not up".
//
// A non-nil error is returned only when the database query itself fails. The
// underlying error is wrapped, so callers can inspect it with errors.Is or
// errors.As.
func GetSysreadyStatus() (string, error) {
	queries := [][]string{
		{common.StateDb, sysreadyTable, sysreadyKey},
	}
	data, err := common.GetMapFromQueries(queries)
	if err != nil {
		return "", fmt.Errorf("failed to read system ready state: %w", err)
	}

	// Anything other than "UP" is reported as not ready.
	raw := common.GetValueOrDefault(data, sysreadyStatusField, "")
	if strings.EqualFold(raw, "UP") {
		return "System is ready", nil
	}
	return "System is not ready - one or more services are not up", nil
}

// GetSysreadyServices queries ALL_SERVICE_STATUS from STATE_DB and returns
// the naturally sorted list of service entries.
func GetSysreadyServices() ([]SysreadyService, error) {
queries := [][]string{
{common.StateDb, serviceStatusTable},
}
data, err := common.GetMapFromQueries(queries)
if err != nil {
return nil, err
}
if len(data) == 0 {
return nil, nil
}

serviceKeys := make([]string, 0, len(data))
for k := range data {
serviceKeys = append(serviceKeys, k)
}
sort.Sort(natural.StringSlice(serviceKeys))

services := make([]SysreadyService, 0, len(serviceKeys))

for _, key := range serviceKeys {
info, ok := data[key].(map[string]interface{})
if !ok {
log.V(2).Infof("getSysreadyServices: skipping invalid entry for key %q", key)
continue
}

svc := SysreadyService{
ServiceName: key,
ServiceStatus: common.GetValueOrDefault(info, serviceStatusField, ""),
AppReadyStatus: common.GetValueOrDefault(info, appReadyStatusField, ""),
DownReason: common.GetValueOrDefault(info, failReasonField, ""),
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If failReasonField is "-" or "", let's return "".
"-" make sense in tabular/visibility but "" will be more useful in json.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

will come back on this

}
services = append(services, svc)
}
return services, nil
}
25 changes: 25 additions & 0 deletions show_client/platform_system_health_cli.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,3 +111,28 @@ func getSystemHealthMonitorList(args sdc.CmdArgs, options sdc.OptionMap) ([]byte

return json.Marshal(result)
}

// getSystemHealthSysreadyStatus implements "show system-health sysready-status".
// It returns the JSON encoding of a helpers.SysreadyStatus holding the system
// ready status and the per-service table.
//
// args and options are not used. An error is returned when the service table
// cannot be queried, when it yields no service rows, or when the system ready
// state cannot be read.
func getSystemHealthSysreadyStatus(args sdc.CmdArgs, options sdc.OptionMap) ([]byte, error) {
	services, err := helpers.GetSysreadyServices()
	if err != nil {
		return nil, fmt.Errorf("failed to query service status: %w", err)
	}

	// Check the length rather than nil so that an empty, non-nil slice is
	// also reported as "no data" instead of producing a response that has a
	// status line but no service rows.
	if len(services) == 0 {
		return nil, fmt.Errorf("No system ready status data available - system-health service might be down")
	}

	sysStatus, err := helpers.GetSysreadyStatus()
	if err != nil {
		return nil, err
	}

	result := helpers.SysreadyStatus{
		SystemStatus: sysStatus,
		Services:     services,
	}
	return json.Marshal(result)
}
10 changes: 10 additions & 0 deletions show_client/show_paths.go
Original file line number Diff line number Diff line change
Expand Up @@ -1163,4 +1163,14 @@ func init() {
0,
nil,
)

// SHOW/system-health/sysready-status
sdc.RegisterCliPath(
[]string{"SHOW", "system-health", "sysready-status"},
getSystemHealthSysreadyStatus,
"SHOW/system-health/sysready-status: Show system ready status",
0,
0,
nil,
)
}
23 changes: 23 additions & 0 deletions testdata/SYSREADY_STATUS.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"SYSTEM_READY|SYSTEM_STATE": {
"Status": "UP"
},
"ALL_SERVICE_STATUS|bgp": {
"service_status": "OK",
"app_ready_status": "OK",
"fail_reason": "-",
"update_time": "2026-04-25 10:00:00"
},
"ALL_SERVICE_STATUS|swss": {
"service_status": "OK",
"app_ready_status": "OK",
"fail_reason": "-",
"update_time": "2026-04-25 10:00:01"
},
"ALL_SERVICE_STATUS|syncd": {
"service_status": "OK",
"app_ready_status": "OK",
"fail_reason": "-",
"update_time": "2026-04-25 10:00:02"
}
}
23 changes: 23 additions & 0 deletions testdata/SYSREADY_STATUS_NOT_READY.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"SYSTEM_READY|SYSTEM_STATE": {
"Status": "DOWN"
},
"ALL_SERVICE_STATUS|bgp": {
"service_status": "OK",
"app_ready_status": "OK",
"fail_reason": "-",
"update_time": "2026-04-25 10:00:00"
},
"ALL_SERVICE_STATUS|swss": {
"service_status": "OK",
"app_ready_status": "Down",
"fail_reason": "orchagent is not responsive",
"update_time": "2026-04-25 10:00:01"
},
"ALL_SERVICE_STATUS|syncd": {
"service_status": "Down",
"app_ready_status": "Down",
"fail_reason": "syncd service not running",
"update_time": "2026-04-25 10:00:02"
}
}
Loading