|
| 1 | +package oom |
| 2 | + |
| 3 | +import ( |
| 4 | + "bufio" |
| 5 | + "fmt" |
| 6 | + "github.com/Azure/kdebug/pkg/base" |
| 7 | + "github.com/Azure/kdebug/pkg/env" |
| 8 | + "os" |
| 9 | + "regexp" |
| 10 | + "strings" |
| 11 | +) |
| 12 | + |
// Kernel log location and the marker substrings searched for in its lines.
const (
	// logPath is the kernel log file that New configures the checker to read.
	logPath = "/var/log/kern.log"

	// cgroupOOMKeyStr is one of the two substrings that mark a log line as
	// an OOM record worth parsing.
	cgroupOOMKeyStr = "Memory cgroup out of memory"

	// outOfMemoryKey is the other substring that marks a log line as an OOM
	// record worth parsing. The match is case-sensitive.
	outOfMemoryKey = "Out of memory"
)
| 18 | + |
// helpLink holds the reference URLs attached to a check result when OOM
// kills are detected.
var helpLink = []string{
	"https://www.kernel.org/doc/gorman/html/understand/understand016.html",
	"https://stackoverflow.com/questions/18845857/what-does-anon-rss-and-total-vm-mean",
	"https://medium.com/tailwinds-navigator/kubernetes-tip-how-does-oomkilled-work-ba71b135993b",
}
| 24 | + |
// oomRegex extracts the fields of a single OOM kill log line. Its five
// capture groups are, in order:
//
//  1. the leading text up to and including a ":XX:XX" suffix (used as the
//     timestamp of the record),
//  2. the text between "process " and the opening parenthesis,
//  3. the text inside the parentheses,
//  4. the value following "anon-rss:",
//  5. the value following "oom_score_adj:".
var oomRegex = regexp.MustCompile(
	`^(.*:.{2}:.{2}) .* process (.*) \((.*)\) .* anon-rss:(.*), file-rss.* oom_score_adj:(.*)`,
)
| 26 | + |
// OOMChecker looks for OOM kill records in a kernel log file.
type OOMChecker struct {
	// kernLogPath is the path of the kernel log file that gets opened and
	// scanned.
	kernLogPath string
}
| 30 | + |
| 31 | +func (c *OOMChecker) Name() string { |
| 32 | + return "OOM" |
| 33 | +} |
| 34 | + |
| 35 | +func New() *OOMChecker { |
| 36 | + //todo: support other logpath |
| 37 | + return &OOMChecker{ |
| 38 | + kernLogPath: logPath, |
| 39 | + } |
| 40 | +} |
| 41 | + |
| 42 | +func (c *OOMChecker) Check(ctx *base.CheckContext) ([]*base.CheckResult, error) { |
| 43 | + var results []*base.CheckResult |
| 44 | + oomResult, err := c.checkOOM(ctx) |
| 45 | + if err != nil { |
| 46 | + return nil, err |
| 47 | + } |
| 48 | + results = append(results, oomResult) |
| 49 | + return results, nil |
| 50 | +} |
| 51 | + |
| 52 | +func (c *OOMChecker) checkOOM(ctx *base.CheckContext) (*base.CheckResult, error) { |
| 53 | + result := &base.CheckResult{ |
| 54 | + Checker: c.Name(), |
| 55 | + } |
| 56 | + if !envCheck(ctx.Environment) { |
| 57 | + result.Description = fmt.Sprint("Skip oom check in non-linux os") |
| 58 | + return result, nil |
| 59 | + } |
| 60 | + oomInfos, err := c.getAndParseOOMLog() |
| 61 | + if err != nil { |
| 62 | + return nil, err |
| 63 | + } else if len(oomInfos) > 0 { |
| 64 | + result.Error = strings.Join(oomInfos, "\n") |
| 65 | + result.Description = "Detect process oom killed" |
| 66 | + result.HelpLinks = helpLink |
| 67 | + } else { |
| 68 | + result.Description = "No OOM found in recent kernlog." |
| 69 | + } |
| 70 | + return result, nil |
| 71 | +} |
| 72 | +func (c *OOMChecker) getAndParseOOMLog() ([]string, error) { |
| 73 | + file, err := os.Open(c.kernLogPath) |
| 74 | + if err != nil { |
| 75 | + return nil, err |
| 76 | + } |
| 77 | + defer file.Close() |
| 78 | + |
| 79 | + var oomInfos []string |
| 80 | + scanner := bufio.NewScanner(file) |
| 81 | + for scanner.Scan() { |
| 82 | + tmp := scanner.Text() |
| 83 | + //todo: more sophisticated OOM context |
| 84 | + //pattern match. https://github.com/torvalds/linux/blob/551acdc3c3d2b6bc97f11e31dcf960bc36343bfc/mm/oom_kill.c#L1120, https://github.com/torvalds/linux/blob/551acdc3c3d2b6bc97f11e31dcf960bc36343bfc/mm/oom_kill.c#L895 |
| 85 | + if strings.Contains(tmp, cgroupOOMKeyStr) || strings.Contains(tmp, outOfMemoryKey) { |
| 86 | + oomInfo, err := parseOOMContent(tmp) |
| 87 | + if err != nil { |
| 88 | + return nil, err |
| 89 | + } else { |
| 90 | + oomInfos = append(oomInfos, oomInfo) |
| 91 | + } |
| 92 | + } |
| 93 | + } |
| 94 | + |
| 95 | + if err := scanner.Err(); err != nil { |
| 96 | + return nil, err |
| 97 | + } |
| 98 | + return oomInfos, nil |
| 99 | +} |
| 100 | + |
| 101 | +func parseOOMContent(content string) (string, error) { |
| 102 | + match := oomRegex.FindStringSubmatch(content) |
| 103 | + if len(match) != 6 { |
| 104 | + err := fmt.Errorf("Can't parse oom content:%s \n", content) |
| 105 | + return "", err |
| 106 | + } else { |
| 107 | + return fmt.Sprintf("progress:[%s %s] is OOM kill at time [%s]. [rss:%s] [oom_score_adj:%s]\n", match[2], match[3], match[1], match[4], match[5]), nil |
| 108 | + } |
| 109 | +} |
| 110 | + |
| 111 | +func envCheck(environment env.Environment) bool { |
| 112 | + //todo:support other os |
| 113 | + return environment.HasFlag("ubuntu") |
| 114 | +} |
0 commit comments