From eb46baad41da913d81dca4f87a510c31e39c875d Mon Sep 17 00:00:00 2001 From: Paul-Andre Henegar Date: Sun, 1 Apr 2018 01:23:31 -0400 Subject: [PATCH 01/13] Done the first four parts --- main.go | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- result.go | 14 +++++++--- 2 files changed, 85 insertions(+), 5 deletions(-) diff --git a/main.go b/main.go index f62aa00..fc4f90c 100644 --- a/main.go +++ b/main.go @@ -1,7 +1,12 @@ package main import ( + "bufio" "fmt" + "log" + "os" + "path/filepath" + "strings" "time" ) @@ -32,6 +37,75 @@ func main() { // number of line deleted // list of function calls seen in the diffs and their number of calls func compute() *result { + var r result - return nil + // A set to keep track of the files we've seen in the diffs + var seenFiles = make(map[string]struct{}) + + diffnames, err := filepath.Glob("./diffs/*.diff") + if err != nil { + log.Fatal(err) + } + + for _, diffname := range diffnames { + + diffFile, err := os.Open(diffname) + if err != nil { + log.Fatal(err) + } + + scanner := bufio.NewScanner(diffFile) + + inFileHeader := true + + var processBlockHeaderLine func(line string) + + processFileHeaderLine := func(line string) { + if strings.HasPrefix(line, "+++") || strings.HasPrefix(line, "---") { + seenFiles[line[6:]] = struct{}{} // Add file to set + } else if strings.HasPrefix(line, "@@") { + inFileHeader = false + processBlockHeaderLine(line) + } else { + // TODO: error + } + } + + processBlockHeaderLine = func(line string) { + r.regions++ + } + + processFileLine := func(line string) { + if line[0] == ' ' { + + } else if line[0] == '-' { + r.lineDeleted++ + } else if line[0] == '+' { + r.lineAdded++ + } else if strings.HasPrefix(line, "@@") { + processBlockHeaderLine(line) + } else { + inFileHeader = true + processFileHeaderLine(line) + } + } + + for scanner.Scan() { + line := scanner.Text() + + if inFileHeader { + processFileHeaderLine(line) + } else { + processFileLine(line) + } + } + + diffFile.Close() + } + + 
for name, _ := range seenFiles { + r.files = append(r.files, name) + } + + return &r } diff --git a/result.go b/result.go index 7e78236..2c5d7f1 100644 --- a/result.go +++ b/result.go @@ -15,8 +15,9 @@ type result struct { lineAdded int //How many line were deleted totla lineDeleted int - //How many times the function seen in the code are called. - functionCalls map[string]int + //How many times the functionj seen in the code are called before and after + functionCallsBefore map[string]int + functionCallsAfter map[string]int } //String returns the value of results as a formated string @@ -33,8 +34,13 @@ func (r *result) String() string { r.appendIntValueToBuffer(r.lineAdded, "LA", &buffer) r.appendIntValueToBuffer(r.lineDeleted, "LD", &buffer) - buffer.WriteString("Functions calls: \n") - for key, value := range r.functionCalls { + buffer.WriteString("Function calls before: \n") + for key, value := range r.functionCallsBefore { + r.appendIntValueToBuffer(value, key, &buffer) + } + + buffer.WriteString("Function calls after: \n") + for key, value := range r.functionCallsAfter { r.appendIntValueToBuffer(value, key, &buffer) } From 0c5fc70f58ac655051e3f4f2cb2180261e2e678b Mon Sep 17 00:00:00 2001 From: Paul-Andre Henegar Date: Mon, 2 Apr 2018 00:37:57 -0400 Subject: [PATCH 02/13] Added C function call counter, with horrible performance --- main.go | 165 +++++++++++++++++++++++++++++++++++++++++++++++++----- result.go | 8 +++ 2 files changed, 160 insertions(+), 13 deletions(-) diff --git a/main.go b/main.go index fc4f90c..faf91ba 100644 --- a/main.go +++ b/main.go @@ -2,10 +2,13 @@ package main import ( "bufio" + "bytes" + "errors" "fmt" "log" "os" "path/filepath" + "regexp" "strings" "time" ) @@ -28,6 +31,107 @@ func main() { fmt.Println(compute()) } +type tokenRule struct { + token int + regexp regexp.Regexp +} + +// This structure represents a (not efficient in general) tokenizer. 
+// It tokenizes a string by everytime trying all regexps and returning the token +// that matches. It assumes that all the regexps match the beginning of the string. +// +// It could be easily replaced by a more efficient and more complete tokenizer. +type tokenizer struct { + text []byte + tokenRules []tokenRule +} + +func (r *tokenizer) Next() (int, string, error) { + if len(r.text) == 0 { + return -1, "", errors.New("tokenizer text empty") + } + for _, rule := range r.tokenRules { + found := rule.regexp.Find(r.text) + if found != nil { + r.text = r.text[len(found):] + return rule.token, string(found), nil + } + } + return -1, "", errors.New("could not match any token") +} + +const ( + whitespace = iota + openParen + identifier + anythingElse +) + +var cTokenizer = tokenizer{ + []byte{}, + []tokenRule{ + {whitespace, *regexp.MustCompilePOSIX(`^[\t\n\f\r ]+`)}, + {openParen, *regexp.MustCompilePOSIX(`^\(`)}, + {identifier, *regexp.MustCompilePOSIX(`^[a-zA-Z_][a-zA-Z0-9_]*`)}, + {anythingElse, *regexp.MustCompilePOSIX(`^.`)}, + }, +} + +func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) { + + cTokenizer.text = buffer.Bytes() + + var keywords = map[string]bool{ + "if": true, + "for": true, + "while": true, + } + + var tokens = [3]int{whitespace, whitespace, whitespace} + var strings [3]string + + for { + + for { // Loop to remove whitespace + if len(cTokenizer.text) == 0 { + return + } + tok, s, err := cTokenizer.Next() + if err != nil { + log.Fatal(err) // This shouldn't happen because of the anythingElse rule + } + if tok != whitespace { + tokens[0], tokens[1] = tokens[1], tokens[2] + strings[0], strings[1] = strings[1], strings[2] + tokens[2] = tok + strings[2] = s + break + } + } + + if tokens[0] != identifier && + tokens[1] == identifier && + tokens[2] == openParen && + !keywords[strings[1]] { + (*counts)[strings[1]]++ + } + } +} + +//Given a bytes.Buffer containing a code segment, its extension, and a map to +//use for counting, counts the 
function calls +func countFunctionCalls(buffer *bytes.Buffer, ext string, counts *map[string]int) { + switch ext { + case ".c", ".h": + countCFunctionCalls(buffer, counts) + + case ".py": + + default: + + } +} + //compute parses the git diffs in ./diffs and returns //a result struct that contains all the relevant informations //about these diffs @@ -38,9 +142,11 @@ func main() { // list of function calls seen in the diffs and their number of calls func compute() *result { var r result + r.functionCallsBefore = make(map[string]int) + r.functionCallsAfter = make(map[string]int) - // A set to keep track of the files we've seen in the diffs var seenFiles = make(map[string]struct{}) + var seenExtensions = make(map[string]struct{}) diffnames, err := filepath.Glob("./diffs/*.diff") if err != nil { @@ -58,35 +164,64 @@ func compute() *result { inFileHeader := true - var processBlockHeaderLine func(line string) + var currentRegionBefore, currentRegionAfter bytes.Buffer + var currentExtension string + + // Here I create a small state machine where one of the following closures + // is meant to be executed at every line. 
+ var processFileHeaderLine func(line string) + var processRegionHeaderLine func(line string) + var processCodeLine func(line string) - processFileHeaderLine := func(line string) { + processFileHeaderLine = func(line string) { if strings.HasPrefix(line, "+++") || strings.HasPrefix(line, "---") { - seenFiles[line[6:]] = struct{}{} // Add file to set + var fileName = line[len("--- a/"):] + seenFiles[fileName] = struct{}{} + + var fileType = filepath.Ext(fileName) + if fileType == "" { + fileType = filepath.Base(fileName) + } + seenExtensions[fileType] = struct{}{} + currentExtension = fileType + } else if strings.HasPrefix(line, "@@") { inFileHeader = false - processBlockHeaderLine(line) + processRegionHeaderLine(line) } else { // TODO: error } } - processBlockHeaderLine = func(line string) { + processRegionHeaderLine = func(line string) { r.regions++ + currentRegionBefore.Reset() + currentRegionAfter.Reset() } - processFileLine := func(line string) { + processCodeLine = func(line string) { if line[0] == ' ' { - + currentRegionBefore.WriteString(line[1:]) + currentRegionBefore.WriteString("\n") + currentRegionAfter.WriteString(line[1:]) + currentRegionAfter.WriteString("\n") } else if line[0] == '-' { r.lineDeleted++ + currentRegionBefore.WriteString(line[1:]) + currentRegionBefore.WriteString("\n") } else if line[0] == '+' { r.lineAdded++ - } else if strings.HasPrefix(line, "@@") { - processBlockHeaderLine(line) + currentRegionAfter.WriteString(line[1:]) + currentRegionAfter.WriteString("\n") } else { - inFileHeader = true - processFileHeaderLine(line) + countFunctionCalls(&currentRegionBefore, currentExtension, &r.functionCallsBefore) + countFunctionCalls(&currentRegionAfter, currentExtension, &r.functionCallsAfter) + if strings.HasPrefix(line, "@@") { + processRegionHeaderLine(line) + } else { + inFileHeader = true + processFileHeaderLine(line) + } } } @@ -96,7 +231,7 @@ func compute() *result { if inFileHeader { processFileHeaderLine(line) } else { - processFileLine(line) + 
processCodeLine(line) } } @@ -107,5 +242,9 @@ func compute() *result { r.files = append(r.files, name) } + for name, _ := range seenExtensions { + r.fileExtensions = append(r.fileExtensions, name) + } + return &r } diff --git a/result.go b/result.go index 2c5d7f1..e18a01c 100644 --- a/result.go +++ b/result.go @@ -9,6 +9,8 @@ import ( type result struct { //The name of the files seen files []string + //The name of the files seen + fileExtensions []string //How many region we have (i.e. seperated by @@) regions int //How many line were added total @@ -30,6 +32,12 @@ func (r *result) String() string { buffer.WriteString(file) buffer.WriteString("\n") } + buffer.WriteString("Extensions: \n") + for _, ext := range r.fileExtensions { + buffer.WriteString(" -") + buffer.WriteString(ext) + buffer.WriteString("\n") + } r.appendIntValueToBuffer(r.regions, "Regions", &buffer) r.appendIntValueToBuffer(r.lineAdded, "LA", &buffer) r.appendIntValueToBuffer(r.lineDeleted, "LD", &buffer) From 1d648ab82a509405c44bc50d6dbd5c0da6ec9710 Mon Sep 17 00:00:00 2001 From: Paul-Andre Henegar Date: Mon, 2 Apr 2018 08:40:28 -0400 Subject: [PATCH 03/13] Removed regexps, now performance isn't as horrible --- main.go | 102 ++++++++++++++++++++++++++++++------------------------ result.go | 2 ++ 2 files changed, 59 insertions(+), 45 deletions(-) diff --git a/main.go b/main.go index faf91ba..17276ac 100644 --- a/main.go +++ b/main.go @@ -3,7 +3,6 @@ package main import ( "bufio" "bytes" - "errors" "fmt" "log" "os" @@ -36,50 +35,46 @@ type tokenRule struct { regexp regexp.Regexp } -// This structure represents a (not efficient in general) tokenizer. -// It tokenizes a string by everytime trying all regexps and returning the token -// that matches. It assumes that all the regexps match the beginning of the string. 
+func beginsIdentifier(b byte) bool { + return ('a' <= b && b <= 'z') || ('A' <= b && b <= 'Z') || b == '_' +} +func insideIdentifier(b byte) bool { + return ('a' <= b && b <= 'z') || ('A' <= b && b <= 'Z') || ('0' <= b && b <= '9') || b == '_' +} + +type tokenType int + +const ( + identifier tokenType = iota + somethingElse +) + +// A "tokenizer" that splits its input into things that look like identifiers and all other charcters // -// It could be easily replaced by a more efficient and more complete tokenizer. +// It could be replaced by a more complete tokenizer. type tokenizer struct { text []byte - tokenRules []tokenRule } -func (r *tokenizer) Next() (int, string, error) { - if len(r.text) == 0 { - return -1, "", errors.New("tokenizer text empty") - } - for _, rule := range r.tokenRules { - found := rule.regexp.Find(r.text) - if found != nil { - r.text = r.text[len(found):] - return rule.token, string(found), nil +func (r *tokenizer) Next() (token tokenType, text []byte) { + + if beginsIdentifier(r.text[0]) { + var i = 0 + for i < len(r.text) && insideIdentifier(r.text[i]) { + i++ } + var result = r.text[0:i] + r.text = r.text[i:] + return identifier, result } - return -1, "", errors.New("could not match any token") -} -const ( - whitespace = iota - openParen - identifier - anythingElse -) - -var cTokenizer = tokenizer{ - []byte{}, - []tokenRule{ - {whitespace, *regexp.MustCompilePOSIX(`^[\t\n\f\r ]+`)}, - {openParen, *regexp.MustCompilePOSIX(`^\(`)}, - {identifier, *regexp.MustCompilePOSIX(`^[a-zA-Z_][a-zA-Z0-9_]*`)}, - {anythingElse, *regexp.MustCompilePOSIX(`^.`)}, - }, + var result = r.text[0:1] + r.text = r.text[1:] + return somethingElse, result } -func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) { - cTokenizer.text = buffer.Bytes() +func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) { var keywords = map[string]bool{ "if": true, @@ -87,20 +82,37 @@ func countCFunctionCalls(buffer *bytes.Buffer, counts 
*map[string]int) { "while": true, } - var tokens = [3]int{whitespace, whitespace, whitespace} - var strings [3]string + var whitespace = []byte { + ' ', + '\t', + '\n', + '\r', + } + + var tokenizer = tokenizer{ + buffer.Bytes(), + } - for { + var tokens = [3]tokenType{somethingElse, somethingElse, somethingElse} + var strings = [3][]byte{{' '}, {' '}, {' '}} + for { for { // Loop to remove whitespace - if len(cTokenizer.text) == 0 { + if len(tokenizer.text) == 0 { return } - tok, s, err := cTokenizer.Next() - if err != nil { - log.Fatal(err) // This shouldn't happen because of the anythingElse rule + tok, s := tokenizer.Next() + var isWhitespace = false + if tok == somethingElse { + for _, w := range whitespace { + if s[0] == w { + isWhitespace = true + break + } + } } - if tok != whitespace { + if tok == identifier || + (tok == somethingElse && !isWhitespace) { tokens[0], tokens[1] = tokens[1], tokens[2] strings[0], strings[1] = strings[1], strings[2] tokens[2] = tok @@ -111,9 +123,9 @@ func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) { if tokens[0] != identifier && tokens[1] == identifier && - tokens[2] == openParen && - !keywords[strings[1]] { - (*counts)[strings[1]]++ + tokens[2] == somethingElse && strings[2][0] == '(' && + !keywords[string(strings[1])] { + (*counts)[string(strings[1])]++ } } } diff --git a/result.go b/result.go index e18a01c..a815c2b 100644 --- a/result.go +++ b/result.go @@ -26,12 +26,14 @@ type result struct { func (r *result) String() string { var buffer bytes.Buffer + /* buffer.WriteString("Files: \n") for _, file := range r.files { buffer.WriteString(" -") buffer.WriteString(file) buffer.WriteString("\n") } + */ buffer.WriteString("Extensions: \n") for _, ext := range r.fileExtensions { buffer.WriteString(" -") From d51e0779170fb587731d65fef16ccfd93339dda9 Mon Sep 17 00:00:00 2001 From: Paul-Andre Henegar Date: Mon, 2 Apr 2018 10:41:23 -0400 Subject: [PATCH 04/13] Duplicated the C function call counter function and 
adapted for python --- main.go | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 53 insertions(+), 7 deletions(-) diff --git a/main.go b/main.go index 17276ac..7c8680f 100644 --- a/main.go +++ b/main.go @@ -7,7 +7,6 @@ import ( "log" "os" "path/filepath" - "regexp" "strings" "time" ) @@ -30,11 +29,6 @@ func main() { fmt.Println(compute()) } -type tokenRule struct { - token int - regexp regexp.Regexp -} - func beginsIdentifier(b byte) bool { return ('a' <= b && b <= 'z') || ('A' <= b && b <= 'Z') || b == '_' } @@ -87,6 +81,7 @@ func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) { '\t', '\n', '\r', + '\f', } var tokenizer = tokenizer{ @@ -130,14 +125,65 @@ func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) { } } + +func countPythonFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) { + + // Since the open parenthesis for a function call must be on the same line as the name, + var whitespace = []byte { + ' ', + '\t', + '\r', + '\f', + } + + var tokenizer = tokenizer{ + buffer.Bytes(), + } + + var tokens = [3]tokenType{somethingElse, somethingElse, somethingElse} + var strings = [3][]byte{{' '}, {' '}, {' '}} + + for { + for { // Loop to remove whitespace + if len(tokenizer.text) == 0 { + return + } + tok, s := tokenizer.Next() + var isWhitespace = false + if tok == somethingElse { + for _, w := range whitespace { + if s[0] == w { + isWhitespace = true + break + } + } + } + if tok == identifier || + (tok == somethingElse && !isWhitespace) { + tokens[0], tokens[1] = tokens[1], tokens[2] + strings[0], strings[1] = strings[1], strings[2] + tokens[2] = tok + strings[2] = s + break + } + } + + if tokens[1] == identifier && + tokens[2] == somethingElse && strings[2][0] == '(' && + tokens[0] == identifier && string(strings[0]) != "def" { + (*counts)[string(strings[1])]++ + } + } +} + //Given a bytes.Buffer containing a code segment, its extension, and a map to //use for counting, counts the function 
calls func countFunctionCalls(buffer *bytes.Buffer, ext string, counts *map[string]int) { switch ext { case ".c", ".h": countCFunctionCalls(buffer, counts) - case ".py": + countPythonFunctionCalls(buffer, counts) default: From 7dd90f5eaf72eb3b9fab44bca6ef4db164853489 Mon Sep 17 00:00:00 2001 From: Paul-Andre Henegar Date: Mon, 2 Apr 2018 10:59:44 -0400 Subject: [PATCH 05/13] Factored ignoring whitespace into the tokenizer --- main.go | 81 +++++++++++++++++++++++-------------------------------- result.go | 4 +-- 2 files changed, 36 insertions(+), 49 deletions(-) diff --git a/main.go b/main.go index 7c8680f..0034c53 100644 --- a/main.go +++ b/main.go @@ -39,6 +39,7 @@ func insideIdentifier(b byte) bool { type tokenType int const ( + endOfString tokenType = -1 identifier tokenType = iota somethingElse ) @@ -48,10 +49,15 @@ const ( // It could be replaced by a more complete tokenizer. type tokenizer struct { text []byte + toBeIgnored []byte } func (r *tokenizer) Next() (token tokenType, text []byte) { + if len(r.text) == 0 { + return endOfString, nil + } + if beginsIdentifier(r.text[0]) { var i = 0 for i < len(r.text) && insideIdentifier(r.text[i]) { @@ -62,9 +68,20 @@ func (r *tokenizer) Next() (token tokenType, text []byte) { return identifier, result } - var result = r.text[0:1] - r.text = r.text[1:] - return somethingElse, result + for len(r.text) > 0 { + var result = r.text[0:1] + r.text = r.text[1:] + var shouldBeIgnored = false + for _,c := range r.toBeIgnored { + if result[0] == c { + shouldBeIgnored = true + } + if !shouldBeIgnored { + return somethingElse, result + } + } + } + return endOfString, nil } @@ -86,36 +103,21 @@ func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) { var tokenizer = tokenizer{ buffer.Bytes(), + whitespace, } var tokens = [3]tokenType{somethingElse, somethingElse, somethingElse} var strings = [3][]byte{{' '}, {' '}, {' '}} for { - for { // Loop to remove whitespace - if len(tokenizer.text) == 0 { - return - } - tok, 
s := tokenizer.Next() - var isWhitespace = false - if tok == somethingElse { - for _, w := range whitespace { - if s[0] == w { - isWhitespace = true - break - } - } - } - if tok == identifier || - (tok == somethingElse && !isWhitespace) { - tokens[0], tokens[1] = tokens[1], tokens[2] - strings[0], strings[1] = strings[1], strings[2] - tokens[2] = tok - strings[2] = s - break - } + tok, s := tokenizer.Next() + if tok == endOfString { + return } + tokens[0], tokens[1], tokens[2] = tokens[1], tokens[2], tok + strings[0], strings[1], strings[2] = strings[1], strings[2], s + if tokens[0] != identifier && tokens[1] == identifier && tokens[2] == somethingElse && strings[2][0] == '(' && @@ -138,36 +140,21 @@ func countPythonFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) { var tokenizer = tokenizer{ buffer.Bytes(), + whitespace, } var tokens = [3]tokenType{somethingElse, somethingElse, somethingElse} var strings = [3][]byte{{' '}, {' '}, {' '}} for { - for { // Loop to remove whitespace - if len(tokenizer.text) == 0 { - return - } - tok, s := tokenizer.Next() - var isWhitespace = false - if tok == somethingElse { - for _, w := range whitespace { - if s[0] == w { - isWhitespace = true - break - } - } - } - if tok == identifier || - (tok == somethingElse && !isWhitespace) { - tokens[0], tokens[1] = tokens[1], tokens[2] - strings[0], strings[1] = strings[1], strings[2] - tokens[2] = tok - strings[2] = s - break - } + tok, s := tokenizer.Next() + if tok == endOfString { + return } + tokens[0], tokens[1], tokens[2] = tokens[1], tokens[2], tok + strings[0], strings[1], strings[2] = strings[1], strings[2], s + if tokens[1] == identifier && tokens[2] == somethingElse && strings[2][0] == '(' && tokens[0] == identifier && string(strings[0]) != "def" { diff --git a/result.go b/result.go index a815c2b..57c4abc 100644 --- a/result.go +++ b/result.go @@ -26,14 +26,12 @@ type result struct { func (r *result) String() string { var buffer bytes.Buffer - /* 
buffer.WriteString("Files: \n") for _, file := range r.files { buffer.WriteString(" -") buffer.WriteString(file) buffer.WriteString("\n") } - */ buffer.WriteString("Extensions: \n") for _, ext := range r.fileExtensions { buffer.WriteString(" -") @@ -44,6 +42,7 @@ func (r *result) String() string { r.appendIntValueToBuffer(r.lineAdded, "LA", &buffer) r.appendIntValueToBuffer(r.lineDeleted, "LD", &buffer) + buffer.WriteString("Function calls before: \n") for key, value := range r.functionCallsBefore { r.appendIntValueToBuffer(value, key, &buffer) @@ -54,6 +53,7 @@ func (r *result) String() string { r.appendIntValueToBuffer(value, key, &buffer) } + return buffer.String() } From 26a4e1f708ce407df168f76f1cfb2cd091a3c54b Mon Sep 17 00:00:00 2001 From: Paul-Andre Henegar Date: Mon, 2 Apr 2018 11:25:12 -0400 Subject: [PATCH 06/13] now using state functions for reading the diff files --- main.go | 52 +++++++++++++++++++++++----------------------------- result.go | 6 ++++-- 2 files changed, 27 insertions(+), 31 deletions(-) diff --git a/main.go b/main.go index 0034c53..343ae5c 100644 --- a/main.go +++ b/main.go @@ -40,7 +40,7 @@ type tokenType int const ( endOfString tokenType = -1 - identifier tokenType = iota + identifier tokenType = iota somethingElse ) @@ -48,7 +48,7 @@ const ( // // It could be replaced by a more complete tokenizer. 
type tokenizer struct { - text []byte + text []byte toBeIgnored []byte } @@ -72,7 +72,7 @@ func (r *tokenizer) Next() (token tokenType, text []byte) { var result = r.text[0:1] r.text = r.text[1:] var shouldBeIgnored = false - for _,c := range r.toBeIgnored { + for _, c := range r.toBeIgnored { if result[0] == c { shouldBeIgnored = true } @@ -84,7 +84,6 @@ func (r *tokenizer) Next() (token tokenType, text []byte) { return endOfString, nil } - func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) { var keywords = map[string]bool{ @@ -93,7 +92,7 @@ func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) { "while": true, } - var whitespace = []byte { + var whitespace = []byte{ ' ', '\t', '\n', @@ -122,16 +121,15 @@ func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) { tokens[1] == identifier && tokens[2] == somethingElse && strings[2][0] == '(' && !keywords[string(strings[1])] { - (*counts)[string(strings[1])]++ + (*counts)[string(strings[1])]++ } } } - func countPythonFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) { // Since the open parenthesis for a function call must be on the same line as the name, - var whitespace = []byte { + var whitespace = []byte{ ' ', '\t', '\r', @@ -158,7 +156,7 @@ func countPythonFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) { if tokens[1] == identifier && tokens[2] == somethingElse && strings[2][0] == '(' && tokens[0] == identifier && string(strings[0]) != "def" { - (*counts)[string(strings[1])]++ + (*counts)[string(strings[1])]++ } } } @@ -212,13 +210,13 @@ func compute() *result { var currentRegionBefore, currentRegionAfter bytes.Buffer var currentExtension string - // Here I create a small state machine where one of the following closures - // is meant to be executed at every line. 
- var processFileHeaderLine func(line string) - var processRegionHeaderLine func(line string) - var processCodeLine func(line string) + // Here I create a small state machine using state functions + type stateFn func(line string) stateFn + var processFileHeaderLine, + processRegionHeaderLine, + processCodeLine stateFn - processFileHeaderLine = func(line string) { + processFileHeaderLine = func(line string) stateFn { if strings.HasPrefix(line, "+++") || strings.HasPrefix(line, "---") { var fileName = line[len("--- a/"):] seenFiles[fileName] = struct{}{} @@ -229,22 +227,20 @@ func compute() *result { } seenExtensions[fileType] = struct{}{} currentExtension = fileType - } else if strings.HasPrefix(line, "@@") { - inFileHeader = false - processRegionHeaderLine(line) - } else { - // TODO: error + return processRegionHeaderLine(line) } + return processFileHeaderLine } - processRegionHeaderLine = func(line string) { + processRegionHeaderLine = func(line string) stateFn { r.regions++ currentRegionBefore.Reset() currentRegionAfter.Reset() + return processCodeLine } - processCodeLine = func(line string) { + processCodeLine = func(line string) stateFn { if line[0] == ' ' { currentRegionBefore.WriteString(line[1:]) currentRegionBefore.WriteString("\n") @@ -262,22 +258,20 @@ func compute() *result { countFunctionCalls(&currentRegionBefore, currentExtension, &r.functionCallsBefore) countFunctionCalls(&currentRegionAfter, currentExtension, &r.functionCallsAfter) if strings.HasPrefix(line, "@@") { - processRegionHeaderLine(line) + return processRegionHeaderLine(line) } else { inFileHeader = true - processFileHeaderLine(line) + return processFileHeaderLine(line) } } + return processCodeLine } + var state = processFileHeaderLine for scanner.Scan() { line := scanner.Text() - if inFileHeader { - processFileHeaderLine(line) - } else { - processCodeLine(line) - } + state = state(line) // jumping on a trampoline } diffFile.Close() diff --git a/result.go b/result.go index 57c4abc..9f768db 100644 --- 
a/result.go +++ b/result.go @@ -27,11 +27,13 @@ func (r *result) String() string { var buffer bytes.Buffer buffer.WriteString("Files: \n") + /* for _, file := range r.files { buffer.WriteString(" -") buffer.WriteString(file) buffer.WriteString("\n") } + */ buffer.WriteString("Extensions: \n") for _, ext := range r.fileExtensions { buffer.WriteString(" -") @@ -42,7 +44,7 @@ func (r *result) String() string { r.appendIntValueToBuffer(r.lineAdded, "LA", &buffer) r.appendIntValueToBuffer(r.lineDeleted, "LD", &buffer) - + /* buffer.WriteString("Function calls before: \n") for key, value := range r.functionCallsBefore { r.appendIntValueToBuffer(value, key, &buffer) @@ -52,7 +54,7 @@ func (r *result) String() string { for key, value := range r.functionCallsAfter { r.appendIntValueToBuffer(value, key, &buffer) } - + */ return buffer.String() } From 1d7257e119b869def6a0a0d0f2f042b9e2bcb037 Mon Sep 17 00:00:00 2001 From: Paul-Andre Henegar Date: Mon, 2 Apr 2018 11:34:32 -0400 Subject: [PATCH 07/13] Now result only has a single functionCalls map And the values of the map are struct{before, after int} --- main.go | 23 ++++++++++++++++++----- result.go | 23 +++++++++-------------- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/main.go b/main.go index 343ae5c..af99f46 100644 --- a/main.go +++ b/main.go @@ -44,7 +44,7 @@ const ( somethingElse ) -// A "tokenizer" that splits its input into things that look like identifiers and all other charcters +// A "tokenizer" that splits its input into things that look like identifiers and all other characters // // It could be replaced by a more complete tokenizer. 
type tokenizer struct { @@ -185,8 +185,9 @@ func countFunctionCalls(buffer *bytes.Buffer, ext string, counts *map[string]int // list of function calls seen in the diffs and their number of calls func compute() *result { var r result - r.functionCallsBefore = make(map[string]int) - r.functionCallsAfter = make(map[string]int) + var functionCallsBefore = make(map[string]int) + var functionCallsAfter = make(map[string]int) + r.functionCalls = make(map[string]struct{ before, after int }) var seenFiles = make(map[string]struct{}) var seenExtensions = make(map[string]struct{}) @@ -255,8 +256,8 @@ func compute() *result { currentRegionAfter.WriteString(line[1:]) currentRegionAfter.WriteString("\n") } else { - countFunctionCalls(&currentRegionBefore, currentExtension, &r.functionCallsBefore) - countFunctionCalls(&currentRegionAfter, currentExtension, &r.functionCallsAfter) + countFunctionCalls(&currentRegionBefore, currentExtension, &functionCallsBefore) + countFunctionCalls(&currentRegionAfter, currentExtension, &functionCallsAfter) if strings.HasPrefix(line, "@@") { return processRegionHeaderLine(line) } else { @@ -285,5 +286,17 @@ func compute() *result { r.fileExtensions = append(r.fileExtensions, name) } + for name, times := range functionCallsBefore { + var prev = r.functionCalls[name] + prev.before += times + r.functionCalls[name] = prev + } + + for name, times := range functionCallsAfter { + var prev = r.functionCalls[name] + prev.after += times + r.functionCalls[name] = prev + } + return &r } diff --git a/result.go b/result.go index 9f768db..804e5c9 100644 --- a/result.go +++ b/result.go @@ -18,8 +18,7 @@ type result struct { //How many line were deleted totla lineDeleted int //How many times the functionj seen in the code are called before and after - functionCallsBefore map[string]int - functionCallsAfter map[string]int + functionCalls map[string]struct{ before, after int } } //String returns the value of results as a formated string @@ -27,13 +26,11 @@ func (r *result) String() string 
{ var buffer bytes.Buffer buffer.WriteString("Files: \n") - /* for _, file := range r.files { buffer.WriteString(" -") buffer.WriteString(file) buffer.WriteString("\n") } - */ buffer.WriteString("Extensions: \n") for _, ext := range r.fileExtensions { buffer.WriteString(" -") @@ -44,17 +41,15 @@ func (r *result) String() string { r.appendIntValueToBuffer(r.lineAdded, "LA", &buffer) r.appendIntValueToBuffer(r.lineDeleted, "LD", &buffer) - /* - buffer.WriteString("Function calls before: \n") - for key, value := range r.functionCallsBefore { - r.appendIntValueToBuffer(value, key, &buffer) - } - - buffer.WriteString("Function calls after: \n") - for key, value := range r.functionCallsAfter { - r.appendIntValueToBuffer(value, key, &buffer) + buffer.WriteString("Function calls (before, after): \n") + for key, value := range r.functionCalls { + buffer.WriteString(key) + buffer.WriteString(" : ") + buffer.WriteString(strconv.Itoa(value.before)) + buffer.WriteString(", ") + buffer.WriteString(strconv.Itoa(value.after)) + buffer.WriteString("\n") } - */ return buffer.String() } From 8213243549e23c4e1bd04488e19dcf6103ea516d Mon Sep 17 00:00:00 2001 From: Paul-Andre Henegar Date: Mon, 2 Apr 2018 12:16:38 -0400 Subject: [PATCH 08/13] Fixed some bugs Tokenizer now properly returns identifiers after whitespace Now correctly considers the file extension for both source and destination file Now consider /dev/null files --- main.go | 68 ++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 41 insertions(+), 27 deletions(-) diff --git a/main.go b/main.go index af99f46..86aa4a5 100644 --- a/main.go +++ b/main.go @@ -37,29 +37,44 @@ func insideIdentifier(b byte) bool { } type tokenType int - const ( endOfString tokenType = -1 identifier tokenType = iota somethingElse ) -// A "tokenizer" that splits its input into things that look like identifiers and all other characters +// A "tokenizer" that removes characters to be ignored and splits its input +// into things 
that look like identifiers and all other characters. // -// It could be replaced by a more complete tokenizer. +// It could be replaced by a more complete tokenizer. One that takes care of +// comments and strings for example type tokenizer struct { text []byte toBeIgnored []byte } +func byteInSlice(b byte, slice []byte) bool { + for _, c := range slice { + if b == c { + return true + } + } + return false +} + + func (r *tokenizer) Next() (token tokenType, text []byte) { + for len(r.text) > 0 && byteInSlice(r.text[0], r.toBeIgnored) { + r.text = r.text[1:] + } + if len(r.text) == 0 { return endOfString, nil } if beginsIdentifier(r.text[0]) { - var i = 0 + var i = 1 for i < len(r.text) && insideIdentifier(r.text[i]) { i++ } @@ -68,20 +83,10 @@ func (r *tokenizer) Next() (token tokenType, text []byte) { return identifier, result } - for len(r.text) > 0 { - var result = r.text[0:1] - r.text = r.text[1:] - var shouldBeIgnored = false - for _, c := range r.toBeIgnored { - if result[0] == c { - shouldBeIgnored = true - } - if !shouldBeIgnored { - return somethingElse, result - } - } - } - return endOfString, nil + var result = r.text[:1] + r.text = r.text[1:] + return somethingElse, result + } func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) { @@ -128,12 +133,11 @@ func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) { func countPythonFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) { - // Since the open parenthesis for a function call must be on the same line as the name, + // Since the open parenthesis for a function call must be on the same line as + // the name, I only ignore space and tabs. 
var whitespace = []byte{ ' ', '\t', - '\r', - '\f', } var tokenizer = tokenizer{ @@ -155,7 +159,7 @@ func countPythonFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) { if tokens[1] == identifier && tokens[2] == somethingElse && strings[2][0] == '(' && - tokens[0] == identifier && string(strings[0]) != "def" { + !(tokens[0] == identifier && string(strings[0]) == "def") { (*counts)[string(strings[1])]++ } } @@ -209,7 +213,7 @@ func compute() *result { inFileHeader := true var currentRegionBefore, currentRegionAfter bytes.Buffer - var currentExtension string + var currentExtensionBefore, currentExtensionAfter string // Here I create a small state machine using state functions type stateFn func(line string) stateFn @@ -219,7 +223,12 @@ func compute() *result { processFileHeaderLine = func(line string) stateFn { if strings.HasPrefix(line, "+++") || strings.HasPrefix(line, "---") { - var fileName = line[len("--- a/"):] + + var fileName = line[len("--- "):] + if (fileName != "/dev/null") { + fileName = fileName[len("a/"):] + } + seenFiles[fileName] = struct{}{} var fileType = filepath.Ext(fileName) @@ -227,7 +236,12 @@ func compute() *result { fileType = filepath.Base(fileName) } seenExtensions[fileType] = struct{}{} - currentExtension = fileType + if (line[0] == '-') { + currentExtensionBefore = fileType + } else { + currentExtensionAfter = fileType + } + } else if strings.HasPrefix(line, "@@") { return processRegionHeaderLine(line) } @@ -256,8 +270,8 @@ func compute() *result { currentRegionAfter.WriteString(line[1:]) currentRegionAfter.WriteString("\n") } else { - countFunctionCalls(¤tRegionBefore, currentExtension, &functionCallsBefore) - countFunctionCalls(¤tRegionAfter, currentExtension, &functionCallsAfter) + countFunctionCalls(¤tRegionBefore, currentExtensionBefore, &functionCallsBefore) + countFunctionCalls(¤tRegionAfter, currentExtensionAfter, &functionCallsAfter) if strings.HasPrefix(line, "@@") { return processRegionHeaderLine(line) } else { From 
be0dbc5b33a2ade6439678c5d6b6f53dabb0d65a Mon Sep 17 00:00:00 2001 From: Paul-Andre Henegar Date: Mon, 2 Apr 2018 12:56:07 -0400 Subject: [PATCH 09/13] Move state functions outside the loop and do keyword checks for python --- main.go | 169 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 91 insertions(+), 78 deletions(-) diff --git a/main.go b/main.go index 86aa4a5..c2c6f71 100644 --- a/main.go +++ b/main.go @@ -95,6 +95,7 @@ func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) { "if": true, "for": true, "while": true, + "else": true, } var whitespace = []byte{ @@ -122,10 +123,9 @@ func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) { tokens[0], tokens[1], tokens[2] = tokens[1], tokens[2], tok strings[0], strings[1], strings[2] = strings[1], strings[2], s - if tokens[0] != identifier && - tokens[1] == identifier && - tokens[2] == somethingElse && strings[2][0] == '(' && - !keywords[string(strings[1])] { + if !(tokens[0] == identifier && !keywords[string(strings[0])]) && + tokens[1] == identifier && !keywords[string(strings[1])] && + tokens[2] == somethingElse && strings[2][0] == '(' { (*counts)[string(strings[1])]++ } } @@ -140,6 +140,18 @@ func countPythonFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) { '\t', } + var keywords = map[string]bool{ + "if": true, + "in": true, + "or": true, + "and": true, + "for": true, + "while": true, + "else": true, + "elif": true, + "def": true, + } + var tokenizer = tokenizer{ buffer.Bytes(), whitespace, @@ -157,9 +169,9 @@ func countPythonFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) { tokens[0], tokens[1], tokens[2] = tokens[1], tokens[2], tok strings[0], strings[1], strings[2] = strings[1], strings[2], s - if tokens[1] == identifier && - tokens[2] == somethingElse && strings[2][0] == '(' && - !(tokens[0] == identifier && string(strings[0]) == "def") { + if !(tokens[0] == identifier && string(strings[0]) == "def") && + tokens[1] == identifier 
&& !keywords[string(tokens[1])] && + tokens[2] == somethingElse && strings[2][0] == '(' { (*counts)[string(strings[1])]++ } } @@ -196,6 +208,77 @@ func compute() *result { var seenFiles = make(map[string]struct{}) var seenExtensions = make(map[string]struct{}) + var currentRegionBefore, currentRegionAfter bytes.Buffer + var currentExtensionBefore, currentExtensionAfter string + + // Here I create a small state machine using state functions + type stateFn func(line string) stateFn + var processFileHeaderLine, + processRegionHeaderLine, + processCodeLine stateFn + + processFileHeaderLine = func(line string) stateFn { + if strings.HasPrefix(line, "+++") || strings.HasPrefix(line, "---") { + + var fileName = line[len("--- "):] + if (fileName != "/dev/null") { + fileName = fileName[len("a/"):] + } + + seenFiles[fileName] = struct{}{} + + var fileType = filepath.Ext(fileName) + if fileType == "" { + // If something doesn't have an extension, we assume the name itself + // is significant, like "Makefile" + fileType = filepath.Base(fileName) + } + seenExtensions[fileType] = struct{}{} + if (line[0] == '-') { + currentExtensionBefore = fileType + } else { + currentExtensionAfter = fileType + } + + } else if strings.HasPrefix(line, "@@") { + return processRegionHeaderLine(line) + } + return processFileHeaderLine + } + + processRegionHeaderLine = func(line string) stateFn { + r.regions++ + currentRegionBefore.Reset() + currentRegionAfter.Reset() + return processCodeLine + } + + processCodeLine = func(line string) stateFn { + if line[0] == ' ' { + currentRegionBefore.WriteString(line[1:]) + currentRegionBefore.WriteString("\n") + currentRegionAfter.WriteString(line[1:]) + currentRegionAfter.WriteString("\n") + } else if line[0] == '-' { + r.lineDeleted++ + currentRegionBefore.WriteString(line[1:]) + currentRegionBefore.WriteString("\n") + } else if line[0] == '+' { + r.lineAdded++ + currentRegionAfter.WriteString(line[1:]) + currentRegionAfter.WriteString("\n") + } else { + 
countFunctionCalls(¤tRegionBefore, currentExtensionBefore, &functionCallsBefore) + countFunctionCalls(¤tRegionAfter, currentExtensionAfter, &functionCallsAfter) + if strings.HasPrefix(line, "@@") { + return processRegionHeaderLine(line) + } else { + return processFileHeaderLine(line) + } + } + return processCodeLine + } + diffnames, err := filepath.Glob("./diffs/*.diff") if err != nil { log.Fatal(err) @@ -210,83 +293,13 @@ func compute() *result { scanner := bufio.NewScanner(diffFile) - inFileHeader := true - - var currentRegionBefore, currentRegionAfter bytes.Buffer - var currentExtensionBefore, currentExtensionAfter string - - // Here I create a small state machine using state functions - type stateFn func(line string) stateFn - var processFileHeaderLine, - processRegionHeaderLine, - processCodeLine stateFn - - processFileHeaderLine = func(line string) stateFn { - if strings.HasPrefix(line, "+++") || strings.HasPrefix(line, "---") { - - var fileName = line[len("--- "):] - if (fileName != "/dev/null") { - fileName = fileName[len("a/"):] - } - - seenFiles[fileName] = struct{}{} - var fileType = filepath.Ext(fileName) - if fileType == "" { - fileType = filepath.Base(fileName) - } - seenExtensions[fileType] = struct{}{} - if (line[0] == '-') { - currentExtensionBefore = fileType - } else { - currentExtensionAfter = fileType - } - - } else if strings.HasPrefix(line, "@@") { - return processRegionHeaderLine(line) - } - return processFileHeaderLine - } - - processRegionHeaderLine = func(line string) stateFn { - r.regions++ - currentRegionBefore.Reset() - currentRegionAfter.Reset() - return processCodeLine - } - - processCodeLine = func(line string) stateFn { - if line[0] == ' ' { - currentRegionBefore.WriteString(line[1:]) - currentRegionBefore.WriteString("\n") - currentRegionAfter.WriteString(line[1:]) - currentRegionAfter.WriteString("\n") - } else if line[0] == '-' { - r.lineDeleted++ - currentRegionBefore.WriteString(line[1:]) - 
currentRegionBefore.WriteString("\n") - } else if line[0] == '+' { - r.lineAdded++ - currentRegionAfter.WriteString(line[1:]) - currentRegionAfter.WriteString("\n") - } else { - countFunctionCalls(¤tRegionBefore, currentExtensionBefore, &functionCallsBefore) - countFunctionCalls(¤tRegionAfter, currentExtensionAfter, &functionCallsAfter) - if strings.HasPrefix(line, "@@") { - return processRegionHeaderLine(line) - } else { - inFileHeader = true - return processFileHeaderLine(line) - } - } - return processCodeLine - } var state = processFileHeaderLine for scanner.Scan() { line := scanner.Text() - state = state(line) // jumping on a trampoline + state = state(line) } diffFile.Close() From c4374657541572615277bbce0835a631a0a42dce Mon Sep 17 00:00:00 2001 From: Paul-Andre Henegar Date: Mon, 2 Apr 2018 13:11:49 -0400 Subject: [PATCH 10/13] Put everything related to counting function calls into a different file --- countFunctionCalls.go | 168 +++++++++++++++++++++++++++++++++++++++++ main.go | 172 +----------------------------------------- 2 files changed, 172 insertions(+), 168 deletions(-) create mode 100644 countFunctionCalls.go diff --git a/countFunctionCalls.go b/countFunctionCalls.go new file mode 100644 index 0000000..d8a4250 --- /dev/null +++ b/countFunctionCalls.go @@ -0,0 +1,168 @@ +package main + +import ( + "bytes" +) + +func beginsIdentifier(b byte) bool { + return ('a' <= b && b <= 'z') || ('A' <= b && b <= 'Z') || b == '_' +} + +func insideIdentifier(b byte) bool { + return ('a' <= b && b <= 'z') || ('A' <= b && b <= 'Z') || ('0' <= b && b <= '9') || b == '_' +} + +type tokenType int + +const ( + endOfString tokenType = -1 + identifier tokenType = iota + somethingElse +) + +// A "tokenizer" that removes characters to be ignored and splits its input +// into things that look like identifiers and all other characters. +// +// It could be replaced by a more complete tokenizer. 
One that takes care of +// comments and strings for example +type tokenizer struct { + text []byte + toBeIgnored []byte +} + +func byteInSlice(b byte, slice []byte) bool { + for _, c := range slice { + if b == c { + return true + } + } + return false +} + +func (r *tokenizer) Next() (token tokenType, text []byte) { + + for len(r.text) > 0 && byteInSlice(r.text[0], r.toBeIgnored) { + r.text = r.text[1:] + } + + if len(r.text) == 0 { + return endOfString, nil + } + + if beginsIdentifier(r.text[0]) { + var i = 1 + for i < len(r.text) && insideIdentifier(r.text[i]) { + i++ + } + var result = r.text[0:i] + r.text = r.text[i:] + return identifier, result + } + + var result = r.text[:1] + r.text = r.text[1:] + return somethingElse, result + +} + +func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) { + + var keywords = map[string]bool{ + "if": true, + "for": true, + "while": true, + "else": true, + } + + var whitespace = []byte{ + ' ', + '\t', + '\n', + '\r', + '\f', + } + + var tokenizer = tokenizer{ + buffer.Bytes(), + whitespace, + } + + var tokens = [3]tokenType{somethingElse, somethingElse, somethingElse} + var strings = [3][]byte{{' '}, {' '}, {' '}} + + for { + tok, s := tokenizer.Next() + if tok == endOfString { + return + } + + tokens[0], tokens[1], tokens[2] = tokens[1], tokens[2], tok + strings[0], strings[1], strings[2] = strings[1], strings[2], s + + if !(tokens[0] == identifier && !keywords[string(strings[0])]) && + tokens[1] == identifier && !keywords[string(strings[1])] && + tokens[2] == somethingElse && strings[2][0] == '(' { + (*counts)[string(strings[1])]++ + } + } +} + +func countPythonFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) { + + // Since the open parenthesis for a function call must be on the same line as + // the name, I only ignore space and tabs. 
+ var whitespace = []byte{ + ' ', + '\t', + } + + var keywords = map[string]bool{ + "if": true, + "in": true, + "or": true, + "and": true, + "for": true, + "while": true, + "else": true, + "elif": true, + "def": true, + } + + var tokenizer = tokenizer{ + buffer.Bytes(), + whitespace, + } + + var tokens = [3]tokenType{somethingElse, somethingElse, somethingElse} + var strings = [3][]byte{{' '}, {' '}, {' '}} + + for { + tok, s := tokenizer.Next() + if tok == endOfString { + return + } + + tokens[0], tokens[1], tokens[2] = tokens[1], tokens[2], tok + strings[0], strings[1], strings[2] = strings[1], strings[2], s + + if !(tokens[0] == identifier && string(strings[0]) == "def") && + tokens[1] == identifier && !keywords[string(tokens[1])] && + tokens[2] == somethingElse && strings[2][0] == '(' { + (*counts)[string(strings[1])]++ + } + } +} + +//Given a bytes.Buffer containing a code segment, its extension, and a map to +//use for counting, counts the function calls +func countFunctionCalls(buffer *bytes.Buffer, ext string, counts *map[string]int) { + switch ext { + case ".c", ".h": + countCFunctionCalls(buffer, counts) + case ".py": + countPythonFunctionCalls(buffer, counts) + + default: + + } +} diff --git a/main.go b/main.go index c2c6f71..edd6731 100644 --- a/main.go +++ b/main.go @@ -29,168 +29,6 @@ func main() { fmt.Println(compute()) } -func beginsIdentifier(b byte) bool { - return ('a' <= b && b <= 'z') || ('A' <= b && b <= 'Z') || b == '_' -} -func insideIdentifier(b byte) bool { - return ('a' <= b && b <= 'z') || ('A' <= b && b <= 'Z') || ('0' <= b && b <= '9') || b == '_' -} - -type tokenType int -const ( - endOfString tokenType = -1 - identifier tokenType = iota - somethingElse -) - -// A "tokenizer" that removes characters to be ignored and splits its input -// into things that look like identifiers and all other characters. -// -// It could be replaced by a more complete tokenizer. 
One that takes care of -// comments and strings for example -type tokenizer struct { - text []byte - toBeIgnored []byte -} - -func byteInSlice(b byte, slice []byte) bool { - for _, c := range slice { - if b == c { - return true - } - } - return false -} - - -func (r *tokenizer) Next() (token tokenType, text []byte) { - - for len(r.text) > 0 && byteInSlice(r.text[0], r.toBeIgnored) { - r.text = r.text[1:] - } - - if len(r.text) == 0 { - return endOfString, nil - } - - if beginsIdentifier(r.text[0]) { - var i = 1 - for i < len(r.text) && insideIdentifier(r.text[i]) { - i++ - } - var result = r.text[0:i] - r.text = r.text[i:] - return identifier, result - } - - var result = r.text[:1] - r.text = r.text[1:] - return somethingElse, result - -} - -func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) { - - var keywords = map[string]bool{ - "if": true, - "for": true, - "while": true, - "else": true, - } - - var whitespace = []byte{ - ' ', - '\t', - '\n', - '\r', - '\f', - } - - var tokenizer = tokenizer{ - buffer.Bytes(), - whitespace, - } - - var tokens = [3]tokenType{somethingElse, somethingElse, somethingElse} - var strings = [3][]byte{{' '}, {' '}, {' '}} - - for { - tok, s := tokenizer.Next() - if tok == endOfString { - return - } - - tokens[0], tokens[1], tokens[2] = tokens[1], tokens[2], tok - strings[0], strings[1], strings[2] = strings[1], strings[2], s - - if !(tokens[0] == identifier && !keywords[string(strings[0])]) && - tokens[1] == identifier && !keywords[string(strings[1])] && - tokens[2] == somethingElse && strings[2][0] == '(' { - (*counts)[string(strings[1])]++ - } - } -} - -func countPythonFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) { - - // Since the open parenthesis for a function call must be on the same line as - // the name, I only ignore space and tabs. 
- var whitespace = []byte{ - ' ', - '\t', - } - - var keywords = map[string]bool{ - "if": true, - "in": true, - "or": true, - "and": true, - "for": true, - "while": true, - "else": true, - "elif": true, - "def": true, - } - - var tokenizer = tokenizer{ - buffer.Bytes(), - whitespace, - } - - var tokens = [3]tokenType{somethingElse, somethingElse, somethingElse} - var strings = [3][]byte{{' '}, {' '}, {' '}} - - for { - tok, s := tokenizer.Next() - if tok == endOfString { - return - } - - tokens[0], tokens[1], tokens[2] = tokens[1], tokens[2], tok - strings[0], strings[1], strings[2] = strings[1], strings[2], s - - if !(tokens[0] == identifier && string(strings[0]) == "def") && - tokens[1] == identifier && !keywords[string(tokens[1])] && - tokens[2] == somethingElse && strings[2][0] == '(' { - (*counts)[string(strings[1])]++ - } - } -} - -//Given a bytes.Buffer containing a code segment, its extension, and a map to -//use for counting, counts the function calls -func countFunctionCalls(buffer *bytes.Buffer, ext string, counts *map[string]int) { - switch ext { - case ".c", ".h": - countCFunctionCalls(buffer, counts) - case ".py": - countPythonFunctionCalls(buffer, counts) - - default: - - } -} - //compute parses the git diffs in ./diffs and returns //a result struct that contains all the relevant informations //about these diffs @@ -214,14 +52,14 @@ func compute() *result { // Here I create a small state machine using state functions type stateFn func(line string) stateFn var processFileHeaderLine, - processRegionHeaderLine, - processCodeLine stateFn + processRegionHeaderLine, + processCodeLine stateFn processFileHeaderLine = func(line string) stateFn { if strings.HasPrefix(line, "+++") || strings.HasPrefix(line, "---") { var fileName = line[len("--- "):] - if (fileName != "/dev/null") { + if fileName != "/dev/null" { fileName = fileName[len("a/"):] } @@ -234,7 +72,7 @@ func compute() *result { fileType = filepath.Base(fileName) } seenExtensions[fileType] = 
struct{}{} - if (line[0] == '-') { + if line[0] == '-' { currentExtensionBefore = fileType } else { currentExtensionAfter = fileType @@ -293,8 +131,6 @@ func compute() *result { scanner := bufio.NewScanner(diffFile) - - var state = processFileHeaderLine for scanner.Scan() { line := scanner.Text() From 743bf342f8ffd91531e704543b19156a35754503 Mon Sep 17 00:00:00 2001 From: Paul-Andre Henegar Date: Mon, 2 Apr 2018 13:19:14 -0400 Subject: [PATCH 11/13] Made some things slightly more clear --- countFunctionCalls.go | 19 ++++++++++--------- result.go | 3 ++- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/countFunctionCalls.go b/countFunctionCalls.go index d8a4250..abc8d8e 100644 --- a/countFunctionCalls.go +++ b/countFunctionCalls.go @@ -24,7 +24,7 @@ const ( // into things that look like identifiers and all other characters. // // It could be replaced by a more complete tokenizer. One that takes care of -// comments and strings for example +// comments and strings for example. 
type tokenizer struct { text []byte toBeIgnored []byte @@ -39,6 +39,7 @@ func byteInSlice(b byte, slice []byte) bool { return false } +// Get next token and text slice that goes with it func (r *tokenizer) Next() (token tokenType, text []byte) { for len(r.text) > 0 && byteInSlice(r.text[0], r.toBeIgnored) { @@ -67,13 +68,6 @@ func (r *tokenizer) Next() (token tokenType, text []byte) { func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) { - var keywords = map[string]bool{ - "if": true, - "for": true, - "while": true, - "else": true, - } - var whitespace = []byte{ ' ', '\t', @@ -82,6 +76,13 @@ func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) { '\f', } + var keywords = map[string]bool{ + "if": true, + "for": true, + "while": true, + "else": true, + } + var tokenizer = tokenizer{ buffer.Bytes(), whitespace, @@ -110,7 +111,7 @@ func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) { func countPythonFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) { // Since the open parenthesis for a function call must be on the same line as - // the name, I only ignore space and tabs. + // the name, I only ignore spaces and tabs. 
var whitespace = []byte{ ' ', '\t', diff --git a/result.go b/result.go index 804e5c9..f705ab7 100644 --- a/result.go +++ b/result.go @@ -33,7 +33,7 @@ func (r *result) String() string { } buffer.WriteString("Extensions: \n") for _, ext := range r.fileExtensions { - buffer.WriteString(" -") + buffer.WriteString("\t-") buffer.WriteString(ext) buffer.WriteString("\n") } @@ -43,6 +43,7 @@ func (r *result) String() string { buffer.WriteString("Function calls (before, after): \n") for key, value := range r.functionCalls { + buffer.WriteString("\t") buffer.WriteString(key) buffer.WriteString(" : ") buffer.WriteString(strconv.Itoa(value.before)) From 2a6677b89bb2bf619d6c048d32f79f43c9d1fb66 Mon Sep 17 00:00:00 2001 From: Paul-Andre Henegar Date: Mon, 2 Apr 2018 13:27:16 -0400 Subject: [PATCH 12/13] Fixed processing last region; some comments --- main.go | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/main.go b/main.go index edd6731..b6439db 100644 --- a/main.go +++ b/main.go @@ -43,13 +43,18 @@ func compute() *result { var functionCallsAfter = make(map[string]int) r.functionCalls = make(map[string]struct{ before, after int }) + // I use sets instead of lists for files that we've seen var seenFiles = make(map[string]struct{}) var seenExtensions = make(map[string]struct{}) + // When reading in a region, I will be reading it into these buffers var currentRegionBefore, currentRegionAfter bytes.Buffer + + // Extensions for the file. Used to decide how to count functions var currentExtensionBefore, currentExtensionAfter string - // Here I create a small state machine using state functions + // Here I create a small state machine using state functions to read the + // relevent info from the diff files. 
type stateFn func(line string) stateFn var processFileHeaderLine, processRegionHeaderLine, @@ -71,6 +76,9 @@ func compute() *result { // is significant, like "Makefile" fileType = filepath.Base(fileName) } + if fileName == "/dev/null" { + fileType = "/dev/null" + } seenExtensions[fileType] = struct{}{} if line[0] == '-' { currentExtensionBefore = fileType @@ -86,8 +94,6 @@ func compute() *result { processRegionHeaderLine = func(line string) stateFn { r.regions++ - currentRegionBefore.Reset() - currentRegionAfter.Reset() return processCodeLine } @@ -106,8 +112,12 @@ func compute() *result { currentRegionAfter.WriteString(line[1:]) currentRegionAfter.WriteString("\n") } else { + // If we finished reading in the region, we process it countFunctionCalls(¤tRegionBefore, currentExtensionBefore, &functionCallsBefore) countFunctionCalls(¤tRegionAfter, currentExtensionAfter, &functionCallsAfter) + currentRegionBefore.Reset() + currentRegionAfter.Reset() + if strings.HasPrefix(line, "@@") { return processRegionHeaderLine(line) } else { @@ -137,10 +147,16 @@ func compute() *result { state = state(line) } + // Process the last region + countFunctionCalls(¤tRegionBefore, currentExtensionBefore, &functionCallsBefore) + countFunctionCalls(¤tRegionAfter, currentExtensionAfter, &functionCallsAfter) + currentRegionBefore.Reset() + currentRegionAfter.Reset() diffFile.Close() } + // Turn set into list for name, _ := range seenFiles { r.files = append(r.files, name) } @@ -149,12 +165,12 @@ func compute() *result { r.fileExtensions = append(r.fileExtensions, name) } + // Combine the two functionCalls maps into one for name, times := range functionCallsBefore { var prev = r.functionCalls[name] prev.before += times r.functionCalls[name] = prev } - for name, times := range functionCallsAfter { var prev = r.functionCalls[name] prev.after += times From e1024b6939f62f1839305c7c484c3ddc0a4c71cb Mon Sep 17 00:00:00 2001 From: Paul-Andre Henegar Date: Mon, 2 Apr 2018 13:29:07 -0400 Subject: 
[PATCH 13/13] Removed extension list from result --- main.go | 6 ------ result.go | 8 -------- 2 files changed, 14 deletions(-) diff --git a/main.go b/main.go index b6439db..841ceeb 100644 --- a/main.go +++ b/main.go @@ -45,7 +45,6 @@ func compute() *result { // I use sets instead of lists for files that we've seen var seenFiles = make(map[string]struct{}) - var seenExtensions = make(map[string]struct{}) // When reading in a region, I will be reading it into these buffers var currentRegionBefore, currentRegionAfter bytes.Buffer @@ -79,7 +78,6 @@ func compute() *result { if fileName == "/dev/null" { fileType = "/dev/null" } - seenExtensions[fileType] = struct{}{} if line[0] == '-' { currentExtensionBefore = fileType } else { @@ -161,10 +159,6 @@ func compute() *result { r.files = append(r.files, name) } - for name, _ := range seenExtensions { - r.fileExtensions = append(r.fileExtensions, name) - } - // Combine the two functionCalls maps into one for name, times := range functionCallsBefore { var prev = r.functionCalls[name] diff --git a/result.go b/result.go index f705ab7..ddfd8cb 100644 --- a/result.go +++ b/result.go @@ -10,8 +10,6 @@ type result struct { //The name of the files seen files []string //The name of the files seen - fileExtensions []string - //How many region we have (i.e. seperated by @@) regions int //How many line were added total lineAdded int @@ -31,12 +29,6 @@ func (r *result) String() string { buffer.WriteString(file) buffer.WriteString("\n") } - buffer.WriteString("Extensions: \n") - for _, ext := range r.fileExtensions { - buffer.WriteString("\t-") - buffer.WriteString(ext) - buffer.WriteString("\n") - } r.appendIntValueToBuffer(r.regions, "Regions", &buffer) r.appendIntValueToBuffer(r.lineAdded, "LA", &buffer) r.appendIntValueToBuffer(r.lineDeleted, "LD", &buffer)