From eb46baad41da913d81dca4f87a510c31e39c875d Mon Sep 17 00:00:00 2001
From: Paul-Andre Henegar <paul-andre.henegar@mail.mcgill.ca>
Date: Sun, 1 Apr 2018 01:23:31 -0400
Subject: [PATCH 01/13] Completed the first four parts

---
 main.go   | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 result.go | 14 +++++++---
 2 files changed, 85 insertions(+), 5 deletions(-)

diff --git a/main.go b/main.go
index f62aa00..fc4f90c 100644
--- a/main.go
+++ b/main.go
@@ -1,7 +1,12 @@
 package main
 
 import (
+	"bufio"
 	"fmt"
+	"log"
+	"os"
+	"path/filepath"
+	"strings"
 	"time"
 )
 
@@ -32,6 +37,75 @@ func main() {
 //	number of line deleted
 //	list of function calls seen in the diffs and their number of calls
 func compute() *result {
+	var r result
 
-	return nil
+	// A set to keep track of the files we've seen in the diffs
+	var seenFiles = make(map[string]struct{})
+
+	diffnames, err := filepath.Glob("./diffs/*.diff")
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	for _, diffname := range diffnames {
+
+		diffFile, err := os.Open(diffname)
+		if err != nil {
+			log.Fatal(err)
+		}
+
+		scanner := bufio.NewScanner(diffFile)
+
+		inFileHeader := true
+
+		var processBlockHeaderLine func(line string)
+
+		processFileHeaderLine := func(line string) {
+			if strings.HasPrefix(line, "+++") || strings.HasPrefix(line, "---") {
+				seenFiles[line[6:]] = struct{}{} // Add file to set
+			} else if strings.HasPrefix(line, "@@") {
+				inFileHeader = false
+				processBlockHeaderLine(line)
+			} else {
+				// TODO: error
+			}
+		}
+
+		processBlockHeaderLine = func(line string) {
+			r.regions++
+		}
+
+		processFileLine := func(line string) {
+			if line[0] == ' ' {
+
+			} else if line[0] == '-' {
+				r.lineDeleted++
+			} else if line[0] == '+' {
+				r.lineAdded++
+			} else if strings.HasPrefix(line, "@@") {
+				processBlockHeaderLine(line)
+			} else {
+				inFileHeader = true
+				processFileHeaderLine(line)
+			}
+		}
+
+		for scanner.Scan() {
+			line := scanner.Text()
+
+			if inFileHeader {
+				processFileHeaderLine(line)
+			} else {
+				processFileLine(line)
+			}
+		}
+
+		diffFile.Close()
+	}
+
+	for name, _ := range seenFiles {
+		r.files = append(r.files, name)
+	}
+
+	return &r
 }
diff --git a/result.go b/result.go
index 7e78236..2c5d7f1 100644
--- a/result.go
+++ b/result.go
@@ -15,8 +15,9 @@ type result struct {
 	lineAdded int
 	//How many line were deleted totla
 	lineDeleted int
-	//How many times the function seen in the code are called.
-	functionCalls map[string]int
+	//How many times the functionj seen in the code are called before and after
+	functionCallsBefore map[string]int
+	functionCallsAfter  map[string]int
 }
 
 //String returns the value of results as a formated string
@@ -33,8 +34,13 @@ func (r *result) String() string {
 	r.appendIntValueToBuffer(r.lineAdded, "LA", &buffer)
 	r.appendIntValueToBuffer(r.lineDeleted, "LD", &buffer)
 
-	buffer.WriteString("Functions calls: \n")
-	for key, value := range r.functionCalls {
+	buffer.WriteString("Function calls before: \n")
+	for key, value := range r.functionCallsBefore {
+		r.appendIntValueToBuffer(value, key, &buffer)
+	}
+
+	buffer.WriteString("Function calls after: \n")
+	for key, value := range r.functionCallsAfter {
 		r.appendIntValueToBuffer(value, key, &buffer)
 	}
 

From 0c5fc70f58ac655051e3f4f2cb2180261e2e678b Mon Sep 17 00:00:00 2001
From: Paul-Andre Henegar <paul-andre.henegar@mail.mcgill.ca>
Date: Mon, 2 Apr 2018 00:37:57 -0400
Subject: [PATCH 02/13] Added C function call counter, with horrible
 performance

---
 main.go   | 165 +++++++++++++++++++++++++++++++++++++++++++++++++-----
 result.go |   8 +++
 2 files changed, 160 insertions(+), 13 deletions(-)

diff --git a/main.go b/main.go
index fc4f90c..faf91ba 100644
--- a/main.go
+++ b/main.go
@@ -2,10 +2,13 @@ package main
 
 import (
 	"bufio"
+	"bytes"
+	"errors"
 	"fmt"
 	"log"
 	"os"
 	"path/filepath"
+	"regexp"
 	"strings"
 	"time"
 )
@@ -28,6 +31,107 @@ func main() {
 	fmt.Println(compute())
 }
 
+type tokenRule struct {
+	token  int
+	regexp regexp.Regexp
+}
+
+// This structure represents a (not efficient in general) tokenizer.
+// It tokenizes a string by everytime trying all regexps and returning the token
+// that matches. It assumes that all the regexps match the beginning of the string.
+//
+// It could be easily replaced by a more efficient and more complete tokenizer.
+type tokenizer struct {
+	text       []byte
+	tokenRules []tokenRule
+}
+
+func (r *tokenizer) Next() (int, string, error) {
+	if len(r.text) == 0 {
+		return -1, "", errors.New("tokenizer text empty")
+	}
+	for _, rule := range r.tokenRules {
+		found := rule.regexp.Find(r.text)
+		if found != nil {
+			r.text = r.text[len(found):]
+			return rule.token, string(found), nil
+		}
+	}
+	return -1, "", errors.New("could not match any token")
+}
+
+const (
+	whitespace = iota
+	openParen
+	identifier
+	anythingElse
+)
+
+var cTokenizer = tokenizer{
+	[]byte{},
+	[]tokenRule{
+		{whitespace, *regexp.MustCompilePOSIX(`^[\t\n\f\r ]+`)},
+		{openParen, *regexp.MustCompilePOSIX(`^\(`)},
+		{identifier, *regexp.MustCompilePOSIX(`^[a-zA-Z_][a-zA-Z0-9_]*`)},
+		{anythingElse, *regexp.MustCompilePOSIX(`^.`)},
+	},
+}
+
+func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) {
+
+	cTokenizer.text = buffer.Bytes()
+
+	var keywords = map[string]bool{
+		"if":    true,
+		"for":   true,
+		"while": true,
+	}
+
+	var tokens = [3]int{whitespace, whitespace, whitespace}
+	var strings [3]string
+
+	for {
+
+		for { // Loop to remove whitespace
+			if len(cTokenizer.text) == 0 {
+				return
+			}
+			tok, s, err := cTokenizer.Next()
+			if err != nil {
+				log.Fatal(err) // This shouldn't happen because of the anythingElse rule
+			}
+			if tok != whitespace {
+				tokens[0], tokens[1] = tokens[1], tokens[2]
+				strings[0], strings[1] = strings[1], strings[2]
+				tokens[2] = tok
+				strings[2] = s
+				break
+			}
+		}
+
+		if tokens[0] != identifier &&
+			tokens[1] == identifier &&
+			tokens[2] == openParen &&
+			!keywords[strings[1]] {
+			(*counts)[strings[1]]++
+		}
+	}
+}
+
+//Given a bytes.Buffer containing a code segment, its extension, and a map to
+//use for counting, counts the function calls
+func countFunctionCalls(buffer *bytes.Buffer, ext string, counts *map[string]int) {
+	switch ext {
+	case ".c", ".h":
+		countCFunctionCalls(buffer, counts)
+
+	case ".py":
+
+	default:
+
+	}
+}
+
 //compute parses the git diffs in ./diffs and returns
 //a result struct that contains all the relevant informations
 //about these diffs
@@ -38,9 +142,11 @@ func main() {
 //	list of function calls seen in the diffs and their number of calls
 func compute() *result {
 	var r result
+	r.functionCallsBefore = make(map[string]int)
+	r.functionCallsAfter = make(map[string]int)
 
-	// A set to keep track of the files we've seen in the diffs
 	var seenFiles = make(map[string]struct{})
+	var seenExtensions = make(map[string]struct{})
 
 	diffnames, err := filepath.Glob("./diffs/*.diff")
 	if err != nil {
@@ -58,35 +164,64 @@ func compute() *result {
 
 		inFileHeader := true
 
-		var processBlockHeaderLine func(line string)
+		var currentRegionBefore, currentRegionAfter bytes.Buffer
+		var currentExtension string
+
+		// Here I create a small state machine where one of the following closures
+		// is meant to be executed at every line.
+		var processFileHeaderLine func(line string)
+		var processRegionHeaderLine func(line string)
+		var processCodeLine func(line string)
 
-		processFileHeaderLine := func(line string) {
+		processFileHeaderLine = func(line string) {
 			if strings.HasPrefix(line, "+++") || strings.HasPrefix(line, "---") {
-				seenFiles[line[6:]] = struct{}{} // Add file to set
+				var fileName = line[len("--- a/"):]
+				seenFiles[fileName] = struct{}{}
+
+				var fileType = filepath.Ext(fileName)
+				if fileType == "" {
+					fileType = filepath.Base(fileName)
+				}
+				seenExtensions[fileType] = struct{}{}
+				currentExtension = fileType
+
 			} else if strings.HasPrefix(line, "@@") {
 				inFileHeader = false
-				processBlockHeaderLine(line)
+				processRegionHeaderLine(line)
 			} else {
 				// TODO: error
 			}
 		}
 
-		processBlockHeaderLine = func(line string) {
+		processRegionHeaderLine = func(line string) {
 			r.regions++
+			currentRegionBefore.Reset()
+			currentRegionAfter.Reset()
 		}
 
-		processFileLine := func(line string) {
+		processCodeLine = func(line string) {
 			if line[0] == ' ' {
-
+				currentRegionBefore.WriteString(line[1:])
+				currentRegionBefore.WriteString("\n")
+				currentRegionAfter.WriteString(line[1:])
+				currentRegionAfter.WriteString("\n")
 			} else if line[0] == '-' {
 				r.lineDeleted++
+				currentRegionBefore.WriteString(line[1:])
+				currentRegionBefore.WriteString("\n")
 			} else if line[0] == '+' {
 				r.lineAdded++
-			} else if strings.HasPrefix(line, "@@") {
-				processBlockHeaderLine(line)
+				currentRegionAfter.WriteString(line[1:])
+				currentRegionAfter.WriteString("\n")
 			} else {
-				inFileHeader = true
-				processFileHeaderLine(line)
+				countFunctionCalls(&currentRegionBefore, currentExtension, &r.functionCallsBefore)
+				countFunctionCalls(&currentRegionAfter, currentExtension, &r.functionCallsAfter)
+				if strings.HasPrefix(line, "@@") {
+					processRegionHeaderLine(line)
+				} else {
+					inFileHeader = true
+					processFileHeaderLine(line)
+				}
 			}
 		}
 
@@ -96,7 +231,7 @@ func compute() *result {
 			if inFileHeader {
 				processFileHeaderLine(line)
 			} else {
-				processFileLine(line)
+				processCodeLine(line)
 			}
 		}
 
@@ -107,5 +242,9 @@ func compute() *result {
 		r.files = append(r.files, name)
 	}
 
+	for name, _ := range seenExtensions {
+		r.fileExtensions = append(r.fileExtensions, name)
+	}
+
 	return &r
 }
diff --git a/result.go b/result.go
index 2c5d7f1..e18a01c 100644
--- a/result.go
+++ b/result.go
@@ -9,6 +9,8 @@ import (
 type result struct {
 	//The name of the files seen
 	files []string
+	//The name of the files seen
+	fileExtensions []string
 	//How many region we have (i.e. seperated by @@)
 	regions int
 	//How many line were added total
@@ -30,6 +32,12 @@ func (r *result) String() string {
 		buffer.WriteString(file)
 		buffer.WriteString("\n")
 	}
+	buffer.WriteString("Extensions: \n")
+	for _, ext := range r.fileExtensions {
+		buffer.WriteString("	-")
+		buffer.WriteString(ext)
+		buffer.WriteString("\n")
+	}
 	r.appendIntValueToBuffer(r.regions, "Regions", &buffer)
 	r.appendIntValueToBuffer(r.lineAdded, "LA", &buffer)
 	r.appendIntValueToBuffer(r.lineDeleted, "LD", &buffer)

From 1d648ab82a509405c44bc50d6dbd5c0da6ec9710 Mon Sep 17 00:00:00 2001
From: Paul-Andre Henegar <paul-andre.henegar@mail.mcgill.ca>
Date: Mon, 2 Apr 2018 08:40:28 -0400
Subject: [PATCH 03/13] Removed regexps, now performance isn't as horrible

---
 main.go   | 102 ++++++++++++++++++++++++++++++------------------------
 result.go |   2 ++
 2 files changed, 59 insertions(+), 45 deletions(-)

diff --git a/main.go b/main.go
index faf91ba..17276ac 100644
--- a/main.go
+++ b/main.go
@@ -3,7 +3,6 @@ package main
 import (
 	"bufio"
 	"bytes"
-	"errors"
 	"fmt"
 	"log"
 	"os"
@@ -36,50 +35,46 @@ type tokenRule struct {
 	regexp regexp.Regexp
 }
 
-// This structure represents a (not efficient in general) tokenizer.
-// It tokenizes a string by everytime trying all regexps and returning the token
-// that matches. It assumes that all the regexps match the beginning of the string.
+func beginsIdentifier(b byte) bool {
+	return ('a' <= b && b <= 'z') || ('A' <= b && b <= 'Z') || b == '_'
+}
+func insideIdentifier(b byte) bool {
+	return ('a' <= b && b <= 'z') || ('A' <= b && b <= 'Z') || ('0' <= b && b <= '9') || b == '_'
+}
+
+type tokenType int
+
+const (
+	identifier tokenType = iota
+	somethingElse
+)
+
+// A "tokenizer" that splits its input into things that look like identifiers and all other charcters
 //
-// It could be easily replaced by a more efficient and more complete tokenizer.
+// It could be replaced by a more complete tokenizer.
 type tokenizer struct {
 	text       []byte
-	tokenRules []tokenRule
 }
 
-func (r *tokenizer) Next() (int, string, error) {
-	if len(r.text) == 0 {
-		return -1, "", errors.New("tokenizer text empty")
-	}
-	for _, rule := range r.tokenRules {
-		found := rule.regexp.Find(r.text)
-		if found != nil {
-			r.text = r.text[len(found):]
-			return rule.token, string(found), nil
+func (r *tokenizer) Next() (token tokenType, text []byte) {
+
+	if beginsIdentifier(r.text[0]) {
+		var i = 0
+		for i < len(r.text) && insideIdentifier(r.text[i]) {
+			i++
 		}
+		var result = r.text[0:i]
+		r.text = r.text[i:]
+		return identifier, result
 	}
-	return -1, "", errors.New("could not match any token")
-}
 
-const (
-	whitespace = iota
-	openParen
-	identifier
-	anythingElse
-)
-
-var cTokenizer = tokenizer{
-	[]byte{},
-	[]tokenRule{
-		{whitespace, *regexp.MustCompilePOSIX(`^[\t\n\f\r ]+`)},
-		{openParen, *regexp.MustCompilePOSIX(`^\(`)},
-		{identifier, *regexp.MustCompilePOSIX(`^[a-zA-Z_][a-zA-Z0-9_]*`)},
-		{anythingElse, *regexp.MustCompilePOSIX(`^.`)},
-	},
+	var result = r.text[0:1]
+	r.text = r.text[1:]
+	return somethingElse, result
 }
 
-func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) {
 
-	cTokenizer.text = buffer.Bytes()
+func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) {
 
 	var keywords = map[string]bool{
 		"if":    true,
@@ -87,20 +82,37 @@ func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) {
 		"while": true,
 	}
 
-	var tokens = [3]int{whitespace, whitespace, whitespace}
-	var strings [3]string
+	var whitespace = []byte {
+		' ',
+		'\t',
+		'\n',
+		'\r',
+	}
+
+	var tokenizer = tokenizer{
+		buffer.Bytes(),
+	}
 
-	for {
+	var tokens = [3]tokenType{somethingElse, somethingElse, somethingElse}
+	var strings = [3][]byte{{' '}, {' '}, {' '}}
 
+	for {
 		for { // Loop to remove whitespace
-			if len(cTokenizer.text) == 0 {
+			if len(tokenizer.text) == 0 {
 				return
 			}
-			tok, s, err := cTokenizer.Next()
-			if err != nil {
-				log.Fatal(err) // This shouldn't happen because of the anythingElse rule
+			tok, s := tokenizer.Next()
+			var isWhitespace = false
+			if tok == somethingElse {
+				for _, w := range whitespace {
+					if s[0] == w {
+						isWhitespace = true
+						break
+					}
+				}
 			}
-			if tok != whitespace {
+			if tok == identifier ||
+			(tok == somethingElse && !isWhitespace) {
 				tokens[0], tokens[1] = tokens[1], tokens[2]
 				strings[0], strings[1] = strings[1], strings[2]
 				tokens[2] = tok
@@ -111,9 +123,9 @@ func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) {
 
 		if tokens[0] != identifier &&
 			tokens[1] == identifier &&
-			tokens[2] == openParen &&
-			!keywords[strings[1]] {
-			(*counts)[strings[1]]++
+			tokens[2] == somethingElse && strings[2][0] == '(' &&
+			!keywords[string(strings[1])] {
+				(*counts)[string(strings[1])]++
 		}
 	}
 }
diff --git a/result.go b/result.go
index e18a01c..a815c2b 100644
--- a/result.go
+++ b/result.go
@@ -26,12 +26,14 @@ type result struct {
 func (r *result) String() string {
 
 	var buffer bytes.Buffer
+	/*
 	buffer.WriteString("Files: \n")
 	for _, file := range r.files {
 		buffer.WriteString("	-")
 		buffer.WriteString(file)
 		buffer.WriteString("\n")
 	}
+	*/
 	buffer.WriteString("Extensions: \n")
 	for _, ext := range r.fileExtensions {
 		buffer.WriteString("	-")

From d51e0779170fb587731d65fef16ccfd93339dda9 Mon Sep 17 00:00:00 2001
From: Paul-Andre Henegar <paul-andre.henegar@mail.mcgill.ca>
Date: Mon, 2 Apr 2018 10:41:23 -0400
Subject: [PATCH 04/13] Duplicated the C function call counter function and
 adapted for python

---
 main.go | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 53 insertions(+), 7 deletions(-)

diff --git a/main.go b/main.go
index 17276ac..7c8680f 100644
--- a/main.go
+++ b/main.go
@@ -7,7 +7,6 @@ import (
 	"log"
 	"os"
 	"path/filepath"
-	"regexp"
 	"strings"
 	"time"
 )
@@ -30,11 +29,6 @@ func main() {
 	fmt.Println(compute())
 }
 
-type tokenRule struct {
-	token  int
-	regexp regexp.Regexp
-}
-
 func beginsIdentifier(b byte) bool {
 	return ('a' <= b && b <= 'z') || ('A' <= b && b <= 'Z') || b == '_'
 }
@@ -87,6 +81,7 @@ func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) {
 		'\t',
 		'\n',
 		'\r',
+		'\f',
 	}
 
 	var tokenizer = tokenizer{
@@ -130,14 +125,65 @@ func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) {
 	}
 }
 
+
+func countPythonFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) {
+
+	// Since the open parenthesis for a function call must be on the same line as the name,
+	var whitespace = []byte {
+		' ',
+		'\t',
+		'\r',
+		'\f',
+	}
+
+	var tokenizer = tokenizer{
+		buffer.Bytes(),
+	}
+
+	var tokens = [3]tokenType{somethingElse, somethingElse, somethingElse}
+	var strings = [3][]byte{{' '}, {' '}, {' '}}
+
+	for {
+		for { // Loop to remove whitespace
+			if len(tokenizer.text) == 0 {
+				return
+			}
+			tok, s := tokenizer.Next()
+			var isWhitespace = false
+			if tok == somethingElse {
+				for _, w := range whitespace {
+					if s[0] == w {
+						isWhitespace = true
+						break
+					}
+				}
+			}
+			if tok == identifier ||
+			(tok == somethingElse && !isWhitespace) {
+				tokens[0], tokens[1] = tokens[1], tokens[2]
+				strings[0], strings[1] = strings[1], strings[2]
+				tokens[2] = tok
+				strings[2] = s
+				break
+			}
+		}
+
+		if tokens[1] == identifier &&
+			tokens[2] == somethingElse && strings[2][0] == '(' &&
+			tokens[0] == identifier && string(strings[0]) != "def" {
+				(*counts)[string(strings[1])]++
+		}
+	}
+}
+
 //Given a bytes.Buffer containing a code segment, its extension, and a map to
 //use for counting, counts the function calls
 func countFunctionCalls(buffer *bytes.Buffer, ext string, counts *map[string]int) {
 	switch ext {
 	case ".c", ".h":
 		countCFunctionCalls(buffer, counts)
-
 	case ".py":
+		countPythonFunctionCalls(buffer, counts)
 
 	default:
 

From 7dd90f5eaf72eb3b9fab44bca6ef4db164853489 Mon Sep 17 00:00:00 2001
From: Paul-Andre Henegar <paul-andre.henegar@mail.mcgill.ca>
Date: Mon, 2 Apr 2018 10:59:44 -0400
Subject: [PATCH 05/13] Factored ignoring whitespace into the tokenizer

---
 main.go   | 81 +++++++++++++++++++++++--------------------------------
 result.go |  4 +--
 2 files changed, 36 insertions(+), 49 deletions(-)

diff --git a/main.go b/main.go
index 7c8680f..0034c53 100644
--- a/main.go
+++ b/main.go
@@ -39,6 +39,7 @@ func insideIdentifier(b byte) bool {
 type tokenType int
 
 const (
+	endOfString tokenType = -1
 	identifier tokenType = iota
 	somethingElse
 )
@@ -48,10 +49,15 @@ const (
 // It could be replaced by a more complete tokenizer.
 type tokenizer struct {
 	text       []byte
+	toBeIgnored []byte
 }
 
 func (r *tokenizer) Next() (token tokenType, text []byte) {
 
+	if len(r.text) == 0 {
+		return endOfString, nil
+	}
+
 	if beginsIdentifier(r.text[0]) {
 		var i = 0
 		for i < len(r.text) && insideIdentifier(r.text[i]) {
@@ -62,9 +68,20 @@ func (r *tokenizer) Next() (token tokenType, text []byte) {
 		return identifier, result
 	}
 
-	var result = r.text[0:1]
-	r.text = r.text[1:]
-	return somethingElse, result
+	for len(r.text) > 0 {
+		var result = r.text[0:1]
+		r.text = r.text[1:]
+		var shouldBeIgnored = false
+		for _,c := range r.toBeIgnored {
+			if result[0] == c {
+				shouldBeIgnored = true
+			}
+			if !shouldBeIgnored {
+				return somethingElse, result
+			}
+		}
+	}
+	return endOfString, nil
 }
 
 
@@ -86,36 +103,21 @@ func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) {
 
 	var tokenizer = tokenizer{
 		buffer.Bytes(),
+		whitespace,
 	}
 
 	var tokens = [3]tokenType{somethingElse, somethingElse, somethingElse}
 	var strings = [3][]byte{{' '}, {' '}, {' '}}
 
 	for {
-		for { // Loop to remove whitespace
-			if len(tokenizer.text) == 0 {
-				return
-			}
-			tok, s := tokenizer.Next()
-			var isWhitespace = false
-			if tok == somethingElse {
-				for _, w := range whitespace {
-					if s[0] == w {
-						isWhitespace = true
-						break
-					}
-				}
-			}
-			if tok == identifier ||
-			(tok == somethingElse && !isWhitespace) {
-				tokens[0], tokens[1] = tokens[1], tokens[2]
-				strings[0], strings[1] = strings[1], strings[2]
-				tokens[2] = tok
-				strings[2] = s
-				break
-			}
+		tok, s := tokenizer.Next()
+		if tok == endOfString {
+			return
 		}
 
+		tokens[0], tokens[1], tokens[2] = tokens[1], tokens[2], tok
+		strings[0], strings[1], strings[2] = strings[1], strings[2], s
+
 		if tokens[0] != identifier &&
 			tokens[1] == identifier &&
 			tokens[2] == somethingElse && strings[2][0] == '(' &&
@@ -138,36 +140,21 @@ func countPythonFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) {
 
 	var tokenizer = tokenizer{
 		buffer.Bytes(),
+		whitespace,
 	}
 
 	var tokens = [3]tokenType{somethingElse, somethingElse, somethingElse}
 	var strings = [3][]byte{{' '}, {' '}, {' '}}
 
 	for {
-		for { // Loop to remove whitespace
-			if len(tokenizer.text) == 0 {
-				return
-			}
-			tok, s := tokenizer.Next()
-			var isWhitespace = false
-			if tok == somethingElse {
-				for _, w := range whitespace {
-					if s[0] == w {
-						isWhitespace = true
-						break
-					}
-				}
-			}
-			if tok == identifier ||
-			(tok == somethingElse && !isWhitespace) {
-				tokens[0], tokens[1] = tokens[1], tokens[2]
-				strings[0], strings[1] = strings[1], strings[2]
-				tokens[2] = tok
-				strings[2] = s
-				break
-			}
+		tok, s := tokenizer.Next()
+		if tok == endOfString {
+			return
 		}
 
+		tokens[0], tokens[1], tokens[2] = tokens[1], tokens[2], tok
+		strings[0], strings[1], strings[2] = strings[1], strings[2], s
+
 		if tokens[1] == identifier &&
 			tokens[2] == somethingElse && strings[2][0] == '(' &&
 			tokens[0] == identifier && string(strings[0]) != "def" {
diff --git a/result.go b/result.go
index a815c2b..57c4abc 100644
--- a/result.go
+++ b/result.go
@@ -26,14 +26,12 @@ type result struct {
 func (r *result) String() string {
 
 	var buffer bytes.Buffer
-	/*
 	buffer.WriteString("Files: \n")
 	for _, file := range r.files {
 		buffer.WriteString("	-")
 		buffer.WriteString(file)
 		buffer.WriteString("\n")
 	}
-	*/
 	buffer.WriteString("Extensions: \n")
 	for _, ext := range r.fileExtensions {
 		buffer.WriteString("	-")
@@ -44,6 +42,7 @@ func (r *result) String() string {
 	r.appendIntValueToBuffer(r.lineAdded, "LA", &buffer)
 	r.appendIntValueToBuffer(r.lineDeleted, "LD", &buffer)
 
+
 	buffer.WriteString("Function calls before: \n")
 	for key, value := range r.functionCallsBefore {
 		r.appendIntValueToBuffer(value, key, &buffer)
@@ -54,6 +53,7 @@ func (r *result) String() string {
 		r.appendIntValueToBuffer(value, key, &buffer)
 	}
 
+
 	return buffer.String()
 }
 

From 26a4e1f708ce407df168f76f1cfb2cd091a3c54b Mon Sep 17 00:00:00 2001
From: Paul-Andre Henegar <paul-andre.henegar@mail.mcgill.ca>
Date: Mon, 2 Apr 2018 11:25:12 -0400
Subject: [PATCH 06/13] now using state functions for reading the diff files

---
 main.go   | 52 +++++++++++++++++++++++-----------------------------
 result.go |  6 ++++--
 2 files changed, 27 insertions(+), 31 deletions(-)

diff --git a/main.go b/main.go
index 0034c53..343ae5c 100644
--- a/main.go
+++ b/main.go
@@ -40,7 +40,7 @@ type tokenType int
 
 const (
 	endOfString tokenType = -1
-	identifier tokenType = iota
+	identifier  tokenType = iota
 	somethingElse
 )
 
@@ -48,7 +48,7 @@ const (
 //
 // It could be replaced by a more complete tokenizer.
 type tokenizer struct {
-	text       []byte
+	text        []byte
 	toBeIgnored []byte
 }
 
@@ -72,7 +72,7 @@ func (r *tokenizer) Next() (token tokenType, text []byte) {
 		var result = r.text[0:1]
 		r.text = r.text[1:]
 		var shouldBeIgnored = false
-		for _,c := range r.toBeIgnored {
+		for _, c := range r.toBeIgnored {
 			if result[0] == c {
 				shouldBeIgnored = true
 			}
@@ -84,7 +84,6 @@ func (r *tokenizer) Next() (token tokenType, text []byte) {
 	return endOfString, nil
 }
 
-
 func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) {
 
 	var keywords = map[string]bool{
@@ -93,7 +92,7 @@ func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) {
 		"while": true,
 	}
 
-	var whitespace = []byte {
+	var whitespace = []byte{
 		' ',
 		'\t',
 		'\n',
@@ -122,16 +121,15 @@ func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) {
 			tokens[1] == identifier &&
 			tokens[2] == somethingElse && strings[2][0] == '(' &&
 			!keywords[string(strings[1])] {
-				(*counts)[string(strings[1])]++
+			(*counts)[string(strings[1])]++
 		}
 	}
 }
 
-
 func countPythonFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) {
 
 	// Since the open parenthesis for a function call must be on the same line as the name,
-	var whitespace = []byte {
+	var whitespace = []byte{
 		' ',
 		'\t',
 		'\r',
@@ -158,7 +156,7 @@ func countPythonFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) {
 		if tokens[1] == identifier &&
 			tokens[2] == somethingElse && strings[2][0] == '(' &&
 			tokens[0] == identifier && string(strings[0]) != "def" {
-				(*counts)[string(strings[1])]++
+			(*counts)[string(strings[1])]++
 		}
 	}
 }
@@ -212,13 +210,13 @@ func compute() *result {
 		var currentRegionBefore, currentRegionAfter bytes.Buffer
 		var currentExtension string
 
-		// Here I create a small state machine where one of the following closures
-		// is meant to be executed at every line.
-		var processFileHeaderLine func(line string)
-		var processRegionHeaderLine func(line string)
-		var processCodeLine func(line string)
+		// Here I create a small state machine using state functions
+		type stateFn func(line string) stateFn
+		var processFileHeaderLine,
+			processRegionHeaderLine,
+			processCodeLine stateFn
 
-		processFileHeaderLine = func(line string) {
+		processFileHeaderLine = func(line string) stateFn {
 			if strings.HasPrefix(line, "+++") || strings.HasPrefix(line, "---") {
 				var fileName = line[len("--- a/"):]
 				seenFiles[fileName] = struct{}{}
@@ -229,22 +227,20 @@ func compute() *result {
 				}
 				seenExtensions[fileType] = struct{}{}
 				currentExtension = fileType
-
 			} else if strings.HasPrefix(line, "@@") {
-				inFileHeader = false
-				processRegionHeaderLine(line)
-			} else {
-				// TODO: error
+				return processRegionHeaderLine(line)
 			}
+			return processFileHeaderLine
 		}
 
-		processRegionHeaderLine = func(line string) {
+		processRegionHeaderLine = func(line string) stateFn {
 			r.regions++
 			currentRegionBefore.Reset()
 			currentRegionAfter.Reset()
+			return processCodeLine
 		}
 
-		processCodeLine = func(line string) {
+		processCodeLine = func(line string) stateFn {
 			if line[0] == ' ' {
 				currentRegionBefore.WriteString(line[1:])
 				currentRegionBefore.WriteString("\n")
@@ -262,22 +258,20 @@ func compute() *result {
 				countFunctionCalls(&currentRegionBefore, currentExtension, &r.functionCallsBefore)
 				countFunctionCalls(&currentRegionAfter, currentExtension, &r.functionCallsAfter)
 				if strings.HasPrefix(line, "@@") {
-					processRegionHeaderLine(line)
+					return processRegionHeaderLine(line)
 				} else {
 					inFileHeader = true
-					processFileHeaderLine(line)
+					return processFileHeaderLine(line)
 				}
 			}
+			return processCodeLine
 		}
 
+		var state = processFileHeaderLine
 		for scanner.Scan() {
 			line := scanner.Text()
 
-			if inFileHeader {
-				processFileHeaderLine(line)
-			} else {
-				processCodeLine(line)
-			}
+			state = state(line) // jumping on a trampoline
 		}
 
 		diffFile.Close()
diff --git a/result.go b/result.go
index 57c4abc..9f768db 100644
--- a/result.go
+++ b/result.go
@@ -27,11 +27,13 @@ func (r *result) String() string {
 
 	var buffer bytes.Buffer
 	buffer.WriteString("Files: \n")
+	/*
 	for _, file := range r.files {
 		buffer.WriteString("	-")
 		buffer.WriteString(file)
 		buffer.WriteString("\n")
 	}
+	*/
 	buffer.WriteString("Extensions: \n")
 	for _, ext := range r.fileExtensions {
 		buffer.WriteString("	-")
@@ -42,7 +44,7 @@ func (r *result) String() string {
 	r.appendIntValueToBuffer(r.lineAdded, "LA", &buffer)
 	r.appendIntValueToBuffer(r.lineDeleted, "LD", &buffer)
 
-
+	/*
 	buffer.WriteString("Function calls before: \n")
 	for key, value := range r.functionCallsBefore {
 		r.appendIntValueToBuffer(value, key, &buffer)
@@ -52,7 +54,7 @@ func (r *result) String() string {
 	for key, value := range r.functionCallsAfter {
 		r.appendIntValueToBuffer(value, key, &buffer)
 	}
-
+	*/
 
 	return buffer.String()
 }

From 1d7257e119b869def6a0a0d0f2f042b9e2bcb037 Mon Sep 17 00:00:00 2001
From: Paul-Andre Henegar <paul-andre.henegar@mail.mcgill.ca>
Date: Mon, 2 Apr 2018 11:34:32 -0400
Subject: [PATCH 07/13] Now result only has a single functionCalls map

And the values of the map are struct{before, after int}
---
 main.go   | 23 ++++++++++++++++++-----
 result.go | 23 +++++++++--------------
 2 files changed, 27 insertions(+), 19 deletions(-)

diff --git a/main.go b/main.go
index 343ae5c..af99f46 100644
--- a/main.go
+++ b/main.go
@@ -44,7 +44,7 @@ const (
 	somethingElse
 )
 
-// A "tokenizer" that splits its input into things that look like identifiers and all other charcters
+// A "tokenizer" that splits its input into things that look like identifiers and all other characters
 //
 // It could be replaced by a more complete tokenizer.
 type tokenizer struct {
@@ -185,8 +185,9 @@ func countFunctionCalls(buffer *bytes.Buffer, ext string, counts *map[string]int
 //	list of function calls seen in the diffs and their number of calls
 func compute() *result {
 	var r result
-	r.functionCallsBefore = make(map[string]int)
-	r.functionCallsAfter = make(map[string]int)
+	var functionCallsBefore = make(map[string]int)
+	var functionCallsAfter = make(map[string]int)
+	r.functionCalls = make(map[string]struct{ before, after int })
 
 	var seenFiles = make(map[string]struct{})
 	var seenExtensions = make(map[string]struct{})
@@ -255,8 +256,8 @@ func compute() *result {
 				currentRegionAfter.WriteString(line[1:])
 				currentRegionAfter.WriteString("\n")
 			} else {
-				countFunctionCalls(&currentRegionBefore, currentExtension, &r.functionCallsBefore)
-				countFunctionCalls(&currentRegionAfter, currentExtension, &r.functionCallsAfter)
+				countFunctionCalls(&currentRegionBefore, currentExtension, &functionCallsBefore)
+				countFunctionCalls(&currentRegionAfter, currentExtension, &functionCallsAfter)
 				if strings.HasPrefix(line, "@@") {
 					return processRegionHeaderLine(line)
 				} else {
@@ -285,5 +286,17 @@ func compute() *result {
 		r.fileExtensions = append(r.fileExtensions, name)
 	}
 
+	for name, times := range functionCallsBefore {
+		var prev = r.functionCalls[name]
+		prev.before += times
+		r.functionCalls[name] = prev
+	}
+
+	for name, times := range functionCallsAfter {
+		var prev = r.functionCalls[name]
+		prev.after += times
+		r.functionCalls[name] = prev
+	}
+
 	return &r
 }
diff --git a/result.go b/result.go
index 9f768db..804e5c9 100644
--- a/result.go
+++ b/result.go
@@ -18,8 +18,7 @@ type result struct {
 	//How many line were deleted totla
 	lineDeleted int
 	//How many times the functionj seen in the code are called before and after
-	functionCallsBefore map[string]int
-	functionCallsAfter  map[string]int
+	functionCalls map[string]struct{ before, after int }
 }
 
 //String returns the value of results as a formated string
@@ -27,13 +26,11 @@ func (r *result) String() string {
 
 	var buffer bytes.Buffer
 	buffer.WriteString("Files: \n")
-	/*
 	for _, file := range r.files {
 		buffer.WriteString("	-")
 		buffer.WriteString(file)
 		buffer.WriteString("\n")
 	}
-	*/
 	buffer.WriteString("Extensions: \n")
 	for _, ext := range r.fileExtensions {
 		buffer.WriteString("	-")
@@ -44,17 +41,15 @@ func (r *result) String() string {
 	r.appendIntValueToBuffer(r.lineAdded, "LA", &buffer)
 	r.appendIntValueToBuffer(r.lineDeleted, "LD", &buffer)
 
-	/*
-	buffer.WriteString("Function calls before: \n")
-	for key, value := range r.functionCallsBefore {
-		r.appendIntValueToBuffer(value, key, &buffer)
-	}
-
-	buffer.WriteString("Function calls after: \n")
-	for key, value := range r.functionCallsAfter {
-		r.appendIntValueToBuffer(value, key, &buffer)
+	buffer.WriteString("Function calls (before, after): \n")
+	for key, value := range r.functionCalls {
+		buffer.WriteString(key)
+		buffer.WriteString(" : ")
+		buffer.WriteString(strconv.Itoa(value.before))
+		buffer.WriteString(", ")
+		buffer.WriteString(strconv.Itoa(value.after))
+		buffer.WriteString("\n")
 	}
-	*/
 
 	return buffer.String()
 }

From 8213243549e23c4e1bd04488e19dcf6103ea516d Mon Sep 17 00:00:00 2001
From: Paul-Andre Henegar <paul-andre.henegar@mail.mcgill.ca>
Date: Mon, 2 Apr 2018 12:16:38 -0400
Subject: [PATCH 08/13] Fixed some bugs

Tokenizer now properly returns identifiers after whitespace
Now correctly considers the file extension for both source and
destination file
Now considers /dev/null files
---
 main.go | 68 ++++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 41 insertions(+), 27 deletions(-)

diff --git a/main.go b/main.go
index af99f46..86aa4a5 100644
--- a/main.go
+++ b/main.go
@@ -37,29 +37,44 @@ func insideIdentifier(b byte) bool {
 }
 
 type tokenType int
-
 const (
 	endOfString tokenType = -1
 	identifier  tokenType = iota
 	somethingElse
 )
 
-// A "tokenizer" that splits its input into things that look like identifiers and all other characters
+// A "tokenizer" that removes characters to be ignored and splits its input
+// into things that look like identifiers and all other characters.
 //
-// It could be replaced by a more complete tokenizer.
+// It could be replaced by a more complete tokenizer. One that takes care of
+// comments and strings for example
 type tokenizer struct {
 	text        []byte
 	toBeIgnored []byte
 }
 
+func byteInSlice(b byte, slice []byte) bool {
+	for _, c := range slice {
+		if b == c {
+			return true
+		}
+	}
+	return false
+}
+
+
 func (r *tokenizer) Next() (token tokenType, text []byte) {
 
+	for len(r.text) > 0 && byteInSlice(r.text[0], r.toBeIgnored) {
+		r.text = r.text[1:]
+	}
+
 	if len(r.text) == 0 {
 		return endOfString, nil
 	}
 
 	if beginsIdentifier(r.text[0]) {
-		var i = 0
+		var i = 1
 		for i < len(r.text) && insideIdentifier(r.text[i]) {
 			i++
 		}
@@ -68,20 +83,10 @@ func (r *tokenizer) Next() (token tokenType, text []byte) {
 		return identifier, result
 	}
 
-	for len(r.text) > 0 {
-		var result = r.text[0:1]
-		r.text = r.text[1:]
-		var shouldBeIgnored = false
-		for _, c := range r.toBeIgnored {
-			if result[0] == c {
-				shouldBeIgnored = true
-			}
-			if !shouldBeIgnored {
-				return somethingElse, result
-			}
-		}
-	}
-	return endOfString, nil
+	var result = r.text[:1]
+	r.text = r.text[1:]
+	return somethingElse, result
+
 }
 
 func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) {
@@ -128,12 +133,11 @@ func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) {
 
 func countPythonFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) {
 
-	// Since the open parenthesis for a function call must be on the same line as the name,
+	// Since the open parenthesis for a function call must be on the same line as
+	// the name, I only ignore space and tabs.
 	var whitespace = []byte{
 		' ',
 		'\t',
-		'\r',
-		'\f',
 	}
 
 	var tokenizer = tokenizer{
@@ -155,7 +159,7 @@ func countPythonFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) {
 
 		if tokens[1] == identifier &&
 			tokens[2] == somethingElse && strings[2][0] == '(' &&
-			tokens[0] == identifier && string(strings[0]) != "def" {
+			!(tokens[0] == identifier && string(strings[0]) == "def") {
 			(*counts)[string(strings[1])]++
 		}
 	}
@@ -209,7 +213,7 @@ func compute() *result {
 		inFileHeader := true
 
 		var currentRegionBefore, currentRegionAfter bytes.Buffer
-		var currentExtension string
+		var currentExtensionBefore, currentExtensionAfter string
 
 		// Here I create a small state machine using state functions
 		type stateFn func(line string) stateFn
@@ -219,7 +223,12 @@ func compute() *result {
 
 		processFileHeaderLine = func(line string) stateFn {
 			if strings.HasPrefix(line, "+++") || strings.HasPrefix(line, "---") {
-				var fileName = line[len("--- a/"):]
+
+				var fileName = line[len("--- "):]
+				if (fileName != "/dev/null") {
+					fileName = fileName[len("a/"):]
+				}
+
 				seenFiles[fileName] = struct{}{}
 
 				var fileType = filepath.Ext(fileName)
@@ -227,7 +236,12 @@ func compute() *result {
 					fileType = filepath.Base(fileName)
 				}
 				seenExtensions[fileType] = struct{}{}
-				currentExtension = fileType
+				if (line[0] == '-') {
+					currentExtensionBefore = fileType
+				} else {
+					currentExtensionAfter = fileType
+				}
+
 			} else if strings.HasPrefix(line, "@@") {
 				return processRegionHeaderLine(line)
 			}
@@ -256,8 +270,8 @@ func compute() *result {
 				currentRegionAfter.WriteString(line[1:])
 				currentRegionAfter.WriteString("\n")
 			} else {
-				countFunctionCalls(&currentRegionBefore, currentExtension, &functionCallsBefore)
-				countFunctionCalls(&currentRegionAfter, currentExtension, &functionCallsAfter)
+				countFunctionCalls(&currentRegionBefore, currentExtensionBefore, &functionCallsBefore)
+				countFunctionCalls(&currentRegionAfter, currentExtensionAfter, &functionCallsAfter)
 				if strings.HasPrefix(line, "@@") {
 					return processRegionHeaderLine(line)
 				} else {

From be0dbc5b33a2ade6439678c5d6b6f53dabb0d65a Mon Sep 17 00:00:00 2001
From: Paul-Andre Henegar <paul-andre.henegar@mail.mcgill.ca>
Date: Mon, 2 Apr 2018 12:56:07 -0400
Subject: [PATCH 09/13] Move state functions outside the loop and do keyword
 checks for python

---
 main.go | 169 ++++++++++++++++++++++++++++++--------------------------
 1 file changed, 91 insertions(+), 78 deletions(-)

diff --git a/main.go b/main.go
index 86aa4a5..c2c6f71 100644
--- a/main.go
+++ b/main.go
@@ -95,6 +95,7 @@ func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) {
 		"if":    true,
 		"for":   true,
 		"while": true,
+		"else": true,
 	}
 
 	var whitespace = []byte{
@@ -122,10 +123,9 @@ func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) {
 		tokens[0], tokens[1], tokens[2] = tokens[1], tokens[2], tok
 		strings[0], strings[1], strings[2] = strings[1], strings[2], s
 
-		if tokens[0] != identifier &&
-			tokens[1] == identifier &&
-			tokens[2] == somethingElse && strings[2][0] == '(' &&
-			!keywords[string(strings[1])] {
+		if !(tokens[0] == identifier && !keywords[string(strings[0])]) &&
+			tokens[1] == identifier && !keywords[string(strings[1])] &&
+			tokens[2] == somethingElse && strings[2][0] == '(' {
 			(*counts)[string(strings[1])]++
 		}
 	}
@@ -140,6 +140,18 @@ func countPythonFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) {
 		'\t',
 	}
 
+	var keywords = map[string]bool{
+		"if":    true,
+		"in":    true,
+		"or":    true,
+		"and":    true,
+		"for":   true,
+		"while": true,
+		"else": true,
+		"elif": true,
+		"def": true,
+	}
+
 	var tokenizer = tokenizer{
 		buffer.Bytes(),
 		whitespace,
@@ -157,9 +169,9 @@ func countPythonFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) {
 		tokens[0], tokens[1], tokens[2] = tokens[1], tokens[2], tok
 		strings[0], strings[1], strings[2] = strings[1], strings[2], s
 
-		if tokens[1] == identifier &&
-			tokens[2] == somethingElse && strings[2][0] == '(' &&
-			!(tokens[0] == identifier && string(strings[0]) == "def") {
+		if !(tokens[0] == identifier && string(strings[0]) == "def") &&
+		 tokens[1] == identifier && !keywords[string(tokens[1])] &&
+			tokens[2] == somethingElse && strings[2][0] == '(' {
 			(*counts)[string(strings[1])]++
 		}
 	}
@@ -196,6 +208,77 @@ func compute() *result {
 	var seenFiles = make(map[string]struct{})
 	var seenExtensions = make(map[string]struct{})
 
+	var currentRegionBefore, currentRegionAfter bytes.Buffer
+	var currentExtensionBefore, currentExtensionAfter string
+
+	// Here I create a small state machine using state functions
+	type stateFn func(line string) stateFn
+	var processFileHeaderLine,
+	processRegionHeaderLine,
+	processCodeLine stateFn
+
+	processFileHeaderLine = func(line string) stateFn {
+		if strings.HasPrefix(line, "+++") || strings.HasPrefix(line, "---") {
+
+			var fileName = line[len("--- "):]
+			if (fileName != "/dev/null") {
+				fileName = fileName[len("a/"):]
+			}
+
+			seenFiles[fileName] = struct{}{}
+
+			var fileType = filepath.Ext(fileName)
+			if fileType == "" {
+				// If something doesn't have an extension, we assume the name itself
+				// is significant, like "Makefile"
+				fileType = filepath.Base(fileName)
+			}
+			seenExtensions[fileType] = struct{}{}
+			if (line[0] == '-') {
+				currentExtensionBefore = fileType
+			} else {
+				currentExtensionAfter = fileType
+			}
+
+		} else if strings.HasPrefix(line, "@@") {
+			return processRegionHeaderLine(line)
+		}
+		return processFileHeaderLine
+	}
+
+	processRegionHeaderLine = func(line string) stateFn {
+		r.regions++
+		currentRegionBefore.Reset()
+		currentRegionAfter.Reset()
+		return processCodeLine
+	}
+
+	processCodeLine = func(line string) stateFn {
+		if line[0] == ' ' {
+			currentRegionBefore.WriteString(line[1:])
+			currentRegionBefore.WriteString("\n")
+			currentRegionAfter.WriteString(line[1:])
+			currentRegionAfter.WriteString("\n")
+		} else if line[0] == '-' {
+			r.lineDeleted++
+			currentRegionBefore.WriteString(line[1:])
+			currentRegionBefore.WriteString("\n")
+		} else if line[0] == '+' {
+			r.lineAdded++
+			currentRegionAfter.WriteString(line[1:])
+			currentRegionAfter.WriteString("\n")
+		} else {
+			countFunctionCalls(&currentRegionBefore, currentExtensionBefore, &functionCallsBefore)
+			countFunctionCalls(&currentRegionAfter, currentExtensionAfter, &functionCallsAfter)
+			if strings.HasPrefix(line, "@@") {
+				return processRegionHeaderLine(line)
+			} else {
+				return processFileHeaderLine(line)
+			}
+		}
+		return processCodeLine
+	}
+
 	diffnames, err := filepath.Glob("./diffs/*.diff")
 	if err != nil {
 		log.Fatal(err)
@@ -210,83 +293,13 @@ func compute() *result {
 
 		scanner := bufio.NewScanner(diffFile)
 
-		inFileHeader := true
-
-		var currentRegionBefore, currentRegionAfter bytes.Buffer
-		var currentExtensionBefore, currentExtensionAfter string
-
-		// Here I create a small state machine using state functions
-		type stateFn func(line string) stateFn
-		var processFileHeaderLine,
-			processRegionHeaderLine,
-			processCodeLine stateFn
-
-		processFileHeaderLine = func(line string) stateFn {
-			if strings.HasPrefix(line, "+++") || strings.HasPrefix(line, "---") {
-
-				var fileName = line[len("--- "):]
-				if (fileName != "/dev/null") {
-					fileName = fileName[len("a/"):]
-				}
-
-				seenFiles[fileName] = struct{}{}
 
-				var fileType = filepath.Ext(fileName)
-				if fileType == "" {
-					fileType = filepath.Base(fileName)
-				}
-				seenExtensions[fileType] = struct{}{}
-				if (line[0] == '-') {
-					currentExtensionBefore = fileType
-				} else {
-					currentExtensionAfter = fileType
-				}
-
-			} else if strings.HasPrefix(line, "@@") {
-				return processRegionHeaderLine(line)
-			}
-			return processFileHeaderLine
-		}
-
-		processRegionHeaderLine = func(line string) stateFn {
-			r.regions++
-			currentRegionBefore.Reset()
-			currentRegionAfter.Reset()
-			return processCodeLine
-		}
-
-		processCodeLine = func(line string) stateFn {
-			if line[0] == ' ' {
-				currentRegionBefore.WriteString(line[1:])
-				currentRegionBefore.WriteString("\n")
-				currentRegionAfter.WriteString(line[1:])
-				currentRegionAfter.WriteString("\n")
-			} else if line[0] == '-' {
-				r.lineDeleted++
-				currentRegionBefore.WriteString(line[1:])
-				currentRegionBefore.WriteString("\n")
-			} else if line[0] == '+' {
-				r.lineAdded++
-				currentRegionAfter.WriteString(line[1:])
-				currentRegionAfter.WriteString("\n")
-			} else {
-				countFunctionCalls(&currentRegionBefore, currentExtensionBefore, &functionCallsBefore)
-				countFunctionCalls(&currentRegionAfter, currentExtensionAfter, &functionCallsAfter)
-				if strings.HasPrefix(line, "@@") {
-					return processRegionHeaderLine(line)
-				} else {
-					inFileHeader = true
-					return processFileHeaderLine(line)
-				}
-			}
-			return processCodeLine
-		}
 
 		var state = processFileHeaderLine
 		for scanner.Scan() {
 			line := scanner.Text()
 
-			state = state(line) // jumping on a trampoline
+			state = state(line)
 		}
 
 		diffFile.Close()

From c4374657541572615277bbce0835a631a0a42dce Mon Sep 17 00:00:00 2001
From: Paul-Andre Henegar <paul-andre.henegar@mail.mcgill.ca>
Date: Mon, 2 Apr 2018 13:11:49 -0400
Subject: [PATCH 10/13] Put everything related to counting function calls into
 a different file

---
 countFunctionCalls.go | 168 +++++++++++++++++++++++++++++++++++++++++
 main.go               | 172 +-----------------------------------------
 2 files changed, 172 insertions(+), 168 deletions(-)
 create mode 100644 countFunctionCalls.go

diff --git a/countFunctionCalls.go b/countFunctionCalls.go
new file mode 100644
index 0000000..d8a4250
--- /dev/null
+++ b/countFunctionCalls.go
@@ -0,0 +1,168 @@
+package main
+
+import (
+	"bytes"
+)
+
+func beginsIdentifier(b byte) bool {
+	return ('a' <= b && b <= 'z') || ('A' <= b && b <= 'Z') || b == '_'
+}
+
+func insideIdentifier(b byte) bool {
+	return ('a' <= b && b <= 'z') || ('A' <= b && b <= 'Z') || ('0' <= b && b <= '9') || b == '_'
+}
+
+type tokenType int
+
+const (
+	endOfString tokenType = -1
+	identifier  tokenType = iota
+	somethingElse
+)
+
+// A "tokenizer" that removes characters to be ignored and splits its input
+// into things that look like identifiers and all other characters.
+//
+// It could be replaced by a more complete tokenizer. One that takes care of
+// comments and strings for example
+type tokenizer struct {
+	text        []byte
+	toBeIgnored []byte
+}
+
+func byteInSlice(b byte, slice []byte) bool {
+	for _, c := range slice {
+		if b == c {
+			return true
+		}
+	}
+	return false
+}
+
+func (r *tokenizer) Next() (token tokenType, text []byte) {
+
+	for len(r.text) > 0 && byteInSlice(r.text[0], r.toBeIgnored) {
+		r.text = r.text[1:]
+	}
+	// Only ignorable bytes (or nothing) were left: signal the end of the input.
+	if len(r.text) == 0 {
+		return endOfString, nil
+	}
+	// An identifier extends over every following byte accepted by insideIdentifier.
+	if beginsIdentifier(r.text[0]) {
+		var i = 1
+		for i < len(r.text) && insideIdentifier(r.text[i]) {
+			i++
+		}
+		var result = r.text[0:i]
+		r.text = r.text[i:]
+		return identifier, result
+	}
+	// Any other byte is returned on its own as a one-byte somethingElse token.
+	var result = r.text[:1]
+	r.text = r.text[1:]
+	return somethingElse, result
+
+}
+
+func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) {
+
+	var keywords = map[string]bool{
+		"if":    true,
+		"for":   true,
+		"while": true,
+		"else":  true,
+	}
+
+	var whitespace = []byte{
+		' ',
+		'\t',
+		'\n',
+		'\r',
+		'\f',
+	}
+
+	var tokenizer = tokenizer{
+		buffer.Bytes(),
+		whitespace,
+	}
+
+	var tokens = [3]tokenType{somethingElse, somethingElse, somethingElse}
+	var strings = [3][]byte{{' '}, {' '}, {' '}}
+	// Slide a three-token window: [previous, candidate name, following token].
+	for {
+		tok, s := tokenizer.Next()
+		if tok == endOfString {
+			return
+		}
+
+		tokens[0], tokens[1], tokens[2] = tokens[1], tokens[2], tok
+		strings[0], strings[1], strings[2] = strings[1], strings[2], s
+		// Count "name(" unless name is a keyword or follows a non-keyword identifier.
+		if !(tokens[0] == identifier && !keywords[string(strings[0])]) &&
+			tokens[1] == identifier && !keywords[string(strings[1])] &&
+			tokens[2] == somethingElse && strings[2][0] == '(' {
+			(*counts)[string(strings[1])]++
+		}
+	}
+}
+
+func countPythonFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) {
+
+	// Since the open parenthesis for a function call must be on the same line as
+	// the name, I only ignore space and tabs.
+	var whitespace = []byte{
+		' ',
+		'\t',
+	}
+	// Keywords that can be followed by '(' without that being a function call.
+	var keywords = map[string]bool{
+		"if":    true,
+		"in":    true,
+		"or":    true,
+		"and":   true,
+		"for":   true,
+		"while": true,
+		"else":  true,
+		"elif":  true,
+		"def":   true,
+	}
+
+	var tokenizer = tokenizer{
+		buffer.Bytes(),
+		whitespace,
+	}
+
+	var tokens = [3]tokenType{somethingElse, somethingElse, somethingElse}
+	var strings = [3][]byte{{' '}, {' '}, {' '}}
+	// Slide a three-token window: [previous, candidate name, following token].
+	for {
+		tok, s := tokenizer.Next()
+		if tok == endOfString {
+			return
+		}
+
+		tokens[0], tokens[1], tokens[2] = tokens[1], tokens[2], tok
+		strings[0], strings[1], strings[2] = strings[1], strings[2], s
+		// Count "name(" unless it follows "def"; look the keyword up by token text.
+		if !(tokens[0] == identifier && string(strings[0]) == "def") &&
+			tokens[1] == identifier && !keywords[string(strings[1])] &&
+			tokens[2] == somethingElse && strings[2][0] == '(' {
+			(*counts)[string(strings[1])]++
+		}
+	}
+}
+
+// countFunctionCalls counts the function calls found in buffer into counts,
+// choosing the language-specific counter from the file extension ext.
+func countFunctionCalls(buffer *bytes.Buffer, ext string, counts *map[string]int) {
+	switch ext {
+	case ".c", ".h":
+		countCFunctionCalls(buffer, counts)
+	case ".py":
+		countPythonFunctionCalls(buffer, counts)
+
+	default:
+		// Any other extension is left uncounted.
+	}
+}
diff --git a/main.go b/main.go
index c2c6f71..edd6731 100644
--- a/main.go
+++ b/main.go
@@ -29,168 +29,6 @@ func main() {
 	fmt.Println(compute())
 }
 
-func beginsIdentifier(b byte) bool {
-	return ('a' <= b && b <= 'z') || ('A' <= b && b <= 'Z') || b == '_'
-}
-func insideIdentifier(b byte) bool {
-	return ('a' <= b && b <= 'z') || ('A' <= b && b <= 'Z') || ('0' <= b && b <= '9') || b == '_'
-}
-
-type tokenType int
-const (
-	endOfString tokenType = -1
-	identifier  tokenType = iota
-	somethingElse
-)
-
-// A "tokenizer" that removes characters to be ignored and splits its input
-// into things that look like identifiers and all other characters.
-//
-// It could be replaced by a more complete tokenizer. One that takes care of
-// comments and strings for example
-type tokenizer struct {
-	text        []byte
-	toBeIgnored []byte
-}
-
-func byteInSlice(b byte, slice []byte) bool {
-	for _, c := range slice {
-		if b == c {
-			return true
-		}
-	}
-	return false
-}
-
-
-func (r *tokenizer) Next() (token tokenType, text []byte) {
-
-	for len(r.text) > 0 && byteInSlice(r.text[0], r.toBeIgnored) {
-		r.text = r.text[1:]
-	}
-
-	if len(r.text) == 0 {
-		return endOfString, nil
-	}
-
-	if beginsIdentifier(r.text[0]) {
-		var i = 1
-		for i < len(r.text) && insideIdentifier(r.text[i]) {
-			i++
-		}
-		var result = r.text[0:i]
-		r.text = r.text[i:]
-		return identifier, result
-	}
-
-	var result = r.text[:1]
-	r.text = r.text[1:]
-	return somethingElse, result
-
-}
-
-func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) {
-
-	var keywords = map[string]bool{
-		"if":    true,
-		"for":   true,
-		"while": true,
-		"else": true,
-	}
-
-	var whitespace = []byte{
-		' ',
-		'\t',
-		'\n',
-		'\r',
-		'\f',
-	}
-
-	var tokenizer = tokenizer{
-		buffer.Bytes(),
-		whitespace,
-	}
-
-	var tokens = [3]tokenType{somethingElse, somethingElse, somethingElse}
-	var strings = [3][]byte{{' '}, {' '}, {' '}}
-
-	for {
-		tok, s := tokenizer.Next()
-		if tok == endOfString {
-			return
-		}
-
-		tokens[0], tokens[1], tokens[2] = tokens[1], tokens[2], tok
-		strings[0], strings[1], strings[2] = strings[1], strings[2], s
-
-		if !(tokens[0] == identifier && !keywords[string(strings[0])]) &&
-			tokens[1] == identifier && !keywords[string(strings[1])] &&
-			tokens[2] == somethingElse && strings[2][0] == '(' {
-			(*counts)[string(strings[1])]++
-		}
-	}
-}
-
-func countPythonFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) {
-
-	// Since the open parenthesis for a function call must be on the same line as
-	// the name, I only ignore space and tabs.
-	var whitespace = []byte{
-		' ',
-		'\t',
-	}
-
-	var keywords = map[string]bool{
-		"if":    true,
-		"in":    true,
-		"or":    true,
-		"and":    true,
-		"for":   true,
-		"while": true,
-		"else": true,
-		"elif": true,
-		"def": true,
-	}
-
-	var tokenizer = tokenizer{
-		buffer.Bytes(),
-		whitespace,
-	}
-
-	var tokens = [3]tokenType{somethingElse, somethingElse, somethingElse}
-	var strings = [3][]byte{{' '}, {' '}, {' '}}
-
-	for {
-		tok, s := tokenizer.Next()
-		if tok == endOfString {
-			return
-		}
-
-		tokens[0], tokens[1], tokens[2] = tokens[1], tokens[2], tok
-		strings[0], strings[1], strings[2] = strings[1], strings[2], s
-
-		if !(tokens[0] == identifier && string(strings[0]) == "def") &&
-		 tokens[1] == identifier && !keywords[string(tokens[1])] &&
-			tokens[2] == somethingElse && strings[2][0] == '(' {
-			(*counts)[string(strings[1])]++
-		}
-	}
-}
-
-//Given a bytes.Buffer containing a code segment, its extension, and a map to
-//use for counting, counts the function calls
-func countFunctionCalls(buffer *bytes.Buffer, ext string, counts *map[string]int) {
-	switch ext {
-	case ".c", ".h":
-		countCFunctionCalls(buffer, counts)
-	case ".py":
-		countPythonFunctionCalls(buffer, counts)
-
-	default:
-
-	}
-}
-
 //compute parses the git diffs in ./diffs and returns
 //a result struct that contains all the relevant informations
 //about these diffs
@@ -214,14 +52,14 @@ func compute() *result {
 	// Here I create a small state machine using state functions
 	type stateFn func(line string) stateFn
 	var processFileHeaderLine,
-	processRegionHeaderLine,
-	processCodeLine stateFn
+		processRegionHeaderLine,
+		processCodeLine stateFn
 
 	processFileHeaderLine = func(line string) stateFn {
 		if strings.HasPrefix(line, "+++") || strings.HasPrefix(line, "---") {
 
 			var fileName = line[len("--- "):]
-			if (fileName != "/dev/null") {
+			if fileName != "/dev/null" {
 				fileName = fileName[len("a/"):]
 			}
 
@@ -234,7 +72,7 @@ func compute() *result {
 				fileType = filepath.Base(fileName)
 			}
 			seenExtensions[fileType] = struct{}{}
-			if (line[0] == '-') {
+			if line[0] == '-' {
 				currentExtensionBefore = fileType
 			} else {
 				currentExtensionAfter = fileType
@@ -293,8 +131,6 @@ func compute() *result {
 
 		scanner := bufio.NewScanner(diffFile)
 
-
-
 		var state = processFileHeaderLine
 		for scanner.Scan() {
 			line := scanner.Text()

From 743bf342f8ffd91531e704543b19156a35754503 Mon Sep 17 00:00:00 2001
From: Paul-Andre Henegar <paul-andre.henegar@mail.mcgill.ca>
Date: Mon, 2 Apr 2018 13:19:14 -0400
Subject: [PATCH 11/13] Made some things slightly more clear

---
 countFunctionCalls.go | 19 ++++++++++---------
 result.go             |  3 ++-
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/countFunctionCalls.go b/countFunctionCalls.go
index d8a4250..abc8d8e 100644
--- a/countFunctionCalls.go
+++ b/countFunctionCalls.go
@@ -24,7 +24,7 @@ const (
 // into things that look like identifiers and all other characters.
 //
 // It could be replaced by a more complete tokenizer. One that takes care of
-// comments and strings for example
+// comments and strings for example.
 type tokenizer struct {
 	text        []byte
 	toBeIgnored []byte
@@ -39,6 +39,7 @@ func byteInSlice(b byte, slice []byte) bool {
 	return false
 }
 
+// Get next token and text slice that goes with it
 func (r *tokenizer) Next() (token tokenType, text []byte) {
 
 	for len(r.text) > 0 && byteInSlice(r.text[0], r.toBeIgnored) {
@@ -67,13 +68,6 @@ func (r *tokenizer) Next() (token tokenType, text []byte) {
 
 func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) {
 
-	var keywords = map[string]bool{
-		"if":    true,
-		"for":   true,
-		"while": true,
-		"else":  true,
-	}
-
 	var whitespace = []byte{
 		' ',
 		'\t',
@@ -82,6 +76,13 @@ func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) {
 		'\f',
 	}
 
+	var keywords = map[string]bool{
+		"if":    true,
+		"for":   true,
+		"while": true,
+		"else":  true,
+	}
+
 	var tokenizer = tokenizer{
 		buffer.Bytes(),
 		whitespace,
@@ -110,7 +111,7 @@ func countCFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) {
 func countPythonFunctionCalls(buffer *bytes.Buffer, counts *map[string]int) {
 
 	// Since the open parenthesis for a function call must be on the same line as
-	// the name, I only ignore space and tabs.
+	// the name, I only ignore spaces and tabs.
 	var whitespace = []byte{
 		' ',
 		'\t',
diff --git a/result.go b/result.go
index 804e5c9..f705ab7 100644
--- a/result.go
+++ b/result.go
@@ -33,7 +33,7 @@ func (r *result) String() string {
 	}
 	buffer.WriteString("Extensions: \n")
 	for _, ext := range r.fileExtensions {
-		buffer.WriteString("	-")
+		buffer.WriteString("\t-")
 		buffer.WriteString(ext)
 		buffer.WriteString("\n")
 	}
@@ -43,6 +43,7 @@ func (r *result) String() string {
 
 	buffer.WriteString("Function calls (before, after): \n")
 	for key, value := range r.functionCalls {
+		buffer.WriteString("\t")
 		buffer.WriteString(key)
 		buffer.WriteString(" : ")
 		buffer.WriteString(strconv.Itoa(value.before))

From 2a6677b89bb2bf619d6c048d32f79f43c9d1fb66 Mon Sep 17 00:00:00 2001
From: Paul-Andre Henegar <paul-andre.henegar@mail.mcgill.ca>
Date: Mon, 2 Apr 2018 13:27:16 -0400
Subject: [PATCH 12/13] Fixed processing last region; some comments

---
 main.go | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/main.go b/main.go
index edd6731..b6439db 100644
--- a/main.go
+++ b/main.go
@@ -43,13 +43,18 @@ func compute() *result {
 	var functionCallsAfter = make(map[string]int)
 	r.functionCalls = make(map[string]struct{ before, after int })
 
+	// I use sets instead of lists for files that we've seen
 	var seenFiles = make(map[string]struct{})
 	var seenExtensions = make(map[string]struct{})
 
+	// When reading in a region, I will be reading it into these buffers
 	var currentRegionBefore, currentRegionAfter bytes.Buffer
+
+	// Extensions for the file. Used to decide how to count functions
 	var currentExtensionBefore, currentExtensionAfter string
 
-	// Here I create a small state machine using state functions
+	// Here I create a small state machine using state functions to read the
+	// relevant info from the diff files.
 	type stateFn func(line string) stateFn
 	var processFileHeaderLine,
 		processRegionHeaderLine,
@@ -71,6 +76,9 @@ func compute() *result {
 				// is significant, like "Makefile"
 				fileType = filepath.Base(fileName)
 			}
+			if fileName == "/dev/null" {
+				fileType = "/dev/null"
+			}
 			seenExtensions[fileType] = struct{}{}
 			if line[0] == '-' {
 				currentExtensionBefore = fileType
@@ -86,8 +94,6 @@ func compute() *result {
 
 	processRegionHeaderLine = func(line string) stateFn {
 		r.regions++
-		currentRegionBefore.Reset()
-		currentRegionAfter.Reset()
 		return processCodeLine
 	}
 
@@ -106,8 +112,12 @@ func compute() *result {
 			currentRegionAfter.WriteString(line[1:])
 			currentRegionAfter.WriteString("\n")
 		} else {
+			// If we finished reading in the region, we process it
 			countFunctionCalls(&currentRegionBefore, currentExtensionBefore, &functionCallsBefore)
 			countFunctionCalls(&currentRegionAfter, currentExtensionAfter, &functionCallsAfter)
+			currentRegionBefore.Reset()
+			currentRegionAfter.Reset()
+
 			if strings.HasPrefix(line, "@@") {
 				return processRegionHeaderLine(line)
 			} else {
@@ -137,10 +147,16 @@ func compute() *result {
 
 			state = state(line)
 		}
+		// Process the last region
+		countFunctionCalls(&currentRegionBefore, currentExtensionBefore, &functionCallsBefore)
+		countFunctionCalls(&currentRegionAfter, currentExtensionAfter, &functionCallsAfter)
+		currentRegionBefore.Reset()
+		currentRegionAfter.Reset()
 
 		diffFile.Close()
 	}
 
+	// Turn set into list
 	for name, _ := range seenFiles {
 		r.files = append(r.files, name)
 	}
@@ -149,12 +165,12 @@ func compute() *result {
 		r.fileExtensions = append(r.fileExtensions, name)
 	}
 
+	// Combine the two functionCalls maps into one
 	for name, times := range functionCallsBefore {
 		var prev = r.functionCalls[name]
 		prev.before += times
 		r.functionCalls[name] = prev
 	}
-
 	for name, times := range functionCallsAfter {
 		var prev = r.functionCalls[name]
 		prev.after += times

From e1024b6939f62f1839305c7c484c3ddc0a4c71cb Mon Sep 17 00:00:00 2001
From: Paul-Andre Henegar <paul-andre.henegar@mail.mcgill.ca>
Date: Mon, 2 Apr 2018 13:29:07 -0400
Subject: [PATCH 13/13] Removed extension list from result

---
 main.go   | 6 ------
 result.go | 8 --------
 2 files changed, 14 deletions(-)

diff --git a/main.go b/main.go
index b6439db..841ceeb 100644
--- a/main.go
+++ b/main.go
@@ -45,7 +45,6 @@ func compute() *result {
 
 	// I use sets instead of lists for files that we've seen
 	var seenFiles = make(map[string]struct{})
-	var seenExtensions = make(map[string]struct{})
 
 	// When reading in a region, I will be reading it into these buffers
 	var currentRegionBefore, currentRegionAfter bytes.Buffer
@@ -79,7 +78,6 @@ func compute() *result {
 			if fileName == "/dev/null" {
 				fileType = "/dev/null"
 			}
-			seenExtensions[fileType] = struct{}{}
 			if line[0] == '-' {
 				currentExtensionBefore = fileType
 			} else {
@@ -161,10 +159,6 @@ func compute() *result {
 		r.files = append(r.files, name)
 	}
 
-	for name, _ := range seenExtensions {
-		r.fileExtensions = append(r.fileExtensions, name)
-	}
-
 	// Combine the two functionCalls maps into one
 	for name, times := range functionCallsBefore {
 		var prev = r.functionCalls[name]
diff --git a/result.go b/result.go
index f705ab7..ddfd8cb 100644
--- a/result.go
+++ b/result.go
@@ -10,8 +10,6 @@ type result struct {
 	//The name of the files seen
 	files []string
-	//The name of the files seen
-	fileExtensions []string
 	//How many region we have (i.e. seperated by @@)
 	regions int
 	//How many line were added total
 	lineAdded int
@@ -31,12 +29,6 @@ func (r *result) String() string {
 		buffer.WriteString(file)
 		buffer.WriteString("\n")
 	}
-	buffer.WriteString("Extensions: \n")
-	for _, ext := range r.fileExtensions {
-		buffer.WriteString("\t-")
-		buffer.WriteString(ext)
-		buffer.WriteString("\n")
-	}
 	r.appendIntValueToBuffer(r.regions, "Regions", &buffer)
 	r.appendIntValueToBuffer(r.lineAdded, "LA", &buffer)
 	r.appendIntValueToBuffer(r.lineDeleted, "LD", &buffer)