diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..53eaa21 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +**/*.rs.bk diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..fa15ea7 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,96 @@ +[[package]] +name = "aho-corasick" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "cfg-if" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "clever-challenge" +version = "0.1.0" +dependencies = [ + "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "lazy_static" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "libc" +version = "0.2.44" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "memchr" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", + "version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "aho-corasick 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", + "thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex-syntax" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "thread_local" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "ucd-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "utf8-ranges" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "version_check" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[metadata] +"checksum aho-corasick 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)" = "1e9a933f4e58658d7b12defcf96dc5c720f20832deebe3e0a19efd3b6aaeeb9e" +"checksum cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4" +"checksum lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a374c89b9db55895453a74c1e38861d9deec0b01b405a82516e9d5de4820dea1" +"checksum libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)" = "10923947f84a519a45c8fefb7dd1b3e8c08747993381adee176d7a82b4195311" +"checksum memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0a3eb002f0535929f1199681417029ebea04aadc0c7a4224b46be99c7f5d6a16" +"checksum regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "37e7cbbd370869ce2e8dff25c7018702d10b21a20ef7135316f8daecd6c25b7f" +"checksum regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "4e47a2ed29da7a9e1960e1639e7a982e6edc6d49be308a3b02daf511504a16d1" +"checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b" +"checksum ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86" +"checksum utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "796f7e48bef87609f7ade7e06495a87d5cd06c7866e6a5cbfceffc558a243737" +"checksum version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..5835908 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "clever-challenge" +version = "0.1.0" +authors = ["Tormyst "] + +[dependencies] +regex = "1.1.0" +lazy_static = "1.2.0" diff --git a/Gopkg.lock b/Gopkg.lock deleted file mode 100644 index bef2d00..0000000 --- a/Gopkg.lock +++ /dev/null @@ -1,9 +0,0 @@ -# This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'. - - -[solve-meta] - analyzer-name = "dep" - analyzer-version = 1 - inputs-digest = "ab4fef131ee828e96ba67d31a7d690bd5f2f42040c6766b1b12fe856f87e0ff7" - solver-name = "gps-cdcl" - solver-version = 1 diff --git a/Gopkg.toml b/Gopkg.toml deleted file mode 100644 index 9425a54..0000000 --- a/Gopkg.toml +++ /dev/null @@ -1,22 +0,0 @@ - -# Gopkg.toml example -# -# Refer to https://github.com/golang/dep/blob/master/docs/Gopkg.toml.md -# for detailed Gopkg.toml documentation. -# -# required = ["github.com/user/thing/cmd/thing"] -# ignored = ["github.com/user/project/pkgX", "bitbucket.org/user/project/pkgA/pkgY"] -# -# [[constraint]] -# name = "github.com/user/project" -# version = "1.0.0" -# -# [[constraint]] -# name = "github.com/user/project2" -# branch = "dev" -# source = "github.com/myfork/project2" -# -# [[override]] -# name = "github.com/x/y" -# version = "2.4.0" - diff --git a/README.md b/README.md index b309fbc..f12fb50 100644 --- a/README.md +++ b/README.md @@ -1,27 +1,6 @@ -# Clever Initiative Challenge +# Clever Initiative Challenge Implementation -The Clever-Initiative is a team of the Technology Group @ Ubisoft Montreal. Our goal is to discover, improve and promote state-of-the-art software engineering practices that aim to ease the production of quality. By augmenting the quality of our products, we mechanically improve the productivity of our teams as they can focus creating features rather than fixing bugs. - -The Clever-Initiative is the team behind the [Submit-Assitant](https://montreal.ubisoft.com/en/ubisoft-la-forge-presents-the-commit-assistant/) (a.k.a Clever-Commit) that received some press coverage recently: [Wired](http://www.wired.co.uk/article/ubisoft-commit-assist-ai), [MIT](https://www.technologyreview.com/the-download/610416/ai-can-help-spot-coding-mistakes-before-they-happen/), and [more](https://www.google.ca/search?q=commit+assistant+ubisoft). The scientific foundation behind our work have been accepted for publication to [MSR'18](https://montreal.ubisoft.com/en/clever-combining-code-metrics-with-clone-detection-for-just-in-time-fault-prevention-and-resolution-in-large-industrial-projects-2/), [CPPCON'18](https://www.youtube.com/watch?v=QDvic0QNtOY). - -We are currently looking for trainees to join us (Winter'19). The length and start date of the internship will be discussed on a per applicant basis. - -## Trainees - -A trainee applicant must: - -- Be engaged in a computer science (or related) university program. -- Be able to work in Canada legally. -- Be willing to come to Montreal. -- Be able to read, understand and implement scientific papers. -- Know: - - versionning systems (git, perforce, ...) - - c/c++/csharp or java -- Know or be willing to learn: - - golang - - docker - - sql - - angular +Within this repo, you will find an implementation of the Clever Initiative Challenge written in rust. The focus will be to keep the goals for extendability, maintainability and efficiency during the process of solving the problem. ## The Challenge @@ -35,38 +14,41 @@ The challenge for trainee applicant consists in parsing a few diffs--in the most All these stats are to be computed globally (i.e. for all the diffs combined). -In the main.go file; you'll find the `compute` method that needs to be implemented. +## Permanent Positions -```golang -//compute parses the git diffs in ./diffs and returns -//a result struct that contains all the relevant information -//about these diffs -// list of files in the diffs -// number of regions -// number of line added -// number of line deleted -// list of function calls seen in the diffs and their number of calls -func compute() *result { +I am applying for a permanent positions. I understand that this is meant for less permanent positions. However, as I was directly sent here, I will be completing this challenge. - return nil -} -``` +## Why rust? -To enter the challenge: +I wish I could do this in Go. I don't believe in presenting something in a programing language while I am learning it. As I would be new to Go, I would feel it is not my best work. -- Fork this repository -- Implement your solution -- Open a pull request with your solution. In the body of the pull request, you can explain the choices you made if necessary. +While I am new to rust, my major Rust side project [FeGaBo](github.com/tormyst/FeGaBo) has given me enough experience with the language. -You can alter the data structure, add files, remove files ... you can even start from scratch in another language if you feel like it. -However, note that we do use golang internally. +Using rust has interesting benefits. While possible to make efficient solutions in other languages, the advantages that rust offers pushes a lot of work onto the compiler. Ensuring that a task can multithreaded is simple in rust as if the operation is not memory safe, Rust will not compile. Casts and mutability are brought forward ensuring only what needs to be done is. -If you don't feel comfortable sharing your pull-request with the world (pull-request are public) you can invite me (@mathieunls for github, bitbucket and gitlab) and Florent Jousset (@heeko for github, bitbucket and gitlab) to a private repo of yours. Don't bother to send code by email, we won't read it. +While this solution did not use threads, they could be added relativly easaly to have each file be processed individualy. -## Permanent Positions +## Building this project + +This is a `cargo` project tested on stable rust. + +Once everything installed, best done through rustup (which can be installed through most package managers), you can use `cargo run` to run the solution + +For speed, try compiling under release: `cargo run --release` -We are also looking for permanent members to join our team. If you are interested, mail our human resource [contact](mailto:alison.laplante-rayworth@ubisoft.com?subject=Clever-Initiative) with your resume. You can submit your pull request for the challenge. However, you'll be subjected to an in-depth (much harder) coding test. This one has been conceived for students only and it might not be worth your time to take it ;). +## Anything else? -- [ ] Software Engineer / ML Dev (python, go, ml, sql, ...) -- [ ] Software Engineer / Backend Dev (go, c, cfg, ast, k8s, redis, ...) -- [ ] Software Engineer / Tool devs (csharp, python, cfg, ast, ...) +A check through of A trainee applicant must: + +- Be engaged in a computer science (or related) university program. (Compleated with masters) +- Be able to work in Canada legally. (Yes) +- Be willing to come to Montreal. (Already here) +- Be able to read, understand and implement scientific papers. (Did that during my masters) +- Know: + - versionning systems (git, perforce, ...) (I know both of those also svn and something called accurev) + - c/c++/csharp or java (c, c++,csharp and java) +- Know or be willing to learn: + - golang (It's next on my list, just wanted to get through this first) + - docker (I know a bit about how they work and have used it several times, but not in depth) + - sql (A few variants) + - angular (I am no designer, but I have made some fun things in angular, but not an expert) diff --git a/main.go b/main.go deleted file mode 100644 index f62aa00..0000000 --- a/main.go +++ /dev/null @@ -1,37 +0,0 @@ -package main - -import ( - "fmt" - "time" -) - -//timeTrack tracks the time it took to do things. -//It's a convenient method that you can use everywhere -//you feel like it -func timeTrack(start time.Time, name string) { - elapsed := time.Since(start) - fmt.Printf("%s took %s\n", name, elapsed) -} - -//main is the entry point of our go program. It defers -//the execution of timeTrack so we can know how long it -//took for the main to complete. -//It also calls the compute and output the returned struct -//to stdout. -func main() { - defer timeTrack(time.Now(), "compute diff") - fmt.Println(compute()) -} - -//compute parses the git diffs in ./diffs and returns -//a result struct that contains all the relevant informations -//about these diffs -// list of files in the diffs -// number of regions -// number of line added -// number of line deleted -// list of function calls seen in the diffs and their number of calls -func compute() *result { - - return nil -} diff --git a/result.go b/result.go deleted file mode 100644 index 7e78236..0000000 --- a/result.go +++ /dev/null @@ -1,50 +0,0 @@ -package main - -import ( - "bytes" - "strconv" -) - -//result contains an analysis of a set of commit -type result struct { - //The name of the files seen - files []string - //How many region we have (i.e. seperated by @@) - regions int - //How many line were added total - lineAdded int - //How many line were deleted totla - lineDeleted int - //How many times the function seen in the code are called. - functionCalls map[string]int -} - -//String returns the value of results as a formated string -func (r *result) String() string { - - var buffer bytes.Buffer - buffer.WriteString("Files: \n") - for _, file := range r.files { - buffer.WriteString(" -") - buffer.WriteString(file) - buffer.WriteString("\n") - } - r.appendIntValueToBuffer(r.regions, "Regions", &buffer) - r.appendIntValueToBuffer(r.lineAdded, "LA", &buffer) - r.appendIntValueToBuffer(r.lineDeleted, "LD", &buffer) - - buffer.WriteString("Functions calls: \n") - for key, value := range r.functionCalls { - r.appendIntValueToBuffer(value, key, &buffer) - } - - return buffer.String() -} - -//appendIntValueToBuffer appends int value to a bytes buffer -func (r result) appendIntValueToBuffer(value int, label string, buffer *bytes.Buffer) { - buffer.WriteString(label) - buffer.WriteString(" : ") - buffer.WriteString(strconv.Itoa(value)) - buffer.WriteString("\n") -} diff --git a/src/diff/diff_parse.rs b/src/diff/diff_parse.rs new file mode 100644 index 0000000..e11b21c --- /dev/null +++ b/src/diff/diff_parse.rs @@ -0,0 +1,49 @@ +use result::Result; +use regex::Regex; + +/// Returns the two filenames present in the diff header. +/// We are assuming --git, and this will ensure it as any non diff header will crash. +/// This is important as otherwise, we would be storing each filename twice: once for a path and +/// once for b path. +/// +/// This implementation assumes a path is any collection of characters that is not a space. +pub fn header_filenames(header: &str) -> (String, String) { + lazy_static! { + static ref HEADER_GIT_FILENAME: Regex = Regex::new(r"^diff --git a/([^ ]+) b/([^ ]+)$").unwrap(); + } + let groups = HEADER_GIT_FILENAME.captures(header).unwrap(); + (groups.get(1).unwrap().as_str().to_string(), groups.get(2).unwrap().as_str().to_string()) +} + +/// Finds and adds all functions to a given result structure. +/// This is the slowest part of the diff stats process as we have to iterate through a utf8 string +/// doing constant comparisons. +/// For the purposes of this chalange, I am defining a function given the regex bellow. A number +/// of word characters followed imidiatly by an open bracket. The issue with this structure is +/// that it is unlikely to be correct. Languages like C can have as much whitespace as wanted +/// between the name of the function and the open bracket. Likewise, the string 'for(' should not +/// be a function identifier, however this regex would. +/// +/// All in all, this is the best that can be done in a short time as the alternetive would be to +/// actually understand whatever language we are sifting though and then find funtions as defined +/// by the language itself. +/// +/// # Why are we passing in a result structure? +/// +/// This function has the beautiful disadvantage of being a variable size responce. +/// We don't know how many bytes any return of find_functions will take as it may return 0 or 5. +/// We however could return a Results structure: Initializing say 10_000 result structures, with +/// 10_000 empty sets for filenames is fairly useless. +/// Returning HashSet: We could return just the part of the results we wanted, the hashset and +/// create a function to add them together, or to open the underlying structure of the result. +/// I went with passing the result structure in directly. This is something I feel needs to be +/// fixed, however many other options seem unoptimal. +pub fn find_functions(string: &str, result: &mut Result) { + lazy_static! { + static ref FUNCTION_REGEX: Regex = Regex::new(r"\w+\(").unwrap(); + } + FUNCTION_REGEX + .find_iter(string) + .map(|function| format!("{})", function.as_str())) + .for_each(|string| result.add_function_call(string)) +} diff --git a/src/diff/diff_type.rs b/src/diff/diff_type.rs new file mode 100644 index 0000000..07f523a --- /dev/null +++ b/src/diff/diff_type.rs @@ -0,0 +1,207 @@ +/// Represents a kind of diff line. +/// The diff is split into 2 different parts: +/// Header information +/// File information +/// The problem is that this information is not split clearly. The header is not constant and may +/// lack any part of the header or body. +/// +/// In this enum, all posible types of lines are sorted. This is important even if the line is not +/// needed now, because if data is needed from a line later, we have the infromation labeled. +#[derive(Debug, Clone)] +pub enum DiffType { + /// The begining of a diff of the form: 'diff --git a/path/to/file b/path/tofile' + Header, + /// Index information about a file: 'index 0123456..789abcd 100644' + Index, + /// The path to the original file, the first file in the header: '--- a/path/to/file' + OriginalFile, + /// The path to the new file, the second file in the header: '+++ b/path/to/file' + NewFile, + /// Optional header to indicate that a files mode differs changed: 'new file mode 100644' + NewMode, + /// The start of a file region: '@@ -888,12 +1002,33 @@ part of the file here' + NewRegion, + /// The indication that the file was deleted: 'deleted file mode 100644' + FileDeleted, + /// A line of a file which is the same between the given files: ' always with a space' + FileLine, + /// A line that exists in the new file, but not the original: '+the line of the file' + Addition, + /// A line that exists in the original file, but not the new: '-the line of the file' + Subtraction, +} + + +impl DiffType { + /// Returns if a type is part of the file body, or has file body parts to it. + /// This is useful as the slowest part of the process would be parcing the entire file for + /// functions. + pub fn is_file_body(&self) -> bool { + match self { + DiffType::FileLine | DiffType::Addition | DiffType::Subtraction | + DiffType::NewRegion => true, + _ => false, + } + } +} + +/// Returns the diff status of a line. There are a few states, including the different header +/// lines, normal file lines, additions, and subtractions. +/// +/// This will do the minimum work required to figure out what type a line is because it assumes the +/// string slice it is given starts at the begining of the line and that the file is a valid diff +/// file. Any unknown first letter of the line is treated as FileLine, as are empty lines. +/// +/// # Known Issues +/// +/// Currently, OriginalFile and NewFile are treeted as Addition and Subtraction lines. +/// This is an issue of only using the first letter. While it could be improved, there are edge +/// cases that cannot be resolved such as an additon line adding a line with 2 or more '+' +/// characters. To avoid this, this system uses a blind approch that is not attempting to solve +/// this issue. +/// +/// # Arguments +/// +/// `line` - A string slice of a line of a diff file. +/// +/// # Example +/// ``` +/// assert!(diff_type("diff --git ...") == DiffType::Header) +fn diff_type(line: &str) -> DiffType { + match line.chars().next().unwrap_or(' ') { + 'd' => DiffType::Header, + 'i' => DiffType::Index, + 'n' => DiffType::NewMode, + '@' => DiffType::NewRegion, + '+' => DiffType::Addition, + '-' => DiffType::Subtraction, + ' ' => DiffType::FileLine, + _ => DiffType::FileLine, + } +} + +/// Stateful Diff reading to avoid confusion over header and body. +/// Place all stateful information here. +/// +/// TODO: If you would want to know how many functions came from a given file, +/// putting a current file calculation here would be how to do it. +pub struct DiffFormatTyper { + last: Option, +} + +impl DiffFormatTyper { + /// A new reader that should be started at the top of the file, however, it can start at any + /// header line. + pub fn new() -> Self { + DiffFormatTyper { last: None } + } + + /// A state machine that uses the guess of diff_type to create the actual type of a line and to + /// fail when it is not sure. + /// + /// Many just check that this line is expected and return it. Some however take the guess, do + /// a string compare to the minimum degree required and return the updated value. + /// + /// # Returns + /// + /// A value of Some(data) is considered a correct responce from the function. + /// None is the universal error. We also reset the state machine on error, and will continue + /// to output correct responces at the next header. + /// + /// A line to STDERR is sent indicating the error based on the string given and the current + /// state of the machine. + /// + /// # Note + /// + /// This section is messy and should be replaced with macros. + pub fn type_line(&mut self, line: &str) -> Option { + let diff_type = diff_type(line); + let diff_type_fixed = match self.last.clone() { + None => { + match diff_type { + DiffType::Header => Some(DiffType::Header), + _ => None, + } + } + Some(last_type) => { + match last_type { + DiffType::Header => { + match diff_type { + this @ DiffType::Index | + this @ DiffType::NewMode => Some(this), + DiffType::Header => { + if line.starts_with("deleted") { + Some(DiffType::FileDeleted) + } else { + None + } + } + _ => None, + } + } + DiffType::Index => { + match diff_type { + this @ DiffType::Header => Some(this), + DiffType::Subtraction => { + if line.starts_with("---") { + Some(DiffType::OriginalFile) + } else { + None + } + } + _ => None, + } + } + DiffType::OriginalFile => { + match diff_type { + DiffType::Addition => { + if line.starts_with("+++") { + Some(DiffType::NewFile) + } else { + None + } + } + _ => None, + } + } + DiffType::NewFile => { + match diff_type { + this @ DiffType::NewRegion => Some(this), + _ => None, + } + } + DiffType::NewMode => { + match diff_type { + this @ DiffType::Index => Some(this), + _ => None, + } + } + DiffType::FileDeleted => { + match diff_type { + this @ DiffType::Index => Some(this), + _ => None, + } + } + DiffType::NewRegion | DiffType::FileLine | DiffType::Addition | + DiffType::Subtraction => { + match diff_type { + this @ DiffType::FileLine | + this @ DiffType::Addition | + this @ DiffType::Subtraction | + this @ DiffType::NewRegion | + this @ DiffType::Header => Some(this), + _ => None, + } + } + } + } + }; + if let None = diff_type_fixed { + eprintln!("Error: Diff file is not formated correctly. Line \"{}\" after {:?} was unexpected.", + line, + self.last); + } + self.last = diff_type_fixed; + self.last.clone() + } +} diff --git a/src/diff/mod.rs b/src/diff/mod.rs new file mode 100644 index 0000000..dda5928 --- /dev/null +++ b/src/diff/mod.rs @@ -0,0 +1,97 @@ +use result::Result; // Result directly is our result. +use std::collections::{HashMap, HashSet}; +use std::fs::read; +use std::path::Path; + +mod diff_type; +use self::diff_type::{DiffType, DiffFormatTyper}; + +mod diff_parse; +use self::diff_parse::{find_functions, header_filenames}; + +/// Given a file, return statistical information about the file as if it were a diff file. +/// Returns Some Result if the file could be read and None if the reading failed. +/// +/// # Arguments +/// +/// `file` - The path to the file to be analized +/// +/// # Advice +/// +/// This will be the main function per file. +/// It is adviced to create a handler around this as this function is not taking exclusive +/// rights to the path. The path should be secured by the handler for the sake of returning +/// an error message if the path was invalid and no data could be read. +pub fn diffStats(file: &Path) -> Option { + if let Ok(file_data) = read(file) { + let mut retVal = Result::new(HashSet::new(), 0, 0, 0, HashMap::new()); + let file_data = match String::from_utf8(file_data) { + Ok(data) => data, + Err(_) => { + eprintln!("Error: Could not encode file as utf8"); + return None; + } + }; + let mut diff_format_typer = DiffFormatTyper::new(); + let lines = file_data.lines(); + for line in lines { + let diff_type = diff_format_typer.type_line(line); + match diff_type { + None => break, + Some(DiffType::Header) => { + let (file1, file2) = header_filenames(line); + retVal.add_filename(file1); + retVal.add_filename(file2); + } + Some(DiffType::NewRegion) => retVal.add_region(), + Some(DiffType::Addition) => retVal.count_added_line(), + Some(DiffType::Subtraction) => retVal.count_removed_line(), + _ => {} + }; + if let Some(diff_type) = diff_type { + if diff_type.is_file_body() { + find_functions(line, &mut retVal); + } + } + } + + Some(retVal) + } else { + None // During file error, we simply return nothing to indicate that the file has no contents instead of valid contents with nothing in it. + } +} + +#[cfg(test)] +mod tests { + use std::path::Path; + + use result::Result; + use super::diffStats; + + #[test] + fn invaldFile() { + let path = Path::new("/INVALID"); + assert!(match diffStats(&path) { + None => true, + _ => false, + }); + } + + #[test] + fn emptyFile() { + use std::fs::{write, remove_file}; + let filename = "test.tmp"; // We create an empty file just to be sure it exists. + write(&filename, ""); + let path = Path::new(&filename); + assert!(match diffStats(&path) { + Some(_) => true, + _ => false, + }); + // TODO implementing PartialEq on result to improve this test to the next line. + // assert!(match diffStats(&path){ Some(data) => data == Result::empty(), _ => false}); + remove_file(&filename); // Cleanup that file so we don't keep creating different files. + + // TODO crate tempfile should be introduced in test setup to improve this test by + // removing the issue with creating and removing a file. + } +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..f0a096a --- /dev/null +++ b/src/main.rs @@ -0,0 +1,66 @@ +#![allow(non_snake_case)] +extern crate regex; +#[macro_use] extern crate lazy_static; + +use std::time::Instant; +use std::path::Path; + +mod result; +use result::Result; + +mod diff; + +// timeTrack has been modified in the conversion from go to Rust. +// Unfortunetly, the time library has many direct time functions +// still marked as nightly. +fn timeTrack(start: Instant, string: &'static str) { + let elapsed = start.elapsed(); + print!("{} took {} seconds and {} nanoseconds\n", + string, + elapsed.as_secs(), + elapsed.subsec_nanos()); +} + +// main is the entry point of our Rust program. +// +// The go line: defer timeTrack(time.Now(), "compute diff") +// translates as our first and last lines. Rust has no defer +// as that is memory that is held for the duration of the function +// secretly and against the visibility that rust strives for. +// +// It also calls the compute and Display method of the returned struct +// to stdout. +fn main() { + let now = Instant::now(); + + let compute = compute().unwrap(); + timeTrack(now, "compute diff"); + let now = Instant::now(); + println!("{}", compute); + timeTrack(now, "compute print"); +} + +// compute parses the git diffs in ./diffs and returns +// a result struct that contains all the relevant informations +// about these diffs +// list of files in the diffs +// number of regions +// number of line added +// number of line deleted +// list of function calls seen in the diffs and their number of calls +fn compute() -> std::result::Result { + let mut retVal = Result::empty(); + let data_folder = Path::new("./diffs"); + + for entry in data_folder.read_dir()? { + let entry = entry?; + if entry.path().is_file() { + match diff::diffStats(entry.path().as_path()) { + Some(result) => retVal += result, + None => eprintln!("Error reading stats for path {}. Are you sure it exists?", entry.path().display()) + } + } + } + + Ok(retVal) +} diff --git a/src/result.rs b/src/result.rs new file mode 100644 index 0000000..cca3721 --- /dev/null +++ b/src/result.rs @@ -0,0 +1,102 @@ +use std::fmt; +use std::collections::{HashSet, HashMap}; +use std::ops::AddAssign; + +// result contains an analysis of a set of commits +pub struct Result { + // The name of the files seen + files: HashSet, + // How many regions we have (i.e. seperated by @@) + regions: usize, + // How many lines were added total + lineAdded: usize, + // How many lines were deleted total + lineDeleted: usize, + // How many times the function seen in the code are called + functionCalls: HashMap, +} + +impl Result { + // Creates an empty Result struct. + pub fn empty() -> Self { + Self::new(HashSet::new(), 0, 0, 0, HashMap::new()) + } + + // Constructor for a Result struct that expects everything to be handed to it. + pub fn new(files: HashSet, + regions: usize, + lineAdded: usize, + lineDeleted: usize, + functionCalls: HashMap) + -> Self { + Result { + files, + regions, + lineAdded, + lineDeleted, + functionCalls, + } + } + + pub fn add_filename(&mut self, filename: String) { + self.files.insert(filename); + } + + pub fn add_function_call(&mut self, function: String) { + let current = *self.functionCalls.get(&function).unwrap_or(&0); + self.functionCalls.insert(function,current + 1); + } + + pub fn add_region(&mut self) { + self.regions += 1 + } + + pub fn count_added_line(&mut self) { + self.lineAdded += 1 + } + + pub fn count_removed_line(&mut self) { + self.lineDeleted += 1 + } +} + +// Implementation of the formating system in rust ensuring that the structure may be +// printed in a way close to that of the print statment provided in the go structure. +impl fmt::Display for Result { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "Files: \n")?; + for file in &self.files { + write!(f, " -{}\n", file)?; + } + write!(f, "Regions: {}\n", self.regions)?; + write!(f, "LA: {}\n", self.lineAdded)?; + write!(f, "LD: {}\n", self.lineDeleted)?; + + write!(f, "Functions calls: \n")?; + for (key, value) in &self.functionCalls { + write!(f, " {}: {}\n", key, value)?; + } + Ok(()) + } +} + +// Iplementation of the += opperator which is useful syntacticaly to clearly demonstrait +// how results are combined. +impl AddAssign for Result { + fn add_assign(&mut self, mut other: Result) { + other + .files + .drain() + .for_each(|file| { self.files.insert(file); }); + self.regions += other.regions; + self.lineAdded += other.lineAdded; + self.lineDeleted += other.lineDeleted; + other + .functionCalls + .drain() + .for_each(|(key, value)| { + let to_add = *self.functionCalls.get(&key).unwrap_or(&0); + self.functionCalls.insert(key, value + to_add); + }); + } +}