From 687159a720bd16e9f35afc8fcf4af4ed9e16dbe4 Mon Sep 17 00:00:00 2001 From: SoggyRihno <94922205+SoggyRihno@users.noreply.github.com> Date: Thu, 4 Dec 2025 18:39:10 -0600 Subject: [PATCH 1/4] cleanup loader poc test --- parser/gradeLoader.go | 96 +++++++++++++------------------------- parser/gradeLoader_test.go | 14 ++++++ parser/parser.go | 22 ++++++--- 3 files changed, 61 insertions(+), 71 deletions(-) create mode 100644 parser/gradeLoader_test.go diff --git a/parser/gradeLoader.go b/parser/gradeLoader.go index 57c7d7a..7ae54bc 100644 --- a/parser/gradeLoader.go +++ b/parser/gradeLoader.go @@ -3,81 +3,48 @@ package parser import ( "encoding/csv" "fmt" + "github.com/UTDNebula/api-tools/utils" "log" "os" - "path/filepath" "strconv" "strings" ) -var grades = []string{"A+", "A", "A-", "B+", "B", "B-", "C+", "C", "C-", "D+", "D", "D-", "F", "W", "P", "CR", "NC", "I"} - -func loadGrades(csvDir string) map[string]map[string][]int { +var ( + grades = []string{"A+", "A", "A-", "B+", "B", "B-", "C+", "C", "C-", "D+", "D", "D-", "F", "W", "P", "CR", "NC", "I"} + optionalColumns = []string{"W", "P", "CR", "NC", "I"} + requiredColumns = []string{"Section", "Subject", "Catalog Number", "A+"} +) +func loadGrades(csvDir string) (map[string]map[string][]int, error) { // MAP[SEMESTER] -> MAP[SUBJECT + NUMBER + SECTION] -> GRADE DISTRIBUTION gradeMap := make(map[string]map[string][]int) - if csvDir == "" { - log.Print("No grade data CSV directory specified. Grade data will not be included.") - return gradeMap - } - - dirPtr, err := os.Open(csvDir) - if err != nil { - panic(err) - } - defer dirPtr.Close() + fileNames := utils.GetAllFilesWithExtension(csvDir, ".csv") + for _, name := range fileNames { - csvFiles, err := dirPtr.ReadDir(-1) - if err != nil { - panic(err) - } - - for _, csvEntry := range csvFiles { - - if csvEntry.IsDir() { - continue - } - - csvPath := fmt.Sprintf("%s/%s", csvDir, csvEntry.Name()) - - csvFile, err := os.Open(csvPath) + var err error + gradeMap[name], err = csvToMap(name) if err != nil { - panic(err) - } - defer csvFile.Close() - - // Create logs directory - if _, err := os.Stat("./logs/grades"); err != nil { - os.Mkdir("./logs/grades", os.ModePerm) + return gradeMap, fmt.Errorf("error parsing %s: %v", name, err) } - - // Create log file [name of csv].log in logs directory - basePath := filepath.Base(csvPath) - csvName := strings.TrimSuffix(basePath, filepath.Ext(basePath)) - logFile, err := os.Create("./logs/grades/" + csvName + ".log") - - if err != nil { - log.Panic("Could not create CSV log file.") - } - defer logFile.Close() - - // Put data from csv into map - gradeMap[csvName] = csvToMap(csvFile, logFile) } - - return gradeMap + return gradeMap, nil } -func csvToMap(csvFile *os.File, logFile *os.File) map[string][]int { - reader := csv.NewReader(csvFile) - records, err := reader.ReadAll() // records is [][]strings +func csvToMap(filename string) (map[string][]int, error) { + file, err := os.Open(filename) if err != nil { - log.Panicf("Error parsing %s: %s", csvFile.Name(), err.Error()) + return nil, fmt.Errorf("error opening CSV file '%s': %v", filename, err) } - indexMap := make(map[string]int) + reader := csv.NewReader(file) + records, err := reader.ReadAll() + if err != nil { + return nil, fmt.Errorf("error parsing %s: %v", filename, err) + } + indexMap := make(map[string]int) for j, col := range records[0] { switch col { case "Catalog Number", "Catalog Nbr": @@ -89,18 +56,15 @@ func csvToMap(csvFile *os.File, logFile *os.File) map[string][]int { } } - // required columns - for _, name := range []string{"Section", "Subject", "Catalog Number", "A+"} { + for _, name := range requiredColumns { if _, ok := indexMap[name]; !ok { - fmt.Fprintf(logFile, "could not find %s column", name) - log.Panicf("could not find %s column", name) + return nil, fmt.Errorf("could not find %s column in %s", name, filename) } } - // optional columns - for _, name := range []string{"W", "P", "CR", "NC", "I"} { + for _, name := range optionalColumns { if _, ok := indexMap[name]; !ok { - logFile.WriteString(fmt.Sprintf("could not find %s column\n", name)) + log.Printf("could not find %s column in %s", name, filename) } } @@ -109,7 +73,6 @@ func csvToMap(csvFile *os.File, logFile *os.File) map[string][]int { catalogNumberCol := indexMap["Catalog Number"] distroMap := make(map[string][]int) - for _, record := range records[1:] { // convert grade distribution from string to int intSlice := make([]int, len(grades)) @@ -125,5 +88,10 @@ func csvToMap(csvFile *os.File, logFile *os.File) map[string][]int { distroKey := record[subjectCol] + record[catalogNumberCol] + trimmedSectionNumber distroMap[distroKey] = intSlice[:] } - return distroMap + + if err := file.Close(); err != nil { + return nil, fmt.Errorf("failed to close file '%s': %v", filename, err) + } + + return distroMap, nil } diff --git a/parser/gradeLoader_test.go b/parser/gradeLoader_test.go new file mode 100644 index 0000000..b7a414b --- /dev/null +++ b/parser/gradeLoader_test.go @@ -0,0 +1,14 @@ +package parser + +import ( + "testing" +) + +func TestLoadGrades(t *testing.T) { + + _, err := loadGrades("../grade-data/") + if err != nil { + t.Errorf("loadGrades() error = %v", err) + } + +} diff --git a/parser/parser.go b/parser/parser.go index 2b7049e..1dd2241 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -40,17 +40,25 @@ var ( timeLocation, timeError = time.LoadLocation("America/Chicago") ) +func init() { + if timeError != nil { + log.Fatalf("Failed to initialize timeLocation: %v", timeError) + } +} + // Parse loads scraped course artifacts, applies parsing and validation, and persists structured results. func Parse(inDir string, outDir string, csvPath string, skipValidation bool) { - // Panic if timeLocation didn't load properly - if timeError != nil { - panic(timeError) - } + if csvPath == "" { + log.Print("No grade data CSV directory specified. Grade data will not be included.") + } else { + var err error + GradeMap, err = loadGrades(csvPath) - // Load grade data from csv in advance - GradeMap = loadGrades(csvPath) - if len(GradeMap) != 0 { + if err != nil { + log.Fatalf("Failed to load grade data: %v", err) + return + } log.Printf("Loaded grade distributions for %d semesters.", len(GradeMap)) } From 7513640730ac343ce4d67828045e3c47c0f17b74 Mon Sep 17 00:00:00 2001 From: SoggyRihno <94922205+SoggyRihno@users.noreply.github.com> Date: Thu, 22 Jan 2026 20:53:34 -0600 Subject: [PATCH 2/4] Cleaned up gradeLoader code and updated parser tests to use grade data --- parser/gradeLoader.go | 22 +++++++++---- parser/parser_test.go | 77 +++++++++++++++++++++++++++++++++++-------- utils/methods.go | 4 +-- 3 files changed, 81 insertions(+), 22 deletions(-) diff --git a/parser/gradeLoader.go b/parser/gradeLoader.go index 7ae54bc..43e4855 100644 --- a/parser/gradeLoader.go +++ b/parser/gradeLoader.go @@ -3,17 +3,20 @@ package parser import ( "encoding/csv" "fmt" - "github.com/UTDNebula/api-tools/utils" "log" "os" + "regexp" "strconv" "strings" + + "github.com/UTDNebula/api-tools/utils" ) var ( grades = []string{"A+", "A", "A-", "B+", "B", "B-", "C+", "C", "C-", "D+", "D", "D-", "F", "W", "P", "CR", "NC", "I"} optionalColumns = []string{"W", "P", "CR", "NC", "I"} requiredColumns = []string{"Section", "Subject", "Catalog Number", "A+"} + semesterRegex = regexp.MustCompile(`[1-9][0-9][USF]`) ) func loadGrades(csvDir string) (map[string]map[string][]int, error) { @@ -23,12 +26,18 @@ func loadGrades(csvDir string) (map[string]map[string][]int, error) { fileNames := utils.GetAllFilesWithExtension(csvDir, ".csv") for _, name := range fileNames { + semester := semesterRegex.FindString(name) + if semester == "" { + return gradeMap, fmt.Errorf("invalid name %s, must match format {>10}{F,S,U} i.e. 22F", name) + } + var err error - gradeMap[name], err = csvToMap(name) + gradeMap[semester], err = csvToMap(name) if err != nil { return gradeMap, fmt.Errorf("error parsing %s: %v", name, err) } } + return gradeMap, nil } @@ -37,6 +46,11 @@ func csvToMap(filename string) (map[string][]int, error) { if err != nil { return nil, fmt.Errorf("error opening CSV file '%s': %v", filename, err) } + defer func(file *os.File) { + if err := file.Close(); err != nil { + log.Printf("failed to close file '%s': %v", filename, err) + } + }(file) reader := csv.NewReader(file) records, err := reader.ReadAll() @@ -89,9 +103,5 @@ func csvToMap(filename string) (map[string][]int, error) { distroMap[distroKey] = intSlice[:] } - if err := file.Close(); err != nil { - return nil, fmt.Errorf("failed to close file '%s': %v", filename, err) - } - return distroMap, nil } diff --git a/parser/parser_test.go b/parser/parser_test.go index cee8873..41e7af4 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -34,7 +34,7 @@ type TestData struct { // testData global dictionary containing the data from /testdata by folder name var testData map[string]TestData -// TestMain loads parser fixtures and handles the -update flag for regenerating expectations. +// TestMain loads parser fixtures and handles the `-update` flag for regenerating expectations. func TestMain(m *testing.M) { update := flag.Bool("update", false, "Regenerates the expected output for the provided test inputs. Should only be used when you are 100% sure your code is correct! It will make all test pass :)") @@ -132,10 +132,13 @@ func updateTestData() error { } defer os.RemoveAll(tempDir) - //Fill temp dir with all the test cases and expected values + GradeMap, err = loadGrades("../grade-data") + if err != nil { + return err + } + //Fill temp dir with all the test cases and expected values duplicates := make(map[string]bool) - for i, input := range utils.GetAllFilesWithExtension("testdata", ".html") { parse(input) @@ -214,20 +217,66 @@ func updateTestData() error { //rerun parser to get Courses.json, Sections.json, Professors.json - //Parse(tempDir, tempDir, "../grade-data", false) - //Grade data isn't work with tests currently - Parse(tempDir, tempDir, "", false) + Parse(tempDir, tempDir, "../grade-data", false) + + targetDir := "testdata" + + err = filepath.Walk(tempDir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + relPath, err := filepath.Rel(tempDir, path) + if err != nil { + return err + } + destPath := filepath.Join(targetDir, relPath) + + if info.IsDir() { + return os.MkdirAll(destPath, 0755) + } + + newContent, err := os.ReadFile(path) + if err != nil { + return err + } + + if existingContent, err := os.ReadFile(destPath); err == nil { + if bytes.Equal(newContent, existingContent) { + return nil + } + } + + log.Printf("Updating file: %s", destPath) + return os.WriteFile(destPath, newContent, 0644) + }) - //overwrite the current test data with the new data - if err := os.RemoveAll("testdata"); err != nil { - return fmt.Errorf("failed to remove testdata: %v", err) + if err != nil { + return fmt.Errorf("failed to sync test data: %v", err) } - if err := os.CopyFS("testdata", os.DirFS(tempDir)); err != nil { - return fmt.Errorf("failed to copy testdata: %v", err) + err = filepath.Walk(targetDir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + relPath, err := filepath.Rel(targetDir, path) + if err != nil { + return err + } + + srcPath := filepath.Join(tempDir, relPath) + if _, err := os.Stat(srcPath); os.IsNotExist(err) { + log.Printf("Removing stale file: %s", path) + return os.RemoveAll(path) + } + return nil + }) + + if err != nil { + return fmt.Errorf("failed to cleanup stale data: %v", err) } - //reset maps to avoid side effects. maybe parser should be an object? clearGlobals() return nil } @@ -244,8 +293,8 @@ func clearGlobals() { // TestParse verifies that parsing input fixtures generates the expected JSON exports. func TestParse(t *testing.T) { tempDir := t.TempDir() - // todo fix grade data, csvPath = ./grade-data panics - Parse("testdata", tempDir, "", false) + + Parse("testdata", tempDir, "../grade-data", false) OutputCourses, err := unmarshallFile[[]schema.Course](filepath.Join(tempDir, "courses.json")) if err != nil { diff --git a/utils/methods.go b/utils/methods.go index 90712e2..70b8bfe 100644 --- a/utils/methods.go +++ b/utils/methods.go @@ -224,8 +224,8 @@ func WriteJSON(filepath string, data interface{}) error { defer fptr.Close() encoder := json.NewEncoder(fptr) encoder.SetIndent("", "\t") - encoder.Encode(data) - return nil + encoder.SetEscapeHTML(false) + return encoder.Encode(data) } // GetAllFilesWithExtension recursively gathers file paths within inDir that match extension. From f5cf757aa71f853099f1c2ad83225b7a6af484dc Mon Sep 17 00:00:00 2001 From: SoggyRihno <94922205+SoggyRihno@users.noreply.github.com> Date: Thu, 22 Jan 2026 20:53:59 -0600 Subject: [PATCH 3/4] New testcase --- parser/testdata/case_006/classInfo.json | 14 ++ parser/testdata/case_006/course.json | 25 +++ parser/testdata/case_006/input.html | 268 +++++++++++++++++++++++ parser/testdata/case_006/professors.json | 23 ++ parser/testdata/case_006/section.json | 61 ++++++ 5 files changed, 391 insertions(+) create mode 100644 parser/testdata/case_006/classInfo.json create mode 100644 parser/testdata/case_006/course.json create mode 100644 parser/testdata/case_006/input.html create mode 100644 parser/testdata/case_006/professors.json create mode 100644 parser/testdata/case_006/section.json diff --git a/parser/testdata/case_006/classInfo.json b/parser/testdata/case_006/classInfo.json new file mode 100644 index 0000000..ae81d1c --- /dev/null +++ b/parser/testdata/case_006/classInfo.json @@ -0,0 +1,14 @@ +{ + "": "", + "Activity Type:": "Lecture", + "Add Consent:": "No Consent", + "Class Level:": "Undergraduate", + "Class Section:": "THEA1310.001.25S", + "Class/Course Number:": "24043 / 003909", + "Grading:": "Graded - Undergraduate", + "How often a course is scheduled:": "Once Each Long Semester", + "Instruction Mode:": "Face-to-Face", + "Orion Date/Time:": "2025-08-28 06:30:01", + "Semester Credit Hours:": "3", + "Session Type:": "Regular Academic Session" +} diff --git a/parser/testdata/case_006/course.json b/parser/testdata/case_006/course.json new file mode 100644 index 0000000..26de202 --- /dev/null +++ b/parser/testdata/case_006/course.json @@ -0,0 +1,25 @@ +{ + "_id": "6972d72a922d8b0bc3a3bce0", + "subject_prefix": "THEA", + "course_number": "1310", + "title": "Understanding Theatre", + "description": "THEA 1310 - Understanding Theatre (3 semester credit hours) Lectures, discussions, and performances designed to explore artistic, philosophical, social, historical, and psychological dimensions of the theatrical experience. Topics may include analysis of scripts, the nature of the theater compared to the other performing arts, and the nature of popular entertainments. (3-0) S", + "enrollment_reqs": "", + "school": "School of Arts, Humanities, and Technology", + "credit_hours": "3", + "class_level": "Undergraduate", + "activity_type": "Lecture", + "grading": "Graded - Undergraduate", + "internal_course_number": "003909", + "prerequisites": null, + "corequisites": null, + "co_or_pre_requisites": null, + "sections": [ + "6972d72a922d8b0bc3a3bce1" + ], + "lecture_contact_hours": "3", + "laboratory_contact_hours": "0", + "offering_frequency": "S", + "catalog_year": "24", + "attributes": null +} diff --git a/parser/testdata/case_006/input.html b/parser/testdata/case_006/input.html new file mode 100644 index 0000000..e4efe97 --- /dev/null +++ b/parser/testdata/case_006/input.html @@ -0,0 +1,268 @@ +
+
+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Course Title: + + Understanding Theatre +
+ Class Info: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Class Section: + + THEA1310.001.25S + + Instruction Mode: + + Face-to-Face +
+ Class Level: + + Undergraduate + + Activity Type: + + Lecture +
+ Semester Credit Hours: + + 3 + + Class/Course Number: + + 24043 / 003909 +
+ Grading: + + Graded - Undergraduate + + Session Type: + + Regular Academic Session +
+ Add Consent: + + No Consent + + Orion Date/Time: + + 2025-08-28 06:30:01 +
+ How often a course is scheduled: + + Once Each Long Semester + + +
+ +
+ Status: + + Enrollment Status: OPEN    Available Seats: 15    Enrolled + Total: 33    Waitlist: 0 + +
+ Description: + + THEA 1310 - Understanding Theatre (3 + semester credit hours) Lectures, discussions, and performances designed to explore artistic, + philosophical, social, historical, and psychological dimensions of the theatrical experience. Topics + may include analysis of scripts, the nature of the theater compared to the other performing arts, + and the nature of popular entertainments. (3-0) S + +
+ Instructor(s): + +
+
Damian Enyaosah ・ Primary Instructor ・ dpe190000@utdallas.edu +
+ +
TA/RA(s):(none)
+ Schedule: + +
+

Class Location and Times

+

Term: 25S
Type: Regular Academic Session
Starts: + January 21, 2025
Ends: May 16, 2025

+ +
+

+ January 21, 2025-May 9, 2025
+ Monday, Wednesday
+ 10:00am-11:15am
+ FO 1.202 + +

+
+
FO Building
Floor 1 - Room 1.202
+ +
+
+
+ +
+
+ +
Core:Texas Core Areas 050+090 - Creative Arts + CAO
+ College: + + School of Arts, Humanities, and + Technology + +
+ Syllabus: + + Syllabus for Understanding Theatre + (THEA1310.001.25S) + +
+ Evaluation: + + Class evaluation for + Understanding Theatre (THEA1310.001.25S) + +
+
+
The direct link to this class is: https://go.utdallas.edu/thea1310.001.25s
+
+ Register for this class on Orion: https://orion.utdallas.edu +
+
+ +
diff --git a/parser/testdata/case_006/professors.json b/parser/testdata/case_006/professors.json new file mode 100644 index 0000000..8860703 --- /dev/null +++ b/parser/testdata/case_006/professors.json @@ -0,0 +1,23 @@ +[ + { + "_id": "6972d72a922d8b0bc3a3bce2", + "first_name": "Damian", + "last_name": "Enyaosah", + "titles": [ + "Primary Instructor" + ], + "email": "dpe190000@utdallas.edu", + "phone_number": "", + "office": { + "building": "", + "room": "", + "map_uri": "" + }, + "profile_uri": "", + "image_uri": "", + "office_hours": null, + "sections": [ + "6972d72a922d8b0bc3a3bce1" + ] + } +] diff --git a/parser/testdata/case_006/section.json b/parser/testdata/case_006/section.json new file mode 100644 index 0000000..5e58473 --- /dev/null +++ b/parser/testdata/case_006/section.json @@ -0,0 +1,61 @@ +{ + "_id": "6972d72a922d8b0bc3a3bce1", + "section_number": "001", + "course_reference": "6972d72a922d8b0bc3a3bce0", + "section_corequisites": null, + "academic_session": { + "name": "25S", + "start_date": "2025-01-21T00:00:00-06:00", + "end_date": "2025-05-16T00:00:00-05:00" + }, + "professors": [ + "6972d72a922d8b0bc3a3bce2" + ], + "teaching_assistants": [], + "internal_class_number": "24043", + "instruction_mode": "Face-to-Face", + "meetings": [ + { + "start_date": "2025-01-21T00:00:00-06:00", + "end_date": "2025-05-09T00:00:00-05:00", + "meeting_days": [ + "Monday", + "Wednesday" + ], + "start_time": "10:00am", + "end_time": "11:15am", + "modality": "", + "location": { + "building": "FO", + "room": "1.202", + "map_uri": "https://locator.utdallas.edu/FO_1.202" + } + } + ], + "core_flags": [ + "050", + "090" + ], + "syllabus_uri": "https://dox.utdallas.edu/syl154931", + "grade_distribution": [ + 4, + 7, + 9, + 3, + 1, + 1, + 0, + 1, + 2, + 0, + 0, + 0, + 5, + 0, + 0, + 0, + 0, + 0 + ], + "attributes": null +} From 9040f94742b13c3c07dfb2e246c4070cc878b5c0 Mon Sep 17 00:00:00 2001 From: SoggyRihno <94922205+SoggyRihno@users.noreply.github.com> Date: Thu, 22 Jan 2026 22:26:55 -0600 Subject: [PATCH 4/4] Added unit tests for gradeLoader.go, added more test data, minor fix validator test --- build.bat | 4 +- parser/gradeLoader.go | 4 + parser/gradeLoader_test.go | 126 +++++++++++++++++- parser/testdata/case_000/course.json | 4 +- parser/testdata/case_000/professors.json | 8 +- parser/testdata/case_000/section.json | 29 +++- parser/testdata/case_001/course.json | 4 +- parser/testdata/case_001/professors.json | 8 +- parser/testdata/case_001/section.json | 29 +++- parser/testdata/case_002/course.json | 4 +- parser/testdata/case_002/professors.json | 4 +- parser/testdata/case_002/section.json | 27 +++- parser/testdata/case_003/course.json | 4 +- parser/testdata/case_003/professors.json | 4 +- parser/testdata/case_003/section.json | 6 +- parser/testdata/case_004/course.json | 4 +- parser/testdata/case_004/section.json | 4 +- parser/testdata/case_005/course.json | 4 +- parser/testdata/case_005/section.json | 4 +- parser/testdata/case_006/classInfo.json | 24 ++-- parser/testdata/case_006/course.json | 46 +++---- parser/testdata/case_006/professors.json | 4 +- parser/testdata/case_006/section.json | 118 ++++++++--------- parser/testdata/courses.json | 47 +++++-- parser/testdata/professors.json | 67 ++++++---- parser/testdata/sections.json | 160 ++++++++++++++++++++--- parser/validator_test.go | 15 ++- scrapers/coursebook.go | 2 +- 28 files changed, 558 insertions(+), 206 deletions(-) diff --git a/build.bat b/build.bat index 9a8003a..8d671d1 100644 --- a/build.bat +++ b/build.bat @@ -24,8 +24,8 @@ echo Performing checks... go mod tidy && ^ go vet ./... && ^ staticcheck ./... && ^ -gofmt -w ./.. && ^ -goimports -w ./.. +gofmt -w . && ^ +goimports -w . if ERRORLEVEL 1 exit /b %ERRORLEVEL% :: fail if error occurred echo Checks done! if %skip%==1 exit diff --git a/parser/gradeLoader.go b/parser/gradeLoader.go index 43e4855..f92a7f0 100644 --- a/parser/gradeLoader.go +++ b/parser/gradeLoader.go @@ -58,6 +58,10 @@ func csvToMap(filename string) (map[string][]int, error) { return nil, fmt.Errorf("error parsing %s: %v", filename, err) } + if len(records) == 0 { + return nil, fmt.Errorf("empty CSV file '%s'", filename) + } + indexMap := make(map[string]int) for j, col := range records[0] { switch col { diff --git a/parser/gradeLoader_test.go b/parser/gradeLoader_test.go index b7a414b..efb3e1c 100644 --- a/parser/gradeLoader_test.go +++ b/parser/gradeLoader_test.go @@ -1,14 +1,134 @@ package parser import ( + "fmt" + "os" + "path/filepath" "testing" + + "github.com/google/go-cmp/cmp" +) + +var ( + gradeLoaderTestCases = map[string]struct { + csvContent string + want map[string][]int + fail bool + }{ + "Valid_Data": { + csvContent: `Instructor 1,Instructor 2,Instructor 3,Instructor 4,Instructor 5,Instructor 6,Subject,"Catalog Nbr",Section,A+,A,A-,B+,B,B-,C+,C,C-,D+,D,D-,F,NF,CR,I,NC,P,W +"Curchack, Fred",,,,,,AP,3300,501,6,4,2,2,1,3,1,1,,,,,1,,,,,,0 +"Anjum, Zafar",,,,,,ARAB,1311,001,,26,,,1,,,,,,,,,,,,,,2`, + want: map[string][]int{ + "AP3300501": {6, 4, 2, 2, 1, 3, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, + "ARAB13111": {0, 26, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0}, + }, + fail: false, + }, + "Missing_Required_Column_A+": { + csvContent: `Subject,"Catalog Nbr",Section,A,A-,B+ +CS,1337,001,10,5,5`, + fail: true, + }, + "Missing_Required_Column_Subject": { + csvContent: `Instructor,"Catalog Nbr",Section,A+,A +Doe,1337,001,10,5`, + fail: true, + }, + "Empty_File": { + csvContent: ``, + fail: true, + }, + } ) func TestLoadGrades(t *testing.T) { - _, err := loadGrades("../grade-data/") - if err != nil { - t.Errorf("loadGrades() error = %v", err) + invalidCSVNames := []string{"22", "2F", "2022F", "20-U", "15Fall"} + + for i, name := range invalidCSVNames { + t.Run( + fmt.Sprintf("Invalid_CSV_Name_%d", i), func(t *testing.T) { + tempDir := t.TempDir() + + temp, err := os.Create(filepath.Join(tempDir, name+".csv")) + if err != nil { + t.Errorf("failed to create temp file: %v", err) + } + defer temp.Close() + + _, err = loadGrades(tempDir) + if err == nil { + t.Errorf("expected error but got none") + } + }, + ) + } + + validCSVNames := []string{"25F", "18U", "26S"} + for i, name := range validCSVNames { + t.Run( + fmt.Sprintf("Valid_CSV_Name_%d", i), func(t *testing.T) { + tempDir := t.TempDir() + + temp, err := os.Create(filepath.Join(tempDir, name+".csv")) + if err != nil { + t.Errorf("failed to create temp file: %v", err) + } + defer temp.Close() + + _, err = temp.WriteString(gradeLoaderTestCases["Valid_Data"].csvContent) + if err != nil { + t.Errorf("failed to write test data: %v", err) + } + + _, err = loadGrades(tempDir) + if err != nil { + t.Errorf("valid .csv failed: %v", err) + } + }, + ) } + t.Run("Real_Data", func(t *testing.T) { + _, err := loadGrades("../grade-data/") + if err != nil { + t.Errorf("failed to load grades: %v", err) + } + }) +} + +func TestCSVToMap(t *testing.T) { + tempDir := t.TempDir() + + for name, testCase := range gradeLoaderTestCases { + t.Run(name, func(t *testing.T) { + + temp, err := os.CreateTemp(tempDir, "grades*.csv") + if err != nil { + t.Errorf("failed to create temp file: %v", err) + } + defer temp.Close() + + if _, err = temp.WriteString(testCase.csvContent); err != nil { + t.Errorf("failed to write test data: %v", err) + } + + output, err := csvToMap(temp.Name()) + if err != nil { + if testCase.fail { + return + } + t.Errorf("failed to load csv: %v", err) + } else if testCase.fail { + t.Errorf("expected failure but got none") + } else { + diff := cmp.Diff(testCase.want, output) + if diff != "" { + t.Errorf("Failed (-expected +got)\n %s", diff) + } + } + + }) + } } diff --git a/parser/testdata/case_000/course.json b/parser/testdata/case_000/course.json index 5e342be..e050494 100644 --- a/parser/testdata/case_000/course.json +++ b/parser/testdata/case_000/course.json @@ -1,5 +1,5 @@ { - "_id": "67d07ee0c972c18731e23bd7", + "_id": "6972f54d6afb10b361a3e8b1", "subject_prefix": "ACCT", "course_number": "2301", "title": "Introductory Financial Accounting", @@ -15,7 +15,7 @@ "corequisites": null, "co_or_pre_requisites": null, "sections": [ - "67d07ee0c972c18731e23bd8" + "6972f54d6afb10b361a3e8b2" ], "lecture_contact_hours": "3", "laboratory_contact_hours": "0", diff --git a/parser/testdata/case_000/professors.json b/parser/testdata/case_000/professors.json index 207c908..57aec29 100644 --- a/parser/testdata/case_000/professors.json +++ b/parser/testdata/case_000/professors.json @@ -1,6 +1,6 @@ [ { - "_id": "67d07ee0c972c18731e23bd9", + "_id": "6972f54d6afb10b361a3e8b3", "first_name": "Naim Bugra", "last_name": "Ozel", "titles": [ @@ -17,11 +17,11 @@ "image_uri": "", "office_hours": null, "sections": [ - "67d07ee0c972c18731e23bd8" + "6972f54d6afb10b361a3e8b2" ] }, { - "_id": "67d07ee0c972c18731e23bda", + "_id": "6972f54d6afb10b361a3e8b4", "first_name": "Jieying", "last_name": "Zhang", "titles": [ @@ -38,7 +38,7 @@ "image_uri": "", "office_hours": null, "sections": [ - "67d07ee0c972c18731e23bd8" + "6972f54d6afb10b361a3e8b2" ] } ] diff --git a/parser/testdata/case_000/section.json b/parser/testdata/case_000/section.json index a67c0f5..b2faa8f 100644 --- a/parser/testdata/case_000/section.json +++ b/parser/testdata/case_000/section.json @@ -1,7 +1,7 @@ { - "_id": "67d07ee0c972c18731e23bd8", + "_id": "6972f54d6afb10b361a3e8b2", "section_number": "003", - "course_reference": "67d07ee0c972c18731e23bd7", + "course_reference": "6972f54d6afb10b361a3e8b1", "section_corequisites": null, "academic_session": { "name": "25S", @@ -9,8 +9,8 @@ "end_date": "2025-05-16T00:00:00-05:00" }, "professors": [ - "67d07ee0c972c18731e23bd9", - "67d07ee0c972c18731e23bda" + "6972f54d6afb10b361a3e8b3", + "6972f54d6afb10b361a3e8b4" ], "teaching_assistants": [ { @@ -48,6 +48,25 @@ ], "core_flags": [], "syllabus_uri": "https://dox.utdallas.edu/syl152555", - "grade_distribution": [], + "grade_distribution": [ + 9, + 9, + 4, + 6, + 4, + 5, + 12, + 3, + 1, + 3, + 1, + 0, + 4, + 3, + 0, + 0, + 0, + 0 + ], "attributes": null } diff --git a/parser/testdata/case_001/course.json b/parser/testdata/case_001/course.json index 24dcf8b..4a015f2 100644 --- a/parser/testdata/case_001/course.json +++ b/parser/testdata/case_001/course.json @@ -1,5 +1,5 @@ { - "_id": "67d07ee0c972c18731e23bdb", + "_id": "6972f54d6afb10b361a3e8b5", "subject_prefix": "ACCT", "course_number": "2301", "title": "Introductory Financial Accounting", @@ -15,7 +15,7 @@ "corequisites": null, "co_or_pre_requisites": null, "sections": [ - "67d07ee0c972c18731e23bdc" + "6972f54d6afb10b361a3e8b6" ], "lecture_contact_hours": "3", "laboratory_contact_hours": "0", diff --git a/parser/testdata/case_001/professors.json b/parser/testdata/case_001/professors.json index 8bf8c6b..72111e2 100644 --- a/parser/testdata/case_001/professors.json +++ b/parser/testdata/case_001/professors.json @@ -1,6 +1,6 @@ [ { - "_id": "67d07ee0c972c18731e23bdd", + "_id": "6972f54d6afb10b361a3e8b7", "first_name": "Jieying", "last_name": "Zhang", "titles": [ @@ -17,11 +17,11 @@ "image_uri": "", "office_hours": null, "sections": [ - "67d07ee0c972c18731e23bdc" + "6972f54d6afb10b361a3e8b6" ] }, { - "_id": "67d07ee0c972c18731e23bde", + "_id": "6972f54d6afb10b361a3e8b8", "first_name": "Naim Bugra", "last_name": "Ozel", "titles": [ @@ -38,7 +38,7 @@ "image_uri": "", "office_hours": null, "sections": [ - "67d07ee0c972c18731e23bdc" + "6972f54d6afb10b361a3e8b6" ] } ] diff --git a/parser/testdata/case_001/section.json b/parser/testdata/case_001/section.json index eeb9360..4b10fcf 100644 --- a/parser/testdata/case_001/section.json +++ b/parser/testdata/case_001/section.json @@ -1,7 +1,7 @@ { - "_id": "67d07ee0c972c18731e23bdc", + "_id": "6972f54d6afb10b361a3e8b6", "section_number": "001", - "course_reference": "67d07ee0c972c18731e23bdb", + "course_reference": "6972f54d6afb10b361a3e8b5", "section_corequisites": null, "academic_session": { "name": "25S", @@ -9,8 +9,8 @@ "end_date": "2025-05-16T00:00:00-05:00" }, "professors": [ - "67d07ee0c972c18731e23bdd", - "67d07ee0c972c18731e23bde" + "6972f54d6afb10b361a3e8b7", + "6972f54d6afb10b361a3e8b8" ], "teaching_assistants": [ { @@ -48,6 +48,25 @@ ], "core_flags": [], "syllabus_uri": "https://dox.utdallas.edu/syl152552", - "grade_distribution": [], + "grade_distribution": [ + 2, + 7, + 5, + 8, + 8, + 7, + 3, + 6, + 1, + 3, + 3, + 0, + 9, + 1, + 0, + 0, + 0, + 0 + ], "attributes": null } diff --git a/parser/testdata/case_002/course.json b/parser/testdata/case_002/course.json index 141ff6b..1db1746 100644 --- a/parser/testdata/case_002/course.json +++ b/parser/testdata/case_002/course.json @@ -1,5 +1,5 @@ { - "_id": "67d07ee0c972c18731e23bdf", + "_id": "6972f54d6afb10b361a3e8b9", "subject_prefix": "BA", "course_number": "1320", "title": "Business in a Global World", @@ -15,7 +15,7 @@ "corequisites": null, "co_or_pre_requisites": null, "sections": [ - "67d07ee0c972c18731e23be0" + "6972f54d6afb10b361a3e8ba" ], "lecture_contact_hours": "3", "laboratory_contact_hours": "0", diff --git a/parser/testdata/case_002/professors.json b/parser/testdata/case_002/professors.json index c6913f6..1f17b80 100644 --- a/parser/testdata/case_002/professors.json +++ b/parser/testdata/case_002/professors.json @@ -1,6 +1,6 @@ [ { - "_id": "67d07ee0c972c18731e23be1", + "_id": "6972f54d6afb10b361a3e8bb", "first_name": "Peter", "last_name": "Lewin", "titles": [ @@ -17,7 +17,7 @@ "image_uri": "", "office_hours": null, "sections": [ - "67d07ee0c972c18731e23be0" + "6972f54d6afb10b361a3e8ba" ] } ] diff --git a/parser/testdata/case_002/section.json b/parser/testdata/case_002/section.json index 6eb44f5..4ca35d9 100644 --- a/parser/testdata/case_002/section.json +++ b/parser/testdata/case_002/section.json @@ -1,7 +1,7 @@ { - "_id": "67d07ee0c972c18731e23be0", + "_id": "6972f54d6afb10b361a3e8ba", "section_number": "501", - "course_reference": "67d07ee0c972c18731e23bdf", + "course_reference": "6972f54d6afb10b361a3e8b9", "section_corequisites": null, "academic_session": { "name": "25S", @@ -9,7 +9,7 @@ "end_date": "2025-05-16T00:00:00-05:00" }, "professors": [ - "67d07ee0c972c18731e23be1" + "6972f54d6afb10b361a3e8bb" ], "teaching_assistants": [ { @@ -44,6 +44,25 @@ "090" ], "syllabus_uri": "https://dox.utdallas.edu/syl153033", - "grade_distribution": [], + "grade_distribution": [ + 0, + 13, + 23, + 6, + 4, + 7, + 1, + 0, + 0, + 0, + 0, + 0, + 1, + 1, + 0, + 0, + 0, + 0 + ], "attributes": null } diff --git a/parser/testdata/case_003/course.json b/parser/testdata/case_003/course.json index 94219f8..0111554 100644 --- a/parser/testdata/case_003/course.json +++ b/parser/testdata/case_003/course.json @@ -1,5 +1,5 @@ { - "_id": "67d07ee0c972c18731e23be2", + "_id": "6972f54d6afb10b361a3e8bc", "subject_prefix": "BIOL", "course_number": "6111", "title": "Graduate Research Presentation", @@ -15,7 +15,7 @@ "corequisites": null, "co_or_pre_requisites": null, "sections": [ - "67d07ee0c972c18731e23be3" + "6972f54d6afb10b361a3e8bd" ], "lecture_contact_hours": "1", "laboratory_contact_hours": "0", diff --git a/parser/testdata/case_003/professors.json b/parser/testdata/case_003/professors.json index 3cb4a51..e921077 100644 --- a/parser/testdata/case_003/professors.json +++ b/parser/testdata/case_003/professors.json @@ -1,6 +1,6 @@ [ { - "_id": "67d07ee0c972c18731e23be4", + "_id": "6972f54d6afb10b361a3e8be", "first_name": "Tian", "last_name": "Hong", "titles": [ @@ -17,7 +17,7 @@ "image_uri": "", "office_hours": null, "sections": [ - "67d07ee0c972c18731e23be3" + "6972f54d6afb10b361a3e8bd" ] } ] diff --git a/parser/testdata/case_003/section.json b/parser/testdata/case_003/section.json index fff4105..d598a10 100644 --- a/parser/testdata/case_003/section.json +++ b/parser/testdata/case_003/section.json @@ -1,7 +1,7 @@ { - "_id": "67d07ee0c972c18731e23be3", + "_id": "6972f54d6afb10b361a3e8bd", "section_number": "016", - "course_reference": "67d07ee0c972c18731e23be2", + "course_reference": "6972f54d6afb10b361a3e8bc", "section_corequisites": null, "academic_session": { "name": "25S", @@ -9,7 +9,7 @@ "end_date": "2025-05-16T00:00:00-05:00" }, "professors": [ - "67d07ee0c972c18731e23be4" + "6972f54d6afb10b361a3e8be" ], "teaching_assistants": [], "internal_class_number": "29611", diff --git a/parser/testdata/case_004/course.json b/parser/testdata/case_004/course.json index d8c5383..b253341 100644 --- a/parser/testdata/case_004/course.json +++ b/parser/testdata/case_004/course.json @@ -1,5 +1,5 @@ { - "_id": "67d07ee0c972c18731e23be5", + "_id": "6972f54d6afb10b361a3e8bf", "subject_prefix": "AERO", "course_number": "3320", "title": "- Recitation", @@ -15,7 +15,7 @@ "corequisites": null, "co_or_pre_requisites": null, "sections": [ - "67d07ee0c972c18731e23be6" + "6972f54d6afb10b361a3e8c0" ], "lecture_contact_hours": "", "laboratory_contact_hours": "", diff --git a/parser/testdata/case_004/section.json b/parser/testdata/case_004/section.json index 2481524..0608366 100644 --- a/parser/testdata/case_004/section.json +++ b/parser/testdata/case_004/section.json @@ -1,7 +1,7 @@ { - "_id": "67d07ee0c972c18731e23be6", + "_id": "6972f54d6afb10b361a3e8c0", "section_number": "201", - "course_reference": "67d07ee0c972c18731e23be5", + "course_reference": "6972f54d6afb10b361a3e8bf", "section_corequisites": null, "academic_session": { "name": "25S", diff --git a/parser/testdata/case_005/course.json b/parser/testdata/case_005/course.json index 9095afc..209bd70 100644 --- a/parser/testdata/case_005/course.json +++ b/parser/testdata/case_005/course.json @@ -1,5 +1,5 @@ { - "_id": "67d07ee0c972c18731e23be7", + "_id": "6972f54d6afb10b361a3e8c1", "subject_prefix": "AERO", "course_number": "4320", "title": "- Laboratory", @@ -15,7 +15,7 @@ "corequisites": null, "co_or_pre_requisites": null, "sections": [ - "67d07ee0c972c18731e23be8" + "6972f54d6afb10b361a3e8c2" ], "lecture_contact_hours": "", "laboratory_contact_hours": "", diff --git a/parser/testdata/case_005/section.json b/parser/testdata/case_005/section.json index 712c972..247ab4c 100644 --- a/parser/testdata/case_005/section.json +++ b/parser/testdata/case_005/section.json @@ -1,7 +1,7 @@ { - "_id": "67d07ee0c972c18731e23be8", + "_id": "6972f54d6afb10b361a3e8c2", "section_number": "002", - "course_reference": "67d07ee0c972c18731e23be7", + "course_reference": "6972f54d6afb10b361a3e8c1", "section_corequisites": null, "academic_session": { "name": "25S", diff --git a/parser/testdata/case_006/classInfo.json b/parser/testdata/case_006/classInfo.json index ae81d1c..34e725a 100644 --- a/parser/testdata/case_006/classInfo.json +++ b/parser/testdata/case_006/classInfo.json @@ -1,14 +1,14 @@ { - "": "", - "Activity Type:": "Lecture", - "Add Consent:": "No Consent", - "Class Level:": "Undergraduate", - "Class Section:": "THEA1310.001.25S", - "Class/Course Number:": "24043 / 003909", - "Grading:": "Graded - Undergraduate", - "How often a course is scheduled:": "Once Each Long Semester", - "Instruction Mode:": "Face-to-Face", - "Orion Date/Time:": "2025-08-28 06:30:01", - "Semester Credit Hours:": "3", - "Session Type:": "Regular Academic Session" + "": "", + "Activity Type:": "Lecture", + "Add Consent:": "No Consent", + "Class Level:": "Undergraduate", + "Class Section:": "THEA1310.001.25S", + "Class/Course Number:": "24043 / 003909", + "Grading:": "Graded - Undergraduate", + "How often a course is scheduled:": "Once Each Long Semester", + "Instruction Mode:": "Face-to-Face", + "Orion Date/Time:": "2025-08-28 06:30:01", + "Semester Credit Hours:": "3", + "Session Type:": "Regular Academic Session" } diff --git a/parser/testdata/case_006/course.json b/parser/testdata/case_006/course.json index 26de202..a20be42 100644 --- a/parser/testdata/case_006/course.json +++ b/parser/testdata/case_006/course.json @@ -1,25 +1,25 @@ { - "_id": "6972d72a922d8b0bc3a3bce0", - "subject_prefix": "THEA", - "course_number": "1310", - "title": "Understanding Theatre", - "description": "THEA 1310 - Understanding Theatre (3 semester credit hours) Lectures, discussions, and performances designed to explore artistic, philosophical, social, historical, and psychological dimensions of the theatrical experience. Topics may include analysis of scripts, the nature of the theater compared to the other performing arts, and the nature of popular entertainments. (3-0) S", - "enrollment_reqs": "", - "school": "School of Arts, Humanities, and Technology", - "credit_hours": "3", - "class_level": "Undergraduate", - "activity_type": "Lecture", - "grading": "Graded - Undergraduate", - "internal_course_number": "003909", - "prerequisites": null, - "corequisites": null, - "co_or_pre_requisites": null, - "sections": [ - "6972d72a922d8b0bc3a3bce1" - ], - "lecture_contact_hours": "3", - "laboratory_contact_hours": "0", - "offering_frequency": "S", - "catalog_year": "24", - "attributes": null + "_id": "6972f54d6afb10b361a3e8c3", + "subject_prefix": "THEA", + "course_number": "1310", + "title": "Understanding Theatre", + "description": "THEA 1310 - Understanding Theatre (3 semester credit hours) Lectures, discussions, and performances designed to explore artistic, philosophical, social, historical, and psychological dimensions of the theatrical experience. Topics may include analysis of scripts, the nature of the theater compared to the other performing arts, and the nature of popular entertainments. (3-0) S", + "enrollment_reqs": "", + "school": "School of Arts, Humanities, and Technology", + "credit_hours": "3", + "class_level": "Undergraduate", + "activity_type": "Lecture", + "grading": "Graded - Undergraduate", + "internal_course_number": "003909", + "prerequisites": null, + "corequisites": null, + "co_or_pre_requisites": null, + "sections": [ + "6972f54d6afb10b361a3e8c4" + ], + "lecture_contact_hours": "3", + "laboratory_contact_hours": "0", + "offering_frequency": "S", + "catalog_year": "24", + "attributes": null } diff --git a/parser/testdata/case_006/professors.json b/parser/testdata/case_006/professors.json index 8860703..bfde863 100644 --- a/parser/testdata/case_006/professors.json +++ b/parser/testdata/case_006/professors.json @@ -1,6 +1,6 @@ [ { - "_id": "6972d72a922d8b0bc3a3bce2", + "_id": "6972f54d6afb10b361a3e8c5", "first_name": "Damian", "last_name": "Enyaosah", "titles": [ @@ -17,7 +17,7 @@ "image_uri": "", "office_hours": null, "sections": [ - "6972d72a922d8b0bc3a3bce1" + "6972f54d6afb10b361a3e8c4" ] } ] diff --git a/parser/testdata/case_006/section.json b/parser/testdata/case_006/section.json index 5e58473..2c57733 100644 --- a/parser/testdata/case_006/section.json +++ b/parser/testdata/case_006/section.json @@ -1,61 +1,61 @@ { - "_id": "6972d72a922d8b0bc3a3bce1", - "section_number": "001", - "course_reference": "6972d72a922d8b0bc3a3bce0", - "section_corequisites": null, - "academic_session": { - "name": "25S", - "start_date": "2025-01-21T00:00:00-06:00", - "end_date": "2025-05-16T00:00:00-05:00" - }, - "professors": [ - "6972d72a922d8b0bc3a3bce2" - ], - "teaching_assistants": [], - "internal_class_number": "24043", - "instruction_mode": "Face-to-Face", - "meetings": [ - { - "start_date": "2025-01-21T00:00:00-06:00", - "end_date": "2025-05-09T00:00:00-05:00", - "meeting_days": [ - "Monday", - "Wednesday" - ], - "start_time": "10:00am", - "end_time": "11:15am", - "modality": "", - "location": { - "building": "FO", - "room": "1.202", - "map_uri": "https://locator.utdallas.edu/FO_1.202" - } - } - ], - "core_flags": [ - "050", - "090" - ], - "syllabus_uri": "https://dox.utdallas.edu/syl154931", - "grade_distribution": [ - 4, - 7, - 9, - 3, - 1, - 1, - 0, - 1, - 2, - 0, - 0, - 0, - 5, - 0, - 0, - 0, - 0, - 0 - ], - "attributes": null + "_id": "6972f54d6afb10b361a3e8c4", + "section_number": "001", + "course_reference": "6972f54d6afb10b361a3e8c3", + "section_corequisites": null, + "academic_session": { + "name": "25S", + "start_date": "2025-01-21T00:00:00-06:00", + "end_date": "2025-05-16T00:00:00-05:00" + }, + "professors": [ + "6972f54d6afb10b361a3e8c5" + ], + "teaching_assistants": [], + "internal_class_number": "24043", + "instruction_mode": "Face-to-Face", + "meetings": [ + { + "start_date": "2025-01-21T00:00:00-06:00", + "end_date": "2025-05-09T00:00:00-05:00", + "meeting_days": [ + "Monday", + "Wednesday" + ], + "start_time": "10:00am", + "end_time": "11:15am", + "modality": "", + "location": { + "building": "FO", + "room": "1.202", + "map_uri": "https://locator.utdallas.edu/FO_1.202" + } + } + ], + "core_flags": [ + "050", + "090" + ], + "syllabus_uri": "https://dox.utdallas.edu/syl154931", + "grade_distribution": [ + 4, + 7, + 9, + 3, + 1, + 1, + 0, + 1, + 2, + 0, + 0, + 0, + 5, + 0, + 0, + 0, + 0, + 0 + ], + "attributes": null } diff --git a/parser/testdata/courses.json b/parser/testdata/courses.json index 78ae4d4..df9144e 100644 --- a/parser/testdata/courses.json +++ b/parser/testdata/courses.json @@ -1,6 +1,6 @@ [ { - "_id": "67d07ee0c972c18731e23bee", + "_id": "6972f54d6afb10b361a3e8cb", "subject_prefix": "BA", "course_number": "1320", "title": "Business in a Global World", @@ -16,7 +16,7 @@ "corequisites": null, "co_or_pre_requisites": null, "sections": [ - "67d07ee0c972c18731e23bef" + "6972f54d6afb10b361a3e8cc" ], "lecture_contact_hours": "3", "laboratory_contact_hours": "0", @@ -25,7 +25,7 @@ "attributes": null }, { - "_id": "67d07ee0c972c18731e23bf1", + "_id": "6972f54d6afb10b361a3e8ce", "subject_prefix": "BIOL", "course_number": "6111", "title": "Graduate Research Presentation", @@ -41,7 +41,7 @@ "corequisites": null, "co_or_pre_requisites": null, "sections": [ - "67d07ee0c972c18731e23bf2" + "6972f54d6afb10b361a3e8cf" ], "lecture_contact_hours": "1", "laboratory_contact_hours": "0", @@ -50,7 +50,7 @@ "attributes": null }, { - "_id": "67d07ee0c972c18731e23bf4", + "_id": "6972f54d6afb10b361a3e8d1", "subject_prefix": "AERO", "course_number": "3320", "title": "- Recitation", @@ -66,7 +66,7 @@ "corequisites": null, "co_or_pre_requisites": null, "sections": [ - "67d07ee0c972c18731e23bf5" + "6972f54d6afb10b361a3e8d2" ], "lecture_contact_hours": "", "laboratory_contact_hours": "", @@ -75,7 +75,7 @@ "attributes": null }, { - "_id": "67d07ee0c972c18731e23bf6", + "_id": "6972f54d6afb10b361a3e8d3", "subject_prefix": "AERO", "course_number": "4320", "title": "- Laboratory", @@ -91,7 +91,7 @@ "corequisites": null, "co_or_pre_requisites": null, "sections": [ - "67d07ee0c972c18731e23bf7" + "6972f54d6afb10b361a3e8d4" ], "lecture_contact_hours": "", "laboratory_contact_hours": "", @@ -100,7 +100,32 @@ "attributes": null }, { - "_id": "67d07ee0c972c18731e23be9", + "_id": "6972f54d6afb10b361a3e8d5", + "subject_prefix": "THEA", + "course_number": "1310", + "title": "Understanding Theatre", + "description": "THEA 1310 - Understanding Theatre (3 semester credit hours) Lectures, discussions, and performances designed to explore artistic, philosophical, social, historical, and psychological dimensions of the theatrical experience. Topics may include analysis of scripts, the nature of the theater compared to the other performing arts, and the nature of popular entertainments. (3-0) S", + "enrollment_reqs": "", + "school": "School of Arts, Humanities, and Technology", + "credit_hours": "3", + "class_level": "Undergraduate", + "activity_type": "Lecture", + "grading": "Graded - Undergraduate", + "internal_course_number": "003909", + "prerequisites": null, + "corequisites": null, + "co_or_pre_requisites": null, + "sections": [ + "6972f54d6afb10b361a3e8d6" + ], + "lecture_contact_hours": "3", + "laboratory_contact_hours": "0", + "offering_frequency": "S", + "catalog_year": "24", + "attributes": null + }, + { + "_id": "6972f54d6afb10b361a3e8c6", "subject_prefix": "ACCT", "course_number": "2301", "title": "Introductory Financial Accounting", @@ -116,8 +141,8 @@ "corequisites": null, "co_or_pre_requisites": null, "sections": [ - "67d07ee0c972c18731e23bea", - "67d07ee0c972c18731e23bed" + "6972f54d6afb10b361a3e8c7", + "6972f54d6afb10b361a3e8ca" ], "lecture_contact_hours": "3", "laboratory_contact_hours": "0", diff --git a/parser/testdata/professors.json b/parser/testdata/professors.json index 2a931c4..dc66b36 100644 --- a/parser/testdata/professors.json +++ b/parser/testdata/professors.json @@ -1,12 +1,12 @@ [ { - "_id": "67d07ee0c972c18731e23beb", - "first_name": "Naim Bugra", - "last_name": "Ozel", + "_id": "6972f54d6afb10b361a3e8c9", + "first_name": "Jieying", + "last_name": "Zhang", "titles": [ "Primary Instructor (50%)" ], - "email": "nbo150030@utdallas.edu", + "email": "jxz146230@utdallas.edu", "phone_number": "", "office": { "building": "", @@ -17,18 +17,18 @@ "image_uri": "", "office_hours": null, "sections": [ - "67d07ee0c972c18731e23bea", - "67d07ee0c972c18731e23bed" + "6972f54d6afb10b361a3e8c7", + "6972f54d6afb10b361a3e8ca" ] }, { - "_id": "67d07ee0c972c18731e23bec", - "first_name": "Jieying", - "last_name": "Zhang", + "_id": "6972f54d6afb10b361a3e8cd", + "first_name": "Peter", + "last_name": "Lewin", "titles": [ - "Primary Instructor (50%)" + "Primary Instructor" ], - "email": "jxz146230@utdallas.edu", + "email": "plewin@utdallas.edu", "phone_number": "", "office": { "building": "", @@ -39,18 +39,17 @@ "image_uri": "", "office_hours": null, "sections": [ - "67d07ee0c972c18731e23bea", - "67d07ee0c972c18731e23bed" + "6972f54d6afb10b361a3e8cc" ] }, { - "_id": "67d07ee0c972c18731e23bf0", - "first_name": "Peter", - "last_name": "Lewin", + "_id": "6972f54d6afb10b361a3e8d0", + "first_name": "Tian", + "last_name": "Hong", "titles": [ "Primary Instructor" ], - "email": "plewin@utdallas.edu", + "email": "txh240018@utdallas.edu", "phone_number": "", "office": { "building": "", @@ -61,17 +60,38 @@ "image_uri": "", "office_hours": null, "sections": [ - "67d07ee0c972c18731e23bef" + "6972f54d6afb10b361a3e8cf" ] }, { - "_id": "67d07ee0c972c18731e23bf3", - "first_name": "Tian", - "last_name": "Hong", + "_id": "6972f54d6afb10b361a3e8d7", + "first_name": "Damian", + "last_name": "Enyaosah", "titles": [ "Primary Instructor" ], - "email": "txh240018@utdallas.edu", + "email": "dpe190000@utdallas.edu", + "phone_number": "", + "office": { + "building": "", + "room": "", + "map_uri": "" + }, + "profile_uri": "", + "image_uri": "", + "office_hours": null, + "sections": [ + "6972f54d6afb10b361a3e8d6" + ] + }, + { + "_id": "6972f54d6afb10b361a3e8c8", + "first_name": "Naim Bugra", + "last_name": "Ozel", + "titles": [ + "Primary Instructor (50%)" + ], + "email": "nbo150030@utdallas.edu", "phone_number": "", "office": { "building": "", @@ -82,7 +102,8 @@ "image_uri": "", "office_hours": null, "sections": [ - "67d07ee0c972c18731e23bf2" + "6972f54d6afb10b361a3e8c7", + "6972f54d6afb10b361a3e8ca" ] } ] diff --git a/parser/testdata/sections.json b/parser/testdata/sections.json index 92319de..da61c2f 100644 --- a/parser/testdata/sections.json +++ b/parser/testdata/sections.json @@ -1,8 +1,8 @@ [ { - "_id": "67d07ee0c972c18731e23bef", + "_id": "6972f54d6afb10b361a3e8cc", "section_number": "501", - "course_reference": "67d07ee0c972c18731e23bee", + "course_reference": "6972f54d6afb10b361a3e8cb", "section_corequisites": null, "academic_session": { "name": "25S", @@ -10,7 +10,7 @@ "end_date": "2025-05-16T00:00:00-05:00" }, "professors": [ - "67d07ee0c972c18731e23bf0" + "6972f54d6afb10b361a3e8cd" ], "teaching_assistants": [ { @@ -45,13 +45,32 @@ "090" ], "syllabus_uri": "https://dox.utdallas.edu/syl153033", - "grade_distribution": [], + "grade_distribution": [ + 0, + 13, + 23, + 6, + 4, + 7, + 1, + 0, + 0, + 0, + 0, + 0, + 1, + 1, + 0, + 0, + 0, + 0 + ], "attributes": null }, { - "_id": "67d07ee0c972c18731e23bf2", + "_id": "6972f54d6afb10b361a3e8cf", "section_number": "016", - "course_reference": "67d07ee0c972c18731e23bf1", + "course_reference": "6972f54d6afb10b361a3e8ce", "section_corequisites": null, "academic_session": { "name": "25S", @@ -59,7 +78,7 @@ "end_date": "2025-05-16T00:00:00-05:00" }, "professors": [ - "67d07ee0c972c18731e23bf3" + "6972f54d6afb10b361a3e8d0" ], "teaching_assistants": [], "internal_class_number": "29611", @@ -87,9 +106,9 @@ "attributes": null }, { - "_id": "67d07ee0c972c18731e23bf5", + "_id": "6972f54d6afb10b361a3e8d2", "section_number": "201", - "course_reference": "67d07ee0c972c18731e23bf4", + "course_reference": "6972f54d6afb10b361a3e8d1", "section_corequisites": null, "academic_session": { "name": "25S", @@ -123,9 +142,9 @@ "attributes": null }, { - "_id": "67d07ee0c972c18731e23bf7", + "_id": "6972f54d6afb10b361a3e8d4", "section_number": "002", - "course_reference": "67d07ee0c972c18731e23bf6", + "course_reference": "6972f54d6afb10b361a3e8d3", "section_corequisites": null, "academic_session": { "name": "25S", @@ -174,9 +193,70 @@ "attributes": null }, { - "_id": "67d07ee0c972c18731e23bea", + "_id": "6972f54d6afb10b361a3e8d6", + "section_number": "001", + "course_reference": "6972f54d6afb10b361a3e8d5", + "section_corequisites": null, + "academic_session": { + "name": "25S", + "start_date": "2025-01-21T00:00:00-06:00", + "end_date": "2025-05-16T00:00:00-05:00" + }, + "professors": [ + "6972f54d6afb10b361a3e8d7" + ], + "teaching_assistants": [], + "internal_class_number": "24043", + "instruction_mode": "Face-to-Face", + "meetings": [ + { + "start_date": "2025-01-21T00:00:00-06:00", + "end_date": "2025-05-09T00:00:00-05:00", + "meeting_days": [ + "Monday", + "Wednesday" + ], + "start_time": "10:00am", + "end_time": "11:15am", + "modality": "", + "location": { + "building": "FO", + "room": "1.202", + "map_uri": "https://locator.utdallas.edu/FO_1.202" + } + } + ], + "core_flags": [ + "050", + "090" + ], + "syllabus_uri": "https://dox.utdallas.edu/syl154931", + "grade_distribution": [ + 4, + 7, + 9, + 3, + 1, + 1, + 0, + 1, + 2, + 0, + 0, + 0, + 5, + 0, + 0, + 0, + 0, + 0 + ], + "attributes": null + }, + { + "_id": "6972f54d6afb10b361a3e8c7", "section_number": "003", - "course_reference": "67d07ee0c972c18731e23be9", + "course_reference": "6972f54d6afb10b361a3e8c6", "section_corequisites": null, "academic_session": { "name": "25S", @@ -184,8 +264,8 @@ "end_date": "2025-05-16T00:00:00-05:00" }, "professors": [ - "67d07ee0c972c18731e23beb", - "67d07ee0c972c18731e23bec" + "6972f54d6afb10b361a3e8c8", + "6972f54d6afb10b361a3e8c9" ], "teaching_assistants": [ { @@ -223,13 +303,32 @@ ], "core_flags": [], "syllabus_uri": "https://dox.utdallas.edu/syl152555", - "grade_distribution": [], + "grade_distribution": [ + 9, + 9, + 4, + 6, + 4, + 5, + 12, + 3, + 1, + 3, + 1, + 0, + 4, + 3, + 0, + 0, + 0, + 0 + ], "attributes": null }, { - "_id": "67d07ee0c972c18731e23bed", + "_id": "6972f54d6afb10b361a3e8ca", "section_number": "001", - "course_reference": "67d07ee0c972c18731e23be9", + "course_reference": "6972f54d6afb10b361a3e8c6", "section_corequisites": null, "academic_session": { "name": "25S", @@ -237,8 +336,8 @@ "end_date": "2025-05-16T00:00:00-05:00" }, "professors": [ - "67d07ee0c972c18731e23bec", - "67d07ee0c972c18731e23beb" + "6972f54d6afb10b361a3e8c9", + "6972f54d6afb10b361a3e8c8" ], "teaching_assistants": [ { @@ -276,7 +375,26 @@ ], "core_flags": [], "syllabus_uri": "https://dox.utdallas.edu/syl152552", - "grade_distribution": [], + "grade_distribution": [ + 2, + 7, + 5, + 8, + 8, + 7, + 3, + 6, + 1, + 3, + 3, + 0, + 9, + 1, + 0, + 0, + 0, + 0 + ], "attributes": null } ] diff --git a/parser/validator_test.go b/parser/validator_test.go index c98a24b..d885cef 100644 --- a/parser/validator_test.go +++ b/parser/validator_test.go @@ -52,8 +52,15 @@ func init() { panic(err) } - // The correct mapping - indexMap = map[int]int{0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 4} + courseIndex := make(map[primitive.ObjectID]int) + for i, course := range testCourses { + courseIndex[course.Id] = i + } + + indexMap = make(map[int]int, len(testSections)) + for i, section := range testSections { + indexMap[i] = courseIndex[section.Course_reference] + } } // Test duplicate courses. Designed for fail cases @@ -222,8 +229,8 @@ func TestSectionReferenceProfFail(t *testing.T) { logOutput := logBuffer.String() for _, msg := range []string{ - "Nonexistent professor reference found for section ID ObjectID(\"67d07ee0c972c18731e23bea\")!", - "Referenced professor ID: ObjectID(\"67d07ee0c972c18731e23beb\")", + "Nonexistent professor reference found for section ID ObjectID(\"6972f54d6afb10b361a3e8c7\")!", + "Referenced professor ID: ObjectID(\"6972f54d6afb10b361a3e8c9\")", } { if !strings.Contains(logOutput, msg) { t.Errorf("The function didn't log correct message. Expected \"%v\"", msg) diff --git a/scrapers/coursebook.go b/scrapers/coursebook.go index 4f6119c..539b6a8 100644 --- a/scrapers/coursebook.go +++ b/scrapers/coursebook.go @@ -25,7 +25,7 @@ import ( ) var ( - prefixRegex = regexp.MustCompile("cp_[a-z]{0,5}") + prefixRegex = regexp.MustCompile("^cp_[a-z]{1,5}$") termRegex = regexp.MustCompile("[0-9]{1,2}[sfu]") )