From 687159a720bd16e9f35afc8fcf4af4ed9e16dbe4 Mon Sep 17 00:00:00 2001
From: SoggyRihno <94922205+SoggyRihno@users.noreply.github.com>
Date: Thu, 4 Dec 2025 18:39:10 -0600
Subject: [PATCH 1/4] Clean up grade loader and add PoC test
---
parser/gradeLoader.go | 96 +++++++++++++-------------------------
parser/gradeLoader_test.go | 14 ++++++
parser/parser.go | 22 ++++++---
3 files changed, 61 insertions(+), 71 deletions(-)
create mode 100644 parser/gradeLoader_test.go
diff --git a/parser/gradeLoader.go b/parser/gradeLoader.go
index 57c7d7a..7ae54bc 100644
--- a/parser/gradeLoader.go
+++ b/parser/gradeLoader.go
@@ -3,81 +3,48 @@ package parser
import (
"encoding/csv"
"fmt"
+ "github.com/UTDNebula/api-tools/utils"
"log"
"os"
- "path/filepath"
"strconv"
"strings"
)
-var grades = []string{"A+", "A", "A-", "B+", "B", "B-", "C+", "C", "C-", "D+", "D", "D-", "F", "W", "P", "CR", "NC", "I"}
-
-func loadGrades(csvDir string) map[string]map[string][]int {
+var (
+ grades = []string{"A+", "A", "A-", "B+", "B", "B-", "C+", "C", "C-", "D+", "D", "D-", "F", "W", "P", "CR", "NC", "I"}
+ optionalColumns = []string{"W", "P", "CR", "NC", "I"}
+ requiredColumns = []string{"Section", "Subject", "Catalog Number", "A+"}
+)
+func loadGrades(csvDir string) (map[string]map[string][]int, error) {
// MAP[SEMESTER] -> MAP[SUBJECT + NUMBER + SECTION] -> GRADE DISTRIBUTION
gradeMap := make(map[string]map[string][]int)
- if csvDir == "" {
- log.Print("No grade data CSV directory specified. Grade data will not be included.")
- return gradeMap
- }
-
- dirPtr, err := os.Open(csvDir)
- if err != nil {
- panic(err)
- }
- defer dirPtr.Close()
+ fileNames := utils.GetAllFilesWithExtension(csvDir, ".csv")
+ for _, name := range fileNames {
- csvFiles, err := dirPtr.ReadDir(-1)
- if err != nil {
- panic(err)
- }
-
- for _, csvEntry := range csvFiles {
-
- if csvEntry.IsDir() {
- continue
- }
-
- csvPath := fmt.Sprintf("%s/%s", csvDir, csvEntry.Name())
-
- csvFile, err := os.Open(csvPath)
+ var err error
+ gradeMap[name], err = csvToMap(name)
if err != nil {
- panic(err)
- }
- defer csvFile.Close()
-
- // Create logs directory
- if _, err := os.Stat("./logs/grades"); err != nil {
- os.Mkdir("./logs/grades", os.ModePerm)
+ return gradeMap, fmt.Errorf("error parsing %s: %v", name, err)
}
-
- // Create log file [name of csv].log in logs directory
- basePath := filepath.Base(csvPath)
- csvName := strings.TrimSuffix(basePath, filepath.Ext(basePath))
- logFile, err := os.Create("./logs/grades/" + csvName + ".log")
-
- if err != nil {
- log.Panic("Could not create CSV log file.")
- }
- defer logFile.Close()
-
- // Put data from csv into map
- gradeMap[csvName] = csvToMap(csvFile, logFile)
}
-
- return gradeMap
+ return gradeMap, nil
}
-func csvToMap(csvFile *os.File, logFile *os.File) map[string][]int {
- reader := csv.NewReader(csvFile)
- records, err := reader.ReadAll() // records is [][]strings
+func csvToMap(filename string) (map[string][]int, error) {
+ file, err := os.Open(filename)
if err != nil {
- log.Panicf("Error parsing %s: %s", csvFile.Name(), err.Error())
+ return nil, fmt.Errorf("error opening CSV file '%s': %v", filename, err)
}
- indexMap := make(map[string]int)
+ reader := csv.NewReader(file)
+ records, err := reader.ReadAll()
+ if err != nil {
+ return nil, fmt.Errorf("error parsing %s: %v", filename, err)
+ }
+ indexMap := make(map[string]int)
for j, col := range records[0] {
switch col {
case "Catalog Number", "Catalog Nbr":
@@ -89,18 +56,15 @@ func csvToMap(csvFile *os.File, logFile *os.File) map[string][]int {
}
}
- // required columns
- for _, name := range []string{"Section", "Subject", "Catalog Number", "A+"} {
+ for _, name := range requiredColumns {
if _, ok := indexMap[name]; !ok {
- fmt.Fprintf(logFile, "could not find %s column", name)
- log.Panicf("could not find %s column", name)
+ return nil, fmt.Errorf("could not find %s column in %s", name, filename)
}
}
- // optional columns
- for _, name := range []string{"W", "P", "CR", "NC", "I"} {
+ for _, name := range optionalColumns {
if _, ok := indexMap[name]; !ok {
- logFile.WriteString(fmt.Sprintf("could not find %s column\n", name))
+ log.Printf("could not find %s column in %s", name, filename)
}
}
@@ -109,7 +73,6 @@ func csvToMap(csvFile *os.File, logFile *os.File) map[string][]int {
catalogNumberCol := indexMap["Catalog Number"]
distroMap := make(map[string][]int)
-
for _, record := range records[1:] {
// convert grade distribution from string to int
intSlice := make([]int, len(grades))
@@ -125,5 +88,10 @@ func csvToMap(csvFile *os.File, logFile *os.File) map[string][]int {
distroKey := record[subjectCol] + record[catalogNumberCol] + trimmedSectionNumber
distroMap[distroKey] = intSlice[:]
}
- return distroMap
+
+ if err := file.Close(); err != nil {
+ return nil, fmt.Errorf("failed to close file '%s': %v", filename, err)
+ }
+
+ return distroMap, nil
}
diff --git a/parser/gradeLoader_test.go b/parser/gradeLoader_test.go
new file mode 100644
index 0000000..b7a414b
--- /dev/null
+++ b/parser/gradeLoader_test.go
@@ -0,0 +1,14 @@
+package parser
+
+import (
+ "testing"
+)
+
+func TestLoadGrades(t *testing.T) {
+
+ _, err := loadGrades("../grade-data/")
+ if err != nil {
+ t.Errorf("loadGrades() error = %v", err)
+ }
+
+}
diff --git a/parser/parser.go b/parser/parser.go
index 2b7049e..1dd2241 100644
--- a/parser/parser.go
+++ b/parser/parser.go
@@ -40,17 +40,25 @@ var (
timeLocation, timeError = time.LoadLocation("America/Chicago")
)
+func init() {
+ if timeError != nil {
+ log.Fatalf("Failed to initialize timeLocation: %v", timeError)
+ }
+}
+
// Parse loads scraped course artifacts, applies parsing and validation, and persists structured results.
func Parse(inDir string, outDir string, csvPath string, skipValidation bool) {
- // Panic if timeLocation didn't load properly
- if timeError != nil {
- panic(timeError)
- }
+ if csvPath == "" {
+ log.Print("No grade data CSV directory specified. Grade data will not be included.")
+ } else {
+ var err error
+ GradeMap, err = loadGrades(csvPath)
- // Load grade data from csv in advance
- GradeMap = loadGrades(csvPath)
- if len(GradeMap) != 0 {
+ if err != nil {
+ log.Fatalf("Failed to load grade data: %v", err)
+ return
+ }
log.Printf("Loaded grade distributions for %d semesters.", len(GradeMap))
}
From 7513640730ac343ce4d67828045e3c47c0f17b74 Mon Sep 17 00:00:00 2001
From: SoggyRihno <94922205+SoggyRihno@users.noreply.github.com>
Date: Thu, 22 Jan 2026 20:53:34 -0600
Subject: [PATCH 2/4] Cleaned up gradeLoader code and updated parser tests to
use grade data
---
parser/gradeLoader.go | 22 +++++++++----
parser/parser_test.go | 77 +++++++++++++++++++++++++++++++++++--------
utils/methods.go | 4 +--
3 files changed, 81 insertions(+), 22 deletions(-)
diff --git a/parser/gradeLoader.go b/parser/gradeLoader.go
index 7ae54bc..43e4855 100644
--- a/parser/gradeLoader.go
+++ b/parser/gradeLoader.go
@@ -3,17 +3,20 @@ package parser
import (
"encoding/csv"
"fmt"
- "github.com/UTDNebula/api-tools/utils"
"log"
"os"
+ "regexp"
"strconv"
"strings"
+
+ "github.com/UTDNebula/api-tools/utils"
)
var (
grades = []string{"A+", "A", "A-", "B+", "B", "B-", "C+", "C", "C-", "D+", "D", "D-", "F", "W", "P", "CR", "NC", "I"}
optionalColumns = []string{"W", "P", "CR", "NC", "I"}
requiredColumns = []string{"Section", "Subject", "Catalog Number", "A+"}
+ semesterRegex = regexp.MustCompile(`[1-9][0-9][USF]`)
)
func loadGrades(csvDir string) (map[string]map[string][]int, error) {
@@ -23,12 +26,18 @@ func loadGrades(csvDir string) (map[string]map[string][]int, error) {
fileNames := utils.GetAllFilesWithExtension(csvDir, ".csv")
for _, name := range fileNames {
+ semester := semesterRegex.FindString(name)
+ if semester == "" {
+ return gradeMap, fmt.Errorf("invalid name %s, must match format {>10}{F,S,U} i.e. 22F", name)
+ }
+
var err error
- gradeMap[name], err = csvToMap(name)
+ gradeMap[semester], err = csvToMap(name)
if err != nil {
return gradeMap, fmt.Errorf("error parsing %s: %v", name, err)
}
}
+
return gradeMap, nil
}
@@ -37,6 +46,11 @@ func csvToMap(filename string) (map[string][]int, error) {
if err != nil {
return nil, fmt.Errorf("error opening CSV file '%s': %v", filename, err)
}
+ defer func(file *os.File) {
+ if err := file.Close(); err != nil {
+ log.Printf("failed to close file '%s': %v", filename, err)
+ }
+ }(file)
reader := csv.NewReader(file)
records, err := reader.ReadAll()
@@ -89,9 +103,5 @@ func csvToMap(filename string) (map[string][]int, error) {
distroMap[distroKey] = intSlice[:]
}
- if err := file.Close(); err != nil {
- return nil, fmt.Errorf("failed to close file '%s': %v", filename, err)
- }
-
return distroMap, nil
}
diff --git a/parser/parser_test.go b/parser/parser_test.go
index cee8873..41e7af4 100644
--- a/parser/parser_test.go
+++ b/parser/parser_test.go
@@ -34,7 +34,7 @@ type TestData struct {
// testData global dictionary containing the data from /testdata by folder name
var testData map[string]TestData
-// TestMain loads parser fixtures and handles the -update flag for regenerating expectations.
+// TestMain loads parser fixtures and handles the `-update` flag for regenerating expectations.
func TestMain(m *testing.M) {
update := flag.Bool("update", false, "Regenerates the expected output for the provided test inputs. Should only be used when you are 100% sure your code is correct! It will make all test pass :)")
@@ -132,10 +132,13 @@ func updateTestData() error {
}
defer os.RemoveAll(tempDir)
- //Fill temp dir with all the test cases and expected values
+ GradeMap, err = loadGrades("../grade-data")
+ if err != nil {
+ return err
+ }
+ //Fill temp dir with all the test cases and expected values
duplicates := make(map[string]bool)
-
for i, input := range utils.GetAllFilesWithExtension("testdata", ".html") {
parse(input)
@@ -214,20 +217,66 @@ func updateTestData() error {
//rerun parser to get Courses.json, Sections.json, Professors.json
- //Parse(tempDir, tempDir, "../grade-data", false)
- //Grade data isn't work with tests currently
- Parse(tempDir, tempDir, "", false)
+ Parse(tempDir, tempDir, "../grade-data", false)
+
+ targetDir := "testdata"
+
+ err = filepath.Walk(tempDir, func(path string, info os.FileInfo, err error) error {
+ if err != nil {
+ return err
+ }
+
+ relPath, err := filepath.Rel(tempDir, path)
+ if err != nil {
+ return err
+ }
+ destPath := filepath.Join(targetDir, relPath)
+
+ if info.IsDir() {
+ return os.MkdirAll(destPath, 0755)
+ }
+
+ newContent, err := os.ReadFile(path)
+ if err != nil {
+ return err
+ }
+
+ if existingContent, err := os.ReadFile(destPath); err == nil {
+ if bytes.Equal(newContent, existingContent) {
+ return nil
+ }
+ }
+
+ log.Printf("Updating file: %s", destPath)
+ return os.WriteFile(destPath, newContent, 0644)
+ })
- //overwrite the current test data with the new data
- if err := os.RemoveAll("testdata"); err != nil {
- return fmt.Errorf("failed to remove testdata: %v", err)
+ if err != nil {
+ return fmt.Errorf("failed to sync test data: %v", err)
}
- if err := os.CopyFS("testdata", os.DirFS(tempDir)); err != nil {
- return fmt.Errorf("failed to copy testdata: %v", err)
+ err = filepath.Walk(targetDir, func(path string, info os.FileInfo, err error) error {
+ if err != nil {
+ return err
+ }
+
+ relPath, err := filepath.Rel(targetDir, path)
+ if err != nil {
+ return err
+ }
+
+ srcPath := filepath.Join(tempDir, relPath)
+ if _, err := os.Stat(srcPath); os.IsNotExist(err) {
+ log.Printf("Removing stale file: %s", path)
+ return os.RemoveAll(path)
+ }
+ return nil
+ })
+
+ if err != nil {
+ return fmt.Errorf("failed to cleanup stale data: %v", err)
}
- //reset maps to avoid side effects. maybe parser should be an object?
clearGlobals()
return nil
}
@@ -244,8 +293,8 @@ func clearGlobals() {
// TestParse verifies that parsing input fixtures generates the expected JSON exports.
func TestParse(t *testing.T) {
tempDir := t.TempDir()
- // todo fix grade data, csvPath = ./grade-data panics
- Parse("testdata", tempDir, "", false)
+
+ Parse("testdata", tempDir, "../grade-data", false)
OutputCourses, err := unmarshallFile[[]schema.Course](filepath.Join(tempDir, "courses.json"))
if err != nil {
diff --git a/utils/methods.go b/utils/methods.go
index 90712e2..70b8bfe 100644
--- a/utils/methods.go
+++ b/utils/methods.go
@@ -224,8 +224,8 @@ func WriteJSON(filepath string, data interface{}) error {
defer fptr.Close()
encoder := json.NewEncoder(fptr)
encoder.SetIndent("", "\t")
- encoder.Encode(data)
- return nil
+ encoder.SetEscapeHTML(false)
+ return encoder.Encode(data)
}
// GetAllFilesWithExtension recursively gathers file paths within inDir that match extension.
From f5cf757aa71f853099f1c2ad83225b7a6af484dc Mon Sep 17 00:00:00 2001
From: SoggyRihno <94922205+SoggyRihno@users.noreply.github.com>
Date: Thu, 22 Jan 2026 20:53:59 -0600
Subject: [PATCH 3/4] Add parser test case case_006
---
parser/testdata/case_006/classInfo.json | 14 ++
parser/testdata/case_006/course.json | 25 +++
parser/testdata/case_006/input.html | 268 +++++++++++++++++++++++
parser/testdata/case_006/professors.json | 23 ++
parser/testdata/case_006/section.json | 61 ++++++
5 files changed, 391 insertions(+)
create mode 100644 parser/testdata/case_006/classInfo.json
create mode 100644 parser/testdata/case_006/course.json
create mode 100644 parser/testdata/case_006/input.html
create mode 100644 parser/testdata/case_006/professors.json
create mode 100644 parser/testdata/case_006/section.json
diff --git a/parser/testdata/case_006/classInfo.json b/parser/testdata/case_006/classInfo.json
new file mode 100644
index 0000000..ae81d1c
--- /dev/null
+++ b/parser/testdata/case_006/classInfo.json
@@ -0,0 +1,14 @@
+{
+ "": "",
+ "Activity Type:": "Lecture",
+ "Add Consent:": "No Consent",
+ "Class Level:": "Undergraduate",
+ "Class Section:": "THEA1310.001.25S",
+ "Class/Course Number:": "24043 / 003909",
+ "Grading:": "Graded - Undergraduate",
+ "How often a course is scheduled:": "Once Each Long Semester",
+ "Instruction Mode:": "Face-to-Face",
+ "Orion Date/Time:": "2025-08-28 06:30:01",
+ "Semester Credit Hours:": "3",
+ "Session Type:": "Regular Academic Session"
+}
diff --git a/parser/testdata/case_006/course.json b/parser/testdata/case_006/course.json
new file mode 100644
index 0000000..26de202
--- /dev/null
+++ b/parser/testdata/case_006/course.json
@@ -0,0 +1,25 @@
+{
+ "_id": "6972d72a922d8b0bc3a3bce0",
+ "subject_prefix": "THEA",
+ "course_number": "1310",
+ "title": "Understanding Theatre",
+ "description": "THEA 1310 - Understanding Theatre (3 semester credit hours) Lectures, discussions, and performances designed to explore artistic, philosophical, social, historical, and psychological dimensions of the theatrical experience. Topics may include analysis of scripts, the nature of the theater compared to the other performing arts, and the nature of popular entertainments. (3-0) S",
+ "enrollment_reqs": "",
+ "school": "School of Arts, Humanities, and Technology",
+ "credit_hours": "3",
+ "class_level": "Undergraduate",
+ "activity_type": "Lecture",
+ "grading": "Graded - Undergraduate",
+ "internal_course_number": "003909",
+ "prerequisites": null,
+ "corequisites": null,
+ "co_or_pre_requisites": null,
+ "sections": [
+ "6972d72a922d8b0bc3a3bce1"
+ ],
+ "lecture_contact_hours": "3",
+ "laboratory_contact_hours": "0",
+ "offering_frequency": "S",
+ "catalog_year": "24",
+ "attributes": null
+}
diff --git a/parser/testdata/case_006/input.html b/parser/testdata/case_006/input.html
new file mode 100644
index 0000000..e4efe97
--- /dev/null
+++ b/parser/testdata/case_006/input.html
@@ -0,0 +1,268 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Course Title:
+
+
+ Understanding Theatre
+
+
+
+
+ Class Info:
+
+
+
+
+
+
+ Class Section:
+
+
+ THEA1310.001.25S
+
+
+ Instruction Mode:
+
+
+ Face-to-Face
+
+
+
+
+ Class Level:
+
+
+ Undergraduate
+
+
+ Activity Type:
+
+
+ Lecture
+
+
+
+
+ Semester Credit Hours:
+
+
+ 3
+
+
+ Class/Course Number:
+
+
+ 24043 / 003909
+
+
+
+
+ Grading:
+
+
+ Graded - Undergraduate
+
+
+ Session Type:
+
+
+ Regular Academic Session
+
+
+
+
+ Add Consent:
+
+
+ No Consent
+
+
+ Orion Date/Time:
+
+
+ 2025-08-28 06:30:01
+
+
+
+
+ How often a course is scheduled:
+
+
+ Once Each Long Semester
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Status:
+
+
+ Enrollment Status: OPEN Available Seats: 15 Enrolled
+ Total: 33 Waitlist: 0
+
+
+
+
+
+ Description:
+
+
+ THEA 1310 - Understanding Theatre (3
+ semester credit hours) Lectures, discussions, and performances designed to explore artistic,
+ philosophical, social, historical, and psychological dimensions of the theatrical experience. Topics
+ may include analysis of scripts, the nature of the theater compared to the other performing arts,
+ and the nature of popular entertainments. (3-0) S
+
+