From cec6ca0a4be812d53d234ab0ba77ab2d72829414 Mon Sep 17 00:00:00 2001 From: mikehquan19 Date: Thu, 13 Mar 2025 19:54:39 -0500 Subject: [PATCH 1/2] Add unit tests for validator --- .env.template | 4 +- parser/validator.go | 155 +++++++------ parser/validator_test.go | 457 +++++++++++++++++++++++++++++++++++++++ utils/methods.go | 5 +- 4 files changed, 556 insertions(+), 65 deletions(-) create mode 100644 parser/validator_test.go diff --git a/.env.template b/.env.template index fd04e52..9bb0daf 100644 --- a/.env.template +++ b/.env.template @@ -1,4 +1,4 @@ -#Scrapers +# Scrapers LOGIN_NETID= LOGIN_PASSWORD= LOGIN_ASTRA_USERNAME= @@ -6,5 +6,5 @@ LOGIN_ASTRA_PASSWORD= #Login to https://east.mymazevo.com/main-home then go to https://east.mymazevo.com/api/tenantsettings/GetApiKey MAZEVO_API_KEY= -#Uploader +# Uploader MONGODB_URI= diff --git a/parser/validator.go b/parser/validator.go index 78ae337..e37da2d 100644 --- a/parser/validator.go +++ b/parser/validator.go @@ -2,10 +2,14 @@ package parser import ( "log" + "slices" "github.com/UTDNebula/api-tools/utils" + "github.com/UTDNebula/nebula-api/api/schema" + "go.mongodb.org/mongo-driver/bson/primitive" ) +// Main validation, putting everything together func validate() { // Set up deferred handler for panics to display validation fails defer func() { @@ -16,78 +20,33 @@ func validate() { log.Printf("\nValidating courses...") courseKeys := utils.GetMapKeys(Courses) - for i := 0; i < len(courseKeys)-1; i++ { + for i := range len(courseKeys) { course1 := Courses[courseKeys[i]] // Check for duplicate courses by comparing course_number, subject_prefix, and catalog_year as a compound key for j := i + 1; j < len(courseKeys); j++ { course2 := Courses[courseKeys[j]] - if course2.Catalog_year == course1.Catalog_year && course2.Course_number == course1.Course_number && course2.Subject_prefix == course1.Subject_prefix { - log.Printf("Duplicate course found for %s%s!", course1.Subject_prefix, course1.Course_number) - log.Printf("Course 1: %v\n\nCourse 2: %v", course1, course2) - log.Panic("Courses failed to validate!") - } + valDuplicateCourses(course1, course2) } // Make sure course isn't referencing any nonexistent sections, and that course-section references are consistent both ways - for _, sectionId := range course1.Sections { - section, exists := Sections[sectionId] - if !exists { - log.Printf("Nonexistent section reference found for %s%s!", course1.Subject_prefix, course1.Course_number) - log.Printf("Referenced section ID: %s\nCourse ID: %s", sectionId, course1.Id) - log.Panic("Courses failed to validate!") - } - if section.Course_reference != course1.Id { - log.Printf("Inconsistent section reference found for %s%s! The course references the section, but not vice-versa!", course1.Subject_prefix, course1.Course_number) - log.Printf("Referenced section ID: %s\nCourse ID: %s\nSection course reference: %s", sectionId, course1.Id, section.Course_reference) - log.Panic("Courses failed to validate!") - } - } + valCourseReference(course1, Sections) } courseKeys = nil log.Print("No invalid courses!") log.Print("Validating sections...") sectionKeys := utils.GetMapKeys(Sections) - for i := 0; i < len(sectionKeys)-1; i++ { + for i := range len(sectionKeys) { section1 := Sections[sectionKeys[i]] // Check for duplicate sections by comparing section_number, course_reference, and academic_session as a compound key for j := i + 1; j < len(sectionKeys); j++ { section2 := Sections[sectionKeys[j]] - if section2.Section_number == section1.Section_number && - section2.Course_reference == section1.Course_reference && - section2.Academic_session == section1.Academic_session { - log.Print("Duplicate section found!") - log.Printf("Section 1: %v\n\nSection 2: %v", section1, section2) - log.Panic("Sections failed to validate!") - } + valDuplicateSections(section1, section2) } // Make sure section isn't referencing any nonexistent professors, and that section-professor references are consistent both ways - for _, profId := range section1.Professors { - professorKey, exists := ProfessorIDMap[profId] - if !exists { - log.Printf("Nonexistent professor reference found for section ID %s!", section1.Id) - log.Printf("Referenced professor ID: %s", profId) - log.Panic("Sections failed to validate!") - } - profRefsSection := false - for _, profSection := range Professors[professorKey].Sections { - if profSection == section1.Id { - profRefsSection = true - break - } - } - if !profRefsSection { - log.Printf("Inconsistent professor reference found for section ID %s! The section references the professor, but not vice-versa!", section1.Id) - log.Printf("Referenced professor ID: %s", profId) - log.Panic("Sections failed to validate!") - } - } + valSectionReferenceProf(section1, Professors, ProfessorIDMap) + // Make sure section isn't referencing a nonexistant course - _, exists := CourseIDMap[section1.Course_reference] - if !exists { - log.Printf("Nonexistent course reference found for section ID %s!", section1.Id) - log.Printf("Referenced course ID: %s", section1.Course_reference) - log.Panic("Sections failed to validate!") - } + valSectionReferenceCourse(section1, CourseIDMap) } sectionKeys = nil log.Printf("No invalid sections!") @@ -95,18 +54,92 @@ func validate() { log.Printf("Validating professors...") profKeys := utils.GetMapKeys(Professors) // Check for duplicate professors by comparing first_name, last_name, and sections as a compound key - for i := 0; i < len(profKeys)-1; i++ { + for i := range len(profKeys) { prof1 := Professors[profKeys[i]] for j := i + 1; j < len(profKeys); j++ { prof2 := Professors[profKeys[j]] - if prof2.First_name == prof1.First_name && - prof2.Last_name == prof1.Last_name && - prof2.Profile_uri == prof1.Profile_uri { - log.Printf("Duplicate professor found!") - log.Printf("Professor 1: %v\n\nProfessor 2: %v", prof1, prof2) - log.Panic("Professors failed to validate!") - } + valDuplicateProfs(prof1, prof2) } } log.Printf("No invalid professors!") } + +// Validate if the courses are duplicate +func valDuplicateCourses(course1 *schema.Course, course2 *schema.Course) { + if course1.Catalog_year == course2.Catalog_year && course1.Course_number == course2.Course_number && + course1.Subject_prefix == course2.Subject_prefix { + log.Printf("Duplicate course found for %s%s!", course1.Subject_prefix, course1.Course_number) + log.Printf("Course 1: %v\n\nCourse 2: %v", course1, course2) + log.Panic("Courses failed to validate!") + } +} + +// Validate course reference to sections +func valCourseReference(course *schema.Course, sections map[primitive.ObjectID]*schema.Section) { + for _, sectionID := range course.Sections { + section, exists := sections[sectionID] + // validate if course references to some section not in the parsed sections + if !exists { + log.Printf("Nonexistent section reference found for %s%s!", course.Subject_prefix, course.Course_number) + log.Printf("Referenced section ID: %s\nCourse ID: %s", sectionID, course.Id) + log.Panic("Courses failed to validate!") + } + + // validate if the ref sections references back to the course + if section.Course_reference != course.Id { + log.Printf("Inconsistent section reference found for %s%s! The course references the section, but not vice-versa!", course.Subject_prefix, course.Course_number) + log.Printf("Referenced section ID: %s\nCourse ID: %s\nSection course reference: %s", sectionID, course.Id, section.Course_reference) + log.Panic("Courses failed to validate!") + } + } +} + +// Validate if the sections are duplicate +func valDuplicateSections(section1 *schema.Section, section2 *schema.Section) { + if section1.Section_number == section2.Section_number && section1.Course_reference == section2.Course_reference && + section1.Academic_session == section2.Academic_session { + log.Print("Duplicate section found!") + log.Printf("Section 1: %v\n\nSection 2: %v", section1, section2) + log.Panic("Sections failed to validate!") + } +} + +// Validate section reference to professor +func valSectionReferenceProf(section *schema.Section, profs map[string]*schema.Professor, profIDMap map[primitive.ObjectID]string) { + for _, profID := range section.Professors { + professorKey, exists := profIDMap[profID] + // validate if the section references to some prof not in the parsed professors + if !exists { + log.Printf("Nonexistent professor reference found for section ID %s!", section.Id) + log.Printf("Referenced professor ID: %s", profID) + log.Panic("Sections failed to validate!") + } + + // validate if the referenced professor references back to section + if !slices.Contains(profs[professorKey].Sections, section.Id) { + log.Printf("Inconsistent professor reference found for section ID %s! The section references the professor, but not vice-versa!", section.Id) + log.Printf("Referenced professor ID: %s", profID) + log.Panic("Sections failed to validate!") + } + } +} + +// Validate section reference to course +func valSectionReferenceCourse(section *schema.Section, courseIDMap map[primitive.ObjectID]string) { + _, exists := courseIDMap[section.Course_reference] + // validate if section reference some course not in parsed courses + if !exists { + log.Printf("Nonexistent course reference found for section ID %s!", section.Id) + log.Printf("Referenced course ID: %s", section.Course_reference) + log.Panic("Sections failed to validate!") + } +} + +// Validate if the professors are duplicate +func valDuplicateProfs(prof1 *schema.Professor, prof2 *schema.Professor) { + if prof1.First_name == prof2.First_name && prof1.Last_name == prof2.Last_name && prof1.Profile_uri == prof2.Profile_uri { + log.Printf("Duplicate professor found!") + log.Printf("Professor 1: %v\n\nProfessor 2: %v", prof1, prof2) + log.Panic("Professors failed to validate!") + } +} diff --git a/parser/validator_test.go b/parser/validator_test.go new file mode 100644 index 0000000..8e629d2 --- /dev/null +++ b/parser/validator_test.go @@ -0,0 +1,457 @@ +package parser + +import ( + "bytes" + "encoding/json" + "fmt" + "log" + "os" + "strings" + "testing" + + "github.com/UTDNebula/nebula-api/api/schema" + "go.mongodb.org/mongo-driver/bson/primitive" +) + +// Globals for testing these validation units +var testCourses []*schema.Course +var testSections []*schema.Section +var testProfessors []*schema.Professor + +// Map used to map index of test sections to test courses +var indexMap map[int]int + +// Main to upload the test data +func TestMain(m *testing.M) { + // parse the test courses + data, err := os.ReadFile("./testdata/courses.json") + if err != nil { + panic(err) + } + err = json.Unmarshal(data, &testCourses) + if err != nil { + panic(err) + } + + // parse the test sections + data, err = os.ReadFile("./testdata/sections.json") + if err != nil { + panic(err) + } + err = json.Unmarshal(data, &testSections) + if err != nil { + panic(err) + } + + // parse the test professors + data, err = os.ReadFile("./testdata/professors.json") + if err != nil { + panic(err) + } + err = json.Unmarshal(data, &testProfessors) + if err != nil { + panic(err) + } + + // map + indexMap = map[int]int{0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 4} + + testRun := m.Run() + os.Exit(testRun) +} + +// Test duplicate courses. Designed for fail cases +func TestDuplicateCoursesFail(t *testing.T) { + for i := range len(testCourses) { + t.Run(fmt.Sprintf("Duplicate course %v", i), func(t *testing.T) { + testDuplicateFail("course", i, t) + }) + } +} + +// Test duplicate sections. Designed for fail cases +func TestDuplicateSectionsFail(t *testing.T) { + for i := range len(testSections) { + t.Run(fmt.Sprintf("Duplicate section %v", i), func(t *testing.T) { + testDuplicateFail("section", i, t) + }) + } +} + +// Test duplicate professors . Designed for fail cases +func TestDuplicateProfFail(t *testing.T) { + for i := range len(testProfessors) { + t.Run(fmt.Sprintf("Duplicate professor %v", i), func(t *testing.T) { + testDuplicateFail("professor", i, t) + }) + } +} + +// Test duplicate courses. Designed for pass case +func TestDuplicateCoursesPass(t *testing.T) { + for i := range len(testCourses) - 1 { + t.Run(fmt.Sprintf("Duplicate courses %v, %v", i, i+1), func(t *testing.T) { + testDuplicatePass("course", i, i+1, t) + }) + } +} + +// Test duplicate sections. Designed for pass cases +func TestDuplicateSectionsPass(t *testing.T) { + for i := range len(testSections) - 1 { + t.Run(fmt.Sprintf("Duplicate sections %v, %v", i, i+1), func(t *testing.T) { + testDuplicatePass("section", i, i+1, t) + }) + } +} + +// Test duplicate professors. Designed for pass cases +func TestDuplicateProfPass(t *testing.T) { + for i := range len(testProfessors) - 1 { + t.Run(fmt.Sprintf("Duplicate professors %v, %v", i, i+1), func(t *testing.T) { + testDuplicatePass("professor", i, i+1, t) + }) + } +} + +// Test if course references to anything nonexistent. Designed for pass case +func TestCourseReferencePass(t *testing.T) { + sectionMap := make(map[primitive.ObjectID]*schema.Section) + for _, section := range testSections { + sectionMap[section.Id] = section + } + + // Buffer to capture the output + var logBuffer bytes.Buffer + log.SetOutput(&logBuffer) + + defer func() { + logOutput := logBuffer.String() + + if logOutput != "" { + t.Errorf("Expected nothing printed in log") + } + if r := recover(); r != nil { + t.Errorf("The function panic unexpectedly for course") + } + }() + + // Run func + for _, course := range testCourses { + valCourseReference(course, sectionMap) + } +} + +// Test if function log expected msgs when course references non-existent sections +// 2 types of fail: +// - Course references non-existent section +// - Section doesn't reference back to same course +// +// This is fail type 1 +func TestCourseReferenceFail1(t *testing.T) { + for key, value := range indexMap { + t.Run(fmt.Sprintf("Section %v & course %v", key, value), func(t *testing.T) { + testCourseReferenceFail(1, value, key, t) + }) + } +} + +// This is fail type 2 +func TestCourseReferenceFail2(t *testing.T) { + for key, value := range indexMap { + t.Run(fmt.Sprintf("Section %v & course %v", key, value), func(t *testing.T) { + testCourseReferenceFail(2, value, key, t) + }) + } +} + +// Test section reference to professor, designed for pass case +func TestSectionReferenceProfPass(t *testing.T) { + // Build profIDMap & profs + profIDMap := make(map[primitive.ObjectID]string) + profs := make(map[string]*schema.Professor) + + for _, professor := range testProfessors { + profIDMap[professor.Id] = professor.First_name + professor.Last_name + profs[professor.First_name+professor.Last_name] = professor + } + + var logBuffer bytes.Buffer + log.SetOutput(&logBuffer) + + defer func() { + logOutput := logBuffer.String() + + if logOutput != "" { + t.Errorf("Expected nothing printed in log") + } + if r := recover(); r != nil { + t.Errorf("The function panic unexpectedly for section") + } + }() + + for _, section := range testSections { + valSectionReferenceProf(section, profs, profIDMap) + } +} + +// Test section reference to professors, designed for fail case +func TestSectionReferenceProfFail(t *testing.T) { + profIDMap := make(map[primitive.ObjectID]string) + profs := make(map[string]*schema.Professor) + + for i, professor := range testProfessors { + if i != 0 { + profIDMap[professor.Id] = professor.First_name + professor.Last_name + profs[professor.First_name+professor.Last_name] = professor + } + } + + var logBuffer bytes.Buffer + log.SetOutput(&logBuffer) + + defer func() { + logOutput := logBuffer.String() + + for _, msg := range []string{ + "Nonexistent professor reference found for section ID ObjectID(\"67d07ee0c972c18731e23bea\")!", + "Referenced professor ID: ObjectID(\"67d07ee0c972c18731e23beb\")", + } { + if !strings.Contains(logOutput, msg) { + t.Errorf("The function didn't log correct message. Expected \"%v\"", msg) + } + } + + if r := recover(); r == nil { + t.Errorf("The function didn't panic") + } else { + if r != "Sections failed to validate!" { + t.Errorf("The function panic the wrong message") + } + } + }() + + for _, section := range testSections { + valSectionReferenceProf(section, profs, profIDMap) + } +} + +// Test section reference to course +func TestSectionReferenceCourse(t *testing.T) { + courseIDMap := make(map[primitive.ObjectID]string) + for _, course := range testCourses { + courseIDMap[course.Id] = course.Internal_course_number + course.Catalog_year + } + + var logBuffer bytes.Buffer + log.SetOutput(&logBuffer) + + defer func() { + logOutput := logBuffer.String() + + if logOutput != "" { + t.Errorf("Expected nothing printed in log") + } + if r := recover(); r != nil { + t.Errorf("The function panic unexpectedly for section") + } + }() + + for _, section := range testSections { + valSectionReferenceCourse(section, courseIDMap) + } +} + +/* BELOW HERE ARE HELPER FUNCTION FOR TESTS ABOVE */ + +// Helper function +// Test if validate() throws erros when encountering duplicate +// Design for fail cases +func testDuplicateFail(objType string, index int, t *testing.T) { + // the buffer used to capture the log output + var logBuffer bytes.Buffer + log.SetOutput(&logBuffer) + + // determine the expected msgs and panic msgs based on object type + var expectedMsgs []string + var panicMsg string + + switch objType { + case "course": + failCourse := testCourses[index] + + // list of msgs it must print + expectedMsgs = []string{ + fmt.Sprintf("Duplicate course found for %s%s!", failCourse.Subject_prefix, failCourse.Course_number), + fmt.Sprintf("Course 1: %v\n\nCourse 2: %v", failCourse, failCourse), + } + panicMsg = "Courses failed to validate!" + case "section": + failSection := testSections[index] + + expectedMsgs = []string{ + "Duplicate section found!", + fmt.Sprintf("Section 1: %v\n\nSection 2: %v", failSection, failSection), + } + panicMsg = "Sections failed to validate!" + case "professor": + failProf := testProfessors[index] + + expectedMsgs = []string{ + "Duplicate professor found!", + fmt.Sprintf("Professor 1: %v\n\nProfessor 2: %v", failProf, failProf), + } + panicMsg = "Professors failed to validate!" + } + + defer func() { + logOutput := logBuffer.String() // log output after running the function + + // log output needs to contain lines in the list + for _, msg := range expectedMsgs { + if !strings.Contains(logOutput, msg) { + t.Errorf("Exptected the message for %v: %v", objType, msg) + } + } + + // test whether func panics and sends the correct panic msg + if r := recover(); r == nil { + t.Errorf("The function didn't panic for %v", objType) + } else { + if r != panicMsg { + // The panic msg is incorrect + t.Errorf("The function outputted the wrong panic message for %v.", objType) + } + } + }() + + // Run func + switch objType { + case "course": + valDuplicateCourses(testCourses[index], testCourses[index]) + case "section": + valDuplicateSections(testSections[index], testSections[index]) + case "professor": + valDuplicateProfs(testProfessors[index], testProfessors[index]) + } +} + +// Helper function +// Test if func doesn't log anything and doesn't panic. +// Design for pass cases +func testDuplicatePass(objType string, index1 int, index2 int, t *testing.T) { + // Buffer to capture the output + var logBuffer bytes.Buffer + log.SetOutput(&logBuffer) + + defer func() { + logOutput := logBuffer.String() + if logOutput != "" { + t.Errorf("Expected nothing in log for " + objType) + } + if r := recover(); r != nil { + t.Errorf("The function panic unexpectedly for " + objType) + } + }() + + // Run func according to the object type. Choose pair of objects which are not duplicate + switch objType { + case "course": + valDuplicateCourses(testCourses[index1], testCourses[index2]) + case "section": + valDuplicateSections(testSections[index1], testSections[index2]) + case "professor": + valDuplicateProfs(testProfessors[index1], testProfessors[index2]) + } +} + +// Helper function for the case of course reference that fails +// failType: 1 means it lacks one sections +// failType: 2 means one section's course reference has been modified +func testCourseReferenceFail(failType int, courseIndex int, sectionIndex int, t *testing.T) { + sectionMap := make(map[primitive.ObjectID]*schema.Section) + + var sectionID, originalID primitive.ObjectID // used to store IDs of modified sections + + // Build the failed section map based on fail type + if failType == 1 { + // misses a section + for i, section := range testSections { + if sectionIndex != i { + sectionMap[section.Id] = section + } else { + sectionID = section.Id // Nonexistent ID referenced by course + } + } + } else { + // one section doesn't reference to correct courses + for i, section := range testSections { + sectionMap[section.Id] = section + if sectionIndex == i { + // save the section ID and original course reference to be restored later on + sectionID = section.Id + originalID = section.Course_reference + + // modify part + sectionMap[section.Id].Course_reference = primitive.NewObjectID() + } + } + } + + // Expected msgs + var expectedMsgs []string + + // The course that references nonexistent stuff + var failCourse *schema.Course + + if failType == 1 { + failCourse = testCourses[courseIndex] + + expectedMsgs = []string{ + fmt.Sprintf("Nonexistent section reference found for %v%v!", failCourse.Subject_prefix, failCourse.Course_number), + fmt.Sprintf("Referenced section ID: %s\nCourse ID: %s", sectionID, failCourse.Id), + } + } else { + failCourse = testCourses[courseIndex] + failSection := testSections[sectionIndex] + + expectedMsgs = []string{ + fmt.Sprintf("Inconsistent section reference found for %v%v! The course references the section, but not vice-versa!", + failCourse.Subject_prefix, failCourse.Course_number), + fmt.Sprintf("Referenced section ID: %s\nCourse ID: %s\nSection course reference: %s", + failSection.Id, failCourse.Id, failSection.Course_reference), + } + } + + // Buffer to capture the output + var logBuffer bytes.Buffer + log.SetOutput(&logBuffer) + + defer func() { + logOutput := logBuffer.String() + + for _, msg := range expectedMsgs { + if !strings.Contains(logOutput, msg) { + t.Errorf("The function didn't log correct message. Expected \"%v\"", msg) + } + } + + // restore to original course reference of modified section (if needed) + if failType == 2 { + sectionMap[sectionID].Course_reference = originalID + } + + if r := recover(); r == nil { + t.Errorf("The function didn't panic") + } else { + if r != "Courses failed to validate!" { + t.Errorf("The function panic the wrong message") + } + } + }() + + // Run func + for _, course := range testCourses { + valCourseReference(course, sectionMap) + } +} diff --git a/utils/methods.go b/utils/methods.go index 9c97324..4d01fc1 100644 --- a/utils/methods.go +++ b/utils/methods.go @@ -38,13 +38,13 @@ func GetEnv(name string) (string, error) { func InitChromeDp() (chromedpCtx context.Context, cancelFnc context.CancelFunc) { log.Printf("Initializing chromedp...") if Headless { - chromedpCtx, cancelFnc = chromedp.NewContext(context.Background()) + chromedpCtx, cancelFnc = chromedp.NewContext(context.Background(), chromedp.WithDebugf(log.Printf)) } else { allocCtx, _ := chromedp.NewExecAllocator(context.Background()) chromedpCtx, cancelFnc = chromedp.NewContext(allocCtx) } log.Printf("Initialized chromedp!") - return + return chromedpCtx, cancelFnc } // This function generates a fresh auth token and returns the new headers @@ -262,6 +262,7 @@ func RetryHTTP(requestCreator func() *http.Request, client *http.Client, retryCa return res, err } +// Get all the available course prefixes func GetCoursePrefixes(chromedpCtx context.Context) []string { // Refresh the token // refreshToken(chromedpCtx) From 627137d1861e40607775439b73ef87d3ea15b96a Mon Sep 17 00:00:00 2001 From: mikehquan19 Date: Thu, 13 Mar 2025 20:11:44 -0500 Subject: [PATCH 2/2] Add unit test for validator --- parser/validator_test.go | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/parser/validator_test.go b/parser/validator_test.go index 8e629d2..e1ca364 100644 --- a/parser/validator_test.go +++ b/parser/validator_test.go @@ -21,8 +21,7 @@ var testProfessors []*schema.Professor // Map used to map index of test sections to test courses var indexMap map[int]int -// Main to upload the test data -func TestMain(m *testing.M) { +func init() { // parse the test courses data, err := os.ReadFile("./testdata/courses.json") if err != nil { @@ -53,11 +52,7 @@ func TestMain(m *testing.M) { panic(err) } - // map indexMap = map[int]int{0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 4} - - testRun := m.Run() - os.Exit(testRun) } // Test duplicate courses. Designed for fail cases