From cec6ca0a4be812d53d234ab0ba77ab2d72829414 Mon Sep 17 00:00:00 2001
From: mikehquan19
Date: Thu, 13 Mar 2025 19:54:39 -0500
Subject: [PATCH 01/11] Add unit tests for validator

---
 .env.template            |   4 +-
 parser/validator.go      | 155 +++++++------
 parser/validator_test.go | 457 +++++++++++++++++++++++++++++++++++++++
 utils/methods.go         |   5 +-
 4 files changed, 556 insertions(+), 65 deletions(-)
 create mode 100644 parser/validator_test.go

diff --git a/.env.template b/.env.template
index fd04e52..9bb0daf 100644
--- a/.env.template
+++ b/.env.template
@@ -1,4 +1,4 @@
-#Scrapers
+# Scrapers
 LOGIN_NETID=
 LOGIN_PASSWORD=
 LOGIN_ASTRA_USERNAME=
@@ -6,5 +6,5 @@ LOGIN_ASTRA_PASSWORD=
 #Login to https://east.mymazevo.com/main-home then go to https://east.mymazevo.com/api/tenantsettings/GetApiKey
 MAZEVO_API_KEY=
 
-#Uploader
+# Uploader
 MONGODB_URI=
diff --git a/parser/validator.go b/parser/validator.go
index 78ae337..e37da2d 100644
--- a/parser/validator.go
+++ b/parser/validator.go
@@ -2,10 +2,14 @@ package parser
 
 import (
 	"log"
+	"slices"
 
 	"github.com/UTDNebula/api-tools/utils"
+	"github.com/UTDNebula/nebula-api/api/schema"
+	"go.mongodb.org/mongo-driver/bson/primitive"
 )
 
+// Main validation, putting everything together
 func validate() {
 	// Set up deferred handler for panics to display validation fails
 	defer func() {
@@ -16,78 +20,33 @@ func validate() {
 	log.Printf("\nValidating courses...")
 	courseKeys := utils.GetMapKeys(Courses)
-	for i := 0; i < len(courseKeys)-1; i++ {
+	for i := range len(courseKeys) {
 		course1 := Courses[courseKeys[i]]
 		// Check for duplicate courses by comparing course_number, subject_prefix, and catalog_year as a compound key
 		for j := i + 1; j < len(courseKeys); j++ {
 			course2 := Courses[courseKeys[j]]
-			if course2.Catalog_year == course1.Catalog_year && course2.Course_number == course1.Course_number && course2.Subject_prefix == course1.Subject_prefix {
-				log.Printf("Duplicate course found for %s%s!", course1.Subject_prefix, course1.Course_number)
-				log.Printf("Course 1: %v\n\nCourse 2: %v", course1, course2)
-				log.Panic("Courses failed to validate!")
-			}
+			valDuplicateCourses(course1, course2)
 		}
 		// Make sure course isn't referencing any nonexistent sections, and that course-section references are consistent both ways
-		for _, sectionId := range course1.Sections {
-			section, exists := Sections[sectionId]
-			if !exists {
-				log.Printf("Nonexistent section reference found for %s%s!", course1.Subject_prefix, course1.Course_number)
-				log.Printf("Referenced section ID: %s\nCourse ID: %s", sectionId, course1.Id)
-				log.Panic("Courses failed to validate!")
-			}
-			if section.Course_reference != course1.Id {
-				log.Printf("Inconsistent section reference found for %s%s! The course references the section, but not vice-versa!", course1.Subject_prefix, course1.Course_number)
-				log.Printf("Referenced section ID: %s\nCourse ID: %s\nSection course reference: %s", sectionId, course1.Id, section.Course_reference)
-				log.Panic("Courses failed to validate!")
-			}
-		}
+		valCourseReference(course1, Sections)
 	}
 	courseKeys = nil
 	log.Print("No invalid courses!")
 
 	log.Print("Validating sections...")
 	sectionKeys := utils.GetMapKeys(Sections)
-	for i := 0; i < len(sectionKeys)-1; i++ {
+	for i := range len(sectionKeys) {
 		section1 := Sections[sectionKeys[i]]
 		// Check for duplicate sections by comparing section_number, course_reference, and academic_session as a compound key
 		for j := i + 1; j < len(sectionKeys); j++ {
 			section2 := Sections[sectionKeys[j]]
-			if section2.Section_number == section1.Section_number &&
-				section2.Course_reference == section1.Course_reference &&
-				section2.Academic_session == section1.Academic_session {
-				log.Print("Duplicate section found!")
-				log.Printf("Section 1: %v\n\nSection 2: %v", section1, section2)
-				log.Panic("Sections failed to validate!")
-			}
+			valDuplicateSections(section1, section2)
 		}
 		// Make sure section isn't referencing any nonexistent professors, and that section-professor references are consistent both ways
-		for _, profId := range section1.Professors {
-			professorKey, exists := ProfessorIDMap[profId]
-			if !exists {
-				log.Printf("Nonexistent professor reference found for section ID %s!", section1.Id)
-				log.Printf("Referenced professor ID: %s", profId)
-				log.Panic("Sections failed to validate!")
-			}
-			profRefsSection := false
-			for _, profSection := range Professors[professorKey].Sections {
-				if profSection == section1.Id {
-					profRefsSection = true
-					break
-				}
-			}
-			if !profRefsSection {
-				log.Printf("Inconsistent professor reference found for section ID %s! The section references the professor, but not vice-versa!", section1.Id)
-				log.Printf("Referenced professor ID: %s", profId)
-				log.Panic("Sections failed to validate!")
-			}
-		}
+		valSectionReferenceProf(section1, Professors, ProfessorIDMap)
+
 		// Make sure section isn't referencing a nonexistent course
-		_, exists := CourseIDMap[section1.Course_reference]
-		if !exists {
-			log.Printf("Nonexistent course reference found for section ID %s!", section1.Id)
-			log.Printf("Referenced course ID: %s", section1.Course_reference)
-			log.Panic("Sections failed to validate!")
-		}
+		valSectionReferenceCourse(section1, CourseIDMap)
 	}
 	sectionKeys = nil
 	log.Printf("No invalid sections!")
@@ -95,18 +54,92 @@ func validate() {
 	log.Printf("Validating professors...")
 	profKeys := utils.GetMapKeys(Professors)
 	// Check for duplicate professors by comparing first_name, last_name, and sections as a compound key
-	for i := 0; i < len(profKeys)-1; i++ {
+	for i := range len(profKeys) {
 		prof1 := Professors[profKeys[i]]
 		for j := i + 1; j < len(profKeys); j++ {
 			prof2 := Professors[profKeys[j]]
-			if prof2.First_name == prof1.First_name &&
-				prof2.Last_name == prof1.Last_name &&
-				prof2.Profile_uri == prof1.Profile_uri {
-				log.Printf("Duplicate professor found!")
-				log.Printf("Professor 1: %v\n\nProfessor 2: %v", prof1, prof2)
-				log.Panic("Professors failed to validate!")
-			}
+			valDuplicateProfs(prof1, prof2)
 		}
 	}
 	log.Printf("No invalid professors!")
 }
+
+// Validate if the courses are duplicates
+func valDuplicateCourses(course1 *schema.Course, course2 *schema.Course) {
+	if course1.Catalog_year == course2.Catalog_year && course1.Course_number == course2.Course_number &&
+		course1.Subject_prefix == course2.Subject_prefix {
+		log.Printf("Duplicate course found for %s%s!", course1.Subject_prefix, course1.Course_number)
+		log.Printf("Course 1: %v\n\nCourse 2: %v", course1, course2)
+		log.Panic("Courses failed to validate!")
+	}
+}
+
+// Validate course reference to sections
+func valCourseReference(course *schema.Course, sections map[primitive.ObjectID]*schema.Section) {
+	for _, sectionID := range course.Sections {
+		section, exists := sections[sectionID]
+		// validate if the course references a section not in the parsed sections
+		if !exists {
+			log.Printf("Nonexistent section reference found for %s%s!", course.Subject_prefix, course.Course_number)
+			log.Printf("Referenced section ID: %s\nCourse ID: %s", sectionID, course.Id)
+			log.Panic("Courses failed to validate!")
+		}
+
+		// validate that the referenced section references back to the course
+		if section.Course_reference != course.Id {
+			log.Printf("Inconsistent section reference found for %s%s! The course references the section, but not vice-versa!", course.Subject_prefix, course.Course_number)
+			log.Printf("Referenced section ID: %s\nCourse ID: %s\nSection course reference: %s", sectionID, course.Id, section.Course_reference)
+			log.Panic("Courses failed to validate!")
+		}
+	}
+}
+
+// Validate if the sections are duplicates
+func valDuplicateSections(section1 *schema.Section, section2 *schema.Section) {
+	if section1.Section_number == section2.Section_number && section1.Course_reference == section2.Course_reference &&
+		section1.Academic_session == section2.Academic_session {
+		log.Print("Duplicate section found!")
+		log.Printf("Section 1: %v\n\nSection 2: %v", section1, section2)
+		log.Panic("Sections failed to validate!")
+	}
+}
+
+// Validate section reference to professor
+func valSectionReferenceProf(section *schema.Section, profs map[string]*schema.Professor, profIDMap map[primitive.ObjectID]string) {
+	for _, profID := range section.Professors {
+		professorKey, exists := profIDMap[profID]
+		// validate if the section references a professor not in the parsed professors
+		if !exists {
+			log.Printf("Nonexistent professor reference found for section ID %s!", section.Id)
+			log.Printf("Referenced professor ID: %s", profID)
+			log.Panic("Sections failed to validate!")
+		}
+
+		// validate that the referenced professor references back to the section
+		if !slices.Contains(profs[professorKey].Sections, section.Id) {
+			log.Printf("Inconsistent professor reference found for section ID %s! The section references the professor, but not vice-versa!", section.Id)
+			log.Printf("Referenced professor ID: %s", profID)
+			log.Panic("Sections failed to validate!")
+		}
+	}
+}
+
+// Validate section reference to course
+func valSectionReferenceCourse(section *schema.Section, courseIDMap map[primitive.ObjectID]string) {
+	_, exists := courseIDMap[section.Course_reference]
+	// validate if the section references a course not in the parsed courses
+	if !exists {
+		log.Printf("Nonexistent course reference found for section ID %s!", section.Id)
+		log.Printf("Referenced course ID: %s", section.Course_reference)
+		log.Panic("Sections failed to validate!")
+	}
+}
+
+// Validate if the professors are duplicates
+func valDuplicateProfs(prof1 *schema.Professor, prof2 *schema.Professor) {
+	if prof1.First_name == prof2.First_name && prof1.Last_name == prof2.Last_name && prof1.Profile_uri == prof2.Profile_uri {
+		log.Printf("Duplicate professor found!")
+		log.Printf("Professor 1: %v\n\nProfessor 2: %v", prof1, prof2)
+		log.Panic("Professors failed to validate!")
+	}
+}
diff --git a/parser/validator_test.go b/parser/validator_test.go
new file mode 100644
index 0000000..8e629d2
--- /dev/null
+++ b/parser/validator_test.go
@@ -0,0 +1,457 @@
+package parser
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"log"
+	"os"
+	"strings"
+	"testing"
+
+	"github.com/UTDNebula/nebula-api/api/schema"
+	"go.mongodb.org/mongo-driver/bson/primitive"
+)
+
+// Globals for testing these validation units
+var testCourses []*schema.Course
+var testSections []*schema.Section
+var testProfessors []*schema.Professor
+
+// Map used to map index of test sections to test courses
+var indexMap map[int]int
+
+// Main to load the test data
+func TestMain(m *testing.M) {
+	// parse the test courses
+	data, err := os.ReadFile("./testdata/courses.json")
+	if err != nil {
+		panic(err)
+	}
+	err = json.Unmarshal(data, &testCourses)
+	if err != nil {
+		panic(err)
+	}
+
+	// parse the test sections
+	data, err = os.ReadFile("./testdata/sections.json")
+	if err != nil {
+		panic(err)
+	}
+	err = json.Unmarshal(data, &testSections)
+	if err != nil {
+		panic(err)
+	}
+
+	// parse the test professors
+	data, err = os.ReadFile("./testdata/professors.json")
+	if err != nil {
+		panic(err)
+	}
+	err = json.Unmarshal(data, &testProfessors)
+	if err != nil {
+		panic(err)
+	}
+
+	// map
+	indexMap = map[int]int{0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 4}
+
+	testRun := m.Run()
+	os.Exit(testRun)
+}
+
+// Test duplicate courses. Designed for fail cases
+func TestDuplicateCoursesFail(t *testing.T) {
+	for i := range len(testCourses) {
+		t.Run(fmt.Sprintf("Duplicate course %v", i), func(t *testing.T) {
+			testDuplicateFail("course", i, t)
+		})
+	}
+}
+
+// Test duplicate sections. Designed for fail cases
+func TestDuplicateSectionsFail(t *testing.T) {
+	for i := range len(testSections) {
+		t.Run(fmt.Sprintf("Duplicate section %v", i), func(t *testing.T) {
+			testDuplicateFail("section", i, t)
+		})
+	}
+}
+
+// Test duplicate professors. Designed for fail cases
+func TestDuplicateProfFail(t *testing.T) {
+	for i := range len(testProfessors) {
+		t.Run(fmt.Sprintf("Duplicate professor %v", i), func(t *testing.T) {
+			testDuplicateFail("professor", i, t)
+		})
+	}
+}
+
+// Test duplicate courses. Designed for pass cases
+func TestDuplicateCoursesPass(t *testing.T) {
+	for i := range len(testCourses) - 1 {
+		t.Run(fmt.Sprintf("Duplicate courses %v, %v", i, i+1), func(t *testing.T) {
+			testDuplicatePass("course", i, i+1, t)
+		})
+	}
+}
+
+// Test duplicate sections. Designed for pass cases
+func TestDuplicateSectionsPass(t *testing.T) {
+	for i := range len(testSections) - 1 {
+		t.Run(fmt.Sprintf("Duplicate sections %v, %v", i, i+1), func(t *testing.T) {
+			testDuplicatePass("section", i, i+1, t)
+		})
+	}
+}
+
+// Test duplicate professors. Designed for pass cases
+func TestDuplicateProfPass(t *testing.T) {
+	for i := range len(testProfessors) - 1 {
+		t.Run(fmt.Sprintf("Duplicate professors %v, %v", i, i+1), func(t *testing.T) {
+			testDuplicatePass("professor", i, i+1, t)
+		})
+	}
+}
+
+// Test if course references anything nonexistent. Designed for pass case
+func TestCourseReferencePass(t *testing.T) {
+	sectionMap := make(map[primitive.ObjectID]*schema.Section)
+	for _, section := range testSections {
+		sectionMap[section.Id] = section
+	}
+
+	// Buffer to capture the output
+	var logBuffer bytes.Buffer
+	log.SetOutput(&logBuffer)
+
+	defer func() {
+		logOutput := logBuffer.String()
+
+		if logOutput != "" {
+			t.Errorf("Expected nothing printed in log")
+		}
+		if r := recover(); r != nil {
+			t.Errorf("The function panicked unexpectedly for course")
+		}
+	}()
+
+	// Run func
+	for _, course := range testCourses {
+		valCourseReference(course, sectionMap)
+	}
+}
+
+// Test if the function logs expected msgs when course references non-existent sections
+// 2 types of fail:
+// - Course references non-existent section
+// - Section doesn't reference back to same course
+//
+// This is fail type 1
+func TestCourseReferenceFail1(t *testing.T) {
+	for key, value := range indexMap {
+		t.Run(fmt.Sprintf("Section %v & course %v", key, value), func(t *testing.T) {
+			testCourseReferenceFail(1, value, key, t)
+		})
+	}
+}
+
+// This is fail type 2
+func TestCourseReferenceFail2(t *testing.T) {
+	for key, value := range indexMap {
+		t.Run(fmt.Sprintf("Section %v & course %v", key, value), func(t *testing.T) {
+			testCourseReferenceFail(2, value, key, t)
+		})
+	}
+}
+
+// Test section reference to professor, designed for pass case
+func TestSectionReferenceProfPass(t *testing.T) {
+	// Build profIDMap & profs
+	profIDMap := make(map[primitive.ObjectID]string)
+	profs := make(map[string]*schema.Professor)
+
+	for _, professor := range testProfessors {
+		profIDMap[professor.Id] = professor.First_name + professor.Last_name
+		profs[professor.First_name+professor.Last_name] = professor
+	}
+
+	var logBuffer bytes.Buffer
+	log.SetOutput(&logBuffer)
+
+	defer func() {
+		logOutput := logBuffer.String()
+
+		if logOutput != "" {
+			t.Errorf("Expected nothing printed in log")
+		}
+		if r := recover(); r != nil {
+			t.Errorf("The function panicked unexpectedly for section")
+		}
+	}()
+
+	for _, section := range testSections {
+		valSectionReferenceProf(section, profs, profIDMap)
+	}
+}
+
+// Test section reference to professors, designed for fail case
+func TestSectionReferenceProfFail(t *testing.T) {
+	profIDMap := make(map[primitive.ObjectID]string)
+	profs := make(map[string]*schema.Professor)
+
+	for i, professor := range testProfessors {
+		if i != 0 {
+			profIDMap[professor.Id] = professor.First_name + professor.Last_name
+			profs[professor.First_name+professor.Last_name] = professor
+		}
+	}
+
+	var logBuffer bytes.Buffer
+	log.SetOutput(&logBuffer)
+
+	defer func() {
+		logOutput := logBuffer.String()
+
+		for _, msg := range []string{
+			"Nonexistent professor reference found for section ID ObjectID(\"67d07ee0c972c18731e23bea\")!",
+			"Referenced professor ID: ObjectID(\"67d07ee0c972c18731e23beb\")",
+		} {
+			if !strings.Contains(logOutput, msg) {
+				t.Errorf("The function didn't log the correct message. Expected \"%v\"", msg)
+			}
+		}
+
+		if r := recover(); r == nil {
+			t.Errorf("The function didn't panic")
+		} else {
+			if r != "Sections failed to validate!" {
+				t.Errorf("The function panicked with the wrong message")
+			}
+		}
+	}()
+
+	for _, section := range testSections {
+		valSectionReferenceProf(section, profs, profIDMap)
+	}
+}
+
+// Test section reference to course
+func TestSectionReferenceCourse(t *testing.T) {
+	courseIDMap := make(map[primitive.ObjectID]string)
+	for _, course := range testCourses {
+		courseIDMap[course.Id] = course.Internal_course_number + course.Catalog_year
+	}
+
+	var logBuffer bytes.Buffer
+	log.SetOutput(&logBuffer)
+
+	defer func() {
+		logOutput := logBuffer.String()
+
+		if logOutput != "" {
+			t.Errorf("Expected nothing printed in log")
+		}
+		if r := recover(); r != nil {
+			t.Errorf("The function panicked unexpectedly for section")
+		}
+	}()
+
+	for _, section := range testSections {
+		valSectionReferenceCourse(section, courseIDMap)
+	}
+}
+
+/* BELOW HERE ARE HELPER FUNCTIONS FOR TESTS ABOVE */
+
+// Helper function
+// Test if validate() throws errors when encountering duplicates
+// Designed for fail cases
+func testDuplicateFail(objType string, index int, t *testing.T) {
+	// the buffer used to capture the log output
+	var logBuffer bytes.Buffer
+	log.SetOutput(&logBuffer)
+
+	// determine the expected msgs and panic msgs based on object type
+	var expectedMsgs []string
+	var panicMsg string
+
+	switch objType {
+	case "course":
+		failCourse := testCourses[index]
+
+		// list of msgs it must print
+		expectedMsgs = []string{
+			fmt.Sprintf("Duplicate course found for %s%s!", failCourse.Subject_prefix, failCourse.Course_number),
+			fmt.Sprintf("Course 1: %v\n\nCourse 2: %v", failCourse, failCourse),
+		}
+		panicMsg = "Courses failed to validate!"
+	case "section":
+		failSection := testSections[index]
+
+		expectedMsgs = []string{
+			"Duplicate section found!",
+			fmt.Sprintf("Section 1: %v\n\nSection 2: %v", failSection, failSection),
+		}
+		panicMsg = "Sections failed to validate!"
+	case "professor":
+		failProf := testProfessors[index]
+
+		expectedMsgs = []string{
+			"Duplicate professor found!",
+			fmt.Sprintf("Professor 1: %v\n\nProfessor 2: %v", failProf, failProf),
+		}
+		panicMsg = "Professors failed to validate!"
+	}
+
+	defer func() {
+		logOutput := logBuffer.String() // log output after running the function
+
+		// log output needs to contain lines in the list
+		for _, msg := range expectedMsgs {
+			if !strings.Contains(logOutput, msg) {
+				t.Errorf("Expected the message for %v: %v", objType, msg)
+			}
+		}
+
+		// test whether func panics and sends the correct panic msg
+		if r := recover(); r == nil {
+			t.Errorf("The function didn't panic for %v", objType)
+		} else {
+			if r != panicMsg {
+				// The panic msg is incorrect
+				t.Errorf("The function outputted the wrong panic message for %v.", objType)
+			}
+		}
+	}()
+
+	// Run func
+	switch objType {
+	case "course":
+		valDuplicateCourses(testCourses[index], testCourses[index])
+	case "section":
+		valDuplicateSections(testSections[index], testSections[index])
+	case "professor":
+		valDuplicateProfs(testProfessors[index], testProfessors[index])
+	}
+}
+
+// Helper function
+// Test if func doesn't log anything and doesn't panic.
+// Designed for pass cases
+func testDuplicatePass(objType string, index1 int, index2 int, t *testing.T) {
+	// Buffer to capture the output
+	var logBuffer bytes.Buffer
+	log.SetOutput(&logBuffer)
+
+	defer func() {
+		logOutput := logBuffer.String()
+		if logOutput != "" {
+			t.Errorf("Expected nothing in log for " + objType)
+		}
+		if r := recover(); r != nil {
+			t.Errorf("The function panicked unexpectedly for " + objType)
+		}
+	}()
+
+	// Run func according to the object type. Choose pair of objects which are not duplicates
+	switch objType {
+	case "course":
+		valDuplicateCourses(testCourses[index1], testCourses[index2])
+	case "section":
+		valDuplicateSections(testSections[index1], testSections[index2])
+	case "professor":
+		valDuplicateProfs(testProfessors[index1], testProfessors[index2])
+	}
+}
+
+// Helper function for the case of course reference that fails
+// failType: 1 means it lacks one section
+// failType: 2 means one section's course reference has been modified
+func testCourseReferenceFail(failType int, courseIndex int, sectionIndex int, t *testing.T) {
+	sectionMap := make(map[primitive.ObjectID]*schema.Section)
+
+	var sectionID, originalID primitive.ObjectID // used to store IDs of modified sections
+
+	// Build the failed section map based on fail type
+	if failType == 1 {
+		// misses a section
+		for i, section := range testSections {
+			if sectionIndex != i {
+				sectionMap[section.Id] = section
+			} else {
+				sectionID = section.Id // Nonexistent ID referenced by course
+			}
+		}
+	} else {
+		// one section doesn't reference the correct course
+		for i, section := range testSections {
+			sectionMap[section.Id] = section
+			if sectionIndex == i {
+				// save the section ID and original course reference to be restored later on
+				sectionID = section.Id
+				originalID = section.Course_reference
+
+				// modify part
+				sectionMap[section.Id].Course_reference = primitive.NewObjectID()
+			}
+		}
+	}
+
+	// Expected msgs
+	var expectedMsgs []string
+
+	// The course that references nonexistent stuff
+	var failCourse *schema.Course
+
+	if failType == 1 {
+		failCourse = testCourses[courseIndex]
+
+		expectedMsgs = []string{
+			fmt.Sprintf("Nonexistent section reference found for %v%v!", failCourse.Subject_prefix, failCourse.Course_number),
+			fmt.Sprintf("Referenced section ID: %s\nCourse ID: %s", sectionID, failCourse.Id),
+		}
+	} else {
+		failCourse = testCourses[courseIndex]
+		failSection := testSections[sectionIndex]
+
+		expectedMsgs = []string{
+			fmt.Sprintf("Inconsistent section reference found for %v%v! The course references the section, but not vice-versa!",
+				failCourse.Subject_prefix, failCourse.Course_number),
+			fmt.Sprintf("Referenced section ID: %s\nCourse ID: %s\nSection course reference: %s",
+				failSection.Id, failCourse.Id, failSection.Course_reference),
+		}
+	}
+
+	// Buffer to capture the output
+	var logBuffer bytes.Buffer
+	log.SetOutput(&logBuffer)
+
+	defer func() {
+		logOutput := logBuffer.String()
+
+		for _, msg := range expectedMsgs {
+			if !strings.Contains(logOutput, msg) {
+				t.Errorf("The function didn't log the correct message. Expected \"%v\"", msg)
+			}
+		}
+
+		// restore to original course reference of modified section (if needed)
+		if failType == 2 {
+			sectionMap[sectionID].Course_reference = originalID
+		}
+
+		if r := recover(); r == nil {
+			t.Errorf("The function didn't panic")
+		} else {
+			if r != "Courses failed to validate!" {
+				t.Errorf("The function panicked with the wrong message")
+			}
+		}
+	}()
+
+	// Run func
+	for _, course := range testCourses {
+		valCourseReference(course, sectionMap)
+	}
+}
diff --git a/utils/methods.go b/utils/methods.go
index 9c97324..4d01fc1 100644
--- a/utils/methods.go
+++ b/utils/methods.go
@@ -38,13 +38,13 @@ func GetEnv(name string) (string, error) {
 func InitChromeDp() (chromedpCtx context.Context, cancelFnc context.CancelFunc) {
 	log.Printf("Initializing chromedp...")
 	if Headless {
-		chromedpCtx, cancelFnc = chromedp.NewContext(context.Background())
+		chromedpCtx, cancelFnc = chromedp.NewContext(context.Background(), chromedp.WithDebugf(log.Printf))
 	} else {
 		allocCtx, _ := chromedp.NewExecAllocator(context.Background())
 		chromedpCtx, cancelFnc = chromedp.NewContext(allocCtx)
 	}
 	log.Printf("Initialized chromedp!")
-	return
+	return chromedpCtx, cancelFnc
 }
 
 // This function generates a fresh auth token and returns the new headers
@@ -262,6 +262,7 @@ func RetryHTTP(requestCreator func() *http.Request, client *http.Client, retryCa
 	return res, err
 }
 
+// Get all the available course prefixes
 func GetCoursePrefixes(chromedpCtx context.Context) []string {
 	// Refresh the token
 	// refreshToken(chromedpCtx)
From 627137d1861e40607775439b73ef87d3ea15b96a Mon Sep 17 00:00:00 2001
From: mikehquan19
Date: Thu, 13 Mar 2025 20:11:44 -0500
Subject: [PATCH 02/11] Add unit test for validator

---
 parser/validator_test.go | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/parser/validator_test.go b/parser/validator_test.go
index 8e629d2..e1ca364 100644
--- a/parser/validator_test.go
+++ b/parser/validator_test.go
@@ -21,8 +21,7 @@ var testProfessors []*schema.Professor
 // Map used to map index of test sections to test courses
 var indexMap map[int]int
 
-// Main to load the test data
-func TestMain(m *testing.M) {
+func init() {
 	// parse the test courses
 	data, err := os.ReadFile("./testdata/courses.json")
 	if err != nil {
@@ -53,11 +52,7 @@
 		panic(err)
 	}
 
-	// map
 	indexMap = map[int]int{0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 4}
-
-	testRun := m.Run()
-	os.Exit(testRun)
 }
 
 // Test duplicate courses. Designed for fail cases
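A side note on the duplicate checks refactored in patch 01: valDuplicateCourses and its siblings are still driven by pairwise O(n²) loops over the key sets. The same detection can be done in one pass with a compound-key map; this is only a sketch, assuming the compared schema.Course fields print stably with %v, and findDuplicateCourses is an illustrative name rather than part of this series:

package parser

import (
	"fmt"
	"log"

	"github.com/UTDNebula/nebula-api/api/schema"
)

// findDuplicateCourses builds a compound key from the same three fields the
// pairwise loop compares, so each course is visited once instead of once per pair.
func findDuplicateCourses(courses []*schema.Course) {
	seen := make(map[string]*schema.Course, len(courses))
	for _, course := range courses {
		key := fmt.Sprintf("%v|%v|%v", course.Subject_prefix, course.Course_number, course.Catalog_year)
		if other, found := seen[key]; found {
			log.Printf("Course 1: %v\n\nCourse 2: %v", other, course)
			log.Panic("Courses failed to validate!")
		}
		seen[key] = course
	}
}

The same shape would work for sections (section_number, course_reference, academic_session) and professors (first_name, last_name, profile_uri).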
From f1a2f92a125b4215c1a239310795b8a62a25cc4d Mon Sep 17 00:00:00 2001
From: mikehquan19
Date: Mon, 21 Apr 2025 02:08:39 -0500
Subject: [PATCH 03/11] Refactor calendar scraper

---
 scrapers/calendar.go | 64 +++++++++++++++++++++++++++++---------------
 1 file changed, 43 insertions(+), 21 deletions(-)

diff --git a/scrapers/calendar.go b/scrapers/calendar.go
index 8a130dd..176737c 100644
--- a/scrapers/calendar.go
+++ b/scrapers/calendar.go
@@ -39,18 +39,17 @@ func ScrapeCalendar(outDir string) {
 	events := []schema.Event{}
 
 	log.Printf("Scraping event page links")
-	//Grab all links to event pages
+	// Grab all links to event pages
 	var pageLinks []string = []string{}
 	_, err = chromedp.RunResponse(chromedpCtx,
 		chromedp.Navigate(CALENDAR_LINK),
-		chromedp.QueryAfter(".item.event_item.vevent > a",
+		chromedp.QueryAfter(".em-card_image > a",
 			func(ctx context.Context, _ runtime.ExecutionContextID, nodes ...*cdp.Node) error {
 				for _, node := range nodes {
 					href, hasHref := node.Attribute("href")
 					if !hasHref {
 						return errors.New("event card was missing an href")
 					}
-
 					pageLinks = append(pageLinks, href)
 				}
 				return nil
@@ -61,13 +60,17 @@ func ScrapeCalendar(outDir string) {
 		panic(err)
 	}
 	log.Printf("Scraped event page links!")
+	for _, page := range pageLinks {
+		// Print the links of the page
+		log.Println(page)
+	}
 
 	for _, page := range pageLinks {
-		//Navigate to page and get page summary
+		// Navigate to page and get page summary
 		summary := ""
 		_, err := chromedp.RunResponse(chromedpCtx,
 			chromedp.Navigate(page),
-			chromedp.QueryAfter(".summary",
+			chromedp.QueryAfter(".em-card_title",
 				func(ctx context.Context, _ runtime.ExecutionContextID, nodes ...*cdp.Node) error {
 					if len(nodes) != 0 {
 						summary = trailingSpaceRegex.ReplaceAllString(getNodeText(nodes[0]), "")
@@ -126,13 +129,26 @@ func ScrapeCalendar(outDir string) {
 		}
 		utils.VPrintf("Scraped time: %s to %s ", dateTimeStart, dateTimeEnd)
 
-		//Grab Location of Event
-		var location string = ""
+		// Grab Location of Event
+
+		// If .location doesn't have children, then it's a virtual event
+		var location string = "Virtual Event" // Default
 		err = chromedp.Run(chromedpCtx,
+			// Grab the name of the location
+			chromedp.QueryAfter("p.location > a",
+				func(ctx context.Context, _ runtime.ExecutionContextID, nodes ...*cdp.Node) error {
+					if len(nodes) != 0 {
+						location = getNodeText(nodes[0]) + "\n "
+					}
+					return nil
+				}, chromedp.AtLeast(0),
+			),
+			// Grab the address of the location (concatenated with the name)
 			chromedp.QueryAfter("p.location > span",
 				func(ctx context.Context, _ runtime.ExecutionContextID, nodes ...*cdp.Node) error {
 					if len(nodes) != 0 {
-						location = getNodeText(nodes[0])
+						location += getNodeText(nodes[0])
 					}
 					return nil
 				}, chromedp.AtLeast(0),
@@ -143,13 +159,15 @@ func ScrapeCalendar(outDir string) {
 		}
 		utils.VPrintf("Scraped location: %s, ", location)
 
-		//Get description of event
+		// Get description of event
 		var description string = ""
 		err = chromedp.Run(chromedpCtx,
-			chromedp.QueryAfter(".description > p",
+			chromedp.QueryAfter(".em-about_description > p",
 				func(ctx context.Context, _ runtime.ExecutionContextID, nodes ...*cdp.Node) error {
-					if len(nodes) != 0 {
-						description = getNodeText(nodes[0])
+					for _, node := range nodes {
+						if getNodeText(node) != "" {
+							description += getNodeText(node) + "\n\n"
+						}
 					}
 					return nil
 				}, chromedp.AtLeast(0),
@@ -160,7 +178,7 @@ func ScrapeCalendar(outDir string) {
 		}
 		utils.VPrintf("Scraped description: %s, ", description)
 
-		//Grab Event Type
+		// Grab Event Type
 		var eventType []string = []string{}
 		err = chromedp.Run(chromedpCtx,
 			chromedp.QueryAfter(".filter-event_types > p > a",
@@ -177,7 +195,7 @@ func ScrapeCalendar(outDir string) {
 		}
 		utils.VPrintf("Scraped event type: %s", eventType)
 
-		//Grab Target Audience
+		// Grab Target Audience
 		targetAudience := []string{}
 		err = chromedp.Run(chromedpCtx,
 			chromedp.QueryAfter(".filter-event_target_audience > p > a",
@@ -194,7 +212,7 @@ func ScrapeCalendar(outDir string) {
 		}
 		utils.VPrintf("Scraped target audience: %s, ", targetAudience)
 
-		//Grab Topic
+		// Grab Topic
 		topic := []string{}
 		err = chromedp.Run(chromedpCtx,
 			chromedp.QueryAfter(".filter-event_topic > p > a",
@@ -211,7 +229,7 @@ func ScrapeCalendar(outDir string) {
 		}
 		utils.VPrintf("Scraped topic: %s, ", topic)
 
-		//Grab Event Tags
+		// Grab Event Tags
 		tags := []string{}
 		err = chromedp.Run(chromedpCtx,
 			chromedp.QueryAfter(".event-tags > p > a",
@@ -228,7 +246,7 @@ func ScrapeCalendar(outDir string) {
 		}
 		utils.VPrintf("Scraped tags: %s, ", tags)
 
-		//Grab Website
+		// Grab Website
 		var eventWebsite string = ""
 		err = chromedp.Run(chromedpCtx,
 			chromedp.QueryAfter(".event-website > p > a",
@@ -249,7 +267,7 @@ func ScrapeCalendar(outDir string) {
 		}
 		utils.VPrintf("Scraped website: %s, ", eventWebsite)
 
-		//Grab Department
+		// Grab Department
 		var eventDepartment []string = []string{}
 		err = chromedp.Run(chromedpCtx,
 			chromedp.QueryAfter(".event-group > a",
@@ -266,7 +284,7 @@ func ScrapeCalendar(outDir string) {
 		}
 		utils.VPrintf("Scraped department: %s, ", eventDepartment)
 
-		//Grab Contact information
+		// Grab Contact information
 		var contactInformationName string = ""
 		var contactInformationEmail string = ""
 		var contactInformationPhone string = ""
@@ -279,10 +297,14 @@ func ScrapeCalendar(outDir string) {
 				return nil
 			}, chromedp.AtLeast(0),
 			),
-			chromedp.QueryAfter(".custom-field-contact_information_email",
+			chromedp.QueryAfter(".custom-field-contact_information_email > a",
 				func(ctx context.Context, _ runtime.ExecutionContextID, nodes ...*cdp.Node) error {
 					if len(nodes) != 0 {
-						contactInformationEmail = getNodeText(nodes[0])
+						emailHref, hasEmailHref := nodes[0].Attribute("href")
+						if !hasEmailHref {
+							return errors.New("event contact doesn't have email")
+						}
+						contactInformationEmail = emailHref[7:]
 					}
 					return nil
 				}, chromedp.AtLeast(0),
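One detail of the description loop added in patch 03: appending "\n\n" after every paragraph leaves a trailing separator on the final string. Collecting the non-empty paragraphs and joining them avoids that; a minimal sketch (joinParagraphs is an illustrative helper, not part of the patch):

package scrapers

import "strings"

// joinParagraphs concatenates non-empty paragraphs with blank lines between
// them, without leaving a trailing separator after the last one.
func joinParagraphs(paragraphs []string) string {
	kept := make([]string, 0, len(paragraphs))
	for _, p := range paragraphs {
		if p != "" {
			kept = append(kept, p)
		}
	}
	return strings.Join(kept, "\n\n")
}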
From 27bcf6395b4e022ffe3af1ebb1b7107cce1e93b7 Mon Sep 17 00:00:00 2001
From: mikehquan19
Date: Mon, 21 Apr 2025 02:29:43 -0500
Subject: [PATCH 04/11] Just revert the previous erroneous commit

---
 utils/methods.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils/methods.go b/utils/methods.go
index 2398351..2195240 100644
--- a/utils/methods.go
+++ b/utils/methods.go
@@ -38,7 +38,7 @@ func GetEnv(name string) (string, error) {
 func InitChromeDp() (chromedpCtx context.Context, cancelFnc context.CancelFunc) {
 	log.Printf("Initializing chromedp...")
 	if Headless {
-		chromedpCtx, cancelFnc = chromedp.NewContext(context.Background(), chromedp.WithDebugf(log.Printf))
+		chromedpCtx, cancelFnc = chromedp.NewContext(context.Background())
 	} else {
 		allocCtx, _ := chromedp.NewExecAllocator(context.Background())
 		chromedpCtx, cancelFnc = chromedp.NewContext(allocCtx)

From 616ead81a03055a4e515394e96ce775aef7c7cbe Mon Sep 17 00:00:00 2001
From: mikehquan19
Date: Mon, 21 Apr 2025 04:03:38 -0500
Subject: [PATCH 05/11] Some minor fixes and comments for readability

---
 scrapers/calendar.go | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/scrapers/calendar.go b/scrapers/calendar.go
index 176737c..1627f95 100644
--- a/scrapers/calendar.go
+++ b/scrapers/calendar.go
@@ -25,6 +25,7 @@ import (
 const CALENDAR_LINK string = "https://calendar.utdallas.edu/calendar"
 
 var trailingSpaceRegex *regexp.Regexp = regexp.MustCompile(`(\s{2,}?\s{2,})|(\n)`)
+var leadingSpaceRegex *regexp.Regexp = regexp.MustCompile(`^\s+`)
 
 func ScrapeCalendar(outDir string) {
 
@@ -61,7 +62,7 @@ func ScrapeCalendar(outDir string) {
 	}
 	log.Printf("Scraped event page links!")
 	for _, page := range pageLinks {
-		// Print the links of the page
+		// Print the links of the page to check
 		log.Println(page)
 	}
 
@@ -139,7 +140,8 @@ func ScrapeCalendar(outDir string) {
 			chromedp.QueryAfter("p.location > a",
 				func(ctx context.Context, _ runtime.ExecutionContextID, nodes ...*cdp.Node) error {
 					if len(nodes) != 0 {
-						location = getNodeText(nodes[0]) + "\n "
+						// Location's name somehow contains leading space, trim it
+						location = leadingSpaceRegex.ReplaceAllString(getNodeText(nodes[0]), "")
 					}
 					return nil
 				}, chromedp.AtLeast(0),
@@ -148,7 +150,10 @@ func ScrapeCalendar(outDir string) {
 			chromedp.QueryAfter("p.location > span",
 				func(ctx context.Context, _ runtime.ExecutionContextID, nodes ...*cdp.Node) error {
 					if len(nodes) != 0 {
-						location += getNodeText(nodes[0])
+						// There are cases where it doesn't show the address
+						if getNodeText(nodes[0]) != "" {
+							location += "\n" + getNodeText(nodes[0])
+						}
 					}
 					return nil
 				}, chromedp.AtLeast(0),
@@ -164,8 +169,9 @@ func ScrapeCalendar(outDir string) {
 		err = chromedp.Run(chromedpCtx,
 			chromedp.QueryAfter(".em-about_description > p",
 				func(ctx context.Context, _ runtime.ExecutionContextID, nodes ...*cdp.Node) error {
+					// Concatenate all the sentences in the description together
 					for _, node := range nodes {
-						if getNodeText(node) != "" {
+						if getNodeText(node) != "" && getNodeText(node) != "\u00A0" {
 							description += getNodeText(node) + "\n\n"
 						}
 					}
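Patch 05 compiles leadingSpaceRegex (`^\s+`) just to strip leading whitespace from location names. The standard library can do the same without a regex; a roughly equivalent sketch (note that unicode.IsSpace is slightly broader than Go's `\s` class, which matches only ASCII whitespace):

package scrapers

import (
	"strings"
	"unicode"
)

// trimLeadingSpace removes leading whitespace from a scraped location name,
// covering what the ^\s+ regex is used for in the patch.
func trimLeadingSpace(raw string) string {
	return strings.TrimLeftFunc(raw, unicode.IsSpace)
}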
From d5f4b6e1b8c21337adda5a5986037f2fe4347577 Mon Sep 17 00:00:00 2001
From: mikehquan19
Date: Mon, 21 Apr 2025 14:15:25 -0500
Subject: [PATCH 06/11] Adjust the validation test a bit for readability

---
 parser/validator_test.go | 88 ++++++++++++++++++++--------------------
 1 file changed, 44 insertions(+), 44 deletions(-)

diff --git a/parser/validator_test.go b/parser/validator_test.go
index e1ca364..6a9c9ff 100644
--- a/parser/validator_test.go
+++ b/parser/validator_test.go
@@ -18,11 +18,11 @@ var testCourses []*schema.Course
 var testSections []*schema.Section
 var testProfessors []*schema.Professor
 
-// Map used to map index of test sections to test courses
+// Map index of test sections to test courses
 var indexMap map[int]int
 
 func init() {
-	// parse the test courses
+	// Parse the test courses
 	data, err := os.ReadFile("./testdata/courses.json")
 	if err != nil {
 		panic(err)
@@ -32,7 +32,7 @@ func init() {
 		panic(err)
 	}
 
-	// parse the test sections
+	// Parse the test sections
 	data, err = os.ReadFile("./testdata/sections.json")
 	if err != nil {
 		panic(err)
@@ -42,7 +42,7 @@ func init() {
 		panic(err)
 	}
 
-	// parse the test professors
+	// Parse the test professors
 	data, err = os.ReadFile("./testdata/professors.json")
 	if err != nil {
 		panic(err)
@@ -52,6 +52,7 @@ func init() {
 		panic(err)
 	}
 
+	// The correct mapping
 	indexMap = map[int]int{0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 4}
 }
 
@@ -142,20 +143,20 @@
 // - Section doesn't reference back to same course
 //
-// This is fail type 1
+// This is fail: missing
 func TestCourseReferenceFail1(t *testing.T) {
 	for key, value := range indexMap {
 		t.Run(fmt.Sprintf("Section %v & course %v", key, value), func(t *testing.T) {
-			testCourseReferenceFail(1, value, key, t)
+			testCourseReferenceFail("missing", value, key, t)
 		})
 	}
 }
 
-// This is fail type 2
+// This is fail: modified
 func TestCourseReferenceFail2(t *testing.T) {
 	for key, value := range indexMap {
 		t.Run(fmt.Sprintf("Section %v & course %v", key, value), func(t *testing.T) {
-			testCourseReferenceFail(2, value, key, t)
+			testCourseReferenceFail("modified", value, key, t)
 		})
 	}
 }
@@ -192,6 +193,7 @@
 
 // Test section reference to professors, designed for fail case
 func TestSectionReferenceProfFail(t *testing.T) {
+
 	profIDMap := make(map[primitive.ObjectID]string)
 	profs := make(map[string]*schema.Professor)
 
@@ -257,23 +259,22 @@ func TestSectionReferenceCourse(t *testing.T) {
 	}
 }
 
-/* BELOW HERE ARE HELPER FUNCTIONS FOR TESTS ABOVE */
+/******** BELOW HERE ARE HELPER FUNCTIONS FOR TESTS ABOVE ********/
 
-// Helper function
 // Test if validate() throws errors when encountering duplicates
 // Designed for fail cases
-func testDuplicateFail(objType string, index int, t *testing.T) {
+func testDuplicateFail(objType string, ix int, t *testing.T) {
 	// the buffer used to capture the log output
 	var logBuffer bytes.Buffer
 	log.SetOutput(&logBuffer)
 
-	// determine the expected msgs and panic msgs based on object type
+	// Determine the expected messages and panic messages based on object type
 	var expectedMsgs []string
 	var panicMsg string
 
 	switch objType {
 	case "course":
-		failCourse := testCourses[index]
+		failCourse := testCourses[ix]
 
 		// list of msgs it must print
 		expectedMsgs = []string{
@@ -282,7 +283,7 @@
 		}
 		panicMsg = "Courses failed to validate!"
 	case "section":
-		failSection := testSections[index]
+		failSection := testSections[ix]
 
 		expectedMsgs = []string{
 			"Duplicate section found!",
@@ -290,7 +291,7 @@
 		}
 		panicMsg = "Sections failed to validate!"
 	case "professor":
-		failProf := testProfessors[index]
+		failProf := testProfessors[ix]
 
 		expectedMsgs = []string{
 			"Duplicate professor found!",
@@ -302,14 +303,14 @@
 	defer func() {
 		logOutput := logBuffer.String() // log output after running the function
 
-		// log output needs to contain lines in the list
+		// Log output needs to contain lines in the list
 		for _, msg := range expectedMsgs {
 			if !strings.Contains(logOutput, msg) {
 				t.Errorf("Expected the message for %v: %v", objType, msg)
 			}
 		}
 
-		// test whether func panics and sends the correct panic msg
+		// Test whether func panics and sends the correct panic msg
 		if r := recover(); r == nil {
 			t.Errorf("The function didn't panic for %v", objType)
 		} else {
@@ -323,18 +324,17 @@
 	// Run func
 	switch objType {
 	case "course":
-		valDuplicateCourses(testCourses[index], testCourses[index])
+		valDuplicateCourses(testCourses[ix], testCourses[ix])
 	case "section":
-		valDuplicateSections(testSections[index], testSections[index])
+		valDuplicateSections(testSections[ix], testSections[ix])
 	case "professor":
-		valDuplicateProfs(testProfessors[index], testProfessors[index])
+		valDuplicateProfs(testProfessors[ix], testProfessors[ix])
 	}
 }
 
-// Helper function
 // Test if func doesn't log anything and doesn't panic.
 // Designed for pass cases
-func testDuplicatePass(objType string, index1 int, index2 int, t *testing.T) {
+func testDuplicatePass(objType string, ix1 int, ix2 int, t *testing.T) {
 	// Buffer to capture the output
 	var logBuffer bytes.Buffer
 	log.SetOutput(&logBuffer)
@@ -349,45 +349,45 @@
 		}
 	}()
 
-	// Run func according to the object type. Choose pair of objects which are not duplicates
+	// Run func according to the object type.
+	// Choose pair of objects which are not duplicates
 	switch objType {
 	case "course":
-		valDuplicateCourses(testCourses[index1], testCourses[index2])
+		valDuplicateCourses(testCourses[ix1], testCourses[ix2])
 	case "section":
-		valDuplicateSections(testSections[index1], testSections[index2])
+		valDuplicateSections(testSections[ix1], testSections[ix2])
 	case "professor":
-		valDuplicateProfs(testProfessors[index1], testProfessors[index2])
+		valDuplicateProfs(testProfessors[ix1], testProfessors[ix2])
 	}
 }
 
-// Helper function for the case of course reference that fails
-// failType: 1 means it lacks one section
-// failType: 2 means one section's course reference has been modified
-func testCourseReferenceFail(failType int, courseIndex int, sectionIndex int, t *testing.T) {
+// fail = "missing" means it lacks one section
+// fail = "modified" means one section's course reference has been modified
+func testCourseReferenceFail(fail string, courseIx int, sectionIx int, t *testing.T) {
 	sectionMap := make(map[primitive.ObjectID]*schema.Section)
 
 	var sectionID, originalID primitive.ObjectID // used to store IDs of modified sections
 
 	// Build the failed section map based on fail type
-	if failType == 1 {
-		// misses a section
+	if fail == "missing" {
+		// Misses a section
 		for i, section := range testSections {
-			if sectionIndex != i {
+			if sectionIx != i {
 				sectionMap[section.Id] = section
 			} else {
-				sectionID = section.Id // Nonexistent ID referenced by course
+				sectionID = section.Id // Nonexistent ID referenced by course
 			}
 		}
-	} else {
-		// one section doesn't reference the correct course
+	} else if fail == "modified" {
+		// One section doesn't reference the correct course
 		for i, section := range testSections {
 			sectionMap[section.Id] = section
-			if sectionIndex == i {
-				// save the section ID and original course reference to be restored later on
+			if sectionIx == i {
+				// Save the section ID and original course reference to be restored later on
 				sectionID = section.Id
 				originalID = section.Course_reference
 
-				// modify part
+				// Modified part
 				sectionMap[section.Id].Course_reference = primitive.NewObjectID()
 			}
 		}
@@ -399,16 +399,16 @@
 	// The course that references nonexistent stuff
 	var failCourse *schema.Course
 
-	if failType == 1 {
-		failCourse = testCourses[courseIndex]
+	if fail == "missing" {
+		failCourse = testCourses[courseIx]
 
 		expectedMsgs = []string{
 			fmt.Sprintf("Nonexistent section reference found for %v%v!", failCourse.Subject_prefix, failCourse.Course_number),
 			fmt.Sprintf("Referenced section ID: %s\nCourse ID: %s", sectionID, failCourse.Id),
 		}
 	} else {
-		failCourse = testCourses[courseIndex]
-		failSection := testSections[sectionIndex]
+		failCourse = testCourses[courseIx]
+		failSection := testSections[sectionIx]
 
 		expectedMsgs = []string{
 			fmt.Sprintf("Inconsistent section reference found for %v%v! The course references the section, but not vice-versa!",
 				failCourse.Subject_prefix, failCourse.Course_number),
@@ -431,8 +431,8 @@
 		}
 	}
 
-	// restore to original course reference of modified section (if needed)
-	if failType == 2 {
+	// Restore to original course reference of modified section (if needed)
+	if fail == "modified" {
 		sectionMap[sectionID].Course_reference = originalID
 	}
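Throughout the validator tests touched in patches 01-06, each test redirects the global logger with log.SetOutput(&logBuffer) and never restores it, so later logging lands in a stale buffer. A small helper can scope the capture to a single test; a sketch assuming only the standard library (captureLog is an illustrative name, and os.Stderr is log's default writer):

package parser

import (
	"bytes"
	"log"
	"os"
	"testing"
)

// captureLog redirects the global logger into a buffer and registers a
// cleanup that restores the default writer when the test finishes.
func captureLog(t *testing.T) *bytes.Buffer {
	var buf bytes.Buffer
	log.SetOutput(&buf)
	t.Cleanup(func() { log.SetOutput(os.Stderr) })
	return &buf
}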
From 401e11559b3399b6093ed00cc5d9180e8d086284 Mon Sep 17 00:00:00 2001
From: mikehquan19
Date: Fri, 1 Aug 2025 14:34:56 -0500
Subject: [PATCH 07/11] Implement API approach to getting calendar data

---
 scrapers/calendar.go | 120 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 117 insertions(+), 3 deletions(-)

diff --git a/scrapers/calendar.go b/scrapers/calendar.go
index 1627f95..7cf89eb 100644
--- a/scrapers/calendar.go
+++ b/scrapers/calendar.go
@@ -5,11 +5,13 @@
 package scrapers
 
 import (
+	"bytes"
 	"context"
 	"encoding/json"
 	"errors"
 	"fmt"
 	"log"
+	"net/http"
 	"os"
 	"regexp"
 	"time"
@@ -132,15 +134,15 @@ func ScrapeCalendar(outDir string) {
 
 		// Grab Location of Event
 
-		// If .location doesn't have children, then it's a virtual event
-		var location string = "Virtual Event" // Default
+		// If p.location doesn't have children, then it's a virtual event
+		var location string = "Virtual Event"
 		err = chromedp.Run(chromedpCtx,
 			// Grab the name of the location
 			chromedp.QueryAfter("p.location > a",
 				func(ctx context.Context, _ runtime.ExecutionContextID, nodes ...*cdp.Node) error {
 					if len(nodes) != 0 {
-						// Location's name somehow contains leading space, trim it
+						// Location's name somehow contains leading space, so trim it
 						location = leadingSpaceRegex.ReplaceAllString(getNodeText(nodes[0]), "")
 					}
 					return nil
@@ -310,6 +312,7 @@ func ScrapeCalendar(outDir string) {
 					if !hasEmailHref {
 						return errors.New("event contact doesn't have email")
 					}
+					// Slicing the text to exclude "mailto:"
 					contactInformationEmail = emailHref[7:]
 				}
 				return nil
@@ -363,3 +366,114 @@ func ScrapeCalendar(outDir string) {
 	encoder.Encode(events)
 	fptr.Close()
 }
+
+// Get the calendar data through API instead of scraping from website
+func ScrapeAPICalendar(outDir string) {
+	err := os.MkdirAll(outDir, 0777)
+	if err != nil {
+		panic(err)
+	}
+	client := http.Client{Timeout: 30 * time.Second}
+	var events []schema.Event
+
+	for i := range 1 {
+		// Set up the API Request
+		calendarUrl := fmt.Sprintf("https://calendar.utdallas.edu/api/2/events?days=365&pp=100&page=%d", i)
+		req, err := http.NewRequest("GET", calendarUrl, nil)
+		if err != nil {
+			panic(err)
+		}
+
+		// Call API to get the response
+		res, err := client.Do(req)
+		if err != nil {
+			panic(err)
+		}
+		if res != nil && res.StatusCode != 200 {
+			log.Panicf("ERROR: Status was %s", res.Status)
+		}
+
+		buffer := bytes.Buffer{}
+		buffer.ReadFrom(res.Body)
+		res.Body.Close()
+
+		// Structure of the API response
+		type RawEvent struct {
+			Event map[string]any `json:"event"`
+		}
+
+		type APICalendarResponse struct {
+			Events []RawEvent        `json:"events"`
+			Page   map[string]int    `json:"page"`
+			Date   map[string]string `json:"date"`
+		}
+
+		var responseData APICalendarResponse
+		if err := json.Unmarshal(buffer.Bytes(), &responseData); err != nil {
+			panic(err)
+		}
+
+		for _, rawEvent := range responseData.Events {
+			filters := pullMap(rawEvent.Event["filters"])
+			eventType := []string{}
+			eventTopic := []string{}
+			eventAudience := []string{}
+
+			// Parse the event types, event topic, and event target audience
+			rawTypes := pullSlice(filters["event_types"])
+			for _, rawType := range rawTypes {
+				eventType = append(eventType, pullMap(rawType)["name"].(string))
+			}
+
+			rawTopic := pullSlice(filters["event_topic"])
+			for _, topic := range rawTopic {
+				eventTopic = append(eventTopic, pullMap(topic)["name"].(string))
+			}
+
+			rawAudience := pullSlice(filters["event_target_audience"])
+			for _, audience := range rawAudience {
+				eventAudience = append(eventAudience, pullMap(audience)["name"].(string))
+			}
+
+			// Parse the event departments
+			departments := []string{}
+			rawDepartments := pullSlice(rawEvent.Event["departments"])
+			for _, department := range rawDepartments {
+				departments = append(departments, pullMap(department)["name"].(string))
+			}
+
+			events = append(events, schema.Event{
+				Id:             primitive.NewObjectID(),
+				EventType:      eventType,
+				TargetAudience: eventAudience,
+				Topic:          eventTopic,
+				Department:     departments,
+			})
+		}
+	}
+
+	fptr, err := os.Create(fmt.Sprintf("%s/events1.json", outDir))
+	if err != nil {
+		panic(err)
+	}
+	encoder := json.NewEncoder(fptr)
+	encoder.SetIndent("", "\t")
+	encoder.Encode(events)
+	fptr.Close()
+}
+
+// Casting an any to a slice of any
+func pullSlice(data any) []any {
+	if array, ok := data.([]any); ok {
+		return array
+	}
+	return nil
+}
+
+// Casting an any to a map from string to any
+func pullMap(data any) map[string]any {
+	if dataMap, ok := data.(map[string]any); ok {
+		return dataMap
+	}
+	return nil
+}
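Patch 07 walks the events payload through map[string]any with the pullSlice/pullMap casts. An alternative is to declare only the fields the scraper reads and let encoding/json drop everything else during unmarshalling; a sketch assuming the JSON keys used above ("event", "title", "filters", "event_types", "name", "departments"):

package scrapers

// apiEvent mirrors just the parts of the payload the scraper consumes;
// keys not listed here are silently ignored by encoding/json.
type apiEvent struct {
	Event struct {
		Title   string `json:"title"`
		Filters struct {
			EventTypes []struct {
				Name string `json:"name"`
			} `json:"event_types"`
		} `json:"filters"`
		Departments []struct {
			Name string `json:"name"`
		} `json:"departments"`
	} `json:"event"`
}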
From 6508e800b644437bc7132da01fa9bc44d54cf455 Mon Sep 17 00:00:00 2001
From: mikehquan19
Date: Sat, 2 Aug 2025 17:13:11 -0500
Subject: [PATCH 08/11] Move api calendar to a separate option, can change
 later

---
 main.go                 |   4 +
 scrapers/apiCalendar.go | 196 ++++++++++++++++++++++++++++++++++++++++
 scrapers/calendar.go    | 113 -----------------------
 3 files changed, 200 insertions(+), 113 deletions(-)
 create mode 100644 scrapers/apiCalendar.go

diff --git a/main.go b/main.go
index 727c376..bdafd1b 100644
--- a/main.go
+++ b/main.go
@@ -39,6 +39,8 @@ func main() {
 	scrapeOrganizations := flag.Bool("organizations", false, "Alongside -scrape, signifies that SOC organizations should be scraped.")
 	// Flag for calendar scraping
 	scrapeCalendar := flag.Bool("calendar", false, "Alongside -scrape, signifies that calendar should be scraped.")
+	// Flag for api calendar scraping
+	scrapeAPICalendar := flag.Bool("apiCalendar", false, "Alongside -scrape, signifies that calendar should be scraped.")
 	// Flag for astra scraping and parsing
 	astra := flag.Bool("astra", false, "Alongside -scrape or -parse, signifies that Astra should be scraped/parsed.")
 	// Flag for mazevo scraping and parsing
@@ -104,6 +106,8 @@ func main() {
 		scrapers.ScrapeOrganizations(*outDir)
 	case *scrapeCalendar:
 		scrapers.ScrapeCalendar(*outDir)
+	case *scrapeAPICalendar:
+		scrapers.ScrapeAPICalendar(*outDir)
 	case *astra:
 		scrapers.ScrapeAstra(*outDir)
 	case *mazevo:
diff --git a/scrapers/apiCalendar.go b/scrapers/apiCalendar.go
new file mode 100644
index 0000000..74023eb
--- /dev/null
+++ b/scrapers/apiCalendar.go
@@ -0,0 +1,196 @@
+package scrapers
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"log"
+	"net/http"
+	"os"
+	"time"
+
+	"github.com/UTDNebula/api-tools/utils"
+	"github.com/UTDNebula/nebula-api/api/schema"
+	"go.mongodb.org/mongo-driver/bson/primitive"
+)
+
+// Structure of the API response
+type APICalendarResponse struct {
+	Events []struct {
+		Event map[string]interface{} `json:"event"`
+	} `json:"events"`
+	Page map[string]int    `json:"page"`
+	Date map[string]string `json:"date"`
+}
+
+// Get the calendar data through API instead of scraping from website
+func ScrapeAPICalendar(outDir string) {
+	err := os.MkdirAll(outDir, 0777)
+	if err != nil {
+		panic(err)
+	}
+	cli := http.Client{Timeout: 30 * time.Second}
+	var calendarData APICalendarResponse
+
+	// Get the total number of pages
+	log.Printf("Getting the number of pages...")
+	if err := scrapeAndUnmarshal(&cli, 0, &calendarData); err != nil {
+		panic(err)
+	}
+	numPages := calendarData.Page["total"]
+	log.Printf("The number of pages is %d!\n\n", numPages)
+
+	var events []schema.Event
+	for page := range numPages {
+		log.Printf("Scraping events of page %d...", page+1)
+		if err := scrapeAndUnmarshal(&cli, page+1, &calendarData); err != nil {
+			panic(err)
+		}
+		log.Printf("Scraped events of page %d successfully!\n", page+1)
+
+		log.Printf("Parsing the events of page %d...", page+1)
+		for _, rawEvent := range calendarData.Events {
+			// Parse the time
+			eventInstance := toMap(toMap(toSlice(rawEvent.Event["event_instances"])[0])["event_instance"])
+			startTime := parseTime(toString(eventInstance["start"]))
+			endTime := startTime
+			if toString(eventInstance["end"]) != "" {
+				endTime = parseTime(toString(eventInstance["end"]))
+			}
+
+			// Parse location
+			location := fmt.Sprintf("%s-%s", toString(rawEvent.Event["location_name"]), toString(rawEvent.Event["room_number"]))
+
+			// Parse the event types, event topic, and event target audience
+			filters := toMap(rawEvent.Event["filters"])
+			eventTypes := []string{}
+			eventTopics := []string{}
+			targetAudiences := []string{}
+
+			rawTypes := toSlice(filters["event_types"])
+			for _, rawType := range rawTypes {
+				eventTypes = append(eventTypes, toString(toMap(rawType)["name"]))
+			}
+
+			rawAudiences := toSlice(filters["event_target_audience"])
+			for _, audience := range rawAudiences {
+				targetAudiences = append(targetAudiences, toString(toMap(audience)["name"]))
+			}
+
+			rawTopics := toSlice(filters["event_topic"])
+			for _, topic := range rawTopics {
+				eventTopics = append(eventTopics, toString(toMap(topic)["name"]))
+			}
+
+			// Parse the event departments, and tags
+			departments := []string{}
+			tags := []string{}
+
+			rawTags := toSlice(rawEvent.Event["tags"])
+			for _, tag := range rawTags {
+				tags = append(tags, tag.(string))
+			}
+
+			rawDepartments := toSlice(rawEvent.Event["departments"])
+			for _, department := range rawDepartments {
+				departments = append(departments, toMap(department)["name"].(string))
+			}
+
+			// Parse the contact info. Note that some events won't have a contact phone number
+			rawContactInfo := toMap(rawEvent.Event["custom_fields"])
+			contactInfo := [3]string{}
+			for i, infoField := range []string{
+				"contact_information_name", "contact_information_email", "contact_information_phone",
+			} {
+				contactInfo[i] = toString(rawContactInfo[infoField])
+			}
+
+			events = append(events, schema.Event{
+				Id:                 primitive.NewObjectID(),
+				Summary:            toString(rawEvent.Event["title"]),
+				Location:           location,
+				StartTime:          startTime,
+				EndTime:            endTime,
+				Description:        toString(rawEvent.Event["description_text"]),
+				EventType:          eventTypes,
+				TargetAudience:     targetAudiences,
+				Topic:              eventTopics,
+				EventTags:          tags,
+				EventWebsite:       toString(rawEvent.Event["url"]),
+				Department:         departments,
+				ContactName:        contactInfo[0],
+				ContactEmail:       contactInfo[1],
+				ContactPhoneNumber: contactInfo[2],
+			})
+		}
+		log.Printf("Parsed the events of page %d successfully!\n\n", page+1)
+	}
+
+	if err := utils.WriteJSON(fmt.Sprintf("%s/api_events.json", outDir), events); err != nil {
+		panic(err)
+	}
+	log.Printf("Finished parsing %d events successfully!\n\n", len(events))
+}
+
+// Get the data from the API and unmarshal it into data
+func scrapeAndUnmarshal(client *http.Client, page int, data *APICalendarResponse) error {
+	// Call API to get the byte data
+	calendarUrl := fmt.Sprintf("https://calendar.utdallas.edu/api/2/events?days=365&pp=100&page=%d", page)
+	req, err := http.NewRequest("GET", calendarUrl, nil)
+	if err != nil {
+		return err
+	}
+	res, err := client.Do(req)
+	if err != nil {
+		return err
+	}
+	if res != nil && res.StatusCode != 200 {
+		return fmt.Errorf("ERROR: Non-200 status is returned, %s", res.Status)
+	}
+
+	// Unmarshal bytes to the response data
+	buffer := bytes.Buffer{}
+	if _, err = buffer.ReadFrom(res.Body); err != nil {
+		return err
+	}
+	res.Body.Close()
+	if err = json.Unmarshal(buffer.Bytes(), &data); err != nil {
+		return err
+	}
+	return nil
+}
+
+// Casting an interface{} to a slice of interface{}
+func toSlice(data interface{}) []interface{} {
+	if array, ok := data.([]interface{}); ok {
+		return array
+	}
+	return nil
+}
+
+// Casting an interface{} to a map from string to interface{}
+func toMap(data interface{}) map[string]interface{} {
+	if dataMap, ok := data.(map[string]interface{}); ok {
+		return dataMap
+	}
+	return nil
+}
+
+// Casting an interface{} to string
+func toString(data interface{}) string {
+	if data != nil {
+		if dataString, ok := data.(string); ok {
+			return dataString
+		}
+	}
+	return ""
+}
+
+// Parse string time
+func parseTime(stringTime string) time.Time {
+	parsedTime, err := time.Parse(time.RFC3339, stringTime)
+	if err != nil {
+		panic(err)
+	}
+	return parsedTime
+}
diff --git a/scrapers/calendar.go b/scrapers/calendar.go
index 7cf89eb..adf6f82 100644
--- a/scrapers/calendar.go
+++ b/scrapers/calendar.go
@@ -5,13 +5,11 @@
 package scrapers
 
 import (
-	"bytes"
 	"context"
 	"encoding/json"
 	"errors"
 	"fmt"
 	"log"
-	"net/http"
 	"os"
 	"regexp"
 	"time"
@@ -366,114 +364,3 @@ func ScrapeCalendar(outDir string) {
 	encoder.Encode(events)
 	fptr.Close()
 }
-
-// Get the calendar data through API instead of scraping from website
-func ScrapeAPICalendar(outDir string) {
-	err := os.MkdirAll(outDir, 0777)
-	if err != nil {
-		panic(err)
-	}
-	client := http.Client{Timeout: 30 * time.Second}
-	var events []schema.Event
-
-	for i := range 1 {
-		// Set up the API Request
-		calendarUrl := fmt.Sprintf("https://calendar.utdallas.edu/api/2/events?days=365&pp=100&page=%d", i)
-		req, err := http.NewRequest("GET", calendarUrl, nil)
-		if err != nil {
-			panic(err)
-		}
-
-		// Call API to get the response
-		res, err := client.Do(req)
-		if err != nil {
-			panic(err)
-		}
-		if res != nil && res.StatusCode != 200 {
-			log.Panicf("ERROR: Status was %s", res.Status)
-		}
-
-		buffer := bytes.Buffer{}
-		buffer.ReadFrom(res.Body)
-		res.Body.Close()
-
-		// Structure of the API response
-		type RawEvent struct {
-			Event map[string]any `json:"event"`
-		}
-
-		type APICalendarResponse struct {
-			Events []RawEvent        `json:"events"`
-			Page   map[string]int    `json:"page"`
-			Date   map[string]string `json:"date"`
-		}
-
-		var responseData APICalendarResponse
-		if err := json.Unmarshal(buffer.Bytes(), &responseData); err != nil {
-			panic(err)
-		}
-
-		for _, rawEvent := range responseData.Events {
-			filters := pullMap(rawEvent.Event["filters"])
-			eventType := []string{}
-			eventTopic := []string{}
-			eventAudience := []string{}
-
-			// Parse the event types, event topic, and event target audience
-			rawTypes := pullSlice(filters["event_types"])
-			for _, rawType := range rawTypes {
-				eventType = append(eventType, pullMap(rawType)["name"].(string))
-			}
-
-			rawTopic := pullSlice(filters["event_topic"])
-			for _, topic := range rawTopic {
-				eventTopic = append(eventTopic, pullMap(topic)["name"].(string))
-			}
-
-			rawAudience := pullSlice(filters["event_target_audience"])
-			for _, audience := range rawAudience {
-				eventAudience = append(eventAudience, pullMap(audience)["name"].(string))
-			}
-
-			// Parse the event departments
-			departments := []string{}
-			rawDepartments := pullSlice(rawEvent.Event["departments"])
-			for _, department := range rawDepartments {
-				departments = append(departments, pullMap(department)["name"].(string))
-			}
-
-			events = append(events, schema.Event{
-				Id:             primitive.NewObjectID(),
-				EventType:      eventType,
-				TargetAudience: eventAudience,
-				Topic:          eventTopic,
-				Department:     departments,
-			})
-		}
-	}
-
-	fptr, err := os.Create(fmt.Sprintf("%s/events1.json", outDir))
-	if err != nil {
-		panic(err)
-	}
-	encoder := json.NewEncoder(fptr)
-	encoder.SetIndent("", "\t")
-	encoder.Encode(events)
-	fptr.Close()
-}
-
-// Casting an any to a slice of any
-func pullSlice(data any) []any {
-	if array, ok := data.([]any); ok {
-		return array
-	}
-	return nil
-}
-
-// Casting an any to a map from string to any
-func pullMap(data any) map[string]any {
-	if dataMap, ok := data.(map[string]any); ok {
-		return dataMap
-	}
-	return nil
-}

From 72018d75496ac66d69ea56da69e4974dbff3fa93 Mon Sep 17 00:00:00 2001
From: mikehquan19
Date: Sat, 2 Aug 2025 19:11:25 -0500
Subject: [PATCH 09/11] Change the name of the file to write into

---
 scrapers/apiCalendar.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scrapers/apiCalendar.go b/scrapers/apiCalendar.go
index 74023eb..161b5ec 100644
--- a/scrapers/apiCalendar.go
+++ b/scrapers/apiCalendar.go
@@ -126,7 +126,7 @@ func ScrapeAPICalendar(outDir string) {
 		log.Printf("Parsed the events of page %d successfully!\n\n", page+1)
 	}
 
-	if err := utils.WriteJSON(fmt.Sprintf("%s/api_events.json", outDir), events); err != nil {
+	if err := utils.WriteJSON(fmt.Sprintf("%s/events.json", outDir), events); err != nil {
 		panic(err)
 	}
 	log.Printf("Finished parsing %d events successfully!\n\n", len(events))
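Before the final rewrite below, one note on scrapeAndUnmarshal from patch 08: it copies the whole response body into a bytes.Buffer and only then unmarshals. Decoding the stream directly is equivalent for this payload and drops the intermediate copy; a sketch reusing the APICalendarResponse type (decodeCalendarPage is an illustrative name, not part of the series):

package scrapers

import (
	"encoding/json"
	"fmt"
	"net/http"
)

// decodeCalendarPage fetches one page of events and decodes the body as it
// streams, instead of buffering it first.
func decodeCalendarPage(client *http.Client, url string, data *APICalendarResponse) error {
	res, err := client.Get(url)
	if err != nil {
		return err
	}
	defer res.Body.Close()
	if res.StatusCode != http.StatusOK {
		return fmt.Errorf("non-200 status returned: %s", res.Status)
	}
	return json.NewDecoder(res.Body).Decode(data)
}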
deleted file mode 100644
index 161b5ec..0000000
--- a/scrapers/apiCalendar.go
+++ /dev/null
@@ -1,196 +0,0 @@
-package scrapers
-
-import (
-	"bytes"
-	"encoding/json"
-	"fmt"
-	"log"
-	"net/http"
-	"os"
-	"time"
-
-	"github.com/UTDNebula/api-tools/utils"
-	"github.com/UTDNebula/nebula-api/api/schema"
-	"go.mongodb.org/mongo-driver/bson/primitive"
-)
-
-// Structure of the API response
-type APICalendarResponse struct {
-	Events []struct {
-		Event map[string]interface{} `json:"event"`
-	} `json:"events"`
-	Page map[string]int    `json:"page"`
-	Date map[string]string `json:"date"`
-}
-
-// Get the calendar data through API instead of scraping from website
-func ScrapeAPICalendar(outDir string) {
-	err := os.MkdirAll(outDir, 0777)
-	if err != nil {
-		panic(err)
-	}
-	cli := http.Client{Timeout: 30 * time.Second}
-	var calendarData APICalendarResponse
-
-	// Get the total number of pages
-	log.Printf("Getting the number of pages...")
-	if err := scrapeAndUnmarshal(&cli, 0, &calendarData); err != nil {
-		panic(err)
-	}
-	numPages := calendarData.Page["total"]
-	log.Printf("The number of pages is %d!\n\n", numPages)
-
-	var events []schema.Event
-	for page := range numPages {
-		log.Printf("Scraping events of page %d...", page+1)
-		if err := scrapeAndUnmarshal(&cli, page+1, &calendarData); err != nil {
-			panic(err)
-		}
-		log.Printf("Scraped events of page %d successfully!\n", page+1)
-
-		log.Printf("Parsing the events of page %d...", page+1)
-		for _, rawEvent := range calendarData.Events {
-			// Parse the time
-			eventInstance := toMap(toMap(toSlice(rawEvent.Event["event_instances"])[0])["event_instance"])
-			startTime := parseTime(toString(eventInstance["start"]))
-			endTime := startTime
-			if toString(eventInstance["end"]) != "" {
-				endTime = parseTime(toString(eventInstance["end"]))
-			}
-
-			// Parse location
-			location := fmt.Sprintf("%s-%s", toString(rawEvent.Event["location_name"]), toString(rawEvent.Event["room_number"]))
-
-			// Parse the event types, event topic, and event target audience
-			filters := toMap(rawEvent.Event["filters"])
-			eventTypes := []string{}
-			eventTopics := []string{}
-			targetAudiences := []string{}
-
-			rawTypes := toSlice(filters["event_types"])
-			for _, rawType := range rawTypes {
-				eventTypes = append(eventTypes, toString(toMap(rawType)["name"]))
-			}
-
-			rawAudiences := toSlice(filters["event_target_audience"])
-			for _, audience := range rawAudiences {
-				targetAudiences = append(targetAudiences, toString(toMap(audience)["name"]))
-			}
-
-			rawTopics := toSlice(filters["event_topic"])
-			for _, topic := range rawTopics {
-				eventTopics = append(eventTopics, toString(toMap(topic)["name"]))
-			}
-
-			// Parse the event departments, and tags
-			departments := []string{}
-			tags := []string{}
-
-			rawTags := toSlice(rawEvent.Event["tags"])
-			for _, tag := range rawTags {
-				tags = append(tags, tag.(string))
-			}
-
-			rawDeparments := toSlice(rawEvent.Event["departments"])
-			for _, deparment := range rawDeparments {
-				departments = append(departments, toMap(deparment)["name"].(string))
-			}
-
-			// Parse the contact info, note that some events won't have contact phone number
-			rawContactInfo := toMap(rawEvent.Event["custom_fields"])
-			contactInfo := [3]string{}
-			for i, infoField := range []string{
-				"contact_information_name", "contact_information_email", "contact_information_phone",
-			} {
-				contactInfo[i] = toString(rawContactInfo[infoField])
-			}
-
-			events = append(events, schema.Event{
-				Id:                 primitive.NewObjectID(),
-				Summary:            toString(rawEvent.Event["title"]),
-				Location:           location,
-				StartTime:          startTime,
-				EndTime:            endTime,
-				Description:        toString(rawEvent.Event["description_text"]),
-				EventType:          eventTypes,
-				TargetAudience:     targetAudiences,
-				Topic:              eventTopics,
-				EventTags:          tags,
-				EventWebsite:       toString(rawEvent.Event["url"]),
-				Department:         departments,
-				ContactName:        contactInfo[0],
-				ContactEmail:       contactInfo[1],
-				ContactPhoneNumber: contactInfo[2],
-			})
-		}
-		log.Printf("Parsed the events of page %d successfully!\n\n", page+1)
-	}
-
-	if err := utils.WriteJSON(fmt.Sprintf("%s/events.json", outDir), events); err != nil {
-		panic(err)
-	}
-	log.Printf("Finished parsing %d events successfully!\n\n", len(events))
-}
-
-// Get the data from the API and unmarshal it into the response struct
-func scrapeAndUnmarshal(client *http.Client, page int, data *APICalendarResponse) error {
-	// Call API to get the byte data
-	calendarUrl := fmt.Sprintf("https://calendar.utdallas.edu/api/2/events?days=365&pp=100&page=%d", page)
-	req, err := http.NewRequest("GET", calendarUrl, nil)
-	if err != nil {
-		return err
-	}
-	res, err := client.Do(req)
-	if err != nil {
-		return err
-	}
-	if res != nil && res.StatusCode != 200 {
-		return fmt.Errorf("ERROR: non-200 status returned: %s", res.Status)
-	}
-
-	// Unmarshal bytes to the response data
-	buffer := bytes.Buffer{}
-	if _, err = buffer.ReadFrom(res.Body); err != nil {
-		return err
-	}
-	res.Body.Close()
-	if err = json.Unmarshal(buffer.Bytes(), &data); err != nil {
-		return err
-	}
-	return nil
-}
-
-// Casting an interface{} to a slice of interface{}
-func toSlice(data interface{}) []interface{} {
-	if array, ok := data.([]interface{}); ok {
-		return array
-	}
-	return nil
-}
-
-// Casting an interface{} to a map from string to interface{}
-func toMap(data interface{}) map[string]interface{} {
-	if dataMap, ok := data.(map[string]interface{}); ok {
-		return dataMap
-	}
-	return nil
-}
-
-// Casting an interface{} to string
-func toString(data interface{}) string {
-	if data != nil {
-		if dataString, ok := data.(string); ok {
-			return dataString
-		}
-	}
-	return ""
-}
-
-// Parse an RFC3339 time string
-func parseTime(stringTime string) time.Time {
-	parsedTime, err := time.Parse(time.RFC3339, stringTime)
-	if err != nil {
-		panic(err)
-	}
-	return parsedTime
-}
diff --git a/scrapers/calendar.go b/scrapers/calendar.go
index adf6f82..0bdd92a 100644
--- a/scrapers/calendar.go
+++ b/scrapers/calendar.go
@@ -5,362 +5,197 @@ package scrapers
 
 import (
-	"context"
+	"bytes"
 	"encoding/json"
-	"errors"
 	"fmt"
 	"log"
+	"net/http"
 	"os"
-	"regexp"
 	"time"
 
 	"github.com/UTDNebula/api-tools/utils"
 	"github.com/UTDNebula/nebula-api/api/schema"
-	"github.com/chromedp/cdproto/cdp"
-	"github.com/chromedp/cdproto/runtime"
-	"github.com/chromedp/chromedp"
 	"go.mongodb.org/mongo-driver/bson/primitive"
 )
 
-const CALENDAR_LINK string = "https://calendar.utdallas.edu/calendar"
+// Structure of the API response
+type RawEvent struct {
+	Event map[string]interface{} `json:"event"`
+}
 
-var trailingSpaceRegex *regexp.Regexp = regexp.MustCompile(`(\s{2,}?\s{2,})|(\n)`)
-var leadingSpaceRegex *regexp.Regexp = regexp.MustCompile(`^\s+`)
+type APICalendarResponse struct {
+	Events []RawEvent        `json:"events"`
+	Page   map[string]int    `json:"page"`
+	Date   map[string]string `json:"date"`
+}
 
+// Get the calendar data through the API instead of scraping the website
 func ScrapeCalendar(outDir string) {
-
-	chromedpCtx, cancel := utils.InitChromeDp()
-	defer cancel()
-
 	err := os.MkdirAll(outDir, 0777)
 	if err != nil {
 		panic(err)
 	}
+	cli := http.Client{Timeout: 15 * time.Second}
+	var calendarData APICalendarResponse
 
-	events := []schema.Event{}
-
-	log.Printf("Scraping event page links")
-	// Grab all links to event pages
-	var pageLinks []string = []string{}
-	_, err = chromedp.RunResponse(chromedpCtx,
-		chromedp.Navigate(CALENDAR_LINK),
-		chromedp.QueryAfter(".em-card_image > a",
-			func(ctx context.Context, _ runtime.ExecutionContextID, nodes ...*cdp.Node) error {
-				for _, node := range nodes {
-					href, hasHref := node.Attribute("href")
-					if !hasHref {
-						return errors.New("event card was missing an href")
-					}
-					pageLinks = append(pageLinks, href)
-				}
-				return nil
-			},
-		),
-	)
-	if err != nil {
+	// Get the total number of pages
+	log.Printf("Getting the number of pages...")
+	if err := scrapeAndUnmarshal(&cli, 0, &calendarData); err != nil {
 		panic(err)
 	}
-	log.Printf("Scraped event page links!")
-	for _, page := range pageLinks {
-		// Print the links of the page to check
-		log.Println(page)
-	}
-
-	for _, page := range pageLinks {
-		// Navigate to page and get page summary
-		summary := ""
-		_, err := chromedp.RunResponse(chromedpCtx,
-			chromedp.Navigate(page),
-			chromedp.QueryAfter(".em-card_title",
-				func(ctx context.Context, _ runtime.ExecutionContextID, nodes ...*cdp.Node) error {
-					if len(nodes) != 0 {
-						summary = trailingSpaceRegex.ReplaceAllString(getNodeText(nodes[0]), "")
-					}
-					return nil
-				}, chromedp.AtLeast(0),
-			),
-		)
+	numPages := calendarData.Page["total"]
+	log.Printf("The number of pages is %d!\n\n", numPages)
 
-		if err != nil {
+	var events []schema.Event
+	for page := range numPages {
+		log.Printf("Scraping events of page %d...", page+1)
+		if err := scrapeAndUnmarshal(&cli, page+1, &calendarData); err != nil {
 			panic(err)
 		}
-		utils.VPrintf("Navigated to page %s", summary)
-
-		// Grab date/time of the event
-		var dateTimeStart time.Time
-		var dateTimeEnd time.Time
-		err = chromedp.Run(chromedpCtx,
-			chromedp.QueryAfter(".dtstart",
-				func(ctx context.Context, _ runtime.ExecutionContextID, nodes ...*cdp.Node) error {
-					if len(nodes) != 0 {
-						timeStamp, hasTime := nodes[0].Attribute("title")
-						if !hasTime {
-							return errors.New("event does not have a start time")
-						}
-						formattedTime, err := time.Parse(time.RFC3339, timeStamp)
-						if err != nil {
-							return err
-						}
-
-						dateTimeStart = formattedTime
-					}
-					return nil
-				}, chromedp.AtLeast(0),
-			),
-			chromedp.QueryAfter(".dtend",
-				func(ctx context.Context, _ runtime.ExecutionContextID, nodes ...*cdp.Node) error {
-					if len(nodes) != 0 {
-						timeStamp, hasTime := nodes[0].Attribute("title")
-						if !hasTime {
-							return errors.New("event does not have an end time")
-						}
-						formattedTime, err := time.Parse(time.RFC3339, timeStamp)
-						if err != nil {
-							return err
-						}
-
-						dateTimeEnd = formattedTime
-					}
-					return nil
-				}, chromedp.AtLeast(0),
-			),
-		)
-		if err != nil {
-			continue
-		}
-		utils.VPrintf("Scraped time: %s to %s ", dateTimeStart, dateTimeEnd)
-
-		// Grab Location of Event
-
-		// If p.location doesn't have children, then it's an virtual event
-		var location string = "Virtual Event"
-
-		err = chromedp.Run(chromedpCtx,
-			// Grab the name of the location
-			chromedp.QueryAfter("p.location > a",
-				func(ctx context.Context, _ runtime.ExecutionContextID, nodes ...*cdp.Node) error {
-					if len(nodes) != 0 {
-						// Location's name somehow contains leading space, so trim it
-						location = leadingSpaceRegex.ReplaceAllString(getNodeText(nodes[0]), "")
-					}
-					return nil
-				}, chromedp.AtLeast(0),
-			),
-			// Grab the address of the location (concatenated with the name)
-			chromedp.QueryAfter("p.location > span",
chromedp.QueryAfter("p.location > span", - func(ctx context.Context, _ runtime.ExecutionContextID, nodes ...*cdp.Node) error { - if len(nodes) != 0 { - // There are cases where it doesn't show the address - if getNodeText(nodes[0]) != "" { - location += "\n" + getNodeText(nodes[0]) - } - } - return nil - }, chromedp.AtLeast(0), - ), - ) - if err != nil { - continue - } - utils.VPrintf("Scraped location: %s, ", location) - - // Get description of event - var description string = "" - err = chromedp.Run(chromedpCtx, - chromedp.QueryAfter(".em-about_description > p", - func(ctx context.Context, _ runtime.ExecutionContextID, nodes ...*cdp.Node) error { - // Concatenate all the sentences in the description together - for _, node := range nodes { - if getNodeText(node) != "" && getNodeText(node) != "\u00A0" { - description += getNodeText(node) + "\n\n" - } - } - return nil - }, chromedp.AtLeast(0), - ), - ) - if err != nil { - continue - } - utils.VPrintf("Scraped description: %s, ", description) - - // Grab Event Type - var eventType []string = []string{} - err = chromedp.Run(chromedpCtx, - chromedp.QueryAfter(".filter-event_types > p > a", - func(ctx context.Context, _ runtime.ExecutionContextID, nodes ...*cdp.Node) error { - for _, node := range nodes { - eventType = append(eventType, getNodeText(node)) - } - return nil - }, chromedp.AtLeast(0), - ), - ) - if err != nil { - panic(err) + log.Printf("Scraped events of page %d successfully!\n", page+1) + + log.Printf("Parsing the events of page %d...", page+1) + for _, rawEvent := range calendarData.Events { + // Parse the time + eventInstance := toMap(toMap(toSlice(rawEvent.Event["event_instances"])[0])["event_instance"]) + startTime := parseTime(toString(eventInstance["start"])) + endTime := startTime + if toString(eventInstance["end"]) != "" { + endTime = parseTime(toString(eventInstance["end"])) + } + + location := utils.TrimWhitespace(fmt.Sprintf("%s, %s", toString(rawEvent.Event["location_name"]), toString(rawEvent.Event["room_number"]))) + + // Parse the event types, event topic, and event target audience + filters := toMap(rawEvent.Event["filters"]) + eventTypes := []string{} + eventTopics := []string{} + targetAudiences := []string{} + + rawTypes := toSlice(filters["event_types"]) + for _, rawType := range rawTypes { + eventTypes = append(eventTypes, toString(toMap(rawType)["name"])) + } + + rawAudiences := toSlice(filters["event_target_audience"]) + for _, audience := range rawAudiences { + targetAudiences = append(targetAudiences, toString(toMap(audience)["name"])) + } + + rawTopics := toSlice(filters["event_topic"]) + for _, topic := range rawTopics { + eventTopics = append(eventTopics, toString(toMap(topic)["name"])) + } + + // Parse the event departments, and tags + departments := []string{} + tags := []string{} + + rawTags := toSlice(rawEvent.Event["tags"]) + for _, tag := range rawTags { + tags = append(tags, tag.(string)) + } + + rawDeparments := toSlice(rawEvent.Event["departments"]) + for _, deparment := range rawDeparments { + departments = append(departments, toMap(deparment)["name"].(string)) + } + + // Parse the contact info, =ote that some events won't have contact phone number + rawContactInfo := toMap(rawEvent.Event["custom_fields"]) + contactInfo := [3]string{} + for i, infoField := range []string{ + "contact_information_name", "contact_information_email", "contact_information_phone", + } { + contactInfo[i] = toString(rawContactInfo[infoField]) + } + + events = append(events, schema.Event{ + Id: 
+				Summary:            toString(rawEvent.Event["title"]),
+				Location:           location,
+				StartTime:          startTime,
+				EndTime:            endTime,
+				Description:        toString(rawEvent.Event["description_text"]),
+				EventType:          eventTypes,
+				TargetAudience:     targetAudiences,
+				Topic:              eventTopics,
+				EventTags:          tags,
+				EventWebsite:       toString(rawEvent.Event["url"]),
+				Department:         departments,
+				ContactName:        contactInfo[0],
+				ContactEmail:       contactInfo[1],
+				ContactPhoneNumber: contactInfo[2],
+			})
 		}
-		utils.VPrintf("Scraped event type: %s", eventType)
+		log.Printf("Parsed the events of page %d successfully!\n\n", page+1)
+	}
 
-		// Grab Target Audience
-		targetAudience := []string{}
-		err = chromedp.Run(chromedpCtx,
-			chromedp.QueryAfter(".filter-event_target_audience > p > a",
-				func(ctx context.Context, _ runtime.ExecutionContextID, nodes ...*cdp.Node) error {
-					for _, node := range nodes {
-						targetAudience = append(targetAudience, getNodeText(node))
-					}
-					return nil
-				}, chromedp.AtLeast(0),
-			),
-		)
-		if err != nil {
-			panic(err)
-		}
-		utils.VPrintf("Scraped target audience: %s, ", targetAudience)
+	if err := utils.WriteJSON(fmt.Sprintf("%s/events.json", outDir), events); err != nil {
+		panic(err)
+	}
+	log.Printf("Finished parsing %d events successfully!\n\n", len(events))
+}
 
-		// Grab Topic
-		topic := []string{}
-		err = chromedp.Run(chromedpCtx,
-			chromedp.QueryAfter(".filter-event_topic > p > a",
-				func(ctx context.Context, _ runtime.ExecutionContextID, nodes ...*cdp.Node) error {
-					for _, node := range nodes {
-						topic = append(topic, getNodeText(node))
-					}
-					return nil
-				}, chromedp.AtLeast(0),
-			),
-		)
-		if err != nil {
-			panic(err)
-		}
-		utils.VPrintf("Scraped topic: %s, ", topic)
+// Scrape the data from the API and unmarshal it into the response data
+func scrapeAndUnmarshal(client *http.Client, page int, data *APICalendarResponse) error {
+	// Call API to get the byte data
+	calendarUrl := fmt.Sprintf("https://calendar.utdallas.edu/api/2/events?days=365&pp=100&page=%d", page)
+	req, err := http.NewRequest("GET", calendarUrl, nil)
+	if err != nil {
+		return err
+	}
+	res, err := client.Do(req)
+	if err != nil {
+		return err
+	}
+	if res != nil && res.StatusCode != 200 {
+		return fmt.Errorf("ERROR: non-200 status returned: %s", res.Status)
+	}
 
-		// Grab Event Tags
-		tags := []string{}
-		err = chromedp.Run(chromedpCtx,
-			chromedp.QueryAfter(".event-tags > p > a",
-				func(ctx context.Context, _ runtime.ExecutionContextID, nodes ...*cdp.Node) error {
-					for _, node := range nodes {
-						tags = append(tags, getNodeText(node))
-					}
-					return nil
-				}, chromedp.AtLeast(0),
-			),
-		)
-		if err != nil {
-			panic(err)
-		}
-		utils.VPrintf("Scraped tags: %s, ", tags)
+	// Unmarshal bytes to the response data
+	buffer := bytes.Buffer{}
+	if _, err = buffer.ReadFrom(res.Body); err != nil {
+		return err
+	}
+	res.Body.Close()
+	if err = json.Unmarshal(buffer.Bytes(), &data); err != nil {
+		return err
+	}
+	return nil
+}
 
-		// Grab Website
-		var eventWebsite string = ""
-		err = chromedp.Run(chromedpCtx,
-			chromedp.QueryAfter(".event-website > p > a",
-				func(ctx context.Context, _ runtime.ExecutionContextID, nodes ...*cdp.Node) error {
-					if len(nodes) != 0 {
-						href, hasHref := nodes[0].Attribute("href")
-						if !hasHref {
-							return errors.New("event does not have website")
-						}
-						eventWebsite = href
-					}
-					return nil
-				}, chromedp.AtLeast(0),
-			),
-		)
-		if err != nil {
-			continue
-		}
-		utils.VPrintf("Scraped website: %s, ", eventWebsite)
+// Casting an interface{} to a slice of interface{}
+func toSlice(data interface{}) []interface{} {
+	if array, ok := data.([]interface{}); ok {
+		return array
+	}
+	return nil
+}
 
-		// Grab Department
-		var eventDepartment []string = []string{}
-		err = chromedp.Run(chromedpCtx,
-			chromedp.QueryAfter(".event-group > a",
-				func(ctx context.Context, _ runtime.ExecutionContextID, nodes ...*cdp.Node) error {
-					for _, node := range nodes {
-						eventDepartment = append(eventDepartment, getNodeText(node))
-					}
-					return nil
-				}, chromedp.AtLeast(0),
-			),
-		)
-		if err != nil {
-			panic(err)
-		}
-		utils.VPrintf("Scraped department: %s, ", eventDepartment)
+// Casting an interface{} to a map from string to interface{}
+func toMap(data interface{}) map[string]interface{} {
+	if dataMap, ok := data.(map[string]interface{}); ok {
+		return dataMap
+	}
+	return nil
+}
 
-		// Grab Contact information
-		var contactInformationName string = ""
-		var contactInformationEmail string = ""
-		var contactInformationPhone string = ""
-		err = chromedp.Run(chromedpCtx,
-			chromedp.QueryAfter(".custom-field-contact_information_name",
-				func(ctx context.Context, _ runtime.ExecutionContextID, nodes ...*cdp.Node) error {
-					if len(nodes) != 0 {
-						contactInformationName = getNodeText(nodes[0])
-					}
-					return nil
-				}, chromedp.AtLeast(0),
-			),
-			chromedp.QueryAfter(".custom-field-contact_information_email > a",
-				func(ctx context.Context, _ runtime.ExecutionContextID, nodes ...*cdp.Node) error {
-					if len(nodes) != 0 {
-						emailHref, hasEmailHref := nodes[0].Attribute("href")
-						if !hasEmailHref {
-							return errors.New("event contact doesn't have email")
-						}
-						// Slicing the text to exclude "mailto:"
-						contactInformationEmail = emailHref[7:]
-					}
-					return nil
-				}, chromedp.AtLeast(0),
-			),
-			chromedp.QueryAfter(".custom-field-contact_information_phone",
-				func(ctx context.Context, _ runtime.ExecutionContextID, nodes ...*cdp.Node) error {
-					if len(nodes) != 0 {
-						contactInformationPhone = getNodeText(nodes[0])
-						if err != nil {
-							return err
-						}
-					}
-					return nil
-				}, chromedp.AtLeast(0),
-			),
-		)
-		if err != nil {
-			panic(err)
+// Casting an interface{} to a string; if the data is nil, the result is ""
+func toString(data interface{}) string {
+	if data != nil {
+		if dataString, ok := data.(string); ok {
+			return dataString
 		}
-		utils.VPrintf("Scraped contact name info: %s", contactInformationName)
-		utils.VPrintf("Scraped contact email info: %s", contactInformationEmail)
-		utils.VPrintf("Scraped contact phone info: %s", contactInformationPhone)
-
-		events = append(events, schema.Event{
-			Id:                 primitive.NewObjectID(),
-			Summary:            summary,
-			Location:           location,
-			StartTime:          dateTimeStart,
-			EndTime:            dateTimeEnd,
-			Description:        description,
-			EventType:          eventType,
-			TargetAudience:     targetAudience,
-			Topic:              topic,
-			EventTags:          tags,
-			EventWebsite:       eventWebsite,
-			Department:         eventDepartment,
-			ContactName:        contactInformationName,
-			ContactEmail:       contactInformationEmail,
-			ContactPhoneNumber: contactInformationPhone,
-		})
 	}
+	return ""
+}
 
-	// Write event data to output file
-	fptr, err := os.Create(fmt.Sprintf("%s/events.json", outDir))
+// Parse an RFC3339 time string
+func parseTime(stringTime string) time.Time {
+	parsedTime, err := time.Parse(time.RFC3339, stringTime)
 	if err != nil {
 		panic(err)
 	}
-	encoder := json.NewEncoder(fptr)
-	encoder.SetIndent("", "\t")
-	encoder.Encode(events)
-	fptr.Close()
+	return parsedTime
 }

From a2545161383ecbfeed1719c6844ae3fc6c458f01 Mon Sep 17 00:00:00 2001
From: mikehquan19
Date: Sun, 3 Aug 2025 19:56:26 -0500
Subject: [PATCH 11/11] Trim the space in location

---
 scrapers/calendar.go | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/scrapers/calendar.go b/scrapers/calendar.go
index 0bdd92a..4429c2e 100644
--- a/scrapers/calendar.go
+++ b/scrapers/calendar.go
@@ -11,6 +11,7 @@ import (
 	"log"
 	"net/http"
 	"os"
+	"strings"
 	"time"
 
 	"github.com/UTDNebula/api-tools/utils"
@@ -64,7 +65,8 @@ func ScrapeCalendar(outDir string) {
 				endTime = parseTime(toString(eventInstance["end"]))
 			}
 
-			location := utils.TrimWhitespace(fmt.Sprintf("%s, %s", toString(rawEvent.Event["location_name"]), toString(rawEvent.Event["room_number"])))
+			// Parse location
+			location := strings.Trim(fmt.Sprintf("%s, %s", toString(rawEvent.Event["location_name"]), toString(rawEvent.Event["room_number"])), " ,")
 
 			// Parse the event types, event topic, and event target audience
 			filters := toMap(rawEvent.Event["filters"])
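
A note on the trimming behavior in the final patch: because toString returns "" for missing fields, an event with no location_name or room_number would otherwise produce the stray string ", "; strings.Trim with the cutset " ," strips any combination of spaces and commas from both ends, leaving a clean or empty location. The following self-contained sketch illustrates this; the sample building and room values are hypothetical, and the snippet is illustrative only, not part of the patch series.

package main

import (
	"fmt"
	"strings"
)

// formatLocation mirrors the expression used in the patch above:
// join the location name and room number, then trim stray commas and spaces.
func formatLocation(locationName, roomNumber string) string {
	return strings.Trim(fmt.Sprintf("%s, %s", locationName, roomNumber), " ,")
}

func main() {
	fmt.Printf("%q\n", formatLocation("ECSW", "1.315"))     // "ECSW, 1.315"
	fmt.Printf("%q\n", formatLocation("Student Union", "")) // "Student Union"
	fmt.Printf("%q\n", formatLocation("", ""))              // "" (e.g. a virtual event)
}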