Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions flow/importer/uw/parts/course/canonical.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package course

import "strings"

var subjectRenames = map[string]string{
"MSCI": "MSE",
}

func canonicalSubject(subj string) string {
upper := strings.ToUpper(subj)
if mapped, ok := subjectRenames[upper]; ok {
return mapped
}
return upper
}

func canonicalCourseCode(subj, num string) string {
return strings.ToLower(canonicalSubject(subj) + num)
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

imo this is not needed. If MSCI has been changed to MSE, UW API would give us MSE only.

If UW API does give MSCI in the future, it means there are some real MSCI courses being offered, then we should not map them to MSE.

Overall, I don't think importer needs to know any remapping information

23 changes: 23 additions & 0 deletions flow/importer/uw/parts/course/canonical_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package course

import "testing"

func TestCanonicalCourseCode(t *testing.T) {
tests := []struct {
subj string
num string
want string
}{
{"MSCI", "100", "mse100"},
{"msci", "331", "mse331"},
{"MSE", "100", "mse100"},
{"CS", "115", "cs115"},
}

for _, tt := range tests {
got := canonicalCourseCode(tt.subj, tt.num)
if got != tt.want {
t.Fatalf("canonicalCourseCode(%s,%s) = %s, want %s", tt.subj, tt.num, got, tt.want)
}
}
}
3 changes: 2 additions & 1 deletion flow/importer/uw/parts/course/convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,8 @@ func convertAll(
}

func convertCourse(dst *convertResult, apiCourse *apiCourse) error {
courseCode := strings.ToLower(apiCourse.Subject + apiCourse.Number)
courseCode := canonicalCourseCode(apiCourse.Subject, apiCourse.Number)
apiCourse.Subject = canonicalSubject(apiCourse.Subject)
newCourse := course{
Code: courseCode,
Name: apiCourse.Name,
Expand Down
5 changes: 3 additions & 2 deletions flow/importer/uw/parts/course/fetch.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ func fetchAll(client *api.Client, termIds []int) ([]apiCourse, []apiClass, error
}

for _, class := range <-sema {
class.CourseCode = strings.ToLower(course.Subject + course.Number)
class.CourseCode = canonicalCourseCode(course.Subject, course.Number)
classes = append(classes, class)
}

Expand Down Expand Up @@ -88,7 +88,8 @@ func fetchCourses(client *api.Client, termIds []int) ([]apiCourse, error) {
return nil, fmt.Errorf("failed to fetch term %d: %w", termId, err)
}
for _, course := range termCourses {
courseCode := course.Subject + course.Number
course.Subject = canonicalSubject(course.Subject)
courseCode := canonicalCourseCode(course.Subject, course.Number)
if err != nil {
log.Warnf("skipping course with missing data")
continue
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
DROP FUNCTION IF EXISTS public.merge_course(text, text);
89 changes: 89 additions & 0 deletions hasura/migrations/default/1770000000000_merge_msci_to_mse/up.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
-- Merge MSCI course codes into MSE and make the helper reusable for future renames.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tested this migration on prod data. Applying it raises some errors due to review_check_course_taken trigger, likely due to some weird data on prod, i need to look into this further and add more details later.

CREATE OR REPLACE FUNCTION public.merge_course(old_code text, new_code text)
RETURNS void AS $$
DECLARE
old_id int;
new_id int;
BEGIN
old_code := lower(old_code);
new_code := lower(new_code);

SELECT id INTO old_id FROM course WHERE code = old_code;
SELECT id INTO new_id FROM course WHERE code = new_code;

IF old_id IS NULL THEN
RETURN;
END IF;

IF new_id IS NULL THEN
UPDATE course SET code = new_code WHERE id = old_id;
RETURN;
END IF;

INSERT INTO course_prerequisite(course_id, prerequisite_id, is_corequisite)
SELECT new_id, prerequisite_id, is_corequisite
FROM course_prerequisite
WHERE course_id = old_id
ON CONFLICT DO NOTHING;

INSERT INTO course_prerequisite(course_id, prerequisite_id, is_corequisite)
SELECT course_id, new_id, is_corequisite
FROM course_prerequisite
WHERE prerequisite_id = old_id
ON CONFLICT DO NOTHING;

DELETE FROM course_prerequisite WHERE course_id = old_id OR prerequisite_id = old_id;

INSERT INTO course_antirequisite(course_id, antirequisite_id)
SELECT new_id, antirequisite_id
FROM course_antirequisite
WHERE course_id = old_id
ON CONFLICT DO NOTHING;

INSERT INTO course_antirequisite(course_id, antirequisite_id)
SELECT course_id, new_id
FROM course_antirequisite
WHERE antirequisite_id = old_id
ON CONFLICT DO NOTHING;

DELETE FROM course_antirequisite WHERE course_id = old_id OR antirequisite_id = old_id;

UPDATE course_section SET course_id = new_id WHERE course_id = old_id;

INSERT INTO user_course_taken(course_id, user_id, term_id, level)
SELECT new_id, user_id, term_id, level
FROM user_course_taken
WHERE course_id = old_id
ON CONFLICT DO NOTHING;
DELETE FROM user_course_taken WHERE course_id = old_id;

INSERT INTO user_shortlist(course_id, user_id)
SELECT new_id, user_id
FROM user_shortlist
WHERE course_id = old_id
ON CONFLICT DO NOTHING;
DELETE FROM user_shortlist WHERE course_id = old_id;

INSERT INTO review(course_id, prof_id, user_id, liked, course_easy, course_useful, course_comment, prof_clear, prof_engaging, prof_comment, public, legacy, created_at, updated_at)
SELECT new_id, prof_id, user_id, liked, course_easy, course_useful, course_comment, prof_clear, prof_engaging, prof_comment, public, legacy, created_at, updated_at
FROM review
WHERE course_id = old_id
ON CONFLICT (course_id, user_id) DO NOTHING;
DELETE FROM review WHERE course_id = old_id;
Copy link
Member

@AD1938 AD1938 Dec 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should avoid removing old courses. If a new user who has taken a MSCI course signs up after we applied this migration. The transcript still records MSCI, the parsing would find no matches for this MSCI course and the user would not be able to leave reviews.

Also, if someone (say an alumni) who has taken MSCI before but does not know the MSCI -> MSE subject code change wants to search some MSCI data, they won't find anything.

UW Flow has been keeping lots of migrated and eliminated course codes. For example, CM is a subject code that is no longer in use but we still keep their data (reviews + ratings)


DELETE FROM course WHERE id = old_id;
END;
$$ LANGUAGE plpgsql;

DO $$
DECLARE
rec record;
BEGIN
FOR rec IN SELECT code FROM course WHERE code LIKE 'msci%' LOOP
PERFORM merge_course(rec.code, 'mse' || substring(rec.code FROM 5));
END LOOP;
END $$;

REFRESH MATERIALIZED VIEW materialized.course_rating;
REFRESH MATERIALIZED VIEW materialized.course_search_index;
REFRESH MATERIALIZED VIEW materialized.course_review_rating;