-
Notifications
You must be signed in to change notification settings - Fork 9
feat: canonicalize MSCI to MSE course codes #170
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,19 @@ | ||
| package course | ||
|
|
||
| import "strings" | ||
|
|
||
| var subjectRenames = map[string]string{ | ||
| "MSCI": "MSE", | ||
| } | ||
|
|
||
| func canonicalSubject(subj string) string { | ||
| upper := strings.ToUpper(subj) | ||
| if mapped, ok := subjectRenames[upper]; ok { | ||
| return mapped | ||
| } | ||
| return upper | ||
| } | ||
|
|
||
| func canonicalCourseCode(subj, num string) string { | ||
| return strings.ToLower(canonicalSubject(subj) + num) | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,23 @@ | ||
| package course | ||
|
|
||
| import "testing" | ||
|
|
||
| func TestCanonicalCourseCode(t *testing.T) { | ||
| tests := []struct { | ||
| subj string | ||
| num string | ||
| want string | ||
| }{ | ||
| {"MSCI", "100", "mse100"}, | ||
| {"msci", "331", "mse331"}, | ||
| {"MSE", "100", "mse100"}, | ||
| {"CS", "115", "cs115"}, | ||
| } | ||
|
|
||
| for _, tt := range tests { | ||
| got := canonicalCourseCode(tt.subj, tt.num) | ||
| if got != tt.want { | ||
| t.Fatalf("canonicalCourseCode(%s,%s) = %s, want %s", tt.subj, tt.num, got, tt.want) | ||
| } | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| DROP FUNCTION IF EXISTS public.merge_course(text, text); |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,89 @@ | ||
| -- Merge MSCI course codes into MSE and make the helper reusable for future renames. | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I tested this migration on prod data. Applying it raises some errors due to |
||
| CREATE OR REPLACE FUNCTION public.merge_course(old_code text, new_code text) | ||
| RETURNS void AS $$ | ||
| DECLARE | ||
| old_id int; | ||
| new_id int; | ||
| BEGIN | ||
| old_code := lower(old_code); | ||
| new_code := lower(new_code); | ||
|
|
||
| SELECT id INTO old_id FROM course WHERE code = old_code; | ||
| SELECT id INTO new_id FROM course WHERE code = new_code; | ||
|
|
||
| IF old_id IS NULL THEN | ||
| RETURN; | ||
| END IF; | ||
|
|
||
| IF new_id IS NULL THEN | ||
| UPDATE course SET code = new_code WHERE id = old_id; | ||
| RETURN; | ||
| END IF; | ||
|
|
||
| INSERT INTO course_prerequisite(course_id, prerequisite_id, is_corequisite) | ||
| SELECT new_id, prerequisite_id, is_corequisite | ||
| FROM course_prerequisite | ||
| WHERE course_id = old_id | ||
| ON CONFLICT DO NOTHING; | ||
|
|
||
| INSERT INTO course_prerequisite(course_id, prerequisite_id, is_corequisite) | ||
| SELECT course_id, new_id, is_corequisite | ||
| FROM course_prerequisite | ||
| WHERE prerequisite_id = old_id | ||
| ON CONFLICT DO NOTHING; | ||
|
|
||
| DELETE FROM course_prerequisite WHERE course_id = old_id OR prerequisite_id = old_id; | ||
|
|
||
| INSERT INTO course_antirequisite(course_id, antirequisite_id) | ||
| SELECT new_id, antirequisite_id | ||
| FROM course_antirequisite | ||
| WHERE course_id = old_id | ||
| ON CONFLICT DO NOTHING; | ||
|
|
||
| INSERT INTO course_antirequisite(course_id, antirequisite_id) | ||
| SELECT course_id, new_id | ||
| FROM course_antirequisite | ||
| WHERE antirequisite_id = old_id | ||
| ON CONFLICT DO NOTHING; | ||
|
|
||
| DELETE FROM course_antirequisite WHERE course_id = old_id OR antirequisite_id = old_id; | ||
|
|
||
| UPDATE course_section SET course_id = new_id WHERE course_id = old_id; | ||
|
|
||
| INSERT INTO user_course_taken(course_id, user_id, term_id, level) | ||
| SELECT new_id, user_id, term_id, level | ||
| FROM user_course_taken | ||
| WHERE course_id = old_id | ||
| ON CONFLICT DO NOTHING; | ||
| DELETE FROM user_course_taken WHERE course_id = old_id; | ||
|
|
||
| INSERT INTO user_shortlist(course_id, user_id) | ||
| SELECT new_id, user_id | ||
| FROM user_shortlist | ||
| WHERE course_id = old_id | ||
| ON CONFLICT DO NOTHING; | ||
| DELETE FROM user_shortlist WHERE course_id = old_id; | ||
|
|
||
| INSERT INTO review(course_id, prof_id, user_id, liked, course_easy, course_useful, course_comment, prof_clear, prof_engaging, prof_comment, public, legacy, created_at, updated_at) | ||
| SELECT new_id, prof_id, user_id, liked, course_easy, course_useful, course_comment, prof_clear, prof_engaging, prof_comment, public, legacy, created_at, updated_at | ||
| FROM review | ||
| WHERE course_id = old_id | ||
| ON CONFLICT (course_id, user_id) DO NOTHING; | ||
| DELETE FROM review WHERE course_id = old_id; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should avoid removing old courses. If a new user who has taken a MSCI course signs up after we applied this migration. The transcript still records MSCI, the parsing would find no matches for this MSCI course and the user would not be able to leave reviews. Also, if someone (say an alumni) who has taken MSCI before but does not know the MSCI -> MSE subject code change wants to search some MSCI data, they won't find anything. UW Flow has been keeping lots of migrated and eliminated course codes. For example, |
||
|
|
||
| DELETE FROM course WHERE id = old_id; | ||
| END; | ||
| $$ LANGUAGE plpgsql; | ||
|
|
||
| DO $$ | ||
| DECLARE | ||
| rec record; | ||
| BEGIN | ||
| FOR rec IN SELECT code FROM course WHERE code LIKE 'msci%' LOOP | ||
| PERFORM merge_course(rec.code, 'mse' || substring(rec.code FROM 5)); | ||
| END LOOP; | ||
| END $$; | ||
|
|
||
| REFRESH MATERIALIZED VIEW materialized.course_rating; | ||
| REFRESH MATERIALIZED VIEW materialized.course_search_index; | ||
| REFRESH MATERIALIZED VIEW materialized.course_review_rating; | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
imo this is not needed. If MSCI has been changed to MSE, UW API would give us MSE only.
If UW API does give MSCI in the future, it means there are some real MSCI courses being offered, then we should not map them to MSE.
Overall, I don't think importer needs to know any remapping information