From c56fa0306b1146373a59f4ccc3583ddb432f3963 Mon Sep 17 00:00:00 2001 From: Sahil Jain Date: Sun, 30 Jan 2022 14:21:51 +0530 Subject: [PATCH 1/4] z3 optimize for computing min cost matching with other constraints --- csec_mentor_matching/analysis.py | 24 ++++++++++++++ csec_mentor_matching/main.py | 54 +++++++++++--------------------- 2 files changed, 43 insertions(+), 35 deletions(-) diff --git a/csec_mentor_matching/analysis.py b/csec_mentor_matching/analysis.py index 5d90289..265cc68 100644 --- a/csec_mentor_matching/analysis.py +++ b/csec_mentor_matching/analysis.py @@ -4,6 +4,7 @@ import itertools as it import random import warnings +import z3 warnings.filterwarnings('error') @@ -133,3 +134,26 @@ def detect_duplicates(mentors, mentees) : for pt in (set(mentors[0]) & set(mentees[0])) : n = mentors[0].index(pt) print( mentors[1][n] + " <" + pt + "> Filled both mentor and mentee forms.") + +# to encode matching as SAT; every mentee-mentor pair will have a cost and a boolean var to denote whether they are picked or not +# min cost matching = optimise cost of chosen pairs + choose only one edge per mentee +# other constraints to be added separately + +# vars have mentor as first index and mentee as second +def mentor_count_constraint(tee_tor_vars, costs, min_cnt): + l = [] + for i in range(len(tee_tor_vars)): + l.append(z3.AtLeast(tee_tor_vars[i], min_cnt)) + return z3.And(l) + +def matching(optimiser, tee_tor_vars, costs): + for j in range(len(tee_tor_vars[0])): + optimiser.add(z3.PbEq([(tee_tor_vars[i][j],1) for i in range(len(tee_tor_vars))], 1)) + + v = 0 + for i in range(len(tee_tor_vars)): + for j in range(len(tee_tor_vars[0])): + v += If(tee_tor_vars[i][j], costs[i][j], 0) + optimiser.minimize(v) + + return optimiser.check() diff --git a/csec_mentor_matching/main.py b/csec_mentor_matching/main.py index 66f0d92..19cab05 100644 --- a/csec_mentor_matching/main.py +++ b/csec_mentor_matching/main.py @@ -13,6 +13,7 @@ import numpy as np from functools import reduce import sys +import z3 MENTEE_SPREADSHEET = '1byO5GRxBVTaDVXNR_JeykVWwA9LrirKhmjjn_8TIUXk' MENTOR_SPREADSHEET = '1SMl0cjfrLgU6D2iYtiQwdl1O5kHTdGdyDqWdysdwTWg' @@ -35,45 +36,28 @@ analysis.detect_duplicates(mentors, mentees) -#mentors = list(mentors) -#mentors.append([np.zeros((len(mentors[0]),))]) -#mentors = tuple(mentors) - -#mentors_temp = list( map( lambda x: np.asarray(x, dtype=type(x[0])), mentors ) ) -#mentees = tuple( list( map( lambda x: np.asarray(x, dtype=type(x[0])), mentees ) ) ) -#mentors_split = [] -#mx = max(mentors[4]) -#multi = 0 -#for i in range(mx, -1, -1) : -# mask = (mentors_temp[4] == i) -# temp = mentors_temp[0][mask] -# len_mentors = len(temp) -# len_mentees = np.sum(np.asarray(mentees[3]) <= i) -# # len_mentees is obviously non-zeros because it's set of all mentees irrespective of year -# if len_mentors != 0 : -# factor = (int(len_mentees / len_mentors) + (1 if len_mentees % len_mentors else 0)) + 1 -# else: -# factor = 1 -# temp = [temp, mentors_temp[1][mask], mentors_temp[2][mask], mentors_temp[3][mask], mentors_temp[4][mask]] -# temp = list( map( lambda x : np.repeat(x, factor), temp ) ) -# multi += len_mentors*factor -# print(f"Value of multi: {multi}") -# mentors_split += [temp] - -## all this fucking banana reduce for just merging the individual lists into a mega list -#mentors = reduce(lambda x, y: [np.append(u[0], u[1]) for u in zip(x, y)], mentors_split, [[] for _ in mentors_split[0]]) -#mentors = list( map( list, mentors ) ) print('+' * 80) -# mentees should be a perfect multiple of mentor 
numbers -# this is necessitated by the matching API -multi = (int(len(mentees[0]) / len(mentors[0])) + (1 if len(mentees[0]) % len(mentors[0]) else 0)) * len(mentors[0]) -costs = analysis.costs(multi, mentees[2], mentees[3], mentors[2], mentors[3], mentors[4]) -assignment = matcher.match(multi, len(mentors[0]), costs) +osts = analysis.costs(len(mentees[0]), mentees[2], mentees[3], mentors[2], mentors[3], mentors[4]) -print('+' * 80, file=sys.stderr) -assignment = list( zip(assignment[:len(mentees[0])], list(range(len(mentees[0])))) ) +opt = z3.Optimize() + +tee_tor_vars = [[Bool(f'var_{i}_{j}') for j in range(len(mentees[0]))] for i in range(len(mentors[0]))] + +opt.add(analysis.mentor_count_constraint(tee_tor_vars, costs, 3)) + +res = analysis.matching(opt, tee_tor_vars, costs) + +assert res == sat + +m = opt.model() + +assignment = [] +for i in range(len(mentors[0])): + for j in range(len(mentees[0])): + if (m[tee_tor_vars[i][j]]): + assignment.append((i,j)) assignment = list(map( lambda x : (mentors[0][x[0]], mentors[1][x[0]], mentors[4][x[0]], mentees[0][x[1]], mentees[1][x[1]], mentees[3][x[1]]), assignment)) assignment = pd.DataFrame(assignment) From 84c0debd795612bc281d2dd77bf5cf01e2ceeddd Mon Sep 17 00:00:00 2001 From: Sahil Jain Date: Sun, 30 Jan 2022 23:20:54 +0530 Subject: [PATCH 2/4] change minimize to soft assertions with integer cost --- csec_mentor_matching/analysis.py | 32 ++++++++++++++++++-------------- csec_mentor_matching/main.py | 15 +++++++++++---- 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/csec_mentor_matching/analysis.py b/csec_mentor_matching/analysis.py index 265cc68..3fe3907 100644 --- a/csec_mentor_matching/analysis.py +++ b/csec_mentor_matching/analysis.py @@ -75,8 +75,7 @@ def mentee_analysis(mentees) : return (emails, names, interests, years) # the cost with number of matching subjects -dot_cost = np.exp(np.arange(12)/3.0) -dot_cost[0] += 1e+20 +dot_cost = np.exp(np.arange(12)/3.0)[::-1] # cost with number of extra subjects that dont match with mentor extra_cost = np.exp(np.arange(12)/3.0) @@ -96,7 +95,6 @@ def eval_cost(x) : tor_int = int(tor_int) # 0 common very high cost, less common low cost, more chances of matching. - # lot of matches considered superflous. common = dot_cost[count_bits(tee_int & tor_int)] # more the tee extra more the cost, less chances of matching. @@ -107,28 +105,32 @@ def eval_cost(x) : # lower the chances of getting that mentor. # More focussed the mentor on particular topics, the # lesser the chance of getting the mentor. - pro_cost = np.exp(pro* (0.40/count_bits(tor_int))) + pro_cost = np.exp((1/pro) * (1/count_bits(tor_int))) # Chances of matching a mentor in lower year than you # should be negligible - year_cost = 1e+10 if tee_year > tor_year else 0 + year_cost = 100 if tee_year > tor_year else (20 if tee_year == tor_year else 0) # random luck - luck = random.uniform(50.0, 275.0) + luck = random.uniform(2.0, 10.0) return np.log(common + tee_extra + pro_cost + year_cost + luck) -def costs(multi, mentee_int, mentee_year, mentor_int, proficiency, mentor_year) : +def costs(mentee_int, mentee_year, mentor_int, proficiency, mentor_year) : # the matcher is a minimum cost matching algorithm # Make the costs higher to decrease the possibility of matching. 
# add dummy candidates with very high weight - mentee_int = np.append(mentee_int, np.asarray([0] * (multi - len(mentee_int)))) - mentee_year = np.append(mentee_year, np.asarray([len(MENTEE_YEAR_FULL)+1] * (multi - len(mentee_year)))) + # multi is number of mentees +# mentee_int = np.append(mentee_int, np.asarray([0] * (multi - len(mentee_int)))) +# mentee_year = np.append(mentee_year, np.asarray([len(MENTEE_YEAR_FULL)+1] * (multi - len(mentee_year)))) costs = list(it.product(list(zip(mentee_int, mentee_year)), list(zip(mentor_int, proficiency, mentor_year)))) costs = list(map( eval_cost, costs)) - return costs + mat = [] + for i in range(len(mentor_int)): + mat.append(list(map(lambda x : int(x), costs[i::len(mentor_int)]))) + return mat def detect_duplicates(mentors, mentees) : for pt in (set(mentors[0]) & set(mentees[0])) : @@ -143,17 +145,19 @@ def detect_duplicates(mentors, mentees) : def mentor_count_constraint(tee_tor_vars, costs, min_cnt): l = [] for i in range(len(tee_tor_vars)): - l.append(z3.AtLeast(tee_tor_vars[i], min_cnt)) + l.append(z3.AtLeast(*tee_tor_vars[i], min_cnt)) return z3.And(l) def matching(optimiser, tee_tor_vars, costs): for j in range(len(tee_tor_vars[0])): - optimiser.add(z3.PbEq([(tee_tor_vars[i][j],1) for i in range(len(tee_tor_vars))], 1)) + optimiser.add(z3.simplify(z3.PbEq([(tee_tor_vars[i][j],1) for i in range(len(tee_tor_vars))], 1))) v = 0 for i in range(len(tee_tor_vars)): for j in range(len(tee_tor_vars[0])): - v += If(tee_tor_vars[i][j], costs[i][j], 0) - optimiser.minimize(v) + optimiser.add_soft(tee_tor_vars[i][j], costs[i][j]) +# optimiser.minimize(v) + + print(optimiser) return optimiser.check() diff --git a/csec_mentor_matching/main.py b/csec_mentor_matching/main.py index 19cab05..4d3f63a 100644 --- a/csec_mentor_matching/main.py +++ b/csec_mentor_matching/main.py @@ -15,6 +15,11 @@ import sys import z3 +z3.set_param('parallel.enable', True) +z3.set_param('parallel.threads.max', 32) +z3.set_param('sat.local_search_threads', 4) +z3.set_param('sat.threads', 4) + MENTEE_SPREADSHEET = '1byO5GRxBVTaDVXNR_JeykVWwA9LrirKhmjjn_8TIUXk' MENTOR_SPREADSHEET = '1SMl0cjfrLgU6D2iYtiQwdl1O5kHTdGdyDqWdysdwTWg' MENTOR_YEAR_SPREADSHEET = '15sDdZuQmdCkCgJEI2tXgRahDH2s0_GfXaG4kVIz9gsU' @@ -39,17 +44,19 @@ print('+' * 80) -osts = analysis.costs(len(mentees[0]), mentees[2], mentees[3], mentors[2], mentors[3], mentors[4]) +costs = analysis.costs(mentees[2], mentees[3], mentors[2], mentors[3], mentors[4]) opt = z3.Optimize() -tee_tor_vars = [[Bool(f'var_{i}_{j}') for j in range(len(mentees[0]))] for i in range(len(mentors[0]))] +tee_tor_vars = [[z3.Bool(f'var_{i}_{j}') for j in range(len(mentees[0]))] for i in range(len(mentors[0]))] + +opt.add(z3.simplify(analysis.mentor_count_constraint(tee_tor_vars, costs, 3))) -opt.add(analysis.mentor_count_constraint(tee_tor_vars, costs, 3)) +print('+' * 80) res = analysis.matching(opt, tee_tor_vars, costs) -assert res == sat +assert res == z3.sat m = opt.model() From db3fca9686077da33ed87a6eaeea8918c49858c4 Mon Sep 17 00:00:00 2001 From: Sahil Jain Date: Tue, 1 Feb 2022 23:13:55 +0530 Subject: [PATCH 3/4] Add visualization of mapping and mentee interests --- csec_mentor_matching/.gitignore | 1 + csec_mentor_matching/analysis.py | 59 +++++++++++++++++++++++++------- csec_mentor_matching/main.py | 18 +++++++--- 3 files changed, 60 insertions(+), 18 deletions(-) diff --git a/csec_mentor_matching/.gitignore b/csec_mentor_matching/.gitignore index 7778391..6181b4c 100644 --- a/csec_mentor_matching/.gitignore +++ 
b/csec_mentor_matching/.gitignore @@ -3,3 +3,4 @@ *.xls *.xlsx costs +*.png diff --git a/csec_mentor_matching/analysis.py b/csec_mentor_matching/analysis.py index eda7f31..92a2d24 100644 --- a/csec_mentor_matching/analysis.py +++ b/csec_mentor_matching/analysis.py @@ -5,6 +5,7 @@ import random import warnings import z3 +import matplotlib.pyplot as plt #warnings.filterwarnings('error') @@ -150,6 +151,7 @@ def mentee_analysis(mentees) : # cost with number of extra subjects that dont match with mentor extra_cost = np.exp(np.arange(12)/3.0) +U = int('1'*11, 2) def count_bits(x) : t = 0 @@ -162,7 +164,6 @@ def eval_cost(x) : # remember more the cost, less chances of matching (tee_int, tee_year) , (tor_int, pro, tor_year) = x - U = int('1'*11, 2) tor_int = int(tor_int) tee_int = int(tee_int) @@ -181,11 +182,11 @@ def eval_cost(x) : # lower the chances of getting that mentor. # More focussed the mentor on particular topics, the # lesser the chance of getting the mentor. - pro_cost = np.exp((1/pro) * (1/count_bits(tor_int))) + pro_cost = np.exp(10 * (1/pro) * (1/count_bits(tor_int))) # Chances of matching a mentor in lower year than you # should be negligible - year_cost = 100 if tee_year > tor_year else (20 if tee_year == tor_year else 0) + year_cost = 100 if tee_year > tor_year else (10 if tee_year == tor_year else 0) # random luck luck = random.uniform(2.0, 10.0) @@ -203,21 +204,16 @@ def eval_cost(x) : return np.log(common + tee_extra + pro_cost + year_cost + luck) -def costs(mentee_int, mentee_year, mentor_int, proficiency, mentor_year) : +def costs(multi, mentee_int, mentee_year, mentor_int, proficiency, mentor_year) : # the matcher is a minimum cost matching algorithm # Make the costs higher to decrease the possibility of matching. - - # add dummy candidates with very high weight # multi is number of mentees -# mentee_int = np.append(mentee_int, np.asarray([0] * (multi - len(mentee_int)))) -# mentee_year = np.append(mentee_year, np.asarray([len(MENTEE_YEAR_FULL)+1] * (multi - len(mentee_year)))) + mentee_int = np.append(mentee_int, np.asarray([0] * (multi - len(mentee_int)))) + mentee_year = np.append(mentee_year, np.asarray([len(MENTEE_YEAR_FULL)+1] * (multi - len(mentee_year)))) costs = list(it.product(list(zip(mentee_int, mentee_year)), list(zip(mentor_int, proficiency, mentor_year)))) costs = list(map( eval_cost, costs)) - mat = [] - for i in range(len(mentor_int)): - mat.append(list(map(lambda x : int(x), costs[i::len(mentor_int)]))) - return mat + return costs def detect_duplicates(mentors, mentees) : for pt in (set(mentors[0]) & set(mentees[0])) : @@ -245,6 +241,43 @@ def matching(optimiser, tee_tor_vars, costs): optimiser.add_soft(tee_tor_vars[i][j], costs[i][j]) # optimiser.minimize(v) - print(optimiser) +# print(optimiser) return optimiser.check() + +def visualise_interests(mentees): + interests = mentees[2] + BLK = 20 + img = np.zeros((len(MENTEE_INTEREST_FULL)*BLK, BLK*len(interests), 3), dtype=np.float) + + for i, v in enumerate(interests): + mask = v + for j, val in enumerate(bin(mask)[2:].zfill(len(MENTEE_INTEREST_FULL))): + img[j*BLK:(j+1)*BLK, i*BLK:(i+1)*BLK, :] = int(val) + +# print(img) + plt.imsave('mentee_interests.png', img) + + +def visualize_mapping(mapping, mentors, mentees): + BLK = 20 + img = np.zeros((BLK*7, BLK*len(mentors[0]), 3), dtype=np.uint8) + cnts = np.zeros((len(mentors[0])), dtype=np.int) + for tor, tee in mapping: + tor_int = mentors[2][tor] + tee_int = mentees[2][tee] + val = [[[0, 255, 0]]] # green is default + + if count_bits(tee_int & (~(tor_int & 
tee_int) & U)) > 2: + # set difference of tee_int with tor_int more than, say, 2 + val = [[[255, 0, 0]]] # red + elif count_bits(tor_int & (~(tor_int & tee_int) & U)) > 2: + # set difference of tor_int with tee_int + val = [[[0, 0, 255]]] # blue + + img[cnts[tor]*BLK:(cnts[tor]+1)*BLK, tor*BLK:(tor+1)*BLK, :] = val + cnts[tor] += 1 + +# print(img) + + plt.imsave('mapping.png', img) diff --git a/csec_mentor_matching/main.py b/csec_mentor_matching/main.py index 0c15184..ae8ec03 100644 --- a/csec_mentor_matching/main.py +++ b/csec_mentor_matching/main.py @@ -37,10 +37,13 @@ mentors = analysis.mentor_analysis((mentors, mentors_year)) mentees = analysis.mentee_analysis(mentees) +mentees_og = mentees print('+' * 80) analysis.detect_duplicates(mentors, mentees) +analysis.visualise_interests(mentees) + mentee_set1, mentee_set2 = analysis.arrange_mentees(mentees, len(mentors[0])) #mentors = list(mentors) @@ -80,9 +83,9 @@ multi = len(mentees[0]) costs = analysis.costs(multi, mentees[2], mentees[3], mentors[2], mentors[3], mentors[4]) temp = matcher.match(multi, len(mentors[0]), costs) -temp = list( zip(temp[:len(mentees[0])], list(range(len(mentees[0])))) ) +temp1 = list( zip(temp[:len(mentees[0])], list(range(len(mentees[0])))) ) -assignment = list(map( lambda x : (mentors[0][x[0]], mentors[1][x[0]], mentors[4][x[0]], mentees[0][x[1]], mentees[1][x[1]], mentees[3][x[1]]), temp)) +assignment = list(map( lambda x : (mentors[0][x[0]], mentors[1][x[0]], mentors[4][x[0]], mentees[0][x[1]], mentees[1][x[1]], mentees[3][x[1]]), temp1)) print('+' * 80) @@ -90,12 +93,17 @@ multi = len(mentors[0]) costs = analysis.costs(multi, mentees[2], mentees[3], mentors[2], mentors[3], mentors[4]) temp = matcher.match(multi, len(mentors[0]), costs) -temp = list( zip(temp[:len(mentees[0])], list(range(len(mentees[0])))) ) +temp2 = list( zip(temp[:len(mentees[0])], list(range(len(mentees[0])))) ) -assignment += list(map(lambda x : (mentors[0][x[0]], mentors[1][x[0]], mentors[4][x[0]], mentees[0][x[1]], mentees[1][x[1]], mentees[3][x[1]]), temp)) +assignment += list(map(lambda x : (mentors[0][x[0]], mentors[1][x[0]], mentors[4][x[0]], mentees[0][x[1]], mentees[1][x[1]], mentees[3][x[1]]), temp2)) assignment = pd.DataFrame(assignment) -print('+' * 80, file=sys.stderr) +mapping = temp1 + temp2 +mapping.sort() + +analysis.visualize_mapping(mapping, mentors, mentees_og) + +print('+' * 80) assignment.columns = ['Mentor Email', 'Mentor Name', 'Mentor Year', 'Mentee Email', 'Mentee Name', 'Mentee Year'] print("The assertion that Mentor Year >= Mentee Year: " + ("Failed" if np.all(assignment['Mentor Year'] >= assignment['Mentee Year'] ) else "Passed")) From 231e501fcbaaa3870de90388da3deb9f122b6c5e Mon Sep 17 00:00:00 2001 From: Sahil Jain Date: Tue, 1 Feb 2022 23:31:32 +0530 Subject: [PATCH 4/4] some more changes --- csec_mentor_matching/analysis.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/csec_mentor_matching/analysis.py b/csec_mentor_matching/analysis.py index 92a2d24..db84c3f 100644 --- a/csec_mentor_matching/analysis.py +++ b/csec_mentor_matching/analysis.py @@ -268,12 +268,13 @@ def visualize_mapping(mapping, mentors, mentees): tee_int = mentees[2][tee] val = [[[0, 255, 0]]] # green is default - if count_bits(tee_int & (~(tor_int & tee_int) & U)) > 2: + if count_bits(tor_int & tee_int) <= 3: + # if count_bits(tee_int & (~(tor_int & tee_int) & U)) > 2: # set difference of tee_int with tor_int more than, say, 2 val = [[[255, 0, 0]]] # red - elif count_bits(tor_int & (~(tor_int & tee_int) & U)) > 
2: + # elif count_bits(tor_int & (~(tor_int & tee_int) & U)) > 2: # set difference of tor_int with tee_int - val = [[[0, 0, 255]]] # blue + #val = [[[0, 0, 255]]] # blue img[cnts[tor]*BLK:(cnts[tor]+1)*BLK, tor*BLK:(tor+1)*BLK, :] = val cnts[tor] += 1
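
The constraint and objective code above is spread across analysis.mentor_count_constraint(), analysis.matching() and main.py, and it evolves over the four patches. Below is a minimal, self-contained z3py sketch of that encoding, separate from the patches themselves: the 3x4 cost matrix, the min_per_mentor bound of 1 and the variable names are illustrative assumptions, not values from the repository. It shows the explicit minimize() objective of PATCH 1 and, commented out, a soft-assertion (MaxSAT-style) objective with integer weights in the spirit of PATCH 2.

import z3

# Illustrative cost matrix: rows are mentors, columns are mentees (assumed values).
costs = [
    [4, 9, 2, 7],   # mentor 0 vs mentees 0..3
    [3, 1, 8, 6],   # mentor 1
    [5, 2, 4, 3],   # mentor 2
]
n_mentors, n_mentees = len(costs), len(costs[0])
min_per_mentor = 1  # assumed lower bound for this toy example (the patches use 3)

opt = z3.Optimize()
# One selection variable per (mentor, mentee) pair, mentor index first.
x = [[z3.Bool(f'var_{i}_{j}') for j in range(n_mentees)] for i in range(n_mentors)]

# Exactly one mentor per mentee: a pseudo-Boolean equality over each column,
# as in analysis.matching().
for j in range(n_mentees):
    opt.add(z3.PbEq([(x[i][j], 1) for i in range(n_mentors)], 1))

# Each mentor gets at least min_per_mentor mentees, as in
# analysis.mentor_count_constraint().
for i in range(n_mentors):
    opt.add(z3.AtLeast(*x[i], min_per_mentor))

# Objective (a): explicit arithmetic minimisation of the selected pairs' cost.
total = z3.Sum([z3.If(x[i][j], costs[i][j], 0)
                for i in range(n_mentors) for j in range(n_mentees)])
opt.minimize(total)

# Objective (b), MaxSAT-style with integer weights: soft-assert that a pair is
# NOT selected, with its cost as the penalty, so the solver pays costs[i][j]
# exactly when it picks pair (i, j).
# for i in range(n_mentors):
#     for j in range(n_mentees):
#         opt.add_soft(z3.Not(x[i][j]), costs[i][j])

assert opt.check() == z3.sat
m = opt.model()
assignment = [(i, j) for i in range(n_mentors) for j in range(n_mentees)
              if z3.is_true(m[x[i][j]])]
print(assignment)   # a minimum-cost assignment; total cost 9 on this toy data

Either formulation can be handed to the same Optimize instance; the soft-assertion form expresses the objective purely as weighted Boolean clauses rather than an arithmetic term.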
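eval_cost() and visualize_mapping() lean on the same bit-mask bookkeeping: interests are stored as 11-bit masks (U = int('1'*11, 2) in analysis.py), and the "extra" interests of one side relative to the other are a masked set difference. A small sketch of that arithmetic follows; the two example masks are assumptions chosen only to make the numbers easy to check.

U = int('1' * 11, 2)          # all-ones mask over the 11 interest topics

def count_bits(x):
    # population count, same loop as in analysis.py
    t = 0
    while x:
        t += x & 1
        x >>= 1
    return t

tee_int = 0b00000011010       # illustrative mentee mask: topics 1, 3, 4
tor_int = 0b00000001110       # illustrative mentor mask: topics 1, 2, 3

common    = count_bits(tee_int & tor_int)        # shared topics      -> 2
tee_extra = count_bits(tee_int & ~tor_int & U)   # mentee-only topics -> 1
tor_extra = count_bits(tor_int & ~tee_int & U)   # mentor-only topics -> 1
print(common, tee_extra, tor_extra)

The patches write the differences as tee_int & (~(tor_int & tee_int) & U); since tor_int & tee_int is a subset of tee_int, that expression reduces to the tee_int & ~tor_int & U form used here.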