From c56fa0306b1146373a59f4ccc3583ddb432f3963 Mon Sep 17 00:00:00 2001 From: Sahil Jain Date: Sun, 30 Jan 2022 14:21:51 +0530 Subject: [PATCH 1/4] z3 optimize for computing min cost matching with other constraints --- csec_mentor_matching/analysis.py | 24 ++++++++++++++ csec_mentor_matching/main.py | 54 +++++++++++--------------------- 2 files changed, 43 insertions(+), 35 deletions(-) diff --git a/csec_mentor_matching/analysis.py b/csec_mentor_matching/analysis.py index 5d90289..265cc68 100644 --- a/csec_mentor_matching/analysis.py +++ b/csec_mentor_matching/analysis.py @@ -4,6 +4,7 @@ import itertools as it import random import warnings +import z3 warnings.filterwarnings('error') @@ -133,3 +134,26 @@ def detect_duplicates(mentors, mentees) : for pt in (set(mentors[0]) & set(mentees[0])) : n = mentors[0].index(pt) print( mentors[1][n] + " <" + pt + "> Filled both mentor and mentee forms.") + +# to encode matching as SAT; every mentee-mentor pair will have a cost and a boolean var to denote whether they are picked or not +# min cost matching = optimise cost of chosen pairs + choose only one edge per mentee +# other constraints to be added separately + +# vars have mentor as first index and mentee as second +def mentor_count_constraint(tee_tor_vars, costs, min_cnt): + l = [] + for i in range(len(tee_tor_vars)): + l.append(z3.AtLeast(tee_tor_vars[i], min_cnt)) + return z3.And(l) + +def matching(optimiser, tee_tor_vars, costs): + for j in range(len(tee_tor_vars[0])): + optimiser.add(z3.PbEq([(tee_tor_vars[i][j],1) for i in range(len(tee_tor_vars))], 1)) + + v = 0 + for i in range(len(tee_tor_vars)): + for j in range(len(tee_tor_vars[0])): + v += If(tee_tor_vars[i][j], costs[i][j], 0) + optimiser.minimize(v) + + return optimiser.check() diff --git a/csec_mentor_matching/main.py b/csec_mentor_matching/main.py index 66f0d92..19cab05 100644 --- a/csec_mentor_matching/main.py +++ b/csec_mentor_matching/main.py @@ -13,6 +13,7 @@ import numpy as np from functools import reduce import sys +import z3 MENTEE_SPREADSHEET = '1byO5GRxBVTaDVXNR_JeykVWwA9LrirKhmjjn_8TIUXk' MENTOR_SPREADSHEET = '1SMl0cjfrLgU6D2iYtiQwdl1O5kHTdGdyDqWdysdwTWg' @@ -35,45 +36,28 @@ analysis.detect_duplicates(mentors, mentees) -#mentors = list(mentors) -#mentors.append([np.zeros((len(mentors[0]),))]) -#mentors = tuple(mentors) - -#mentors_temp = list( map( lambda x: np.asarray(x, dtype=type(x[0])), mentors ) ) -#mentees = tuple( list( map( lambda x: np.asarray(x, dtype=type(x[0])), mentees ) ) ) -#mentors_split = [] -#mx = max(mentors[4]) -#multi = 0 -#for i in range(mx, -1, -1) : -# mask = (mentors_temp[4] == i) -# temp = mentors_temp[0][mask] -# len_mentors = len(temp) -# len_mentees = np.sum(np.asarray(mentees[3]) <= i) -# # len_mentees is obviously non-zeros because it's set of all mentees irrespective of year -# if len_mentors != 0 : -# factor = (int(len_mentees / len_mentors) + (1 if len_mentees % len_mentors else 0)) + 1 -# else: -# factor = 1 -# temp = [temp, mentors_temp[1][mask], mentors_temp[2][mask], mentors_temp[3][mask], mentors_temp[4][mask]] -# temp = list( map( lambda x : np.repeat(x, factor), temp ) ) -# multi += len_mentors*factor -# print(f"Value of multi: {multi}") -# mentors_split += [temp] - -## all this fucking banana reduce for just merging the individual lists into a mega list -#mentors = reduce(lambda x, y: [np.append(u[0], u[1]) for u in zip(x, y)], mentors_split, [[] for _ in mentors_split[0]]) -#mentors = list( map( list, mentors ) ) print('+' * 80) -# mentees should be a perfect multiple of mentor 
numbers -# this is necessitated by the matching API -multi = (int(len(mentees[0]) / len(mentors[0])) + (1 if len(mentees[0]) % len(mentors[0]) else 0)) * len(mentors[0]) -costs = analysis.costs(multi, mentees[2], mentees[3], mentors[2], mentors[3], mentors[4]) -assignment = matcher.match(multi, len(mentors[0]), costs) +osts = analysis.costs(len(mentees[0]), mentees[2], mentees[3], mentors[2], mentors[3], mentors[4]) -print('+' * 80, file=sys.stderr) -assignment = list( zip(assignment[:len(mentees[0])], list(range(len(mentees[0])))) ) +opt = z3.Optimize() + +tee_tor_vars = [[Bool(f'var_{i}_{j}') for j in range(len(mentees[0]))] for i in range(len(mentors[0]))] + +opt.add(analysis.mentor_count_constraint(tee_tor_vars, costs, 3)) + +res = analysis.matching(opt, tee_tor_vars, costs) + +assert res == sat + +m = opt.model() + +assignment = [] +for i in range(len(mentors[0])): + for j in range(len(mentees[0])): + if (m[tee_tor_vars[i][j]]): + assignment.append((i,j)) assignment = list(map( lambda x : (mentors[0][x[0]], mentors[1][x[0]], mentors[4][x[0]], mentees[0][x[1]], mentees[1][x[1]], mentees[3][x[1]]), assignment)) assignment = pd.DataFrame(assignment) From 84c0debd795612bc281d2dd77bf5cf01e2ceeddd Mon Sep 17 00:00:00 2001 From: Sahil Jain Date: Sun, 30 Jan 2022 23:20:54 +0530 Subject: [PATCH 2/4] change minimize to soft assertions with integer cost --- csec_mentor_matching/analysis.py | 32 ++++++++++++++++++-------------- csec_mentor_matching/main.py | 15 +++++++++++---- 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/csec_mentor_matching/analysis.py b/csec_mentor_matching/analysis.py index 265cc68..3fe3907 100644 --- a/csec_mentor_matching/analysis.py +++ b/csec_mentor_matching/analysis.py @@ -75,8 +75,7 @@ def mentee_analysis(mentees) : return (emails, names, interests, years) # the cost with number of matching subjects -dot_cost = np.exp(np.arange(12)/3.0) -dot_cost[0] += 1e+20 +dot_cost = np.exp(np.arange(12)/3.0)[::-1] # cost with number of extra subjects that dont match with mentor extra_cost = np.exp(np.arange(12)/3.0) @@ -96,7 +95,6 @@ def eval_cost(x) : tor_int = int(tor_int) # 0 common very high cost, less common low cost, more chances of matching. - # lot of matches considered superflous. common = dot_cost[count_bits(tee_int & tor_int)] # more the tee extra more the cost, less chances of matching. @@ -107,28 +105,32 @@ def eval_cost(x) : # lower the chances of getting that mentor. # More focussed the mentor on particular topics, the # lesser the chance of getting the mentor. - pro_cost = np.exp(pro* (0.40/count_bits(tor_int))) + pro_cost = np.exp((1/pro) * (1/count_bits(tor_int))) # Chances of matching a mentor in lower year than you # should be negligible - year_cost = 1e+10 if tee_year > tor_year else 0 + year_cost = 100 if tee_year > tor_year else (20 if tee_year == tor_year else 0) # random luck - luck = random.uniform(50.0, 275.0) + luck = random.uniform(2.0, 10.0) return np.log(common + tee_extra + pro_cost + year_cost + luck) -def costs(multi, mentee_int, mentee_year, mentor_int, proficiency, mentor_year) : +def costs(mentee_int, mentee_year, mentor_int, proficiency, mentor_year) : # the matcher is a minimum cost matching algorithm # Make the costs higher to decrease the possibility of matching. 
# add dummy candidates with very high weight - mentee_int = np.append(mentee_int, np.asarray([0] * (multi - len(mentee_int)))) - mentee_year = np.append(mentee_year, np.asarray([len(MENTEE_YEAR_FULL)+1] * (multi - len(mentee_year)))) + # multi is number of mentees +# mentee_int = np.append(mentee_int, np.asarray([0] * (multi - len(mentee_int)))) +# mentee_year = np.append(mentee_year, np.asarray([len(MENTEE_YEAR_FULL)+1] * (multi - len(mentee_year)))) costs = list(it.product(list(zip(mentee_int, mentee_year)), list(zip(mentor_int, proficiency, mentor_year)))) costs = list(map( eval_cost, costs)) - return costs + mat = [] + for i in range(len(mentor_int)): + mat.append(list(map(lambda x : int(x), costs[i::len(mentor_int)]))) + return mat def detect_duplicates(mentors, mentees) : for pt in (set(mentors[0]) & set(mentees[0])) : @@ -143,17 +145,19 @@ def detect_duplicates(mentors, mentees) : def mentor_count_constraint(tee_tor_vars, costs, min_cnt): l = [] for i in range(len(tee_tor_vars)): - l.append(z3.AtLeast(tee_tor_vars[i], min_cnt)) + l.append(z3.AtLeast(*tee_tor_vars[i], min_cnt)) return z3.And(l) def matching(optimiser, tee_tor_vars, costs): for j in range(len(tee_tor_vars[0])): - optimiser.add(z3.PbEq([(tee_tor_vars[i][j],1) for i in range(len(tee_tor_vars))], 1)) + optimiser.add(z3.simplify(z3.PbEq([(tee_tor_vars[i][j],1) for i in range(len(tee_tor_vars))], 1))) v = 0 for i in range(len(tee_tor_vars)): for j in range(len(tee_tor_vars[0])): - v += If(tee_tor_vars[i][j], costs[i][j], 0) - optimiser.minimize(v) + optimiser.add_soft(tee_tor_vars[i][j], costs[i][j]) +# optimiser.minimize(v) + + print(optimiser) return optimiser.check() diff --git a/csec_mentor_matching/main.py b/csec_mentor_matching/main.py index 19cab05..4d3f63a 100644 --- a/csec_mentor_matching/main.py +++ b/csec_mentor_matching/main.py @@ -15,6 +15,11 @@ import sys import z3 +z3.set_param('parallel.enable', True) +z3.set_param('parallel.threads.max', 32) +z3.set_param('sat.local_search_threads', 4) +z3.set_param('sat.threads', 4) + MENTEE_SPREADSHEET = '1byO5GRxBVTaDVXNR_JeykVWwA9LrirKhmjjn_8TIUXk' MENTOR_SPREADSHEET = '1SMl0cjfrLgU6D2iYtiQwdl1O5kHTdGdyDqWdysdwTWg' MENTOR_YEAR_SPREADSHEET = '15sDdZuQmdCkCgJEI2tXgRahDH2s0_GfXaG4kVIz9gsU' @@ -39,17 +44,19 @@ print('+' * 80) -osts = analysis.costs(len(mentees[0]), mentees[2], mentees[3], mentors[2], mentors[3], mentors[4]) +costs = analysis.costs(mentees[2], mentees[3], mentors[2], mentors[3], mentors[4]) opt = z3.Optimize() -tee_tor_vars = [[Bool(f'var_{i}_{j}') for j in range(len(mentees[0]))] for i in range(len(mentors[0]))] +tee_tor_vars = [[z3.Bool(f'var_{i}_{j}') for j in range(len(mentees[0]))] for i in range(len(mentors[0]))] + +opt.add(z3.simplify(analysis.mentor_count_constraint(tee_tor_vars, costs, 3))) -opt.add(analysis.mentor_count_constraint(tee_tor_vars, costs, 3)) +print('+' * 80) res = analysis.matching(opt, tee_tor_vars, costs) -assert res == sat +assert res == z3.sat m = opt.model() From db3fca9686077da33ed87a6eaeea8918c49858c4 Mon Sep 17 00:00:00 2001 From: Sahil Jain Date: Tue, 1 Feb 2022 23:13:55 +0530 Subject: [PATCH 3/4] Add visualization of mapping and mentee interests --- csec_mentor_matching/.gitignore | 1 + csec_mentor_matching/analysis.py | 59 +++++++++++++++++++++++++------- csec_mentor_matching/main.py | 18 +++++++--- 3 files changed, 60 insertions(+), 18 deletions(-) diff --git a/csec_mentor_matching/.gitignore b/csec_mentor_matching/.gitignore index 7778391..6181b4c 100644 --- a/csec_mentor_matching/.gitignore +++ 
b/csec_mentor_matching/.gitignore @@ -3,3 +3,4 @@ *.xls *.xlsx costs +*.png diff --git a/csec_mentor_matching/analysis.py b/csec_mentor_matching/analysis.py index eda7f31..92a2d24 100644 --- a/csec_mentor_matching/analysis.py +++ b/csec_mentor_matching/analysis.py @@ -5,6 +5,7 @@ import random import warnings import z3 +import matplotlib.pyplot as plt #warnings.filterwarnings('error') @@ -150,6 +151,7 @@ def mentee_analysis(mentees) : # cost with number of extra subjects that dont match with mentor extra_cost = np.exp(np.arange(12)/3.0) +U = int('1'*11, 2) def count_bits(x) : t = 0 @@ -162,7 +164,6 @@ def eval_cost(x) : # remember more the cost, less chances of matching (tee_int, tee_year) , (tor_int, pro, tor_year) = x - U = int('1'*11, 2) tor_int = int(tor_int) tee_int = int(tee_int) @@ -181,11 +182,11 @@ def eval_cost(x) : # lower the chances of getting that mentor. # More focussed the mentor on particular topics, the # lesser the chance of getting the mentor. - pro_cost = np.exp((1/pro) * (1/count_bits(tor_int))) + pro_cost = np.exp(10 * (1/pro) * (1/count_bits(tor_int))) # Chances of matching a mentor in lower year than you # should be negligible - year_cost = 100 if tee_year > tor_year else (20 if tee_year == tor_year else 0) + year_cost = 100 if tee_year > tor_year else (10 if tee_year == tor_year else 0) # random luck luck = random.uniform(2.0, 10.0) @@ -203,21 +204,16 @@ def eval_cost(x) : return np.log(common + tee_extra + pro_cost + year_cost + luck) -def costs(mentee_int, mentee_year, mentor_int, proficiency, mentor_year) : +def costs(multi, mentee_int, mentee_year, mentor_int, proficiency, mentor_year) : # the matcher is a minimum cost matching algorithm # Make the costs higher to decrease the possibility of matching. - - # add dummy candidates with very high weight # multi is number of mentees -# mentee_int = np.append(mentee_int, np.asarray([0] * (multi - len(mentee_int)))) -# mentee_year = np.append(mentee_year, np.asarray([len(MENTEE_YEAR_FULL)+1] * (multi - len(mentee_year)))) + mentee_int = np.append(mentee_int, np.asarray([0] * (multi - len(mentee_int)))) + mentee_year = np.append(mentee_year, np.asarray([len(MENTEE_YEAR_FULL)+1] * (multi - len(mentee_year)))) costs = list(it.product(list(zip(mentee_int, mentee_year)), list(zip(mentor_int, proficiency, mentor_year)))) costs = list(map( eval_cost, costs)) - mat = [] - for i in range(len(mentor_int)): - mat.append(list(map(lambda x : int(x), costs[i::len(mentor_int)]))) - return mat + return costs def detect_duplicates(mentors, mentees) : for pt in (set(mentors[0]) & set(mentees[0])) : @@ -245,6 +241,43 @@ def matching(optimiser, tee_tor_vars, costs): optimiser.add_soft(tee_tor_vars[i][j], costs[i][j]) # optimiser.minimize(v) - print(optimiser) +# print(optimiser) return optimiser.check() + +def visualise_interests(mentees): + interests = mentees[2] + BLK = 20 + img = np.zeros((len(MENTEE_INTEREST_FULL)*BLK, BLK*len(interests), 3), dtype=np.float) + + for i, v in enumerate(interests): + mask = v + for j, val in enumerate(bin(mask)[2:].zfill(len(MENTEE_INTEREST_FULL))): + img[j*BLK:(j+1)*BLK, i*BLK:(i+1)*BLK, :] = int(val) + +# print(img) + plt.imsave('mentee_interests.png', img) + + +def visualize_mapping(mapping, mentors, mentees): + BLK = 20 + img = np.zeros((BLK*7, BLK*len(mentors[0]), 3), dtype=np.uint8) + cnts = np.zeros((len(mentors[0])), dtype=np.int) + for tor, tee in mapping: + tor_int = mentors[2][tor] + tee_int = mentees[2][tee] + val = [[[0, 255, 0]]] # green is default + + if count_bits(tee_int & (~(tor_int & 
tee_int) & U)) > 2: + # set difference of tee_int with tor_int more than, say, 2 + val = [[[255, 0, 0]]] # red + elif count_bits(tor_int & (~(tor_int & tee_int) & U)) > 2: + # set difference of tor_int with tee_int + val = [[[0, 0, 255]]] # blue + + img[cnts[tor]*BLK:(cnts[tor]+1)*BLK, tor*BLK:(tor+1)*BLK, :] = val + cnts[tor] += 1 + +# print(img) + + plt.imsave('mapping.png', img) diff --git a/csec_mentor_matching/main.py b/csec_mentor_matching/main.py index 0c15184..ae8ec03 100644 --- a/csec_mentor_matching/main.py +++ b/csec_mentor_matching/main.py @@ -37,10 +37,13 @@ mentors = analysis.mentor_analysis((mentors, mentors_year)) mentees = analysis.mentee_analysis(mentees) +mentees_og = mentees print('+' * 80) analysis.detect_duplicates(mentors, mentees) +analysis.visualise_interests(mentees) + mentee_set1, mentee_set2 = analysis.arrange_mentees(mentees, len(mentors[0])) #mentors = list(mentors) @@ -80,9 +83,9 @@ multi = len(mentees[0]) costs = analysis.costs(multi, mentees[2], mentees[3], mentors[2], mentors[3], mentors[4]) temp = matcher.match(multi, len(mentors[0]), costs) -temp = list( zip(temp[:len(mentees[0])], list(range(len(mentees[0])))) ) +temp1 = list( zip(temp[:len(mentees[0])], list(range(len(mentees[0])))) ) -assignment = list(map( lambda x : (mentors[0][x[0]], mentors[1][x[0]], mentors[4][x[0]], mentees[0][x[1]], mentees[1][x[1]], mentees[3][x[1]]), temp)) +assignment = list(map( lambda x : (mentors[0][x[0]], mentors[1][x[0]], mentors[4][x[0]], mentees[0][x[1]], mentees[1][x[1]], mentees[3][x[1]]), temp1)) print('+' * 80) @@ -90,12 +93,17 @@ multi = len(mentors[0]) costs = analysis.costs(multi, mentees[2], mentees[3], mentors[2], mentors[3], mentors[4]) temp = matcher.match(multi, len(mentors[0]), costs) -temp = list( zip(temp[:len(mentees[0])], list(range(len(mentees[0])))) ) +temp2 = list( zip(temp[:len(mentees[0])], list(range(len(mentees[0])))) ) -assignment += list(map(lambda x : (mentors[0][x[0]], mentors[1][x[0]], mentors[4][x[0]], mentees[0][x[1]], mentees[1][x[1]], mentees[3][x[1]]), temp)) +assignment += list(map(lambda x : (mentors[0][x[0]], mentors[1][x[0]], mentors[4][x[0]], mentees[0][x[1]], mentees[1][x[1]], mentees[3][x[1]]), temp2)) assignment = pd.DataFrame(assignment) -print('+' * 80, file=sys.stderr) +mapping = temp1 + temp2 +mapping.sort() + +analysis.visualize_mapping(mapping, mentors, mentees_og) + +print('+' * 80) assignment.columns = ['Mentor Email', 'Mentor Name', 'Mentor Year', 'Mentee Email', 'Mentee Name', 'Mentee Year'] print("The assertion that Mentor Year >= Mentee Year: " + ("Failed" if np.all(assignment['Mentor Year'] >= assignment['Mentee Year'] ) else "Passed")) From 231e501fcbaaa3870de90388da3deb9f122b6c5e Mon Sep 17 00:00:00 2001 From: Sahil Jain Date: Tue, 1 Feb 2022 23:31:32 +0530 Subject: [PATCH 4/4] some more changes --- csec_mentor_matching/analysis.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/csec_mentor_matching/analysis.py b/csec_mentor_matching/analysis.py index 92a2d24..db84c3f 100644 --- a/csec_mentor_matching/analysis.py +++ b/csec_mentor_matching/analysis.py @@ -268,12 +268,13 @@ def visualize_mapping(mapping, mentors, mentees): tee_int = mentees[2][tee] val = [[[0, 255, 0]]] # green is default - if count_bits(tee_int & (~(tor_int & tee_int) & U)) > 2: + if count_bits(tor_int & tee_int) <= 3: + # if count_bits(tee_int & (~(tor_int & tee_int) & U)) > 2: # set difference of tee_int with tor_int more than, say, 2 val = [[[255, 0, 0]]] # red - elif count_bits(tor_int & (~(tor_int & tee_int) & U)) > 
2: + # elif count_bits(tor_int & (~(tor_int & tee_int) & U)) > 2: # set difference of tor_int with tee_int - val = [[[0, 0, 255]]] # blue + #val = [[[0, 0, 255]]] # blue img[cnts[tor]*BLK:(cnts[tor]+1)*BLK, tor*BLK:(tor+1)*BLK, :] = val cnts[tor] += 1
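
The constraint and objective code above is spread across analysis.mentor_count_constraint(), analysis.matching() and main.py, and it evolves over the four patches. Below is a minimal, self-contained z3py sketch of that encoding, separate from the patches themselves: the 3x4 cost matrix, the min_per_mentor bound of 1 and the variable names are illustrative assumptions, not values from the repository. It shows the explicit minimize() objective of PATCH 1 and, commented out, a soft-assertion (MaxSAT-style) objective with integer weights in the spirit of PATCH 2.

import z3

# Illustrative cost matrix: rows are mentors, columns are mentees (assumed values).
costs = [
    [4, 9, 2, 7],   # mentor 0 vs mentees 0..3
    [3, 1, 8, 6],   # mentor 1
    [5, 2, 4, 3],   # mentor 2
]
n_mentors, n_mentees = len(costs), len(costs[0])
min_per_mentor = 1  # assumed lower bound for this toy example (the patches use 3)

opt = z3.Optimize()
# One selection variable per (mentor, mentee) pair, mentor index first.
x = [[z3.Bool(f'var_{i}_{j}') for j in range(n_mentees)] for i in range(n_mentors)]

# Exactly one mentor per mentee: a pseudo-Boolean equality over each column,
# as in analysis.matching().
for j in range(n_mentees):
    opt.add(z3.PbEq([(x[i][j], 1) for i in range(n_mentors)], 1))

# Each mentor gets at least min_per_mentor mentees, as in
# analysis.mentor_count_constraint().
for i in range(n_mentors):
    opt.add(z3.AtLeast(*x[i], min_per_mentor))

# Objective (a): explicit arithmetic minimisation of the selected pairs' cost.
total = z3.Sum([z3.If(x[i][j], costs[i][j], 0)
                for i in range(n_mentors) for j in range(n_mentees)])
opt.minimize(total)

# Objective (b), MaxSAT-style with integer weights: soft-assert that a pair is
# NOT selected, with its cost as the penalty, so the solver pays costs[i][j]
# exactly when it picks pair (i, j).
# for i in range(n_mentors):
#     for j in range(n_mentees):
#         opt.add_soft(z3.Not(x[i][j]), costs[i][j])

assert opt.check() == z3.sat
m = opt.model()
assignment = [(i, j) for i in range(n_mentors) for j in range(n_mentees)
              if z3.is_true(m[x[i][j]])]
print(assignment)   # a minimum-cost assignment; total cost 9 on this toy data

Either formulation can be handed to the same Optimize instance; the soft-assertion form expresses the objective purely as weighted Boolean clauses rather than an arithmetic term.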
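eval_cost() and visualize_mapping() lean on the same bit-mask bookkeeping: interests are stored as 11-bit masks (U = int('1'*11, 2) in analysis.py), and the "extra" interests of one side relative to the other are a masked set difference. A small sketch of that arithmetic follows; the two example masks are assumptions chosen only to make the numbers easy to check.

U = int('1' * 11, 2)          # all-ones mask over the 11 interest topics

def count_bits(x):
    # population count, same loop as in analysis.py
    t = 0
    while x:
        t += x & 1
        x >>= 1
    return t

tee_int = 0b00000011010       # illustrative mentee mask: topics 1, 3, 4
tor_int = 0b00000001110       # illustrative mentor mask: topics 1, 2, 3

common    = count_bits(tee_int & tor_int)        # shared topics      -> 2
tee_extra = count_bits(tee_int & ~tor_int & U)   # mentee-only topics -> 1
tor_extra = count_bits(tor_int & ~tee_int & U)   # mentor-only topics -> 1
print(common, tee_extra, tor_extra)

The patches write the differences as tee_int & (~(tor_int & tee_int) & U); since tor_int & tee_int is a subset of tee_int, that expression reduces to the tee_int & ~tor_int & U form used here.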