diff --git a/recsys/als-half.ipynb b/recsys/als-half.ipynb new file mode 100644 index 0000000..46a5a28 --- /dev/null +++ b/recsys/als-half.ipynb @@ -0,0 +1,657 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 61, + "id": "d2a4455a", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from collections import defaultdict\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.metrics import mean_squared_error" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "77ed3484", + "metadata": {}, + "outputs": [], + "source": [ + "ratings_path = \"/Users/amitnarang/Downloads/ml-latest-small/ratings.csv\"" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "id": "af66f007", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " userId movieId rating timestamp\n", + "0 1 1 4.0 964982703\n", + "1 1 3 4.0 964981247\n", + "2 1 6 4.0 964982224\n", + "3 1 47 5.0 964983815\n", + "4 1 50 5.0 964982931\n", + "... ... ... ... ...\n", + "100831 610 166534 4.0 1493848402\n", + "100832 610 168248 5.0 1493850091\n", + "100833 610 168250 5.0 1494273047\n", + "100834 610 168252 5.0 1493846352\n", + "100835 610 170875 3.0 1493846415\n", + "\n", + "[100836 rows x 4 columns] 100836\n", + " userId movieId rating timestamp\n", + "0 1 1 4.0 964982703\n", + "1 1 3 4.0 964981247\n", + "2 1 6 4.0 964982224\n", + "3 1 47 5.0 964983815\n", + "4 1 50 5.0 964982931\n", + "... ... ... ... ...\n", + "50413 325 3927 3.0 1039397688\n", + "50414 325 3981 2.0 1039398309\n", + "50415 325 3994 4.0 1039398793\n", + "50416 325 4017 3.0 1039396037\n", + "50417 325 4034 4.0 1039398396\n", + "\n", + "[50418 rows x 4 columns] 50418\n", + "[[0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " ...\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]]\n", + "[[4. 0. 4. ... 0. 0. 0. ]\n", + " [0. 0. 0. ... 0. 0. 0. ]\n", + " [0. 0. 0. ... 0. 0. 0. ]\n", + " ...\n", + " [3.5 4. 0. ... 0. 0. 0. ]\n", + " [0. 0. 0. ... 0. 0. 0. ]\n", + " [0. 0. 0. ... 0. 0. 0. ]]\n" + ] + } + ], + "source": [ + "big_df = pd.read_csv(ratings_path, sep = ',')\n", + "big_df.sort_values('timestamp')\n", + "num_rows = big_df.shape[0]\n", + "print(big_df, num_rows)\n", + "df = big_df.iloc[:int(num_rows/2)]\n", + "print(df, df.shape[0])\n", + "n_users = max(df['userId'])\n", + "n_items = max(df['movieId'])\n", + "ratings = np.zeros((n_users, n_items))\n", + "print(ratings)\n", + "for row in df.itertuples():\n", + " ratings[row.userId - 1, row.movieId - 1] = row.rating\n", + "print(ratings)" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "id": "f9e63f83", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[4. , 0. , 4. , ..., 0. , 0. , 0. ],\n", + " [0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " [0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " ...,\n", + " [3.5, 4. , 0. , ..., 0. , 0. , 0. ],\n", + " [0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " [0. , 0. , 0. , ..., 0. , 0. , 0. ]])" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# taken from ethen8181\n", + "def create_train_test(ratings):\n", + " \"\"\"\n", + " split into training and test sets,\n", + " remove 10 ratings from each user\n", + " and assign them to the test set\n", + " \"\"\"\n", + " test = np.zeros(ratings.shape)\n", + " train = ratings.copy()\n", + " for user in range(ratings.shape[0]):\n", + " test_index = np.random.choice(\n", + " np.flatnonzero(ratings[user]), size = 5, replace = False)\n", + "\n", + " train[user, test_index] = 0.0\n", + " test[user, test_index] = ratings[user, test_index]\n", + " \n", + " # assert that training and testing set are truly disjoint\n", + " assert np.all(train * test == 0)\n", + " return train, test\n", + "\n", + "train, test = create_train_test(ratings)\n", + "#del ratings\n", + "train" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "bd9b47bb", + "metadata": {}, + "outputs": [], + "source": [ + "class ALSModel:\n", + " def __init__(self, l, num_features, n_iters):\n", + " self.l = l\n", + " self.num_features = num_features\n", + " self.n_iters = n_iters\n", + " \n", + " def fit(self, train, test):\n", + " \"\"\"\n", + " pass in training and testing at the same time to record\n", + " model convergence, assuming both dataset is in the form\n", + " of User x Item matrix with cells as ratings\n", + " \"\"\"\n", + " self.n_user, self.n_item = train.shape\n", + " self.user_factors = np.random.random((self.n_user, self.num_features))\n", + " self.item_factors = np.random.random((self.n_item, self.num_features))\n", + " \n", + " # record the training and testing mse for every iteration\n", + " # to show convergence later (usually, not worth it for production)\n", + " self.test_mse_record = []\n", + " self.train_mse_record = [] \n", + " for _ in range(self.n_iters):\n", + " self.user_factors = self._als_step(train, self.user_factors, self.item_factors)\n", + " self.item_factors = self._als_step(train.T, self.item_factors, self.user_factors) \n", + " predictions = self.predict()\n", + " #print(predictions)\n", + " test_mse = self.compute_mse(test, predictions)\n", + " train_mse = self.compute_mse(train, predictions)\n", + " self.test_mse_record.append(test_mse)\n", + " self.train_mse_record.append(train_mse)\n", + " \n", + " return self \n", + " \n", + " def _als_step(self, ratings, solve_vecs, fixed_vecs):\n", + " \"\"\"\n", + " when updating the user matrix,\n", + " the item matrix is the fixed vector and vice versa\n", + " \"\"\"\n", + " A = fixed_vecs.T.dot(fixed_vecs) + np.eye(self.num_features) * self.l\n", + " b = ratings.dot(fixed_vecs)\n", + " A_inv = np.linalg.inv(A)\n", + " solve_vecs = b.dot(A_inv)\n", + " return solve_vecs\n", + " \n", + " def predict(self):\n", + " \"\"\"predict ratings for every user and item\"\"\"\n", + " pred = self.user_factors.dot(self.item_factors.T)\n", + " return pred\n", + " \n", + " @staticmethod\n", + " def compute_mse(y_true, y_pred):\n", + " \"\"\"ignore zero terms prior to comparing the mse\"\"\"\n", + " mask = np.nonzero(y_true)\n", + " print(y_pred[mask])\n", + " mse = mean_squared_error(y_true[mask], y_pred[mask])\n", + " return mse\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "id": "73d89971", + "metadata": {}, + "outputs": [], + "source": [ + "def plot_learning_curve(model):\n", + " \"\"\"visualize the training/testing loss\"\"\"\n", + " linewidth = 3\n", + " plt.plot(model.test_mse_record, label = 'Test', linewidth = linewidth)\n", + " plt.plot(model.train_mse_record, label = 'Train', linewidth = linewidth)\n", + " plt.xlabel('iterations')\n", + " plt.ylabel('MSE')\n", + " plt.legend(loc = 'best')" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "id": "16b9cf07", + "metadata": {}, + "outputs": [], + "source": [ + "als = ALSModel(n_iters = 100, num_features = 200, l = 0.01)" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "id": "bf2be80c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0.5037482 0.6464488 0.86420217 ... 0.18081841 0.10623224 0.15315889]\n", + "[1.14166979 0.3457674 0.73312602 ... 0.37752427 0.1315888 0.42903076]\n", + "[-0.25377314 -0.24611038 -0.47028482 ... -0.077076 0.01770958\n", + " -0.01431049]\n", + "[3.49954347 3.88175469 3.59930154 ... 3.8229048 2.87641366 3.68848983]\n", + "[-0.16703589 -0.17537446 -0.34193023 ... -0.0678052 0.04234823\n", + " -0.04945025]\n", + "[3.6585508 3.90115889 3.75023126 ... 3.8478806 2.90612924 3.80817785]\n", + "[-0.11922917 -0.13327906 -0.26897441 ... -0.05161294 0.04280381\n", + " -0.05740035]\n", + "[3.72858365 3.90939432 3.80916617 ... 3.8579699 2.90682217 3.85453407]\n", + "[-0.0907974 -0.10580113 -0.21947042 ... -0.03978622 0.03795843\n", + " -0.0567409 ]\n", + "[3.78747759 3.92678063 3.83840323 ... 3.86847173 2.90692725 3.87766997]\n", + "[-0.0734607 -0.08751785 -0.18335912 ... -0.03069853 0.03188109\n", + " -0.05328649]\n", + "[3.84034476 3.94640919 3.86016853 ... 3.876998 2.90817406 3.89036646]\n", + "[-0.06276966 -0.07545507 -0.15794879 ... -0.02344582 0.02601958\n", + " -0.04940785]\n", + "[3.88517186 3.96427019 3.87889441 ... 3.88397644 2.91025717 3.89835878]\n", + "[-0.05597785 -0.06731001 -0.14078108 ... -0.01758457 0.02104115\n", + " -0.04598931]\n", + "[3.92140666 3.97911736 3.89539414 ... 3.89008054 2.91282365 3.90453294]\n", + "[-0.05141777 -0.06147258 -0.12945775 ... -0.01281145 0.01712118\n", + " -0.04321025]\n", + "[3.94992425 3.99094713 3.90996067 ... 3.89566766 2.91559247 3.91011964]\n", + "[-0.0481329 -0.05694909 -0.12205999 ... -0.00888987 0.01418895\n", + " -0.04099246]\n", + "[3.9721065 4.00021658 3.92286078 ... 3.90088537 2.91837343 3.91550464]\n", + "[-0.04559752 -0.05317288 -0.11722055 ... -0.00563463 0.01208643\n", + " -0.0392035 ]\n", + "[3.98933229 4.00748452 3.93435975 ... 3.90579054 2.92105244 3.92073167]\n", + "[-0.04352929 -0.04984013 -0.11402214 ... -0.00290292 0.01064313\n", + " -0.03772649]\n", + "[4.00278291 4.01325748 3.94469266 ... 3.91040811 2.92356761 3.92574601]\n", + "[-0.04177675 -0.04679892 -0.11186843 ... -0.00058539 0.00970561\n", + " -0.03647418]\n", + "[4.01339884 4.01793974 3.9540498 ... 3.91475373 2.92589046 3.93048889]\n", + "[-0.04025657 -0.04398041 -0.11037822 ... 0.00140167 0.00914664\n", + " -0.03538525]\n", + "[4.02189892 4.02183216 3.96257746 ... 3.91884111 2.92801348 3.93492408]\n", + "[-0.03892031 -0.04135798 -0.10931012 ... 0.00312258 0.00886558\n", + " -0.03441737]\n", + "[4.02881984 4.02514961 3.97038595 ... 3.9226837 2.92994224 3.9390391 ]\n", + "[-0.03773729 -0.03892405 -0.10851209 ... 0.00462688 0.00878538\n", + " -0.03354125]\n", + "[4.03455735 4.02804204 3.97755908 ... 3.92629477 2.93169003 3.94283902]\n", + "[-0.03668612 -0.03667762 -0.10788869 ... 0.00595288 0.00884833\n", + " -0.0327364 ]\n", + "[4.03940174 4.03061294 3.98416223 ... 3.92968719 2.93327419 3.94633975]\n", + "[-0.03575042 -0.03461824 -0.10738009 ... 0.00713034 0.00901185\n", + " -0.03198832]\n", + "[4.04356601 4.03293376 3.99024844 ... 3.93287324 2.93471357 3.94956275]\n", + "[-0.03491676 -0.03274348 -0.10694878 ... 0.00818248 0.00924487\n", + " -0.03128663]\n", + "[4.04720702 4.03505422 3.99586239 ... 3.93586457 2.93602683 3.9525315 ]\n", + "[-0.03417365 -0.03104825 -0.10657119 ... 0.00912762 0.00952486\n", + " -0.03062384]\n", + "[4.05044099 4.03700953 4.00104308 ... 3.93867228 2.93723136 3.95526937]\n", + "[-0.03351095 -0.0295251 -0.10623247 ... 0.0099803 0.00983558\n", + " -0.02999447]\n", + "[4.05335461 4.03882513 4.00582531 ... 3.94130695 2.93834279 3.95779841]\n", + "[-0.03291961 -0.0281647 -0.10592318 ... 0.01075226 0.01016539\n", + " -0.02939452]\n", + "[4.05601305 4.04051998 4.01024071 ... 3.94377875 2.93937471 3.96013888]\n", + "[-0.03239151 -0.02695649 -0.10563727 ... 0.01145307 0.01050593\n", + " -0.02882101]\n", + "[4.05846567 4.04210856 4.01431828 ... 3.94609745 2.94033869 3.96230897]\n", + "[-0.0319193 -0.02588921 -0.10537078 ... 0.01209066 0.0108513\n", + " -0.02827168]\n", + "[4.06075009 4.04360232 4.01808474 ... 3.9482725 2.94124444 3.96432493]\n", + "[-0.03149638 -0.02495137 -0.10512109 ... 0.0126717 0.0111973\n", + " -0.02774484]\n", + "[4.06289523 4.04501055 4.02156477 ... 3.95031298 2.94210001 3.96620118]\n", + "[-0.0311168 -0.02413154 -0.10488637 ... 0.01320188 0.01154103\n", + " -0.02723916]\n", + "[4.06492342 4.04634098 4.02478118 ... 3.95222763 2.94291198 3.96795047]\n", + "[-0.0307752 -0.02341863 -0.10466535 ... 0.01368614 0.01188047\n", + " -0.02675359]\n", + "[4.06685205 4.0476002 4.02775507 ... 3.95402483 2.94368571 3.9695841 ]\n", + "[-0.03046679 -0.02280205 -0.10445706 ... 0.01412878 0.01221427\n", + " -0.02628728]\n", + "[4.06869471 4.04879394 4.03050591 ... 3.95571256 2.94442556 3.97111209]\n", + "[-0.03018728 -0.02227178 -0.10426075 ... 0.01453361 0.01254155\n", + " -0.02583952]\n", + "[4.07046216 4.04992721 4.03305169 ... 3.95729844 2.94513507 3.97254333]\n", + "[-0.02993284 -0.02181847 -0.10407583 ... 0.01490404 0.01286176\n", + " -0.0254097 ]\n", + "[4.07216291 4.05100449 4.03540901 ... 3.95878967 2.94581709 3.97388577]\n", + "[-0.02970008 -0.02143346 -0.10390181 ... 0.01524312 0.01317456\n", + " -0.02499729]\n", + "[4.07380384 4.05202979 4.03759318 ... 3.96019303 2.94647395 3.97514649]\n", + "[-0.02948599 -0.02110876 -0.10373824 ... 0.01555362 0.01347977\n", + " -0.0246018 ]\n", + "[4.07539049 4.05300677 4.0396183 ... 3.96151491 2.94710755 3.97633186]\n", + "[-0.02928792 -0.02083703 -0.10358479 ... 0.01583806 0.01377732\n", + " -0.02422277]\n", + "[4.07692743 4.0539387 4.04149737 ... 3.96276129 2.94771947 3.97744759]\n", + "[-0.02910353 -0.02061162 -0.10344111 ... 0.0160987 0.01406719\n", + " -0.0238598 ]\n", + "[4.07841846 4.05482862 4.04324237 ... 3.96393776 2.94831102 3.97849885]\n", + "[-0.0289308 -0.02042643 -0.10330693 ... 0.01633765 0.0143494\n", + " -0.02351247]\n", + "[4.07986678 4.05567927 4.04486432 ... 3.9650495 2.9488833 3.9794903 ]\n", + "[-0.02876793 -0.02027598 -0.10318201 ... 0.01655681 0.01462399\n", + " -0.02318039]\n", + "[4.08127512 4.05649319 4.04637335 ... 3.96610137 2.94943728 3.98042619]\n", + "[-0.02861338 -0.02015529 -0.10306613 ... 0.01675793 0.01489099\n", + " -0.02286318]\n", + "[4.08264588 4.05727268 4.04777881 ... 3.96709783 2.94997377 3.98131037]\n", + "[-0.02846582 -0.02005987 -0.10295909 ... 0.01694262 0.01515044\n", + " -0.02256045]\n", + "[4.08398113 4.05801988 4.04908925 ... 3.96804303 2.95049353 3.98214635]\n", + "[-0.02832411 -0.01998571 -0.10286073 ... 0.01711233 0.01540235\n", + " -0.02227184]\n", + "[4.08528276 4.05873676 4.05031257 ... 3.96894081 2.95099722 3.98293735]\n", + "[-0.02818725 -0.01992918 -0.10277092 ... 0.01726843 0.01564675\n", + " -0.02199696]\n", + "[4.08655245 4.0594251 4.05145599 ... 3.96979469 2.95148548 3.98368632]\n", + "[-0.02805444 -0.01988704 -0.1026895 ... 0.01741213 0.01588362\n", + " -0.02173545]\n", + "[4.08779175 4.06008658 4.05252615 ... 3.97060795 2.95195886 3.98439596]\n", + "[-0.02792496 -0.01985636 -0.10261638 ... 0.01754459 0.01611295\n", + " -0.02148693]\n", + "[4.08900207 4.0607227 4.05352916 ... 3.9713836 2.95241794 3.98506876]\n", + "[-0.02779825 -0.01983456 -0.10255145 ... 0.01766684 0.01633472\n", + " -0.02125105]\n", + "[4.09018471 4.06133488 4.05447059 ... 3.97212439 2.95286322 3.98570702]\n", + "[-0.02767382 -0.0198193 -0.10249462 ... 0.01777982 0.01654889\n", + " -0.02102743]\n", + "[4.09134089 4.06192439 4.05535556 ... 3.97283289 2.95329522 3.98631285]\n", + "[-0.0275513 -0.01980849 -0.10244582 ... 0.01788442 0.01675542\n", + " -0.02081571]\n", + "[4.09247174 4.06249242 4.05618875 ... 3.97351145 2.95371442 3.98688822]\n", + "[-0.02743039 -0.01980026 -0.10240495 ... 0.01798144 0.01695425\n", + " -0.02061552]\n", + "[4.0935783 4.06304006 4.05697444 ... 3.97416223 2.95412131 3.98743496]\n", + "[-0.02731088 -0.01979295 -0.10237196 ... 0.01807161 0.01714535\n", + " -0.02042652]\n", + "[4.09466155 4.0635683 4.05771656 ... 3.97478725 2.95451636 3.98795475]\n", + "[-0.02719259 -0.01978505 -0.10234678 ... 0.0181556 0.01732864\n", + " -0.02024834]\n", + "[4.09572239 4.06407805 4.05841868 ... 3.97538834 2.95490001 3.98844918]\n", + "[-0.02707543 -0.01977522 -0.10232934 ... 0.01823404 0.01750409\n", + " -0.02008064]\n", + "[4.09676164 4.06457016 4.05908405 ... 3.97596722 2.95527272 3.98891971]\n", + "[-0.02695936 -0.01976225 -0.10231956 ... 0.01830749 0.01767162\n", + " -0.01992305]\n", + "[4.09778006 4.06504539 4.05971566 ... 3.97652548 2.95563493 3.98936772]\n", + "[-0.02684438 -0.01974506 -0.10231739 ... 0.01837646 0.01783121\n", + " -0.01977525]\n", + "[4.09877835 4.06550444 4.0603162 ... 3.97706457 2.95598706 3.98979451]\n", + "[-0.02673053 -0.01972266 -0.10232274 ... 0.01844144 0.01798279\n", + " -0.01963688]\n", + "[4.09975711 4.06594795 4.06088814 ... 3.97758586 2.95632953 3.99020129]\n", + "[-0.0266179 -0.01969418 -0.10233555 ... 0.01850286 0.01812633\n", + " -0.0195076 ]\n", + "[4.1007169 4.0663765 4.06143372 ... 3.97809064 2.95666275 3.99058919]\n", + "[-0.0265066 -0.01965881 -0.10235573 ... 0.01856112 0.01826179\n", + " -0.01938709]\n", + "[4.10165818 4.06679063 4.06195495 ... 3.97858007 2.95698712 3.99095931]\n", + "[-0.02639679 -0.01961582 -0.10238319 ... 0.0186166 0.01838916\n", + " -0.01927502]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[4.10258135 4.06719081 4.06245367 ... 3.97905527 2.95730302 3.99131266]\n", + "[-0.02628865 -0.01956456 -0.10241783 ... 0.01866962 0.0185084\n", + " -0.01917104]\n", + "[4.10348675 4.0675775 4.06293153 ... 3.97951729 2.95761083 3.99165022]\n", + "[-0.02618239 -0.01950443 -0.10245954 ... 0.01872051 0.01861952\n", + " -0.01907485]\n", + "[4.10437463 4.06795108 4.06339004 ... 3.97996709 2.9579109 3.99197292]\n", + "[-0.02607824 -0.0194349 -0.10250822 ... 0.01876955 0.01872252\n", + " -0.0189861 ]\n", + "[4.10524515 4.06831191 4.06383052 ... 3.9804056 2.9582036 3.99228163]\n", + "[-0.02597647 -0.01935546 -0.10256371 ... 0.01881699 0.01881742\n", + " -0.01890449]\n", + "[4.10609844 4.06866033 4.06425417 ... 3.9808337 2.95848924 3.99257721]\n", + "[-0.02587735 -0.01926568 -0.1026259 ... 0.01886309 0.01890424\n", + " -0.01882969]\n", + "[4.10693451 4.0689966 4.06466209 ... 3.9812522 2.95876816 3.99286046]\n", + "[-0.02578117 -0.01916515 -0.1026946 ... 0.01890806 0.01898303\n", + " -0.01876139]\n", + "[4.10775332 4.06932101 4.06505521 ... 3.98166189 2.95904066 3.99313219]\n", + "[-0.02568827 -0.01905353 -0.10276966 ... 0.0189521 0.01905385\n", + " -0.01869925]\n", + "[4.10855476 4.06963377 4.06543439 ... 3.98206351 2.95930704 3.99339315]\n", + "[-0.02559896 -0.01893049 -0.10285088 ... 0.01899541 0.01911678\n", + " -0.01864298]\n", + "[4.10933863 4.06993511 4.06580037 ... 3.98245777 2.95956757 3.99364407]\n", + "[-0.02551361 -0.01879577 -0.10293805 ... 0.01903815 0.0191719\n", + " -0.01859223]\n", + "[4.11010468 4.07022519 4.06615382 ... 3.98284534 2.95982252 3.99388567]\n", + "[-0.02543256 -0.01864913 -0.10303096 ... 0.01908048 0.01921932\n", + " -0.0185467 ]\n", + "[4.11085258 4.0705042 4.0664953 ... 3.98322686 2.96007213 3.99411865]\n", + "[-0.02535618 -0.01849039 -0.10312934 ... 0.01912255 0.01925918\n", + " -0.01850607]\n", + "[4.11158194 4.07077227 4.06682532 ... 3.98360295 2.96031664 3.99434369]\n", + "[-0.02528484 -0.0183194 -0.10323294 ... 0.01916448 0.01929162\n", + " -0.01847001]\n", + "[4.11229232 4.07102954 4.06714429 ... 3.98397418 2.96055627 3.99456145]\n", + "[-0.02521893 -0.01813607 -0.10334147 ... 0.01920638 0.01931681\n", + " -0.01843819]\n", + "[4.1129832 4.07127613 4.06745259 ... 3.98434112 2.96079121 3.99477259]\n", + "[-0.02515883 -0.01794034 -0.10345463 ... 0.01924838 0.01933493\n", + " -0.0184103 ]\n", + "[4.11365403 4.07151215 4.06775051 ... 3.9847043 2.96102164 3.99497775]\n", + "[-0.02510491 -0.01773221 -0.10357207 ... 0.01929055 0.01934621\n", + " -0.018386 ]\n", + "[4.11430422 4.0717377 4.06803833 ... 3.98506421 2.96124775 3.99517754]\n", + "[-0.02505755 -0.01751172 -0.10369346 ... 0.01933298 0.01935087\n", + " -0.01836497]\n", + "[4.11493312 4.07195287 4.06831624 ... 3.98542133 2.96146967 3.99537259]\n", + "[-0.02501711 -0.01727899 -0.10381842 ... 0.01937576 0.01934917\n", + " -0.01834688]\n", + "[4.11554006 4.07215776 4.06858443 ... 3.9857761 2.96168754 3.9955635 ]\n", + "[-0.02498395 -0.01703416 -0.10394655 ... 0.01941893 0.01934139\n", + " -0.0183314 ]\n", + "[4.11612437 4.07235246 4.06884302 ... 3.98612895 2.96190149 3.99575084]\n", + "[-0.02495841 -0.01677745 -0.10407745 ... 0.01946255 0.01932782\n", + " -0.01831821]\n", + "[4.11668532 4.07253707 4.06909213 ... 3.98648026 2.96211162 3.9959352 ]\n", + "[-0.02494081 -0.01650913 -0.10421069 ... 0.01950667 0.01930879\n", + " -0.01830697]\n", + "[4.11722222 4.07271168 4.06933184 ... 3.98683039 2.96231802 3.99611713]\n", + "[-0.02493144 -0.01622952 -0.10434581 ... 0.01955132 0.01928464\n", + " -0.01829735]\n", + "[4.11773435 4.07287641 4.06956223 ... 3.98717967 2.96252077 3.99629718]\n", + "[-0.02493059 -0.01593902 -0.10448236 ... 0.01959652 0.01925574\n", + " -0.01828905]\n", + "[4.11822104 4.07303135 4.06978334 ... 3.98752839 2.96271991 3.99647587]\n", + "[-0.02493849 -0.01563807 -0.10461987 ... 0.0196423 0.01922245\n", + " -0.01828173]\n", + "[4.11868162 4.07317665 4.06999521 ... 3.98787683 2.96291551 3.99665371]\n", + "[-0.02495536 -0.01532718 -0.10475784 ... 0.01968865 0.01918519\n", + " -0.01827509]\n", + "[4.11911546 4.07331243 4.0701979 ... 3.98822521 2.9631076 3.99683117]\n", + "[-0.02498137 -0.01500691 -0.10489579 ... 0.01973558 0.01914437\n", + " -0.01826882]\n", + "[4.11952198 4.07343885 4.07039143 ... 3.98857372 2.9632962 3.99700873]\n", + "[-0.02501666 -0.01467787 -0.10503323 ... 0.01978309 0.01910042\n", + " -0.01826262]\n", + "[4.11990066 4.07355608 4.07057585 ... 3.98892254 2.96348132 3.99718683]\n", + "[-0.02506133 -0.01434075 -0.10516964 ... 0.01983114 0.01905376\n", + " -0.0182562 ]\n", + "[4.12025103 4.0736643 4.0707512 ... 3.98927179 2.96366297 3.99736586]\n", + "[-0.02511542 -0.01399627 -0.10530455 ... 0.01987973 0.01900486\n", + " -0.01824929]\n", + "[4.12057271 4.0737637 4.07091755 ... 3.98962156 2.96384113 3.99754623]\n", + "[-0.02517894 -0.01364517 -0.10543745 ... 0.01992882 0.01895416\n", + " -0.01824163]\n", + "[4.12086539 4.07385452 4.07107497 ... 3.98997191 2.96401581 3.99772828]\n", + "[-0.02525187 -0.01328828 -0.10556788 ... 0.01997837 0.01890212\n", + " -0.01823295]\n", + "[4.12112886 4.07393699 4.07122355 ... 3.99032286 2.96418696 3.99791234]\n", + "[-0.0253341 -0.01292642 -0.10569537 ... 0.02002834 0.01884919\n", + " -0.01822304]\n", + "[4.12136301 4.07401137 4.07136338 ... 3.99067441 2.96435459 3.9980987 ]\n", + "[-0.02542552 -0.01256048 -0.10581946 ... 0.02007869 0.01879583\n", + " -0.01821167]\n", + "[4.1215678 4.07407793 4.07149459 ... 3.99102652 2.96451864 3.99828761]\n", + "[-0.02552593 -0.01219133 -0.10593973 ... 0.02012935 0.01874249\n", + " -0.01819864]\n", + "[4.12174334 4.07413698 4.07161733 ... 3.99137911 2.9646791 3.9984793 ]\n", + "[-0.02563513 -0.01181988 -0.10605577 ... 0.02018028 0.0186896\n", + " -0.01818378]\n", + "[4.12188981 4.07418881 4.07173177 ... 3.99173207 2.96483593 3.99867395]\n", + "[-0.02575283 -0.01144705 -0.10616721 ... 0.02023141 0.01863757\n", + " -0.01816691]\n", + "[4.1220075 4.07423376 4.07183809 ... 3.99208529 2.96498911 3.99887171]\n", + "[-0.02587874 -0.01107376 -0.10627369 ... 0.02028269 0.01858684\n", + " -0.0181479 ]\n", + "[4.12209684 4.07427216 4.07193649 ... 3.9924386 2.96513859 3.99907271]\n", + "[-0.02601251 -0.0107009 -0.10637489 ... 0.02033403 0.01853777\n", + " -0.01812664]\n", + "[4.12215831 4.07430437 4.07202722 ... 3.99279182 2.96528437 3.99927703]\n", + "[-0.02615376 -0.01032938 -0.10647054 ... 0.02038539 0.01849075\n", + " -0.01810301]\n", + "[4.12219254 4.07433075 4.07211052 ... 3.99314475 2.9654264 3.9994847 ]\n", + "[-0.02630207 -0.00996008 -0.10656037 ... 0.02043668 0.01844611\n", + " -0.01807695]\n", + "[4.12220023 4.07435168 4.07218666 ... 3.99349718 2.96556468 3.99969574]\n", + "[-0.026457 -0.00959384 -0.10664417 ... 0.02048785 0.01840419\n", + " -0.0180484 ]\n", + "[4.12218217 4.07436752 4.07225594 ... 3.99384888 2.96569919 3.99991014]\n", + "[-0.02661809 -0.00923148 -0.10672177 ... 0.02053882 0.01836527\n", + " -0.01801731]\n", + "[4.12213925 4.07437867 4.07231864 ... 3.99419959 2.96582991 4.00012785]\n", + "[-0.02678485 -0.00887378 -0.10679302 ... 0.02058953 0.01832963\n", + " -0.01798368]\n", + "[4.12207242 4.07438549 4.07237509 ... 3.99454907 2.96595686 4.00034878]\n", + "[-0.02695678 -0.00852147 -0.10685782 ... 0.0206399 0.0182975\n", + " -0.0179475 ]\n", + "[4.12198271 4.07438839 4.07242563 ... 3.99489705 2.96608003 4.00057284]\n", + "[-0.02713337 -0.00817526 -0.1069161 ... 0.02068988 0.0182691\n", + " -0.0179088 ]\n", + "[4.12187119 4.07438773 4.07247058 ... 3.99524327 2.96619944 4.00079989]\n", + "[-0.0273141 -0.00783578 -0.10696782 ... 0.02073941 0.0182446\n", + " -0.0178676 ]\n", + "[4.12173899 4.07438388 4.07251028 ... 3.99558745 2.96631511 4.00102979]\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEGCAYAAABiq/5QAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAdI0lEQVR4nO3dfXAc9Z3n8fd3Ro+2ZRvbwgabRDYQwDiLUQTH0y0EzJYh2ZDlyAH7xBpSDnebxCShOJNUjmyqtspUUtnwdEv5wMBmOXO7BBKKZXEIEKiD8CCDYoyNlycHixgsy/jZsjSa7/3RPdKMNLJka3pG6v68iqnp7unu36891Gda3+n5tbk7IiKSHKlKd0BERMpLwS8ikjAKfhGRhFHwi4gkjIJfRCRhqirdgZGYMWOGNzU1VbobIiLjytq1a7e7e+PA5eMi+Juammhtba10N0RExhUz+32x5Sr1iIgkjIJfRCRhFPwiIgmj4BcRSZjIgt/MVpnZNjNbX+S1G83MzWxGVO2LiEhxUZ7x3w8sHrjQzI4DLgY+iLBtEREZQmTB7+7PAzuKvPQPwE1AWYcF3Xcww7sde9FopCKSdGW9jt/MvgR86O6/M7Ph1l0KLAX41Kc+Nap2dx3oYdFPnqNjz0EuPPlobv0vf0RjQ+2o9ikiMl6V7ctdM5sAfA/4nyNZ391XunuLu7c0Ng764dlheXrjx3TsOQjAM29t45LbnufZt7aNap8iIuNVOa/qOR6YC/zOzDYDc4DXzGxW1A2/8n5hxWn73m6W3P8qf/6/X+IXr3/Ige7eqLsgIjJmlK3U4+5vAEfn5sPwb3H37VG3nR/8E2vS7AuD/sV3O3nx3U4aaqs454TpnDUveHxmZgPp1KFLUSIi41VkwW9mq4ELgBlm1g7c4u73RtXeULbt6eK97fsAqEmnePo7F/D3T2zk39b9gWz4Pe+egxnWvPkxa978GIC66hQnz5rMqcdO5sSjJzG3cRLzZkzk2Kn1+kAQkXEvsuB396uHeb0pqrbztW7+pG964XFTmTWljjuuPp3vXnoyj7z2If/auoXNnfsLtunqydK2ZSdtW3YWLK9KGcdMrWP21HqOmVLPzMl1zJxcS2NDLdMn1jJjUg1HTaxhan01VWn9Nk5ExqZxMTrnaOSXec6cO61v+pgp9fzt50/gv19wPO9s28tv3+vkpfc6ad38CdvCL4IHymSdLTsOsGXHgWHbbairYuqEaqbUB4+G2moa6qpoqKtmUl0VE2vSTKytYlJtFRNq0kyoqaK+Js2EmjT11Wnqa9LUVaWprU5RW5ViuKugRERGKvbB//IQwZ9jZpw4s4ETZzbw12c3AdCx5yAbtu5m49bdvN+xj/e27+X97fvYvrd7xO3u6cqwpyvDFob/kBiJ2qoUddVpaqtS1FanqEmnqK1KU1MVTNdUBY/qtFGdDpZVhdPV6RRVKaMqHbxelcq9ZqRTqfDZqEoF88Gz9T0XPKx/OhXOp8xIpeh7feDyVLiNGf3L815LWfBaME3fvD7sRKIR6+DfdaCHtz7aDUDKoPnTR41ou8aGWs5vaOT8zxReRnqgu5cPdx7gw50H+Hh3F9t2d/HR7i4693azfe9BOvd2s2N/N7sO9FDq34kdzGQ5mMmWdqdjXP6HgZH7MMj7sMitkwqmcx8YEG6T+xAh+BDJbd+3r3AbwmnL22d/e3nLBmxLwfqF6wa96JsYtF/y5snbhkH96P8ANMhb1/LW72umoO3Cfeb1acDnabE2+vvX36f89YvNWJE+FdvnsOsP1cCg14Zvo3Cd4u0N3Vr+Pkd3EjKSzW3I1uHmS0+muoTl41gH/9rf7+gL4AWzpzCpdnSHW1+T5oSjJ3HC0ZMOuV5v1tl1oKfgsaerh73hXwF7DmbYl3t093KgO8O+g73s7+nlYE8vB3p62d/dS1dPLwd7snT3Jivwc9yh153gGiz94lqS66bFJ1GdLt3+Yh38BWWepsFlnqikU8a0iTVMm1hTkv31Zp3uTJaDmV66erJ90wczwYdCdyZ49PQGj4OZLD29TqY3t8zJZHPL8qezZLLBfG82eK0362SyHgRub/9rvQ694XQ2S7DcIZsNtsl68Aim6Z8O53vdcQ+nw/U9nHbCbbKOQ99rIhKNWAf/UF/sjjfplFFfE3zhmxQehn/WPfzQCJb3hh8OuQ+R/PWyDo6D0zede809+Asityy3D89rK5jOWyfcloHLC7YFKNw+1//glf5tw/8K9tc/nXfc4Ya59sjfb96+c/P9rw/eX+G2hR+o+W0O/vcv7N/AfeVvP3AfBXsrWD9vnSH3U3z9ga8N0cSQKw3VRuGmw287kv6MdPuRtJ1TVeLLyGMb/Pu7M7zRvqtv/owynvHL6OVq8iksvv+TilRIbC82b/tgJ5nwF1onzWzgqBKVXURExrvYBn9+ff+MuSO7mkdEJAli+1f0VWcex6enT+CV93dw0SkzK90dEZExI7bBf8yUei5vnsPlzXMq3RURkTEltqUeEREpTsEvIpIwCn4RkYRR8IuIJIyCX0QkYRT8IiIJo+AXEUkYBb+ISMIo+EVEEiay4DezVWa2zczW5y37kZm9ZWbrzOxRM5saVfsiIlJclGf89wOLByx7Cljg7n8E/Adwc4Tti4hIEZEFv7s/D+wYsOxX7p4JZ18CNJCOiEiZVbLGfy3w70O9aGZLzazVzFo7OjrK2C0RkXirSPCb2feADPDgUOu4+0p3b3H3lsbGxvJ1TkQk5so+LLOZXQN8EbjIh7vRpIiIlFxZg9/MFgP/Azjf3feXs20REQlEeTnnauC3wElm1m5m1wF3Ag3AU2bWZmZ3R9W+iIgUF9kZv7tfXWTxvVG1JyIiI6Nf7oqIJIyCX0QkYRT8IiIJo+AXEUkYBb+ISMIo+EVEEkbBLyKSMAp+EZGEUfCLiCSMgl9EJGEU/CIiCaPgFxFJGAW/iEjCKPhFRBKm7HfgKpvOd+H956G3B6bNgxMXVbpHIiJjQnyD/w+vw+M3BNOnXq7gFxEJxbfUk8r7TMv2VK4fIiJjTHyDP13TP92r4BcRyYlx8Ff3Tyv4RUT6xDf4VeoRESkqsuA3s1Vmts3M1uctm2ZmT5nZ2+HzUVG1r1KPiEhxUZ7x3w8sHrBsOfC0u58IPB3OR0OlHhGRoiILfnd/HtgxYPFlwAPh9APAl6NqvzD4uyNrRkRkvCl3jX+mu28FCJ+PHmpFM1tqZq1m1trR0XH4LaXygj+bOfztRURiasx+uevuK929xd1bGhsbD38HKvWIiBRV7uD/2MyOAQift0XWkko9IiJFlTv4HwOuCaevAX4ZWUsq9YiIFBXl5Zyrgd8CJ5lZu5ldB6wALjazt4GLw/loqNQjIlJUZIO0ufvVQ7x0UVRtFii4jl+lHhGRnDH75e6oFfxyV6UeEZGc+Aa/zvhFRIqKcfCrxi8iUkx8gz+VBiycccj2VrI3IiJjRnyDH1TuEREpIubBr3KPiMhA8Q5+XdkjIjJIvINfpR4RkUFiHvwq9YiIDJSg4NcZv4gIxD34NVCbiMgg8Q5+lXpERAZJUPCr1CMiAnEPfpV6REQGiXfwq9QjIjJIgoJfpR4REYh78KvUIyIySLyDX7/cFREZJObBnzdWj2r8IiJA3INfpR4RkUEiu9n6oZjZt4CvAg68ASxx966SN6RSj0ji9PT00N7eTldX6SNlrKqrq2POnDlUV1cPvzIVCH4zmw18E5jv7gfM7F+Aq4D7S96YSj0iidPe3k5DQwNNTU2Y2fAbjHPuTmdnJ+3t7cydO3dE21Sq1FMF1JtZFTAB+EMkrRSUehT8IknQ1dXF9OnTExH6AGbG9OnTD+svnLIHv7t/CPwY+ADYCuxy918NXM/MlppZq5m1dnR0HFljBaUeBb9IUiQl9HMO93jLHvxmdhRwGTAXOBaYaGZ/OXA9d1/p7i3u3tLY2HhkjemXuyJSZp2dnSxcuJCFCxcya9YsZs+e3Tff3T3y7xpXrVrFRx99FEkfK/Hl7iLgfXfvADCzR4BzgH8ueUv65a6IlNn06dNpa2sD4Ac/+AGTJk3ixhtvPOz9rFq1iubmZmbNmlXqLlYk+D8AzjKzCcAB4CKgNZKWdDmniIwhDzzwAHfddRfd3d2cc8453HnnnWSzWZYsWUJbWxvuztKlS5k5cyZtbW1ceeWV1NfX88orr1BTUzN8AyNU9uB395fN7GHgNSADvA6sjKQxlXpEEq1p+b9Ftu/NK75wWOuvX7+eRx99lBdffJGqqiqWLl3KQw89xPHHH8/27dt54403ANi5cydTp07ljjvu4M4772ThwoUl7/sha/z5tXczO3fAa18/0kbd/RZ3P9ndF7j7X7n7wSPd1yGp1CMiY8Svf/1rXn31VVpaWli4cCHPPfcc7777LieccAKbNm1i2bJlrFmzhilTpkTel+G+3P123vQdA167tsR9KT2VekRkjHB3rr32Wtra2mhra2PTpk18//vfZ/r06axbt47zzjuP22+/na997WuR92W4Uo8NMV1sfuxRqUck0Q63HBOlRYsWccUVV7Bs2TJmzJhBZ2cn+/bto76+nrq6Or7yla8wd+5crr/+egAaGhrYs2dPJH0ZLvh9iOli82OPSj0iMkZ89rOf5ZZbbmHRokVks1mqq6u5++67SafTXHfddbg7Zsatt94KwJIlS/jqV78ayZe75j50fpvZfuAdgrP748Npwvl57j6xZD05hJaWFm9tPYILf177GTwWfhWx8C/gy/+rtB0TkTFn48aNnHLKKZXuRtkVO24zW+vuLQPXHe6Mf3z/62mQNhGRQQ4Z/O7++/x5M5sO/DHwgbuvjbJjJaFB2kREBhnucs7HzWxBOH0MsJ7gap6fmdkNZejf6OiqHhGRQYa7nHOuu68Pp5cAT7n7nwL/ifFwOadKPSIigwwX/Pn1kYuAJwDcfQ+QjapTJaNSj4jIIMN9ubvFzL4BtAPNwJMAZlYPjOxWL5WU0nX8IiIDDXfGfx1wKvA3wJXuvjNcfhZwX4T9Ko38Uo9uxCIiZVCKYZmXLFnCpk2bIuvjcFf1bAOuL7L8WeDZqDpVMvrlroiU2UiGZXZ33J1Uqvi59333RXtePdxVPY8d6hFpz0pBwS8iY8Q777zDggULuP7662lubmbr1q0sXbqUlpYWTj31VH74wx/2rXveeefR1tZGJpNh6tSpLF++nNNOO42zzz6bbdu2jbovw9X4zwa2AKuBlxkP4/Pk0z13RZLtBxGOdPmDXYe9yYYNG7jvvvu4++67AVixYgXTpk0jk8nw+c9/niuuuIL58+cXbLNr1y7OP/98VqxYwbe//W1WrVrF8uXLR9X14Wr8s4DvAguA24CLge3u/py7PzeqlstBZ/wiMoYcf/zxnHHGGX3zq1evprm5mebmZjZu3MiGDRsGbVNfX88ll1wCwOc+9zk2b9486n4cMvjdvdfdn3T3awi+0H0H+E14pc/Yp+AXkTFk4sT+4c3efvttbrvtNp555hnWrVvH4sWL6erqGrRN/uBs6XSaTGb0P0Yd9g5cZlYLfAG4GmgCbgceGXXL5aBSj0iyHUE5plx2795NQ0MDkydPZuvWraxZs4bFixeXpe1DBr+ZPUBQ5vl34O/yfsU7PmhYZhEZo5qbm5k/fz4LFixg3rx5nHvuucNvVCLDDcucBfaFs/krGuDuPjnCvvU54mGZD3wCtzYF07VT4OYPStovERl7NCxzvyMaltndh/vyd2xTqUdEZJCKBLuZTTWzh83sLTPbaGZnR9KQBmkTERlk2C93I3Ib8KS7X2FmNcCESFpJDxiW2R1sfP0UQUSk1Moe/GY2meBmLn8D4O7dQDSn42ZgafDeYD6bKfwwEJFYyt2/NikO9V1tMZUo9cwDOoD7zOx1M7vHzAbdu9fMlppZq5m1dnR0HHlrKveIJEpdXR2dnZ2HHYbjlbvT2dlJXV3diLepRKmnimCI52+4+8tmdhuwHPh+/kruvhJYCcFVPUfcWroaMgeCaf2ISyT25syZQ3t7O6M6YRxn6urqmDNnzojXr0TwtwPt7v5yOP8wQfBHI6WbsYgkSXV1NXPnzq10N8a0spd63P0jghu8nBQuuggYPEBFqWhMfhGRApW6qucbwIPhFT3vEdzPNxoar0dEpEBFgt/d24BBvyaLhIJfRKTA+P5l7kjo17siIgXiH/w64xcRKaDgFxFJmPgHv0o9IiIF4h/8+uWuiEiBBAS/fsAlIpIv/sGfGjBCp4hIwsU/+FXqEREpkIDgV6lHRCRf/INfpR4RkQLxD36VekRECiQg+FXqERHJl4Dgzz/jV/CLiMQ/+PXLXRGRAvEPfpV6REQKJCD4VeoREckX/+BXqUdEpED8g1+lHhGRAgkIfl3HLyKSL/7Br1/uiogUqFjwm1nazF43s8cjbajgDlw64xcRqeQZ/zJgY+St6NaLIiIFKhL8ZjYH+AJwT+SNqdQjIlKgUmf8PwVuArJDrWBmS82s1cxaOzo6jrwlfbkrIlKg7MFvZl8Etrn72kOt5+4r3b3F3VsaGxuPvEFdzikiUqASZ/znAl8ys83AQ8CFZvbPkbWmUo+ISIGyB7+73+zuc9y9CbgKeMbd/zKyBlXqEREpEP/r+FXqEREpUDX8KtFx998Av4m0EQ3SJiJSIP5n/BqkTUSkQPyDX6UeEZECCQh+lXpERPLFP/hV6hERKRD/4NcgbSIiBRIW/PoBl4hI/INfpR4RkQLxD379cldEpEACgj//ck6VekRE4h/8KvWIiBSIf/Cr1CMiUiD+wZ9K9097FrK9leuLiMgYEP/gNyss9+jXuyKScPEPfigs96jOLyIJl5Dg10BtIiI5CQl+DdQmIpKTjODXJZ0iIn2SEfwq9YiI9ElI8KvUIyKSk4zgV6lHRKRP2YPfzI4zs2fNbKOZvWlmyyJvtKDUo1/vikiyVQ2/SsllgO+4+2tm1gCsNbOn3H1DZC0WlHo0UJuIJFvZz/jdfau7vxZO7wE2ArMjbVSlHhGRPhWt8ZtZE3A68HKR15aaWauZtXZ0dIyuId1+UUSkT8WC38wmAT8HbnD33QNfd/eV7t7i7i2NjY2ja0y3XxQR6VOR4DezaoLQf9DdH4m8QZV6RET6VOKqHgPuBTa6+0/K0qhKPSIifSpxxn8u8FfAhWbWFj4ujbTFtIZlFhHJKfvlnO7+/wAra6Maj19EpE8yfrmr8fhFRPokJPg1SJuISE5Cgl+DtImI5CQj+HU5p4hIn2QEv0o9IiJ9EhL8KvWIiOQkI/hV6hER6ZOM4Nd4/CIifRIS/BqPX0QkJxnBr1KPiEifZAS/BmkTEemTwOBXqUdEki0Zwa9Sj4hIn2QEv0o9IiJ9Ehj8OuMXkWRLRvBrPH4RkT5lvxFLReRfx//hWnjqFvjUWdBwDEyYDhOmQVU9pJLxOSgiyZaM4K+f2j+9fzu88FN4och6VfVQXRd8UKRrghJRqgosHT4bpNLBvKXyHtb/jPXP56b7nhlmWW42b3nBMhvwerF1hlvOYa5zBPsacjej2PaIVh/Fjd4O+e8RYbuDdhXFzerKewO8aI7hSIyVfozAwH+zxSsKS9ajlIzgP7YZ5l8GG3556PUyB4KHiMhY8id/r+A/bKkU/Nd/gn2d8MGLsPkF2PYm7N8B+zuD596Dle6liEhZVCT4zWwxcBuQBu5x9xVlaXjidDjlT4PHQNlscLbf0xVc8tnbHXwR7L2QzQQPzwbreS+4B/MF01nAw3nvn8aDNg61rOh0jufN5y0vWIchlo9inSPa15A7GsW2I9jPqPd7BG2UvN1BOyvhvnK7jGCfh26wzO0NoezHXWKp0kZ12YPfzNLAXcDFQDvwqpk95u4byt2XAqkU1EwMHiIiMVaJy1jOBN5x9/fcvRt4CLisAv0QEUmkSgT/bGBL3nx7uKyAmS01s1Yza+3o6Chb50RE4q4SwV/smqpBBTh3X+nuLe7e0tjYWIZuiYgkQyWCvx04Lm9+DvCHCvRDRCSRKhH8rwInmtlcM6sBrgIeq0A/REQSqexX9bh7xsy+DqwhuJxzlbu/We5+iIgkVUWu43f3J4AnKtG2iEjSmY+DHzaYWQfw+yPcfAawvYTdGS+SeNxJPGZI5nEn8Zjh8I/70+4+6OqYcRH8o2Fmre7eUul+lFsSjzuJxwzJPO4kHjOU7rg1DrGISMIo+EVEEiYJwb+y0h2okCQedxKPGZJ53Ek8ZijRcce+xi8iIoWScMYvIiJ5FPwiIgkT6+A3s8VmtsnM3jGz5ZXuTxTM7Dgze9bMNprZm2a2LFw+zcyeMrO3w+ejKt3XUjOztJm9bmaPh/Nzzezl8Jj/bzgkSKyY2VQze9jM3grf87Pj/l6b2bfC/7fXm9lqM6uL43ttZqvMbJuZrc9bVvS9tcDtYbatM7Pmw2krtsGfd8OXS4D5wNVmNr+yvYpEBviOu58CnAX8bXicy4Gn3f1E4OlwPm6WARvz5m8F/iE85k+A6yrSq2jdBjzp7icDpxEcf2zfazObDXwTaHH3BQTDvFxFPN/r+4HFA5YN9d5eApwYPpYC/3g4DcU2+EnIDV/cfau7vxZO7yEIgtkEx/pAuNoDwJcr08NomNkc4AvAPeG8ARcCD4erxPGYJwN/DNwL4O7d7r6TmL/XBEPL1JtZFTAB2EoM32t3fx7YMWDxUO/tZcA/eeAlYKqZHTPStuIc/CO64UucmFkTcDrwMjDT3bdC8OEAHF25nkXip8BNQDacnw7sdPdMOB/H93se0AHcF5a47jGzicT4vXb3D4EfAx8QBP4uYC3xf69zhnpvR5VvcQ7+Ed3wJS7MbBLwc+AGd99d6f5Eycy+CGxz97X5i4usGrf3uwpoBv7R3U8H9hGjsk4xYU37MmAucCwwkaDMMVDc3uvhjOr/9zgHf2Ju+GJm1QSh/6C7PxIu/jj3p1/4vK1S/YvAucCXzGwzQQnvQoK/AKaG5QCI5/vdDrS7+8vh/MMEHwRxfq8XAe+7e4e79wCPAOcQ//c6Z6j3dlT5FufgT8QNX8La9r3ARnf/Sd5LjwHXhNPXAL8sd9+i4u43u/scd28ieF+fcfe/AJ4FrghXi9UxA7j7R8AWMzspXHQRsIEYv9cEJZ6zzGxC+P967phj/V7nGeq9fQz46/DqnrOAXbmS0Ii4e2wfwKXAfwDvAt+rdH8iOsbzCP7EWwe0hY9LCWreTwNvh8/TKt3XiI7/AuDxcHoe8ArwDvCvQG2l+xfB8S4EWsP3+xfAUXF/r4G/A94C1gM/A2rj+F4Dqwm+x+ghOKO/bqj3lqDUc1eYbW8QXPU04rY0ZIOISMLEudQjIiJFKPhFRBJGwS8ikjAKfhGRhFHwi4gkjIJfEsHMXgyfm8zsz0u87+8Wa0tkrNLlnJIoZnYBcKO7f/Ewtkm7e+8hXt/r7pNK0T+RctAZvySCme0NJ1cA/9nM2sJx3tNm9iMzezUc1/xr4foXhPc5+D8EP5DBzH5hZmvDseGXhstWEIwc2WZmD+a3Ff6q8kfhOPJvmNmVefv+Td64+g+Gv0rFzFaY2YawLz8u57+RJEfV8KuIxMpy8s74wwDf5e5nmFkt8IKZ/Spc90xggbu/H85f6+47zKweeNXMfu7uy83s6+6+sEhblxP80vY0YEa4zfPha6cDpxKMr/ICcK6ZbQD+DDjZ3d3Mppb86EXQGb/InxCMedJGMJz1dIKbWwC8khf6AN80s98BLxEMkHUih3YesNrde939Y+A54Iy8fbe7e5ZgmI0mYDfQBdxjZpcD+0d9dCJFKPgl6Qz4hrsvDB9z3T13xr+vb6Xgu4FFwNnufhrwOlA3gn0P5WDedC9Q5cH48mcSjLT6ZeDJwzoSkRFS8EvS7AEa8ubXAP8tHNoaM/tMeHOTgaYAn7j7fjM7meA2lzk9ue0HeB64MvweoZHg7lmvDNWx8J4KU9z9CeAGgjKRSMmpxi9Jsw7IhCWb+wnuYdsEvBZ+wdpB8dv4PQlcb2brgE0E5Z6clcA6M3vNg+Ghcx4FzgZ+RzCC6k3u/lH4wVFMA/BLM6sj+GvhW0d2iCKHpss5RUQSRqUeEZGEUfCLiCSMgl9EJGEU/CIiCaPgFxFJGAW/iEjCKPhFRBLm/wN4Lu97+vuppAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "als.fit(train, test)\n", + "plot_learning_curve(als)" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "id": "c377cda2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "13.336192041852941\n", + "0.3099205032508016\n" + ] + } + ], + "source": [ + "print(als.test_mse_record[-1])\n", + "print(als.train_mse_record[-1])" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "id": "0b23e873", + "metadata": {}, + "outputs": [], + "source": [ + "user0prediction = als.predict()[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "id": "b09e636b", + "metadata": {}, + "outputs": [], + "source": [ + "user0actual = ratings[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "id": "99fba63f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.0005735931449630593" + ] + }, + "execution_count": 82, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mean_squared_error(user0prediction, user0actual)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7.4 64-bit ('base': conda)", + "language": "python", + "name": "python37464bitbaseconda9114583a17cf498dbdf9713d49f5bef8" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/recsys/als-jg-edits.ipynb b/recsys/als-jg-edits.ipynb new file mode 100644 index 0000000..dcd6981 --- /dev/null +++ b/recsys/als-jg-edits.ipynb @@ -0,0 +1,438 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 106, + "id": "7c78c2e5", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from collections import defaultdict\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.metrics import mean_squared_error" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "id": "3ea8dfe7", + "metadata": {}, + "outputs": [], + "source": [ + "ratings_path = \"/Users/amitnarang/Downloads/ml-latest-small/ratings.csv\"" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "id": "e5487124", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " userId movieId rating timestamp\n", + "0 1 1 4.0 964982703\n", + "1 1 3 4.0 964981247\n", + "2 1 6 4.0 964982224\n", + "3 1 47 5.0 964983815\n", + "4 1 50 5.0 964982931\n", + "... ... ... ... ...\n", + "100831 610 166534 4.0 1493848402\n", + "100832 610 168248 5.0 1493850091\n", + "100833 610 168250 5.0 1494273047\n", + "100834 610 168252 5.0 1493846352\n", + "100835 610 170875 3.0 1493846415\n", + "\n", + "[100836 rows x 4 columns]\n", + "[[0. 0. 0. ... 0. 0. 0. ]\n", + " [0. 0. 0. ... 0. 0. 0. ]\n", + " [0. 0. 0. ... 0. 0. 0. ]\n", + " ...\n", + " [2.5 2. 2. ... 0. 0. 0. ]\n", + " [3. 0. 0. ... 0. 0. 0. ]\n", + " [5. 0. 0. ... 0. 0. 0. ]]\n", + " userId movieId rating\n", + "0 1 1 4.0\n", + "1 1 3 4.0\n", + "2 1 6 4.0\n", + "3 1 47 5.0\n", + "4 1 50 5.0\n", + ".. ... ... ...\n", + "227 1 3744 4.0\n", + "228 1 3793 5.0\n", + "229 1 3809 4.0\n", + "230 1 4006 4.0\n", + "231 1 5060 5.0\n", + "\n", + "[232 rows x 3 columns]\n" + ] + } + ], + "source": [ + "df = pd.read_csv(ratings_path, sep = ',')\n", + "columns=[\"userId\", \"movieId\", \"rating\"]\n", + "print(df)\n", + "n_users = max(df['userId'])\n", + "n_items = max(df['movieId'])\n", + "ratings = np.zeros((n_users, n_items))\n", + "data = []\n", + "for row in df.itertuples():\n", + " if row.userId == 1:\n", + " data.append([row.userId, row.movieId, row.rating])\n", + " else:\n", + " ratings[row.userId - 1, row.movieId - 1] = row.rating\n", + "stream_df = pd.DataFrame(data=data, columns=columns)\n", + "print(ratings)\n", + "print(stream_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "id": "7808d9b0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
userIdmovieIdrating
0114.0
1134.0
2164.0
31475.0
41505.0
............
222137444.0
223137935.0
224138094.0
225140064.0
226150605.0
\n", + "

227 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " userId movieId rating\n", + "0 1 1 4.0\n", + "1 1 3 4.0\n", + "2 1 6 4.0\n", + "3 1 47 5.0\n", + "4 1 50 5.0\n", + ".. ... ... ...\n", + "222 1 3744 4.0\n", + "223 1 3793 5.0\n", + "224 1 3809 4.0\n", + "225 1 4006 4.0\n", + "226 1 5060 5.0\n", + "\n", + "[227 rows x 3 columns]" + ] + }, + "execution_count": 111, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def create_split(stream_df):\n", + " \"\"\"\n", + " split into train and test sets\n", + " User vectors \n", + " \n", + " train vectors on the ratings matrix\n", + " no need to test\n", + " \n", + " then train the user vectors with the streaming update, test on test set\n", + " \n", + " then call it on the train data for those new users and the test data for those users\n", + " \"\"\"\n", + " test_data = np.zeros((n_users, n_items))\n", + " data = []\n", + " for userId in stream_df[\"userId\"].unique():\n", + " user_df = stream_df[stream_df[\"userId\"] == userId]\n", + " random_five = user_df.sample(5)\n", + " for row in user_df.itertuples():\n", + " if any(random_five[\"movieId\"] == row.movieId):\n", + " test_data[row.userId - 1, row.movieId - 1] = row.rating\n", + " else:\n", + " data.append([row.userId, row.movieId, row.rating])\n", + " train_df = pd.DataFrame(data=data, columns=columns)\n", + " \n", + " return test_data, train_df\n", + " \n", + " '''\n", + " train = np.zeros(ratings.shape) \n", + " \n", + " test = np.zeros(ratings.shape)\n", + " train = ratings.copy()\n", + " for user in range(ratings.shape[0]):\n", + " test_index = np.random.choice(\n", + " np.flatnonzero(ratings[user]), size = 5, replace = False)\n", + "\n", + " train[user, None] = 0.0\n", + " test[user, test_index] = ratings[user, test_index]\n", + " \n", + " # assert that training and testing set are truly disjoint\n", + " return train, test'''\n", + "\n", + "test, train_df = create_split(stream_df)\n", + "train_df" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "id": "fdd592a0", + "metadata": {}, + "outputs": [], + "source": [ + "class ALSModel:\n", + " def __init__(self, l, num_features, n_iters):\n", + " self.l = l\n", + " self.num_features = num_features\n", + " self.n_iters = n_iters\n", + " \n", + " def fit(self, train):\n", + " \"\"\"\n", + " pass in training and testing at the same time to record\n", + " model convergence, assuming both dataset is in the form\n", + " of User x Item matrix with cells as ratings\n", + " \"\"\"\n", + " self.n_user, self.n_item = train.shape\n", + " self.user_factors = np.random.random((self.n_user, self.num_features))\n", + " self.item_factors = np.random.random((self.n_item, self.num_features))\n", + " \n", + " # record the training and testing mse for every iteration\n", + " # to show convergence later (usually, not worth it for production) \n", + " for i in range(self.n_iters):\n", + " self.user_factors = self._als_step(train, self.user_factors, self.item_factors)\n", + " self.item_factors = self._als_step(train.T, self.item_factors, self.user_factors) \n", + " return self \n", + " \n", + " def fit_stream(self, ratings, test, train_df):\n", + " '''\n", + " when ratings stream in, add them to the rating matrix\n", + " run ALS update for user vector on entire rating matrix \n", + " test on test matrix\n", + " compute mse, add to list\n", + " '''\n", + " self.test_mse_record = []\n", + " for row in train_df.itertuples():\n", + " ratings[row.userId - 1, row.movieId - 1] = row.rating\n", + " self.user_factors = self._als_step(ratings, self.user_factors, self.item_factors)\n", + " predictions = self.predict()\n", + " test_mse = self.compute_mse(test, predictions)\n", + " self.test_mse_record.append(test_mse)\n", + " \n", + " def _als_step(self, ratings, solve_vecs, fixed_vecs):\n", + " \"\"\"\n", + " when updating the user matrix,\n", + " the item matrix is the fixed vector and vice versa\n", + " \"\"\"\n", + " A = fixed_vecs.T.dot(fixed_vecs) + np.eye(self.num_features) * self.l\n", + " b = ratings.dot(fixed_vecs)\n", + " A_inv = np.linalg.inv(A)\n", + " solve_vecs = b.dot(A_inv)\n", + " return solve_vecs\n", + " \n", + " def predict(self):\n", + " \"\"\"predict ratings for every user and item\"\"\"\n", + " pred = self.user_factors.dot(self.item_factors.T)\n", + " return pred\n", + " \n", + " @staticmethod\n", + " def compute_mse(y_true, y_pred):\n", + " \"\"\"ignore zero terms prior to comparing the mse\"\"\"\n", + " mask = np.nonzero(y_true)\n", + " mse = mean_squared_error(y_true[mask], y_pred[mask])\n", + " return mse\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "id": "c12565f4", + "metadata": {}, + "outputs": [], + "source": [ + "def plot_learning_curve(model):\n", + " \"\"\"visualize the training/testing loss\"\"\"\n", + " linewidth = 3\n", + " plt.plot(model.test_mse_record, label = 'Test', linewidth = linewidth)\n", + " plt.xlabel('iterations')\n", + " plt.ylabel('MSE')\n", + " plt.legend(loc = 'best')" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "id": "586142de", + "metadata": {}, + "outputs": [], + "source": [ + "als = ALSModel(n_iters = 100, num_features = 100, l = 0.01)" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "id": "de62d106", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<__main__.ALSModel at 0x7f9e886f0250>" + ] + }, + "execution_count": 115, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "als.fit(ratings)" + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "id": "1bb4f7e6", + "metadata": {}, + "outputs": [], + "source": [ + "movie_factors = als.item_factors" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "id": "f794be70", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEGCAYAAABiq/5QAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3deXxU1f3/8dcn+0pCEpYACQFklZ2AghvuiPu+162l+rVWrba19dtqf36/1Vbrt3VpLa1Ua6m2dalL3cUFF8CArLLKGrYAgRASss75/THDJIFshEwmM/N+Ph7zyL3nbp+5DJ+5c+6555hzDhERiRxRwQ5AREQ6lhK/iEiEUeIXEYkwSvwiIhFGiV9EJMLEBDuA1sjKynJ5eXnBDkNEJKTMnz9/p3Ou28HlIZH48/LyKCgoCHYYIiIhxcw2NFYesKoeM8sxsw/NbLmZLTOz2w9afreZOTPLClQMIiJyqEBe8dcAdznnFphZKjDfzN5zzn1tZjnA6cDGAB5fREQaEbArfufcVufcAt90KbAc6O1b/H/AjwA9Niwi0sE6pI7fzPKAMcBcMzsP2OycW2RmzW0zDZgGkJub2wFRiki4qq6uprCwkIqKimCHEhAJCQn06dOH2NjYVq0f8MRvZinAS8AdeKt/7gXOaGk759x0YDpAfn6+fhmISJsVFhaSmppKXl4ezV1whiLnHLt27aKwsJB+/fq1apuAtuM3s1i8SX+mc+5lYADQD1hkZuuBPsACM+sZyDhEJLJVVFSQmZkZdkkfwMzIzMw8rF8zAbviN+8ZfhpY7px7FMA5twToXm+d9UC+c25noOI4YP3OMtISY+maHBfoQ4lIJxSOSf+Aw31vgbziPw64FjjFzBb6XlMDeLwmvfJVIZMf+YgTfv0hG3aVBSMEEZFOI2BX/M65T4Fmv4acc3mBOn69Y/DfrywFYF9lDU9+uIZfXzIq0IcVEQFg165dnHrqqQBs27aN6OhounXzPkw7b9484uJaVwsxY8YMpk6dSs+eR14zHhJP7h6JRYUllFXV+udfW7SFX5w3nOLyKmYt307J/mqum5RHakLr7oaLiByOzMxMFi5cCMD9999PSkoKd99992HvZ8aMGYwdO1aJvzVenL+pwXxFtYehP3+7QdnCTXt49PLRxEVHkRAb3ZHhiUgEe/bZZ3nyySepqqpi0qRJPPHEE3g8Hm644QYWLlyIc45p06bRo0cPFi5cyOWXX05iYuJh/VJoTFgn/tKKal5duKXF9d5fXsTI+98lPiaKb03sy/dOGUhaon4BiISjvHv+E7B9r3/o7Favu3TpUl555RU+//xzYmJimDZtGi+88AIDBgxg586dLFmyBIA9e/aQnp7O448/zhNPPMHo0aOPOM6wTvwz526ktKIGgKS4aJLiotm5rwqAjOQ4isuqGqxfWePhT7PX8dycDYzPyyA3I4krJ+QyvHdah8cuIuHt/fff58svvyQ/Px+A/fv3k5OTw5lnnsnKlSu5/fbbmTp1Kmec0eJjT4ctbBN/RXUtf569zj9/37nDOPPonmwq3k+/bsmkxMewYONuLvr9541s62H2am8L05lzN/LTqUOYduKADotdRMKfc44bb7yRBx544JBlixcv5q233uKxxx7jpZdeYvr06e167LBN/P8s2MTOfZUAZKclcOGYPsTFRJGeVFcvNja3K6cN7c77y4vo0SWe6ybl8dRH37DX9yvhgKc+XstNx/cnOip82wGLRIrDqY4JpNNOO41LLrmE22+/naysLHbt2kVZWRmJiYkkJCRw6aWX0q9fP26++WYAUlNTKS0tbZdjh23ir6rxkBwXTVlVLd85oT9xMY0/svDEVWMpWL+bkTlpdEmI5cIxvfn3V1uIMnjwrRUAFJdVsXDTHsb17dqRb0FEwtiIESO47777OO200/B4PMTGxvLUU08RHR3NTTfdhHMOM+NXv/oVADfccAPf/va32+XmrjnX+bvByc/Pd20ZiKWkvJqZ8zZww6R+JMYdfmudH7+4mH8UeFsFjcpJ5+4zBnH8UVlh/QSgSDhavnw5Q4cODXYYAdXYezSz+c65/IPXDesxd9OSYvmvyUe1KekDnDLU37sEizbt4dqn5/HOsm3tFZ6ISFCEdeI/UscflXVIFdHn3+wKUjQiIu1Dib8ZyfExnDMyu0HZ1pLw7M9bJNyFQrV2Wx3ue1Pib8EvLxzBd0/q75/fWrI/iNGISFskJCSwa9eusEz+B/rjT0hIaPU2Yduqp70kxEZz/aQ8/vjxWgC26YpfJOT06dOHwsJCduzYEexQAuLACFytpcTfCt1TE4iOMmo9jp37qqisqSU+Rn36iISK2NjYVo9OFQlU1dMK0VFG99R4//z2ksogRiMicmSU+FupZ1pd/Znq+UUklCnxt1KvtET/tFr2iEgoU+JvpYZX/Er8IhK6lPhbKbte4t+mqh4RCWEBS/xmlmNmH5rZcjNbZma3+8ofNrMVZrbYzF4xs/RAxdCesutV9WzRFb+IhLBAXvHXAHc554YCxwK3mtkw4D1guHNuJLAK+EkAY2g39at6Cnfril9EQlfAEr9zbqtzboFvuhRYDvR2zr3rnDvQ4f0coPVPHQRRv6xkDnTKuXzrXr5cXxzcgERE2qhD6vjNLA8YA8w9aNGNwFtNbDPNzArMrKAzPG2XkRzH+aN6+ed/+ebysHz8W0TCX8ATv5mlAC8Bdzjn9tYrvxdvddDMxrZzzk13zuU75/K7desW6DBb5a4zBhMX7T1lX23cwxfqqVNEQlBAE7+ZxeJN+jOdcy/XK78OOAe42oXQZXNORhKXja+rmXr+y00Nlq/fWcarCzezp7zq4E1FRDqNgPXVY95hqp4GljvnHq1XPgX4MXCSc648UMcPlKsm9OVvczYC8M7SbRSXVZEUF82PXlzM64u34ByMyU3n5VsmaaQuEemUAnnFfxxwLXCKmS30vaYCTwCpwHu+sqcCGEO7G9arC6P6pAFQVeth5pwNvLSgkNcWeZM+eKuB5m/YHcQoRUSaFrArfufcp0Bjl7xvBuqYHeWqY3JZVLgEgCc/WtOgjf8Blzz1BT85awjXTuxLUpw6QRWRzkNP7rbBxWP7MCy7CwAV1R7W7SxrdL0H31rBb99f3ZGhiYi0SIm/DWKio3jwohFEHfR7Zmh2F47u1aVB2b+/2ozHEzL3r0UkAijxt9GonHRumTygQdnpw3rwq4tHMqhHir+sqLSSBRtV3y8inYcS/xG447RBjM7xdjUUE2WcMzKb4b3TePfOk7hyQo5/vTeXbAtWiCIih1DiPwKx0VE8d9MEfjxlCE9fP55BPVL9y84anu2ffn3xFkorqoMRoojIIZT4j1BqQiy3TB7ASYMaPl08cUAmWSlxAOworeQXr38djPBERA6hxB8gsdFR3H/e0f75F+cXsmjTniBGJCLipcQfQOeM7MXpw3r459Wjp4h0Bkr8ATZpQKZ/esW20iBGIiLipcQfYIN71t3wXbVdiV9Egk99CQTY4B4NE7/H44iKMpZuLmHtzjIGdk9hSM9UdegmIh1GiT/AMlPiyUqJZ+e+SiqqPXy1aQ+PfbCaj1fVDS5zwsAsnrlhAtEHPwosIhIAqurpAIN71j3Je/EfPm+Q9AFmr97J3+dt7OiwRCRCKfF3gME9urS4zq/fXsGO0soOiEZEIp0SfwcYUu8G7wE3HtePFQ9MIS8zCYDSihqe+XxdR4cmIhFIib8DnDq0O2mJsf75/lnJ3H3mIBJio7nnrCH+8r/P3UhFdW0wQhSRCKKbux0gMyWe2T8+mfnrd1NeVcukAZn+wVlOG9qD3umJbN6zn93l1by2aAuX5ee0sEcRkbbTFX8H6ZIQy8lDunP2yGy6Jsf5y2Oio/jWxL7++YffWUlRaUUwQhSRCKHE3wlcMT6XjOS6Dt3OeexT/lWwCec0gIuItD8l/k4gLSmW310xmgPPcBWVVvLDFxfz59m62Ssi7S9gid/McszsQzNbbmbLzOx2X3mGmb1nZqt9f7sGKoZQcsLAbjxw/nBi6j3E9fC7K1m9vZQ95VXMnLuBR99bpWogETliFqjqBDPLBrKdcwvMLBWYD1wAXA8UO+ceMrN7gK7OuR83t6/8/HxXUFAQkDg7m137Krnsj1/wzQ7vAO6x0UZSXAwl+70DuYzKSee+c4dRUV3LxP51HcCpywcROZiZzXfO5R9S3lH1yGb2KvCE7zXZObfV9+XwkXNucHPbRlLiB2+fPmc/Npvq2pb/baKjjGP7Z/DHa/NJiVcjLRGp01Ti75A6fjPLA8YAc4EezrmtAL6/3ZvYZpqZFZhZwY4dOxpbJWwN6pHKtybmtWrdWo/jszW7+PmrSwMblIiEjYAnfjNLAV4C7nDO7W3tds656c65fOdcfrdu3VreIMzcdspRDR76+s4J/Zpd/+UFm3lt0ZZAhyUiYSCgdQNmFos36c90zr3sK95uZtn1qnqKAhlDqEpPiuP3V4/lkXdXMnlQd75/6lGsLtrHRyt3EBNlXDuxL/sqapi1oohdZVUA/OAfC4kyeO/r7ZRX1XLPWUMY0C2lhSOJSKQJ5M1dA57FeyP3jnrlDwO76t3czXDO/ai5fUVaHX9TSvZX86+CTeTnZTA6Jx2A0opqTn/0E7btPbS1T3JcNN+alMcJA7PYVlLBa4u2UFFdy9DsLlw5IZekuGjiYqJIjoshMTaaKHULLRJWOvzmrpkdD8wGlgAeX/FP8dbz/xPIBTYClzrnmh2MVom/ee8s28Z3n5t/RPtIiY/hlxeN4LxRvaiq8bCvsob0xFh9GYiEsKC36jkSSvwt+/XbK3h5wWbSEmOJiTaWbWn17ZRDxEQZNR5HfEwUVx2Ty8/PGabmoiIhSIk/wmzfW8FrC7ewcnsp/1m8lYqaWi4d14f8vAxeml/Iim2lJMVFU1XjobSihqpaT5P7eumWSYzrq+fsREKNEn8E219VS3lVDZkp8Y0u37xnPxc++RlFTQwEc8m4Pjxy6ahAhigiAdBU4tcTPxEgMS6axLjoJpf3Tk/klVuP45UFheRlJXPioG6s3VHGBU9+BsAbi7fws3OGUVZZw//+Zzke5/jVJSPpkhDb5D5FpPNS4hfAm/y/d8pA//yoPmkMze7C8q17qaj28Ou3VzBrRRFbS7yth3IzkvjJ1KHBCldEjoB655RGmRlXH5Prn585d6M/6QO8vmgLHk/nryYUkUMp8UuTLhrbm/SkxqtztpRUMH/j7g6OSETagxK/NCkpLqbBVf/BXlcXESIhSYlfmnXdxDy6+q76TxrUjb9/+xj/sjeXbKWmmWagItI56eauNKt7lwT+fetxfL1lL6cO7UF0lNE9NZ6i0kp27qvii7W7OGFg5HWiJxLKdMUvLeqbmcxZI7KJi4kiOso4e2S2f5mqe0RCjxK/HLZzR/XyT7+9dBuVNbVBjEZEDpcSvxy2MTnp9OmaCMDeiho+WbUzyBGJyOFQ4pfDZmYNrvpV3SMSWpT4pU3OHVmX+L0Dv9QEMRoRORxK/NImQ7NTGdAtGYD91bV8sFwDqYmECiV+aRMz4+x6V/0F65sdS0dEOhElfmmzwT1S/dOb9xw69KOIdE5K/NJm2ekJ/umtJfuDGImIHA4lfmmz3umJ/un6PXeKSOcWsMRvZjPMrMjMltYrG21mc8xsoZkVmNmEQB1fAi8rJZ4Y32DsxWVVVFTrQS6RUBDIK/5ngCkHlf0a+IVzbjTwc9+8hKjoKKNHl7rqni17VN0jEgoClvidc58ABzf1cEAX33QaoCd/Qpyqe0RCT0f3znkH8I6ZPYL3S2dSBx9f2ln9G7y64hcJDR19c/cW4E7nXA5wJ/B0Uyua2TTffYCCHTt2dFiAcniy0+qu+LeoSadISOjoxH8d8LJv+l9Akzd3nXPTnXP5zrn8bt3U33tn1UtNOkVCTkcn/i3ASb7pU4DVHXx8aWe96l/xq45fJCQErI7fzJ4HJgNZZlYI3Ad8B/idmcUAFcC0QB1fOobq+EVCT8ASv3PuyiYWjQvUMaXj5WQk+afX7yyjrLKG5HiN6CnSmenJXTkiXRJiGdLT22dPjcfx1cY9QY5IRFqixC9HbEK/DP/0vHW7ghiJiLSGEr8csfF5dYl/7rpi5m8o5n//8zWrtpcGMSoRaYoqY+WI1b/in7uumEuf+gKPg1krivjgrsnBC0xEGqUrfjliPbokkJdZd5PX47x/v9lRxjY18RTpdJpN/GZ2Tb3p4w5a9r1ABSWh57LxOY2WLyrUzV6RzqalK/4f1Jt+/KBlN7ZzLBLCbj5xAN+a2PeQ8sVK/CKdTkuJ35qYbmxeIlhUlPGL847m91eP5YSBWf7yxYUlQYxKRBrTUuJ3TUw3Ni8RzsyYOiKbX144wl+2aNMenNNHRaQzaalVzxAzW4z36n6AbxrffP+ARiYhq0/XRLomxbK7vJq9FTXMWVvMxAGZwQ5LRHxaSvxDOyQKCStmRn5eBu99vR2AG56Zxz+mTWRUTnqQIxMRaKGqxzm3of4L2AeMBbJ88yKNuvuMwaQnxQJQUe3hiQ/XBDkiETmgpeacb5jZcN90NrAUb2ue58zsjg6IT0LU4J6p/PXGuuEWvvhmF9W1niBGJCIHtHRzt59zbqlv+gbgPefcucAxqDmntGBE7zT/mLz7Kms45Tcf8ei7K/UFIBJkLSX+6nrTpwJvAjjnSgH975VmmVmDpp2bivfz2Kw1/OCfi6j1qKWPSLC0lPg3mdltZnYh3rr9twHMLBGIDXRwEvpOGHjosJmvL9rC71XnLxI0LSX+m4CjgeuBy51zBx7DPBb4SwDjkjBx3FGZREcd+qzfjM/WUVFdG4SIRKSlVj1FzrmbnXPnO+ferVf+oXPukcCHJ6EuPSmOn509lP7dkvnZOcPo09Vb57+7vJpXvtoc5OhEIpM191Slmb3W3MbOufPaPaJG5Ofnu4KCgo44lATYnz5Zy/++udw/f/LgbmQkx/PDMwfTMy2hmS1F5HCZ2XznXP7B5S09wDUR2AQ8D8xF/fPIEbpsfA6/fX8VZVXeap4PV+4AYHd5FTOuHx/M0EQiRkt1/D2BnwLDgd8BpwM7nXMfO+c+bm5DM5thZkVmtvSg8tvMbKWZLTOzXx9J8BJ60hJj+cM148hKiW9Q/tHKIvXdL9JBWqrjr3XOve2cuw7vDd01wEdmdlsr9v0MMKV+gZmdDJwPjHTOHQ3oPkEEOnFQNz7+4WR+e/lof5nHwW3PL9BwjSIdoMURuMws3swuAv4G3Ao8Brzc0nbOuU+A4oOKbwEecs5V+tYpOuyIJSwkx8dwwZjePHrZKH/Zl+t3c9bvZvPRSn0sRAKppS4bngU+x9uG/xfOufHOuQecc21tjjEIOMHM5prZx2bWZKWumU0zswIzK9ixY0cbDyed3ZThPUmJr7vVVOtx/PDFxRSXVQUxKpHw1tIV/7V4k/XtwOdmttf3KjWzvW04XgzQFW+10Q+Bf5pZozeMnXPTnXP5zrn8bt0OfQhIwkNSXAy/uWwUA7un+Mt2lFbyk5cXqx9/kQBpqY4/yjmX6nt1qfdKdc51acPxCoGXndc8vN0+ZLWwjYS5M4/uyXs/OIm/1GvV886y7bw4vzCIUYmErxbr+NvZv4FTAMxsEBAH7OzgGKSTOnlId645Ntc//8AbX+vpXpEACFjiN7PngS+AwWZWaGY3ATOA/r4mni8A1zn9npd6fjp1KL18D3LtrahhyWaN2SvS3lp6gKvNnHNXNrHomkAdU0JfUlwMEwdk8dICbzXPok17GJ+XEeSoRMJLR1f1iLRodE6af3pRoa74RdqbEr90OiP71I3Nu7hwTzNrikhbKPFLpzMkO5W4aO9Hc8OucvaUq02/SHtS4pdOJz4mmqHZqf75hZt01S/SnpT4pVManVNX3fP4rDUaqlGkHSnxS6d0xYRcYnwjd83fsJvHZ60OckQi4UOJXzqlodld+P6pA/3zv31/Nc9+vj54AYmEESV+6bRumTyAif0z/fP3vbaMf3y5MYgRiYQHJX7ptGKjo/jTdfmMza2r77/n5SVq4ilyhJT4pVNLiY/hLzdMYHhvb5+AzsHbS7cFOSqR0KbEL51eWmIs1x7b1z+/VUM0ihwRJX4JCdlpif7prSX7gxiJSOhT4peQkO3rsRPQoOwiR0iJX0JCdnr9K/4Kjc4lcgSU+CUkpMTHkOobm7eyxsPu8uogRyQSupT4JWRkp9dV92zZ463n31dZw9LNJfoFIHIYAjYQi0h765mWyKrt+wB4+tN19OiSwPPzNlKyv5oTBmYx/dp8EuOi/etXVNcSE2XEROv6RqQ+JX4JGb3q3eB95avNDZbNXr2Tcf/zHsN7pVFZ62F/VQ1rd5QRE2385foJTByQefDuRCKWLoUkZPSsl/gbU15Vy7z1xSzatIdV2/dR43FUVHu477WleNS7p4hfIAdbn2FmRb6B1Q9edreZOTPLCtTxJfxkN5L4H7poBD+aMpiE2KY/yqu272P67LW6DyDiE8iqnmeAJ4C/1i80sxzgdEC9bclh6VnvIS6Ap64Zx5ThPQG4bmIe63aWsa+yhriYKOKio3h+3kZmzvV+zB56awUz527givG5XD8pj+R41XJK5ArYFb9z7hOguJFF/wf8CNDllxyW/lnJDeZPG9rdP50cH8Pw3mkc2z+TsbldGd47jR+cPojkejd7NxXv5+F3VnL6ox8zb11jH02RyNChdfxmdh6w2Tm3qCOPK+EhJyOJH5w+iDG56fz9O8e02FonMyWep68fz2lDe5CaUHeFv6Wkgu/8tYDSCj0LIJHJAlnvaWZ5wBvOueFmlgR8CJzhnCsxs/VAvnNuZxPbTgOmAeTm5o7bsGFDwOKU8FdRXcurCzfz4Fsr2ON7+OunU4cw7cQBQY5MJHDMbL5zLv+Q8g5M/COAD4By3+I+wBZggnOu2X528/PzXUFBQcDilMjxwryN3PPyEsDb62d+367UOseqbaXUeBy5GUlcPj6H6lrv/4sh2ansKa9i8+799EpPZPLg7kT7hoQU6eyaSvwddofLObcE8FfKtnTFLxIIF4zpzW/eW8WO0kpK9lfzwYqiBsuLSisp2LC7ye2P7tWFX144glH1BoMXCTWBbM75PPAFMNjMCs3spkAdS6S1EmKjufuMQW3eftmWvZz/5Gec8/hs7n1lib/rCJFQEtCqnvaiqh5pb9/s2MeWPfupqPZQ6/HQLyuFKIM/fPQNry3aQr+sZNISYykur6JnlwSio4x564qprPE02E9qfAy3nzaQa47tS0JsdBNHEwmOoNTxtxclfulItR7XaD3+xl3l3P6Pr/hq46Fj/o7OSeeFaccq+Uun0lTiV5cNIgdp6uZtbmYSL908ide+dxwPXjSCvMwk/7KFm/bw+qItHRWiyBFR4hc5DFFRxsg+6Vw5IZd37jyRqSN6+pf9bY6aHEtoUOIXaaP4mGj+54IRxMV4/xstKixh0aZDq4FEOhslfpEjkJEcxzkjsv3z02evDWI0Iq2jnqpEjtCNx/fjZd/4AG8u2crq7aUM7JFKRXUtSzeXULh7P+lJsWQkx1FeVcvonHTdBJagUuIXOULDe6dx6pDufLCiCOfgrn8tYnCPVN5auo19lTWHrD+ub1e+e2J/du6r4qKxvfUlIB1OzTlF2sHiwj2c98Rnh73d8Udl8ZcbxhOr4SElANScUySARvZJ57sn9j+kPDcjiTOG9WB0E108fLpmJxc8+Rkz525gw66yQIcpAqiqR6Td3HPWEBzwp9lr6ZYSzy8vHMGpQ7tj5n0uwDnHtOfm897X2xtst2zLXu59xTtQ3TXH5vLjKUNITYjt6PAlgqiqR6Sd7SitJCM5rtEHwSqqa/lnwSb6dE1k9uqd/OWz9YesE2Xe+wCX5edw0dg+6g1U2kxdNoh0QoW7y3l76TbeWbaNL9cf2ivocUdlctKgbny0cgdFpZWceXQPBnZPJTk+hkkDMjWEpDRLiV+kE3PO8fd5G3n603Ws3dG6uv6kuGgevmQUZ4/MbnlliUi6uSvSiZkZVx/Tl1l3TebLe0/j4rF9WtymvKqWu/+1iM3qGloOkxK/SCfTLTWehy8ZyZUTcgHIy0zi/nOH8d2T+pPftyvH9s/wr7u/upYHXv86WKFKiFJVj0gnVrK/mi4JMf6WQQd8ub6YS5/6wj//5FVjVeUjh1BVj0gISkuMPSTpA4zPy+DScXXVQff++/BGA3POsXRzCW8u2cqaotJ2iVVCh5oEiISon507jM/W7GRLSQV7yqu5fPoXPHjhSCYOyGy2CeiO0kpu/tt85vvGFo6LieKlmycxok9aR4UuQaYrfpEQ1SUhlt9cNpoYX5LfVLyfa56ey5V/mkNZI30EOed4d9k2LnjyM3/SB6iq8fDAf74mFKp9pX0o8YuEsIkDMvn91WOJja67wp+3rpjfvr/KP79w0x7++sV6rvrTXKY9N7/RVkDz1hUf8kSxhK+AVfWY2QzgHKDIOTfcV/YwcC5QBXwD3OCc08gVIkfgjKN78vItx/HgW8v5/JtdAMz4bD0j+6TztzkbmLuu+JBtuiTE8Ohlo/l0zU6e+Xw9AA/852vG9u3Ksi17qaiupWeXBJ6bs4G0xFjuPH0QKXpYLGwErFWPmZ0I7AP+Wi/xnwHMcs7VmNmvAJxzP25pX2rVI9Iy5xxX/3muP/k3Jsrg6mP6cufpg8hIjqO4rIqTH/mIkv3Vze57RO80nrlhPJkp8e0ac02th6paD/Ex0Xy4ooieaQkM7617De2lqVY9AfsKd859YmZ5B5W9W292DnBJoI4vEmnMjF9eOIKzH5tNWVWtvzzK4PRhPRjUI5VzRvZicM9U/7KM5DjuOWsIP3l5SbP7XrK5hGnPzeeFacce0oX0n2ev5cOVRdx52iDy8zKa2INXeVUNM+ds5Iu1u/A4x1cb9zT40jHzNk2dOkJNUwMpoO34fYn/jQNX/Actex34h3Pub01sOw2YBpCbmztuwwYNZC3SGv9ZvJVb/74AgNhoY8b14zlhYLcm1/d4HNc/8yWfrNqBGQzolkJpRTXb91Yesu6onHRunTyA04b2ICrKmL+hmIv/UPc8wcT+mcREG+P6drxgdRYAAA0zSURBVKV/txSGZXdhQLdkzIzV20v51ox5bC2paDb++Jgonp92LGNzu7bxDMgBQemrp6nEb2b3AvnARa4VAaiqR+TwPD9vI28v3cZNx/fjxEFNJ/0Dqms9rN6+j17pCaQnxeGco6i0ku6p8fzxk7U89NaKBusP6pHCI5eOYvona3lj8dZm990rLYExfbvyxTe7KC6ravV7uO2Uozh5SPcmvwDW7Sxjf1Utg3qkEKOBbBrVaRK/mV0H3Ayc6pwrb81+lPhFgsfjcdzxj4W8tmhLu+wvKS6ab5/Qnx5d4snpmsS6nWV8sKKIMTnpPDdnQ4MvBzN46ppxDO+dxv97fRlRZlwyrg+LC0t4bNZqnIPU+Bj+58LhnD+6d7vEF046ReI3synAo8BJzrkdrd2PEr9IcDnnmLuumFkrinj28/VU1ngaXS8xNprzR/ciKS6G9bvKKFhfzN6KumcKkuKi+cv14zmmf2aj28/fUMyVf5pLVRP7b0paYiyzf3wyXRJiKdxdzh8++oaUhBimHN2TMRFcZdThid/MngcmA1nAduA+4CdAPHCg2cEc59zNLe1LiV+k81i0aQ9XTJ/D/mrvDWQzeOaGCaTEx3BU9xTSEutGD9tfVcuMz9bxxTe7yM/ryqX5OfROT2x2/5+v2ckfPv6G2at3HlZct51yFLefOpCzH/uUldtL/bE9cskoLh7Xcm+n4Uj98YtIu/lyfTFPffQNfTOTuWx8H4b07NLux/jim11c8/Rcaj11OSorJY7M5Hg2FJdx6pAeHHdUFj99pfkWSWmJscy666R2b4oaCpT4RSTkLN1cwj++3MTanfs4e0Qvrjomt8HyWo9jym8/YXXRvmb3c8awHjxx1VjiYiLrJrASv4iEpY27yrn7X4uYt77uCeUeXeK5/9yjuWXmAn/ZuL5dufuMwUwc0Pj9hXCkxC8iYcs5R8GG3by9dBvb91bwX5OPYlivLtz/2jJ/lxQHjMlN55TB3cnPywj7LwElfhGJOM45HvtgDb/9YBUHp7qYKOP1245naHb735/oLDQQi4hEHDPj9tMGMuuuyZx9UDcQNR7HvwoK/fObisuZt64Yj6fzXwwfKXW3JyJhr19WMk9cNYZJ8zL5738v9V/9v754C/eePZR3l23je89/Ra3HMWlAJt875SiK9lZSWVPLeaN6kxgXHdw30M5U1SMiEaWm1sOxD85i575D+yJqzMg+acy4fjxZIdgcVFU9IiJATHQU5xzGwPSLC0s4+ZGP+NXbK9hWUkFNrSfkRyvTFb+IRJw1Rfs45/HZVFTXdQ0xoncal4zrwwcriti1r5KtJRXNdirXLyuZb03sy7XH9u20ncSpVY+ISD3b91awdHMJAEd1TyE3IwmzhoPUf7iiiAfe+Jq1O8ua3M+VE3J48KKRAY21rZT4RUTaoNbjeO/r7Tz96VoWbSqhxuPh4IY/b91+QqdsFtrhI3CJiISD6ChjyvCeTBne01+2v6qWK6Z/waJC7y+Gh99ZydPX5R/yi6Gz6pwVUyIinVhiXDQPXTySA3l+1ooifvjiYjbv2R/cwFpJiV9EpA2GZnfh8vwc//yL8ws5/lezeOSdlZ2+1Y8Sv4hIGz1wwXCmjqirAnIOnvhwDY/PWhPEqFqmxC8i0kax0VE8dsUY/vfC4RzVPcVf/uh7q/h8zeENJNORlPhFRI5ATHQUVx/Tl/98/3iOO6qut897Xl5CeVVNM1sGjxK/iEg7iI+J5v8uG02XBG9jyY3F5Xz/+YVU1x7e+MEdQYlfRKSddO+SwM/PPdo///7y7Xz72YJW9wvUUZT4RUTa0cVje/Pdk/r75z9etYOzfjebrzbuDmJUDQUs8ZvZDDMrMrOl9coyzOw9M1vt+9s1UMcXEQkGM+OeKUO4ZfIAf9mO0kounz6Hj1YWBTGyOoG84n8GmHJQ2T3AB865gcAHvnkRkbBiZvx4yhD+euME0pNiAaiq8fBfMxewuHBPkKMLYOJ3zn0CFB9UfD7wrG/6WeCCQB1fRCTYThzUjVdvPY7e6YkAlFfVctkfv+CBN77m7aVbKa2oDkpcAe2kzczygDecc8N983ucc+n1lu92zjVa3WNm04BpALm5ueM2bNgQsDhFRAJpTVEpF/3+c/ZWNGzeGRcdxWXj+/CdE/qzu7yaFVv3ctLgbmQkxzFnbTEfrigiNtq49+xhbTpuUHrnPJLEX5965xSRULdqeym3/G0+3+xouovnxqQmxPDVz05vU5//nWUEru1mlu0LKBvoHHc6REQCbFCPVF6/7Xh+ffFIvn18v1Z341xaUcP8De3bIqiju2V+DbgOeMj399UOPr6ISNAkxcVw2Xhvx27OOd5Zto2nPl7L+l1leDyuQVVQn66JnDGsJ6cM6c7o3PSmdtkmAUv8ZvY8MBnIMrNC4D68Cf+fZnYTsBG4NFDHFxHpzMyMKcOzmTK8bvzfqhoPs1ZsJz0pjgl5GURFBaZ//4AlfufclU0sOjVQxxQRCWVxMVENvggCRU/uiohEGCV+EZEIo8QvIhJhlPhFRCKMEr+ISIRR4hcRiTBK/CIiESagffW0FzPbAbS1l7YsoPOOetzxdD7q6FzU0bloKFzOR1/nXLeDC0Mi8R8JMytorJOiSKXzUUfnoo7ORUPhfj5U1SMiEmGU+EVEIkwkJP7pwQ6gk9H5qKNzUUfnoqGwPh9hX8cvIiINRcIVv4iI1KPELyISYcI68ZvZFDNbaWZrzOyeYMfT0cxsvZktMbOFZlbgK8sws/fMbLXvb4tjHocqM5thZkVmtrReWaPv37we831WFpvZ2OBF3v6aOBf3m9lm3+djoZlNrbfsJ75zsdLMzgxO1IFhZjlm9qGZLTezZWZ2u688Yj4bYZv4zSwaeBI4CxgGXGlmbRuqPrSd7JwbXa9N8j3AB865gcAHvvlw9Qww5aCypt7/WcBA32sa8IcOirGjPMOh5wLg/3yfj9HOuTcBfP9PrgCO9m3ze9//p3BRA9zlnBsKHAvc6nvPEfPZCNvED0wA1jjn1jrnqoAXgPODHFNncD7wrG/6WeCCIMYSUM65T4Dig4qbev/nA391XnOAdDML/FBIHaSJc9GU84EXnHOVzrl1wBq8/5/CgnNuq3NugW+6FFgO9CaCPhvhnPh7A5vqzRf6yiKJA941s/lmNs1X1sM5txW8/wGA7kGLLjiaev+R+nn5nq/6Yka9ar+IORdmlgeMAeYSQZ+NcE78jY1SHGltV49zzo3F+1P1VjM7MdgBdWKR+Hn5AzAAGA1sBX7jK4+Ic2FmKcBLwB3Oub3NrdpIWUifj3BO/IVATr35PsCWIMUSFM65Lb6/RcAreH+ubz/wM9X3tyh4EQZFU+8/4j4vzrntzrla55wH+BN11Tlhfy7MLBZv0p/pnHvZVxwxn41wTvxfAgPNrJ+ZxeG9WfVakGPqMGaWbGapB6aBM4CleM/Bdb7VrgNeDU6EQdPU+38N+JavBcexQMmBn/3h6qB66gvxfj7Aey6uMLN4M+uH96bmvI6OL1DMzICngeXOuUfrLYqcz4ZzLmxfwFRgFfANcG+w4+ng994fWOR7LTvw/oFMvC0WVvv+ZgQ71gCeg+fxVmFU471qu6mp94/35/yTvs/KEiA/2PF3wLl4zvdeF+NNbtn11r/Xdy5WAmcFO/52PhfH462qWQws9L2mRtJnQ102iIhEmHCu6hERkUYo8YuIRBglfhGRCKPELyISYZT4RUQijBK/RAQz+9z3N8/Mrmrnff+0sWOJdFZqzikRxcwmA3c75845jG2inXO1zSzf55xLaY/4RDqCrvglIpjZPt/kQ8AJvv7n7zSzaDN72My+9HVW9l3f+pN9fbb/He9DO5jZv30d3i070OmdmT0EJPr2N7P+sXxPej5sZkvNOy7C5fX2/ZGZvWhmK8xspu9pUszsITP72hfLIx15jiRyxAQ7AJEOdg/1rvh9CbzEOTfezOKBz8zsXd+6E4Dhzts1McCNzrliM0sEvjSzl5xz95jZ95xzoxs51kV4O0AbBWT5tvnEt2wM3v7utwCfAceZ2dd4u04Y4pxzZpbe7u9eBF3xi5yBtx+WhXi75s3E2zcNwLx6SR/g+2a2CJiDt9OugTTveOB55+0IbTvwMTC+3r4LnbeDtIVAHrAXqAD+bGYXAeVH/O5EGqHEL5HOgNtc3ShU/ZxzB674y/wree8NnAZMdM6NAr4CElqx76ZU1puuBWKcczV4f2W8hHcQkLcP652ItJISv0SaUiC13vw7wC2+bnoxs0G+3kwPlgbsds6Vm9kQvEP2HVB9YPuDfAJc7ruP0A04kWZ6ufT1D5/mvEMg3oG3mkik3amOXyLNYqDGV2XzDPA7vNUsC3w3WHfQ+HCUbwM3m9livD1Wzqm3bDqw2MwWOOeurlf+CjARbw+pDviRc26b74ujManAq2aWgPfXwp1te4sizVNzThGRCKOqHhGRCKPELyISYZT4RUQijBK/iEiEUeIXEYkwSvwiIhFGiV9EJML8fwu7pqO85lGRAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "als.fit_stream(ratings, test, train_df)\n", + "plot_learning_curve(als)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7.4 64-bit ('base': conda)", + "language": "python", + "name": "python37464bitbaseconda9114583a17cf498dbdf9713d49f5bef8" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/recsys/als-stream-2.ipynb b/recsys/als-stream-2.ipynb new file mode 100644 index 0000000..e865c85 --- /dev/null +++ b/recsys/als-stream-2.ipynb @@ -0,0 +1,255 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 13, + "id": "8e0e6a4f", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from collections import defaultdict\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.metrics import mean_squared_error\n", + "import time" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "5b033873", + "metadata": {}, + "outputs": [], + "source": [ + "ratings_path = \"/Users/amitnarang/Downloads/ml-latest-small/ratings.csv\"" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "a00a310f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " userId movieId rating timestamp\n", + "0 1 1 4.0 964982703\n", + "1 1 3 4.0 964981247\n", + "2 1 6 4.0 964982224\n", + "3 1 47 5.0 964983815\n", + "4 1 50 5.0 964982931\n", + "... ... ... ... ...\n", + "100831 610 166534 4.0 1493848402\n", + "100832 610 168248 5.0 1493850091\n", + "100833 610 168250 5.0 1494273047\n", + "100834 610 168252 5.0 1493846352\n", + "100835 610 170875 3.0 1493846415\n", + "\n", + "[100836 rows x 4 columns]\n", + " userId movieId rating\n", + "0 1 1 4.0\n", + "1 1 50 5.0\n", + "2 1 151 5.0\n", + "3 1 223 3.0\n", + "4 1 296 3.0\n", + ".. ... ... ...\n", + "180 601 112556 4.0\n", + "181 601 122916 3.5\n", + "182 601 152081 4.5\n", + "183 601 170705 5.0\n", + "184 601 177765 4.5\n", + "\n", + "[185 rows x 3 columns]\n", + " userId movieId rating\n", + "0 1 3 4.0\n", + "1 1 6 4.0\n", + "2 1 47 5.0\n", + "3 1 70 3.0\n", + "4 1 101 5.0\n", + ".. ... ... ...\n", + "545 601 168326 4.0\n", + "546 601 170697 4.0\n", + "547 601 172591 4.5\n", + "548 601 174055 4.0\n", + "549 601 176371 4.0\n", + "\n", + "[550 rows x 3 columns]\n" + ] + } + ], + "source": [ + "df = pd.read_csv(ratings_path, sep = ',')\n", + "\n", + "user_vector_matrix = dict()\n", + "movie_vector_matrix = dict()\n", + "\n", + "columns = ['userId', 'movieId', 'rating']\n", + "test_data = []\n", + "train_data = []\n", + "\n", + "for row in df.itertuples():\n", + " if (row.userId % 100 == 1):\n", + " if row.Index % 4 == 0:\n", + " test_data.append([row.userId, row.movieId, row.rating])\n", + " else:\n", + " train_data.append([row.userId, row.movieId, row.rating])\n", + "\n", + "test_df = pd.DataFrame(data=test_data, columns=columns)\n", + "train_df = pd.DataFrame(data=train_data, columns=columns)\n", + "max_train_movie = max(train_df['movieId'])\n", + "print(df)\n", + "print(test_df)\n", + "print(train_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "a6a8fb84", + "metadata": {}, + "outputs": [], + "source": [ + "class ALSStreamingModel:\n", + " def __init__(self, l, num_features, alpha):\n", + " self.l = l\n", + " self.num_features = num_features\n", + " self.alpha = alpha\n", + " self.user_features = dict()\n", + " self.movie_features = np.random.randint(100, size=(max_train_movie, num_features))\n", + " print(self.movie_features.shape)\n", + " self.ratings = dict()\n", + " \n", + " def fit(self, train):\n", + " for row in train.itertuples():\n", + " #print(\"Update\", row.Index)\n", + " #start = time.time()\n", + " self.update_user_vector(row)\n", + " #print(\"Took\", time.time()-start)\n", + " return self \n", + "\n", + " def _als_step(self, ratings, solve_vecs, fixed_vecs):\n", + " \"\"\"\n", + " when updating the user matrix,\n", + " the item matrix is the fixed vector and vice versa\n", + " \n", + " ratings: 1xnum_movies\n", + " solve_vecs: 1xnum_features\n", + " fixed_vecs: 1xnum_features\n", + " RF * (F^-1F + lI)^-1\n", + " num_features x num_features\n", + " \n", + " num_users x num_movies * num_movies x num_features\n", + " num_users x num_features \n", + " \n", + " (610, 193609) (610, 200) (193609, 200)\n", + " ratings user movies\n", + " (1, 40) (1, 40) (1, 193609)\n", + " b has to be 1x40\n", + " ratings is 1xY fixedVecs is Yx40\n", + " user movies ratings\n", + " \"\"\"\n", + " A = fixed_vecs.T.dot(fixed_vecs) + np.eye(self.num_features) * self.l\n", + " #print(A.shape)\n", + " b = ratings.dot(fixed_vecs)\n", + " A_inv = np.linalg.inv(A)\n", + " solve_vecs = b.dot(A_inv)\n", + " return solve_vecs\n", + " \n", + " def update_user_vector(self, row):\n", + " rating = row.rating\n", + " userId = row.userId\n", + " movieId = row.movieId\n", + "\n", + " if userId in self.user_features:\n", + " user_vector = self.user_features[userId]\n", + " rating_vector = self.ratings[userId]\n", + " else:\n", + " user_vector = np.random.randint(100, size=(1, self.num_features))\n", + " rating_vector = np.zeros((1, max_train_movie))\n", + "\n", + " movie_vector = self.movie_features\n", + " rating_vector[0, movieId-1] = rating\n", + " self.ratings[userId] = rating_vector\n", + " #print(user_vector.shape, movie_vector.shape, rating_vector.shape)\n", + " new_user_vector = self._als_step(rating_vector, user_vector, movie_vector)\n", + " self.user_features[userId] = new_user_vector\n", + " \n", + " def predict_set(self, data):\n", + " \n", + " correct_results = []\n", + " predicted_results = []\n", + " for row in data.itertuples():\n", + " prediction = self.predict_rating(row.userId, row.movieId)\n", + " predicted_results.append(prediction)\n", + " correct_results.append(row.rating)\n", + " \n", + " return self.compute_mse(correct_results, predicted_results)\n", + " \n", + " def predict_rating(self, userId, movieId):\n", + " \"\"\"predict ratings for every user and item\"\"\"\n", + " if userId not in self.user_features or movieId not in self.movie_features:\n", + " return 0\n", + " user_vector = self.user_features[userId]\n", + " movie_vector = self.movie_features[movieId-1]\n", + " prediction = user_vector.dot(movie_vector.T)\n", + " if np.isnan(prediction) or prediction > 5:\n", + " return 5\n", + " if prediction < 0:\n", + " return 0\n", + " return prediction\n", + "\n", + " def compute_mse(self, y_true, y_pred):\n", + " \"\"\"ignore zero terms prior to comparing the mse\"\"\"\n", + " mse = mean_squared_error(np.asarray(y_true), np.asarray(y_pred))\n", + " return mse" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "285ebde1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(176371, 40)\n", + "16.894003342096422\n", + "16.5193922235925\n" + ] + } + ], + "source": [ + "als = ALSStreamingModel(.01, 100, .1)\n", + "als.fit(train_df)\n", + "print(als.predict_set(test_df))\n", + "print(als.predict_set(train_df))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7.4 64-bit ('base': conda)", + "language": "python", + "name": "python37464bitbaseconda9114583a17cf498dbdf9713d49f5bef8" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/recsys/als-streaming-test.ipynb b/recsys/als-streaming-test.ipynb new file mode 100644 index 0000000..627783c --- /dev/null +++ b/recsys/als-streaming-test.ipynb @@ -0,0 +1,231 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 121, + "id": "8e0e6a4f", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from collections import defaultdict\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.metrics import mean_squared_error" + ] + }, + { + "cell_type": "code", + "execution_count": 122, + "id": "5b033873", + "metadata": {}, + "outputs": [], + "source": [ + "ratings_path = \"/Users/amitnarang/Downloads/ml-latest-small/ratings.csv\"" + ] + }, + { + "cell_type": "code", + "execution_count": 123, + "id": "a00a310f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " userId movieId rating timestamp\n", + "0 1 1 4.0 964982703\n", + "1 1 3 4.0 964981247\n", + "2 1 6 4.0 964982224\n", + "3 1 47 5.0 964983815\n", + "4 1 50 5.0 964982931\n", + "... ... ... ... ...\n", + "100831 610 166534 4.0 1493848402\n", + "100832 610 168248 5.0 1493850091\n", + "100833 610 168250 5.0 1494273047\n", + "100834 610 168252 5.0 1493846352\n", + "100835 610 170875 3.0 1493846415\n", + "\n", + "[100836 rows x 4 columns]\n", + " userId movieId rating\n", + "0 1 1 4.0\n", + "1 1 47 5.0\n", + "2 1 101 5.0\n", + "3 1 157 5.0\n", + "4 1 223 3.0\n", + "... ... ... ...\n", + "33607 610 160527 4.5\n", + "33608 610 161582 4.0\n", + "33609 610 163937 3.5\n", + "33610 610 166528 4.0\n", + "33611 610 168250 5.0\n", + "\n", + "[33612 rows x 3 columns]\n", + " userId movieId rating\n", + "0 1 3 4.0\n", + "1 1 6 4.0\n", + "2 1 50 5.0\n", + "3 1 70 3.0\n", + "4 1 110 4.0\n", + "... ... ... ...\n", + "67219 610 164179 5.0\n", + "67220 610 166534 4.0\n", + "67221 610 168248 5.0\n", + "67222 610 168252 5.0\n", + "67223 610 170875 3.0\n", + "\n", + "[67224 rows x 3 columns]\n" + ] + } + ], + "source": [ + "df = pd.read_csv(ratings_path, sep = ',')\n", + "\n", + "user_vector_matrix = dict()\n", + "movie_vector_matrix = dict()\n", + "\n", + "columns = ['userId', 'movieId', 'rating']\n", + "test_data = []\n", + "train_data = []\n", + "\n", + "for row in df.itertuples():\n", + " if row.Index % 3 == 0:\n", + " test_data.append([row.userId, row.movieId, row.rating])\n", + " else:\n", + " train_data.append([row.userId, row.movieId, row.rating])\n", + "\n", + "test_df = pd.DataFrame(data=test_data, columns=columns)\n", + "train_df = pd.DataFrame(data=train_data, columns=columns)\n", + " \n", + "print(df)\n", + "print(test_df)\n", + "print(train_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "id": "a6a8fb84", + "metadata": {}, + "outputs": [], + "source": [ + "class ALSStreamingModel:\n", + " def __init__(self, l, num_features, alpha):\n", + " self.l = l\n", + " self.num_features = num_features\n", + " self.alpha = alpha\n", + " self.user_features = dict()\n", + " self.movie_features = dict()\n", + " \n", + " def fit(self, train):\n", + " for row in train.itertuples():\n", + " self.update_user_vector(row)\n", + " return self\n", + " \n", + "\n", + " def update_user_vector(self, row):\n", + " rating = row.rating\n", + " userId = row.userId\n", + " movieId = row.movieId\n", + "\n", + " if userId in self.user_features:\n", + " user_vector = self.user_features[userId]\n", + " else:\n", + " user_vector = np.random.randint(100, size=self.num_features)\n", + "\n", + " if movieId in self.movie_features:\n", + " movie_vector = self.movie_features[movieId]\n", + " else:\n", + " movie_vector = np.random.randint(100, size=self.num_features)\n", + " self.movie_features[movieId] = movie_vector\n", + " #print(user_vector)\n", + " sub_result = rating - np.dot(np.transpose(user_vector), movie_vector)\n", + " new_user_vector = self.alpha * sub_result * movie_vector + self.l * user_vector\n", + " #print(new_user_vector)\n", + " self.user_features[userId] = new_user_vector\n", + " \n", + " def predict_set(self, data):\n", + " \n", + " correct_results = []\n", + " predicted_results = []\n", + " for row in data.itertuples():\n", + " prediction = self.predict_rating(row.userId, row.movieId)\n", + " predicted_results.append(prediction)\n", + " correct_results.append(row.rating)\n", + " \n", + " return self.compute_mse(correct_results, predicted_results)\n", + " \n", + " def predict_rating(self, userId, movieId):\n", + " \"\"\"predict ratings for every user and item\"\"\"\n", + " if userId not in self.user_features or movieId not in self.movie_features:\n", + " return 0\n", + " user_vector = self.user_features[userId]\n", + " movie_vector = self.movie_features[movieId]\n", + " prediction = user_vector.dot(movie_vector.T)\n", + " if np.isnan(prediction) or prediction > 5:\n", + " return 5\n", + " if prediction < 0:\n", + " return 0\n", + " return prediction\n", + "\n", + " def compute_mse(self, y_true, y_pred):\n", + " \"\"\"ignore zero terms prior to comparing the mse\"\"\"\n", + " mse = mean_squared_error(np.asarray(y_true), np.asarray(y_pred))\n", + " return mse" + ] + }, + { + "cell_type": "code", + "execution_count": 125, + "id": "285ebde1", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:32: RuntimeWarning: invalid value encountered in add\n", + "/opt/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:32: RuntimeWarning: overflow encountered in multiply\n", + "/opt/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:32: RuntimeWarning: invalid value encountered in multiply\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "4.833913483279781\n", + "4.487340830655718\n" + ] + } + ], + "source": [ + "als = ALSStreamingModel(.01, 40, .1)\n", + "als.fit(train_df)\n", + "print(als.predict_set(test_df))\n", + "print(als.predict_set(train_df))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7.4 64-bit ('base': conda)", + "language": "python", + "name": "python37464bitbaseconda9114583a17cf498dbdf9713d49f5bef8" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/recsys/als.ipynb b/recsys/als.ipynb new file mode 100644 index 0000000..f1f753a --- /dev/null +++ b/recsys/als.ipynb @@ -0,0 +1,298 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "7c78c2e5", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from collections import defaultdict\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.metrics import mean_squared_error" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "3ea8dfe7", + "metadata": {}, + "outputs": [], + "source": [ + "ratings_path = \"/Users/amitnarang/Downloads/ml-latest-small/ratings.csv\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e5487124", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " userId movieId rating timestamp\n", + "0 1 1 4.0 964982703\n", + "1 1 3 4.0 964981247\n", + "2 1 6 4.0 964982224\n", + "3 1 47 5.0 964983815\n", + "4 1 50 5.0 964982931\n", + "... ... ... ... ...\n", + "100831 610 166534 4.0 1493848402\n", + "100832 610 168248 5.0 1493850091\n", + "100833 610 168250 5.0 1494273047\n", + "100834 610 168252 5.0 1493846352\n", + "100835 610 170875 3.0 1493846415\n", + "\n", + "[100836 rows x 4 columns]\n", + "[[0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " ...\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]]\n", + "[[4. 0. 4. ... 0. 0. 0. ]\n", + " [0. 0. 0. ... 0. 0. 0. ]\n", + " [0. 0. 0. ... 0. 0. 0. ]\n", + " ...\n", + " [2.5 2. 2. ... 0. 0. 0. ]\n", + " [3. 0. 0. ... 0. 0. 0. ]\n", + " [5. 0. 0. ... 0. 0. 0. ]]\n" + ] + } + ], + "source": [ + "df = pd.read_csv(ratings_path, sep = ',')\n", + "print(df)\n", + "n_users = max(df['userId'])\n", + "n_items = max(df['movieId'])\n", + "ratings = np.zeros((n_users, n_items))\n", + "print(ratings)\n", + "for row in df.itertuples():\n", + " ratings[row.userId - 1, row.movieId - 1] = row.rating\n", + "print(ratings)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "7808d9b0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[4. , 0. , 4. , ..., 0. , 0. , 0. ],\n", + " [0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " [0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " ...,\n", + " [2.5, 2. , 2. , ..., 0. , 0. , 0. ],\n", + " [3. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " [5. , 0. , 0. , ..., 0. , 0. , 0. ]])" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# taken from ethen8181\n", + "def create_train_test(ratings):\n", + " \"\"\"\n", + " split into training and test sets,\n", + " remove 10 ratings from each user\n", + " and assign them to the test set\n", + " \"\"\"\n", + " test = np.zeros(ratings.shape)\n", + " train = ratings.copy()\n", + " for user in range(ratings.shape[0]):\n", + " test_index = np.random.choice(\n", + " np.flatnonzero(ratings[user]), size = 5, replace = False)\n", + "\n", + " train[user, test_index] = 0.0\n", + " test[user, test_index] = ratings[user, test_index]\n", + " \n", + " # assert that training and testing set are truly disjoint\n", + " assert np.all(train * test == 0)\n", + " return train, test\n", + "\n", + "train, test = create_train_test(ratings)\n", + "del ratings\n", + "train" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "fdd592a0", + "metadata": {}, + "outputs": [], + "source": [ + "class ALSModel:\n", + " def __init__(self, l, num_features, n_iters):\n", + " self.l = l\n", + " self.num_features = num_features\n", + " self.n_iters = n_iters\n", + " \n", + " def fit(self, train, test):\n", + " \"\"\"\n", + " pass in training and testing at the same time to record\n", + " model convergence, assuming both dataset is in the form\n", + " of User x Item matrix with cells as ratings\n", + " \"\"\"\n", + " self.n_user, self.n_item = train.shape\n", + " self.user_factors = np.random.random((self.n_user, self.num_features))\n", + " self.item_factors = np.random.random((self.n_item, self.num_features))\n", + " \n", + " # record the training and testing mse for every iteration\n", + " # to show convergence later (usually, not worth it for production)\n", + " self.test_mse_record = []\n", + " self.train_mse_record = [] \n", + " for i in range(self.n_iters):\n", + " self.user_factors = self._als_step(train, self.user_factors, self.item_factors)\n", + " self.item_factors = self._als_step(train.T, self.item_factors, self.user_factors) \n", + " predictions = self.predict()\n", + " test_mse = self.compute_mse(test, predictions)\n", + " train_mse = self.compute_mse(train, predictions)\n", + " self.test_mse_record.append(test_mse)\n", + " self.train_mse_record.append(train_mse)\n", + " \n", + " return self \n", + " \n", + " def _als_step(self, ratings, solve_vecs, fixed_vecs):\n", + " \"\"\"\n", + " when updating the user matrix,\n", + " the item matrix is the fixed vector and vice versa\n", + " \"\"\"\n", + " print(ratings.shape, solve_vecs.shape, fixed_vecs.shape)\n", + " A = fixed_vecs.T.dot(fixed_vecs) + np.eye(self.num_features) * self.l\n", + " b = ratings.dot(fixed_vecs)\n", + " A_inv = np.linalg.inv(A)\n", + " solve_vecs = b.dot(A_inv)\n", + " return solve_vecs\n", + " \n", + " def predict(self):\n", + " \"\"\"predict ratings for every user and item\"\"\"\n", + " pred = self.user_factors.dot(self.item_factors.T)\n", + " return pred\n", + " \n", + " @staticmethod\n", + " def compute_mse(y_true, y_pred):\n", + " \"\"\"ignore zero terms prior to comparing the mse\"\"\"\n", + " mask = np.nonzero(y_true)\n", + " mse = mean_squared_error(y_true[mask], y_pred[mask])\n", + " return mse\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "c12565f4", + "metadata": {}, + "outputs": [], + "source": [ + "def plot_learning_curve(model):\n", + " \"\"\"visualize the training/testing loss\"\"\"\n", + " linewidth = 3\n", + " plt.plot(model.test_mse_record, label = 'Test', linewidth = linewidth)\n", + " plt.plot(model.train_mse_record, label = 'Train', linewidth = linewidth)\n", + " plt.xlabel('iterations')\n", + " plt.ylabel('MSE')\n", + " plt.legend(loc = 'best')" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "586142de", + "metadata": {}, + "outputs": [], + "source": [ + "als = ALSModel(n_iters = 1, num_features = 200, l = 0.01)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "de62d106", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(610, 193609) (610, 200) (193609, 200)\n", + "(200, 200)\n", + "(610, 200)\n", + "(193609, 610) (193609, 200) (610, 200)\n", + "(200, 200)\n", + "(193609, 200)\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEGCAYAAAB/+QKOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAWoElEQVR4nO3df5BdZZ3n8ffHBAhiJPwIoEQmARkF4hBji4M4CitgYPzJYCGuOyzgZpnVGdSy1rhuLY7OH0F3a0fFKjbFBKgtDbrDMlrLLyOlMjuKEDRAADHAYNkbNDEoCgwDke/+0QenaZ5O50ffezvp96vq1j3nOc859/vQVXxyznPuuakqJEka6wWDLkCSNDUZEJKkJgNCktRkQEiSmgwISVLTzEEXMJkOPPDAmj9//qDLkKRdxu233/6Lqprb2rZbBcT8+fNZs2bNoMuQpF1Gkp+Mt81LTJKkJgNCktRkQEiSmnarOQhJ2lZPP/00w8PDPPnkk4MupS9mzZrFvHnz2GOPPbZ5HwNC0rQ0PDzM7NmzmT9/PkkGXU5PVRWbN29meHiYBQsWbPN+XmKSNC09+eSTHHDAAbt9OAAk4YADDtjus6WeBUSSlUk2Jlk3qu2zSX6U5M4k1ySZM86+DyW5K8naJN63KqknpkM4PGtHxtrLM4grgCVj2lYDC6vqD4AfAx/fyv4nVdWiqhrqUX2SpK3oWUBU1c3AI2PavlFVW7rVW4B5vfp8SZrKNm/ezKJFi1i0aBGHHHIIhx566O/Wn3rqqW0+zsqVK/nZz37WkxoHOUl9HvCVcbYV8I0kBfyPqlox3kGSLAWWAhx22GGTXqQk9cIBBxzA2rVrAfjkJz/Ji170Ij760Y9u93FWrlzJ4sWLOeSQQya7xMEERJJPAFuAL43T5YSq2pDkIGB1kh91ZyTP04XHCoChoSF/Hk/SLu/KK6/ki1/8Ik899RSvf/3rueSSS3jmmWc499xzWbt2LVXF0qVLOfjgg1m7di1nnXUWe++9N7feeit77rnnpNXR94BIcg7wVuDNNc7vnVbVhu59Y5JrgOOAZkBI0s6av+zanh37oeV/vF39161bxzXXXMN3v/tdZs6cydKlS7nqqqs44ogj+MUvfsFdd90FwK9+9SvmzJnDF77wBS655BIWLVo06bX39TbXJEuAjwFvr6onxumzT5LZzy4DpwLrWn0laXfzzW9+k9tuu42hoSEWLVrEd77zHR544AFe/vKXc99993HhhRdy4403su+++/a8lp6dQSRZBZwIHJhkGLiIkbuW9mLkshHALVV1QZKXApdV1enAwcA13faZwJer6oZe1SlJU0lVcd555/HpT3/6edvuvPNOrr/+ej7/+c9z9dVXs2LFuNOzk6JnAVFVZzea/2acvhuA07vlB4Fje1WXJI21vZeBeunkk0/mzDPP5MILL+TAAw9k8+bNPP744+y9997MmjWLd7/73SxYsIALLrgAgNmzZ/Ob3/ymJ7X4qA1JmkJe9apXcdFFF3HyySfzzDPPsMcee3DppZcyY8YMzj//fKqKJFx88cUAnHvuubz//e/vySR1xpkn3iUNDQ2VPxgkaVvce++9HHXUUYMuo69aY05y+3hfSPZZTJKkJgNCktRkQEiSmgwISVKTASFJajIgJElNBoQkDcBkPO773HPP5b777utZjX5RTpIGYFse911VVBUveEH73/KXX355T2v0DEKSppD777+fhQsXcsEFF7B48WIefvhhli5dytDQEMcccwyf+tSnftf3DW94A2vXrmXLli3MmTOHZcuWceyxx3L88cezcePGna7FMwhJ+mQPn4z6yUe3e5d77rmHyy+/nEsvvRSA5cuXs//++7NlyxZOOukkzjzzTI4++ujn7PPoo4/ypje9ieXLl/ORj3yElStXsmzZsp0q3TMISZpijjjiCF772tf+bn3VqlUsXryYxYsXc++993LPPfc8b5+9996b0047DYDXvOY1PPTQQztdh2cQkjTF7LPPPr9bXr9+PZ/73Oe49dZbmTNnDu973/t48sknn7fP6If0zZgxgy1btux0HQaEJO3AZaB++fWvf83s2bN58YtfzMMPP8yNN97IkiVL+vLZBoQkTWGLFy/m6KOPZuHChRx++OGccMIJfftsH/ctaVrycd8jfNy3JGm7GRCSpCYDQtK0tTtdYp/IjozVgJA0Lc2aNYvNmzdPi5CoKjZv3sysWbO2az/vYpI0Lc2bN4/h4WE2bdo06FL6YtasWcybN2+79ulZQCRZCbwV2FhVC7u2zwJvA54CHgDOrapfNfZdAnwOmAFcVlXLe1WnpOlpjz32YMGCBYMuY0rr5SWmK4Cx3+ZYDSysqj8Afgx8fOxOSWYAXwROA44Gzk5y9Nh+kqTe6llAVNXNwCNj2r5RVc9+//sWoHW+cxxwf1U9WFVPAVcB7+hVnZKktkFOUp8HXN9oPxT46aj14a6tKcnSJGuSrJku1xIlqR8GEhBJPgFsAb7U2txoG/c2g6paUVVDVTU0d+7cySpRkqa9vt/FlOQcRiav31zt+8uGgZeNWp8HbOhHbZKkf9HXM4ju7qSPAW+vqifG6XYbcGSSBUn2BN4DfL1fNUqSRvQsIJKsAr4HvCLJcJLzgUuA2cDqJGuTXNr1fWmS6wC6SewPAjcC9wJfraq7e1WnJKnNp7lK0jTm01wlSdvNgJAkNRkQkqQmA0KS1GRASJKaDAhJUpMBIUlqMiAkSU0GhCSpyYCQJDUZEJKkJgNCktRkQEiSmgwISVKTASFJajIgJElNBoQkqcmAkCQ1GRCSpCYDQpLUZEBIkpoMCElSkwEhSWrqWUAkWZlkY5J1o9reneTuJM8kGdrKvg8luSvJ2iRrelWjJGl8vTyDuAJYMqZtHXAGcPM27H9SVS2qqnGDRJLUOzN7deCqujnJ/DFt9wIk6dXHSpImyVSdgyjgG0luT7J0ax2TLE2yJsmaTZs29ak8Sdr9TdWAOKGqFgOnAR9I8sbxOlbViqoaqqqhuXPn9q9CSdrNTcmAqKoN3ftG4BrguMFWJEnTz5QLiCT7JJn97DJwKiOT25KkPurlba6rgO8Br0gynOT8JO9KMgwcD1yb5Mau70uTXNftejDwf5PcAdwKXFtVN/SqTklSWy/vYjp7nE3XNPpuAE7vlh8Eju1VXZKkbTPlLjFJkqYGA0KS1GRASJKaDAhJUpMBIUlqMiAkSU0GhCSpyYCQJDUZEJKkJgNCktRkQEiSmgwISVKTASFJajIgJElNBoQkqcmAkCQ1GRCSpCYDQpLUZEBIkpoMCElSkwEhSWraakAked+o5RPGbPtgr4qSJA3eRGcQHxm1/IUx286b5FokSVPIRAGRcZZb68/dmKxMsjHJulFt705yd5JnkgxtZd8lSe5Lcn+SZRPUKEnqgYkCosZZbq2PdQWwZEzbOuAM4ObxdkoyA/gicBpwNHB2kqMn+CxJ0iSbOcH2Vya5k5GzhSO6Zbr1w7e2Y1XdnGT+mLZ7AZKtnnwcB9xfVQ92fa8C3gHcM0GtkqRJNFFAHNWXKp7rUOCno9aHgdeN1znJUmApwGGHHdbbyiRpGtnqJaaq+snoF/AYsBg4sFvvhdbpxbiXs6pqRVUNVdXQ3Llze1SSJE0/E93m+n+SLOyWX8LIHMJ5wP9M8qEe1TQMvGzU+jxgQ48+S5I0jokmqRdU1bN3IZ0LrK6qtzFyyadXt7neBhyZZEGSPYH3AF/v0WdJksYxUUA8PWr5zcB1AFX1G+CZre2YZBXwPeAVSYaTnJ/kXUmGgeOBa5Pc2PV9aZJnj70F+CBwI3Av8NWqunv7hyZJ2hkTTVL/NMmfM3LZZzFwA0CSvYE9trZjVZ09zqZrGn03AKePWr+OLowkSYMx0RnE+cAxwL8FzqqqX3Xtfwhc3sO6JEkDttUziKraCFzQaP8W8K1eFSVJGrytBkSSrU4OV9XbJ7ccSdJUMdEcxPGMfGltFfB9Jnj+kiRp9zFRQBwCnAKcDbwXuBZY5V1FkrT7m+ib1L+tqhuq6hxGJqbvB77d3dkkSdqNTXQGQZK9gD9m5CxiPvB54H/3tixJ0qBNNEl9JbAQuB74y1HfqpYk7eYmOoP4N8DjwO8DfzHqMd0Bqqpe3MPaJEkDNNH3ICb6Ip0kaTdlAEiSmgwISVKTASFJajIgJElNBoQkqcmAkCQ1GRCSpCYDQpLUZEBIkpoMCElSkwEhSWoyICRJTT0LiCQrk2xMsm5U2/5JVidZ373vN86+v02ytntt9XexJUm90csziCuAJWPalgE3VdWRwE3dess/VdWi7vX2HtYoSRpHzwKiqm4GHhnT/A7gym75SuCdvfp8SdLO6fccxMFV9TBA937QOP1mJVmT5JYkWw2RJEu7vms2bdo02fVK0rQ1VSepD6uqIeC9wF8nOWK8jlW1oqqGqmpo7ty5/atQknZz/Q6Inyd5CUD3vrHVqao2dO8PAt8GXt2vAiVJI/odEF8HzumWzwG+NrZDkv2S7NUtHwicANzTtwolSUBvb3NdBXwPeEWS4STnA8uBU5KsB07p1kkylOSybtejgDVJ7gC+BSyvKgNCkvpsZq8OXFVnj7PpzY2+a4D3d8vfBV7Vq7okSdtmqk5SS5IGzICQJDUZEJKkJgNCktRkQEiSmgwISVKTASFJajIgJElNBoQkqcmAkCQ1GRCSpCYDQpLUZEBIkpoMCElSkwEhSWoyICRJTQaEJKnJgJAkNRkQkqQmA0KS1GRASJKaDAhJUpMBIUlq6mlAJFmZZGOSdaPa9k+yOsn67n2/cfY9p+uzPsk5vaxTkvR8vT6DuAJYMqZtGXBTVR0J3NStP0eS/YGLgNcBxwEXjRckkqTe6GlAVNXNwCNjmt8BXNktXwm8s7HrW4DVVfVIVf0SWM3zg0aS1EODmIM4uKoeBujeD2r0ORT46aj14a7teZIsTbImyZpNmzZNerGSNF1N1UnqNNqq1bGqVlTVUFUNzZ07t8dlSdL0MYiA+HmSlwB07xsbfYaBl41anwds6ENtkqTOIALi68CzdyWdA3yt0edG4NQk+3WT06d2bZKkPun1ba6rgO8Br0gynOR8YDlwSpL1wCndOkmGklwGUFWPAJ8Gbuten+raJEl9kqrmpf1d0tDQUK1Zs2bQZUjSLiPJ7VU11No2VSepJUkDZkBIkpoMCElSkwEhSWoyICRJTQaEJKnJgJAkNRkQkqQmA0KS1GRASJKaDAhJUpMBIUlqMiAkSU0GhCSpyYCQJDUZEJKkJgNCktRkQEiSmgwISVKTASFJajIgJElNBoQkqcmAkCQ1DSQgklyYZF2Su5N8qLH9xCSPJlnbvf7LIOqUpOlsZr8/MMlC4N8BxwFPATckubaq1o/p+vdV9dZ+1ydJGjGIM4ijgFuq6omq2gJ8B3jXAOqQJG3FIAJiHfDGJAckeSFwOvCyRr/jk9yR5Pokx4x3sCRLk6xJsmbTpk29qlmSpp2+X2KqqnuTXAysBh4D7gC2jOn2A+D3quqxJKcDfwccOc7xVgArAIaGhqpnhUvSNDOQSeqq+puqWlxVbwQeAdaP2f7rqnqsW74O2CPJgQMoVZKmrUHdxXRQ934YcAawasz2Q5KkWz6OkTo397tOSZrO+n6JqXN1kgOAp4EPVNUvk1wAUFWXAmcCf5ZkC/BPwHuqystHktRHAwmIqvqjRtulo5YvAS7pa1GSpOfwm9SSpCYDQpLUZEBIkpoMCElSkwEhSWoyICRJTQaEJKnJgJAkNRkQkqQmA0KS1GRASJKasjs9Ay/JJuAng65jOx0I/GLQRfSZY54eHPOu4feqam5rw24VELuiJGuqamjQdfSTY54eHPOuz0tMkqQmA0KS1GRADN6KQRcwAI55enDMuzjnICRJTZ5BSJKaDAhJUpMB0QdJ9k+yOsn67n2/cfqd0/VZn+ScxvavJ1nX+4p33s6MOckLk1yb5EdJ7k6yvL/Vb58kS5Lcl+T+JMsa2/dK8pVu+/eTzB+17eNd+31J3tLPunfUjo43ySlJbk9yV/f+r/pd+47amb9xt/2wJI8l+Wi/ap4UVeWrxy/gM8CybnkZcHGjz/7Ag937ft3yfqO2nwF8GVg36PH0eszAC4GTuj57An8PnDboMY0zzhnAA8DhXa13AEeP6fMfgEu75fcAX+mWj+767wUs6I4zY9Bj6uF4Xw28tFteCPy/QY+n12Metf1q4H8BHx30eLbn5RlEf7wDuLJbvhJ4Z6PPW4DVVfVIVf0SWA0sAUjyIuAjwF/1odbJssNjrqonqupbAFX1FPADYF4fat4RxwH3V9WDXa1XMTL20Ub/t/hb4M1J0rVfVVX/XFX/CNzfHW8q2+HxVtUPq2pD1343MCvJXn2peufszN+YJO9k5B8/d/ep3kljQPTHwVX1MED3flCjz6HAT0etD3dtAJ8G/hvwRC+LnGQ7O2YAkswB3gbc1KM6d9aEYxjdp6q2AI8CB2zjvlPNzox3tD8BflhV/9yjOifTDo85yT7Ax4C/7EOdk27moAvYXST5JnBIY9MntvUQjbZKsgh4eVV9eOx1zUHr1ZhHHX8msAr4fFU9uP0V9sVWxzBBn23Zd6rZmfGObEyOAS4GTp3EunppZ8b8l8B/r6rHuhOKXYoBMUmq6uTxtiX5eZKXVNXDSV4CbGx0GwZOHLU+D/g2cDzwmiQPMfL3OijJt6vqRAash2N+1gpgfVX99SSU2yvDwMtGrc8DNozTZ7gLvX2BR7Zx36lmZ8ZLknnANcCfVtUDvS93UuzMmF8HnJnkM8Ac4JkkT1bVJb0vexIMehJkOryAz/LcCdvPNPrsD/wjI5O0+3XL+4/pM59dZ5J6p8bMyHzL1cALBj2WCcY5k5Hrywv4lwnMY8b0+QDPncD8ard8DM+dpH6QqT9JvTPjndP1/5NBj6NfYx7T55PsYpPUAy9gOrwYuf56E7C+e3/2f4JDwGWj+p3HyETl/cC5jePsSgGxw2Nm5F9oBdwLrO1e7x/0mLYy1tOBHzNyp8snurZPAW/vlmcxcgfL/cCtwOGj9v1Et999TNE7tSZrvMB/Bh4f9TddCxw06PH0+m886hi7XED4qA1JUpN3MUmSmgwISVKTASFJajIgJElNBoQkqcmAkDpJvtu9z0/y3kk+9n9qfZY0lXmbqzRGkhMZuV/9rduxz4yq+u1Wtj9WVS+ajPqkfvEMQuokeaxbXA78UZK1ST6cZEaSzya5LcmdSf591//EJN9K8mXgrq7t77rfOrg7ydKubTmwd3e8L43+rIz4bJJ13e8knDXq2N9O8rfd72J8adTTQZcnuaer5b/287+RphefxSQ93zJGnUF0/6N/tKpe2z2e+h+SfKPrexywsEYe1w1wXlU9kmRv4LYkV1fVsiQfrKpFjc86A1gEHAsc2O1zc7ft1Yw8jmMD8A/ACUnuAd4FvLKqqnvardQTnkFIEzsV+NMka4HvM/IYkSO7bbeOCgeAv0hyB3ALIw9vO5KtewOwqqp+W1U/B74DvHbUsYer6hlGHksxH/g18CRwWZIz2LUeAa9djAEhTSzAn1fVou61oKqePYN4/HedRuYuTgaOr6pjgR8y8oyeiY49ntG/lfBbYGaN/NbAcYw8yPCdwA3bNRJpOxgQ0vP9Bpg9av1G4M+S7AGQ5Pe7H4IZa1/gl1X1RJJXAn84atvTz+4/xs3AWd08x1zgjYw87K2p+3XBfavqOuBDjFyeknrCOQjp+e4EtnSXiq4APsfI5Z0fdBPFm2j/hOoNwAVJ7mTk6ay3jNq2ArgzyQ+q6l+Par+Gkd/8uIORJ9j+x6r6WRcwLbOBryWZxcjZx4d3bIjSxLzNVZLU5CUmSVKTASFJajIgJElNBoQkqcmAkCQ1GRCSpCYDQpLU9P8BzqAP3Gh6ntwAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "als.fit(train, test)\n", + "plot_learning_curve(als)" + ] + }, + { + "cell_type": "code", + "execution_count": 176, + "id": "1bb4f7e6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "11.802698171033642\n", + "1.0273115267397006\n" + ] + } + ], + "source": [ + "print(als.test_mse_record[-1])\n", + "print(als.train_mse_record[-1])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7.4 64-bit ('base': conda)", + "language": "python", + "name": "python37464bitbaseconda9114583a17cf498dbdf9713d49f5bef8" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/recsys/recsys_client.py b/recsys/recsys_client.py new file mode 100644 index 0000000..70db2dd --- /dev/null +++ b/recsys/recsys_client.py @@ -0,0 +1,43 @@ +import sys +from tqdm import tqdm +import argparse +import os +import json +import time + +from threading import Timer + +import psutil + +from ralf.client import RalfClient + +client = RalfClient() + +if __name__ == "__main__": + + parser = argparse.ArgumentParser(description="Specify experiment config") + + # Experiment related + parser.add_argument( + "--data-dir", + type=str, + default="/Users/sarahwooders/repos/flink-feature-flow/datasets", + ) + parser.add_argument( + "--exp-dir", + type=str, + default="/Users/sarahwooders/repos/flink-feature-flow/RayServer/experiments", + ) + parser.add_argument("--file", type=str, default=None) + args = parser.parse_args() + + #user_id = "1" + #res = client.point_query(key=user_id, table_name="user_vectors") + #print(res) + res = client.bulk_query(table_name="user_vectors") + print("User Vectors") + print([r for r in res]) + ''' + res = client.bulk_query(table_name="movie_vectors") + print([r for r in res]) + ''' \ No newline at end of file diff --git a/recsys/recsys_server.py b/recsys/recsys_server.py new file mode 100644 index 0000000..fa8fc6a --- /dev/null +++ b/recsys/recsys_server.py @@ -0,0 +1,242 @@ +import numpy as np +import pandas as pd +import ray +from ralf.operator import Operator, DEFAULT_STATE_CACHE_SIZE +from ralf.operators.source import Source +from ralf.state import Record, Schema +from ralf.core import Ralf +from ralf.table import Table +import argparse +import os +import time +import csv + +NUM_MOVIES = 193609 # hard-coded for small dataset but could loop through source to check + +@ray.remote +class RatingSource(Source): + + """Read in rows from MovieLens rating dataset. + Each row provides a user_id, movie_id, and rating. + """ + + def __init__( + self, + send_rate, + filename, + cache_size=DEFAULT_STATE_CACHE_SIZE, + ): + schema = Schema( + "key", + { + # generate key? + "key": str, + "user_id": int, + "movie_id": int, + "rating": int, + }, + ) + + super().__init__(schema, cache_size, num_worker_threads=1) + print("Reading CSV", filename) + df = pd.read_csv(filename) + self.data = [] + for index, row in df.iterrows(): + self.data.append(row.to_dict()) + self.send_rate = send_rate + self.ts = 0 + + def next(self): + try: + if self.ts < len(self.data): + d = self.data[self.ts] + t = time.time() + + record = Record( + key=str(d["userId"]), + user_id=int(d["userId"]), + movie_id=int(d["movieId"]), + rating=int(d["rating"]), + ) + self.ts += 1 + time.sleep(1 / self.send_rate) + return [record] + else: + print("STOP ITERATION", self.ts) + except Exception as e: + print(e) + raise StopIteration + +@ray.remote +class UserOperator(Operator): + def __init__( + self, + cache_size=DEFAULT_STATE_CACHE_SIZE, + lazy=False, + num_worker_threads=1, + num_features=10, + alpha=.25, + l=.1, + ): + + schema = Schema( + "key", + { + "key": str, + "user_id": int, + "movie_id": int, + "user_vector": np.array, + "movie_vector": np.array, + }, + ) + super().__init__(schema, cache_size, lazy, num_worker_threads) + self.rating_matrix = dict() + self.user_matrix = dict() + self.movie_matrix = dict() + self.num_features = num_features + self.alpha = alpha + self.l = l + + def on_record(self, record: Record) -> Record: + try: + key = record.key + user_id = record.user_id + movie_id = record.movie_id + rating = record.rating + + if user_id in self.user_matrix: + user_vector = self.user_matrix[user_id] + ratings = self.rating_matrix[user_id] + else: + user_vector = np.random.randint(100, size=self.num_features) + ratings = np.random.randint(1, size=NUM_MOVIES) + if movie_id in self.movie_matrix: + movie_vector = self.movie_matrix[movie_id] + else: + movie_vector = np.random.randint(100, size=self.num_features) + ''' + with open("movie_vectors.csv", "a") as f: + csvwriter = csv.writer(f) + csvwriter.writerow([str(movie_id), str(movie_vector)]) + ''' + ratings[movie_id-1] = rating + self.rating_matrix[user_id] = ratings + self.movie_matrix[movie_id] = movie_vector + # recompute features + print(self.movie_matrix) + sub_result = rating - np.dot(np.transpose(user_vector), movie_vector) + new_user_vector = self.alpha * sub_result * movie_vector + self.l * user_vector + self.user_matrix[user_id] = new_user_vector + record = Record( + key=key, + user_id=user_id, + movie_id=movie_id, + user_vector=new_user_vector, + movie_vector=movie_vector, + ) + print("Sending record from user", record.movie_id) + return [record] + + except Exception as e: + print(e) + + +# Currently unnecessary? +@ray.remote +class MovieOperator(Operator): + def __init__( + self, + cache_size=DEFAULT_STATE_CACHE_SIZE, + lazy=False, + num_worker_threads=1, + ): + + schema = Schema( + "key", + { + "key": str, + "movie_id": int, + "movie_vector": np.array, + }, + ) + super().__init__(schema, cache_size, lazy, num_worker_threads) + + def on_record(self, record: Record) -> Record: + # Currently, not updating the movies table (only the user) + print("Hit record", record) + new_record = Record( + key=str(record.movie_id), + movie_id=record.movie_id, + movie_vector=record.movie_vector, + ) + return [new_record] + +def from_file(send_rate: int, f: str): + return Table([], RatingSource, send_rate, f) + +def create_doc_pipeline(args): + ralf_conn = Ralf( + metric_dir=os.path.join(args.exp_dir, args.exp), log_wandb=False, exp_id=args.exp + ) + + # create pipeline + source = from_file(args.send_rate, os.path.join(args.data_dir, args.file)) + user_vectors = source.map(UserOperator, args, num_replicas=1).as_queryable("user_vectors") + #movies = source.join(user_vectors, MovieOperator).as_queryable("movie_vectors") + #movie_vectors = user_vectors.map(MovieOperator).as_queryable("movie_vectors") + # deploy + ralf_conn.deploy(source, "source") + + return ralf_conn + + +def main(): + + parser = argparse.ArgumentParser(description="Specify experiment config") + parser.add_argument("--send-rate", type=int, default=100) + parser.add_argument("--timesteps", type=int, default=10) + + # Experiment related + # TODO: add wikipedia dataset + parser.add_argument( + "--data-dir", + type=str, + default="/Users/amitnarang/Downloads/ml-latest-small", + ) + parser.add_argument( + "--exp-dir", + type=str, + default="/Users/amitnarang/ralf-experiments", + ) + + parser.add_argument("--file", type=str, default=None) + parser.add_argument("--exp", type=str) # experiment id + args = parser.parse_args() + # create experiment directory + ex_id = args.exp + ex_dir = os.path.join(args.exp_dir, ex_id) + os.mkdir(ex_dir) + + # create stl pipeline + ralf_conn = create_doc_pipeline(args) + ralf_conn.run() + + # snapshot stats + run_duration = 120 + snapshot_interval = 10 + start = time.time() + while time.time() - start < run_duration: + snapshot_time = ralf_conn.snapshot() + remaining_time = snapshot_interval - snapshot_time + if remaining_time < 0: + print( + f"snapshot interval is {snapshot_interval} but it took {snapshot_time} to perform it!" + ) + time.sleep(0) + else: + print("writing snapshot", snapshot_time) + time.sleep(remaining_time) + + +if __name__ == "__main__": + main()