From a02f985602040692bcf42c55c6cddfc7a0195e6a Mon Sep 17 00:00:00 2001 From: Amit Narang Date: Tue, 26 Oct 2021 20:22:53 -0700 Subject: [PATCH 1/9] WIP --- recsys/recsys_server.py | 179 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 179 insertions(+) create mode 100644 recsys/recsys_server.py diff --git a/recsys/recsys_server.py b/recsys/recsys_server.py new file mode 100644 index 0000000..37843f8 --- /dev/null +++ b/recsys/recsys_server.py @@ -0,0 +1,179 @@ +import numpy as np +import pandas as pd +import ray +from ralf.operator import Operator, DEFAULT_STATE_CACHE_SIZE +from ralf.operators import ( + Source, +) +from ralf.state import Record, Schema +from ralf.core import Ralf +from ralf.table import Table + +NUM_MOVIES = 193609 # hard-coded for small dataset but could loop through source to check + +@ray.remote +class RatingSource(Source): + + """Read in rows from MovieLens rating dataset. + Each row provides a user_id, movie_id, and rating. + """ + + def __init__( + self, + send_rate, + filename, + cache_size=DEFAULT_STATE_CACHE_SIZE, + ): + schema = Schema( + "key", + { + # generate key? 
+ "key": int, + "user_id": int, + "movie_id": int, + "rating": int, + }, + ) + + super().__init__(schema, cache_size, num_worker_threads=1) + print("Reading CSV", filename) + df = pd.read_csv(filename) + self.data = [] + for index, row in df.iterrows(): + self.data.append(row.to_dict()) + self.send_rate = send_rate + self.ts = 0 + self.matrix = dict() + + def next(self): + try: + if self.ts < len(self.data): + + d = self.data[self.ts] + t = time.time() + + ratings[movie_id - 1] = rating + self.matrix[user_id] = ratings + + record = Record( + key=d["userId"], + user_id=d["userId"], + movie_id=d["movieId"], + rating=d["rating"], + ) + self.ts += 1 + time.sleep(1 / self.send_rate) + return [record] + else: + print("STOP ITERATION", self.ts) + except Exception as e: + print(e) + raise StopIteration + +@ray.remote +class Users(Operator): + def __init__( + self, + cache_size=DEFAULT_STATE_CACHE_SIZE, + lazy=False, + num_worker_threads=1, + num_features=10, + alpha=.25, + l=.1, + ): + + schema = Schema( + "key", + { + "key": int, + "user_id": int, + "features": np.array, + }, + ) + super().__init__(schema, cache_size, lazy, num_worker_threads) + self.rating_matrix = dict() + self.user_matrix = dict() + self.movie_matrix = dict() + self.num_features = num_features + self.alpha = alpha + self.l = l + + def on_record(self, record: Record) -> Record: + + try: + user_id = record.user_id + movie_id = record.movie_id + rating = record.rating + + if user_id in self.user_matrix: + user_vector = self.user_matrix[user_id] + ratings = self.rating_matrix[user_id] + else: + user_vector = np.random.rand(self.num_features) + ratings = np.random.rand(NUM_MOVIES) + + if movie_id in self.movie_matrix: + movie_vector = self.movie_matrix[movie_id] + else: + movie_vector = np.random.rand(self.num_features) + + ratings[movie_id-1] = rating + self.rating_matrix[user_id] = ratings + + # recompute features + sub_result = rating - np.dot(np.transpose(user_vector), movie_vector) + new_user_vector = 
alpha * sub_result * movie_vector + self.l * user_vector + + self.matrix[user_id] = new_user_vector + record = Record( + key=d["userId"], + user_id=d["userId"], + features=new_user_vector, + ) + return [record] + + except Exception as e: + print(e) + +''' +# Currently unnecessary? +@ray.remote +class Movies(Operator): + def __init__( + self, + cache_size=DEFAULT_STATE_CACHE_SIZE, + lazy=False, + num_worker_threads=1, + ): + + schema = Schema( + "key", + { + "key": int, + "user_id": int, + "movie_id": int, + "rating": int, + }, + ) + super().__init__(schema, cache_size, lazy, num_worker_threads) + + def on_record(self, record: Record) -> None: + # Currently, not updating the movies table (only the user) + return None +''' + +def from_file(send_rate: int, f: str): + return Table([], RatingSource, send_rate, f) + +def create_doc_pipeline(args): + ralf_conn = Ralf( + metric_dir=os.path.join(args.exp_dir, args.exp), log_wandb=True, exp_id=args.exp + ) + + # create pipeline + source = from_file(args.send_rate, os.path.join(args.data_dir, args.file)) + user_vectors = source.map(UserOperator, args, num_replicas=8).as_queryable("user_vectors") + # deploy + ralf_conn.deploy(source, "source") + + return ralf_conn From 15252afd95c3ccf8693bd23a5fac16d21b119440 Mon Sep 17 00:00:00 2001 From: Amit Narang Date: Wed, 27 Oct 2021 00:06:36 -0700 Subject: [PATCH 2/9] WIP --- recsys/recsys_client.py | 38 +++++++++++++++ recsys/recsys_server.py | 104 ++++++++++++++++++++++++++++++---------- 2 files changed, 116 insertions(+), 26 deletions(-) create mode 100644 recsys/recsys_client.py diff --git a/recsys/recsys_client.py b/recsys/recsys_client.py new file mode 100644 index 0000000..3ea46bc --- /dev/null +++ b/recsys/recsys_client.py @@ -0,0 +1,38 @@ +import sys +from tqdm import tqdm +import argparse +import os +import json +import time + +from threading import Timer + +import psutil + +from ralf.client import RalfClient + +client = RalfClient() + +if __name__ == "__main__": + + parser = 
argparse.ArgumentParser(description="Specify experiment config") + + # Experiment related + parser.add_argument( + "--data-dir", + type=str, + default="/Users/sarahwooders/repos/flink-feature-flow/datasets", + ) + parser.add_argument( + "--exp-dir", + type=str, + default="/Users/sarahwooders/repos/flink-feature-flow/RayServer/experiments", + ) + parser.add_argument("--file", type=str, default=None) + args = parser.parse_args() + + #user_id = "1" + #res = client.point_query(key=user_id, table_name="user_vectors") + #print(res) + res = client.bulk_query(table_name="user_vectors") + print([r for r in res]) \ No newline at end of file diff --git a/recsys/recsys_server.py b/recsys/recsys_server.py index 37843f8..4d33a4d 100644 --- a/recsys/recsys_server.py +++ b/recsys/recsys_server.py @@ -2,12 +2,13 @@ import pandas as pd import ray from ralf.operator import Operator, DEFAULT_STATE_CACHE_SIZE -from ralf.operators import ( - Source, -) +from ralf.operators.source import Source from ralf.state import Record, Schema from ralf.core import Ralf from ralf.table import Table +import argparse +import os +import time NUM_MOVIES = 193609 # hard-coded for small dataset but could loop through source to check @@ -28,7 +29,7 @@ def __init__( "key", { # generate key? 
- "key": int, + "key": str, "user_id": int, "movie_id": int, "rating": int, @@ -43,23 +44,18 @@ def __init__( self.data.append(row.to_dict()) self.send_rate = send_rate self.ts = 0 - self.matrix = dict() def next(self): try: if self.ts < len(self.data): - d = self.data[self.ts] t = time.time() - - ratings[movie_id - 1] = rating - self.matrix[user_id] = ratings record = Record( - key=d["userId"], - user_id=d["userId"], - movie_id=d["movieId"], - rating=d["rating"], + key=str(d["userId"]), + user_id=int(d["userId"]), + movie_id=int(d["movieId"]), + rating=int(d["rating"]), ) self.ts += 1 time.sleep(1 / self.send_rate) @@ -71,7 +67,7 @@ def next(self): raise StopIteration @ray.remote -class Users(Operator): +class UserOperator(Operator): def __init__( self, cache_size=DEFAULT_STATE_CACHE_SIZE, @@ -85,7 +81,7 @@ def __init__( schema = Schema( "key", { - "key": int, + "key": str, "user_id": int, "features": np.array, }, @@ -101,6 +97,7 @@ def __init__( def on_record(self, record: Record) -> Record: try: + key = record.key user_id = record.user_id movie_id = record.movie_id rating = record.rating @@ -109,25 +106,27 @@ def on_record(self, record: Record) -> Record: user_vector = self.user_matrix[user_id] ratings = self.rating_matrix[user_id] else: - user_vector = np.random.rand(self.num_features) - ratings = np.random.rand(NUM_MOVIES) - + user_vector = np.random.randint(100, size=self.num_features) + ratings = np.random.randint(1, size=NUM_MOVIES) if movie_id in self.movie_matrix: movie_vector = self.movie_matrix[movie_id] else: - movie_vector = np.random.rand(self.num_features) - + movie_vector = np.random.randint(100, size=self.num_features) + + print(type(movie_id)) ratings[movie_id-1] = rating self.rating_matrix[user_id] = ratings # recompute features + print(user_vector, movie_vector) sub_result = rating - np.dot(np.transpose(user_vector), movie_vector) - new_user_vector = alpha * sub_result * movie_vector + self.l * user_vector - - self.matrix[user_id] = 
new_user_vector + print(sub_result) + new_user_vector = self.alpha * sub_result * movie_vector + self.l * user_vector + print(new_user_vector) + self.user_matrix[user_id] = new_user_vector record = Record( - key=d["userId"], - user_id=d["userId"], + key=key, + user_id=user_id, features=new_user_vector, ) return [record] @@ -167,7 +166,7 @@ def from_file(send_rate: int, f: str): def create_doc_pipeline(args): ralf_conn = Ralf( - metric_dir=os.path.join(args.exp_dir, args.exp), log_wandb=True, exp_id=args.exp + metric_dir=os.path.join(args.exp_dir, args.exp), log_wandb=False, exp_id=args.exp ) # create pipeline @@ -177,3 +176,56 @@ def create_doc_pipeline(args): ralf_conn.deploy(source, "source") return ralf_conn + + +def main(): + + parser = argparse.ArgumentParser(description="Specify experiment config") + parser.add_argument("--send-rate", type=int, default=100) + parser.add_argument("--timesteps", type=int, default=10) + + # Experiment related + # TODO: add wikipedia dataset + parser.add_argument( + "--data-dir", + type=str, + default="/Users/amitnarang/Downloads/ml-latest-small", + ) + parser.add_argument( + "--exp-dir", + type=str, + default="/Users/amitnarang/ralf-experiments", + ) + + parser.add_argument("--file", type=str, default=None) + parser.add_argument("--exp", type=str) # experiment id + args = parser.parse_args() + print(args) + # create experiment directory + ex_id = args.exp + ex_dir = os.path.join(args.exp_dir, ex_id) + os.mkdir(ex_dir) + + # create stl pipeline + ralf_conn = create_doc_pipeline(args) + ralf_conn.run() + + # snapshot stats + run_duration = 120 + snapshot_interval = 10 + start = time.time() + while time.time() - start < run_duration: + snapshot_time = ralf_conn.snapshot() + remaining_time = snapshot_interval - snapshot_time + if remaining_time < 0: + print( + f"snapshot interval is {snapshot_interval} but it took {snapshot_time} to perform it!" 
+ ) + time.sleep(0) + else: + print("writing snapshot", snapshot_time) + time.sleep(remaining_time) + + +if __name__ == "__main__": + main() From b8741f3eb4c96678fc4ce7534812f69f0794ecad Mon Sep 17 00:00:00 2001 From: Amit Narang Date: Wed, 27 Oct 2021 00:57:54 -0700 Subject: [PATCH 3/9] WIP --- recsys/recsys_client.py | 7 ++++- recsys/recsys_server.py | 58 +++++++++++++++++++++-------------------- 2 files changed, 36 insertions(+), 29 deletions(-) diff --git a/recsys/recsys_client.py b/recsys/recsys_client.py index 3ea46bc..70db2dd 100644 --- a/recsys/recsys_client.py +++ b/recsys/recsys_client.py @@ -35,4 +35,9 @@ #res = client.point_query(key=user_id, table_name="user_vectors") #print(res) res = client.bulk_query(table_name="user_vectors") - print([r for r in res]) \ No newline at end of file + print("User Vectors") + print([r for r in res]) + ''' + res = client.bulk_query(table_name="movie_vectors") + print([r for r in res]) + ''' \ No newline at end of file diff --git a/recsys/recsys_server.py b/recsys/recsys_server.py index 4d33a4d..8288f67 100644 --- a/recsys/recsys_server.py +++ b/recsys/recsys_server.py @@ -9,6 +9,7 @@ import argparse import os import time +import csv NUM_MOVIES = 193609 # hard-coded for small dataset but could loop through source to check @@ -83,7 +84,9 @@ def __init__( { "key": str, "user_id": int, - "features": np.array, + "movie_id": int, + "user_vector": np.array, + "movie_vector": np.array, }, ) super().__init__(schema, cache_size, lazy, num_worker_threads) @@ -95,7 +98,6 @@ def __init__( self.l = l def on_record(self, record: Record) -> Record: - try: key = record.key user_id = record.user_id @@ -112,32 +114,36 @@ def on_record(self, record: Record) -> Record: movie_vector = self.movie_matrix[movie_id] else: movie_vector = np.random.randint(100, size=self.num_features) - - print(type(movie_id)) + ''' + with open("movie_vectors.csv", "a") as f: + csvwriter = csv.writer(f) + csvwriter.writerow([str(movie_id), str(movie_vector)]) + ''' 
ratings[movie_id-1] = rating self.rating_matrix[user_id] = ratings - + self.movie_matrix[movie_id] = movie_vector # recompute features - print(user_vector, movie_vector) + print(self.movie_matrix) sub_result = rating - np.dot(np.transpose(user_vector), movie_vector) - print(sub_result) new_user_vector = self.alpha * sub_result * movie_vector + self.l * user_vector - print(new_user_vector) self.user_matrix[user_id] = new_user_vector record = Record( key=key, user_id=user_id, - features=new_user_vector, + movie_id=movie_id, + user_vector=new_user_vector, + movie_vector=movie_vector, ) + print("Sending record from user", record.movie_id) return [record] except Exception as e: print(e) -''' + # Currently unnecessary? @ray.remote -class Movies(Operator): +class MovieOperator(Operator): def __init__( self, cache_size=DEFAULT_STATE_CACHE_SIZE, @@ -148,18 +154,22 @@ def __init__( schema = Schema( "key", { - "key": int, - "user_id": int, + "key": str, "movie_id": int, - "rating": int, + "movie_vector": np.array, }, ) super().__init__(schema, cache_size, lazy, num_worker_threads) - def on_record(self, record: Record) -> None: + def on_record(self, record: Record) -> Record: # Currently, not updating the movies table (only the user) - return None -''' + print("Hit record", record) + new_record = Record( + key=str(record.movie_id), + movie_id=record.movie_id, + movie_vector=record.movie_vector, + ) + return [new_record] def from_file(send_rate: int, f: str): return Table([], RatingSource, send_rate, f) @@ -172,6 +182,8 @@ def create_doc_pipeline(args): # create pipeline source = from_file(args.send_rate, os.path.join(args.data_dir, args.file)) user_vectors = source.map(UserOperator, args, num_replicas=8).as_queryable("user_vectors") + #movies = source.join(user_vectors, MovieOperator).as_queryable("movie_vectors") + #movie_vectors = user_vectors.map(MovieOperator).as_queryable("movie_vectors") # deploy ralf_conn.deploy(source, "source") @@ -200,7 +212,6 @@ def main(): 
parser.add_argument("--file", type=str, default=None) parser.add_argument("--exp", type=str) # experiment id args = parser.parse_args() - print(args) # create experiment directory ex_id = args.exp ex_dir = os.path.join(args.exp_dir, ex_id) @@ -215,16 +226,7 @@ def main(): snapshot_interval = 10 start = time.time() while time.time() - start < run_duration: - snapshot_time = ralf_conn.snapshot() - remaining_time = snapshot_interval - snapshot_time - if remaining_time < 0: - print( - f"snapshot interval is {snapshot_interval} but it took {snapshot_time} to perform it!" - ) - time.sleep(0) - else: - print("writing snapshot", snapshot_time) - time.sleep(remaining_time) + pass if __name__ == "__main__": From 3172cae89478b1c2a54f74adea3146742b3409dd Mon Sep 17 00:00:00 2001 From: Amit Narang Date: Thu, 28 Oct 2021 17:17:29 -0700 Subject: [PATCH 4/9] WIP Notebook --- recsys/als-streaming-test.ipynb | 238 ++++++++++++++++++++++++++++++++ 1 file changed, 238 insertions(+) create mode 100644 recsys/als-streaming-test.ipynb diff --git a/recsys/als-streaming-test.ipynb b/recsys/als-streaming-test.ipynb new file mode 100644 index 0000000..afe139e --- /dev/null +++ b/recsys/als-streaming-test.ipynb @@ -0,0 +1,238 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "8e0e6a4f", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from collections import defaultdict\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.metrics import mean_squared_error" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "5b033873", + "metadata": {}, + "outputs": [], + "source": [ + "ratings_path = \"/Users/amitnarang/Downloads/ml-latest-small/ratings.csv\"" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "a00a310f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " userId movieId rating timestamp\n", + "0 1 1 4.0 964982703\n", + 
"1 1 3 4.0 964981247\n", + "2 1 6 4.0 964982224\n", + "3 1 47 5.0 964983815\n", + "4 1 50 5.0 964982931\n", + "... ... ... ... ...\n", + "100831 610 166534 4.0 1493848402\n", + "100832 610 168248 5.0 1493850091\n", + "100833 610 168250 5.0 1494273047\n", + "100834 610 168252 5.0 1493846352\n", + "100835 610 170875 3.0 1493846415\n", + "\n", + "[100836 rows x 4 columns]\n", + " userId movieId rating\n", + "0 1 1 4.0\n", + "1 1 101 5.0\n", + "2 1 223 3.0\n", + "3 1 333 5.0\n", + "4 1 441 4.0\n", + "... ... ... ...\n", + "16801 610 150401 3.0\n", + "16802 610 156726 4.5\n", + "16803 610 159093 3.0\n", + "16804 610 161582 4.0\n", + "16805 610 166528 4.0\n", + "\n", + "[16806 rows x 3 columns]\n", + " userId movieId rating\n", + "0 1 3 4.0\n", + "1 1 6 4.0\n", + "2 1 47 5.0\n", + "3 1 50 5.0\n", + "4 1 70 3.0\n", + "... ... ... ...\n", + "84025 610 166534 4.0\n", + "84026 610 168248 5.0\n", + "84027 610 168250 5.0\n", + "84028 610 168252 5.0\n", + "84029 610 170875 3.0\n", + "\n", + "[84030 rows x 3 columns]\n" + ] + } + ], + "source": [ + "df = pd.read_csv(ratings_path, sep = ',')\n", + "\n", + "user_vector_matrix = dict()\n", + "movie_vector_matrix = dict()\n", + "\n", + "columns = ['userId', 'movieId', 'rating']\n", + "test_data = []\n", + "train_data = []\n", + "\n", + "for row in df.itertuples():\n", + " if row.Index % 6 == 0:\n", + " test_data.append([row.userId, row.movieId, row.rating])\n", + " else:\n", + " train_data.append([row.userId, row.movieId, row.rating])\n", + "\n", + "test_df = pd.DataFrame(data=test_data, columns=columns)\n", + "train_df = pd.DataFrame(data=train_data, columns=columns)\n", + " \n", + "print(df)\n", + "print(test_df)\n", + "print(train_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "a6a8fb84", + "metadata": {}, + "outputs": [], + "source": [ + "class ALSStreamingModel:\n", + " def __init__(self, l, num_features, alpha):\n", + " self.l = l\n", + " self.num_features = num_features\n", + " self.alpha = 
alpha\n", + " self.user_features = dict()\n", + " self.movie_features = dict()\n", + " \n", + " def fit(self, train):\n", + " \"\"\"\n", + " pass in training and testing at the same time to record\n", + " model convergence, assuming both dataset is in the form\n", + " of User x Item matrix with cells as ratings\n", + " \"\"\"\n", + " \n", + " for row in train.itertuples():\n", + " self.update_user_vector(row)\n", + " \n", + " return self \n", + "\n", + " def update_user_vector(self, row):\n", + " rating = row.rating\n", + " userId = row.userId\n", + " movieId = row.movieId\n", + "\n", + " if userId in self.user_features:\n", + " user_vector = self.user_features[userId]\n", + " else:\n", + " user_vector = np.random.randint(100, size=self.num_features)\n", + "\n", + " if movieId in self.movie_features:\n", + " movie_vector = self.movie_features[movieId]\n", + " else:\n", + " movie_vector = np.random.randint(100, size=self.num_features)\n", + " self.movie_features[movieId] = movie_vector\n", + " #print(user_vector)\n", + " sub_result = rating - np.dot(np.transpose(user_vector), movie_vector)\n", + " new_user_vector = self.alpha * sub_result * movie_vector + self.l * user_vector\n", + " #print(new_user_vector)\n", + " self.user_features[userId] = new_user_vector\n", + " \n", + " def predict_test(self, test):\n", + " \n", + " correct_results = []\n", + " predicted_results = []\n", + " for row in test.itertuples():\n", + " prediction = self.predict_rating(row.userId, row.movieId)\n", + " predicted_results.append(prediction)\n", + " correct_results.append(row.rating)\n", + " \n", + " return self.compute_mse(correct_results, predicted_results)\n", + " \n", + " def predict_rating(self, userId, movieId):\n", + " \"\"\"predict ratings for every user and item\"\"\"\n", + " if userId not in self.user_features or movieId not in self.movie_features:\n", + " return 0\n", + " user_vector = self.user_features[userId]\n", + " movie_vector = self.movie_features[movieId]\n", + " 
prediction = user_vector.dot(movie_vector.T)\n", + " if np.isnan(prediction) or prediction > 5:\n", + " return 5\n", + " if prediction < 0:\n", + " return 0\n", + " return prediction\n", + "\n", + " def compute_mse(self, y_true, y_pred):\n", + " \"\"\"ignore zero terms prior to comparing the mse\"\"\"\n", + " mse = mean_squared_error(np.asarray(y_true), np.asarray(y_pred))\n", + " return mse" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "id": "1ada4c5b", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:38: RuntimeWarning: overflow encountered in multiply\n", + "/opt/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:38: RuntimeWarning: invalid value encountered in add\n", + "/opt/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:38: RuntimeWarning: invalid value encountered in multiply\n" + ] + }, + { + "data": { + "text/plain": [ + "4.671605379031298" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "als = ALSStreamingModel(.01, 10, .1)\n", + "als.fit(train_df)\n", + "als.predict_test(test_df)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7.4 64-bit ('base': conda)", + "language": "python", + "name": "python37464bitbaseconda9114583a17cf498dbdf9713d49f5bef8" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 470200ea7d39d33e3b12982546c8920c25c9eea8 Mon Sep 17 00:00:00 2001 From: Amit Narang Date: Fri, 29 Oct 2021 11:15:24 -0700 Subject: [PATCH 5/9] Edit both notebooks --- recsys/als-streaming-test.ipynb | 81 ++++----- recsys/als.ipynb | 306 
++++++++++++++++++++++++++++++++ 2 files changed, 343 insertions(+), 44 deletions(-) create mode 100644 recsys/als.ipynb diff --git a/recsys/als-streaming-test.ipynb b/recsys/als-streaming-test.ipynb index afe139e..9874f8a 100644 --- a/recsys/als-streaming-test.ipynb +++ b/recsys/als-streaming-test.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 89, "id": "8e0e6a4f", "metadata": {}, "outputs": [], @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 90, "id": "5b033873", "metadata": {}, "outputs": [], @@ -26,7 +26,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 91, "id": "a00a310f", "metadata": {}, "outputs": [ @@ -50,18 +50,18 @@ "[100836 rows x 4 columns]\n", " userId movieId rating\n", "0 1 1 4.0\n", - "1 1 101 5.0\n", - "2 1 223 3.0\n", - "3 1 333 5.0\n", - "4 1 441 4.0\n", + "1 1 110 4.0\n", + "2 1 235 4.0\n", + "3 1 362 5.0\n", + "4 1 527 5.0\n", "... ... ... ...\n", - "16801 610 150401 3.0\n", - "16802 610 156726 4.5\n", - "16803 610 159093 3.0\n", - "16804 610 161582 4.0\n", - "16805 610 166528 4.0\n", + "14401 610 152077 4.0\n", + "14402 610 158238 5.0\n", + "14403 610 160527 4.5\n", + "14404 610 163981 3.5\n", + "14405 610 170875 3.0\n", "\n", - "[16806 rows x 3 columns]\n", + "[14406 rows x 3 columns]\n", " userId movieId rating\n", "0 1 3 4.0\n", "1 1 6 4.0\n", @@ -69,13 +69,13 @@ "3 1 50 5.0\n", "4 1 70 3.0\n", "... ... ... 
...\n", - "84025 610 166534 4.0\n", - "84026 610 168248 5.0\n", - "84027 610 168250 5.0\n", - "84028 610 168252 5.0\n", - "84029 610 170875 3.0\n", + "86425 610 166528 4.0\n", + "86426 610 166534 4.0\n", + "86427 610 168248 5.0\n", + "86428 610 168250 5.0\n", + "86429 610 168252 5.0\n", "\n", - "[84030 rows x 3 columns]\n" + "[86430 rows x 3 columns]\n" ] } ], @@ -90,7 +90,7 @@ "train_data = []\n", "\n", "for row in df.itertuples():\n", - " if row.Index % 6 == 0:\n", + " if row.Index % 7 == 0:\n", " test_data.append([row.userId, row.movieId, row.rating])\n", " else:\n", " train_data.append([row.userId, row.movieId, row.rating])\n", @@ -105,7 +105,7 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 92, "id": "a6a8fb84", "metadata": {}, "outputs": [], @@ -119,12 +119,6 @@ " self.movie_features = dict()\n", " \n", " def fit(self, train):\n", - " \"\"\"\n", - " pass in training and testing at the same time to record\n", - " model convergence, assuming both dataset is in the form\n", - " of User x Item matrix with cells as ratings\n", - " \"\"\"\n", - " \n", " for row in train.itertuples():\n", " self.update_user_vector(row)\n", " \n", @@ -138,7 +132,7 @@ " if userId in self.user_features:\n", " user_vector = self.user_features[userId]\n", " else:\n", - " user_vector = np.random.randint(100, size=self.num_features)\n", + " user_vector = np.random.randint(5, size=self.num_features)\n", "\n", " if movieId in self.movie_features:\n", " movie_vector = self.movie_features[movieId]\n", @@ -151,11 +145,11 @@ " #print(new_user_vector)\n", " self.user_features[userId] = new_user_vector\n", " \n", - " def predict_test(self, test):\n", + " def predict_set(self, data):\n", " \n", " correct_results = []\n", " predicted_results = []\n", - " for row in test.itertuples():\n", + " for row in data.itertuples():\n", " prediction = self.predict_rating(row.userId, row.movieId)\n", " predicted_results.append(prediction)\n", " correct_results.append(row.rating)\n", @@ 
-183,34 +177,33 @@ }, { "cell_type": "code", - "execution_count": 66, - "id": "1ada4c5b", + "execution_count": 94, + "id": "2f287172", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/opt/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:38: RuntimeWarning: overflow encountered in multiply\n", - "/opt/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:38: RuntimeWarning: invalid value encountered in add\n", - "/opt/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:38: RuntimeWarning: invalid value encountered in multiply\n" + "/opt/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:32: RuntimeWarning: overflow encountered in multiply\n", + "/opt/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:32: RuntimeWarning: invalid value encountered in add\n", + "/opt/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:32: RuntimeWarning: invalid value encountered in multiply\n" ] }, { - "data": { - "text/plain": [ - "4.671605379031298" - ] - }, - "execution_count": 66, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "4.660211023184784\n", + "4.335815110494042\n" + ] } ], "source": [ "als = ALSStreamingModel(.01, 10, .1)\n", "als.fit(train_df)\n", - "als.predict_test(test_df)" + "print(als.predict_set(test_df))\n", + "print(als.predict_set(train_df))" ] } ], diff --git a/recsys/als.ipynb b/recsys/als.ipynb new file mode 100644 index 0000000..7f42caf --- /dev/null +++ b/recsys/als.ipynb @@ -0,0 +1,306 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 96, + "id": "f74e18c0", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from collections import defaultdict\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.metrics import mean_squared_error" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "id": "3a949759", + "metadata": {}, 
+ "outputs": [], + "source": [ + "ratings_path = \"/Users/amitnarang/Downloads/ml-latest-small/ratings.csv\"" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "id": "f2587d6e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " userId movieId rating timestamp\n", + "0 1 1 4.0 964982703\n", + "1 1 3 4.0 964981247\n", + "2 1 6 4.0 964982224\n", + "3 1 47 5.0 964983815\n", + "4 1 50 5.0 964982931\n", + "... ... ... ... ...\n", + "100831 610 166534 4.0 1493848402\n", + "100832 610 168248 5.0 1493850091\n", + "100833 610 168250 5.0 1494273047\n", + "100834 610 168252 5.0 1493846352\n", + "100835 610 170875 3.0 1493846415\n", + "\n", + "[100836 rows x 4 columns]\n", + "[[0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " ...\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]]\n", + "[[4. 0. 4. ... 0. 0. 0. ]\n", + " [0. 0. 0. ... 0. 0. 0. ]\n", + " [0. 0. 0. ... 0. 0. 0. ]\n", + " ...\n", + " [2.5 2. 2. ... 0. 0. 0. ]\n", + " [3. 0. 0. ... 0. 0. 0. ]\n", + " [5. 0. 0. ... 0. 0. 0. ]]\n" + ] + } + ], + "source": [ + "df = pd.read_csv(ratings_path, sep = ',')\n", + "print(df)\n", + "n_users = max(df['userId'])\n", + "n_items = max(df['movieId'])\n", + "ratings = np.zeros((n_users, n_items))\n", + "print(ratings)\n", + "for row in df.itertuples():\n", + " ratings[row.userId - 1, row.movieId - 1] = row.rating\n", + "print(ratings)" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "id": "a0946387", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[4. , 0. , 4. , ..., 0. , 0. , 0. ],\n", + " [0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " [0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " ...,\n", + " [2.5, 2. , 2. , ..., 0. , 0. , 0. ],\n", + " [3. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " [5. , 0. , 0. , ..., 0. , 0. , 0. 
]])" + ] + }, + "execution_count": 99, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# taken from ethen8181\n", + "def create_train_test(ratings):\n", + " \"\"\"\n", + " split into training and test sets,\n", + " remove 10 ratings from each user\n", + " and assign them to the test set\n", + " \"\"\"\n", + " test = np.zeros(ratings.shape)\n", + " train = ratings.copy()\n", + " for user in range(ratings.shape[0]):\n", + " test_index = np.random.choice(\n", + " np.flatnonzero(ratings[user]), size = 10, replace = False)\n", + "\n", + " train[user, test_index] = 0.0\n", + " test[user, test_index] = ratings[user, test_index]\n", + " \n", + " # assert that training and testing set are truly disjoint\n", + " assert np.all(train * test == 0)\n", + " return train, test\n", + "\n", + "train, test = create_train_test(ratings)\n", + "del ratings\n", + "train" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "id": "cc7a4432", + "metadata": {}, + "outputs": [], + "source": [ + "class ALSModel:\n", + " def __init__(self, l, num_features, n_iters):\n", + " self.l = l\n", + " self.num_features = num_features\n", + " self.n_iters = n_iters\n", + " \n", + " def fit(self, train, test):\n", + " \"\"\"\n", + " pass in training and testing at the same time to record\n", + " model convergence, assuming both dataset is in the form\n", + " of User x Item matrix with cells as ratings\n", + " \"\"\"\n", + " self.n_user, self.n_item = train.shape\n", + " self.user_factors = np.random.random((self.n_user, self.num_features))\n", + " self.item_factors = np.random.random((self.n_item, self.num_features))\n", + " \n", + " # record the training and testing mse for every iteration\n", + " # to show convergence later (usually, not worth it for production)\n", + " self.test_mse_record = []\n", + " self.train_mse_record = [] \n", + " for i in range(self.n_iters):\n", + " self.user_factors = self._als_step(train, self.user_factors, self.item_factors)\n", + " 
self.item_factors = self._als_step(train.T, self.item_factors, self.user_factors) \n", + " predictions = self.predict()\n", + " if i == self.n_iters - 1:\n", + " print(predictions)\n", + " test_mse = self.compute_mse(test, predictions)\n", + " train_mse = self.compute_mse(train, predictions)\n", + " self.test_mse_record.append(test_mse)\n", + " self.train_mse_record.append(train_mse)\n", + " \n", + " return self \n", + " \n", + " def _als_step(self, ratings, solve_vecs, fixed_vecs):\n", + " \"\"\"\n", + " when updating the user matrix,\n", + " the item matrix is the fixed vector and vice versa\n", + " \"\"\"\n", + " A = fixed_vecs.T.dot(fixed_vecs) + np.eye(self.num_features) * self.l\n", + " b = ratings.dot(fixed_vecs)\n", + " A_inv = np.linalg.inv(A)\n", + " solve_vecs = b.dot(A_inv)\n", + " return solve_vecs\n", + " \n", + " def predict(self):\n", + " \"\"\"predict ratings for every user and item\"\"\"\n", + " pred = self.user_factors.dot(self.item_factors.T)\n", + " return pred\n", + " \n", + " @staticmethod\n", + " def compute_mse(y_true, y_pred):\n", + " \"\"\"ignore zero terms prior to comparing the mse\"\"\"\n", + " mask = np.nonzero(y_true)\n", + " mse = mean_squared_error(y_true[mask], y_pred[mask])\n", + " return mse\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "id": "aed5f9c3", + "metadata": {}, + "outputs": [], + "source": [ + "def plot_learning_curve(model):\n", + " \"\"\"visualize the training/testing loss\"\"\"\n", + " linewidth = 3\n", + " plt.plot(model.test_mse_record, label = 'Test', linewidth = linewidth)\n", + " plt.plot(model.train_mse_record, label = 'Train', linewidth = linewidth)\n", + " plt.xlabel('iterations')\n", + " plt.ylabel('MSE')\n", + " plt.legend(loc = 'best')" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "id": "72963156", + "metadata": {}, + "outputs": [], + "source": [ + "als = ALSModel(n_iters = 100, num_features = 40, l = 0.01)" + ] + }, + { + "cell_type": "code", + 
"execution_count": 103, + "id": "73986d38", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 3.01584919e+00 7.03761958e-01 9.32923123e-01 ... 0.00000000e+00\n", + " 0.00000000e+00 -4.66165251e-02]\n", + " [ 1.21037585e-01 4.66169999e-02 5.68496229e-02 ... 0.00000000e+00\n", + " 0.00000000e+00 1.30703778e-02]\n", + " [ 3.37121563e-02 1.46594177e-02 5.57874374e-02 ... 0.00000000e+00\n", + " 0.00000000e+00 -1.23980449e-03]\n", + " ...\n", + " [ 1.75126135e+00 2.56531084e+00 2.99363499e+00 ... 0.00000000e+00\n", + " 0.00000000e+00 1.58009790e-02]\n", + " [ 5.57267434e-01 4.96896384e-01 1.30707562e-01 ... 0.00000000e+00\n", + " 0.00000000e+00 1.81274689e-03]\n", + " [ 5.60345787e+00 -1.21776760e-01 -3.45874283e-02 ... 0.00000000e+00\n", + " 0.00000000e+00 4.40058011e-02]]\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEGCAYAAABiq/5QAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAedUlEQVR4nO3de7gcdZ3n8fe3u88tJzeSHC6ToCdhHG5BQjy4Iig6oHJbdV0cZJYZJqAZdncUZXnczPrsiPo8M3G9jYiPbB4NsPOw0RWvgwIKo8uOFyDBGGJCBAXhaCAnIeRykpM+3f3dP6q6u7pPn0uSru6Tqs/redq6/ap+v7LCt37nV7/6lbk7IiKSHpl2F0BERFpLgV9EJGUU+EVEUkaBX0QkZRT4RURSJtfuAkzFggULvL+/v93FEBE5pmzYsGGnu/fVrz8mAn9/fz/r169vdzFERI4pZva7RuvV1CMikjKxBX4zW2tmO8xsc4NtN5uZm9mCuPIXEZHG4qzx3wlcUr/SzE4G3gI8F2PeIiIyjtja+N39YTPrb7Dpc8CHge/ElbeIpNfo6CiDg4OMjIy0uygt093dzaJFi+jo6JhS+pY+3DWztwO/d/dfmtlkaVcCKwFe8YpXtKB0IpIEg4ODzJo1i/7+fiaLM0ng7uzatYvBwUEWL148pX1a9nDXzGYAHwH+birp3X2Nuw+4+0Bf35jeSCIiDY2MjDB//vxUBH0AM2P+/PmH9RdOK2v8pwCLgXJtfxHwuJm91t1faHZmLw3neWbnfvYfKjK/t5OlC+c0OwsRmabSEvTLDvd8Wxb43f0J4Pjyspk9Cwy4+8448vuXJ3dw89d/CcC7zlnIZ69aFkc2IiLHnDi7c64DfgacamaDZnZ9XHk10tuZrczvP1RoZdYikmK7du1i2bJlLFu2jBNPPJGFCxdWlvP5/JSPs3btWl54oemNIUC8vXqunmR7f1x5A/R2VU/tQL4YZ1YiIhXz589n48aNANxyyy3MnDmTm2+++bCPs3btWpYvX86JJ57Y7CIeG0M2HInermqNfzivGr+ItN9dd93FF7/4RfL5PK9//eu57bbbKJVKrFixgo0bN+LurFy5k
hNOOIGNGzdy1VVX0dPTw6OPPkpnZ2fTypHgwF89tWE19YikUv+q78V27GdXX35Y6Tdv3sy3vvUtfvrTn5LL5Vi5ciVf/epXOeWUU9i5cydPPPEEAC+//DJz587lC1/4ArfddhvLljX/+WRyA39nNPCrqUdE2uvBBx/kscceY2BgAICDBw9y8skn87a3vY1t27Zx4403ctlll/HWt7419rIkN/BHa/xq6hGRNnN3rrvuOj7xiU+M2bZp0ybuu+8+br31Vr7xjW+wZs2aWMuS2MA/I9Kr54Bq/CKpdLjNMXG6+OKLufLKK7nxxhtZsGABu3btYnh4mJ6eHrq7u3n3u9/N4sWLueGGGwCYNWsW+/bti6UsiQ38XbkMuYxRKDn5Yol8oURnTqNQi0h7nHXWWXz0ox/l4osvplQq0dHRwe233042m+X666/H3TEzPvnJTwKwYsUK3vve98bycNfcvWkHi8vAwIAfyYdYXn3LA+wdCZp5Nv7dW5g7o3n/x4nI9LR161ZOP/30dhej5Rqdt5ltcPeB+rSJrgLPjLTz6yUuEZFAogP/DL3EJSIyRqIDf69q/CIiYyQ78Ktnj4jIGIkO/DM6VeMXEamX6MA/MzJezwG9xCUiAiQ88M/QeD0i0mLNGJZ5xYoVbNu2LbYyJvYFLqjtzjmsXj0i0gJTGZbZ3XF3MpnGde877rgj1jImu8YfebirGr+ItNPTTz/N0qVLueGGG1i+fDnbt29n5cqVDAwMcOaZZ/Lxj3+8kvaCCy5g48aNFAoF5s6dy6pVqzj77LM577zz2LFjx1GXJT01fvXqEUmfW2L81vYtew57ly1btnDHHXdw++23A7B69WrmzZtHoVDgzW9+M1deeSVnnHFGzT579uzhwgsvZPXq1dx0002sXbuWVatWHVXRE17jj77ApRq/iLTXKaecwrnnnltZXrduHcuXL2f58uVs3bqVLVu2jNmnp6eHSy+9FIDXvOY1PPvss0ddjkTX+KNf4VJ3ThFpt97e3sr8U089xec//3keffRR5s6dyzXXXMPIyMiYfaKDs2WzWQqFo49lyQ78nRqyQSTVjqA5plX27t3LrFmzmD17Ntu3b+eBBx7gkksuaUneyQ78GrJBRKap5cuXc8YZZ7B06VKWLFnC+eef37K8Ez0s86bBl3n7bT8BYOnC2dz7/jc0u2giMs1oWOaqVA7L3KtePSIiYyQ78HfqzV0RkXrJDvxdeoFLJI2OhSbsZjrc80104K/pxz9apFRK1z8GkTTq7u5m165dqQn+7s6uXbvo7u6e8j6J7tWTzRg9HVkOjhZxh4OjxZp2fxFJnkWLFjE4OMjQ0FC7i9Iy3d3dLFq0aMrpEx8Fe7uCwA8wnC8o8IskXEdHB4sXL253Maa1RDf1gHr2iIjUS3zgn6GePSIiNRIf+GeqZ4+ISI3EB/4ZGq9HRKRG4gN/7Ve4VOMXEYkt8JvZWjPbYWabI+s+ZWZPmtkmM/uWmc2NK/8yfYVLRKRWnDX+O4H6MUZ/CCx191cDvwb+Nsb8AfXqERGpF1vgd/eHgZfq1v3A3cvV7p8DU3/j4Ahp2AYRkVrtbOO/DrhvvI1mttLM1pvZ+qN5A6+mxq+HuyIi7Qn8ZvYRoADcPV4ad1/j7gPuPtDX13fEeWmEThGRWi0fv8DMrgWuAC7yFoyiVPNwV716RERaG/jN7BLgvwIXuvuBVuRZ051TNX4RkVi7c64DfgacamaDZnY9cBswC/ihmW00s9vjyr9sRpde4BIRiYqtxu/uVzdY/ZW48htPdMgGfXBdRCQFb+7WDNmgfvwiIskP/NE2ftX4RURSEPijvXoOqFePiEjyA7+GbBARqZX4wN+Vy5DNGAD5Yol8odTmEomItFfiA7+Z0avmHhGRiuQG/ie/B/94FnzmNG7JVHuRarweEUm7lg/Z0DKFEXj5OQDm55ZUVh9Qzx4RSbnk1viznZXZrky1lq8unSKSdukI/FZ9oKthG0Qk7RIc+
Dsqs11WreWrxi8iaZfcwJ+pBv5Oq9by1atHRNIuuYE/0tTTWVPjV1OPiKRbggN/tcbfQTXwq1ePiKRdggN/tcafo1rL18dYRCTtUhH4ozV+vcAlImmX4MBffTct56OVedX4RSTtEhz4qzX+rKvGLyJSlpLAX63x7z042ii1iEhqJDjwV3v1RGv8//r0TjYNvtyOEomITAsJDvzVGn/GCwy88jgAiiXnQ1/byMiomnxEJJ1SEfitmOczf3Z25TOMvxkaZvV9T7arZCIibZXcwJ+JjDhdKvDK43r471ecUVl150+f5ftPbG9DwURE2iu5gd+sZrweSqO859yTuei04yur/tPdj3P9nY/xm6H9bSigiEh7JDfwQ01zD8U8ZsY//PuzOGF2V2X1Q0/u4G2fe5j33rWe//PY8wztO9SGgoqItE5yv8AFQc+ecu/NYjBz/Kxu/vn9F/DpB7bx9Q2DuEOh5Dy49UUe3PoiAP3zZ3D6SbM5/aTZ9C/o5eTjejh53gzm93ZiZm06GRGR5kh44I/W+Kv994+f1c3/uPJs/vK8fj5x7xYeeealmt2e3XWAZ3cd4L7NL9Ss78xmOH52FyfO7ub42V0smNlF38wu5s/sYl5vJ/NndjKvt5PjZnQyp6eDbEY3CRGZfhIe+CNt/MX8mM1LF87ha399Hs/sHObBLS/ywy0vsuG53RRL3vBw+WKJwd0HGdx9cNKszWBWV445MzqY0xP8ZnV1MKs7x6zuDmZ2ZZnZnaO3K0dvZ46eziwzwl93R5aejmAa/DJ05bK6kYhIU6Q68JctXtDL+964hPe9cQkjo0WeenE/W7fvZduL+3j+pQM8v/sgg7sPsG9k6uP8uMPekQJ7Rwo8z+Q3iqnIZYzOXIbOXIaucNqRzdCZDeZzGaMjG6zLZY1cJkNH1shlg23ZjNVMMxkja0Y2G04zRiacVuchY9X1GYNMuC1jYBbsmwnTWbi+vB0iyxkwLHjubrVTo3q88jRYX5uWyHJlfwiXy/PhMcK8IChzOV15HyJp6vcjPP54x7MG+0bLITKdJTzwN27qmUh3R5azFs3hrEVzxmwbPlTgxb0jvLB3hJ378wztO8TQvkO8NHyIXfvz7BrOs/tAnt3DefYexk1iqgolp5Av6rvBx4DKDYK6m0J4U6lua3wDIbo8zrHKa6v7hsenetOC2htR+YZZcxOLpLHK/zRYHz1GJJ/qeovkOTZt/f8v0Z2j+9XmZWO2R868pqz1eZTTRMtSs2+D9I3OZey+Y8vXeFvjLeMfq3H5AG69+hw6c83ri5OewF86+jF6ertyLOmbyZK+mZOmLRRL7B0psOfgKHsOjrL34Cj7RgrsHRll/0iB/YcKDB8qMJwvcCBfZPhQkQP5AgdHixzMFzk4WmRktMjIaImR0SL5Yglv3AIl01D5Wnl0obpG5LCUmvwff8ID/9SaeuKQy2aY1xs87G0Gd2e06BwqFMkXSuSLJfKFEqPFEvmCky+WKBRL4dQplMrT8BeuL3p1uVhySuFyqeQUS1D0cL48LTklD/7hldOXHErReXc8nC+6QyS9E8S9aJryP+KSO6USOMF6d6+kLXk1aAbz5W1Busr+5f3CfMrzpfBYlQDc4DiE807tMUrhykbrw90qx6lPI3IsSHbgj77ANcWmnunKzOjMWVP/3JN4lG9g9TeF8s2imm6CG0hd+kY3GCd6gwqXK/O1ZRkvz3Kaah6VlLV/tdTtX5mPlKX+xhdNW16Onks1p+j2xumjy5XzarSemoVJ09Surz1+Zd0U9p3Kscaur9l73OMCdGSb+999bIHfzNYCVwA73H1puG4e8DWgH3gW+DN33x1XGepf4BJphWi7PDRoQBZpszirj3cCl9StWwU85O6vAh4Kl+PTxqYeEZHpKrbA7+4PAy/VrX4HcFc4fxfwzrjyB+pq/PrkoogItH6snhPcfTtAOD1+vIRmttLM1pvZ+qGhoSPLTTV+EZExpu2TQndf4+4D7j7Q1
9d3ZAdR4BcRGaPVgf9FMzsJIJzuiDW3I3iBS0Qk6Vod+L8LXBvOXwt8J9bcVOMXERkjtsBvZuuAnwGnmtmgmV0PrAbeYmZPAW8Jl+Oj7pwiImPE1o/f3a8eZ9NFceU5Rs2QDerVIyIC0/jhblNEv7urGr+ICJD0wK+mHhGRMVIU+NWrR0QEEh/41atHRKRewgO/avwiIvUmDPxmdk1k/vy6bX8TV6GaJpucYZlFRJplshr/TZH5L9Rtu67JZWk+NfWIiIwxWeC3ceYbLU8/auoRERljssA/0cdCp/+H5tSdU0RkjMne3D3NzDYR1O5PCecJl5fEWrJmiDb1NOFj6yIiSTBZ4D+9JaWIS4K+uSsi0iwTBn53/1102czmA28EnnP3DXEWrCnU1CMiMsZk3TnvNbPyh9JPAjYT9Ob5JzP7YAvKd3TUq0dEZIzJHu4udvfN4fwK4Ifu/m+Bf8Mx0Z1TvXpEROpNFvij0fIi4PsA7r4PKMVVqKZR4BcRGWOyh7vPm9n7gUFgOXA/gJn1AB0T7TgtqKlHRGSMyWr81wNnAn8FXOXuL4frXwfcEWO5mkNDNoiIjDFZr54dwA0N1v8I+FFchWoa9eoRERljwsBvZt+daLu7v725xWkyNfWIiIwxWRv/ecDzwDrgEY6F8Xmi9M1dEZExJgv8JwJvAa4G/hz4HrDO3X8Vd8GaQk09IiJjTPhw192L7n6/u19L8ED3aeDHYU+f6U8fWxcRGWOyGj9m1gVcTlDr7wduBb4Zb7GaRP34RUTGmOzh7l3AUuA+4GORt3iPDWrqEREZY7Ia/18Aw8CfAB8wqzzbNcDdfXaMZTt6NcMyF8Ad7Nh6Pi0i0myT9eM/tj/GbhYMzVwei784CrnOifcREUm4YzuwT4X68ouI1FDgFxFJmRQEfvXsERGJSlngV41fRCQFgV8fXBcRiUp+4NcH10VEaiQ/8KupR0SkRlsCv5l9yMx+ZWabzWydmXXHlpl69YiI1Gh54DezhcAHgAF3XwpkgffElqF69YiI1GhXU08O6DGzHDAD+ENsOSnwi4jUaHngd/ffA58GngO2A3vc/Qf16cxspZmtN7P1Q0NDR55hVkMzi4hEtaOp5zjgHcBi4I+AXjO7pj6du69x9wF3H+jr6zvyDFXjFxGp0Y6mnouBZ9x9yN1HCcb2f31sualXj4hIjXYE/ueA15nZDAvGeb4I2BpbburVIyJSox1t/I8A9wCPA0+EZVgTW4b64LqISI1JP70YB3f/KPDRlmSWUY1fRCQqBW/uKvCLiESlIPCrV4+ISFQKAr9q/CIiUSkI/OrOKSISlbLAr149IiIpCPwaskFEJCoFgV9NPSIiUSkL/OrVIyKSgsCvXj0iIlEpCPzRIRtU4xcRSX7g18fWRURqJD/wq6lHRKRGCgK/Hu6KiESlLPCrxi8ikoLArzZ+EZEoBX4RkZRJQeBXU4+ISFQKAr969YiIRKUg8KtXj4hIVLoCv97cFRFJQeDPaFhmEZGo5Ad+NfWIiNRIWeBXjV9EJAWBX716RESiUhD49c1dEZGoFAR+1fhFRKKSH/ij4/GXRsG9fWUREZkGUhD4M3VdOtWzR0TSLfmBH9SzR0QkIiWBX+38IiJl6Qj8Ne386tkjIumWjsCvph4RkYqUBH419YiIlLUl8JvZXDO7x8yeNLOtZnZerBlqvB4RkYrc5Eli8Xngfne/0sw6gRmx5qamHhGRipYHfjObDbwR+CsAd88D8UbjrPrxi4iUtaOpZwkwBNxhZr8wsy+bWW99IjNbaWbrzWz90NDQ0eWoph4RkYp2BP4csBz4krufAwwDq+oTufsadx9w94G+vr6jy1FNPSIiFe0I/IPAoLs/Ei7fQ3AjiI969YiIVLQ88Lv7C8DzZnZquOoiYEusmaqpR0Skol29et4P3B326PktsCLW3PTBdRGRirYEfnffCAy0LEN9cF1EpCIlb+6qqUdEpCyFgV81fhFJt5QEfvXqEREpS
0ng1wfXRUTKUhL4VeMXESlT4BcRSZmUBH716hERKUtJ4FeNX0SkLCWBX2/uioiUpSPwRz+2rqYeEUm5dAR+NfWIiFSkJPDrzV0RkbIUBn419YhIuqUk8KupR0SkLIWBXzV+EUm3lAR+NfWIiJSlJPBHavyjB9pXDhGRaSAdgX/midX53/0Etv+yfWUREWmzdAT+E86ExRcG816Ce2+CUqm9ZRIRaZN0BH4zuPyz1bb+36+Hx+9sa5FERNolHYEfYMEfw/kfrC4/eAvs39G24oiItEt6Aj/AG26C4xYH8yN74I7L4FffBvf2lktEpIXSFfg7euDyT1eXdz0FX78W1rwJHvmfMLRNNwERSbxcuwvQcn98MVz+GXjwY3Bob7Bu+8bgB0EPoJNeDX2nwoJTYe4rYM4imL0QOrrbV24RkSYxPwZquAMDA75+/frmHvTAS/Cvn4NH10BhZGr7dM+B3r7g1zMPeuZCz3HB+q5Z0DU7mHb2QufMYNrRE05nBPPRdwpERGJkZhvcfWDM+tQG/rJ9L8DWf4bf/hie+X9waE88+ZRZNrgB5LrDX1f1l+0KbgzZzvAXnc8F3xXIdkAmF07L87lgWvllg3zK85kcWCayPjq1yHwmmK+kzYydr/nZ2HVYg+0WWW8N0tZvt3ivgUhKjBf409fUU2/WifDa9wW/UjFo5x96Enb+GnY+BXsGYe/vYe8fwItHn58XIb8/+Mn4KjcGq52vn1a2MX6aKU0n2R/G2UaDearzh7NfZbcGeVbm6/ejdr7RfhPtO2k5DicN46yfrEyT5DvZcSfb50j3rTnMVI450XGPIg3ApZ9samuBAn9UJgsnnBH86pVKcHA3DA8Fv4O7YeTlcLoXDu0Lnhkc2hcE9UP7g+Eh8sPBdHQkmDbj5pEGHr5gN/3/IBWJ39v+XoG/LTIZ6J0f/DjtyI9THIXRg1A4BIXy9BAUD0EhHwwbXcwH60qjQfpiPpiWCuF0tLpc/hVHg2BZmS8GN6tSIZwvBH/ReCmc1s97dbsXa+fdq2lpNF+qznupmt6j632c9eG28rKIxE6Bv9WyHXrAO5HoDYIG8/XTmm00TnPY0/rj0DhtNF3D+fr9mCRtNH2D+Ynya5gndfsyfvpG5ahJN5U0E60fp0xj9mWC9YeT35iDHea+h3nMifY/qjShTHNDtQK/TC+Vh7vpesVEpJX0X5eISMoo8IuIpEzbAr+ZZc3sF2Z2b7vKICKSRu2s8d8IbG1j/iIiqdSWwG9mi4DLgS+3I38RkTRrV43/H4EPA+N23DazlWa23szWDw0Nta5kIiIJ1/LAb2ZXADvcfcNE6dx9jbsPuPtAX19fi0onIpJ8LR+kzcz+AfgLoAB0A7OBb7r7NRPsMwT87gizXADsPMJ9j2VpPO80njOk87zTeM5w+Of9SncfU3Nu6+icZvYm4GZ3vyLGPNY3Gp0u6dJ43mk8Z0jneafxnKF5561+/CIiKdPWIRvc/cfAj9tZBhGRtElDjX9NuwvQJmk87zSeM6TzvNN4ztCk8z4mvsAlIiLNk4Yav4iIRCjwi4ikTKIDv5ldYmbbzOxpM1vV7vLEwcxONrMfmdlWM/uVmd0Yrp9nZj80s6fC6XHtLmuz1Q/0Z2aLzeyR8Jy/Zmad7S5js5nZXDO7x8yeDK/5eUm/1mb2ofDf9mYzW2dm3Um81ma21sx2mNnmyLqG19YCt4axbZOZLT+cvBIb+M0sC3wRuBQ4A7jazBp8TPeYVwD+i7ufDrwO+M/hea4CHnL3VwEPhctJUz/Q3yeBz4XnvBu4vi2litfngfvd/TTgbILzT+y1NrOFwAeAAXdfCmSB95DMa30ncEnduvGu7aXAq8LfSuBLh5NRYgM/8FrgaXf/rbvnga8C72hzmZrO3be7++Ph/D6CQLCQ4FzvCpPdBbyzPSWMR/1Af2ZmwJ8C94RJknjOs4E3Al8BcPe8u79Mwq81QbfzHjPLATOA7STwWrv7w8BLdavHu7bvA
P6XB34OzDWzk6aaV5ID/0Lg+cjyYLguscysHzgHeAQ4wd23Q3BzAI5vX8liUT/Q33zgZXcvhMtJvN5LgCHgjrCJ68tm1kuCr7W7/x74NPAcQcDfA2wg+de6bLxre1TxLcmB3xqsS2zfVTObCXwD+KC77213eeI0zkB/abjeOWA58CV3PwcYJkHNOo2EbdrvABYDfwT0EjRz1EvatZ7MUf17T3LgHwROjiwvAv7QprLEysw6CIL+3e7+zXD1i+U//cLpjnaVLwbnA283s2cJmvD+lOAvgLlhcwAk83oPAoPu/ki4fA/BjSDJ1/pi4Bl3H3L3UeCbwOtJ/rUuG+/aHlV8S3Lgfwx4Vfj0v5PggdB321ympgvbtr8CbHX3z0Y2fRe4Npy/FvhOq8sWF3f/W3df5O79BNf1X9z9PwA/Aq4MkyXqnAHc/QXgeTM7NVx1EbCFBF9rgiae15nZjPDfevmcE32tI8a7tt8F/jLs3fM6YE+5SWhK3D2xP+Ay4NfAb4CPtLs8MZ3jBQR/4m0CNoa/ywjavB8Cngqn89pd1pjO/03AveH8EuBR4Gng60BXu8sXw/kuA9aH1/vbwHFJv9bAx4Angc3APwFdSbzWwDqC5xijBDX668e7tgRNPV8MY9sTBL2eppyXhmwQEUmZJDf1iIhIAwr8IiIpo8AvIpIyCvwiIimjwC8ikjIK/JIKZvbTcNpvZn/e5GP/t0Z5iUxX6s4pqWJmbwJudvcrDmOfrLsXJ9i+391nNqN8Iq2gGr+kgpntD2dXA28ws43hOO9ZM/uUmT0Wjmv+12H6N4XfOfjfBC/IYGbfNrMN4djwK8N1qwlGjtxoZndH8wrfqvxUOI78E2Z2VeTYP46Mq393+FYqZrbazLaEZfl0K/8/kvTITZ5EJFFWEanxhwF8j7ufa2ZdwE/M7Adh2tcCS939mXD5Ond/ycx6gMfM7BvuvsrM/sbdlzXI610Eb9qeDSwI93k43HYOcCbB+Co/Ac43sy3AvwNOc3c3s7lNP3sRVOMXeSvBmCcbCYaznk/wcQuARyNBH+ADZvZL4OcEA2S9ioldAKxz96K7vwj8X+DcyLEH3b1EMMxGP7AXGAG+bGbvAg4c9dmJNKDAL2lnwPvdfVn4W+zu5Rr/cCVR8GzgYuA8dz8b+AXQPYVjj+dQZL4I5DwYX/61BCOtvhO4/7DORGSKFPglbfYBsyLLDwD/MRzaGjP7k/DjJvXmALvd/YCZnUbwmcuy0fL+dR4GrgqfI/QRfD3r0fEKFn5TYY67fx/4IEEzkUjTqY1f0mYTUAibbO4k+IZtP/B4+IB1iMaf8bsfuMHMNgHbCJp7ytYAm8zscQ+Ghy77FnAe8EuCEVQ/7O4vhDeORmYB3zGzboK/Fj50ZKcoMjF15xQRSRk19YiIpIwCv4hIyijwi4ikjAK/iEjKKPCLiKSMAr+ISMoo8IuIpMz/BzcgO0/LsHX+AAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "als.fit(train, test)\n", + "plot_learning_curve(als)" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "id": "42f889b2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10.251992169875896\n", + "4.404787604174319\n" + ] + } + ], + "source": [ + "print(als.test_mse_record[-1])\n", + "print(als.train_mse_record[-1])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7.4 64-bit ('base': conda)", + "language": "python", + "name": "python37464bitbaseconda9114583a17cf498dbdf9713d49f5bef8" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 02207bf0e093e472ae92266d3c37568f7d661ba3 Mon Sep 17 00:00:00 2001 From: Amit Narang Date: Fri, 29 Oct 2021 12:40:15 -0700 Subject: [PATCH 6/9] WIP --- recsys/als-half.ipynb | 281 ++++++++++++++++++++++++++++++++ recsys/als-streaming-test.ipynb | 56 +++---- recsys/als.ipynb | 67 +++----- recsys/recsys_server.py | 13 +- 4 files changed, 343 insertions(+), 74 deletions(-) create mode 100644 recsys/als-half.ipynb diff --git a/recsys/als-half.ipynb b/recsys/als-half.ipynb new file mode 100644 index 0000000..72d2dae --- /dev/null +++ b/recsys/als-half.ipynb @@ -0,0 +1,281 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 42, + "id": "d2a4455a", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from collections import defaultdict\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.metrics import mean_squared_error" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "77ed3484", + 
"metadata": {}, + "outputs": [], + "source": [ + "ratings_path = \"/Users/amitnarang/Downloads/ml-latest-small/ratings.csv\"" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "af66f007", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " userId movieId rating timestamp\n", + "0 1 1 4.0 964982703\n", + "1 1 3 4.0 964981247\n", + "2 1 6 4.0 964982224\n", + "3 1 47 5.0 964983815\n", + "4 1 50 5.0 964982931\n", + "... ... ... ... ...\n", + "100831 610 166534 4.0 1493848402\n", + "100832 610 168248 5.0 1493850091\n", + "100833 610 168250 5.0 1494273047\n", + "100834 610 168252 5.0 1493846352\n", + "100835 610 170875 3.0 1493846415\n", + "\n", + "[100836 rows x 4 columns] 100836\n", + " userId movieId rating timestamp\n", + "0 1 1 4.0 964982703\n", + "1 1 3 4.0 964981247\n", + "2 1 6 4.0 964982224\n", + "3 1 47 5.0 964983815\n", + "4 1 50 5.0 964982931\n", + "... ... ... ... ...\n", + "50413 325 3927 3.0 1039397688\n", + "50414 325 3981 2.0 1039398309\n", + "50415 325 3994 4.0 1039398793\n", + "50416 325 4017 3.0 1039396037\n", + "50417 325 4034 4.0 1039398396\n", + "\n", + "[50418 rows x 4 columns] 50418\n", + "[[0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " ...\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]\n", + " [0. 0. 0. ... 0. 0. 0.]]\n", + "[[4. 0. 4. ... 0. 0. 0. ]\n", + " [0. 0. 0. ... 0. 0. 0. ]\n", + " [0. 0. 0. ... 0. 0. 0. ]\n", + " ...\n", + " [3.5 4. 0. ... 0. 0. 0. ]\n", + " [0. 0. 0. ... 0. 0. 0. ]\n", + " [0. 0. 0. ... 0. 0. 0. 
]]\n" + ] + } + ], + "source": [ + "big_df = pd.read_csv(ratings_path, sep = ',')\n", + "big_df.sort_values('timestamp')\n", + "num_rows = big_df.shape[0]\n", + "print(big_df, num_rows)\n", + "df = big_df.iloc[:int(num_rows/2)]\n", + "print(df, df.shape[0])\n", + "n_users = max(df['userId'])\n", + "n_items = max(df['movieId'])\n", + "ratings = np.zeros((n_users, n_items))\n", + "print(ratings)\n", + "for row in df.itertuples():\n", + " ratings[row.userId - 1, row.movieId - 1] = row.rating\n", + "print(ratings)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "f9e63f83", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[4. , 0. , 4. , ..., 0. , 0. , 0. ],\n", + " [0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " [0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " ...,\n", + " [3.5, 4. , 0. , ..., 0. , 0. , 0. ],\n", + " [0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " [0. , 0. , 0. , ..., 0. , 0. , 0. ]])" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# taken from ethen8181\n", + "def create_train_test(ratings):\n", + " \"\"\"\n", + " split into training and test sets,\n", + " remove 10 ratings from each user\n", + " and assign them to the test set\n", + " \"\"\"\n", + " test = np.zeros(ratings.shape)\n", + " train = ratings.copy()\n", + " for user in range(ratings.shape[0]):\n", + " test_index = np.random.choice(\n", + " np.flatnonzero(ratings[user]), size = 5, replace = False)\n", + "\n", + " train[user, test_index] = 0.0\n", + " test[user, test_index] = ratings[user, test_index]\n", + " \n", + " # assert that training and testing set are truly disjoint\n", + " assert np.all(train * test == 0)\n", + " return train, test\n", + "\n", + "train, test = create_train_test(ratings)\n", + "del ratings\n", + "train" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "bd9b47bb", + "metadata": {}, + "outputs": [], + "source": [ + "class ALSModel:\n", + " def 
__init__(self, l, num_features, n_iters):\n", + " self.l = l\n", + " self.num_features = num_features\n", + " self.n_iters = n_iters\n", + " \n", + " def fit(self, train, test):\n", + " \"\"\"\n", + " pass in training and testing at the same time to record\n", + " model convergence, assuming both dataset is in the form\n", + " of User x Item matrix with cells as ratings\n", + " \"\"\"\n", + " self.n_user, self.n_item = train.shape\n", + " self.user_factors = np.random.random((self.n_user, self.num_features))\n", + " self.item_factors = np.random.random((self.n_item, self.num_features))\n", + " \n", + " # record the training and testing mse for every iteration\n", + " # to show convergence later (usually, not worth it for production)\n", + " self.test_mse_record = []\n", + " self.train_mse_record = [] \n", + " for _ in range(self.n_iters):\n", + " self.user_factors = self._als_step(train, self.user_factors, self.item_factors)\n", + " self.item_factors = self._als_step(train.T, self.item_factors, self.user_factors) \n", + " predictions = self.predict()\n", + " test_mse = self.compute_mse(test, predictions)\n", + " train_mse = self.compute_mse(train, predictions)\n", + " self.test_mse_record.append(test_mse)\n", + " self.train_mse_record.append(train_mse)\n", + " \n", + " return self \n", + " \n", + " def _als_step(self, ratings, solve_vecs, fixed_vecs):\n", + " \"\"\"\n", + " when updating the user matrix,\n", + " the item matrix is the fixed vector and vice versa\n", + " \"\"\"\n", + " A = fixed_vecs.T.dot(fixed_vecs) + np.eye(self.num_features) * self.l\n", + " b = ratings.dot(fixed_vecs)\n", + " A_inv = np.linalg.inv(A)\n", + " solve_vecs = b.dot(A_inv)\n", + " return solve_vecs\n", + " \n", + " def predict(self):\n", + " \"\"\"predict ratings for every user and item\"\"\"\n", + " pred = self.user_factors.dot(self.item_factors.T)\n", + " return pred\n", + " \n", + " @staticmethod\n", + " def compute_mse(y_true, y_pred):\n", + " \"\"\"ignore zero terms prior to 
comparing the mse\"\"\"\n", + " mask = np.nonzero(y_true)\n", + " mse = mean_squared_error(y_true[mask], y_pred[mask])\n", + " return mse\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "73d89971", + "metadata": {}, + "outputs": [], + "source": [ + "def plot_learning_curve(model):\n", + " \"\"\"visualize the training/testing loss\"\"\"\n", + " linewidth = 3\n", + " plt.plot(model.test_mse_record, label = 'Test', linewidth = linewidth)\n", + " plt.plot(model.train_mse_record, label = 'Train', linewidth = linewidth)\n", + " plt.xlabel('iterations')\n", + " plt.ylabel('MSE')\n", + " plt.legend(loc = 'best')" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "16b9cf07", + "metadata": {}, + "outputs": [], + "source": [ + "als = ALSModel(n_iters = 100, num_features = 40, l = 0.01)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf2be80c", + "metadata": {}, + "outputs": [], + "source": [ + "als.fit(train, test)\n", + "plot_learning_curve(als)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c377cda2", + "metadata": {}, + "outputs": [], + "source": [ + "print(als.test_mse_record[-1])\n", + "print(als.train_mse_record[-1])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7.4 64-bit ('base': conda)", + "language": "python", + "name": "python37464bitbaseconda9114583a17cf498dbdf9713d49f5bef8" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/recsys/als-streaming-test.ipynb b/recsys/als-streaming-test.ipynb index 9874f8a..b10a3e7 100644 --- a/recsys/als-streaming-test.ipynb +++ b/recsys/als-streaming-test.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 89, + 
"execution_count": 106, "id": "8e0e6a4f", "metadata": {}, "outputs": [], @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 90, + "execution_count": 107, "id": "5b033873", "metadata": {}, "outputs": [], @@ -26,7 +26,7 @@ }, { "cell_type": "code", - "execution_count": 91, + "execution_count": 108, "id": "a00a310f", "metadata": {}, "outputs": [ @@ -50,32 +50,32 @@ "[100836 rows x 4 columns]\n", " userId movieId rating\n", "0 1 1 4.0\n", - "1 1 110 4.0\n", - "2 1 235 4.0\n", - "3 1 362 5.0\n", - "4 1 527 5.0\n", + "1 1 47 5.0\n", + "2 1 101 5.0\n", + "3 1 157 5.0\n", + "4 1 223 3.0\n", "... ... ... ...\n", - "14401 610 152077 4.0\n", - "14402 610 158238 5.0\n", - "14403 610 160527 4.5\n", - "14404 610 163981 3.5\n", - "14405 610 170875 3.0\n", + "33607 610 160527 4.5\n", + "33608 610 161582 4.0\n", + "33609 610 163937 3.5\n", + "33610 610 166528 4.0\n", + "33611 610 168250 5.0\n", "\n", - "[14406 rows x 3 columns]\n", + "[33612 rows x 3 columns]\n", " userId movieId rating\n", "0 1 3 4.0\n", "1 1 6 4.0\n", - "2 1 47 5.0\n", - "3 1 50 5.0\n", - "4 1 70 3.0\n", + "2 1 50 5.0\n", + "3 1 70 3.0\n", + "4 1 110 4.0\n", "... ... ... 
...\n", - "86425 610 166528 4.0\n", - "86426 610 166534 4.0\n", - "86427 610 168248 5.0\n", - "86428 610 168250 5.0\n", - "86429 610 168252 5.0\n", + "67219 610 164179 5.0\n", + "67220 610 166534 4.0\n", + "67221 610 168248 5.0\n", + "67222 610 168252 5.0\n", + "67223 610 170875 3.0\n", "\n", - "[86430 rows x 3 columns]\n" + "[67224 rows x 3 columns]\n" ] } ], @@ -90,7 +90,7 @@ "train_data = []\n", "\n", "for row in df.itertuples():\n", - " if row.Index % 7 == 0:\n", + " if row.Index % 3 == 0:\n", " test_data.append([row.userId, row.movieId, row.rating])\n", " else:\n", " train_data.append([row.userId, row.movieId, row.rating])\n", @@ -105,7 +105,7 @@ }, { "cell_type": "code", - "execution_count": 92, + "execution_count": 109, "id": "a6a8fb84", "metadata": {}, "outputs": [], @@ -177,8 +177,8 @@ }, { "cell_type": "code", - "execution_count": 94, - "id": "2f287172", + "execution_count": 110, + "id": "fcefa486", "metadata": {}, "outputs": [ { @@ -194,8 +194,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "4.660211023184784\n", - "4.335815110494042\n" + "5.058386885636082\n", + "4.707202784719743\n" ] } ], diff --git a/recsys/als.ipynb b/recsys/als.ipynb index 7f42caf..7ad77b3 100644 --- a/recsys/als.ipynb +++ b/recsys/als.ipynb @@ -2,8 +2,8 @@ "cells": [ { "cell_type": "code", - "execution_count": 96, - "id": "f74e18c0", + "execution_count": 141, + "id": "a8462b38", "metadata": {}, "outputs": [], "source": [ @@ -16,8 +16,8 @@ }, { "cell_type": "code", - "execution_count": 97, - "id": "3a949759", + "execution_count": 142, + "id": "d86767af", "metadata": {}, "outputs": [], "source": [ @@ -26,8 +26,8 @@ }, { "cell_type": "code", - "execution_count": 98, - "id": "f2587d6e", + "execution_count": 143, + "id": "539bf12f", "metadata": {}, "outputs": [ { @@ -79,8 +79,8 @@ }, { "cell_type": "code", - "execution_count": 99, - "id": "a0946387", + "execution_count": 144, + "id": "da9af87d", "metadata": {}, "outputs": [ { @@ -91,11 +91,11 @@ " [0. , 0. , 0. , ..., 0. 
, 0. , 0. ],\n", " ...,\n", " [2.5, 2. , 2. , ..., 0. , 0. , 0. ],\n", - " [3. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " [0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", " [5. , 0. , 0. , ..., 0. , 0. , 0. ]])" ] }, - "execution_count": 99, + "execution_count": 144, "metadata": {}, "output_type": "execute_result" } @@ -128,8 +128,8 @@ }, { "cell_type": "code", - "execution_count": 100, - "id": "cc7a4432", + "execution_count": 145, + "id": "67ff43ae", "metadata": {}, "outputs": [], "source": [ @@ -157,8 +157,6 @@ " self.user_factors = self._als_step(train, self.user_factors, self.item_factors)\n", " self.item_factors = self._als_step(train.T, self.item_factors, self.user_factors) \n", " predictions = self.predict()\n", - " if i == self.n_iters - 1:\n", - " print(predictions)\n", " test_mse = self.compute_mse(test, predictions)\n", " train_mse = self.compute_mse(train, predictions)\n", " self.test_mse_record.append(test_mse)\n", @@ -193,8 +191,8 @@ }, { "cell_type": "code", - "execution_count": 101, - "id": "aed5f9c3", + "execution_count": 146, + "id": "1bc4e9f0", "metadata": {}, "outputs": [], "source": [ @@ -210,8 +208,8 @@ }, { "cell_type": "code", - "execution_count": 102, - "id": "72963156", + "execution_count": 147, + "id": "79a51d82", "metadata": {}, "outputs": [], "source": [ @@ -220,32 +218,13 @@ }, { "cell_type": "code", - "execution_count": 103, - "id": "73986d38", + "execution_count": 148, + "id": "5fb986b1", "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[ 3.01584919e+00 7.03761958e-01 9.32923123e-01 ... 0.00000000e+00\n", - " 0.00000000e+00 -4.66165251e-02]\n", - " [ 1.21037585e-01 4.66169999e-02 5.68496229e-02 ... 0.00000000e+00\n", - " 0.00000000e+00 1.30703778e-02]\n", - " [ 3.37121563e-02 1.46594177e-02 5.57874374e-02 ... 0.00000000e+00\n", - " 0.00000000e+00 -1.23980449e-03]\n", - " ...\n", - " [ 1.75126135e+00 2.56531084e+00 2.99363499e+00 ... 
0.00000000e+00\n", - " 0.00000000e+00 1.58009790e-02]\n", - " [ 5.57267434e-01 4.96896384e-01 1.30707562e-01 ... 0.00000000e+00\n", - " 0.00000000e+00 1.81274689e-03]\n", - " [ 5.60345787e+00 -1.21776760e-01 -3.45874283e-02 ... 0.00000000e+00\n", - " 0.00000000e+00 4.40058011e-02]]\n" - ] - }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEGCAYAAABiq/5QAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAedUlEQVR4nO3de7gcdZ3n8fe3u88tJzeSHC6ToCdhHG5BQjy4Iig6oHJbdV0cZJYZJqAZdncUZXnczPrsiPo8M3G9jYiPbB4NsPOw0RWvgwIKo8uOFyDBGGJCBAXhaCAnIeRykpM+3f3dP6q6u7pPn0uSru6Tqs/redq6/ap+v7LCt37nV7/6lbk7IiKSHpl2F0BERFpLgV9EJGUU+EVEUkaBX0QkZRT4RURSJtfuAkzFggULvL+/v93FEBE5pmzYsGGnu/fVrz8mAn9/fz/r169vdzFERI4pZva7RuvV1CMikjKxBX4zW2tmO8xsc4NtN5uZm9mCuPIXEZHG4qzx3wlcUr/SzE4G3gI8F2PeIiIyjtja+N39YTPrb7Dpc8CHge/ElbeIpNfo6CiDg4OMjIy0uygt093dzaJFi+jo6JhS+pY+3DWztwO/d/dfmtlkaVcCKwFe8YpXtKB0IpIEg4ODzJo1i/7+fiaLM0ng7uzatYvBwUEWL148pX1a9nDXzGYAHwH+birp3X2Nuw+4+0Bf35jeSCIiDY2MjDB//vxUBH0AM2P+/PmH9RdOK2v8pwCLgXJtfxHwuJm91t1faHZmLw3neWbnfvYfKjK/t5OlC+c0OwsRmabSEvTLDvd8Wxb43f0J4Pjyspk9Cwy4+8448vuXJ3dw89d/CcC7zlnIZ69aFkc2IiLHnDi7c64DfgacamaDZnZ9XHk10tuZrczvP1RoZdYikmK7du1i2bJlLFu2jBNPPJGFCxdWlvP5/JSPs3btWl54oemNIUC8vXqunmR7f1x5A/R2VU/tQL4YZ1YiIhXz589n48aNANxyyy3MnDmTm2+++bCPs3btWpYvX86JJ57Y7CIeG0M2HInermqNfzivGr+ItN9dd93FF7/4RfL5PK9//eu57bbbKJVKrFixgo0bN+LurFy5khNOOIGNGzdy1VVX0dPTw6OPPkpnZ2fTypHgwF89tWE19YikUv+q78V27GdXX35Y6Tdv3sy3vvUtfvrTn5LL5Vi5ciVf/epXOeWUU9i5cydPPPEEAC+//DJz587lC1/4ArfddhvLljX/+WRyA39nNPCrqUdE2uvBBx/kscceY2BgAICDBw9y8skn87a3vY1t27Zx4403ctlll/HWt7419rIkN/BHa/xq6hGRNnN3rrvuOj7xiU+M2bZp0ybuu+8+br31Vr7xjW+wZs2aWMuS2MA/I9Kr54Bq/CKpdLjNMXG6+OKLufLKK7nxxhtZsGABu3btYnh4mJ6eHrq7u3n3u9/N4sWLueGGGwCYNWsW+/bti6UsiQ38XbkMuYxRKDn5Yol8oURnTqNQi0h7nHXWWXz0ox/l4osvplQq0dHRwe233042m+X666/H3TEzPvnJTwKwYsUK3vve98bycNfcvWkHi8vAwIAfyYdYXn3LA+wdCZp5Nv7dW5g7o3n/x4nI9LR161ZOP/30dhej5Rqdt5ltcPeB+rSJrgLPjLTz6
yUuEZFAogP/DL3EJSIyRqIDf69q/CIiYyQ78Ktnj4jIGIkO/DM6VeMXEamX6MA/MzJezwG9xCUiAiQ88M/QeD0i0mLNGJZ5xYoVbNu2LbYyJvYFLqjtzjmsXj0i0gJTGZbZ3XF3MpnGde877rgj1jImu8YfebirGr+ItNPTTz/N0qVLueGGG1i+fDnbt29n5cqVDAwMcOaZZ/Lxj3+8kvaCCy5g48aNFAoF5s6dy6pVqzj77LM577zz2LFjx1GXJT01fvXqEUmfW2L81vYtew57ly1btnDHHXdw++23A7B69WrmzZtHoVDgzW9+M1deeSVnnHFGzT579uzhwgsvZPXq1dx0002sXbuWVatWHVXRE17jj77ApRq/iLTXKaecwrnnnltZXrduHcuXL2f58uVs3bqVLVu2jNmnp6eHSy+9FIDXvOY1PPvss0ddjkTX+KNf4VJ3ThFpt97e3sr8U089xec//3keffRR5s6dyzXXXMPIyMiYfaKDs2WzWQqFo49lyQ78nRqyQSTVjqA5plX27t3LrFmzmD17Ntu3b+eBBx7gkksuaUneyQ78GrJBRKap5cuXc8YZZ7B06VKWLFnC+eef37K8Ez0s86bBl3n7bT8BYOnC2dz7/jc0u2giMs1oWOaqVA7L3KtePSIiYyQ78HfqzV0RkXrJDvxdeoFLJI2OhSbsZjrc80104K/pxz9apFRK1z8GkTTq7u5m165dqQn+7s6uXbvo7u6e8j6J7tWTzRg9HVkOjhZxh4OjxZp2fxFJnkWLFjE4OMjQ0FC7i9Iy3d3dLFq0aMrpEx8Fe7uCwA8wnC8o8IskXEdHB4sXL253Maa1RDf1gHr2iIjUS3zgn6GePSIiNRIf+GeqZ4+ISI3EB/4ZGq9HRKRG4gN/7Ve4VOMXEYkt8JvZWjPbYWabI+s+ZWZPmtkmM/uWmc2NK/8yfYVLRKRWnDX+O4H6MUZ/CCx191cDvwb+Nsb8AfXqERGpF1vgd/eHgZfq1v3A3cvV7p8DU3/j4Ahp2AYRkVrtbOO/DrhvvI1mttLM1pvZ+qN5A6+mxq+HuyIi7Qn8ZvYRoADcPV4ad1/j7gPuPtDX13fEeWmEThGRWi0fv8DMrgWuAC7yFoyiVPNwV716RERaG/jN7BLgvwIXuvuBVuRZ051TNX4RkVi7c64DfgacamaDZnY9cBswC/ihmW00s9vjyr9sRpde4BIRiYqtxu/uVzdY/ZW48htPdMgGfXBdRCQFb+7WDNmgfvwiIskP/NE2ftX4RURSEPijvXoOqFePiEjyA7+GbBARqZX4wN+Vy5DNGAD5Yol8odTmEomItFfiA7+Z0avmHhGRiuQG/ie/B/94FnzmNG7JVHuRarweEUm7lg/Z0DKFEXj5OQDm55ZUVh9Qzx4RSbnk1viznZXZrky1lq8unSKSdukI/FZ9oKthG0Qk7RIc+Dsqs11WreWrxi8iaZfcwJ+pBv5Oq9by1atHRNIuuYE/0tTTWVPjV1OPiKRbggN/tcbfQTXwq1ePiKRdggN/tcafo1rL18dYRCTtUhH4ozV+vcAlImmX4MBffTct56OVedX4RSTtEhz4qzX+rKvGLyJSlpLAX63x7z042ii1iEhqJDjwV3v1RGv8//r0TjYNvtyOEomITAsJDvzVGn/GCwy88jgAiiXnQ1/byMiomnxEJJ1SEfitmOczf3Z25TOMvxkaZvV9T7arZCIibZXcwJ+JjDhdKvDK43r471ecUVl150+f5ftPbG9DwURE2iu5gd+sZrweSqO859yTuei04yur/tPdj3P9nY/xm6H9bSigiEh7JDfwQ01zD8U8ZsY//PuzOGF2V2X1Q0/u4G2fe5j33rWe//PY8wztO9SGgoqItE5yv8AFQc+ecu/NYjBz/Kxu/vn9F/DpB7bx9Q2DuEOh5Dy49UUe3PoiAP3zZ3D6SbM5/aTZ9C/o5eTjejh53gzm93ZiZm06GRGR5kh44I/W+Kv994+f1c3/uPJs/
vK8fj5x7xYeeealmt2e3XWAZ3cd4L7NL9Ss78xmOH52FyfO7ub42V0smNlF38wu5s/sYl5vJ/NndjKvt5PjZnQyp6eDbEY3CRGZfhIe+CNt/MX8mM1LF87ha399Hs/sHObBLS/ywy0vsuG53RRL3vBw+WKJwd0HGdx9cNKszWBWV445MzqY0xP8ZnV1MKs7x6zuDmZ2ZZnZnaO3K0dvZ46eziwzwl93R5aejmAa/DJ05bK6kYhIU6Q68JctXtDL+964hPe9cQkjo0WeenE/W7fvZduL+3j+pQM8v/sgg7sPsG9k6uP8uMPekQJ7Rwo8z+Q3iqnIZYzOXIbOXIaucNqRzdCZDeZzGaMjG6zLZY1cJkNH1shlg23ZjNVMMxkja0Y2G04zRiacVuchY9X1GYNMuC1jYBbsmwnTWbi+vB0iyxkwLHjubrVTo3q88jRYX5uWyHJlfwiXy/PhMcK8IChzOV15HyJp6vcjPP54x7MG+0bLITKdJTzwN27qmUh3R5azFs3hrEVzxmwbPlTgxb0jvLB3hJ378wztO8TQvkO8NHyIXfvz7BrOs/tAnt3DefYexk1iqgolp5Av6rvBx4DKDYK6m0J4U6lua3wDIbo8zrHKa6v7hsenetOC2htR+YZZcxOLpLHK/zRYHz1GJJ/qeovkOTZt/f8v0Z2j+9XmZWO2R868pqz1eZTTRMtSs2+D9I3OZey+Y8vXeFvjLeMfq3H5AG69+hw6c83ri5OewF86+jF6ertyLOmbyZK+mZOmLRRL7B0psOfgKHsOjrL34Cj7RgrsHRll/0iB/YcKDB8qMJwvcCBfZPhQkQP5AgdHixzMFzk4WmRktMjIaImR0SL5Yglv3AIl01D5Wnl0obpG5LCUmvwff8ID/9SaeuKQy2aY1xs87G0Gd2e06BwqFMkXSuSLJfKFEqPFEvmCky+WKBRL4dQplMrT8BeuL3p1uVhySuFyqeQUS1D0cL48LTklD/7hldOXHErReXc8nC+6QyS9E8S9aJryP+KSO6USOMF6d6+kLXk1aAbz5W1Busr+5f3CfMrzpfBYlQDc4DiE807tMUrhykbrw90qx6lPI3IsSHbgj77ANcWmnunKzOjMWVP/3JN4lG9g9TeF8s2imm6CG0hd+kY3GCd6gwqXK/O1ZRkvz3Kaah6VlLV/tdTtX5mPlKX+xhdNW16Onks1p+j2xumjy5XzarSemoVJ09Surz1+Zd0U9p3Kscaur9l73OMCdGSb+999bIHfzNYCVwA73H1puG4e8DWgH3gW+DN33x1XGepf4BJphWi7PDRoQBZpszirj3cCl9StWwU85O6vAh4Kl+PTxqYeEZHpKrbA7+4PAy/VrX4HcFc4fxfwzrjyB+pq/PrkoogItH6snhPcfTtAOD1+vIRmttLM1pvZ+qGhoSPLTTV+EZExpu2TQndf4+4D7j7Q19d3ZAdR4BcRGaPVgf9FMzsJIJzuiDW3I3iBS0Qk6Vod+L8LXBvOXwt8J9bcVOMXERkjtsBvZuuAnwGnmtmgmV0PrAbeYmZPAW8Jl+Oj7pwiImPE1o/f3a8eZ9NFceU5Rs2QDerVIyIC0/jhblNEv7urGr+ICJD0wK+mHhGRMVIU+NWrR0QEEh/41atHRKRewgO/avwiIvUmDPxmdk1k/vy6bX8TV6GaJpucYZlFRJplshr/TZH5L9Rtu67JZWk+NfWIiIwxWeC3ceYbLU8/auoRERljssA/0cdCp/+H5tSdU0RkjMne3D3NzDYR1O5PCecJl5fEWrJmiDb1NOFj6yIiSTBZ4D+9JaWIS4K+uSsi0iwTBn53/1102czmA28EnnP3DXEWrCnU1CMiMsZk3TnvNbPyh9JPAjYT9Ob5JzP7YAvKd3TUq0dEZIzJHu4udvfN4fwK4Ifu/m+Bf8Mx0Z1TvXpEROpNFvij0fIi4PsA7r4PKMVVqKZR4BcRGWOyh7vPm9n7gUFgOXA/gJn1AB0T7TgtqKlHRGSMyWr81
wNnAn8FXOXuL4frXwfcEWO5mkNDNoiIjDFZr54dwA0N1v8I+FFchWoa9eoRERljwsBvZt+daLu7v725xWkyNfWIiIwxWRv/ecDzwDrgEY6F8Xmi9M1dEZExJgv8JwJvAa4G/hz4HrDO3X8Vd8GaQk09IiJjTPhw192L7n6/u19L8ED3aeDHYU+f6U8fWxcRGWOyGj9m1gVcTlDr7wduBb4Zb7GaRP34RUTGmOzh7l3AUuA+4GORt3iPDWrqEREZY7Ia/18Aw8CfAB8wqzzbNcDdfXaMZTt6NcMyF8Ad7Nh6Pi0i0myT9eM/tj/GbhYMzVwei784CrnOifcREUm4YzuwT4X68ouI1FDgFxFJmRQEfvXsERGJSlngV41fRCQFgV8fXBcRiUp+4NcH10VEaiQ/8KupR0SkRlsCv5l9yMx+ZWabzWydmXXHlpl69YiI1Gh54DezhcAHgAF3XwpkgffElqF69YiI1GhXU08O6DGzHDAD+ENsOSnwi4jUaHngd/ffA58GngO2A3vc/Qf16cxspZmtN7P1Q0NDR55hVkMzi4hEtaOp5zjgHcBi4I+AXjO7pj6du69x9wF3H+jr6zvyDFXjFxGp0Y6mnouBZ9x9yN1HCcb2f31sualXj4hIjXYE/ueA15nZDAvGeb4I2BpbburVIyJSox1t/I8A9wCPA0+EZVgTW4b64LqISI1JP70YB3f/KPDRlmSWUY1fRCQqBW/uKvCLiESlIPCrV4+ISFQKAr9q/CIiUSkI/OrOKSISlbLAr149IiIpCPwaskFEJCoFgV9NPSIiUSkL/OrVIyKSgsCvXj0iIlEpCPzRIRtU4xcRSX7g18fWRURqJD/wq6lHRKRGCgK/Hu6KiESlLPCrxi8ikoLArzZ+EZEoBX4RkZRJQeBXU4+ISFQKAr969YiIRKUg8KtXj4hIVLoCv97cFRFJQeDPaFhmEZGo5Ad+NfWIiNRIWeBXjV9EJAWBX716RESiUhD49c1dEZGoFAR+1fhFRKKSH/ij4/GXRsG9fWUREZkGUhD4M3VdOtWzR0TSLfmBH9SzR0QkIiWBX+38IiJl6Qj8Ne386tkjIumWjsCvph4RkYqUBH419YiIlLUl8JvZXDO7x8yeNLOtZnZerBlqvB4RkYrc5Eli8Xngfne/0sw6gRmx5qamHhGRipYHfjObDbwR+CsAd88D8UbjrPrxi4iUtaOpZwkwBNxhZr8wsy+bWW99IjNbaWbrzWz90NDQ0eWoph4RkYp2BP4csBz4krufAwwDq+oTufsadx9w94G+vr6jy1FNPSIiFe0I/IPAoLs/Ei7fQ3AjiI969YiIVLQ88Lv7C8DzZnZquOoiYEusmaqpR0Skol29et4P3B326PktsCLW3PTBdRGRirYEfnffCAy0LEN9cF1EpCIlb+6qqUdEpCyFgV81fhFJt5QEfvXqEREpS0ng1wfXRUTKUhL4VeMXESlT4BcRSZmUBH716hERKUtJ4FeNX0SkLCWBX2/uioiUpSPwRz+2rqYeEUm5dAR+NfWIiFSkJPDrzV0RkbIUBn419YhIuqUk8KupR0SkLIWBXzV+EUm3lAR+NfWIiJSlJPBHavyjB9pXDhGRaSAdgX/midX53/0Etv+yfWUREWmzdAT+E86ExRcG816Ce2+CUqm9ZRIRaZN0BH4zuPyz1bb+36+Hx+9sa5FERNolHYEfYMEfw/kfrC4/eAvs39G24oiItEt6Aj/AG26C4xYH8yN74I7L4FffBvf2lktEpIXSFfg7euDyT1eXdz0FX78W1rwJHvmfMLRNNwERSbxcuwvQcn98MVz+GXjwY3Bob7Bu+8bgB0EPoJNeDX2nwoJTYe4rYM4imL0QOrrbV24RkSYxPwZquAMDA75+/frmHvTAS/Cvn4NH10BhZGr7dM+B3r7g1zMPeuZCz3HB+q5Z0DU7mHb2QufMYNrRE05nBPPRdwpERGJkZhvcfWDM+tQG/rJ9L8DWf4bf/hie+X9wa
E88+ZRZNrgB5LrDX1f1l+0KbgzZzvAXnc8F3xXIdkAmF07L87lgWvllg3zK85kcWCayPjq1yHwmmK+kzYydr/nZ2HVYg+0WWW8N0tZvt3ivgUhKjBf409fUU2/WifDa9wW/UjFo5x96Enb+GnY+BXsGYe/vYe8fwItHn58XIb8/+Mn4KjcGq52vn1a2MX6aKU0n2R/G2UaDearzh7NfZbcGeVbm6/ejdr7RfhPtO2k5DicN46yfrEyT5DvZcSfb50j3rTnMVI450XGPIg3ApZ9samuBAn9UJgsnnBH86pVKcHA3DA8Fv4O7YeTlcLoXDu0Lnhkc2hcE9UP7g+Eh8sPBdHQkmDbj5pEGHr5gN/3/IBWJ39v+XoG/LTIZ6J0f/DjtyI9THIXRg1A4BIXy9BAUD0EhHwwbXcwH60qjQfpiPpiWCuF0tLpc/hVHg2BZmS8GN6tSIZwvBH/ReCmc1s97dbsXa+fdq2lpNF+qznupmt6j632c9eG28rKIxE6Bv9WyHXrAO5HoDYIG8/XTmm00TnPY0/rj0DhtNF3D+fr9mCRtNH2D+Ynya5gndfsyfvpG5ahJN5U0E60fp0xj9mWC9YeT35iDHea+h3nMifY/qjShTHNDtQK/TC+Vh7vpesVEpJX0X5eISMoo8IuIpEzbAr+ZZc3sF2Z2b7vKICKSRu2s8d8IbG1j/iIiqdSWwG9mi4DLgS+3I38RkTRrV43/H4EPA+N23DazlWa23szWDw0Nta5kIiIJ1/LAb2ZXADvcfcNE6dx9jbsPuPtAX19fi0onIpJ8LR+kzcz+AfgLoAB0A7OBb7r7NRPsMwT87gizXADsPMJ9j2VpPO80njOk87zTeM5w+Of9SncfU3Nu6+icZvYm4GZ3vyLGPNY3Gp0u6dJ43mk8Z0jneafxnKF5561+/CIiKdPWIRvc/cfAj9tZBhGRtElDjX9NuwvQJmk87zSeM6TzvNN4ztCk8z4mvsAlIiLNk4Yav4iIRCjwi4ikTKIDv5ldYmbbzOxpM1vV7vLEwcxONrMfmdlWM/uVmd0Yrp9nZj80s6fC6XHtLmuz1Q/0Z2aLzeyR8Jy/Zmad7S5js5nZXDO7x8yeDK/5eUm/1mb2ofDf9mYzW2dm3Um81ma21sx2mNnmyLqG19YCt4axbZOZLT+cvBIb+M0sC3wRuBQ4A7jazBp8TPeYVwD+i7ufDrwO+M/hea4CHnL3VwEPhctJUz/Q3yeBz4XnvBu4vi2litfngfvd/TTgbILzT+y1NrOFwAeAAXdfCmSB95DMa30ncEnduvGu7aXAq8LfSuBLh5NRYgM/8FrgaXf/rbvnga8C72hzmZrO3be7++Ph/D6CQLCQ4FzvCpPdBbyzPSWMR/1Af2ZmwJ8C94RJknjOs4E3Al8BcPe8u79Mwq81QbfzHjPLATOA7STwWrv7w8BLdavHu7bvAP6XB34OzDWzk6aaV5ID/0Lg+cjyYLguscysHzgHeAQ4wd23Q3BzAI5vX8liUT/Q33zgZXcvhMtJvN5LgCHgjrCJ68tm1kuCr7W7/x74NPAcQcDfA2wg+de6bLxre1TxLcmB3xqsS2zfVTObCXwD+KC77213eeI0zkB/abjeOWA58CV3PwcYJkHNOo2EbdrvABYDfwT0EjRz1EvatZ7MUf17T3LgHwROjiwvAv7QprLEysw6CIL+3e7+zXD1i+U//cLpjnaVLwbnA283s2cJmvD+lOAvgLlhcwAk83oPAoPu/ki4fA/BjSDJ1/pi4Bl3H3L3UeCbwOtJ/rUuG+/aHlV8S3Lgfwx4Vfj0v5PggdB321ympgvbtr8CbHX3z0Y2fRe4Npy/FvhOq8sWF3f/W3df5O79BNf1X9z9PwA/Aq4MkyXqnAHc/QXgeTM7NVx1EbCFBF9rgiae15nZjPDfevmcE32tI8a7tt8F/jLs3fM6YE+5SWhK3D2xP+Ay4NfAb4CPtLs8MZ3jBQR/4m0CNoa/ywjavB8Cngqn89pd1
pjO/03AveH8EuBR4Gng60BXu8sXw/kuA9aH1/vbwHFJv9bAx4Angc3APwFdSbzWwDqC5xijBDX668e7tgRNPV8MY9sTBL2eppyXhmwQEUmZJDf1iIhIAwr8IiIpo8AvIpIyCvwiIimjwC8ikjIK/JIKZvbTcNpvZn/e5GP/t0Z5iUxX6s4pqWJmbwJudvcrDmOfrLsXJ9i+391nNqN8Iq2gGr+kgpntD2dXA28ws43hOO9ZM/uUmT0Wjmv+12H6N4XfOfjfBC/IYGbfNrMN4djwK8N1qwlGjtxoZndH8wrfqvxUOI78E2Z2VeTYP46Mq393+FYqZrbazLaEZfl0K/8/kvTITZ5EJFFWEanxhwF8j7ufa2ZdwE/M7Adh2tcCS939mXD5Ond/ycx6gMfM7BvuvsrM/sbdlzXI610Eb9qeDSwI93k43HYOcCbB+Co/Ac43sy3AvwNOc3c3s7lNP3sRVOMXeSvBmCcbCYaznk/wcQuARyNBH+ADZvZL4OcEA2S9ioldAKxz96K7vwj8X+DcyLEH3b1EMMxGP7AXGAG+bGbvAg4c9dmJNKDAL2lnwPvdfVn4W+zu5Rr/cCVR8GzgYuA8dz8b+AXQPYVjj+dQZL4I5DwYX/61BCOtvhO4/7DORGSKFPglbfYBsyLLDwD/MRzaGjP7k/DjJvXmALvd/YCZnUbwmcuy0fL+dR4GrgqfI/QRfD3r0fEKFn5TYY67fx/4IEEzkUjTqY1f0mYTUAibbO4k+IZtP/B4+IB1iMaf8bsfuMHMNgHbCJp7ytYAm8zscQ+Ghy77FnAe8EuCEVQ/7O4vhDeORmYB3zGzboK/Fj50ZKcoMjF15xQRSRk19YiIpIwCv4hIyijwi4ikjAK/iEjKKPCLiKSMAr+ISMoo8IuIpMz/BzcgO0/LsHX+AAAAAElFTkSuQmCC\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEGCAYAAABiq/5QAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAd6UlEQVR4nO3dfZRcdZ3n8fe3qvopnYQmDySaMCZgRCAOoWlcnlZgiDzJquvBYWRxmYAnw54dBVnGyejOgjo7G44edxE4MowGcQ4bZhdEGZUnUYY9iiQB2xASIg8CaQ2kachTJ/1QVd/9497qvlVd/ZCkqm/n3s/rnDp1697fvb/fpcL3/vp7f/W75u6IiEh6ZOJugIiITC4FfhGRlFHgFxFJGQV+EZGUUeAXEUmZXNwNmIg5c+b4okWL4m6GiMhh5ZlnnnnL3edWrj8sAv+iRYvYsGFD3M0QETmsmNlr1dYr1SMikjIK/CIiKaPALyKSModFjl9EZKIGBwfp6uqir68v7qZMmubmZhYuXEhDQ8OEyivwi0iidHV1MWPGDBYtWoSZxd2cunN3enp66OrqYvHixRPaR6keEUmUvr4+Zs+enYqgD2BmzJ49+4D+wklsj//t3gF+99Ze9vYXmN3ayNIFR8TdJBGZJGkJ+iUHer6JDfyPb3mTv7pvIwCfOHkB37hsWcwtEhGZGhKb6pneNHxN29ufj7ElIpImPT09LFu2jGXLljF//nwWLFgw9HlgYGDCx1mzZg1vvPFGXdqY2B7/tEjg7x1Q4BeRyTF79mw6OzsBuOmmm5g+fTo33HDDAR9nzZo1tLe3M3/+/Fo3MbmBf3pTdmh5b38hxpaIiATuvvtubr/9dgYGBjjjjDO47bbbKBaLrFixgs7OTtydlStXMm/ePDo7O7nssstoaWlh3bp1NDY21qwdiQ38rZEe/z6lekRSadGqH9ft2K+u/sgBld+0aRMPPPAAv/zlL8nlcqxcuZJ7772XY489lrfeeovnnnsOgJ07d9LW1satt97K
bbfdxrJltb8/mdzA3xhJ9Sjwi0jMfvrTn7J+/Xo6OjoA2L9/P0cffTQXXHABW7du5dprr+Xiiy/m/PPPr3tbkhv4dXNXRKYQd+eqq67iq1/96ohtGzdu5KGHHuKb3/wm999/P3feeWdd25LgwD+c4983UMDdUze2VyTtDjQdU0/Lly/n0ksv5dprr2XOnDn09PTQ29tLS0sLzc3NfPKTn2Tx4sVcc801AMyYMYM9e/bUpS2JDfxNuSwNWWOw4OSLTn++SHNDdvwdRUTq4AMf+AA33ngjy5cvp1gs0tDQwB133EE2m+Xqq68e6pzefPPNAKxYsYLPfOYzdbm5a+5es4PVS0dHhx/Mg1hO+vKj7No/CMAz/3U5s6c31bppIjLFbNmyheOPPz7uZky6audtZs+4e0dl2cT+gAvKf8TVqyGdIiJAwgN/NM+vH3GJiAQSHvg1pFNEpFKyA3+jhnSKiFRKduCvGNIpIiKJD/zq8YuIVEp04J+uHL+ITLJaTMu8YsUKtm7dWrc2JvYHXADTNF+PiEyyiUzL7O64O5lM9b73XXfdVdc2JrzHHx3OqRy/iMTnpZdeYunSpVxzzTW0t7ezfft2Vq5cSUdHByeeeCJf+cpXhsqeddZZdHZ2ks/naWtrY9WqVZx00kmcfvrp7Nix45Dbkugev4ZziqTcTXV81vZNuw54l82bN3PXXXdxxx13ALB69WpmzZpFPp/n3HPP5dJLL+WEE04o22fXrl2cffbZrF69muuvv541a9awatWqQ2p6onv8urkrIlPJsccey6mnnjr0ee3atbS3t9Pe3s6WLVvYvHnziH1aWlq46KKLADjllFN49dVXD7kdye7xN0YfxqJUj4jEq7W1dWj5xRdf5JZbbmHdunW0tbVxxRVX0NfXN2Kf6ORs2WyWfP7QO7HJDvyaskEk3Q4iHTNZdu/ezYwZM5g5cybbt2/nkUce4cILL5yUuhMd+Kcr1SMiU1R7ezsnnHACS5cu5ZhjjuHMM8+ctLoTPS3zlu27ueiW/wfA++ZN59HPn13rponIFKNpmYelclrm8ufuKscvIgJJD/zK8YuIjJDwwK9x/CJpdDiksGvpQM830YG/KZchlwkesD5YcPrzSveIJF1zczM9PT2pCf7uTk9PD83NzRPeJ9GjesyMaY1ZdvcFvf19/QWacnrgukiSLVy4kK6uLrq7u+NuyqRpbm5m4cKFEy6f6MAPwZDOUuDf25/nyNbaPaleRKaehoYGFi9eHHczprS6pXrMbI2Z7TCzTZF1XzOzF8xso5k9YGZt9aq/pCzPrxu8IiJ1zfF/F6j8GdpjwFJ3/2Pgt8Df1LF+oPIGr3L8IiJ1C/zu/iTwdsW6R9291O3+FTDxpNRBKhvSqZE9IiKxjuq5CnhotI1mttLMNpjZhkO5SdOqh7GIiJSJJfCb2ZeAPHDPaGXc/U5373D3jrlz5x50XZqvR0Sk3KSP6jGzK4FLgPN8EgbaRnP8+/QULhGRyQ38ZnYh8NfA2e6+bzLqnBbJ8avHLyJS3+Gca4GngOPMrMvMrgZuA2YAj5lZp5ndUa/6S6Yrxy8iUqZuPX53/1SV1d+pV32jUapHRKRcoufqAd3cFRGplPjAP03j+EVEyiQ+8Leqxy8iUibxgX+6cvwiImUSH/inNSrVIyISlfjAr5u7IiLlEh/4NZxTRKRcch/E8s5r8PqvOGKwn9Mzb/BU8UT1+EVESHLg71oPD6ykAbg8ezpPFU9kIF9ksFCkIZv4P3REREaV3AiYbRhabMoWh5Z1g1dE0i65gT8zHPhbMsO5/V7l+UUk5ZIb+LPDD1VvtkjgV49fRFIuwYE/kuqJ9Ph1g1dE0i7BgX+4x98Y6fHv0wPXRSTlEhz4h3v80cCvHr+IpF06Aj/DwV45fhFJuwQH/uFUT0P05u6AAr+IpFtyA39kOGeDR3v8yvGLSLolN/BHUj05pXpERIYkOPAPp3qykcCvm7siknYJDvzDPf6sq8cv
IlKSjsBfHBxa1tTMIpJ2CQ78w6mejA8H/u279uPucbRIRGRKSG7gj4zqyRSH0zvPvr6TH3T+Po4WiYhMCQkO/FnAADAvcPmpC4Y2/bcfPs8fdu6PqWEiIvFKbuA3K0v3fOnC9/JHs6YBsKcvz1/d9xuKRaV8RCR9khv4oSzwt+acb/zpSWSCPwL4xUs9fPlfntfwThFJnYQH/siTJQuDdCyaxV+cfezQqrufeo1zv/4E/2fDNvKFYpUDiIgkT8ID/3CPn8IAANctX8JZ750ztLp7Tz9fuG8jH/z7x1l1/0ae2LqDfZrPR0QSLLkPW4eKwB8M6WzKZfneVR/kB52/5+aHX+DN3f0AvN07wL3rt3Hv+m1kM8Zx82aw7I/aeP/8GRw7dzrHzG1l3oxmMqVckYjIYSrZgT8TTfUMDK/OGJ9oX8gFJ87nH558hXvXvc6OPf3DRYvO5u272bx9d9nhGrMZ3t3WzLvbWpg/s5m5M5uYO72JuTOamNXayKzWRo6c1kjbtAZaGrKY6SIhIlNPsgN/lR5/VGtTjus//D6uO28Jv972Dj957g3+9bfdvNy9l2q/8RooFHm1Zx+v9uwbt+qGrDGzuYEZzTlmtjQwvSnHtMYc05uyTGvKMa0hy7TGLC2NOVoaMrQ0ZmluyNKUy9LUkKE5l6Uxl6EpfDVkMzSW3rMZGnJGQzZDLmO6wIjIAUlR4B8YtVgmY5zynlmc8p5Z/C2wu2+Qjdt28dzvd/Fy915e6d7LK2/1snPfyIvHaAYLTk/vAD29o9dbK7mMkcsaDZkMuayRy2ZoyATvpW3ZTLCczdjQe3aUz9lMhqwF/12yFqwrWzYjmynfbhYsZ0r7ZcJlK5Uf3pax8m2ZTGTZqpezKu9GtCxYuL8RKVOxX5Cpi5YPjlNZtnRsIssWHttKy0Z5XeFxKJVhuEzlviJxSnjgj5xeceJBe2ZzA2ctmcNZS+aUrd/bn2f7zv107dxP9+5+duzpY8eefnp6B3h77wBv9w6wc/8AO/cN0p+fvFFC+aKTLzp9aGTS4aTqRSG40pR9rixX2g7lF6DocYhehIbWVVycIscYak+pDZH2ETl2acdq66PXs+gxGKXsaOXLLoultlWtI9L2il2GaqooX3HYsv8eo51D5fry5o3dvspyo7Wjcktlfbdf3k5jrnZjcRIe+MdO9Ryo6U05lsybwZJ5M8Yt2zdYYHffIHv68uzpy7O3L8/e/jy9/Xn2DeTZN1AIX3n6Bov0DRbYP1igP1+kPx98HgiXB/IFBgvOQL7IQKHI4NDLKehHaIctd/DSwvDamFojU1mxxvOL1S3wm9ka4BJgh7svDdfNAv4ZWAS8Cvypu79TrzZMNNVTD80NQc7+qPGvEYekGPb2BwtF8gUnXywOfS4UfejikC8G2wvu4foixSLki0G5QtEpenCsQuRVdKdQhII7xbJ1HlkX/MMsrS86eKSMO0P7FT1o89Cye1A2XO84xbA+Lx3Hy48ZBMzhdR4ep+BA2XGDMFo6rntwXErrI/uW/r8qnUdpX48eJ9Km6PZi2J7hQF7+2d3D9/r+WxCZqHr2+L8L3AZ8L7JuFfC4u682s1Xh57+uWwsy5T/gSqJMxmjMWE3/DJT6qrxwlC4KpYsFFZ9HXDxKZUbZ7qUrG5RdcMqOH+5DdHtp36HlSHshUrZamcj5RbdXqXvM8hX/nYZrq7xwlp9L9LiV+41WHyPqq1JmxPrR28dE9h/nWJXrSxqytf3/u26B392fNLNFFas/BpwTLt8NPEE9A3+NUz0itVDKy4ef4myKpNRkdxPnuft2gPD9qNEKmtlKM9tgZhu6u7sPrrbIw1gmO9UjIjJVTdn8gLvf6e4d7t4xd+7cgztItMd/AKN6RESSbLID/5tm9i6A8H1HXWsr6/Er8IuIwOQH/geBK8PlK4Ef1rU2pXpEREaoW+A3s7XAU8BxZtZlZlcDq4EPm9mLwIfDz/UT43BOEZGpqp6jej41yqbz6lXn
CJloj19TLYuIwBS+uVsTSvWIiIyQ8MCvVI+ISKWEB/5Ij7+oVI+ICKQp8KvHLyICJD7wK9UjIlIp2YFfo3pEREZIduBXqkdEZISEB36lekREKo0Z+M3sisjymRXb/rJejaoZTcssIjLCeD3+6yPLt1Zsu6rGbam9suGcCvwiIjB+4LdRlqt9nnqU4xcRGWG8wD/WU6Cn/hNEleoRERlhvEna3m9mGwl698eGy4Sfj6lry2oho/n4RUQqjRf4j5+UVtSLUj0iIiOMGfjd/bXoZzObDXwIeN3dn6lnw2pCqR4RkRHGG875IzNbGi6/C9hEMJrnn8zsuklo36HRqB4RkRHGu7m72N03hcsrgMfc/d8B/4bDbTinUj0iIsD4gT/aTT4P+AmAu+8BivVqVM0o1SMiMsJ4N3e3mdlngS6gHXgYwMxagIaxdpwSMurxi4hUGq/HfzVwIvDnwGXuvjNcfxpwVx3bVRtZzc4pIlJpvFE9O4Brqqz/OfDzejWqZjRJm4jICGMGfjN7cKzt7v7R2janxhT4RURGGC/HfzqwDVgLPM3hMD9PVDZyenrmrogIMH7gnw98GPgUcDnwY2Ctuz9f74bVhHr8IiIjjHlz190L7v6wu19JcEP3JeCJcKTP1KfALyIywng9fsysCfgIQa9/EfBN4Pv1bVaNZLIE2SkHL0KxEK4TEUmv8W7u3g0sBR4Cvhz5Fe/hI9sIhf5guTCowC8iqTdej//TQC/wPuBzZkP3dg1wd59Zx7bVRlngH4CG5njbIyISs/HG8R/+D2OPjuzRtA0iIuP+cvfwF73Bqxk6RURSFvg1skdEJAWBP6NUj4hIVPIDv6ZmFhEpk7LAr1SPiEgsgd/MPm9mz5vZJjNba2b1G2NZNjWzevwiIpMe+M1sAfA5oMPdlwJZ4M/qVqGeuysiUiauVE8OaDGzHDAN+EPdalKqR0SkzKQHfnf/PfB14HVgO7DL3R+tLGdmK81sg5lt6O7uPvgK9cB1EZEycaR6jgQ+BiwG3g20mtkVleXc/U5373D3jrlz5x58hRnl+EVEouJI9SwHfufu3e4+SDDT5xl1q003d0VEysQR+F8HTjOzaRbM+nYesKVutSnHLyJSJo4c/9PAfcCzwHNhG+6sW4Xq8YuIlBn3QSz14O43AjdOSmUazikiUka/3BURSZnkB36N6hERKZP8wK8cv4hImRQEfqV6RESiUhb41eMXEUlB4I8+iEU9fhGRFAR+PXNXRCQqXYFfqR4RkRQE/oxSPSIiUckP/Orxi4iUUeAXEUmZFAR+PYhFRCQqXYFfo3pERNIQ+JXqERGJSn7gzyjVIyISlfzAr0naRETKpCDwK9UjIhKVssCvVI+ISAoCf/SXu+rxi4ikIPBrkjYRkah0BX6lekREUhD4M0r1iIhEJT/wa1SPiEiZlAV+pXpERFIQ+JXqERGJSkHgV49fRCQqXYFfwzlFRFIQ+DNZwIJlL0KxEGtzRETilvzAD0r3iIhEpDDwK90jIumWksCvqZlFREpSGPiV6hGRdEtJ4NfIHhGRkpQEfqV6RERKYgn8ZtZmZveZ2QtmtsXMTq9rhXrurojIkNz4ReriFuBhd7/UzBqBaXWtTcM5RUSGTHrgN7OZwIeAPwdw9wGgvtG4LNWTr2tVIiJTXRypnmOAbuAuM/u1mX3bzForC5nZSjPbYGYburu7D61GjeoRERkSR+DPAe3At9z9ZKAXWFVZyN3vdPcOd++YO3fuodWoVI+IyJA4An8X0OXuT4ef7yO4ENRPtMev4ZwiknKTHvjd/Q1gm5kdF646D9hc10ozGs4pIlIS16iezwL3hCN6XgFW1LU25fhFRIbEEvjdvRPomLQKNUmbiMiQlPxyV4FfRKQkJYE/+txdpXpEJN1SEvg1nFNEpCR9gb+oX+6KSLqlI/BnlOoRESlJR+BXqkdEZEgKA79SPSKSbikJ/Er1iIiUpCTwK9UjIlKSvsCvUT0iknLpCPwa1SMi
MiQdgV+pHhGRISkM/JqrR0TSLSWBX/Pxi4iUpDDwK9UjIumWksCvVI+ISElKAr96/CIiJekI/C2zhpdf/xXs/kN8bRERiVk6Av+7lsH8Pw6W8/vhZ/893vaIiMQoHYE/k4Hz/274c+c98Mam+NojIhKjdAR+gGPOhiXnhx8cHvvbWJsjIhKX9AR+gA9/BSw85Zd/Blsfirc9IiIxSFfgP+p4OPnTw5/vvRx+fAPsfye+NomITLJ0BX6Ac78I02YHy16E9f8It3bAYzfCa0/pQS0iknjm7nG3YVwdHR2+YcOG2h2w52X4yQ1BuqdScxssaId5S2H+B+DIxdB2NLQeFdwkFhE5TJjZM+7eMWJ9KgM/gDts+Rd45Iuwa9v45bNNMH0etM6B6UcFvw1oORJa2qD5CGiaEbwaW6GhNXhvnAa5FmgIX9lGMKvteYiIjGK0wJ+rVjgVzOCEjwYjfV75eXCj97ePwN43qpcv9MOu14PXwVcKuWbINYXvjcEFJdcUXBRyTcGvjLONwSuTG/5cWs40hO+5ile24nMmeLfs8HbLhO/ZivfM8LtVlDOLbKv2suF9hj6Hy1hknZXvV7atSlkRqZv0Bv6ShmY47qLg5R6kgd7cFLx2bIGdrwevvp01qMyDH5Dl99fgWElXugBELggjlqu8j7VtzPfKOiveYZRtVFmOnMOB7De0W5U6h5Yr96N8udp+Y+07bjsOpAyjrJ9IO8Y5nwkft8o+B7vvqP2Psc7/UOoe4zgX3Vw+9cwhUuCPMoM57w1eJ368fFv/Hti7A3rfgt4dwUig/TuD9/494Ws3DPQGr8F94Wv/8KuoCeImzoMLMYAX4m2KSNwu+HsF/liUcvizjz34YxQLkO+Dwb4gdZQPX4X+YNbQfH9wcShbzgcTy5XWF/PDr0JkuTgYHL9YGF72QmRdfvizF8u3e2ldMXiPlittcw/W4ZH9i5GXDy/jVdb7KOsryotI3SnwT6ZMNrzp2xp3S6a2qheLastV3sfaNuF3Rn6uVjZarupy5X6MUzZavsryWPVVrZOKfRm9fLV2lJWbSJkJrh+13RVGrD/Q+g5l34m0ycfYdoB1j3ecTG1DtQK/TD2lG8YiUhcamC4ikjIK/CIiKRNb4DezrJn92sx+FFcbRETSKM4e/7XAlhjrFxFJpVgCv5ktBD4CfDuO+kVE0iyuHv//Ar4AaOC2iMgkm/TAb2aXADvc/Zlxyq00sw1mtqG7u3uSWiciknyTPjunmf0P4NNAHmgGZgLfd/crxtinG3jtIKucA7x1kPseztJ43mk8Z0jneafxnOHAz/s97j63cmWs0zKb2TnADe5+SR3r2FBtWtKkS+N5p/GcIZ3nncZzhtqdt8bxi4ikTKxTNrj7E8ATcbZBRCRt0tDjvzPuBsQkjeedxnOGdJ53Gs8ZanTeh8WjF0VEpHbS0OMXEZEIBX4RkZRJdOA3swvNbKuZvWRmq+JuTz2Y2dFm9nMz22Jmz5vZteH6WWb2mJm9GL4fGXdba61yoj8zW2xmT4fn/M9m1hh3G2vNzNrM7D4zeyH8zk9P+ndtZp8P/21vMrO1ZtacxO/azNaY2Q4z2xRZV/W7tcA3w9i20czaD6SuxAZ+M8sCtwMXAScAnzKzE+JtVV3kgf/i7scDpwH/OTzPVcDj7r4EeDz8nDSVE/3dDPzP8JzfAa6OpVX1dQvwsLu/HziJ4PwT+12b2QLgc0CHuy8FssCfkczv+rvAhRXrRvtuLwKWhK+VwLcOpKLEBn7gg8BL7v6Kuw8A9wIfi7lNNefu29392XB5D0EgWEBwrneHxe4GPl79CIenyon+zMyAPwHuC4sk8ZxnAh8CvgPg7gPuvpOEf9cEw85bzCwHTAO2k8Dv2t2fBN6uWD3ad/sx4Hse+BXQZmbvmmhdSQ78C4Btkc9d4brEMrNFwMnA08A8d98OwcUBOCq+ltVF5UR/s4Gd7p4PPyfx+z4G6AbuClNc3zazVhL8Xbv774Gv
A68TBPxdwDMk/7suGe27PaT4luTAb1XWJXbsqplNB+4HrnP33XG3p55GmegvDd93DmgHvuXuJwO9JCitU02Y0/4YsBh4N9BKkOaolLTvejyH9O89yYG/Czg68nkh8IeY2lJXZtZAEPTvcffvh6vfLP3pF77viKt9dXAm8FEze5UghfcnBH8BtIXpAEjm990FdLn70+Hn+wguBEn+rpcDv3P3bncfBL4PnEHyv+uS0b7bQ4pvSQ7864El4d3/RoIbQg/G3KaaC3Pb3wG2uPs3IpseBK4Ml68EfjjZbasXd/8bd1/o7osIvtefuft/AH4OXBoWS9Q5A7j7G8A2MzsuXHUesJkEf9cEKZ7TzGxa+G+9dM6J/q4jRvtuHwT+Yzi65zRgVyklNCHuntgXcDHwW+Bl4Etxt6dO53gWwZ94G4HO8HUxQc77ceDF8H1W3G2t0/mfA/woXD4GWAe8BPxfoCnu9tXhfJcBG8Lv+wfAkUn/roEvAy8Am4B/ApqS+F0DawnuYwwS9OivHu27JUj13B7GtucIRj1NuC5N2SAikjJJTvWIiEgVCvwiIimjwC8ikjIK/CIiKaPALyKSMgr8kgpm9svwfZGZXV7jY3+xWl0iU5WGc0qqmNk5wA3ufskB7JN198IY2/e6+/RatE9kMqjHL6lgZnvDxdXAvzWzznCe96yZfc3M1ofzmv9FWP6c8DkH/5vgBzKY2Q/M7JlwbviV4brVBDNHdprZPdG6wl9Vfi2cR/45M7sscuwnIvPq3xP+KhUzW21mm8O2fH0y/xtJeuTGLyKSKKuI9PjDAL7L3U81sybgF2b2aFj2g8BSd/9d+Pkqd3/bzFqA9WZ2v7uvMrO/dPdlVer6BMEvbU8C5oT7PBluOxk4kWB+lV8AZ5rZZuDfA+93dzeztpqfvQjq8YucTzDnSSfBdNazCR5uAbAuEvQBPmdmvwF+RTBB1hLGdhaw1t0L7v4m8K/AqZFjd7l7kWCajUXAbqAP+LaZfQLYd8hnJ1KFAr+knQGfdfdl4Wuxu5d6/L1DhYJ7A8uB0939JODXQPMEjj2a/shyAch5ML/8BwlmWv048PABnYnIBCnwS9rsAWZEPj8C/KdwamvM7H3hw00qHQG84+77zOz9BI+5LBks7V/hSeCy8D7CXIKnZ60brWHhMxWOcPefANcRpIlEak45fkmbjUA+TNl8l+AZtouAZ8MbrN1Uf4zfw8A1ZrYR2EqQ7im5E9hoZs96MD10yQPA6cBvCGZQ/YK7vxFeOKqZAfzQzJoJ/lr4/MGdosjYNJxTRCRllOoREUkZBX4RkZRR4BcRSRkFfhGRlFHgFxFJGQV+EZGUUeAXEUmZ/w+P7CqHhHCnXgAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] @@ -263,16 +242,16 @@ }, { "cell_type": "code", - "execution_count": 105, - "id": "42f889b2", + "execution_count": 149, + "id": "699c6905", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "10.251992169875896\n", - "4.404787604174319\n" + "10.136669617836302\n", + "4.401030427985024\n" ] } ], diff --git a/recsys/recsys_server.py b/recsys/recsys_server.py index 8288f67..fa8fc6a 100644 --- a/recsys/recsys_server.py +++ b/recsys/recsys_server.py @@ -181,7 +181,7 @@ def create_doc_pipeline(args): # create pipeline source = from_file(args.send_rate, os.path.join(args.data_dir, args.file)) - user_vectors = source.map(UserOperator, args, num_replicas=8).as_queryable("user_vectors") + user_vectors = source.map(UserOperator, args, num_replicas=1).as_queryable("user_vectors") #movies = source.join(user_vectors, MovieOperator).as_queryable("movie_vectors") #movie_vectors = user_vectors.map(MovieOperator).as_queryable("movie_vectors") # deploy @@ -226,7 +226,16 @@ def main(): snapshot_interval = 10 start = time.time() while time.time() - start < run_duration: - pass + snapshot_time = ralf_conn.snapshot() + remaining_time = snapshot_interval - snapshot_time + if remaining_time < 0: + print( + f"snapshot interval is {snapshot_interval} but it took {snapshot_time} to perform it!" 
+ ) + time.sleep(0) + else: + print("writing snapshot", snapshot_time) + time.sleep(remaining_time) if __name__ == "__main__": From c24cb489142d32ea5605002907af203540466ac7 Mon Sep 17 00:00:00 2001 From: Amit Narang Date: Sat, 30 Oct 2021 20:04:59 -0700 Subject: [PATCH 7/9] WIP --- recsys/als-half.ipynb | 404 ++++++++++++++++++++++++++++++-- recsys/als-stream-2.ipynb | 263 +++++++++++++++++++++ recsys/als-streaming-test.ipynb | 26 +- recsys/als.ipynb | 63 +++-- 4 files changed, 704 insertions(+), 52 deletions(-) create mode 100644 recsys/als-stream-2.ipynb diff --git a/recsys/als-half.ipynb b/recsys/als-half.ipynb index 72d2dae..46a5a28 100644 --- a/recsys/als-half.ipynb +++ b/recsys/als-half.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 42, + "execution_count": 61, "id": "d2a4455a", "metadata": {}, "outputs": [], @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 62, "id": "77ed3484", "metadata": {}, "outputs": [], @@ -26,7 +26,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 74, "id": "af66f007", "metadata": {}, "outputs": [ @@ -97,7 +97,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 64, "id": "f9e63f83", "metadata": {}, "outputs": [ @@ -113,7 +113,7 @@ " [0. , 0. , 0. , ..., 0. , 0. , 0. 
]])" ] }, - "execution_count": 45, + "execution_count": 64, "metadata": {}, "output_type": "execute_result" } @@ -140,13 +140,13 @@ " return train, test\n", "\n", "train, test = create_train_test(ratings)\n", - "del ratings\n", + "#del ratings\n", "train" ] }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 65, "id": "bd9b47bb", "metadata": {}, "outputs": [], @@ -175,6 +175,7 @@ " self.user_factors = self._als_step(train, self.user_factors, self.item_factors)\n", " self.item_factors = self._als_step(train.T, self.item_factors, self.user_factors) \n", " predictions = self.predict()\n", + " #print(predictions)\n", " test_mse = self.compute_mse(test, predictions)\n", " train_mse = self.compute_mse(train, predictions)\n", " self.test_mse_record.append(test_mse)\n", @@ -202,6 +203,7 @@ " def compute_mse(y_true, y_pred):\n", " \"\"\"ignore zero terms prior to comparing the mse\"\"\"\n", " mask = np.nonzero(y_true)\n", + " print(y_pred[mask])\n", " mse = mean_squared_error(y_true[mask], y_pred[mask])\n", " return mse\n", " " @@ -209,7 +211,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 66, "id": "73d89971", "metadata": {}, "outputs": [], @@ -226,20 +228,344 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 67, "id": "16b9cf07", "metadata": {}, "outputs": [], "source": [ - "als = ALSModel(n_iters = 100, num_features = 40, l = 0.01)" + "als = ALSModel(n_iters = 100, num_features = 200, l = 0.01)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 68, "id": "bf2be80c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0.5037482 0.6464488 0.86420217 ... 0.18081841 0.10623224 0.15315889]\n", + "[1.14166979 0.3457674 0.73312602 ... 0.37752427 0.1315888 0.42903076]\n", + "[-0.25377314 -0.24611038 -0.47028482 ... -0.077076 0.01770958\n", + " -0.01431049]\n", + "[3.49954347 3.88175469 3.59930154 ... 
3.8229048 2.87641366 3.68848983]\n", + "[-0.16703589 -0.17537446 -0.34193023 ... -0.0678052 0.04234823\n", + " -0.04945025]\n", + "[3.6585508 3.90115889 3.75023126 ... 3.8478806 2.90612924 3.80817785]\n", + "[-0.11922917 -0.13327906 -0.26897441 ... -0.05161294 0.04280381\n", + " -0.05740035]\n", + "[3.72858365 3.90939432 3.80916617 ... 3.8579699 2.90682217 3.85453407]\n", + "[-0.0907974 -0.10580113 -0.21947042 ... -0.03978622 0.03795843\n", + " -0.0567409 ]\n", + "[3.78747759 3.92678063 3.83840323 ... 3.86847173 2.90692725 3.87766997]\n", + "[-0.0734607 -0.08751785 -0.18335912 ... -0.03069853 0.03188109\n", + " -0.05328649]\n", + "[3.84034476 3.94640919 3.86016853 ... 3.876998 2.90817406 3.89036646]\n", + "[-0.06276966 -0.07545507 -0.15794879 ... -0.02344582 0.02601958\n", + " -0.04940785]\n", + "[3.88517186 3.96427019 3.87889441 ... 3.88397644 2.91025717 3.89835878]\n", + "[-0.05597785 -0.06731001 -0.14078108 ... -0.01758457 0.02104115\n", + " -0.04598931]\n", + "[3.92140666 3.97911736 3.89539414 ... 3.89008054 2.91282365 3.90453294]\n", + "[-0.05141777 -0.06147258 -0.12945775 ... -0.01281145 0.01712118\n", + " -0.04321025]\n", + "[3.94992425 3.99094713 3.90996067 ... 3.89566766 2.91559247 3.91011964]\n", + "[-0.0481329 -0.05694909 -0.12205999 ... -0.00888987 0.01418895\n", + " -0.04099246]\n", + "[3.9721065 4.00021658 3.92286078 ... 3.90088537 2.91837343 3.91550464]\n", + "[-0.04559752 -0.05317288 -0.11722055 ... -0.00563463 0.01208643\n", + " -0.0392035 ]\n", + "[3.98933229 4.00748452 3.93435975 ... 3.90579054 2.92105244 3.92073167]\n", + "[-0.04352929 -0.04984013 -0.11402214 ... -0.00290292 0.01064313\n", + " -0.03772649]\n", + "[4.00278291 4.01325748 3.94469266 ... 3.91040811 2.92356761 3.92574601]\n", + "[-0.04177675 -0.04679892 -0.11186843 ... -0.00058539 0.00970561\n", + " -0.03647418]\n", + "[4.01339884 4.01793974 3.9540498 ... 3.91475373 2.92589046 3.93048889]\n", + "[-0.04025657 -0.04398041 -0.11037822 ... 
0.00140167 0.00914664\n", + " -0.03538525]\n", + "[4.02189892 4.02183216 3.96257746 ... 3.91884111 2.92801348 3.93492408]\n", + "[-0.03892031 -0.04135798 -0.10931012 ... 0.00312258 0.00886558\n", + " -0.03441737]\n", + "[4.02881984 4.02514961 3.97038595 ... 3.9226837 2.92994224 3.9390391 ]\n", + "[-0.03773729 -0.03892405 -0.10851209 ... 0.00462688 0.00878538\n", + " -0.03354125]\n", + "[4.03455735 4.02804204 3.97755908 ... 3.92629477 2.93169003 3.94283902]\n", + "[-0.03668612 -0.03667762 -0.10788869 ... 0.00595288 0.00884833\n", + " -0.0327364 ]\n", + "[4.03940174 4.03061294 3.98416223 ... 3.92968719 2.93327419 3.94633975]\n", + "[-0.03575042 -0.03461824 -0.10738009 ... 0.00713034 0.00901185\n", + " -0.03198832]\n", + "[4.04356601 4.03293376 3.99024844 ... 3.93287324 2.93471357 3.94956275]\n", + "[-0.03491676 -0.03274348 -0.10694878 ... 0.00818248 0.00924487\n", + " -0.03128663]\n", + "[4.04720702 4.03505422 3.99586239 ... 3.93586457 2.93602683 3.9525315 ]\n", + "[-0.03417365 -0.03104825 -0.10657119 ... 0.00912762 0.00952486\n", + " -0.03062384]\n", + "[4.05044099 4.03700953 4.00104308 ... 3.93867228 2.93723136 3.95526937]\n", + "[-0.03351095 -0.0295251 -0.10623247 ... 0.0099803 0.00983558\n", + " -0.02999447]\n", + "[4.05335461 4.03882513 4.00582531 ... 3.94130695 2.93834279 3.95779841]\n", + "[-0.03291961 -0.0281647 -0.10592318 ... 0.01075226 0.01016539\n", + " -0.02939452]\n", + "[4.05601305 4.04051998 4.01024071 ... 3.94377875 2.93937471 3.96013888]\n", + "[-0.03239151 -0.02695649 -0.10563727 ... 0.01145307 0.01050593\n", + " -0.02882101]\n", + "[4.05846567 4.04210856 4.01431828 ... 3.94609745 2.94033869 3.96230897]\n", + "[-0.0319193 -0.02588921 -0.10537078 ... 0.01209066 0.0108513\n", + " -0.02827168]\n", + "[4.06075009 4.04360232 4.01808474 ... 3.9482725 2.94124444 3.96432493]\n", + "[-0.03149638 -0.02495137 -0.10512109 ... 0.0126717 0.0111973\n", + " -0.02774484]\n", + "[4.06289523 4.04501055 4.02156477 ... 
3.95031298 2.94210001 3.96620118]\n", + "[-0.0311168 -0.02413154 -0.10488637 ... 0.01320188 0.01154103\n", + " -0.02723916]\n", + "[4.06492342 4.04634098 4.02478118 ... 3.95222763 2.94291198 3.96795047]\n", + "[-0.0307752 -0.02341863 -0.10466535 ... 0.01368614 0.01188047\n", + " -0.02675359]\n", + "[4.06685205 4.0476002 4.02775507 ... 3.95402483 2.94368571 3.9695841 ]\n", + "[-0.03046679 -0.02280205 -0.10445706 ... 0.01412878 0.01221427\n", + " -0.02628728]\n", + "[4.06869471 4.04879394 4.03050591 ... 3.95571256 2.94442556 3.97111209]\n", + "[-0.03018728 -0.02227178 -0.10426075 ... 0.01453361 0.01254155\n", + " -0.02583952]\n", + "[4.07046216 4.04992721 4.03305169 ... 3.95729844 2.94513507 3.97254333]\n", + "[-0.02993284 -0.02181847 -0.10407583 ... 0.01490404 0.01286176\n", + " -0.0254097 ]\n", + "[4.07216291 4.05100449 4.03540901 ... 3.95878967 2.94581709 3.97388577]\n", + "[-0.02970008 -0.02143346 -0.10390181 ... 0.01524312 0.01317456\n", + " -0.02499729]\n", + "[4.07380384 4.05202979 4.03759318 ... 3.96019303 2.94647395 3.97514649]\n", + "[-0.02948599 -0.02110876 -0.10373824 ... 0.01555362 0.01347977\n", + " -0.0246018 ]\n", + "[4.07539049 4.05300677 4.0396183 ... 3.96151491 2.94710755 3.97633186]\n", + "[-0.02928792 -0.02083703 -0.10358479 ... 0.01583806 0.01377732\n", + " -0.02422277]\n", + "[4.07692743 4.0539387 4.04149737 ... 3.96276129 2.94771947 3.97744759]\n", + "[-0.02910353 -0.02061162 -0.10344111 ... 0.0160987 0.01406719\n", + " -0.0238598 ]\n", + "[4.07841846 4.05482862 4.04324237 ... 3.96393776 2.94831102 3.97849885]\n", + "[-0.0289308 -0.02042643 -0.10330693 ... 0.01633765 0.0143494\n", + " -0.02351247]\n", + "[4.07986678 4.05567927 4.04486432 ... 3.9650495 2.9488833 3.9794903 ]\n", + "[-0.02876793 -0.02027598 -0.10318201 ... 0.01655681 0.01462399\n", + " -0.02318039]\n", + "[4.08127512 4.05649319 4.04637335 ... 3.96610137 2.94943728 3.98042619]\n", + "[-0.02861338 -0.02015529 -0.10306613 ... 
0.01675793 0.01489099\n", + " -0.02286318]\n", + "[4.08264588 4.05727268 4.04777881 ... 3.96709783 2.94997377 3.98131037]\n", + "[-0.02846582 -0.02005987 -0.10295909 ... 0.01694262 0.01515044\n", + " -0.02256045]\n", + "[4.08398113 4.05801988 4.04908925 ... 3.96804303 2.95049353 3.98214635]\n", + "[-0.02832411 -0.01998571 -0.10286073 ... 0.01711233 0.01540235\n", + " -0.02227184]\n", + "[4.08528276 4.05873676 4.05031257 ... 3.96894081 2.95099722 3.98293735]\n", + "[-0.02818725 -0.01992918 -0.10277092 ... 0.01726843 0.01564675\n", + " -0.02199696]\n", + "[4.08655245 4.0594251 4.05145599 ... 3.96979469 2.95148548 3.98368632]\n", + "[-0.02805444 -0.01988704 -0.1026895 ... 0.01741213 0.01588362\n", + " -0.02173545]\n", + "[4.08779175 4.06008658 4.05252615 ... 3.97060795 2.95195886 3.98439596]\n", + "[-0.02792496 -0.01985636 -0.10261638 ... 0.01754459 0.01611295\n", + " -0.02148693]\n", + "[4.08900207 4.0607227 4.05352916 ... 3.9713836 2.95241794 3.98506876]\n", + "[-0.02779825 -0.01983456 -0.10255145 ... 0.01766684 0.01633472\n", + " -0.02125105]\n", + "[4.09018471 4.06133488 4.05447059 ... 3.97212439 2.95286322 3.98570702]\n", + "[-0.02767382 -0.0198193 -0.10249462 ... 0.01777982 0.01654889\n", + " -0.02102743]\n", + "[4.09134089 4.06192439 4.05535556 ... 3.97283289 2.95329522 3.98631285]\n", + "[-0.0275513 -0.01980849 -0.10244582 ... 0.01788442 0.01675542\n", + " -0.02081571]\n", + "[4.09247174 4.06249242 4.05618875 ... 3.97351145 2.95371442 3.98688822]\n", + "[-0.02743039 -0.01980026 -0.10240495 ... 0.01798144 0.01695425\n", + " -0.02061552]\n", + "[4.0935783 4.06304006 4.05697444 ... 3.97416223 2.95412131 3.98743496]\n", + "[-0.02731088 -0.01979295 -0.10237196 ... 0.01807161 0.01714535\n", + " -0.02042652]\n", + "[4.09466155 4.0635683 4.05771656 ... 3.97478725 2.95451636 3.98795475]\n", + "[-0.02719259 -0.01978505 -0.10234678 ... 0.0181556 0.01732864\n", + " -0.02024834]\n", + "[4.09572239 4.06407805 4.05841868 ... 
3.97538834 2.95490001 3.98844918]\n", + "[-0.02707543 -0.01977522 -0.10232934 ... 0.01823404 0.01750409\n", + " -0.02008064]\n", + "[4.09676164 4.06457016 4.05908405 ... 3.97596722 2.95527272 3.98891971]\n", + "[-0.02695936 -0.01976225 -0.10231956 ... 0.01830749 0.01767162\n", + " -0.01992305]\n", + "[4.09778006 4.06504539 4.05971566 ... 3.97652548 2.95563493 3.98936772]\n", + "[-0.02684438 -0.01974506 -0.10231739 ... 0.01837646 0.01783121\n", + " -0.01977525]\n", + "[4.09877835 4.06550444 4.0603162 ... 3.97706457 2.95598706 3.98979451]\n", + "[-0.02673053 -0.01972266 -0.10232274 ... 0.01844144 0.01798279\n", + " -0.01963688]\n", + "[4.09975711 4.06594795 4.06088814 ... 3.97758586 2.95632953 3.99020129]\n", + "[-0.0266179 -0.01969418 -0.10233555 ... 0.01850286 0.01812633\n", + " -0.0195076 ]\n", + "[4.1007169 4.0663765 4.06143372 ... 3.97809064 2.95666275 3.99058919]\n", + "[-0.0265066 -0.01965881 -0.10235573 ... 0.01856112 0.01826179\n", + " -0.01938709]\n", + "[4.10165818 4.06679063 4.06195495 ... 3.97858007 2.95698712 3.99095931]\n", + "[-0.02639679 -0.01961582 -0.10238319 ... 0.0186166 0.01838916\n", + " -0.01927502]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[4.10258135 4.06719081 4.06245367 ... 3.97905527 2.95730302 3.99131266]\n", + "[-0.02628865 -0.01956456 -0.10241783 ... 0.01866962 0.0185084\n", + " -0.01917104]\n", + "[4.10348675 4.0675775 4.06293153 ... 3.97951729 2.95761083 3.99165022]\n", + "[-0.02618239 -0.01950443 -0.10245954 ... 0.01872051 0.01861952\n", + " -0.01907485]\n", + "[4.10437463 4.06795108 4.06339004 ... 3.97996709 2.9579109 3.99197292]\n", + "[-0.02607824 -0.0194349 -0.10250822 ... 0.01876955 0.01872252\n", + " -0.0189861 ]\n", + "[4.10524515 4.06831191 4.06383052 ... 3.9804056 2.9582036 3.99228163]\n", + "[-0.02597647 -0.01935546 -0.10256371 ... 0.01881699 0.01881742\n", + " -0.01890449]\n", + "[4.10609844 4.06866033 4.06425417 ... 
3.9808337 2.95848924 3.99257721]\n", + "[-0.02587735 -0.01926568 -0.1026259 ... 0.01886309 0.01890424\n", + " -0.01882969]\n", + "[4.10693451 4.0689966 4.06466209 ... 3.9812522 2.95876816 3.99286046]\n", + "[-0.02578117 -0.01916515 -0.1026946 ... 0.01890806 0.01898303\n", + " -0.01876139]\n", + "[4.10775332 4.06932101 4.06505521 ... 3.98166189 2.95904066 3.99313219]\n", + "[-0.02568827 -0.01905353 -0.10276966 ... 0.0189521 0.01905385\n", + " -0.01869925]\n", + "[4.10855476 4.06963377 4.06543439 ... 3.98206351 2.95930704 3.99339315]\n", + "[-0.02559896 -0.01893049 -0.10285088 ... 0.01899541 0.01911678\n", + " -0.01864298]\n", + "[4.10933863 4.06993511 4.06580037 ... 3.98245777 2.95956757 3.99364407]\n", + "[-0.02551361 -0.01879577 -0.10293805 ... 0.01903815 0.0191719\n", + " -0.01859223]\n", + "[4.11010468 4.07022519 4.06615382 ... 3.98284534 2.95982252 3.99388567]\n", + "[-0.02543256 -0.01864913 -0.10303096 ... 0.01908048 0.01921932\n", + " -0.0185467 ]\n", + "[4.11085258 4.0705042 4.0664953 ... 3.98322686 2.96007213 3.99411865]\n", + "[-0.02535618 -0.01849039 -0.10312934 ... 0.01912255 0.01925918\n", + " -0.01850607]\n", + "[4.11158194 4.07077227 4.06682532 ... 3.98360295 2.96031664 3.99434369]\n", + "[-0.02528484 -0.0183194 -0.10323294 ... 0.01916448 0.01929162\n", + " -0.01847001]\n", + "[4.11229232 4.07102954 4.06714429 ... 3.98397418 2.96055627 3.99456145]\n", + "[-0.02521893 -0.01813607 -0.10334147 ... 0.01920638 0.01931681\n", + " -0.01843819]\n", + "[4.1129832 4.07127613 4.06745259 ... 3.98434112 2.96079121 3.99477259]\n", + "[-0.02515883 -0.01794034 -0.10345463 ... 0.01924838 0.01933493\n", + " -0.0184103 ]\n", + "[4.11365403 4.07151215 4.06775051 ... 3.9847043 2.96102164 3.99497775]\n", + "[-0.02510491 -0.01773221 -0.10357207 ... 0.01929055 0.01934621\n", + " -0.018386 ]\n", + "[4.11430422 4.0717377 4.06803833 ... 3.98506421 2.96124775 3.99517754]\n", + "[-0.02505755 -0.01751172 -0.10369346 ... 
0.01933298 0.01935087\n", + " -0.01836497]\n", + "[4.11493312 4.07195287 4.06831624 ... 3.98542133 2.96146967 3.99537259]\n", + "[-0.02501711 -0.01727899 -0.10381842 ... 0.01937576 0.01934917\n", + " -0.01834688]\n", + "[4.11554006 4.07215776 4.06858443 ... 3.9857761 2.96168754 3.9955635 ]\n", + "[-0.02498395 -0.01703416 -0.10394655 ... 0.01941893 0.01934139\n", + " -0.0183314 ]\n", + "[4.11612437 4.07235246 4.06884302 ... 3.98612895 2.96190149 3.99575084]\n", + "[-0.02495841 -0.01677745 -0.10407745 ... 0.01946255 0.01932782\n", + " -0.01831821]\n", + "[4.11668532 4.07253707 4.06909213 ... 3.98648026 2.96211162 3.9959352 ]\n", + "[-0.02494081 -0.01650913 -0.10421069 ... 0.01950667 0.01930879\n", + " -0.01830697]\n", + "[4.11722222 4.07271168 4.06933184 ... 3.98683039 2.96231802 3.99611713]\n", + "[-0.02493144 -0.01622952 -0.10434581 ... 0.01955132 0.01928464\n", + " -0.01829735]\n", + "[4.11773435 4.07287641 4.06956223 ... 3.98717967 2.96252077 3.99629718]\n", + "[-0.02493059 -0.01593902 -0.10448236 ... 0.01959652 0.01925574\n", + " -0.01828905]\n", + "[4.11822104 4.07303135 4.06978334 ... 3.98752839 2.96271991 3.99647587]\n", + "[-0.02493849 -0.01563807 -0.10461987 ... 0.0196423 0.01922245\n", + " -0.01828173]\n", + "[4.11868162 4.07317665 4.06999521 ... 3.98787683 2.96291551 3.99665371]\n", + "[-0.02495536 -0.01532718 -0.10475784 ... 0.01968865 0.01918519\n", + " -0.01827509]\n", + "[4.11911546 4.07331243 4.0701979 ... 3.98822521 2.9631076 3.99683117]\n", + "[-0.02498137 -0.01500691 -0.10489579 ... 0.01973558 0.01914437\n", + " -0.01826882]\n", + "[4.11952198 4.07343885 4.07039143 ... 3.98857372 2.9632962 3.99700873]\n", + "[-0.02501666 -0.01467787 -0.10503323 ... 0.01978309 0.01910042\n", + " -0.01826262]\n", + "[4.11990066 4.07355608 4.07057585 ... 3.98892254 2.96348132 3.99718683]\n", + "[-0.02506133 -0.01434075 -0.10516964 ... 0.01983114 0.01905376\n", + " -0.0182562 ]\n", + "[4.12025103 4.0736643 4.0707512 ... 
3.98927179 2.96366297 3.99736586]\n", + "[-0.02511542 -0.01399627 -0.10530455 ... 0.01987973 0.01900486\n", + " -0.01824929]\n", + "[4.12057271 4.0737637 4.07091755 ... 3.98962156 2.96384113 3.99754623]\n", + "[-0.02517894 -0.01364517 -0.10543745 ... 0.01992882 0.01895416\n", + " -0.01824163]\n", + "[4.12086539 4.07385452 4.07107497 ... 3.98997191 2.96401581 3.99772828]\n", + "[-0.02525187 -0.01328828 -0.10556788 ... 0.01997837 0.01890212\n", + " -0.01823295]\n", + "[4.12112886 4.07393699 4.07122355 ... 3.99032286 2.96418696 3.99791234]\n", + "[-0.0253341 -0.01292642 -0.10569537 ... 0.02002834 0.01884919\n", + " -0.01822304]\n", + "[4.12136301 4.07401137 4.07136338 ... 3.99067441 2.96435459 3.9980987 ]\n", + "[-0.02542552 -0.01256048 -0.10581946 ... 0.02007869 0.01879583\n", + " -0.01821167]\n", + "[4.1215678 4.07407793 4.07149459 ... 3.99102652 2.96451864 3.99828761]\n", + "[-0.02552593 -0.01219133 -0.10593973 ... 0.02012935 0.01874249\n", + " -0.01819864]\n", + "[4.12174334 4.07413698 4.07161733 ... 3.99137911 2.9646791 3.9984793 ]\n", + "[-0.02563513 -0.01181988 -0.10605577 ... 0.02018028 0.0186896\n", + " -0.01818378]\n", + "[4.12188981 4.07418881 4.07173177 ... 3.99173207 2.96483593 3.99867395]\n", + "[-0.02575283 -0.01144705 -0.10616721 ... 0.02023141 0.01863757\n", + " -0.01816691]\n", + "[4.1220075 4.07423376 4.07183809 ... 3.99208529 2.96498911 3.99887171]\n", + "[-0.02587874 -0.01107376 -0.10627369 ... 0.02028269 0.01858684\n", + " -0.0181479 ]\n", + "[4.12209684 4.07427216 4.07193649 ... 3.9924386 2.96513859 3.99907271]\n", + "[-0.02601251 -0.0107009 -0.10637489 ... 0.02033403 0.01853777\n", + " -0.01812664]\n", + "[4.12215831 4.07430437 4.07202722 ... 3.99279182 2.96528437 3.99927703]\n", + "[-0.02615376 -0.01032938 -0.10647054 ... 0.02038539 0.01849075\n", + " -0.01810301]\n", + "[4.12219254 4.07433075 4.07211052 ... 3.99314475 2.9654264 3.9994847 ]\n", + "[-0.02630207 -0.00996008 -0.10656037 ... 
0.02043668 0.01844611\n", + " -0.01807695]\n", + "[4.12220023 4.07435168 4.07218666 ... 3.99349718 2.96556468 3.99969574]\n", + "[-0.026457 -0.00959384 -0.10664417 ... 0.02048785 0.01840419\n", + " -0.0180484 ]\n", + "[4.12218217 4.07436752 4.07225594 ... 3.99384888 2.96569919 3.99991014]\n", + "[-0.02661809 -0.00923148 -0.10672177 ... 0.02053882 0.01836527\n", + " -0.01801731]\n", + "[4.12213925 4.07437867 4.07231864 ... 3.99419959 2.96582991 4.00012785]\n", + "[-0.02678485 -0.00887378 -0.10679302 ... 0.02058953 0.01832963\n", + " -0.01798368]\n", + "[4.12207242 4.07438549 4.07237509 ... 3.99454907 2.96595686 4.00034878]\n", + "[-0.02695678 -0.00852147 -0.10685782 ... 0.0206399 0.0182975\n", + " -0.0179475 ]\n", + "[4.12198271 4.07438839 4.07242563 ... 3.99489705 2.96608003 4.00057284]\n", + "[-0.02713337 -0.00817526 -0.1069161 ... 0.02068988 0.0182691\n", + " -0.0179088 ]\n", + "[4.12187119 4.07438773 4.07247058 ... 3.99524327 2.96619944 4.00079989]\n", + "[-0.0273141 -0.00783578 -0.10696782 ... 0.02073941 0.0182446\n", + " -0.0178676 ]\n", + "[4.12173899 4.07438388 4.07251028 ... 
3.99558745 2.96631511 4.00102979]\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEGCAYAAABiq/5QAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAdI0lEQVR4nO3dfXAc9Z3n8fd3Ro+2ZRvbwgabRDYQwDiLUQTH0y0EzJYh2ZDlyAH7xBpSDnebxCShOJNUjmyqtspUUtnwdEv5wMBmOXO7BBKKZXEIEKiD8CCDYoyNlycHixgsy/jZsjSa7/3RPdKMNLJka3pG6v68iqnp7unu36891Gda3+n5tbk7IiKSHKlKd0BERMpLwS8ikjAKfhGRhFHwi4gkjIJfRCRhqirdgZGYMWOGNzU1VbobIiLjytq1a7e7e+PA5eMi+Juammhtba10N0RExhUz+32x5Sr1iIgkjIJfRCRhFPwiIgmj4BcRSZjIgt/MVpnZNjNbX+S1G83MzWxGVO2LiEhxUZ7x3w8sHrjQzI4DLgY+iLBtEREZQmTB7+7PAzuKvPQPwE1AWYcF3Xcww7sde9FopCKSdGW9jt/MvgR86O6/M7Ph1l0KLAX41Kc+Nap2dx3oYdFPnqNjz0EuPPlobv0vf0RjQ+2o9ikiMl6V7ctdM5sAfA/4nyNZ391XunuLu7c0Ng764dlheXrjx3TsOQjAM29t45LbnufZt7aNap8iIuNVOa/qOR6YC/zOzDYDc4DXzGxW1A2/8n5hxWn73m6W3P8qf/6/X+IXr3/Ige7eqLsgIjJmlK3U4+5vAEfn5sPwb3H37VG3nR/8E2vS7AuD/sV3O3nx3U4aaqs454TpnDUveHxmZgPp1KFLUSIi41VkwW9mq4ELgBlm1g7c4u73RtXeULbt6eK97fsAqEmnePo7F/D3T2zk39b9gWz4Pe+egxnWvPkxa978GIC66hQnz5rMqcdO5sSjJzG3cRLzZkzk2Kn1+kAQkXEvsuB396uHeb0pqrbztW7+pG964XFTmTWljjuuPp3vXnoyj7z2If/auoXNnfsLtunqydK2ZSdtW3YWLK9KGcdMrWP21HqOmVLPzMl1zJxcS2NDLdMn1jJjUg1HTaxhan01VWn9Nk5ExqZxMTrnaOSXec6cO61v+pgp9fzt50/gv19wPO9s28tv3+vkpfc6ad38CdvCL4IHymSdLTsOsGXHgWHbbairYuqEaqbUB4+G2moa6qpoqKtmUl0VE2vSTKytYlJtFRNq0kyoqaK+Js2EmjT11Wnqa9LUVaWprU5RW5ViuKugRERGKvbB//IQwZ9jZpw4s4ETZzbw12c3AdCx5yAbtu5m49bdvN+xj/e27+X97fvYvrd7xO3u6cqwpyvDFob/kBiJ2qoUddVpaqtS1FanqEmnqK1KU1MVTNdUBY/qtFGdDpZVhdPV6RRVKaMqHbxelcq9ZqRTqfDZqEoF88Gz9T0XPKx/OhXOp8xIpeh7feDyVLiNGf3L815LWfBaME3fvD7sRKIR6+DfdaCHtz7aDUDKoPnTR41ou8aGWs5vaOT8zxReRnqgu5cPdx7gw50H+Hh3F9t2d/HR7i4693azfe9BOvd2s2N/N7sO9FDq34kdzGQ5mMmWdqdjXP6HgZH7MMj7sMitkwqmcx8YEG6T+xAh+BDJbd+3r3AbwmnL22d/e3nLBmxLwfqF6wa96JsYtF/y5snbhkH96P8ANMhb1/LW72umoO3Cfeb1acDnabE2+vvX36f89YvNWJE+FdvnsOsP1cCg14Zvo3Cd4u0N3Vr+Pkd3EjKSzW3I1uHmS0+muoTl41gH/9rf7+gL4AWzpzCpdnSHW1+T5oSjJ3HC0ZMOuV5v1tl1oKfgsaerh73hXwF7DmbYl3t093
KgO8O+g73s7+nlYE8vB3p62d/dS1dPLwd7snT3Jivwc9yh153gGiz94lqS66bFJ1GdLt3+Yh38BWWepsFlnqikU8a0iTVMm1hTkv31Zp3uTJaDmV66erJ90wczwYdCdyZ49PQGj4OZLD29TqY3t8zJZHPL8qezZLLBfG82eK0362SyHgRub/9rvQ694XQ2S7DcIZsNtsl68Aim6Z8O53vdcQ+nw/U9nHbCbbKOQ99rIhKNWAf/UF/sjjfplFFfE3zhmxQehn/WPfzQCJb3hh8OuQ+R/PWyDo6D0zede809+Asityy3D89rK5jOWyfcloHLC7YFKNw+1//glf5tw/8K9tc/nXfc4Ya59sjfb96+c/P9rw/eX+G2hR+o+W0O/vcv7N/AfeVvP3AfBXsrWD9vnSH3U3z9ga8N0cSQKw3VRuGmw287kv6MdPuRtJ1TVeLLyGMb/Pu7M7zRvqtv/owynvHL6OVq8iksvv+TilRIbC82b/tgJ5nwF1onzWzgqBKVXURExrvYBn9+ff+MuSO7mkdEJAli+1f0VWcex6enT+CV93dw0SkzK90dEZExI7bBf8yUei5vnsPlzXMq3RURkTEltqUeEREpTsEvIpIwCn4RkYRR8IuIJIyCX0QkYRT8IiIJo+AXEUkYBb+ISMIo+EVEEiay4DezVWa2zczW5y37kZm9ZWbrzOxRM5saVfsiIlJclGf89wOLByx7Cljg7n8E/Adwc4Tti4hIEZEFv7s/D+wYsOxX7p4JZ18CNJCOiEiZVbLGfy3w70O9aGZLzazVzFo7OjrK2C0RkXirSPCb2feADPDgUOu4+0p3b3H3lsbGxvJ1TkQk5so+LLOZXQN8EbjIh7vRpIiIlFxZg9/MFgP/Azjf3feXs20REQlEeTnnauC3wElm1m5m1wF3Ag3AU2bWZmZ3R9W+iIgUF9kZv7tfXWTxvVG1JyIiI6Nf7oqIJIyCX0QkYRT8IiIJo+AXEUkYBb+ISMIo+EVEEkbBLyKSMAp+EZGEUfCLiCSMgl9EJGEU/CIiCaPgFxFJGAW/iEjCKPhFRBKm7HfgKpvOd+H956G3B6bNgxMXVbpHIiJjQnyD/w+vw+M3BNOnXq7gFxEJxbfUk8r7TMv2VK4fIiJjTHyDP13TP92r4BcRyYlx8Ff3Tyv4RUT6xDf4VeoRESkqsuA3s1Vmts3M1uctm2ZmT5nZ2+HzUVG1r1KPiEhxUZ7x3w8sHrBsOfC0u58IPB3OR0OlHhGRoiILfnd/HtgxYPFlwAPh9APAl6NqvzD4uyNrRkRkvCl3jX+mu28FCJ+PHmpFM1tqZq1m1trR0XH4LaXygj+bOfztRURiasx+uevuK929xd1bGhsbD38HKvWIiBRV7uD/2MyOAQift0XWkko9IiJFlTv4HwOuCaevAX4ZWUsq9YiIFBXl5Zyrgd8CJ5lZu5ldB6wALjazt4GLw/loqNQjIlJUZIO0ufvVQ7x0UVRtFii4jl+lHhGRnDH75e6oFfxyV6UeEZGc+Aa/zvhFRIqKcfCrxi8iUkx8gz+VBiycccj2VrI3IiJjRnyDH1TuEREpIubBr3KPiMhA8Q5+XdkjIjJIvINfpR4RkUFiHvwq9YiIDJSg4NcZv4gIxD34NVCbiMgg8Q5+lXpERAZJUPCr1CMiAnEPfpV6REQGiXfwq9QjIjJIgoJfpR4REYh78KvUIyIySLyDX7/cFREZJObBnzdWj2r8IiJA3INfpR4RkUEiu9n6oZjZt4CvAg68ASxx966SN6RSj0ji9PT00N7eTldX6SNlrKqrq2POnDlUV1cPvzIVCH4zmw18E5jv7gfM7F+Aq4D7S96YSj0iidPe3k5DQwNNTU2Y2fAbjHPuTmdnJ+3t7cydO3dE21Sq1FMF1JtZFTAB+EMkrRSUehT8IknQ1dXF9OnTExH6AGbG9OnTD+svnLIHv7t/CPwY+ADYCuxy918NXM/MlppZq5m1dnR0HFljBaUeBb9IUiQl9H
MO93jLHvxmdhRwGTAXOBaYaGZ/OXA9d1/p7i3u3tLY2HhkjemXuyJSZp2dnSxcuJCFCxcya9YsZs+e3Tff3T3y7xpXrVrFRx99FEkfK/Hl7iLgfXfvADCzR4BzgH8ueUv65a6IlNn06dNpa2sD4Ac/+AGTJk3ixhtvPOz9rFq1iubmZmbNmlXqLlYk+D8AzjKzCcAB4CKgNZKWdDmniIwhDzzwAHfddRfd3d2cc8453HnnnWSzWZYsWUJbWxvuztKlS5k5cyZtbW1ceeWV1NfX88orr1BTUzN8AyNU9uB395fN7GHgNSADvA6sjKQxlXpEEq1p+b9Ftu/NK75wWOuvX7+eRx99lBdffJGqqiqWLl3KQw89xPHHH8/27dt54403ANi5cydTp07ljjvu4M4772ThwoUl7/sha/z5tXczO3fAa18/0kbd/RZ3P9ndF7j7X7n7wSPd1yGp1CMiY8Svf/1rXn31VVpaWli4cCHPPfcc7777LieccAKbNm1i2bJlrFmzhilTpkTel+G+3P123vQdA167tsR9KT2VekRkjHB3rr32Wtra2mhra2PTpk18//vfZ/r06axbt47zzjuP22+/na997WuR92W4Uo8NMV1sfuxRqUck0Q63HBOlRYsWccUVV7Bs2TJmzJhBZ2cn+/bto76+nrq6Or7yla8wd+5crr/+egAaGhrYs2dPJH0ZLvh9iOli82OPSj0iMkZ89rOf5ZZbbmHRokVks1mqq6u5++67SafTXHfddbg7Zsatt94KwJIlS/jqV78ayZe75j50fpvZfuAdgrP748Npwvl57j6xZD05hJaWFm9tPYILf177GTwWfhWx8C/gy/+rtB0TkTFn48aNnHLKKZXuRtkVO24zW+vuLQPXHe6Mf3z/62mQNhGRQQ4Z/O7++/x5M5sO/DHwgbuvjbJjJaFB2kREBhnucs7HzWxBOH0MsJ7gap6fmdkNZejf6OiqHhGRQYa7nHOuu68Pp5cAT7n7nwL/ifFwOadKPSIigwwX/Pn1kYuAJwDcfQ+QjapTJaNSj4jIIMN9ubvFzL4BtAPNwJMAZlYPjOxWL5WU0nX8IiIDDXfGfx1wKvA3wJXuvjNcfhZwX4T9Ko38Uo9uxCIiZVCKYZmXLFnCpk2bIuvjcFf1bAOuL7L8WeDZqDpVMvrlroiU2UiGZXZ33J1Uqvi59333RXtePdxVPY8d6hFpz0pBwS8iY8Q777zDggULuP7662lubmbr1q0sXbqUlpYWTj31VH74wx/2rXveeefR1tZGJpNh6tSpLF++nNNOO42zzz6bbdu2jbovw9X4zwa2AKuBlxkP4/Pk0z13RZLtBxGOdPmDXYe9yYYNG7jvvvu4++67AVixYgXTpk0jk8nw+c9/niuuuIL58+cXbLNr1y7OP/98VqxYwbe//W1WrVrF8uXLR9X14Wr8s4DvAguA24CLge3u/py7PzeqlstBZ/wiMoYcf/zxnHHGGX3zq1evprm5mebmZjZu3MiGDRsGbVNfX88ll1wCwOc+9zk2b9486n4cMvjdvdfdn3T3awi+0H0H+E14pc/Yp+AXkTFk4sT+4c3efvttbrvtNp555hnWrVvH4sWL6erqGrRN/uBs6XSaTGb0P0Yd9g5cZlYLfAG4GmgCbgceGXXL5aBSj0iyHUE5plx2795NQ0MDkydPZuvWraxZs4bFixeXpe1DBr+ZPUBQ5vl34O/yfsU7PmhYZhEZo5qbm5k/fz4LFixg3rx5nHvuucNvVCLDDcucBfaFs/krGuDuPjnCvvU54mGZD3wCtzYF07VT4OYPStovERl7NCxzvyMaltndh/vyd2xTqUdEZJCKBLuZTTWzh83sLTPbaGZnR9KQBmkTERlk2C93I3Ib8KS7X2FmNcCESFpJDxiW2R1sfP0UQUSk1Moe/GY2meBmLn8D4O7dQDSn42ZgafDeYD6bKfwwEJFYyt2/NikO9V1tMZUo9cwDOoD7zOx1M7vHzAbdu9
fMlppZq5m1dnR0HHlrKveIJEpdXR2dnZ2HHYbjlbvT2dlJXV3diLepRKmnimCI52+4+8tmdhuwHPh+/kruvhJYCcFVPUfcWroaMgeCaf2ISyT25syZQ3t7O6M6YRxn6urqmDNnzojXr0TwtwPt7v5yOP8wQfBHI6WbsYgkSXV1NXPnzq10N8a0spd63P0jghu8nBQuuggYPEBFqWhMfhGRApW6qucbwIPhFT3vEdzPNxoar0dEpEBFgt/d24BBvyaLhIJfRKTA+P5l7kjo17siIgXiH/w64xcRKaDgFxFJmPgHv0o9IiIF4h/8+uWuiEiBBAS/fsAlIpIv/sGfGjBCp4hIwsU/+FXqEREpkIDgV6lHRCRf/INfpR4RkQLxD36VekRECiQg+FXqERHJl4Dgzz/jV/CLiMQ/+PXLXRGRAvEPfpV6REQKJCD4VeoREckX/+BXqUdEpED8g1+lHhGRAgkIfl3HLyKSL/7Br1/uiogUqFjwm1nazF43s8cjbajgDlw64xcRqeQZ/zJgY+St6NaLIiIFKhL8ZjYH+AJwT+SNqdQjIlKgUmf8PwVuArJDrWBmS82s1cxaOzo6jrwlfbkrIlKg7MFvZl8Etrn72kOt5+4r3b3F3VsaGxuPvEFdzikiUqASZ/znAl8ys83AQ8CFZvbPkbWmUo+ISIGyB7+73+zuc9y9CbgKeMbd/zKyBlXqEREpEP/r+FXqEREpUDX8KtFx998Av4m0EQ3SJiJSIP5n/BqkTUSkQPyDX6UeEZECCQh+lXpERPLFP/hV6hERKRD/4NcgbSIiBRIW/PoBl4hI/INfpR4RkQLxD379cldEpEACgj//ck6VekRE4h/8KvWIiBSIf/Cr1CMiUiD+wZ9K9097FrK9leuLiMgYEP/gNyss9+jXuyKScPEPfigs96jOLyIJl5Dg10BtIiI5CQl+DdQmIpKTjODXJZ0iIn2SEfwq9YiI9ElI8KvUIyKSk4zgV6lHRKRP2YPfzI4zs2fNbKOZvWlmyyJvtKDUo1/vikiyVQ2/SsllgO+4+2tm1gCsNbOn3H1DZC0WlHo0UJuIJFvZz/jdfau7vxZO7wE2ArMjbVSlHhGRPhWt8ZtZE3A68HKR15aaWauZtXZ0dIyuId1+UUSkT8WC38wmAT8HbnD33QNfd/eV7t7i7i2NjY2ja0y3XxQR6VOR4DezaoLQf9DdH4m8QZV6RET6VOKqHgPuBTa6+0/K0qhKPSIifSpxxn8u8FfAhWbWFj4ujbTFtIZlFhHJKfvlnO7+/wAra6Maj19EpE8yfrmr8fhFRPokJPg1SJuISE5Cgl+DtImI5CQj+HU5p4hIn2QEv0o9IiJ9EhL8KvWIiOQkI/hV6hER6ZOM4Nd4/CIifRIS/BqPX0QkJxnBr1KPiEifZAS/BmkTEemTwOBXqUdEki0Zwa9Sj4hIn2QEv0o9IiJ9Ehj8OuMXkWRLRvBrPH4RkT5lvxFLReRfx//hWnjqFvjUWdBwDEyYDhOmQVU9pJLxOSgiyZaM4K+f2j+9fzu88FN4och6VfVQXRd8UKRrghJRqgosHT4bpNLBvKXyHtb/jPXP56b7nhlmWW42b3nBMhvwerF1hlvOYa5zBPsacjej2PaIVh/Fjd4O+e8RYbuDdhXFzerKewO8aI7hSIyVfozAwH+zxSsKS9ajlIzgP7YZ5l8GG3556PUyB4KHiMhY8id/r+A/bKkU/Nd/gn2d8MGLsPkF2PYm7N8B+zuD596Dle6liEhZVCT4zWwxcBuQBu5x9xVlaXjidDjlT4PHQNlscLbf0xVc8tnbHXwR7L2QzQQPzwbreS+4B/MF01nAw3nvn8aDNg61rOh0jufN5y0vWIchlo9inSPa15A7GsW2I9jPqPd7BG2UvN1BOyvhvnK7jGCfh26wzO0NoezHXWKp0kZ12YPfzNLAXcDFQDvwqpk95u4byt2XAqkU1EwMHiIiMVaJy1jOBN5x9/fcvRt4CLisAv0QEU
mkSgT/bGBL3nx7uKyAmS01s1Yza+3o6Chb50RE4q4SwV/smqpBBTh3X+nuLe7e0tjYWIZuiYgkQyWCvx04Lm9+DvCHCvRDRCSRKhH8rwInmtlcM6sBrgIeq0A/REQSqexX9bh7xsy+DqwhuJxzlbu/We5+iIgkVUWu43f3J4AnKtG2iEjSmY+DHzaYWQfw+yPcfAawvYTdGS+SeNxJPGZI5nEn8Zjh8I/70+4+6OqYcRH8o2Fmre7eUul+lFsSjzuJxwzJPO4kHjOU7rg1DrGISMIo+EVEEiYJwb+y0h2okCQedxKPGZJ53Ek8ZijRcce+xi8iIoWScMYvIiJ5FPwiIgkT6+A3s8VmtsnM3jGz5ZXuTxTM7Dgze9bMNprZm2a2LFw+zcyeMrO3w+ejKt3XUjOztJm9bmaPh/Nzzezl8Jj/bzgkSKyY2VQze9jM3grf87Pj/l6b2bfC/7fXm9lqM6uL43ttZqvMbJuZrc9bVvS9tcDtYbatM7Pmw2krtsGfd8OXS4D5wNVmNr+yvYpEBviOu58CnAX8bXicy4Gn3f1E4OlwPm6WARvz5m8F/iE85k+A6yrSq2jdBjzp7icDpxEcf2zfazObDXwTaHH3BQTDvFxFPN/r+4HFA5YN9d5eApwYPpYC/3g4DcU2+EnIDV/cfau7vxZO7yEIgtkEx/pAuNoDwJcr08NomNkc4AvAPeG8ARcCD4erxPGYJwN/DNwL4O7d7r6TmL/XBEPL1JtZFTAB2EoM32t3fx7YMWDxUO/tZcA/eeAlYKqZHTPStuIc/CO64UucmFkTcDrwMjDT3bdC8OEAHF25nkXip8BNQDacnw7sdPdMOB/H93se0AHcF5a47jGzicT4vXb3D4EfAx8QBP4uYC3xf69zhnpvR5VvcQ7+Ed3wJS7MbBLwc+AGd99d6f5Eycy+CGxz97X5i4usGrf3uwpoBv7R3U8H9hGjsk4xYU37MmAucCwwkaDMMVDc3uvhjOr/9zgHf2Ju+GJm1QSh/6C7PxIu/jj3p1/4vK1S/YvAucCXzGwzQQnvQoK/AKaG5QCI5/vdDrS7+8vh/MMEHwRxfq8XAe+7e4e79wCPAOcQ//c6Z6j3dlT5FufgT8QNX8La9r3ARnf/Sd5LjwHXhNPXAL8sd9+i4u43u/scd28ieF+fcfe/AJ4FrghXi9UxA7j7R8AWMzspXHQRsIEYv9cEJZ6zzGxC+P967phj/V7nGeq9fQz46/DqnrOAXbmS0Ii4e2wfwKXAfwDvAt+rdH8iOsbzCP7EWwe0hY9LCWreTwNvh8/TKt3XiI7/AuDxcHoe8ArwDvCvQG2l+xfB8S4EWsP3+xfAUXF/r4G/A94C1gM/A2rj+F4Dqwm+x+ghOKO/bqj3lqDUc1eYbW8QXPU04rY0ZIOISMLEudQjIiJFKPhFRBJGwS8ikjAKfhGRhFHwi4gkjIJfEsHMXgyfm8zsz0u87+8Wa0tkrNLlnJIoZnYBcKO7f/Ewtkm7e+8hXt/r7pNK0T+RctAZvySCme0NJ1cA/9nM2sJx3tNm9iMzezUc1/xr4foXhPc5+D8EP5DBzH5hZmvDseGXhstWEIwc2WZmD+a3Ff6q8kfhOPJvmNmVefv+Td64+g+Gv0rFzFaY2YawLz8u57+RJEfV8KuIxMpy8s74wwDf5e5nmFkt8IKZ/Spc90xggbu/H85f6+47zKweeNXMfu7uy83s6+6+sEhblxP80vY0YEa4zfPha6cDpxKMr/ICcK6ZbQD+DDjZ3d3Mppb86EXQGb/InxCMedJGMJz1dIKbWwC8khf6AN80s98BLxEMkHUih3YesNrde939Y+A54Iy8fbe7e5ZgmI0mYDfQBdxjZpcD+0d9dCJFKPgl6Qz4hrsvDB9z3T13xr+vb6Xgu4FFwNnufhrwOlA3gn0P5WDedC9Q5cH48mcSjLT6ZeDJwzoSkRFS8EvS7AEa8ubXAP8tHNoaM/tMeHOTgaYAn7j7fjM7meA2lz
k9ue0HeB64MvweoZHg7lmvDNWx8J4KU9z9CeAGgjKRSMmpxi9Jsw7IhCWb+wnuYdsEvBZ+wdpB8dv4PQlcb2brgE0E5Z6clcA6M3vNg+Ghcx4FzgZ+RzCC6k3u/lH4wVFMA/BLM6sj+GvhW0d2iCKHpss5RUQSRqUeEZGEUfCLiCSMgl9EJGEU/CIiCaPgFxFJGAW/iEjCKPhFRBLm/wN4Lu97+vuppAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], "source": [ "als.fit(train, test)\n", "plot_learning_curve(als)" @@ -247,14 +573,64 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 69, "id": "c377cda2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "13.336192041852941\n", + "0.3099205032508016\n" + ] + } + ], "source": [ "print(als.test_mse_record[-1])\n", "print(als.train_mse_record[-1])" ] + }, + { + "cell_type": "code", + "execution_count": 80, + "id": "0b23e873", + "metadata": {}, + "outputs": [], + "source": [ + "user0prediction = als.predict()[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "id": "b09e636b", + "metadata": {}, + "outputs": [], + "source": [ + "user0actual = ratings[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "id": "99fba63f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.0005735931449630593" + ] + }, + "execution_count": 82, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mean_squared_error(user0prediction, user0actual)" + ] } ], "metadata": { diff --git a/recsys/als-stream-2.ipynb b/recsys/als-stream-2.ipynb new file mode 100644 index 0000000..79a9b9f --- /dev/null +++ b/recsys/als-stream-2.ipynb @@ -0,0 +1,263 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 16, + "id": "8e0e6a4f", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from collections import defaultdict\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.metrics import mean_squared_error" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "5b033873", + "metadata": {}, + "outputs": [], + "source": [ + "ratings_path = \"/Users/amitnarang/Downloads/ml-latest-small/ratings.csv\"" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + 
"id": "a00a310f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " userId movieId rating timestamp\n", + "0 1 1 4.0 964982703\n", + "1 1 3 4.0 964981247\n", + "2 1 6 4.0 964982224\n", + "3 1 47 5.0 964983815\n", + "4 1 50 5.0 964982931\n", + "... ... ... ... ...\n", + "100831 610 166534 4.0 1493848402\n", + "100832 610 168248 5.0 1493850091\n", + "100833 610 168250 5.0 1494273047\n", + "100834 610 168252 5.0 1493846352\n", + "100835 610 170875 3.0 1493846415\n", + "\n", + "[100836 rows x 4 columns]\n", + " userId movieId rating\n", + "0 1 1 4.0\n", + "1 1 47 5.0\n", + "2 1 101 5.0\n", + "3 1 157 5.0\n", + "4 1 223 3.0\n", + "... ... ... ...\n", + "33607 610 160527 4.5\n", + "33608 610 161582 4.0\n", + "33609 610 163937 3.5\n", + "33610 610 166528 4.0\n", + "33611 610 168250 5.0\n", + "\n", + "[33612 rows x 3 columns]\n", + " userId movieId rating\n", + "0 1 3 4.0\n", + "1 1 6 4.0\n", + "2 1 50 5.0\n", + "3 1 70 3.0\n", + "4 1 110 4.0\n", + "... ... ... 
...\n", + "67219 610 164179 5.0\n", + "67220 610 166534 4.0\n", + "67221 610 168248 5.0\n", + "67222 610 168252 5.0\n", + "67223 610 170875 3.0\n", + "\n", + "[67224 rows x 3 columns]\n" + ] + } + ], + "source": [ + "df = pd.read_csv(ratings_path, sep = ',')\n", + "\n", + "user_vector_matrix = dict()\n", + "movie_vector_matrix = dict()\n", + "\n", + "columns = ['userId', 'movieId', 'rating']\n", + "test_data = []\n", + "train_data = []\n", + "\n", + "for row in df.itertuples():\n", + " if row.Index % 3 == 0:\n", + " test_data.append([row.userId, row.movieId, row.rating])\n", + " else:\n", + " train_data.append([row.userId, row.movieId, row.rating])\n", + "\n", + "test_df = pd.DataFrame(data=test_data, columns=columns)\n", + "train_df = pd.DataFrame(data=train_data, columns=columns)\n", + "max_train_movie = max(train_df['movieId'])\n", + "print(df)\n", + "print(test_df)\n", + "print(train_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "a6a8fb84", + "metadata": {}, + "outputs": [], + "source": [ + "class ALSStreamingModel:\n", + " def __init__(self, l, num_features, alpha):\n", + " self.l = l\n", + " self.num_features = num_features\n", + " self.alpha = alpha\n", + " self.user_features = dict()\n", + " self.movie_features = np.random.randint(100, size=(max_train_movie, num_features))\n", + " self.ratings = dict()\n", + " \n", + " def fit(self, train):\n", + " for row in train.itertuples():\n", + " self.update_user_vector(row)\n", + " return self \n", + "\n", + " def _als_step(self, ratings, solve_vecs, fixed_vecs):\n", + " \"\"\"\n", + " when updating the user matrix,\n", + " the item matrix is the fixed vector and vice versa\n", + " \n", + " ratings: 1xnum_movies\n", + " solve_vecs: 1xnum_features\n", + " fixed_vecs: 1xnum_features\n", + " RF * (F^-1F + lI)^-1\n", + " num_features x num_features\n", + " \n", + " num_users x num_movies * num_movies x num_features\n", + " num_users x num_features \n", + " \n", + " (610, 193609) (610, 200) 
(193609, 200)\n", + " ratings user movies\n", + " (1, 40) (1, 40) (1, 193609)\n", + " b has to be 1x40\n", + " ratings is 1xY fixedVecs is Yx40\n", + " user movies ratings\n", + " \"\"\"\n", + " A = fixed_vecs.T.dot(fixed_vecs) + np.eye(self.num_features) * self.l\n", + " #print(A.shape)\n", + " b = ratings.dot(fixed_vecs)\n", + " A_inv = np.linalg.inv(A)\n", + " solve_vecs = b.dot(A_inv)\n", + " return solve_vecs\n", + " \n", + " def update_user_vector(self, row):\n", + " rating = row.rating\n", + " userId = row.userId\n", + " movieId = row.movieId\n", + "\n", + " if userId in self.user_features:\n", + " user_vector = self.user_features[userId]\n", + " rating_vector = self.ratings[userId]\n", + " else:\n", + " user_vector = np.random.randint(100, size=(1, self.num_features))\n", + " rating_vector = np.zeros((1, max_train_movie))\n", + "\n", + " movie_vector = self.movie_features\n", + " rating_vector[0, movieId-1] = rating\n", + " self.ratings[userId] = rating_vector\n", + " #print(user_vector.shape, movie_vector.shape, rating_vector.shape)\n", + " new_user_vector = self._als_step(rating_vector, user_vector, movie_vector)\n", + " self.user_features[userId] = new_user_vector\n", + " \n", + " def predict_set(self, data):\n", + " \n", + " correct_results = []\n", + " predicted_results = []\n", + " for row in data.itertuples():\n", + " prediction = self.predict_rating(row.userId, row.movieId)\n", + " predicted_results.append(prediction)\n", + " correct_results.append(row.rating)\n", + " \n", + " return self.compute_mse(correct_results, predicted_results)\n", + " \n", + " def predict_rating(self, userId, movieId):\n", + " \"\"\"predict ratings for every user and item\"\"\"\n", + " if userId not in self.user_features or movieId not in self.movie_features:\n", + " return 0\n", + " user_vector = self.user_features[userId]\n", + " movie_vector = self.movie_features[movieId]\n", + " prediction = user_vector.dot(movie_vector.T)\n", + " if np.isnan(prediction) or prediction > 
5:\n", + " return 5\n", + " if prediction < 0:\n", + " return 0\n", + " return prediction\n", + "\n", + " def compute_mse(self, y_true, y_pred):\n", + " \"\"\"ignore zero terms prior to comparing the mse\"\"\"\n", + " mse = mean_squared_error(np.asarray(y_true), np.asarray(y_pred))\n", + " return mse" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "285ebde1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(1, 40) (193609, 40) (1, 193609)\n", + "(40, 40)\n", + "(1, 40) (193609, 40) (1, 193609)\n" + ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mals\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mALSStreamingModel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m.01\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m40\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m.1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mals\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain_df\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mals\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict_set\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtest_df\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m 
\u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mals\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict_set\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain_df\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, train)\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtrain\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mrow\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtrain\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitertuples\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 12\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate_user_vector\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrow\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 13\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m\u001b[0m in \u001b[0;36mupdate_user_vector\u001b[0;34m(self, row)\u001b[0m\n\u001b[1;32m 57\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mratings\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0muserId\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrating_vector\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 58\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0muser_vector\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmovie_vector\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mrating_vector\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 59\u001b[0;31m \u001b[0mnew_user_vector\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_als_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrating_vector\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0muser_vector\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmovie_vector\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 60\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0muser_features\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0muserId\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnew_user_vector\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 61\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m\u001b[0m in \u001b[0;36m_als_step\u001b[0;34m(self, ratings, solve_vecs, fixed_vecs)\u001b[0m\n\u001b[1;32m 34\u001b[0m \u001b[0muser\u001b[0m \u001b[0mmovies\u001b[0m \u001b[0mratings\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 35\u001b[0m \"\"\"\n\u001b[0;32m---> 36\u001b[0;31m \u001b[0mA\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfixed_vecs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mT\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfixed_vecs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0meye\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnum_features\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0ml\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 37\u001b[0m 
\u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mA\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 38\u001b[0m \u001b[0mb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mratings\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfixed_vecs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], + "source": [ + "als = ALSStreamingModel(.01, 40, .1)\n", + "als.fit(train_df)\n", + "print(als.predict_set(test_df))\n", + "print(als.predict_set(train_df))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7.4 64-bit ('base': conda)", + "language": "python", + "name": "python37464bitbaseconda9114583a17cf498dbdf9713d49f5bef8" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/recsys/als-streaming-test.ipynb b/recsys/als-streaming-test.ipynb index b10a3e7..627783c 100644 --- a/recsys/als-streaming-test.ipynb +++ b/recsys/als-streaming-test.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 106, + "execution_count": 121, "id": "8e0e6a4f", "metadata": {}, "outputs": [], @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 107, + "execution_count": 122, "id": "5b033873", "metadata": {}, "outputs": [], @@ -26,7 +26,7 @@ }, { "cell_type": "code", - "execution_count": 108, + "execution_count": 123, "id": "a00a310f", "metadata": {}, "outputs": [ @@ -105,7 +105,7 @@ }, { "cell_type": "code", - "execution_count": 109, + "execution_count": 124, "id": "a6a8fb84", "metadata": {}, "outputs": [], @@ -121,8 +121,8 @@ " def fit(self, train):\n", " for row in 
train.itertuples():\n", " self.update_user_vector(row)\n", - " \n", - " return self \n", + " return self\n", + " \n", "\n", " def update_user_vector(self, row):\n", " rating = row.rating\n", @@ -132,7 +132,7 @@ " if userId in self.user_features:\n", " user_vector = self.user_features[userId]\n", " else:\n", - " user_vector = np.random.randint(5, size=self.num_features)\n", + " user_vector = np.random.randint(100, size=self.num_features)\n", "\n", " if movieId in self.movie_features:\n", " movie_vector = self.movie_features[movieId]\n", @@ -177,16 +177,16 @@ }, { "cell_type": "code", - "execution_count": 110, - "id": "fcefa486", + "execution_count": 125, + "id": "285ebde1", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/opt/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:32: RuntimeWarning: overflow encountered in multiply\n", "/opt/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:32: RuntimeWarning: invalid value encountered in add\n", + "/opt/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:32: RuntimeWarning: overflow encountered in multiply\n", "/opt/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:32: RuntimeWarning: invalid value encountered in multiply\n" ] }, @@ -194,13 +194,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "5.058386885636082\n", - "4.707202784719743\n" + "4.833913483279781\n", + "4.487340830655718\n" ] } ], "source": [ - "als = ALSStreamingModel(.01, 10, .1)\n", + "als = ALSStreamingModel(.01, 40, .1)\n", "als.fit(train_df)\n", "print(als.predict_set(test_df))\n", "print(als.predict_set(train_df))" diff --git a/recsys/als.ipynb b/recsys/als.ipynb index 7ad77b3..f1f753a 100644 --- a/recsys/als.ipynb +++ b/recsys/als.ipynb @@ -2,8 +2,8 @@ "cells": [ { "cell_type": "code", - "execution_count": 141, - "id": "a8462b38", + "execution_count": 1, + "id": "7c78c2e5", "metadata": {}, "outputs": [], "source": [ @@ -16,8 +16,8 @@ }, { "cell_type": "code", - 
"execution_count": 142, - "id": "d86767af", + "execution_count": 2, + "id": "3ea8dfe7", "metadata": {}, "outputs": [], "source": [ @@ -26,8 +26,8 @@ }, { "cell_type": "code", - "execution_count": 143, - "id": "539bf12f", + "execution_count": 3, + "id": "e5487124", "metadata": {}, "outputs": [ { @@ -79,8 +79,8 @@ }, { "cell_type": "code", - "execution_count": 144, - "id": "da9af87d", + "execution_count": 4, + "id": "7808d9b0", "metadata": {}, "outputs": [ { @@ -91,11 +91,11 @@ " [0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", " ...,\n", " [2.5, 2. , 2. , ..., 0. , 0. , 0. ],\n", - " [0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " [3. , 0. , 0. , ..., 0. , 0. , 0. ],\n", " [5. , 0. , 0. , ..., 0. , 0. , 0. ]])" ] }, - "execution_count": 144, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -112,7 +112,7 @@ " train = ratings.copy()\n", " for user in range(ratings.shape[0]):\n", " test_index = np.random.choice(\n", - " np.flatnonzero(ratings[user]), size = 10, replace = False)\n", + " np.flatnonzero(ratings[user]), size = 5, replace = False)\n", "\n", " train[user, test_index] = 0.0\n", " test[user, test_index] = ratings[user, test_index]\n", @@ -128,8 +128,8 @@ }, { "cell_type": "code", - "execution_count": 145, - "id": "67ff43ae", + "execution_count": 22, + "id": "fdd592a0", "metadata": {}, "outputs": [], "source": [ @@ -169,6 +169,7 @@ " when updating the user matrix,\n", " the item matrix is the fixed vector and vice versa\n", " \"\"\"\n", + " print(ratings.shape, solve_vecs.shape, fixed_vecs.shape)\n", " A = fixed_vecs.T.dot(fixed_vecs) + np.eye(self.num_features) * self.l\n", " b = ratings.dot(fixed_vecs)\n", " A_inv = np.linalg.inv(A)\n", @@ -191,8 +192,8 @@ }, { "cell_type": "code", - "execution_count": 146, - "id": "1bc4e9f0", + "execution_count": 23, + "id": "c12565f4", "metadata": {}, "outputs": [], "source": [ @@ -208,23 +209,35 @@ }, { "cell_type": "code", - "execution_count": 147, - "id": "79a51d82", + "execution_count": 24, + "id": 
"586142de", "metadata": {}, "outputs": [], "source": [ - "als = ALSModel(n_iters = 100, num_features = 40, l = 0.01)" + "als = ALSModel(n_iters = 1, num_features = 200, l = 0.01)" ] }, { "cell_type": "code", - "execution_count": 148, - "id": "5fb986b1", + "execution_count": 25, + "id": "de62d106", "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(610, 193609) (610, 200) (193609, 200)\n", + "(200, 200)\n", + "(610, 200)\n", + "(193609, 610) (193609, 200) (610, 200)\n", + "(200, 200)\n", + "(193609, 200)\n" + ] + }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEGCAYAAABiq/5QAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAd6UlEQVR4nO3dfZRcdZ3n8fe3qvopnYQmDySaMCZgRCAOoWlcnlZgiDzJquvBYWRxmYAnw54dBVnGyejOgjo7G44edxE4MowGcQ4bZhdEGZUnUYY9iiQB2xASIg8CaQ2kachTJ/1QVd/9497qvlVd/ZCkqm/n3s/rnDp1697fvb/fpcL3/vp7f/W75u6IiEh6ZOJugIiITC4FfhGRlFHgFxFJGQV+EZGUUeAXEUmZXNwNmIg5c+b4okWL4m6GiMhh5ZlnnnnL3edWrj8sAv+iRYvYsGFD3M0QETmsmNlr1dYr1SMikjIK/CIiKaPALyKSModFjl9EZKIGBwfp6uqir68v7qZMmubmZhYuXEhDQ8OEyivwi0iidHV1MWPGDBYtWoSZxd2cunN3enp66OrqYvHixRPaR6keEUmUvr4+Zs+enYqgD2BmzJ49+4D+wklsj//t3gF+99Ze9vYXmN3ayNIFR8TdJBGZJGkJ+iUHer6JDfyPb3mTv7pvIwCfOHkB37hsWcwtEhGZGhKb6pneNHxN29ufj7ElIpImPT09LFu2jGXLljF//nwWLFgw9HlgYGDCx1mzZg1vvPFGXdqY2B7/tEjg7x1Q4BeRyTF79mw6OzsBuOmmm5g+fTo33HDDAR9nzZo1tLe3M3/+/Fo3MbmBf3pTdmh5b38hxpaIiATuvvtubr/9dgYGBjjjjDO47bbbKBaLrFixgs7OTtydlStXMm/ePDo7O7nssstoaWlh3bp1NDY21qwdiQ38rZEe/z6lekRSadGqH9ft2K+u/sgBld+0aRMPPPAAv/zlL8nlcqxcuZJ7772XY489lrfeeovnnnsOgJ07d9LW1satt97KbbfdxrJltb8/mdzA3xhJ9Sjwi0jMfvrTn7J+/Xo6OjoA2L9/P0cffTQXXHABW7du5dprr+Xiiy/m/PPPr3tbkhv4dXNXRKYQd+eqq67iq1/96ohtGzdu5KGHHuKb3/wm999/P3feeWdd25LgwD+c4983UMDdUze2VyTtDjQdU0/Lly/n0ksv5dprr2XOnDn09PTQ29tLS0sLzc3NfPKTn2Tx4sVcc801AMyYMYM9e/bUpS2JDfxNuSwNWWOw4OSLTn++SHNDdvwdRUTq4AMf+AA33ngjy5cvp1gs0tDQwB133EE2m+Xqq68e6pzefPPNAKxYsYLPfOYzdbm5a+5es4PVS0dHhx/Mg1hO+vKj7N
o/CMAz/3U5s6c31bppIjLFbNmyheOPPz7uZky6audtZs+4e0dl2cT+gAvKf8TVqyGdIiJAwgN/NM+vH3GJiAQSHvg1pFNEpFKyA3+jhnSKiFRKduCvGNIpIiKJD/zq8YuIVEp04J+uHL+ITLJaTMu8YsUKtm7dWrc2JvYHXADTNF+PiEyyiUzL7O64O5lM9b73XXfdVdc2JrzHHx3OqRy/iMTnpZdeYunSpVxzzTW0t7ezfft2Vq5cSUdHByeeeCJf+cpXhsqeddZZdHZ2ks/naWtrY9WqVZx00kmcfvrp7Nix45Dbkugev4ZziqTcTXV81vZNuw54l82bN3PXXXdxxx13ALB69WpmzZpFPp/n3HPP5dJLL+WEE04o22fXrl2cffbZrF69muuvv541a9awatWqQ2p6onv8urkrIlPJsccey6mnnjr0ee3atbS3t9Pe3s6WLVvYvHnziH1aWlq46KKLADjllFN49dVXD7kdye7xN0YfxqJUj4jEq7W1dWj5xRdf5JZbbmHdunW0tbVxxRVX0NfXN2Kf6ORs2WyWfP7QO7HJDvyaskEk3Q4iHTNZdu/ezYwZM5g5cybbt2/nkUce4cILL5yUuhMd+Kcr1SMiU1R7ezsnnHACS5cu5ZhjjuHMM8+ctLoTPS3zlu27ueiW/wfA++ZN59HPn13rponIFKNpmYelclrm8ufuKscvIgJJD/zK8YuIjJDwwK9x/CJpdDiksGvpQM830YG/KZchlwkesD5YcPrzSveIJF1zczM9PT2pCf7uTk9PD83NzRPeJ9GjesyMaY1ZdvcFvf19/QWacnrgukiSLVy4kK6uLrq7u+NuyqRpbm5m4cKFEy6f6MAPwZDOUuDf25/nyNbaPaleRKaehoYGFi9eHHczprS6pXrMbI2Z7TCzTZF1XzOzF8xso5k9YGZt9aq/pCzPrxu8IiJ1zfF/F6j8GdpjwFJ3/2Pgt8Df1LF+oPIGr3L8IiJ1C/zu/iTwdsW6R9291O3+FTDxpNRBKhvSqZE9IiKxjuq5CnhotI1mttLMNpjZhkO5SdOqh7GIiJSJJfCb2ZeAPHDPaGXc/U5373D3jrlz5x50XZqvR0Sk3KSP6jGzK4FLgPN8EgbaRnP8+/QULhGRyQ38ZnYh8NfA2e6+bzLqnBbJ8avHLyJS3+Gca4GngOPMrMvMrgZuA2YAj5lZp5ndUa/6S6Yrxy8iUqZuPX53/1SV1d+pV32jUapHRKRcoufqAd3cFRGplPjAP03j+EVEyiQ+8Leqxy8iUibxgX+6cvwiImUSH/inNSrVIyISlfjAr5u7IiLlEh/4NZxTRKRcch/E8s5r8PqvOGKwn9Mzb/BU8UT1+EVESHLg71oPD6ykAbg8ezpPFU9kIF9ksFCkIZv4P3REREaV3AiYbRhabMoWh5Z1g1dE0i65gT8zHPhbMsO5/V7l+UUk5ZIb+LPDD1VvtkjgV49fRFIuwYE/kuqJ9Ph1g1dE0i7BgX+4x98Y6fHv0wPXRSTlEhz4h3v80cCvHr+IpF06Aj/DwV45fhFJuwQH/uFUT0P05u6AAr+IpFtyA39kOGeDR3v8yvGLSLolN/BHUj05pXpERIYkOPAPp3qykcCvm7siknYJDvzDPf6sq8cvIlKSjsBfHBxa1tTMIpJ2CQ78w6mejA8H/u279uPucbRIRGRKSG7gj4zqyRSH0zvPvr6TH3T+Po4WiYhMCQkO/FnAADAvcPmpC4Y2/bcfPs8fdu6PqWEiIvFKbuA3K0v3fOnC9/JHs6YBsKcvz1/d9xuKRaV8RCR9khv4oSzwt+acb/zpSWSCPwL4xUs9fPlfntfwThFJnYQH/siTJQuDdCyaxV+cfezQqrufeo1zv/4E/2fDNvKFYpUDiIgkT8ID/3CPn8IAANctX8JZ750ztLp7Tz9fuG8jH/z7x1l1/0ae2LqDfZrPR0QSLLkPW4eKwB8M6WzKZfneVR/kB52/5+aHX+DN3f0AvN07wL3rt3
Hv+m1kM8Zx82aw7I/aeP/8GRw7dzrHzG1l3oxmMqVckYjIYSrZgT8TTfUMDK/OGJ9oX8gFJ87nH558hXvXvc6OPf3DRYvO5u272bx9d9nhGrMZ3t3WzLvbWpg/s5m5M5uYO72JuTOamNXayKzWRo6c1kjbtAZaGrKY6SIhIlNPsgN/lR5/VGtTjus//D6uO28Jv972Dj957g3+9bfdvNy9l2q/8RooFHm1Zx+v9uwbt+qGrDGzuYEZzTlmtjQwvSnHtMYc05uyTGvKMa0hy7TGLC2NOVoaMrQ0ZmluyNKUy9LUkKE5l6Uxl6EpfDVkMzSW3rMZGnJGQzZDLmO6wIjIAUlR4B8YtVgmY5zynlmc8p5Z/C2wu2+Qjdt28dzvd/Fy915e6d7LK2/1snPfyIvHaAYLTk/vAD29o9dbK7mMkcsaDZkMuayRy2ZoyATvpW3ZTLCczdjQe3aUz9lMhqwF/12yFqwrWzYjmynfbhYsZ0r7ZcJlK5Uf3pax8m2ZTGTZqpezKu9GtCxYuL8RKVOxX5Cpi5YPjlNZtnRsIssWHttKy0Z5XeFxKJVhuEzlviJxSnjgj5xeceJBe2ZzA2ctmcNZS+aUrd/bn2f7zv107dxP9+5+duzpY8eefnp6B3h77wBv9w6wc/8AO/cN0p+fvFFC+aKTLzp9aGTS4aTqRSG40pR9rixX2g7lF6DocYhehIbWVVycIscYak+pDZH2ETl2acdq66PXs+gxGKXsaOXLLoultlWtI9L2il2GaqooX3HYsv8eo51D5fry5o3dvspyo7Wjcktlfbdf3k5jrnZjcRIe+MdO9Ryo6U05lsybwZJ5M8Yt2zdYYHffIHv68uzpy7O3L8/e/jy9/Xn2DeTZN1AIX3n6Bov0DRbYP1igP1+kPx98HgiXB/IFBgvOQL7IQKHI4NDLKehHaIctd/DSwvDamFojU1mxxvOL1S3wm9ka4BJgh7svDdfNAv4ZWAS8Cvypu79TrzZMNNVTD80NQc7+qPGvEYekGPb2BwtF8gUnXywOfS4UfejikC8G2wvu4foixSLki0G5QtEpenCsQuRVdKdQhII7xbJ1HlkX/MMsrS86eKSMO0P7FT1o89Cye1A2XO84xbA+Lx3Hy48ZBMzhdR4ep+BA2XGDMFo6rntwXErrI/uW/r8qnUdpX48eJ9Km6PZi2J7hQF7+2d3D9/r+WxCZqHr2+L8L3AZ8L7JuFfC4u682s1Xh57+uWwsy5T/gSqJMxmjMWE3/DJT6qrxwlC4KpYsFFZ9HXDxKZUbZ7qUrG5RdcMqOH+5DdHtp36HlSHshUrZamcj5RbdXqXvM8hX/nYZrq7xwlp9L9LiV+41WHyPqq1JmxPrR28dE9h/nWJXrSxqytf3/u26B392fNLNFFas/BpwTLt8NPEE9A3+NUz0itVDKy4ef4myKpNRkdxPnuft2gPD9qNEKmtlKM9tgZhu6u7sPrrbIw1gmO9UjIjJVTdn8gLvf6e4d7t4xd+7cgztItMd/AKN6RESSbLID/5tm9i6A8H1HXWsr6/Er8IuIwOQH/geBK8PlK4Ef1rU2pXpEREaoW+A3s7XAU8BxZtZlZlcDq4EPm9mLwIfDz/UT43BOEZGpqp6jej41yqbz6lXnCJloj19TLYuIwBS+uVsTSvWIiIyQ8MCvVI+ISKWEB/5Ij7+oVI+ICKQp8KvHLyICJD7wK9UjIlIp2YFfo3pEREZIduBXqkdEZISEB36lekREKo0Z+M3sisjymRXb/rJejaoZTcssIjLCeD3+6yPLt1Zsu6rGbam9suGcCvwiIjB+4LdRlqt9nnqU4xcRGWG8wD/WU6Cn/hNEleoRERlhvEna3m9mGwl698eGy4Sfj6lry2oho/n4RUQqjRf4j5+UVtSLUj0iIiOMGfjd/bXoZzObDXwIeN3dn6lnw2pCqR4RkRHGG875IzNbGi6/C9hEMJrnn8zsuklo36HRqB4RkRHGu7m72N03hcsrgMfc/d
8B/4bDbTinUj0iIsD4gT/aTT4P+AmAu+8BivVqVM0o1SMiMsJ4N3e3mdlngS6gHXgYwMxagIaxdpwSMurxi4hUGq/HfzVwIvDnwGXuvjNcfxpwVx3bVRtZzc4pIlJpvFE9O4Brqqz/OfDzejWqZjRJm4jICGMGfjN7cKzt7v7R2janxhT4RURGGC/HfzqwDVgLPM3hMD9PVDZyenrmrogIMH7gnw98GPgUcDnwY2Ctuz9f74bVhHr8IiIjjHlz190L7v6wu19JcEP3JeCJcKTP1KfALyIywng9fsysCfgIQa9/EfBN4Pv1bVaNZLIE2SkHL0KxEK4TEUmv8W7u3g0sBR4Cvhz5Fe/hI9sIhf5guTCowC8iqTdej//TQC/wPuBzZkP3dg1wd59Zx7bVRlngH4CG5njbIyISs/HG8R/+D2OPjuzRtA0iIuP+cvfwF73Bqxk6RURSFvg1skdEJAWBP6NUj4hIVPIDv6ZmFhEpk7LAr1SPiEgsgd/MPm9mz5vZJjNba2b1G2NZNjWzevwiIpMe+M1sAfA5oMPdlwJZ4M/qVqGeuysiUiauVE8OaDGzHDAN+EPdalKqR0SkzKQHfnf/PfB14HVgO7DL3R+tLGdmK81sg5lt6O7uPvgK9cB1EZEycaR6jgQ+BiwG3g20mtkVleXc/U5373D3jrlz5x58hRnl+EVEouJI9SwHfufu3e4+SDDT5xl1q003d0VEysQR+F8HTjOzaRbM+nYesKVutSnHLyJSJo4c/9PAfcCzwHNhG+6sW4Xq8YuIlBn3QSz14O43AjdOSmUazikiUka/3BURSZnkB36N6hERKZP8wK8cv4hImRQEfqV6RESiUhb41eMXEUlB4I8+iEU9fhGRFAR+PXNXRCQqXYFfqR4RkRQE/oxSPSIiUckP/Orxi4iUUeAXEUmZFAR+PYhFRCQqXYFfo3pERNIQ+JXqERGJSn7gzyjVIyISlfzAr0naRETKpCDwK9UjIhKVssCvVI+ISAoCf/SXu+rxi4ikIPBrkjYRkah0BX6lekREUhD4M0r1iIhEJT/wa1SPiEiZlAV+pXpERFIQ+JXqERGJSkHgV49fRCQqXYFfwzlFRFIQ+DNZwIJlL0KxEGtzRETilvzAD0r3iIhEpDDwK90jIumWksCvqZlFREpSGPiV6hGRdEtJ4NfIHhGRkpQEfqV6RERKYgn8ZtZmZveZ2QtmtsXMTq9rhXrurojIkNz4ReriFuBhd7/UzBqBaXWtTcM5RUSGTHrgN7OZwIeAPwdw9wGgvtG4LNWTr2tVIiJTXRypnmOAbuAuM/u1mX3bzForC5nZSjPbYGYburu7D61GjeoRERkSR+DPAe3At9z9ZKAXWFVZyN3vdPcOd++YO3fuodWoVI+IyJA4An8X0OXuT4ef7yO4ENRPtMev4ZwiknKTHvjd/Q1gm5kdF646D9hc10ozGs4pIlIS16iezwL3hCN6XgFW1LU25fhFRIbEEvjdvRPomLQKNUmbiMiQlPxyV4FfRKQkJYE/+txdpXpEJN1SEvg1nFNEpCR9gb+oX+6KSLqlI/BnlOoRESlJR+BXqkdEZEgKA79SPSKSbikJ/Er1iIiUpCTwK9UjIlKSvsCvUT0iknLpCPwa1SMiMiQdgV+pHhGRISkM/JqrR0TSLSWBX/Pxi4iUpDDwK9UjIumWksCvVI+ISElKAr96/CIiJekI/C2zhpdf/xXs/kN8bRERiVk6Av+7lsH8Pw6W8/vhZ/893vaIiMQoHYE/k4Hz/274c+c98Mam+NojIhKjdAR+gGPOhiXnhx8cHvvbWJsjIhKX9AR+gA9/BSw85Zd/Blsfirc9IiIxSFfgP+p4OPnTw5/vvRx+fAPsfye+NomITLJ0BX6Ac78I02YHy16E9f8It3bAYzfCa0/pQS0iknjm7nG3YVwdHR2+YcOG2h2w52X4yQ1BuqdScxssaId5S2H+B+DIxdB2NLQeFdwkFhE5TJjZM+7eMWJ9Kg
M/gDts+Rd45Iuwa9v45bNNMH0etM6B6UcFvw1oORJa2qD5CGiaEbwaW6GhNXhvnAa5FmgIX9lGMKvteYiIjGK0wJ+rVjgVzOCEjwYjfV75eXCj97ePwN43qpcv9MOu14PXwVcKuWbINYXvjcEFJdcUXBRyTcGvjLONwSuTG/5cWs40hO+5ile24nMmeLfs8HbLhO/ZivfM8LtVlDOLbKv2suF9hj6Hy1hknZXvV7atSlkRqZv0Bv6ShmY47qLg5R6kgd7cFLx2bIGdrwevvp01qMyDH5Dl99fgWElXugBELggjlqu8j7VtzPfKOiveYZRtVFmOnMOB7De0W5U6h5Yr96N8udp+Y+07bjsOpAyjrJ9IO8Y5nwkft8o+B7vvqP2Psc7/UOoe4zgX3Vw+9cwhUuCPMoM57w1eJ368fFv/Hti7A3rfgt4dwUig/TuD9/494Ws3DPQGr8F94Wv/8KuoCeImzoMLMYAX4m2KSNwu+HsF/liUcvizjz34YxQLkO+Dwb4gdZQPX4X+YNbQfH9wcShbzgcTy5XWF/PDr0JkuTgYHL9YGF72QmRdfvizF8u3e2ldMXiPlittcw/W4ZH9i5GXDy/jVdb7KOsryotI3SnwT6ZMNrzp2xp3S6a2qheLastV3sfaNuF3Rn6uVjZarupy5X6MUzZavsryWPVVrZOKfRm9fLV2lJWbSJkJrh+13RVGrD/Q+g5l34m0ycfYdoB1j3ecTG1DtQK/TD2lG8YiUhcamC4ikjIK/CIiKRNb4DezrJn92sx+FFcbRETSKM4e/7XAlhjrFxFJpVgCv5ktBD4CfDuO+kVE0iyuHv//Ar4AaOC2iMgkm/TAb2aXADvc/Zlxyq00sw1mtqG7u3uSWiciknyTPjunmf0P4NNAHmgGZgLfd/crxtinG3jtIKucA7x1kPseztJ43mk8Z0jneafxnOHAz/s97j63cmWs0zKb2TnADe5+SR3r2FBtWtKkS+N5p/GcIZ3nncZzhtqdt8bxi4ikTKxTNrj7E8ATcbZBRCRt0tDjvzPuBsQkjeedxnOGdJ53Gs8ZanTeh8WjF0VEpHbS0OMXEZEIBX4RkZRJdOA3swvNbKuZvWRmq+JuTz2Y2dFm9nMz22Jmz5vZteH6WWb2mJm9GL4fGXdba61yoj8zW2xmT4fn/M9m1hh3G2vNzNrM7D4zeyH8zk9P+ndtZp8P/21vMrO1ZtacxO/azNaY2Q4z2xRZV/W7tcA3w9i20czaD6SuxAZ+M8sCtwMXAScAnzKzE+JtVV3kgf/i7scDpwH/OTzPVcDj7r4EeDz8nDSVE/3dDPzP8JzfAa6OpVX1dQvwsLu/HziJ4PwT+12b2QLgc0CHuy8FssCfkczv+rvAhRXrRvtuLwKWhK+VwLcOpKLEBn7gg8BL7v6Kuw8A9wIfi7lNNefu29392XB5D0EgWEBwrneHxe4GPl79CIenyon+zMyAPwHuC4sk8ZxnAh8CvgPg7gPuvpOEf9cEw85bzCwHTAO2k8Dv2t2fBN6uWD3ad/sx4Hse+BXQZmbvmmhdSQ78C4Btkc9d4brEMrNFwMnA08A8d98OwcUBOCq+ltVF5UR/s4Gd7p4PPyfx+z4G6AbuClNc3zazVhL8Xbv774GvA68TBPxdwDMk/7suGe27PaT4luTAb1XWJXbsqplNB+4HrnP33XG3p55GmegvDd93DmgHvuXuJwO9JCitU02Y0/4YsBh4N9BKkOaolLTvejyH9O89yYG/Czg68nkh8IeY2lJXZtZAEPTvcffvh6vfLP3pF77viKt9dXAm8FEze5UghfcnBH8BtIXpAEjm990FdLn70+Hn+wguBEn+rpcDv3P3bncfBL4PnEHyv+uS0b7bQ4pvSQ7864El4d3/RoIbQg/G3KaaC3Pb3wG2uPs3IpseBK4Ml68EfjjZbasXd/8bd1/o7osIvtefuft/AH4OXBoWS9Q5A7j7G8A2MzsuXHUesJkEf9cEKZ7TzGxa+G
+9dM6J/q4jRvtuHwT+Yzi65zRgVyklNCHuntgXcDHwW+Bl4Etxt6dO53gWwZ94G4HO8HUxQc77ceDF8H1W3G2t0/mfA/woXD4GWAe8BPxfoCnu9tXhfJcBG8Lv+wfAkUn/roEvAy8Am4B/ApqS+F0DawnuYwwS9OivHu27JUj13B7GtucIRj1NuC5N2SAikjJJTvWIiEgVCvwiIimjwC8ikjIK/CIiKaPALyKSMgr8kgpm9svwfZGZXV7jY3+xWl0iU5WGc0qqmNk5wA3ufskB7JN198IY2/e6+/RatE9kMqjHL6lgZnvDxdXAvzWzznCe96yZfc3M1ofzmv9FWP6c8DkH/5vgBzKY2Q/M7JlwbviV4brVBDNHdprZPdG6wl9Vfi2cR/45M7sscuwnIvPq3xP+KhUzW21mm8O2fH0y/xtJeuTGLyKSKKuI9PjDAL7L3U81sybgF2b2aFj2g8BSd/9d+Pkqd3/bzFqA9WZ2v7uvMrO/dPdlVer6BMEvbU8C5oT7PBluOxk4kWB+lV8AZ5rZZuDfA+93dzeztpqfvQjq8YucTzDnSSfBdNazCR5uAbAuEvQBPmdmvwF+RTBB1hLGdhaw1t0L7v4m8K/AqZFjd7l7kWCajUXAbqAP+LaZfQLYd8hnJ1KFAr+knQGfdfdl4Wuxu5d6/L1DhYJ7A8uB0939JODXQPMEjj2a/shyAch5ML/8BwlmWv048PABnYnIBCnwS9rsAWZEPj8C/KdwamvM7H3hw00qHQG84+77zOz9BI+5LBks7V/hSeCy8D7CXIKnZ60brWHhMxWOcPefANcRpIlEak45fkmbjUA+TNl8l+AZtouAZ8MbrN1Uf4zfw8A1ZrYR2EqQ7im5E9hoZs96MD10yQPA6cBvCGZQ/YK7vxFeOKqZAfzQzJoJ/lr4/MGdosjYNJxTRCRllOoREUkZBX4RkZRR4BcRSRkFfhGRlFHgFxFJGQV+EZGUUeAXEUmZ/w+P7CqHhHCnXgAAAABJRU5ErkJggg==\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYgAAAEGCAYAAAB/+QKOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAWoElEQVR4nO3df5BdZZ3n8ffHBAhiJPwIoEQmARkF4hBji4M4CitgYPzJYCGuOyzgZpnVGdSy1rhuLY7OH0F3a0fFKjbFBKgtDbrDMlrLLyOlMjuKEDRAADHAYNkbNDEoCgwDke/+0QenaZ5O50ffezvp96vq1j3nOc859/vQVXxyznPuuakqJEka6wWDLkCSNDUZEJKkJgNCktRkQEiSmgwISVLTzEEXMJkOPPDAmj9//qDLkKRdxu233/6Lqprb2rZbBcT8+fNZs2bNoMuQpF1Gkp+Mt81LTJKkJgNCktRkQEiSmnarOQhJ2lZPP/00w8PDPPnkk4MupS9mzZrFvHnz2GOPPbZ5HwNC0rQ0PDzM7NmzmT9/PkkGXU5PVRWbN29meHiYBQsWbPN+XmKSNC09+eSTHHDAAbt9OAAk4YADDtjus6WeBUSSlUk2Jlk3qu2zSX6U5M4k1ySZM86+DyW5K8naJN63KqknpkM4PGtHxtrLM4grgCVj2lYDC6vqD4AfAx/fyv4nVdWiqhrqUX2SpK3oWUBU1c3AI2PavlFVW7rVW4B5vfp8SZrKNm/ezKJFi1i0aBGHHHIIhx566O/Wn3rqqW0+zsqVK/nZz37WkxoHOUl9HvCVcbYV8I0kBfyPqlox3kGSLAWWAhx22GGTXqQk9cIBBxzA2rVrAfjkJz/Ji170Ij760Y9u93FWrlzJ4sWLOeSQQya7xMEERJJPAFuAL43T5YSq2pDkIGB1kh91ZyTP04XHCoChoSF/Hk/SLu/KK6/ki1/8Ik899RSvf/3rueSSS3jmmWc499xzWbt2LVXF0qVLOfjgg1m7di1nnXUWe++9N7feeit77rnnpNXR94BIcg7wVuDNNc7vnVbVhu59Y5JrgOOAZkBI0s6av+zanh37oeV/vF39161bxzXXXMN3v/tdZs6cydKlS7nqqqs44ogj+MUvfsFdd90FwK9+9SvmzJnDF77wBS655BIWLVo06bX39TbXJEuAjwFvr6onxumzT5LZzy4DpwLrWn0laXfzzW9+k9tuu42hoSEWLVrEd77zHR544AFe/vKXc99993HhhRdy4403su+++/a8lp6dQSRZBZwIHJhkGLiIkbuW9mLkshHALVV1QZKXApdV1enAwcA13faZwJer6oZe1SlJU0lVcd555/HpT3/6edvuvPNOrr/+ej7/+c9z9dVXs2LFuNOzk6JnAVFVZzea/2acvhuA07vlB4Fje1WXJI21vZeBeunkk0/mzDPP5MILL+TAAw9k8+bNPP744+y9997MmjWLd7/73SxYsIALLrgAgNmzZ/Ob3/ymJ7X4qA1JmkJe9apXcdFFF3HyySfzzDPPsMcee3DppZcyY8YMzj//fKqKJFx88cUAnHvuubz//e/vySR1xpkn3iUNDQ2VPxgkaVvce++9HHXUUYMuo69aY05y+3hfSPZZTJKkJgNCktRkQEiSmgwISVKTASFJajIgJElNBoQkDcBkPO773HPP5b777utZjX5RTpIGYFse911VVBUveEH73/KXX355T2v0DEKSppD777+fhQsXcsEFF7B48WIefvhhli5dytDQEMcccwyf+tSnftf3DW94A2vXrmXLli3MmTOHZcuWceyxx3L88cezcePGna7FMwhJ+mQPn4z6yUe3e5d77rmHyy+/nEsvvRSA5cuXs//++7NlyxZOOukkzjzzTI4++ujn7PPoo4/ypje9ieXLl/ORj3yElStXsmzZsp0q3TMISZpijjjiCF772tf+bn3VqlUsXryYxYsXc++993LPPfc8b5+9996b004
7DYDXvOY1PPTQQztdh2cQkjTF7LPPPr9bXr9+PZ/73Oe49dZbmTNnDu973/t48sknn7fP6If0zZgxgy1btux0HQaEJO3AZaB++fWvf83s2bN58YtfzMMPP8yNN97IkiVL+vLZBoQkTWGLFy/m6KOPZuHChRx++OGccMIJfftsH/ctaVrycd8jfNy3JGm7GRCSpCYDQtK0tTtdYp/IjozVgJA0Lc2aNYvNmzdPi5CoKjZv3sysWbO2az/vYpI0Lc2bN4/h4WE2bdo06FL6YtasWcybN2+79ulZQCRZCbwV2FhVC7u2zwJvA54CHgDOrapfNfZdAnwOmAFcVlXLe1WnpOlpjz32YMGCBYMuY0rr5SWmK4Cx3+ZYDSysqj8Afgx8fOxOSWYAXwROA44Gzk5y9Nh+kqTe6llAVNXNwCNj2r5RVc9+//sWoHW+cxxwf1U9WFVPAVcB7+hVnZKktkFOUp8HXN9oPxT46aj14a6tKcnSJGuSrJku1xIlqR8GEhBJPgFsAb7U2txoG/c2g6paUVVDVTU0d+7cySpRkqa9vt/FlOQcRiav31zt+8uGgZeNWp8HbOhHbZKkf9HXM4ju7qSPAW+vqifG6XYbcGSSBUn2BN4DfL1fNUqSRvQsIJKsAr4HvCLJcJLzgUuA2cDqJGuTXNr1fWmS6wC6SewPAjcC9wJfraq7e1WnJKnNp7lK0jTm01wlSdvNgJAkNRkQkqQmA0KS1GRASJKaDAhJUpMBIUlqMiAkSU0GhCSpyYCQJDUZEJKkJgNCktRkQEiSmgwISVKTASFJajIgJElNBoQkqcmAkCQ1GRCSpCYDQpLUZEBIkpoMCElSkwEhSWrqWUAkWZlkY5J1o9reneTuJM8kGdrKvg8luSvJ2iRrelWjJGl8vTyDuAJYMqZtHXAGcPM27H9SVS2qqnGDRJLUOzN7deCqujnJ/DFt9wIk6dXHSpImyVSdgyjgG0luT7J0ax2TLE2yJsmaTZs29ak8Sdr9TdWAOKGqFgOnAR9I8sbxOlbViqoaqqqhuXPn9q9CSdrNTcmAqKoN3ftG4BrguMFWJEnTz5QLiCT7JJn97DJwKiOT25KkPurlba6rgO8Br0gynOT8JO9KMgwcD1yb5Mau70uTXNftejDwf5PcAdwKXFtVN/SqTklSWy/vYjp7nE3XNPpuAE7vlh8Eju1VXZKkbTPlLjFJkqYGA0KS1GRASJKaDAhJUpMBIUlqMiAkSU0GhCSpyYCQJDUZEJKkJgNCktRkQEiSmgwISVKTASFJajIgJElNBoQkqcmAkCQ1GRCSpCYDQpLUZEBIkpoMCElSkwEhSWraakAked+o5RPGbPtgr4qSJA3eRGcQHxm1/IUx286b5FokSVPIRAGRcZZb68/dmKxMsjHJulFt705yd5JnkgxtZd8lSe5Lcn+SZRPUKEnqgYkCosZZbq2PdQWwZEzbOuAM4ObxdkoyA/gicBpwNHB2kqMn+CxJ0iSbOcH2Vya5k5GzhSO6Zbr1w7e2Y1XdnGT+mLZ7AZKtnnwcB9xfVQ92fa8C3gHcM0GtkqRJNFFAHNWXKp7rUOCno9aHgdeN1znJUmApwGGHHdbbyiRpGtnqJaaq+snoF/AYsBg4sFvvhdbpxbiXs6pqRVUNVdXQ3Llze1SSJE0/E93m+n+SLOyWX8LIHMJ5wP9M8qEe1TQMvGzU+jxgQ48+S5I0jokmqRdU1bN3IZ0LrK6qtzFyyadXt7neBhyZZEGSPYH3AF/v0WdJksYxUUA8PWr5zcB1AFX1G+CZre2YZBXwPeAVSYaTnJ/kXUmGgeOBa5Pc2PV9aZJnj70F+CBwI3Av8NWqunv7hyZJ2hkTTVL/NMmfM3LZZzFwA0CSvYE9trZjVZ09zqZrGn03AKePWr+OLowkSYMx0RnE+cAxwL8FzqqqX3Xtfwhc3sO6JEkDttUziKraCFzQaP8W8K1eFSVJGrytBkSSrU4OV9XbJ7ccSdJUMdEcxPGMfGl
tFfB9Jnj+kiRp9zFRQBwCnAKcDbwXuBZY5V1FkrT7m+ib1L+tqhuq6hxGJqbvB77d3dkkSdqNTXQGQZK9gD9m5CxiPvB54H/3tixJ0qBNNEl9JbAQuB74y1HfqpYk7eYmOoP4N8DjwO8DfzHqMd0Bqqpe3MPaJEkDNNH3ICb6Ip0kaTdlAEiSmgwISVKTASFJajIgJElNBoQkqcmAkCQ1GRCSpCYDQpLUZEBIkpoMCElSkwEhSWoyICRJTT0LiCQrk2xMsm5U2/5JVidZ373vN86+v02ytntt9XexJUm90csziCuAJWPalgE3VdWRwE3dess/VdWi7vX2HtYoSRpHzwKiqm4GHhnT/A7gym75SuCdvfp8SdLO6fccxMFV9TBA937QOP1mJVmT5JYkWw2RJEu7vms2bdo02fVK0rQ1VSepD6uqIeC9wF8nOWK8jlW1oqqGqmpo7ty5/atQknZz/Q6Inyd5CUD3vrHVqao2dO8PAt8GXt2vAiVJI/odEF8HzumWzwG+NrZDkv2S7NUtHwicANzTtwolSUBvb3NdBXwPeEWS4STnA8uBU5KsB07p1kkylOSybtejgDVJ7gC+BSyvKgNCkvpsZq8OXFVnj7PpzY2+a4D3d8vfBV7Vq7okSdtmqk5SS5IGzICQJDUZEJKkJgNCktRkQEiSmgwISVKTASFJajIgJElNBoQkqcmAkCQ1GRCSpCYDQpLUZEBIkpoMCElSkwEhSWoyICRJTQaEJKnJgJAkNRkQkqQmA0KS1GRASJKaDAhJUpMBIUlq6mlAJFmZZGOSdaPa9k+yOsn67n2/cfY9p+uzPsk5vaxTkvR8vT6DuAJYMqZtGXBTVR0J3NStP0eS/YGLgNcBxwEXjRckkqTe6GlAVNXNwCNjmt8BXNktXwm8s7HrW4DVVfVIVf0SWM3zg0aS1EODmIM4uKoeBujeD2r0ORT46aj14a7teZIsTbImyZpNmzZNerGSNF1N1UnqNNqq1bGqVlTVUFUNzZ07t8dlSdL0MYiA+HmSlwB07xsbfYaBl41anwds6ENtkqTOIALi68CzdyWdA3yt0edG4NQk+3WT06d2bZKkPun1ba6rgO8Br0gynOR8YDlwSpL1wCndOkmGklwGUFWPAJ8Gbuten+raJEl9kqrmpf1d0tDQUK1Zs2bQZUjSLiPJ7VU11No2VSepJUkDZkBIkpoMCElSkwEhSWoyICRJTQaEJKnJgJAkNRkQkqQmA0KS1GRASJKaDAhJUpMBIUlqMiAkSU0GhCSpyYCQJDUZEJKkJgNCktRkQEiSmgwISVKTASFJajIgJElNBoQkqcmAkCQ1DSQgklyYZF2Su5N8qLH9xCSPJlnbvf7LIOqUpOlsZr8/MMlC4N8BxwFPATckubaq1o/p+vdV9dZ+1ydJGjGIM4ijgFuq6omq2gJ8B3jXAOqQJG3FIAJiHfDGJAckeSFwOvCyRr/jk9yR5Pokx4x3sCRLk6xJsmbTpk29qlmSpp2+X2KqqnuTXAysBh4D7gC2jOn2A+D3quqxJKcDfwccOc7xVgArAIaGhqpnhUvSNDOQSeqq+puqWlxVbwQeAdaP2f7rqnqsW74O2CPJgQMoVZKmrUHdxXRQ934YcAawasz2Q5KkWz6OkTo397tOSZrO+n6JqXN1kgOAp4EPVNUvk1wAUFWXAmcCf5ZkC/BPwHuqystHktRHAwmIqvqjRtulo5YvAS7pa1GSpOfwm9SSpCYDQpLUZEBIkpoMCElSkwEhSWoyICRJTQaEJKnJgJAkNRkQkqQmA0KS1GRASJKasjs9Ay/JJuAng65jOx0I/GLQRfSZY54eHPOu4feqam5rw24VELuiJGuqamjQdfSTY54eHPOuz0tMkqQmA0KS1GRADN6KQRcwAI55enDMuzjnICRJTZ5BSJKaDAhJUpMB0QdJ9k+yOsn67n2/cfqd0/VZn+ScxvavJ1nX+4p33s6MOckLk1yb5EdJ7k6yvL/Vb58kS5L
cl+T+JMsa2/dK8pVu+/eTzB+17eNd+31J3tLPunfUjo43ySlJbk9yV/f+r/pd+47amb9xt/2wJI8l+Wi/ap4UVeWrxy/gM8CybnkZcHGjz/7Ag937ft3yfqO2nwF8GVg36PH0eszAC4GTuj57An8PnDboMY0zzhnAA8DhXa13AEeP6fMfgEu75fcAX+mWj+767wUs6I4zY9Bj6uF4Xw28tFteCPy/QY+n12Metf1q4H8BHx30eLbn5RlEf7wDuLJbvhJ4Z6PPW4DVVfVIVf0SWA0sAUjyIuAjwF/1odbJssNjrqonqupbAFX1FPADYF4fat4RxwH3V9WDXa1XMTL20Ub/t/hb4M1J0rVfVVX/XFX/CNzfHW8q2+HxVtUPq2pD1343MCvJXn2peufszN+YJO9k5B8/d/ep3kljQPTHwVX1MED3flCjz6HAT0etD3dtAJ8G/hvwRC+LnGQ7O2YAkswB3gbc1KM6d9aEYxjdp6q2AI8CB2zjvlPNzox3tD8BflhV/9yjOifTDo85yT7Ax4C/7EOdk27moAvYXST5JnBIY9MntvUQjbZKsgh4eVV9eOx1zUHr1ZhHHX8msAr4fFU9uP0V9sVWxzBBn23Zd6rZmfGObEyOAS4GTp3EunppZ8b8l8B/r6rHuhOKXYoBMUmq6uTxtiX5eZKXVNXDSV4CbGx0GwZOHLU+D/g2cDzwmiQPMfL3OijJt6vqRAash2N+1gpgfVX99SSU2yvDwMtGrc8DNozTZ7gLvX2BR7Zx36lmZ8ZLknnANcCfVtUDvS93UuzMmF8HnJnkM8Ac4JkkT1bVJb0vexIMehJkOryAz/LcCdvPNPrsD/wjI5O0+3XL+4/pM59dZ5J6p8bMyHzL1cALBj2WCcY5k5Hrywv4lwnMY8b0+QDPncD8ard8DM+dpH6QqT9JvTPjndP1/5NBj6NfYx7T55PsYpPUAy9gOrwYuf56E7C+e3/2f4JDwGWj+p3HyETl/cC5jePsSgGxw2Nm5F9oBdwLrO1e7x/0mLYy1tOBHzNyp8snurZPAW/vlmcxcgfL/cCtwOGj9v1Et999TNE7tSZrvMB/Bh4f9TddCxw06PH0+m886hi7XED4qA1JUpN3MUmSmgwISVKTASFJajIgJElNBoQkqcmAkDpJvtu9z0/y3kk+9n9qfZY0lXmbqzRGkhMZuV/9rduxz4yq+u1Wtj9WVS+ajPqkfvEMQuokeaxbXA78UZK1ST6cZEaSzya5LcmdSf591//EJN9K8mXgrq7t77rfOrg7ydKubTmwd3e8L43+rIz4bJJ13e8knDXq2N9O8rfd72J8adTTQZcnuaer5b/287+RphefxSQ93zJGnUF0/6N/tKpe2z2e+h+SfKPrexywsEYe1w1wXlU9kmRv4LYkV1fVsiQfrKpFjc86A1gEHAsc2O1zc7ft1Yw8jmMD8A/ACUnuAd4FvLKqqnvardQTnkFIEzsV+NMka4HvM/IYkSO7bbeOCgeAv0hyB3ALIw9vO5KtewOwqqp+W1U/B74DvHbUsYer6hlGHksxH/g18CRwWZIz2LUeAa9djAEhTSzAn1fVou61oKqePYN4/HedRuYuTgaOr6pjgR8y8oyeiY49ntG/lfBbYGaN/NbAcYw8yPCdwA3bNRJpOxgQ0vP9Bpg9av1G4M+S7AGQ5Pe7H4IZa1/gl1X1RJJXAn84atvTz+4/xs3AWd08x1zgjYw87K2p+3XBfavqOuBDjFyeknrCOQjp+e4EtnSXiq4APsfI5Z0fdBPFm2j/hOoNwAVJ7mTk6ay3jNq2ArgzyQ+q6l+Par+Gkd/8uIORJ9j+x6r6WRcwLbOBryWZxcjZx4d3bIjSxLzNVZLU5CUmSVKTASFJajIgJElNBoQkqcmAkCQ1GRCSpCYDQpLU9P8BzqAP3Gh6ntwAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] @@ -242,16 +255,16 @@ }, { "cell_type": "code", - "execution_count": 149, - "id": "699c6905", + "execution_count": 176, + "id": "1bb4f7e6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "10.136669617836302\n", - "4.401030427985024\n" + "11.802698171033642\n", + "1.0273115267397006\n" ] } ], From b52d3d76207330b583f0306090cc593430698584 Mon Sep 17 00:00:00 2001 From: Amit Narang Date: Sun, 31 Oct 2021 15:21:42 -0700 Subject: [PATCH 8/9] WIP --- recsys/als-stream-2.ipynb | 102 ++++++++++++++++++-------------------- 1 file changed, 47 insertions(+), 55 deletions(-) diff --git a/recsys/als-stream-2.ipynb b/recsys/als-stream-2.ipynb index 79a9b9f..e865c85 100644 --- a/recsys/als-stream-2.ipynb +++ b/recsys/als-stream-2.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 16, + "execution_count": 13, "id": "8e0e6a4f", "metadata": {}, "outputs": [], @@ -11,12 +11,13 @@ "import numpy as np\n", "from collections import defaultdict\n", "import matplotlib.pyplot as plt\n", - "from sklearn.metrics import mean_squared_error" + "from sklearn.metrics import mean_squared_error\n", + "import time" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 2, "id": "5b033873", "metadata": {}, "outputs": [], @@ -26,7 +27,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 27, "id": "a00a310f", "metadata": {}, "outputs": [ @@ -48,34 +49,34 @@ "100835 610 170875 3.0 1493846415\n", "\n", "[100836 rows x 4 columns]\n", - " userId movieId rating\n", - "0 1 1 4.0\n", - "1 1 47 5.0\n", - "2 1 101 5.0\n", - "3 1 157 5.0\n", - "4 1 223 3.0\n", - "... ... ... ...\n", - "33607 610 160527 4.5\n", - "33608 610 161582 4.0\n", - "33609 610 163937 3.5\n", - "33610 610 166528 4.0\n", - "33611 610 168250 5.0\n", + " userId movieId rating\n", + "0 1 1 4.0\n", + "1 1 50 5.0\n", + "2 1 151 5.0\n", + "3 1 223 3.0\n", + "4 1 296 3.0\n", + ".. ... ... 
...\n", + "180 601 112556 4.0\n", + "181 601 122916 3.5\n", + "182 601 152081 4.5\n", + "183 601 170705 5.0\n", + "184 601 177765 4.5\n", "\n", - "[33612 rows x 3 columns]\n", - " userId movieId rating\n", - "0 1 3 4.0\n", - "1 1 6 4.0\n", - "2 1 50 5.0\n", - "3 1 70 3.0\n", - "4 1 110 4.0\n", - "... ... ... ...\n", - "67219 610 164179 5.0\n", - "67220 610 166534 4.0\n", - "67221 610 168248 5.0\n", - "67222 610 168252 5.0\n", - "67223 610 170875 3.0\n", + "[185 rows x 3 columns]\n", + " userId movieId rating\n", + "0 1 3 4.0\n", + "1 1 6 4.0\n", + "2 1 47 5.0\n", + "3 1 70 3.0\n", + "4 1 101 5.0\n", + ".. ... ... ...\n", + "545 601 168326 4.0\n", + "546 601 170697 4.0\n", + "547 601 172591 4.5\n", + "548 601 174055 4.0\n", + "549 601 176371 4.0\n", "\n", - "[67224 rows x 3 columns]\n" + "[550 rows x 3 columns]\n" ] } ], @@ -90,10 +91,11 @@ "train_data = []\n", "\n", "for row in df.itertuples():\n", - " if row.Index % 3 == 0:\n", - " test_data.append([row.userId, row.movieId, row.rating])\n", - " else:\n", - " train_data.append([row.userId, row.movieId, row.rating])\n", + " if (row.userId % 100 == 1):\n", + " if row.Index % 4 == 0:\n", + " test_data.append([row.userId, row.movieId, row.rating])\n", + " else:\n", + " train_data.append([row.userId, row.movieId, row.rating])\n", "\n", "test_df = pd.DataFrame(data=test_data, columns=columns)\n", "train_df = pd.DataFrame(data=train_data, columns=columns)\n", @@ -105,7 +107,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 28, "id": "a6a8fb84", "metadata": {}, "outputs": [], @@ -117,11 +119,15 @@ " self.alpha = alpha\n", " self.user_features = dict()\n", " self.movie_features = np.random.randint(100, size=(max_train_movie, num_features))\n", + " print(self.movie_features.shape)\n", " self.ratings = dict()\n", " \n", " def fit(self, train):\n", " for row in train.itertuples():\n", + " #print(\"Update\", row.Index)\n", + " #start = time.time()\n", " self.update_user_vector(row)\n", + " 
#print(\"Took\", time.time()-start)\n", " return self \n", "\n", " def _als_step(self, ratings, solve_vecs, fixed_vecs):\n", @@ -187,7 +193,7 @@ " if userId not in self.user_features or movieId not in self.movie_features:\n", " return 0\n", " user_vector = self.user_features[userId]\n", - " movie_vector = self.movie_features[movieId]\n", + " movie_vector = self.movie_features[movieId-1]\n", " prediction = user_vector.dot(movie_vector.T)\n", " if np.isnan(prediction) or prediction > 5:\n", " return 5\n", @@ -203,7 +209,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 29, "id": "285ebde1", "metadata": {}, "outputs": [ @@ -211,28 +217,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "(1, 40) (193609, 40) (1, 193609)\n", - "(40, 40)\n", - "(1, 40) (193609, 40) (1, 193609)\n" - ] - }, - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mals\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mALSStreamingModel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m.01\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m40\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m.1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mals\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain_df\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m 
\u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mals\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict_set\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtest_df\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mals\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict_set\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain_df\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, train)\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtrain\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mrow\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtrain\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitertuples\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 12\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate_user_vector\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrow\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 13\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m\u001b[0m in \u001b[0;36mupdate_user_vector\u001b[0;34m(self, row)\u001b[0m\n\u001b[1;32m 57\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mratings\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0muserId\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrating_vector\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 58\u001b[0m 
\u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0muser_vector\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmovie_vector\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrating_vector\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 59\u001b[0;31m \u001b[0mnew_user_vector\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_als_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrating_vector\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0muser_vector\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmovie_vector\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 60\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0muser_features\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0muserId\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnew_user_vector\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 61\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m\u001b[0m in \u001b[0;36m_als_step\u001b[0;34m(self, ratings, solve_vecs, fixed_vecs)\u001b[0m\n\u001b[1;32m 34\u001b[0m \u001b[0muser\u001b[0m \u001b[0mmovies\u001b[0m \u001b[0mratings\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 35\u001b[0m \"\"\"\n\u001b[0;32m---> 36\u001b[0;31m \u001b[0mA\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfixed_vecs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mT\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfixed_vecs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0meye\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnum_features\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m*\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0ml\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 37\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mA\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 38\u001b[0m \u001b[0mb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mratings\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfixed_vecs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + "(176371, 40)\n", + "16.894003342096422\n", + "16.5193922235925\n" ] } ], "source": [ - "als = ALSStreamingModel(.01, 40, .1)\n", + "als = ALSStreamingModel(.01, 100, .1)\n", "als.fit(train_df)\n", "print(als.predict_set(test_df))\n", "print(als.predict_set(train_df))" From 9ddd4d016f2d2c3c981a39e6349a9a361f946ff3 Mon Sep 17 00:00:00 2001 From: Amit Narang Date: Thu, 4 Nov 2021 10:55:51 -0700 Subject: [PATCH 9/9] Work followup from meeting --- recsys/als-jg-edits.ipynb | 438 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 438 insertions(+) create mode 100644 recsys/als-jg-edits.ipynb diff --git a/recsys/als-jg-edits.ipynb b/recsys/als-jg-edits.ipynb new file mode 100644 index 0000000..dcd6981 --- /dev/null +++ b/recsys/als-jg-edits.ipynb @@ -0,0 +1,438 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 106, + "id": "7c78c2e5", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from collections import defaultdict\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.metrics import mean_squared_error" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "id": "3ea8dfe7", + "metadata": {}, + "outputs": [], + "source": [ + "ratings_path = \"/Users/amitnarang/Downloads/ml-latest-small/ratings.csv\"" + ] + }, + { + "cell_type": "code", + 
"execution_count": 110, + "id": "e5487124", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " userId movieId rating timestamp\n", + "0 1 1 4.0 964982703\n", + "1 1 3 4.0 964981247\n", + "2 1 6 4.0 964982224\n", + "3 1 47 5.0 964983815\n", + "4 1 50 5.0 964982931\n", + "... ... ... ... ...\n", + "100831 610 166534 4.0 1493848402\n", + "100832 610 168248 5.0 1493850091\n", + "100833 610 168250 5.0 1494273047\n", + "100834 610 168252 5.0 1493846352\n", + "100835 610 170875 3.0 1493846415\n", + "\n", + "[100836 rows x 4 columns]\n", + "[[0. 0. 0. ... 0. 0. 0. ]\n", + " [0. 0. 0. ... 0. 0. 0. ]\n", + " [0. 0. 0. ... 0. 0. 0. ]\n", + " ...\n", + " [2.5 2. 2. ... 0. 0. 0. ]\n", + " [3. 0. 0. ... 0. 0. 0. ]\n", + " [5. 0. 0. ... 0. 0. 0. ]]\n", + " userId movieId rating\n", + "0 1 1 4.0\n", + "1 1 3 4.0\n", + "2 1 6 4.0\n", + "3 1 47 5.0\n", + "4 1 50 5.0\n", + ".. ... ... ...\n", + "227 1 3744 4.0\n", + "228 1 3793 5.0\n", + "229 1 3809 4.0\n", + "230 1 4006 4.0\n", + "231 1 5060 5.0\n", + "\n", + "[232 rows x 3 columns]\n" + ] + } + ], + "source": [ + "df = pd.read_csv(ratings_path, sep = ',')\n", + "columns=[\"userId\", \"movieId\", \"rating\"]\n", + "print(df)\n", + "n_users = max(df['userId'])\n", + "n_items = max(df['movieId'])\n", + "ratings = np.zeros((n_users, n_items))\n", + "data = []\n", + "for row in df.itertuples():\n", + " if row.userId == 1:\n", + " data.append([row.userId, row.movieId, row.rating])\n", + " else:\n", + " ratings[row.userId - 1, row.movieId - 1] = row.rating\n", + "stream_df = pd.DataFrame(data=data, columns=columns)\n", + "print(ratings)\n", + "print(stream_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "id": "7808d9b0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
userIdmovieIdrating
0114.0
1134.0
2164.0
31475.0
41505.0
............
222137444.0
223137935.0
224138094.0
225140064.0
226150605.0
\n", + "

227 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " userId movieId rating\n", + "0 1 1 4.0\n", + "1 1 3 4.0\n", + "2 1 6 4.0\n", + "3 1 47 5.0\n", + "4 1 50 5.0\n", + ".. ... ... ...\n", + "222 1 3744 4.0\n", + "223 1 3793 5.0\n", + "224 1 3809 4.0\n", + "225 1 4006 4.0\n", + "226 1 5060 5.0\n", + "\n", + "[227 rows x 3 columns]" + ] + }, + "execution_count": 111, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def create_split(stream_df):\n", + " \"\"\"\n", + " split into train and test sets\n", + " User vectors \n", + " \n", + " train vectors on the ratings matrix\n", + " no need to test\n", + " \n", + " then train the user vectors with the streaming update, test on test set\n", + " \n", + " then call it on the train data for those new users and the test data for those users\n", + " \"\"\"\n", + " test_data = np.zeros((n_users, n_items))\n", + " data = []\n", + " for userId in stream_df[\"userId\"].unique():\n", + " user_df = stream_df[stream_df[\"userId\"] == userId]\n", + " random_five = user_df.sample(5)\n", + " for row in user_df.itertuples():\n", + " if any(random_five[\"movieId\"] == row.movieId):\n", + " test_data[row.userId - 1, row.movieId - 1] = row.rating\n", + " else:\n", + " data.append([row.userId, row.movieId, row.rating])\n", + " train_df = pd.DataFrame(data=data, columns=columns)\n", + " \n", + " return test_data, train_df\n", + " \n", + " '''\n", + " train = np.zeros(ratings.shape) \n", + " \n", + " test = np.zeros(ratings.shape)\n", + " train = ratings.copy()\n", + " for user in range(ratings.shape[0]):\n", + " test_index = np.random.choice(\n", + " np.flatnonzero(ratings[user]), size = 5, replace = False)\n", + "\n", + " train[user, None] = 0.0\n", + " test[user, test_index] = ratings[user, test_index]\n", + " \n", + " # assert that training and testing set are truly disjoint\n", + " return train, test'''\n", + "\n", + "test, train_df = create_split(stream_df)\n", + "train_df" + ] + }, + { + "cell_type": "code", + "execution_count": 
112, + "id": "fdd592a0", + "metadata": {}, + "outputs": [], + "source": [ + "class ALSModel:\n", + " def __init__(self, l, num_features, n_iters):\n", + " self.l = l\n", + " self.num_features = num_features\n", + " self.n_iters = n_iters\n", + " \n", + " def fit(self, train):\n", + " \"\"\"\n", + " fit the user and item factors on the training ratings only,\n", + " assuming the dataset is in the form\n", + " of User x Item matrix with cells as ratings\n", + " \"\"\"\n", + " self.n_user, self.n_item = train.shape\n", + " self.user_factors = np.random.random((self.n_user, self.num_features))\n", + " self.item_factors = np.random.random((self.n_item, self.num_features))\n", + " \n", + " # alternate the user and item ALS updates for n_iters rounds;\n", + " # no mse is recorded here (fit_stream is what tracks the test mse)\n", + " for i in range(self.n_iters):\n", + " self.user_factors = self._als_step(train, self.user_factors, self.item_factors)\n", + " self.item_factors = self._als_step(train.T, self.item_factors, self.user_factors) \n", + " return self \n", + " \n", + " def fit_stream(self, ratings, test, train_df):\n", + " '''\n", + " requires fit to have been called first (uses its user/item factors)\n", + " for each rating that streams in from train_df:\n", + " add it to the rating matrix (ratings is modified in place)\n", + " run ALS update for the user factors on the entire rating matrix\n", + " predict and compute mse against the test matrix\n", + " append the mse to self.test_mse_record\n", + " '''\n", + " self.test_mse_record = []\n", + " for row in train_df.itertuples():\n", + " ratings[row.userId - 1, row.movieId - 1] = row.rating\n", + " self.user_factors = self._als_step(ratings, self.user_factors, self.item_factors)\n", + " predictions = self.predict()\n", + " test_mse = self.compute_mse(test, predictions)\n", + " self.test_mse_record.append(test_mse)\n", + " \n", + " def _als_step(self, ratings, solve_vecs, fixed_vecs):\n", + " \"\"\"\n", + " when updating the user matrix,\n", + " the item matrix is the fixed vector and vice versa\n", + " \"\"\"\n", + " A = fixed_vecs.T.dot(fixed_vecs) + np.eye(self.num_features) * 
self.l\n", + " b = ratings.dot(fixed_vecs)\n", + " A_inv = np.linalg.inv(A)\n", + " solve_vecs = b.dot(A_inv)\n", + " return solve_vecs\n", + " \n", + " def predict(self):\n", + " \"\"\"predict ratings for every user and item\"\"\"\n", + " pred = self.user_factors.dot(self.item_factors.T)\n", + " return pred\n", + " \n", + " @staticmethod\n", + " def compute_mse(y_true, y_pred):\n", + " \"\"\"compute the mse over only the entries where y_true is nonzero\"\"\"\n", + " mask = np.nonzero(y_true)\n", + " mse = mean_squared_error(y_true[mask], y_pred[mask])\n", + " return mse\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "id": "c12565f4", + "metadata": {}, + "outputs": [], + "source": [ + "def plot_learning_curve(model):\n", + " \"\"\"visualize the test mse recorded in model.test_mse_record\"\"\"\n", + " linewidth = 3\n", + " plt.plot(model.test_mse_record, label = 'Test', linewidth = linewidth)\n", + " plt.xlabel('iterations')\n", + " plt.ylabel('MSE')\n", + " plt.legend(loc = 'best')" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "id": "586142de", + "metadata": {}, + "outputs": [], + "source": [ + "als = ALSModel(n_iters = 100, num_features = 100, l = 0.01)" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "id": "de62d106", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<__main__.ALSModel at 0x7f9e886f0250>" + ] + }, + "execution_count": 115, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "als.fit(ratings)" + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "id": "1bb4f7e6", + "metadata": {}, + "outputs": [], + "source": [ + "movie_factors = als.item_factors" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "id": "f794be70", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAX4AAAEGCAYAAABiq/5QAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3deXxU1f3/8dcn+0pCEpYACQFklZ2AghvuiPu+162l+rVWrba19dtqf36/1Vbrt3VpLa1Ua6m2dalL3cUFF8CArLLKGrYAgRASss75/THDJIFshEwmM/N+Ph7zyL3nbp+5DJ+5c+6555hzDhERiRxRwQ5AREQ6lhK/iEiEUeIXEYkwSvwiIhFGiV9EJMLEBDuA1sjKynJ5eXnBDkNEJKTMnz9/p3Ou28HlIZH48/LyKCgoCHYYIiIhxcw2NFYesKoeM8sxsw/NbLmZLTOz2w9afreZOTPLClQMIiJyqEBe8dcAdznnFphZKjDfzN5zzn1tZjnA6cDGAB5fREQaEbArfufcVufcAt90KbAc6O1b/H/AjwA9Niwi0sE6pI7fzPKAMcBcMzsP2OycW2RmzW0zDZgGkJub2wFRiki4qq6uprCwkIqKimCHEhAJCQn06dOH2NjYVq0f8MRvZinAS8AdeKt/7gXOaGk759x0YDpAfn6+fhmISJsVFhaSmppKXl4ezV1whiLnHLt27aKwsJB+/fq1apuAtuM3s1i8SX+mc+5lYADQD1hkZuuBPsACM+sZyDhEJLJVVFSQmZkZdkkfwMzIzMw8rF8zAbviN+8ZfhpY7px7FMA5twToXm+d9UC+c25noOI4YP3OMtISY+maHBfoQ4lIJxSOSf+Aw31vgbziPw64FjjFzBb6XlMDeLwmvfJVIZMf+YgTfv0hG3aVBSMEEZFOI2BX/M65T4Fmv4acc3mBOn69Y/DfrywFYF9lDU9+uIZfXzIq0IcVEQFg165dnHrqqQBs27aN6OhounXzPkw7b9484uJaVwsxY8YMpk6dSs+eR14zHhJP7h6JRYUllFXV+udfW7SFX5w3nOLyKmYt307J/mqum5RHakLr7oaLiByOzMxMFi5cCMD9999PSkoKd99992HvZ8aMGYwdO1aJvzVenL+pwXxFtYehP3+7QdnCTXt49PLRxEVHkRAb3ZHhiUgEe/bZZ3nyySepqqpi0qRJPPHEE3g8Hm644QYWLlyIc45p06bRo0cPFi5cyOWXX05iYuJh/VJoTFgn/tKKal5duKXF9d5fXsTI+98lPiaKb03sy/dOGUhaon4BiISjvHv+E7B9r3/o7Favu3TpUl555RU+//xzYmJimDZtGi+88AIDBgxg586dLFmyBIA9e/aQnp7O448/zhNPPMHo0aOPOM6wTvwz526ktKIGgKS4aJLiotm5rwqAjOQ4isuqGqxfWePhT7PX8dycDYzPyyA3I4krJ+QyvHdah8cuIuHt/fff58svvyQ/Px+A/fv3k5OTw5lnnsnKlSu5/fbbmTp1Kmec0eJjT4ctbBN/RXUtf569zj9/37nDOPPonmwq3k+/bsmkxMewYONuLvr9541s62H2am8L05lzN/LTqUOYduKADotdRMKfc44bb7yRBx544JBlixcv5q233uKxxx7jpZdeYvr06e167LBN/P8s2MTOfZUAZKclcOGYPsTFRJGeVFcvNja3K6cN7c77y4vo0SWe6ybl8dRH37DX9yvhgKc+XstNx/cnOip82wGLRIrDqY4JpNNOO41LLrmE22+/naysLHbt2kVZWRmJiYkkJCRw6aWX0q9fP26++WYAUlNTKS0tbZdjh23ir6rxkBwXTVlVLd85oT9xMY0/svDEVWMpWL+bkTlpdEmI5cIxvfn3V1uIMnjwrRUAFJdVsXDTHsb17dqRb0FEwtiIESO47777OO200/B4PMTGxvLUU08RHR3NTTfdhHMOM+NXv/oVADfccAPf/va32+Xmrjn
X+bvByc/Pd20ZiKWkvJqZ8zZww6R+JMYdfmudH7+4mH8UeFsFjcpJ5+4zBnH8UVlh/QSgSDhavnw5Q4cODXYYAdXYezSz+c65/IPXDesxd9OSYvmvyUe1KekDnDLU37sEizbt4dqn5/HOsm3tFZ6ISFCEdeI/UscflXVIFdHn3+wKUjQiIu1Dib8ZyfExnDMyu0HZ1pLw7M9bJNyFQrV2Wx3ue1Pib8EvLxzBd0/q75/fWrI/iNGISFskJCSwa9eusEz+B/rjT0hIaPU2Yduqp70kxEZz/aQ8/vjxWgC26YpfJOT06dOHwsJCduzYEexQAuLACFytpcTfCt1TE4iOMmo9jp37qqisqSU+Rn36iISK2NjYVo9OFQlU1dMK0VFG99R4//z2ksogRiMicmSU+FupZ1pd/Znq+UUklCnxt1KvtET/tFr2iEgoU+JvpYZX/Er8IhK6lPhbKbte4t+mqh4RCWEBS/xmlmNmH5rZcjNbZma3+8ofNrMVZrbYzF4xs/RAxdCesutV9WzRFb+IhLBAXvHXAHc554YCxwK3mtkw4D1guHNuJLAK+EkAY2g39at6Cnfril9EQlfAEr9zbqtzboFvuhRYDvR2zr3rnDvQ4f0coPVPHQRRv6xkDnTKuXzrXr5cXxzcgERE2qhD6vjNLA8YA8w9aNGNwFtNbDPNzArMrKAzPG2XkRzH+aN6+ed/+ebysHz8W0TCX8ATv5mlAC8Bdzjn9tYrvxdvddDMxrZzzk13zuU75/K7desW6DBb5a4zBhMX7T1lX23cwxfqqVNEQlBAE7+ZxeJN+jOdcy/XK78OOAe42oXQZXNORhKXja+rmXr+y00Nlq/fWcarCzezp7zq4E1FRDqNgPXVY95hqp4GljvnHq1XPgX4MXCSc648UMcPlKsm9OVvczYC8M7SbRSXVZEUF82PXlzM64u34ByMyU3n5VsmaaQuEemUAnnFfxxwLXCKmS30vaYCTwCpwHu+sqcCGEO7G9arC6P6pAFQVeth5pwNvLSgkNcWeZM+eKuB5m/YHcQoRUSaFrArfufcp0Bjl7xvBuqYHeWqY3JZVLgEgCc/WtOgjf8Blzz1BT85awjXTuxLUpw6QRWRzkNP7rbBxWP7MCy7CwAV1R7W7SxrdL0H31rBb99f3ZGhiYi0SIm/DWKio3jwohFEHfR7Zmh2F47u1aVB2b+/2ozHEzL3r0UkAijxt9GonHRumTygQdnpw3rwq4tHMqhHir+sqLSSBRtV3y8inYcS/xG447RBjM7xdjUUE2WcMzKb4b3TePfOk7hyQo5/vTeXbAtWiCIih1DiPwKx0VE8d9MEfjxlCE9fP55BPVL9y84anu2ffn3xFkorqoMRoojIIZT4j1BqQiy3TB7ASYMaPl08cUAmWSlxAOworeQXr38djPBERA6hxB8gsdFR3H/e0f75F+cXsmjTniBGJCLipcQfQOeM7MXpw3r459Wjp4h0Bkr8ATZpQKZ/esW20iBGIiLipcQfYIN71t3wXbVdiV9Egk99CQTY4B4NE7/H44iKMpZuLmHtzjIGdk9hSM9UdegmIh1GiT/AMlPiyUqJZ+e+SiqqPXy1aQ+PfbCaj1fVDS5zwsAsnrlhAtEHPwosIhIAqurpAIN71j3Je/EfPm+Q9AFmr97J3+dt7OiwRCRCKfF3gME9urS4zq/fXsGO0soOiEZEIp0SfwcYUu8G7wE3HtePFQ9MIS8zCYDSihqe+XxdR4cmIhFIib8DnDq0O2mJsf75/lnJ3H3mIBJio7nnrCH+8r/P3UhFdW0wQhSRCKKbux0gMyWe2T8+mfnrd1NeVcukAZn+wVlOG9qD3umJbN6zn93l1by2aAuX5ee0sEcRkbbTFX8H6ZIQy8lDunP2yGy6Jsf5y2Oio/jWxL7++YffWUlRaUUwQhSRCKHE3wlcMT6XjOS6Dt3OeexT/lWwCec0gIuItD8l/k4gLSmW310xmgPPcBWVVvLDFxfz59m62Ss
i7S9gid/McszsQzNbbmbLzOx2X3mGmb1nZqt9f7sGKoZQcsLAbjxw/nBi6j3E9fC7K1m9vZQ95VXMnLuBR99bpWogETliFqjqBDPLBrKdcwvMLBWYD1wAXA8UO+ceMrN7gK7OuR83t6/8/HxXUFAQkDg7m137Krnsj1/wzQ7vAO6x0UZSXAwl+70DuYzKSee+c4dRUV3LxP51HcCpywcROZiZzXfO5R9S3lH1yGb2KvCE7zXZObfV9+XwkXNucHPbRlLiB2+fPmc/Npvq2pb/baKjjGP7Z/DHa/NJiVcjLRGp01Ti75A6fjPLA8YAc4EezrmtAL6/3ZvYZpqZFZhZwY4dOxpbJWwN6pHKtybmtWrdWo/jszW7+PmrSwMblIiEjYAnfjNLAV4C7nDO7W3tds656c65fOdcfrdu3VreIMzcdspRDR76+s4J/Zpd/+UFm3lt0ZZAhyUiYSCgdQNmFos36c90zr3sK95uZtn1qnqKAhlDqEpPiuP3V4/lkXdXMnlQd75/6lGsLtrHRyt3EBNlXDuxL/sqapi1oohdZVUA/OAfC4kyeO/r7ZRX1XLPWUMY0C2lhSOJSKQJ5M1dA57FeyP3jnrlDwO76t3czXDO/ai5fUVaHX9TSvZX86+CTeTnZTA6Jx2A0opqTn/0E7btPbS1T3JcNN+alMcJA7PYVlLBa4u2UFFdy9DsLlw5IZekuGjiYqJIjoshMTaaKHULLRJWOvzmrpkdD8wGlgAeX/FP8dbz/xPIBTYClzrnmh2MVom/ee8s28Z3n5t/RPtIiY/hlxeN4LxRvaiq8bCvsob0xFh9GYiEsKC36jkSSvwt+/XbK3h5wWbSEmOJiTaWbWn17ZRDxEQZNR5HfEwUVx2Ty8/PGabmoiIhSIk/wmzfW8FrC7ewcnsp/1m8lYqaWi4d14f8vAxeml/Iim2lJMVFU1XjobSihqpaT5P7eumWSYzrq+fsREKNEn8E219VS3lVDZkp8Y0u37xnPxc++RlFTQwEc8m4Pjxy6ahAhigiAdBU4tcTPxEgMS6axLjoJpf3Tk/klVuP45UFheRlJXPioG6s3VHGBU9+BsAbi7fws3OGUVZZw//+Zzke5/jVJSPpkhDb5D5FpPNS4hfAm/y/d8pA//yoPmkMze7C8q17qaj28Ou3VzBrRRFbS7yth3IzkvjJ1KHBCldEjoB655RGmRlXH5Prn585d6M/6QO8vmgLHk/nryYUkUMp8UuTLhrbm/SkxqtztpRUMH/j7g6OSETagxK/NCkpLqbBVf/BXlcXESIhSYlfmnXdxDy6+q76TxrUjb9/+xj/sjeXbKWmmWagItI56eauNKt7lwT+fetxfL1lL6cO7UF0lNE9NZ6i0kp27qvii7W7OGFg5HWiJxLKdMUvLeqbmcxZI7KJi4kiOso4e2S2f5mqe0RCjxK/HLZzR/XyT7+9dBuVNbVBjEZEDpcSvxy2MTnp9OmaCMDeiho+WbUzyBGJyOFQ4pfDZmYNrvpV3SMSWpT4pU3OHVmX+L0Dv9QEMRoRORxK/NImQ7NTGdAtGYD91bV8sFwDqYmECiV+aRMz4+x6V/0F65sdS0dEOhElfmmzwT1S/dOb9xw69KOIdE5K/NJm2ekJ/umtJfuDGImIHA4lfmmz3umJ/un6PXeKSOcWsMRvZjPMrMjMltYrG21mc8xsoZkVmNmEQB1fAi8rJZ4Y32DsxWVVVFTrQS6RUBDIK/5ngCkHlf0a+IVzbjTwc9+8hKjoKKNHl7rqni17VN0jEgoClvidc58ABzf1cEAX33QaoCd/Qpyqe0RCT0f3znkH8I6ZPYL3S2dSBx9f2ln9G7y64hcJDR19c/cW4E7nXA5wJ/B0Uyua2TTffYCCHTt2dFiAcniy0+qu+LeoSadISOjoxH8d8LJv+l9Akzd3nXPTnXP5zrn8bt3U33tn1UtNOkVCTkcn/i3ASb7pU4DVHXx8aWe96l/xq45fJCQErI7fzJ4HJgNZZlY
I3Ad8B/idmcUAFcC0QB1fOobq+EVCT8ASv3PuyiYWjQvUMaXj5WQk+afX7yyjrLKG5HiN6CnSmenJXTkiXRJiGdLT22dPjcfx1cY9QY5IRFqixC9HbEK/DP/0vHW7ghiJiLSGEr8csfF5dYl/7rpi5m8o5n//8zWrtpcGMSoRaYoqY+WI1b/in7uumEuf+gKPg1krivjgrsnBC0xEGqUrfjliPbokkJdZd5PX47x/v9lRxjY18RTpdJpN/GZ2Tb3p4w5a9r1ABSWh57LxOY2WLyrUzV6RzqalK/4f1Jt+/KBlN7ZzLBLCbj5xAN+a2PeQ8sVK/CKdTkuJ35qYbmxeIlhUlPGL847m91eP5YSBWf7yxYUlQYxKRBrTUuJ3TUw3Ni8RzsyYOiKbX144wl+2aNMenNNHRaQzaalVzxAzW4z36n6AbxrffP+ARiYhq0/XRLomxbK7vJq9FTXMWVvMxAGZwQ5LRHxaSvxDOyQKCStmRn5eBu99vR2AG56Zxz+mTWRUTnqQIxMRaKGqxzm3of4L2AeMBbJ88yKNuvuMwaQnxQJQUe3hiQ/XBDkiETmgpeacb5jZcN90NrAUb2ue58zsjg6IT0LU4J6p/PXGuuEWvvhmF9W1niBGJCIHtHRzt59zbqlv+gbgPefcucAxqDmntGBE7zT/mLz7Kms45Tcf8ei7K/UFIBJkLSX+6nrTpwJvAjjnSgH975VmmVmDpp2bivfz2Kw1/OCfi6j1qKWPSLC0lPg3mdltZnYh3rr9twHMLBGIDXRwEvpOGHjosJmvL9rC71XnLxI0LSX+m4CjgeuBy51zBx7DPBb4SwDjkjBx3FGZREcd+qzfjM/WUVFdG4SIRKSlVj1FzrmbnXPnO+ferVf+oXPukcCHJ6EuPSmOn509lP7dkvnZOcPo09Vb57+7vJpXvtoc5OhEIpM191Slmb3W3MbOufPaPaJG5Ofnu4KCgo44lATYnz5Zy/++udw/f/LgbmQkx/PDMwfTMy2hmS1F5HCZ2XznXP7B5S09wDUR2AQ8D8xF/fPIEbpsfA6/fX8VZVXeap4PV+4AYHd5FTOuHx/M0EQiRkt1/D2BnwLDgd8BpwM7nXMfO+c+bm5DM5thZkVmtvSg8tvMbKWZLTOzXx9J8BJ60hJj+cM148hKiW9Q/tHKIvXdL9JBWqrjr3XOve2cuw7vDd01wEdmdlsr9v0MMKV+gZmdDJwPjHTOHQ3oPkEEOnFQNz7+4WR+e/lof5nHwW3PL9BwjSIdoMURuMws3swuAv4G3Ao8Brzc0nbOuU+A4oOKbwEecs5V+tYpOuyIJSwkx8dwwZjePHrZKH/Zl+t3c9bvZvPRSn0sRAKppS4bngU+x9uG/xfOufHOuQecc21tjjEIOMHM5prZx2bWZKWumU0zswIzK9ixY0cbDyed3ZThPUmJr7vVVOtx/PDFxRSXVQUxKpHw1tIV/7V4k/XtwOdmttf3KjWzvW04XgzQFW+10Q+Bf5pZozeMnXPTnXP5zrn8bt0OfQhIwkNSXAy/uWwUA7un+Mt2lFbyk5cXqx9/kQBpqY4/yjmX6nt1qfdKdc51acPxCoGXndc8vN0+ZLWwjYS5M4/uyXs/OIm/1GvV886y7bw4vzCIUYmErxbr+NvZv4FTAMxsEBAH7OzgGKSTOnlId645Ntc//8AbX+vpXpEACFjiN7PngS+AwWZWaGY3ATOA/r4mni8A1zn9npd6fjp1KL18D3LtrahhyWaN2SvS3lp6gKvNnHNXNrHomkAdU0JfUlwMEwdk8dICbzXPok17GJ+XEeSoRMJLR1f1iLRodE6af3pRoa74RdqbEr90OiP71I3Nu7hwTzNrikhbKPFLpzMkO5W4aO9Hc8OucvaUq02/SHtS4pdOJz4mmqHZqf75hZt01S/SnpT4pVManVNX3fP4rDUaqlGkHSnxS6d0xYRcYnwjd83fsJvHZ60OckQi4UOJXzqlodld+P6pA/3zv31/Nc9+vj5
4AYmEESV+6bRumTyAif0z/fP3vbaMf3y5MYgRiYQHJX7ptGKjo/jTdfmMza2r77/n5SVq4ilyhJT4pVNLiY/hLzdMYHhvb5+AzsHbS7cFOSqR0KbEL51eWmIs1x7b1z+/VUM0ihwRJX4JCdlpif7prSX7gxiJSOhT4peQkO3rsRPQoOwiR0iJX0JCdnr9K/4Kjc4lcgSU+CUkpMTHkOobm7eyxsPu8uogRyQSupT4JWRkp9dV92zZ463n31dZw9LNJfoFIHIYAjYQi0h765mWyKrt+wB4+tN19OiSwPPzNlKyv5oTBmYx/dp8EuOi/etXVNcSE2XEROv6RqQ+JX4JGb3q3eB95avNDZbNXr2Tcf/zHsN7pVFZ62F/VQ1rd5QRE2385foJTByQefDuRCKWLoUkZPSsl/gbU15Vy7z1xSzatIdV2/dR43FUVHu477WleNS7p4hfIAdbn2FmRb6B1Q9edreZOTPLCtTxJfxkN5L4H7poBD+aMpiE2KY/yqu272P67LW6DyDiE8iqnmeAJ4C/1i80sxzgdEC9bclh6VnvIS6Ap64Zx5ThPQG4bmIe63aWsa+yhriYKOKio3h+3kZmzvV+zB56awUz527givG5XD8pj+R41XJK5ArYFb9z7hOguJFF/wf8CNDllxyW/lnJDeZPG9rdP50cH8Pw3mkc2z+TsbldGd47jR+cPojkejd7NxXv5+F3VnL6ox8zb11jH02RyNChdfxmdh6w2Tm3qCOPK+EhJyOJH5w+iDG56fz9O8e02FonMyWep68fz2lDe5CaUHeFv6Wkgu/8tYDSCj0LIJHJAlnvaWZ5wBvOueFmlgR8CJzhnCsxs/VAvnNuZxPbTgOmAeTm5o7bsGFDwOKU8FdRXcurCzfz4Fsr2ON7+OunU4cw7cQBQY5MJHDMbL5zLv+Q8g5M/COAD4By3+I+wBZggnOu2X528/PzXUFBQcDilMjxwryN3PPyEsDb62d+367UOseqbaXUeBy5GUlcPj6H6lrv/4sh2ansKa9i8+799EpPZPLg7kT7hoQU6eyaSvwddofLObcE8FfKtnTFLxIIF4zpzW/eW8WO0kpK9lfzwYqiBsuLSisp2LC7ye2P7tWFX144glH1BoMXCTWBbM75PPAFMNjMCs3spkAdS6S1EmKjufuMQW3eftmWvZz/5Gec8/hs7n1lib/rCJFQEtCqnvaiqh5pb9/s2MeWPfupqPZQ6/HQLyuFKIM/fPQNry3aQr+sZNISYykur6JnlwSio4x564qprPE02E9qfAy3nzaQa47tS0JsdBNHEwmOoNTxtxclfulItR7XaD3+xl3l3P6Pr/hq46Fj/o7OSeeFaccq+Uun0lTiV5cNIgdp6uZtbmYSL908ide+dxwPXjSCvMwk/7KFm/bw+qItHRWiyBFR4hc5DFFRxsg+6Vw5IZd37jyRqSN6+pf9bY6aHEtoUOIXaaP4mGj+54IRxMV4/xstKixh0aZDq4FEOhslfpEjkJEcxzkjsv3z02evDWI0Iq2jnqpEjtCNx/fjZd/4AG8u2crq7aUM7JFKRXUtSzeXULh7P+lJsWQkx1FeVcvonHTdBJagUuIXOULDe6dx6pDufLCiCOfgrn8tYnCPVN5auo19lTWHrD+ub1e+e2J/du6r4qKxvfUlIB1OzTlF2sHiwj2c98Rnh73d8Udl8ZcbxhOr4SElANScUySARvZJ57sn9j+kPDcjiTOG9WB0E108fLpmJxc8+Rkz525gw66yQIcpAqiqR6Td3HPWEBzwp9lr6ZYSzy8vHMGpQ7tj5n0uwDnHtOfm897X2xtst2zLXu59xTtQ3TXH5vLjKUNITYjt6PAlgqiqR6Sd7SitJCM5rtEHwSqqa/lnwSb6dE1k9uqd/OWz9YesE2Xe+wCX5edw0dg+6g1U2kxdNoh0QoW7y3l76TbeWbaNL9cf2ivocUdlctKgbny0cgdFpZWceXQPBnZPJTk+hkkDMjWEpDRLiV+
kE3PO8fd5G3n603Ws3dG6uv6kuGgevmQUZ4/MbnlliUi6uSvSiZkZVx/Tl1l3TebLe0/j4rF9WtymvKqWu/+1iM3qGloOkxK/SCfTLTWehy8ZyZUTcgHIy0zi/nOH8d2T+pPftyvH9s/wr7u/upYHXv86WKFKiFJVj0gnVrK/mi4JMf6WQQd8ub6YS5/6wj//5FVjVeUjh1BVj0gISkuMPSTpA4zPy+DScXXVQff++/BGA3POsXRzCW8u2cqaotJ2iVVCh5oEiISon507jM/W7GRLSQV7yqu5fPoXPHjhSCYOyGy2CeiO0kpu/tt85vvGFo6LieKlmycxok9aR4UuQaYrfpEQ1SUhlt9cNpoYX5LfVLyfa56ey5V/mkNZI30EOed4d9k2LnjyM3/SB6iq8fDAf74mFKp9pX0o8YuEsIkDMvn91WOJja67wp+3rpjfvr/KP79w0x7++sV6rvrTXKY9N7/RVkDz1hUf8kSxhK+AVfWY2QzgHKDIOTfcV/YwcC5QBXwD3OCc08gVIkfgjKN78vItx/HgW8v5/JtdAMz4bD0j+6TztzkbmLuu+JBtuiTE8Ohlo/l0zU6e+Xw9AA/852vG9u3Ksi17qaiupWeXBJ6bs4G0xFjuPH0QKXpYLGwErFWPmZ0I7AP+Wi/xnwHMcs7VmNmvAJxzP25pX2rVI9Iy5xxX/3muP/k3Jsrg6mP6cufpg8hIjqO4rIqTH/mIkv3Vze57RO80nrlhPJkp8e0ac02th6paD/Ex0Xy4ooieaQkM7617De2lqVY9AfsKd859YmZ5B5W9W292DnBJoI4vEmnMjF9eOIKzH5tNWVWtvzzK4PRhPRjUI5VzRvZicM9U/7KM5DjuOWsIP3l5SbP7XrK5hGnPzeeFacce0oX0n2ev5cOVRdx52iDy8zKa2INXeVUNM+ds5Iu1u/A4x1cb9zT40jHzNk2dOkJNUwMpoO34fYn/jQNX/Actex34h3Pub01sOw2YBpCbmztuwwYNZC3SGv9ZvJVb/74AgNhoY8b14zlhYLcm1/d4HNc/8yWfrNqBGQzolkJpRTXb91Yesu6onHRunTyA04b2ICrKmL+hmIv/UPc8wcT+mcREG+P6drxgdRYAAA0zSURBVKV/txSGZXdhQLdkzIzV20v51ox5bC2paDb++Jgonp92LGNzu7bxDMgBQemrp6nEb2b3AvnARa4VAaiqR+TwPD9vI28v3cZNx/fjxEFNJ/0Dqms9rN6+j17pCaQnxeGco6i0ku6p8fzxk7U89NaKBusP6pHCI5eOYvona3lj8dZm990rLYExfbvyxTe7KC6ravV7uO2Uozh5SPcmvwDW7Sxjf1Utg3qkEKOBbBrVaRK/mV0H3Ayc6pwrb81+lPhFgsfjcdzxj4W8tmhLu+wvKS6ab5/Qnx5d4snpmsS6nWV8sKKIMTnpPDdnQ4MvBzN46ppxDO+dxv97fRlRZlwyrg+LC0t4bNZqnIPU+Bj+58LhnD+6d7vEF046ReI3synAo8BJzrkdrd2PEr9IcDnnmLuumFkrinj28/VU1ngaXS8xNprzR/ciKS6G9bvKKFhfzN6KumcKkuKi+cv14zmmf2aj28/fUMyVf5pLVRP7b0paYiyzf3wyXRJiKdxdzh8++oaUhBimHN2TMRFcZdThid/MngcmA1nAduA+4CdAPHCg2cEc59zNLe1LiV+k81i0aQ9XTJ/D/mrvDWQzeOaGCaTEx3BU9xTSEutGD9tfVcuMz9bxxTe7yM/ryqX5OfROT2x2/5+v2ckfPv6G2at3HlZct51yFLefOpCzH/uUldtL/bE9cskoLh7Xcm+n4Uj98YtIu/lyfTFPffQNfTOTuWx8H4b07NLux/jim11c8/Rcaj11OSorJY7M5Hg2FJdx6pAeHHdUFj99pfkWSWmJscy666R2b4oaCpT4RSTkLN1cwj++3MTanfs4e0Qvrjomt8HyWo9jym8/YXXRvmb3c8awHjxx1VjiYiL
rJrASv4iEpY27yrn7X4uYt77uCeUeXeK5/9yjuWXmAn/ZuL5dufuMwUwc0Pj9hXCkxC8iYcs5R8GG3by9dBvb91bwX5OPYlivLtz/2jJ/lxQHjMlN55TB3cnPywj7LwElfhGJOM45HvtgDb/9YBUHp7qYKOP1245naHb735/oLDQQi4hEHDPj9tMGMuuuyZx9UDcQNR7HvwoK/fObisuZt64Yj6fzXwwfKXW3JyJhr19WMk9cNYZJ8zL5738v9V/9v754C/eePZR3l23je89/Ra3HMWlAJt875SiK9lZSWVPLeaN6kxgXHdw30M5U1SMiEaWm1sOxD85i575D+yJqzMg+acy4fjxZIdgcVFU9IiJATHQU5xzGwPSLC0s4+ZGP+NXbK9hWUkFNrSfkRyvTFb+IRJw1Rfs45/HZVFTXdQ0xoncal4zrwwcriti1r5KtJRXNdirXLyuZb03sy7XH9u20ncSpVY+ISD3b91awdHMJAEd1TyE3IwmzhoPUf7iiiAfe+Jq1O8ua3M+VE3J48KKRAY21rZT4RUTaoNbjeO/r7Tz96VoWbSqhxuPh4IY/b91+QqdsFtrhI3CJiISD6ChjyvCeTBne01+2v6qWK6Z/waJC7y+Gh99ZydPX5R/yi6Gz6pwVUyIinVhiXDQPXTySA3l+1ooifvjiYjbv2R/cwFpJiV9EpA2GZnfh8vwc//yL8ws5/lezeOSdlZ2+1Y8Sv4hIGz1wwXCmjqirAnIOnvhwDY/PWhPEqFqmxC8i0kax0VE8dsUY/vfC4RzVPcVf/uh7q/h8zeENJNORlPhFRI5ATHQUVx/Tl/98/3iOO6qut897Xl5CeVVNM1sGjxK/iEg7iI+J5v8uG02XBG9jyY3F5Xz/+YVU1x7e+MEdQYlfRKSddO+SwM/PPdo///7y7Xz72YJW9wvUUZT4RUTa0cVje/Pdk/r75z9etYOzfjebrzbuDmJUDQUs8ZvZDDMrMrOl9coyzOw9M1vt+9s1UMcXEQkGM+OeKUO4ZfIAf9mO0kounz6Hj1YWBTGyOoG84n8GmHJQ2T3AB865gcAHvnkRkbBiZvx4yhD+euME0pNiAaiq8fBfMxewuHBPkKMLYOJ3zn0CFB9UfD7wrG/6WeCCQB1fRCTYThzUjVdvPY7e6YkAlFfVctkfv+CBN77m7aVbKa2oDkpcAe2kzczygDecc8N983ucc+n1lu92zjVa3WNm04BpALm5ueM2bNgQsDhFRAJpTVEpF/3+c/ZWNGzeGRcdxWXj+/CdE/qzu7yaFVv3ctLgbmQkxzFnbTEfrigiNtq49+xhbTpuUHrnPJLEX5965xSRULdqeym3/G0+3+xouovnxqQmxPDVz05vU5//nWUEru1mlu0LKBvoHHc6REQCbFCPVF6/7Xh+ffFIvn18v1Z341xaUcP8De3bIqiju2V+DbgOeMj399UOPr6ISNAkxcVw2Xhvx27OOd5Zto2nPl7L+l1leDyuQVVQn66JnDGsJ6cM6c7o3PSmdtkmAUv8ZvY8MBnIMrNC4D68Cf+fZnYTsBG4NFDHFxHpzMyMKcOzmTK8bvzfqhoPs1ZsJz0pjgl5GURFBaZ//4AlfufclU0sOjVQxxQRCWVxMVENvggCRU/uiohEGCV+EZEIo8QvIhJhlPhFRCKMEr+ISIRR4hcRiTBK/CIiESagffW0FzPbAbS1l7YsoPOOetzxdD7q6FzU0bloKFzOR1/nXLeDC0Mi8R8JMytorJOiSKXzUUfnoo7ORUPhfj5U1SMiEmGU+EVEIkwkJP7pwQ6gk9H5qKNzUUfnoqGwPh9hX8cvIiINRcIVv4iI1KPELyISYcI68ZvZFDNbaWZrzOyeYMfT0cxsvZktMbOFZlbgK8sws/fMbLXvb4tjHocqM5thZkVmtrReWaPv37we831WFpvZ2OBF3v6aOBf3m9lm3+djoZlNrbfsJ75zsdLMzgxO1IFhZjlm9qGZLTezZWZ2u688Yj4bYZv
4zSwaeBI4CxgGXGlmbRuqPrSd7JwbXa9N8j3AB865gcAHvvlw9Qww5aCypt7/WcBA32sa8IcOirGjPMOh5wLg/3yfj9HOuTcBfP9PrgCO9m3ze9//p3BRA9zlnBsKHAvc6nvPEfPZCNvED0wA1jjn1jrnqoAXgPODHFNncD7wrG/6WeCCIMYSUM65T4Dig4qbev/nA391XnOAdDML/FBIHaSJc9GU84EXnHOVzrl1wBq8/5/CgnNuq3NugW+6FFgO9CaCPhvhnPh7A5vqzRf6yiKJA941s/lmNs1X1sM5txW8/wGA7kGLLjiaev+R+nn5nq/6Yka9ar+IORdmlgeMAeYSQZ+NcE78jY1SHGltV49zzo3F+1P1VjM7MdgBdWKR+Hn5AzAAGA1sBX7jK4+Ic2FmKcBLwB3Oub3NrdpIWUifj3BO/IVATr35PsCWIMUSFM65Lb6/RcAreH+ubz/wM9X3tyh4EQZFU+8/4j4vzrntzrla55wH+BN11Tlhfy7MLBZv0p/pnHvZVxwxn41wTvxfAgPNrJ+ZxeG9WfVakGPqMGaWbGapB6aBM4CleM/Bdb7VrgNeDU6EQdPU+38N+JavBcexQMmBn/3h6qB66gvxfj7Aey6uMLN4M+uH96bmvI6OL1DMzICngeXOuUfrLYqcz4ZzLmxfwFRgFfANcG+w4+ng994fWOR7LTvw/oFMvC0WVvv+ZgQ71gCeg+fxVmFU471qu6mp94/35/yTvs/KEiA/2PF3wLl4zvdeF+NNbtn11r/Xdy5WAmcFO/52PhfH462qWQws9L2mRtJnQ102iIhEmHCu6hERkUYo8YuIRBglfhGRCKPELyISYZT4RUQijBK/RAQz+9z3N8/Mrmrnff+0sWOJdFZqzikRxcwmA3c75845jG2inXO1zSzf55xLaY/4RDqCrvglIpjZPt/kQ8AJvv7n7zSzaDN72My+9HVW9l3f+pN9fbb/He9DO5jZv30d3i070OmdmT0EJPr2N7P+sXxPej5sZkvNOy7C5fX2/ZGZvWhmK8xspu9pUszsITP72hfLIx15jiRyxAQ7AJEOdg/1rvh9CbzEOTfezOKBz8zsXd+6E4Dhzts1McCNzrliM0sEvjSzl5xz95jZ95xzoxs51kV4O0AbBWT5tvnEt2wM3v7utwCfAceZ2dd4u04Y4pxzZpbe7u9eBF3xi5yBtx+WhXi75s3E2zcNwLx6SR/g+2a2CJiDt9OugTTveOB55+0IbTvwMTC+3r4LnbeDtIVAHrAXqAD+bGYXAeVH/O5EGqHEL5HOgNtc3ShU/ZxzB674y/wree8NnAZMdM6NAr4CElqx76ZU1puuBWKcczV4f2W8hHcQkLcP652ItJISv0SaUiC13vw7wC2+bnoxs0G+3kwPlgbsds6Vm9kQvEP2HVB9YPuDfAJc7ruP0A04kWZ6ufT1D5/mvEMg3oG3mkik3amOXyLNYqDGV2XzDPA7vNUsC3w3WHfQ+HCUbwM3m9livD1Wzqm3bDqw2MwWOOeurlf+CjARbw+pDviRc26b74ujManAq2aWgPfXwp1te4sizVNzThGRCKOqHhGRCKPELyISYZT4RUQijBK/iEiEUeIXEYkwSvwiIhFGiV9EJML8fwu7pqO85lGRAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "als.fit_stream(ratings, test, train_df)\n", + "plot_learning_curve(als)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7.4 64-bit ('base': conda)", + "language": "python", + "name": "python37464bitbaseconda9114583a17cf498dbdf9713d49f5bef8" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}