Port scikits.crab to Python 3 syntax and to the `sklearn` package name.

Review amendments relative to the submitted patch:
  * models/classes.py: build_model() collects item ids from every user's
    preference dict again (the submitted hunk gathered the outer user keys).
  * recommenders/knn/classes.py: estimate_preference() filters `prefs` and
    `similarities` with one shared NaN mask instead of disabling the filter.
  * similarities/basic_similarities.py: dict keys are passed as list(src) /
    list(tgt); the lazy zip() result is materialized before it is tested.
  * datasets/book_crossing.py: print(...) calls normalized (no behavior change).

NOTE(review): the patch text had lost its line breaks and indentation; both
were reconstructed from the hunk line counts using 4-space levels. Verify the
context lines against the tree before applying. `index` lines are carried
over unchanged, so post-image hashes of the amended files are stale.

diff --git a/scikits/crab/base.py b/scikits/crab/base.py
index b49c70d..435e6fa 100644
--- a/scikits/crab/base.py
+++ b/scikits/crab/base.py
@@ -8,7 +8,7 @@
 # Bruno Melo
 # License: BSD Style.
 
-from scikits.learn.base import BaseEstimator
+from sklearn.base import BaseEstimator
 
 
 class BaseRecommender(BaseEstimator):
diff --git a/scikits/crab/datasets/book_crossing.py b/scikits/crab/datasets/book_crossing.py
index 5a522dc..6e7be95 100644
--- a/scikits/crab/datasets/book_crossing.py
+++ b/scikits/crab/datasets/book_crossing.py
@@ -40,7 +40,7 @@
 from os.path import dirname
 from os.path import join
 import numpy as np
-from base import Bunch
+from .base import Bunch
 import csv
 
 logger = logging.getLogger(__name__)
@@ -116,19 +116,19 @@ def load_bookcrossings(data_home=None, download_if_missing=True,
         if not os.path.exists(os.path.join(data_home, 'BX-Book-Ratings.csv')) \
             and not open(os.path.join(data_home, 'BX-Books.csv')):
             raise IOError
-    except Exception, e:
-        print 80 * '_'
-        print 'Loading files failed'
-        print 80 * '_'
-        print e
+    except Exception as e:
+        print(80 * '_')
+        print('Loading files failed')
+        print(80 * '_')
+        print(e)
 
         if download_if_missing:
-            print 'downloading the dataset...'
+            print('downloading the dataset...')
             try:
                 download_book_crossings(data_home)
             except:
                 raise Exception('FAIL: Problems during the download.')
-            print 'dataset downloaded.'
+            print('dataset downloaded.')
         else:
             raise IOError('Book-Crossing dataset not found')
 
diff --git a/scikits/crab/models/classes.py b/scikits/crab/models/classes.py
index 010dbf7..e145613 100644
--- a/scikits/crab/models/classes.py
+++ b/scikits/crab/models/classes.py
@@ -105,13 +105,13 @@ def build_model(self):
         Build the data model
         '''
         #Is it important to store as numpy array ?
-        self._user_ids = np.asanyarray(self.dataset.keys())
+        self._user_ids = np.asanyarray(list(self.dataset.keys()))
         self._user_ids.sort()
 
         #Is it important to store as numpy array ?
         self._item_ids = []
-        for items in self.dataset.itervalues():
+        for items in self.dataset.values():
             self._item_ids.extend(items.keys())
 
         self._item_ids = np.unique(np.array(self._item_ids))
         self._item_ids.sort()
diff --git a/scikits/crab/recommenders/knn/classes.py b/scikits/crab/recommenders/knn/classes.py
index bf96af3..f72fbf4 100644
--- a/scikits/crab/recommenders/knn/classes.py
+++ b/scikits/crab/recommenders/knn/classes.py
@@ -13,10 +13,10 @@
 # Author: Marcel Caraciolo
 #
 # License: BSD Style.
-
-from base import ItemRecommender, UserRecommender
-from item_strategies import ItemsNeighborhoodStrategy
-from neighborhood_strategies import NearestNeighborsStrategy
+from sklearn.base import BaseEstimator
+from .base import ItemRecommender, UserRecommender
+from .item_strategies import ItemsNeighborhoodStrategy
+from .neighborhood_strategies import NearestNeighborsStrategy
 import numpy as np
 
 
@@ -533,8 +533,11 @@ def estimate_preference(self, user_id, item_id, **params):
         prefs = np.array([self.model.preference_value(to_user_id, item_id)
                  for to_user_id in nearest_neighbors])
 
-        prefs = prefs[~np.isnan(prefs)]
-        similarities = similarities[~np.isnan(prefs)]
+        # Build the NaN mask once, before filtering: masking `similarities`
+        # with the already-filtered `prefs` would use a shorter, all-True mask.
+        rated = ~np.isnan(prefs)
+        prefs = prefs[rated]
+        similarities = similarities[rated]
 
         prefs_sim = np.sum(prefs[~np.isnan(similarities)] *
                              similarities[~np.isnan(similarities)])
@@ -597,7 +600,7 @@ def recommend(self, user_id, how_many=None, **params):
 
         '''
 
-        self._set_params(**params)
+        self.set_params(**params)
 
         candidate_items = self.all_other_items(user_id, **params)
 
diff --git a/scikits/crab/recommenders/knn/item_strategies.py b/scikits/crab/recommenders/knn/item_strategies.py
index c003891..600b881 100644
--- a/scikits/crab/recommenders/knn/item_strategies.py
+++ b/scikits/crab/recommenders/knn/item_strategies.py
@@ -11,7 +11,7 @@
 #
 # License: BSD Style.
 
-from base import BaseCandidateItemsStrategy
+from .base import BaseCandidateItemsStrategy
 import numpy as np
 
 
diff --git a/scikits/crab/recommenders/knn/neighborhood_strategies.py b/scikits/crab/recommenders/knn/neighborhood_strategies.py
index 3bcf312..9dad2b5 100644
--- a/scikits/crab/recommenders/knn/neighborhood_strategies.py
+++ b/scikits/crab/recommenders/knn/neighborhood_strategies.py
@@ -11,7 +11,7 @@
 #
 # License: BSD Style.
 
-from base import BaseUserNeighborhoodStrategy
+from .base import BaseUserNeighborhoodStrategy
 import numpy as np
 from ...similarities.basic_similarities import UserSimilarity
 from ...metrics.pairwise import euclidean_distances
diff --git a/scikits/crab/recommenders/svd/classes.py b/scikits/crab/recommenders/svd/classes.py
index 36be33a..ede955e 100644
--- a/scikits/crab/recommenders/svd/classes.py
+++ b/scikits/crab/recommenders/svd/classes.py
@@ -15,7 +15,7 @@
 # License: BSD Style.
 
 import random
-from base import SVDRecommender
+from .base import SVDRecommender
 from ..knn.item_strategies import ItemsNeighborhoodStrategy
 import numpy as np
 from math import sqrt
diff --git a/scikits/crab/similarities/basic_similarities.py b/scikits/crab/similarities/basic_similarities.py
index b8231f8..7b2198e 100644
--- a/scikits/crab/similarities/basic_similarities.py
+++ b/scikits/crab/similarities/basic_similarities.py
@@ -9,7 +9,7 @@
 
 
 import numpy as np
-from base import BaseSimilarity
+from .base import BaseSimilarity
 from ..metrics.pairwise import loglikehood_coefficient
 
 
@@ -18,10 +18,12 @@ def find_common_elements(source_preferences, target_preferences):
     src = dict(source_preferences)
     tgt = dict(target_preferences)
 
-    inter = np.intersect1d(src.keys(), tgt.keys())
+    inter = np.intersect1d(list(src), list(tgt))
 
     common_preferences = zip(*[(src[item], tgt[item]) for item in inter \
             if not np.isnan(src[item]) and not np.isnan(tgt[item])])
+    # zip() is lazy on Python 3; materialize it so it can be tested and indexed.
+    common_preferences = list(common_preferences)
     if common_preferences:
         return np.asarray([common_preferences[0]]), np.asarray([common_preferences[1]])
     else: