Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion scikits/crab/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
# Bruno Melo <bruno@muricoca.com>
# License: BSD Style.

from scikits.learn.base import BaseEstimator
from sklearn.base import BaseEstimator


class BaseRecommender(BaseEstimator):
Expand Down
16 changes: 8 additions & 8 deletions scikits/crab/datasets/book_crossing.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
from os.path import dirname
from os.path import join
import numpy as np
from base import Bunch
from .base import Bunch
import csv

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -116,19 +116,19 @@ def load_bookcrossings(data_home=None, download_if_missing=True,
if not os.path.exists(os.path.join(data_home, 'BX-Book-Ratings.csv')) \
and not open(os.path.join(data_home, 'BX-Books.csv')):
raise IOError
except Exception, e:
print 80 * '_'
print 'Loading files failed'
print 80 * '_'
print e
except Exception as e:
print ( 80 * '_')
print ('Loading files failed')
print (80 * '_')
print (e)

if download_if_missing:
print 'downloading the dataset...'
print ('downloading the dataset...')
try:
download_book_crossings(data_home)
except:
raise Exception('FAIL: Problems during the download.')
print 'dataset downloaded.'
print ('dataset downloaded.')
else:
raise IOError('Book-Crossing dataset not found')

Expand Down
10 changes: 7 additions & 3 deletions scikits/crab/models/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,13 +105,17 @@ def build_model(self):
Build the data model
'''
#Is it important to store as numpy array ?
self._user_ids = np.asanyarray(self.dataset.keys())
myListOKeys =[k for k,_ in self.dataset.items()]
self._user_ids = np.asanyarray(myListOKeys)
self._user_ids.sort()

#Is it important to store as numpy array ?
self._item_ids = []
for items in self.dataset.itervalues():
self._item_ids.extend(items.keys())


#for dictVal in self.dataset.values():
# self._item_ids.extend(dictVal.keys())
self._item_ids = [k for k,v in self.dataset.items() ]

self._item_ids = np.unique(np.array(self._item_ids))
self._item_ids.sort()
Expand Down
15 changes: 8 additions & 7 deletions scikits/crab/recommenders/knn/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@
# Author: Marcel Caraciolo <marcel@muricoca.com>
#
# License: BSD Style.

from base import ItemRecommender, UserRecommender
from item_strategies import ItemsNeighborhoodStrategy
from neighborhood_strategies import NearestNeighborsStrategy
from sklearn.base import BaseEstimator
from .base import ItemRecommender, UserRecommender
from .item_strategies import ItemsNeighborhoodStrategy
from .neighborhood_strategies import NearestNeighborsStrategy
import numpy as np


Expand Down Expand Up @@ -533,8 +533,9 @@ def estimate_preference(self, user_id, item_id, **params):
prefs = np.array([self.model.preference_value(to_user_id, item_id)
for to_user_id in nearest_neighbors])

prefs = prefs[~np.isnan(prefs)]
similarities = similarities[~np.isnan(prefs)]

# prefs = prefs[~np.isnan(prefs)]
# similarities = similarities[~np.isnan(prefs)]

prefs_sim = np.sum(prefs[~np.isnan(similarities)] *
similarities[~np.isnan(similarities)])
Expand Down Expand Up @@ -597,7 +598,7 @@ def recommend(self, user_id, how_many=None, **params):

'''

self._set_params(**params)
self.set_params(**params)

candidate_items = self.all_other_items(user_id, **params)

Expand Down
2 changes: 1 addition & 1 deletion scikits/crab/recommenders/knn/item_strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#
# License: BSD Style.

from base import BaseCandidateItemsStrategy
from .base import BaseCandidateItemsStrategy
import numpy as np


Expand Down
2 changes: 1 addition & 1 deletion scikits/crab/recommenders/knn/neighborhood_strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#
# License: BSD Style.

from base import BaseUserNeighborhoodStrategy
from .base import BaseUserNeighborhoodStrategy
import numpy as np
from ...similarities.basic_similarities import UserSimilarity
from ...metrics.pairwise import euclidean_distances
Expand Down
2 changes: 1 addition & 1 deletion scikits/crab/recommenders/svd/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# License: BSD Style.
import random

from base import SVDRecommender
from .base import SVDRecommender
from ..knn.item_strategies import ItemsNeighborhoodStrategy
import numpy as np
from math import sqrt
Expand Down
9 changes: 7 additions & 2 deletions scikits/crab/similarities/basic_similarities.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@


import numpy as np
from base import BaseSimilarity
from .base import BaseSimilarity
from ..metrics.pairwise import loglikehood_coefficient


Expand All @@ -18,10 +18,15 @@ def find_common_elements(source_preferences, target_preferences):
src = dict(source_preferences)
tgt = dict(target_preferences)

inter = np.intersect1d(src.keys(), tgt.keys())
srcKeys = [k for k,_ in src.items()]
tgtKeys = [k for k,_ in tgt.items()]

inter = np.intersect1d(srcKeys, tgtKeys)

common_preferences = zip(*[(src[item], tgt[item]) for item in inter \
if not np.isnan(src[item]) and not np.isnan(tgt[item])])
common_preferences = list(common_preferences )

if common_preferences:
return np.asarray([common_preferences[0]]), np.asarray([common_preferences[1]])
else:
Expand Down