Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Migrate to Python3 #19

Open
wants to merge 17 commits into
base: master
Choose a base branch
from
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
language: python

python:
- "2.7"
- "3.6"

sudo: false

Expand Down
1 change: 1 addition & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ mrec recommender systems library

Introduction
------------
This fork is Python 3 only.
`mrec` is a Python package developed at `Mendeley <http://www.mendeley.com>`_ to support recommender systems development and evaluation. The package currently focuses on item similarity and other methods that work well on implicit feedback, and on experimental evaluation.

Why another package when there are already some really good software projects implementing recommender systems?
Expand Down
8 changes: 4 additions & 4 deletions doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,10 @@
try:
release = pkg_resources.get_distribution('mrec').version
except pkg_resources.DistributionNotFound:
print 'To build the documentation, The distribution information of mrec'
print 'has to be available. Either install the package into your'
print 'development environment or run "python setup.py develop" to setup'
print 'the metadata.'
print('To build the documentation, The distribution information of mrec')
print('has to be available. Either install the package into your')
print('development environment or run "python setup.py develop" to setup')
print('the metadata.')
sys.exit(1)
del pkg_resources
version = '.'.join(release.split('.')[:2])
Expand Down
45 changes: 22 additions & 23 deletions mrec/__init__.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,12 @@
from itertools import izip
import numpy as np
from scipy.sparse import coo_matrix, csr_matrix
from scipy.io import mmread, mmwrite
try:
import cPickle as pickle
except ImportError:
import pickle

from sparse import fast_sparse_matrix, loadtxt, loadz, savez
from base_recommender import BaseRecommender
from mrec.base_recommender import BaseRecommender
from mrec.sparse import fast_sparse_matrix, loadtxt, loadz, savez

__version__ = '0.3.1'

def load_fast_sparse_matrix(input_format,filepath):

def load_fast_sparse_matrix(input_format, filepath):
"""
Load a fast_sparse_matrix from an input file of the specified format,
by delegating to the appropriate static method.
Expand All @@ -31,14 +25,15 @@ def load_fast_sparse_matrix(input_format,filepath):
if input_format == 'tsv':
return fast_sparse_matrix.loadtxt(filepath)
elif input_format == 'csv':
return fast_sparse_matrix.loadtxt(filepath,delimiter=',')
return fast_sparse_matrix.loadtxt(filepath, delimiter=',')
elif input_format == 'mm':
return fast_sparse_matrix.loadmm(filepath)
elif input_format == 'fsm':
return fast_sparse_matrix.load(filepath)
raise ValueError('unknown input format: {0}'.format(input_format))

def load_sparse_matrix(input_format,filepath):

def load_sparse_matrix(input_format, filepath):
"""
Load a scipy.sparse.csr_matrix from an input file of the specified format.

Expand All @@ -57,7 +52,7 @@ def load_sparse_matrix(input_format,filepath):
if input_format == 'tsv':
return loadtxt(filepath)
elif input_format == 'csv':
return loadtxt(filepath,delimiter=',')
return loadtxt(filepath, delimiter=',')
elif input_format == 'mm':
return mmread(filepath).tocsr()
elif input_format == 'npz':
Expand All @@ -66,7 +61,8 @@ def load_sparse_matrix(input_format,filepath):
return fast_sparse_matrix.load(filepath).X
raise ValueError('unknown input format: {0}'.format(input_format))

def save_sparse_matrix(data,fmt,filepath):

def save_sparse_matrix(data, fmt, filepath):
"""
Save a scipy sparse matrix in the specified format. Row and column
indices will be converted to 1-indexed if you specify a plain text
Expand All @@ -88,24 +84,25 @@ def save_sparse_matrix(data,fmt,filepath):
"""
if fmt == 'tsv':
m = data.tocoo()
with open(filepath,'w') as out:
for u,i,v in izip(m.row,m.col,m.data):
print >>out,'{0}\t{1}\t{2}'.format(u+1,i+1,v)
with open(filepath, 'w') as out:
for u, i, v in zip(m.row, m.col, m.data):
print('{0}\t{1}\t{2}'.format(u + 1, i + 1, v), file=out)
elif fmt == 'csv':
m = data.tocoo()
with open(filepath,'w') as out:
for u,i,v in izip(m.row,m.col,m.data):
print >>out,'{0},{1},{2}'.format(u+1,i+1,v)
with open(filepath, 'w') as out:
for u, i, v in zip(m.row, m.col, m.data):
print('{0},{1},{2}'.format(u + 1, i + 1, v), file=out)
elif fmt == 'mm':
mmwrite(filepath,data)
mmwrite(filepath, data)
elif fmt == 'npz':
savez(data.tocoo(),filepath)
savez(data.tocoo(), filepath)
elif fmt == 'fsm':
fast_sparse_matrix(data).save(filepath)
else:
raise ValueError('unknown output format: {0}'.format(fmt))

def save_recommender(model,filepath):

def save_recommender(model, filepath):
"""
Save a recommender model to file.

Expand All @@ -118,6 +115,7 @@ def save_recommender(model,filepath):
"""
model.save(filepath)


def load_recommender(filepath):
"""
Load a recommender model from file after it has been saved by
Expand All @@ -130,6 +128,7 @@ def load_recommender(filepath):
"""
return BaseRecommender.load(filepath)


def read_recommender_description(filepath):
"""
Read a recommender model description from file after it has
Expand Down
52 changes: 25 additions & 27 deletions mrec/base_recommender.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
try:
import cPickle as pickle
except ImportError:
import pickle
import pickle

import numpy as np
from scipy.sparse import csr_matrix


class BaseRecommender(object):
"""
Minimal interface to be implemented by recommenders, along with
Expand All @@ -23,7 +22,7 @@ class BaseRecommender(object):
and the batch methods to recommend items.
"""

def recommend_items(self,dataset,u,max_items=10,return_scores=True,item_features=None):
def recommend_items(self, dataset, u, max_items=10, return_scores=True, item_features=None):
"""
Recommend new items for a user.

Expand All @@ -48,7 +47,7 @@ def recommend_items(self,dataset,u,max_items=10,return_scores=True,item_features
"""
raise NotImplementedError('you must implement recommend_items()')

def fit(self,train,item_features=None):
def fit(self, train, item_features=None):
"""
Train on supplied data. In general you will want to
implement this rather than computing recommendations on
Expand All @@ -63,7 +62,7 @@ def fit(self,train,item_features=None):
"""
raise NotImplementedError('you should implement fit()')

def save(self,filepath):
def save(self, filepath):
"""
Serialize model to file.

Expand All @@ -84,9 +83,9 @@ def save(self,filepath):

archive = self._create_archive()
if archive:
np.savez(filepath,**archive)
np.savez(filepath, **archive)
else:
pickle.dump(self,open(filepath,'w'))
pickle.dump(self, open(filepath, 'wb'))

def _create_archive(self):
"""
Expand Down Expand Up @@ -114,10 +113,10 @@ def load(filepath):
The filepath to read from.
"""
r = np.load(filepath)
if isinstance(r,BaseRecommender):
if isinstance(r, BaseRecommender):
model = r
else:
model = np.loads(str(r['model']))
model = np.loads(r['model'])
model._load_archive(r) # restore any fields serialized separately
return model

Expand All @@ -144,15 +143,15 @@ def read_recommender_description(filepath):
filepath : str
The filepath to read from.
"""
r = np.load(filepath,mmap_mode='r')
if isinstance(r,BaseRecommender):
r = np.load(filepath, mmap_mode='r')
if isinstance(r, BaseRecommender):
model = r
else:
model = np.loads(str(r['model']))
model = np.loads(r['model'])
return str(model)

def __str__(self):
if hasattr(self,'description'):
if hasattr(self, 'description'):
return self.description
return 'unspecified recommender: you should set self.description or implement __str__()'

Expand Down Expand Up @@ -190,12 +189,12 @@ def batch_recommend_items(self,
this for most recommenders.
"""
recs = []
for u in xrange(self.num_users):
if show_progress and u%1000 == 0:
print u,'..',
recs.append(self.recommend_items(dataset,u,max_items,return_scores))
for u in range(self.num_users):
if show_progress and u % 1000 == 0:
print(u, '..', )
recs.append(self.recommend_items(dataset, u, max_items, return_scores))
if show_progress:
print
print()
return recs

def range_recommend_items(self,
Expand Down Expand Up @@ -234,9 +233,9 @@ def range_recommend_items(self,
This provides a default implementation, you will be able to optimize
this for most recommenders.
"""
return [self.recommend_items(dataset,u,max_items,return_scores) for u in xrange(user_start,user_end)]
return [self.recommend_items(dataset, u, max_items, return_scores) for u in range(user_start, user_end)]

def _zero_known_item_scores(self,r,train):
def _zero_known_item_scores(self, r, train):
"""
Helper function to set predicted scores/ratings for training items
to zero or less, to avoid recommending already known items.
Expand All @@ -255,7 +254,7 @@ def _zero_known_item_scores(self,r,train):
in train.
"""
col = train.indices
if isinstance(r,csr_matrix):
if isinstance(r, csr_matrix):
max_score = r.data.max()
else:
max_score = r.max()
Expand All @@ -264,9 +263,8 @@ def _zero_known_item_scores(self,r,train):
# - we can't just use row,col = train.nonzero() as this eliminates
# u,i for which train[u,i] has been explicitly set to zero
row = np.zeros(col.shape)
for u in xrange(train.shape[0]):
start,end = train.indptr[u],train.indptr[u+1]
for u in range(train.shape[0]):
start, end = train.indptr[u], train.indptr[u + 1]
if end > start:
row[start:end] = u
return r - csr_matrix((data,(row,col)),shape=r.shape)

return r - csr_matrix((data, (row, col)), shape=r.shape)
22 changes: 11 additions & 11 deletions mrec/evaluation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,18 @@ class Evaluator(object):
The number of recommendations needed to compute the evaluation function.
"""

def __init__(self,compute_metrics,max_items):
def __init__(self, compute_metrics, max_items):
self.compute_metrics = compute_metrics
self.max_items = max_items

def _add_metrics(self,predicted,actual):
metrics = self.compute_metrics(predicted,actual)
def _add_metrics(self, predicted, actual):
metrics = self.compute_metrics(predicted, actual)
if metrics:
for m,val in metrics.iteritems():
for m, val in metrics.items():
self.cum_metrics[m] += val
self.count += 1

def process(self,testdata,recsfile,start,end,offset=1):
def process(self, testdata, recsfile, start, end, offset=1):
"""
Parameters
----------
Expand Down Expand Up @@ -54,19 +54,19 @@ def process(self,testdata,recsfile,start,end,offset=1):
last_user = start
recs = []
for line in open(recsfile):
user,item,score = line.strip().split('\t')
user = int(user)-1 # convert to 0-indxed
item = int(item)-1
user, item, score = line.strip().split('\t')
user = int(user) - 1 # convert to 0-indxed
item = int(item) - 1
if user >= end:
break
if user < start:
continue
if user != last_user:
self._add_metrics(recs,testdata[last_user,:].indices.tolist())
self._add_metrics(recs, testdata[last_user, :].indices.tolist())
last_user = user
recs = []
if len(recs) < self.max_items:
recs.append(item)
self._add_metrics(recs,testdata[last_user,:].indices.tolist())
self._add_metrics(recs, testdata[last_user, :].indices.tolist())

return self.cum_metrics,self.count
return self.cum_metrics, self.count
Loading