public code v1
This commit is contained in:
@@ -0,0 +1,136 @@
|
||||
import numpy as np
|
||||
import scipy
|
||||
from typing import Union, Protocol, runtime_checkable
|
||||
|
||||
from implicit.recommender_base import RecommenderBase
|
||||
from .recommender_model import RecommenderModel
|
||||
from pygrex.data_reader import DataReader
|
||||
|
||||
|
||||
@runtime_checkable
class FittableImplicitModel(Protocol):
    """Structural type for a factorization model exposing factors and ``fit``.

    Any object that has ``user_factors``, ``item_factors`` and a ``fit``
    method satisfies this protocol. Because the protocol is runtime
    checkable, ``isinstance`` only verifies that these members are present;
    it does not inspect their values or signatures.
    """

    # Latent factors for users, indexed by user id along the first axis.
    user_factors: np.ndarray
    # Latent factors for items, indexed by item id along the first axis.
    item_factors: np.ndarray

    def fit(self, item_user_data) -> None:
        """Train the model on the given interaction matrix."""
|
||||
|
||||
|
||||
class MFImplicitModel(RecommenderModel):
    """Common wrapper for matrix-factorization models with implicit feedback.

    This class does not create the underlying model itself: ``self.model``
    starts as ``None`` and ``fit`` refuses to run until it has been set
    (the error message points callers at concrete subclasses such as ALS or
    BPR). Once a model is attached and trained, scores are computed as dot
    products of the model's user and item factors.
    """

    def __init__(
        self,
        latent_dim,
        reg_term,
        learning_rate,
        epochs,
        num_users=None,
        num_items=None,
    ):
        """Store the hyper-parameters; the underlying model is left unset.

        Args:
            latent_dim: Stored as ``self.latent_dim`` (not used in this class).
            reg_term: Stored as ``self.reg_term`` (not used in this class).
            learning_rate: Stored as ``self.learning_rate`` (not used in this class).
            epochs: Stored as ``self.epochs`` (not used in this class).
            num_users: Initial value of ``self.total_users``; overwritten by ``fit``.
            num_items: Initial value of ``self.total_items``; overwritten by ``fit``.
        """
        self.latent_dim = latent_dim
        self.reg_term = reg_term
        self.learning_rate = learning_rate
        self.epochs = epochs
        # Expected to be assigned by a subclass before fit() is called.
        self.model: Union[RecommenderBase, FittableImplicitModel, None] = None
        self.total_users = num_users
        self.total_items = num_items

    def fit(self, data: DataReader) -> None:
        """Train the attached model on the interactions in ``data``.

        The matrix dimensions are derived from the largest ``userId`` and
        ``itemId`` in ``data.dataset`` (ids are used directly as indices),
        and ``total_users`` / ``total_items`` are updated to match.

        Args:
            data: Reader whose ``dataset`` provides ``userId`` and ``itemId`` columns.

        Raises:
            RuntimeError: If no underlying model has been attached.
        """
        if self.model is None:
            raise RuntimeError(
                "The model has not been initialized. Please use a specific subclass like ALS or BPR."
            )

        # Ids are zero-based indices, so the required size is max id + 1.
        num_user_for_shape = data.dataset["userId"].max() + 1
        num_item_for_shape = data.dataset["itemId"].max() + 1
        self.total_users = num_user_for_shape
        self.total_items = num_item_for_shape

        # rearrange_dataset builds a (user x item) matrix; it is transposed
        # here so the model receives an (item x user) CSR matrix.
        # NOTE(review): newer implicit releases expect a user-item matrix in
        # fit() -- confirm this orientation against the installed version.
        item_user_data = self.rearrange_dataset(
            ds=data.dataset,
            num_user=num_user_for_shape,
            num_item=num_item_for_shape,
        ).T.tocsr()

        self.model.fit(item_user_data)

    @staticmethod
    def rearrange_dataset(ds, num_user: int, num_item: int) -> scipy.sparse.csr_matrix:
        """Convert the interaction table into a sparse user-by-item matrix.

        Every row of ``ds`` contributes a value of 1 at
        ``(userId, itemId)``; repeated pairs are summed by the
        ``csr_matrix`` constructor.

        Args:
            ds: Dataset containing ``userId`` and ``itemId`` columns.
            num_user: Number of rows of the resulting matrix.
            num_item: Number of columns of the resulting matrix.

        Returns:
            Sparse matrix of shape ``(num_user, num_item)``.
        """
        data = np.ones(len(ds))  # one unit of feedback per interaction
        rows = ds["userId"].values  # user ids act as row indices
        cols = ds["itemId"].values  # item ids act as column indices

        ds_mtr = scipy.sparse.csr_matrix(
            (data, (rows, cols)), shape=(num_user, num_item)
        )

        return ds_mtr

    def _require_trained_model(self) -> FittableImplicitModel:
        """Return the underlying model, or raise if it cannot be used yet.

        The runtime-checkable ``isinstance`` test only verifies that the
        factor attributes exist. An attached but unfitted model may expose
        them as ``None``, so that case is rejected explicitly as well.

        Raises:
            RuntimeError: If no suitable model is attached or its factors
                are still ``None``.
        """
        model = self.model
        if (
            not isinstance(model, FittableImplicitModel)
            or model.user_factors is None
            or model.item_factors is None
        ):
            raise RuntimeError(
                "The model has not been trained yet. Please call fit() first."
            )
        return model

    def predict(
        self, user_id: Union[str, int], item_id: Union[str, int, list, np.ndarray]
    ) -> Union[float, list]:
        """Predict scores for one user and one or more items.

        Args:
            user_id: User identifier; converted with ``int()``.
            item_id: A single item identifier, or a list/tuple/array of
                item identifiers; each is converted to ``int``.

        Returns:
            A single ``float`` when ``item_id`` is a scalar, otherwise a
            ``list`` of floats in the same order as ``item_id``.

        Raises:
            RuntimeError: If the model has not been trained.
            ValueError: If ``user_id`` or any item id is outside the range
                covered by the learned factors.
        """
        model = self._require_trained_model()
        user_id = int(user_id)

        # 1. Validate user_id against the number of learned user vectors.
        if not (0 <= user_id < model.user_factors.shape[0]):
            raise ValueError(f"user_id {user_id} is out of bounds")

        # 2. Normalise the input to a 1-d integer array. Tuples count as
        #    sequences so that every requested score is returned.
        is_single_item = not isinstance(item_id, (list, tuple, np.ndarray))
        item_ids_arr = np.array(item_id, ndmin=1).astype(int)

        # 3. One vectorised bounds check; report the first offending id.
        max_item_id = model.item_factors.shape[0]
        out_of_bounds = (item_ids_arr < 0) | (item_ids_arr >= max_item_id)
        if out_of_bounds.any():
            out_of_bounds_id = item_ids_arr[out_of_bounds][0]
            raise ValueError(f"item_id {out_of_bounds_id} is out of bounds")

        # 4. Gather all requested item vectors and score them with a
        #    single dot product against the user's vector.
        item_vectors = model.item_factors[item_ids_arr]
        user_vector = model.user_factors[user_id]
        scores = user_vector.dot(item_vectors.T)

        # 5. Scalar in, scalar out; sequence in, list out.
        return scores[0].item() if is_single_item else scores.tolist()

    def user_embedding(self) -> np.ndarray:
        """Return the trained model's user factors.

        Raises:
            RuntimeError: If the model has not been trained.
        """
        return self._require_trained_model().user_factors

    def item_embedding(self) -> np.ndarray:
        """Return the trained model's item factors.

        Raises:
            RuntimeError: If the model has not been trained.
        """
        return self._require_trained_model().item_factors
|
||||
Reference in New Issue
Block a user