public code v1
This commit is contained in:
@@ -0,0 +1,4 @@
|
||||
from .recommender import Recommender
|
||||
from .group_recommender import GroupRecommender
|
||||
|
||||
__all__ = ["Recommender", "GroupRecommender"]
|
||||
@@ -0,0 +1,72 @@
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from tqdm.autonotebook import tqdm
|
||||
|
||||
|
||||
class GenericRecommender:
|
||||
def __init__(self, dataset_metadata, model, top_n: int = 10):
|
||||
self.top_n = top_n
|
||||
self.dataset = dataset_metadata.dataset
|
||||
self.model = model
|
||||
self.catalogue = set(self.dataset["itemId"])
|
||||
|
||||
def recommend_all(self):
|
||||
"""
|
||||
Get all recommendations.
|
||||
:param top_n:
|
||||
:return: recommendations for any user.
|
||||
"""
|
||||
|
||||
ratings = self.dataset.groupby("userId")
|
||||
|
||||
recommendations = pd.DataFrame({"userId": [], "itemId": [], "rank": []})
|
||||
|
||||
with tqdm(
|
||||
total=self.dataset["userId"].nunique(), desc="Recommending for users: "
|
||||
) as pbar:
|
||||
for user_id, user_ratings in ratings:
|
||||
# Replace .append() with pd.concat() - pandas 2.2.x +
|
||||
recommendations = pd.concat(
|
||||
[recommendations, self.recommend_user(user_id, user_ratings)], # type: ignore
|
||||
ignore_index=True,
|
||||
)
|
||||
pbar.update()
|
||||
|
||||
return recommendations
|
||||
|
||||
def rank_prediction(self, user_id, target_item_id, predictions):
|
||||
# Ensure predictions are flattened if they're 2D
|
||||
if isinstance(predictions, np.ndarray) and predictions.ndim > 1:
|
||||
predictions = predictions.flatten()
|
||||
recommendations = pd.DataFrame(
|
||||
{"userId": user_id, "itemId": target_item_id, "prediction": predictions}
|
||||
)
|
||||
|
||||
recommendations["rank"] = recommendations["prediction"].rank(
|
||||
method="first", ascending=False
|
||||
)
|
||||
|
||||
recommendations.sort_values(["userId", "rank"], inplace=True)
|
||||
|
||||
recommendations = recommendations[recommendations["rank"] <= self.top_n]
|
||||
|
||||
return recommendations[["userId", "itemId", "rank"]]
|
||||
|
||||
def get_unrated(self, user_ratings):
|
||||
"""
|
||||
Extract the set of items a user has not rated.
|
||||
:param user_ratings: list, items rated.
|
||||
:return: list, items not rated.
|
||||
"""
|
||||
unrated_item_id = self.catalogue - set(user_ratings)
|
||||
unrated_item_id = list(unrated_item_id)
|
||||
return unrated_item_id
|
||||
|
||||
def get_rated(self, user_id):
|
||||
"""
|
||||
Extract the set of items a user has not rated.
|
||||
:param user_id: userId rated.
|
||||
:return: list, rated items.
|
||||
"""
|
||||
rated = self.dataset[self.dataset["userId"] == user_id]
|
||||
return rated
|
||||
@@ -0,0 +1,391 @@
|
||||
from typing import Dict, List, Union, Optional
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pygrex.data_reader.data_reader import DataReader
|
||||
from pygrex.models.recommender_model import RecommenderModel
|
||||
from pygrex.utils.aggregation_strategy import ScoreAggregator, AggregationStrategy
|
||||
from pygrex.utils.scale import Scale
|
||||
|
||||
|
||||
class GroupRecommender:
|
||||
"""
|
||||
A class to represent a group recommender system that follows the workflow:
|
||||
1. Setup and Candidate Selection
|
||||
2. Individual Preference Collection
|
||||
3. Score Aggregation
|
||||
4. Final Recommendation List
|
||||
"""
|
||||
|
||||
def __init__(self, data: DataReader):
|
||||
"""Initialize the group recommender with data.
|
||||
|
||||
Args:
|
||||
data: The dataset containing user-item interactions.
|
||||
"""
|
||||
self.data = data
|
||||
self._group_predictions = None
|
||||
self._members = None
|
||||
self._item_pool = None
|
||||
self._model = None
|
||||
self._aggregation_strategy = None
|
||||
self._score_aggregator = None
|
||||
self._aggregated_scores = None
|
||||
self._top_recommendation = None
|
||||
|
||||
def setup_recommendation(
|
||||
self,
|
||||
model: RecommenderModel,
|
||||
members: List[Union[str, int]],
|
||||
data: DataReader,
|
||||
aggregation_strategy: AggregationStrategy, # type: ignore
|
||||
most_respected_person: Optional[Union[str, int]] = None,
|
||||
) -> None:
|
||||
"""
|
||||
Setup and Candidate Selection: Initialize the group recommendation process.
|
||||
Args:
|
||||
model: The recommendation model to use
|
||||
members: List of user IDs representing the group members
|
||||
data: DataReader object containing the dataset
|
||||
aggregation_strategy: Strategy for aggregating individual predictions
|
||||
most_respected_person: User ID of most respected person (required for MRP strategy)
|
||||
"""
|
||||
self._members = members
|
||||
self._model = model
|
||||
self._aggregation_strategy = aggregation_strategy
|
||||
|
||||
# Initialize score aggregator
|
||||
self._score_aggregator = ScoreAggregator(
|
||||
most_respected_person=most_respected_person
|
||||
)
|
||||
|
||||
# get all item IDs from the dataset
|
||||
item_ids = data.dataset["itemId"].unique()
|
||||
|
||||
# Get items that no group member has interacted with
|
||||
self._item_pool = self.get_non_interacted_items_for_recommendation(
|
||||
self.data,
|
||||
item_ids, # type: ignore
|
||||
members, # type: ignore
|
||||
)
|
||||
|
||||
# Filter item_pool to only include IDs that are valid for the model
|
||||
# This prevents out-of-bounds errors when the model was trained with a different
|
||||
# number of items than what's currently in the dataset
|
||||
max_item_id = self._get_max_valid_item_id(model)
|
||||
# Convert to int array and filter out invalid IDs
|
||||
item_pool_int = self._item_pool.astype(int)
|
||||
valid_mask = (item_pool_int >= 0) & (item_pool_int < max_item_id)
|
||||
self._item_pool = item_pool_int[valid_mask]
|
||||
|
||||
# Individual Preference Collection: Generate predictions for each group member
|
||||
self._group_predictions = self._generate_group_predictions()
|
||||
|
||||
# Score Aggregation: Aggregate individual predictions into collective scores
|
||||
self._aggregated_scores = self._aggregate_group_scores()
|
||||
|
||||
def _generate_group_predictions(self) -> Dict[Union[str, int], Dict[int, float]]:
|
||||
"""
|
||||
Individual Preference Collection: Generate predictions for all group members.
|
||||
|
||||
Returns:
|
||||
A dictionary with user IDs as keys and their predictions as values
|
||||
"""
|
||||
if not self._members or self._model is None or self._item_pool is None:
|
||||
raise ValueError(
|
||||
"You must call setup_recommendation before generating predictions"
|
||||
)
|
||||
|
||||
predictions = {}
|
||||
for member in self._members:
|
||||
user_pred = self.generate_recommendation(
|
||||
self._model,
|
||||
member,
|
||||
self._item_pool, # type: ignore
|
||||
self.data, # type: ignore
|
||||
)
|
||||
predictions[member] = user_pred
|
||||
|
||||
return predictions
|
||||
|
||||
def _aggregate_group_scores(self) -> Dict[int, float]:
|
||||
"""
|
||||
Score Aggregation: Aggregate individual predictions into collective scores.
|
||||
|
||||
Returns:
|
||||
Dictionary mapping item IDs to aggregated scores
|
||||
"""
|
||||
if (
|
||||
self._group_predictions is None
|
||||
or self._score_aggregator is None
|
||||
or self._aggregation_strategy is None
|
||||
):
|
||||
raise ValueError(
|
||||
"You must call setup_recommendation before aggregating scores"
|
||||
)
|
||||
|
||||
# For Borda Count, we need to create rankings from predictions
|
||||
rankings = None
|
||||
if self._aggregation_strategy == AggregationStrategy.BORDA_COUNT:
|
||||
rankings = self._create_rankings_from_predictions()
|
||||
|
||||
# Use ScoreAggregator to aggregate scores
|
||||
aggregated_scores = self._score_aggregator.aggregate_scores(
|
||||
evaluations=self._group_predictions, # type: ignore
|
||||
strategy=self._aggregation_strategy,
|
||||
rankings=rankings, # type: ignore
|
||||
)
|
||||
|
||||
# Sort items by their aggregated scores in descending order
|
||||
sorted_scores = dict(
|
||||
sorted(aggregated_scores.items(), key=lambda x: x[1], reverse=True)
|
||||
)
|
||||
|
||||
return sorted_scores # type: ignore
|
||||
|
||||
def _create_rankings_from_predictions(self) -> Dict[Union[str, int], List[int]]:
|
||||
"""
|
||||
Create rankings from predictions for Borda Count aggregation.
|
||||
|
||||
Returns:
|
||||
Dictionary mapping user IDs to ranked lists of item IDs
|
||||
"""
|
||||
if self._group_predictions is None:
|
||||
raise ValueError("Group predictions not available")
|
||||
|
||||
rankings = {}
|
||||
for user_id, predictions in self._group_predictions.items():
|
||||
# Sort items by prediction score in descending order
|
||||
sorted_items = sorted(predictions.items(), key=lambda x: x[1], reverse=True)
|
||||
rankings[user_id] = [item_id for item_id, _ in sorted_items]
|
||||
|
||||
return rankings
|
||||
|
||||
def _get_max_valid_item_id(self, model: RecommenderModel) -> int:
|
||||
"""
|
||||
Get the maximum valid item ID for the given model.
|
||||
|
||||
Args:
|
||||
model: The recommendation model
|
||||
|
||||
Returns:
|
||||
Maximum valid item ID (exclusive, so valid IDs are [0, max_item_id))
|
||||
"""
|
||||
# For implicit models (MFImplicitModel), check item_factors shape
|
||||
if hasattr(model, 'model') and model.model is not None:
|
||||
if hasattr(model.model, 'item_factors'):
|
||||
return model.model.item_factors.shape[0]
|
||||
# Check if model has total_items attribute (set during fit)
|
||||
if hasattr(model, 'total_items') and model.total_items is not None:
|
||||
return model.total_items
|
||||
# Fallback to data.num_item if model shape is not available
|
||||
return self.data.num_item
|
||||
|
||||
def get_non_interacted_items_for_recommendation(
|
||||
self,
|
||||
data: DataReader,
|
||||
item_ids: List[Union[str, int]],
|
||||
members: List[Union[str, int]],
|
||||
) -> np.ndarray:
|
||||
"""
|
||||
Returns the list of item IDs that none of the specified group members have interacted with.
|
||||
|
||||
This method is typically used in recommendation systems to filter out items that have already
|
||||
been interacted with by any member of the group, ensuring that recommendations focus on new or
|
||||
unseen items.
|
||||
|
||||
Args:
|
||||
data: The original dataset containing user-item interactions.
|
||||
item_ids: A list of all available item IDs to consider.
|
||||
members: A list of user IDs representing the group.
|
||||
|
||||
Returns:
|
||||
np.ndarray: A list of item IDs that have not been interacted with by any member of the group.
|
||||
"""
|
||||
|
||||
consecutive_member_ids = [data.get_new_user_id(int(m)) for m in members]
|
||||
consecutive_member_ids = [m for m in consecutive_member_ids if m is not None]
|
||||
|
||||
# Get all unique item IDs interacted with by users in the group
|
||||
interacted_item_ids = data.dataset.loc[
|
||||
data.dataset.userId.isin(consecutive_member_ids), "itemId"
|
||||
].unique()
|
||||
|
||||
# Use numpy set difference to get non-interacted item IDs
|
||||
item_pool = np.setdiff1d(item_ids, interacted_item_ids, assume_unique=True)
|
||||
|
||||
return item_pool
|
||||
|
||||
def generate_recommendation(
|
||||
self,
|
||||
model: RecommenderModel,
|
||||
member: Union[str, int],
|
||||
item_pool: List[Union[str, int]],
|
||||
data: DataReader,
|
||||
) -> Dict[int, float]:
|
||||
"""
|
||||
Generate recommendations for a user based on the provided model.
|
||||
|
||||
Args:
|
||||
model: A recommendation model that implements the RecommenderModel interface
|
||||
member: The ID of the user
|
||||
item_pool: List of item IDs to predict ratings/scores for
|
||||
data: The dataset containing user-item interactions
|
||||
|
||||
Returns:
|
||||
A dictionary mapping item IDs to predicted ratings/scores
|
||||
"""
|
||||
member = int(member)
|
||||
new_member_id = data.get_new_user_id(member)
|
||||
|
||||
if new_member_id is None:
|
||||
return {} # Return empty predictions for this user
|
||||
|
||||
# Additional safety check: filter item_pool to valid IDs before prediction
|
||||
# This provides a second layer of protection in case filtering was missed earlier
|
||||
max_valid_item_id = self._get_max_valid_item_id(model)
|
||||
if isinstance(item_pool, np.ndarray):
|
||||
item_pool = item_pool.astype(int)
|
||||
item_pool = item_pool[(item_pool >= 0) & (item_pool < max_valid_item_id)]
|
||||
elif isinstance(item_pool, list):
|
||||
item_pool = [int(item) for item in item_pool if 0 <= int(item) < max_valid_item_id]
|
||||
|
||||
if len(item_pool) == 0:
|
||||
print(f"No valid items found for user {new_member_id}. Returning empty predictions.")
|
||||
return {} # Return empty predictions if no valid items
|
||||
|
||||
raw_predictions = model.predict(new_member_id, item_pool) # type: ignore
|
||||
if not isinstance(raw_predictions, (list, np.ndarray)):
|
||||
raise TypeError(
|
||||
f"Model's predict function returned an unexpected type: {type(raw_predictions)}"
|
||||
)
|
||||
|
||||
# raw_predictions = []
|
||||
# # Generate predictions for each item in the pool
|
||||
# for item in item_pool:
|
||||
# item = int(item)
|
||||
# raw_predictions.append(model.predict(new_member_id, item)) # type: ignore
|
||||
|
||||
# Ensure raw_predictions is a numpy array
|
||||
raw_predictions = np.array(raw_predictions)
|
||||
|
||||
# # Flatten the predictions if it's a 2D array (single user, multiple items)
|
||||
# if raw_predictions.ndim == 2 and raw_predictions.shape[0] == 1:
|
||||
# raw_predictions = raw_predictions.flatten()
|
||||
|
||||
# # Check if the length of raw_predictions matches item_pool
|
||||
# if len(raw_predictions) != len(item_pool):
|
||||
# raise ValueError(
|
||||
# "Mismatch between predictions and item IDs. Check the model's predict function."
|
||||
# )
|
||||
|
||||
# Apply scaling to normalize predictions to 1-5 range
|
||||
scaled_linear = Scale.linear(
|
||||
np.array(raw_predictions),
|
||||
target_min=1,
|
||||
target_max=5,
|
||||
)
|
||||
# Convert the scaled predictions into a dictionary with original item IDs as keys
|
||||
predictions = {}
|
||||
for item, scaled_pred in zip(item_pool, scaled_linear):
|
||||
# Ensure item_id is treated as an integer
|
||||
item_original_id = data.get_original_item_id(int(item))
|
||||
if item_original_id is not None:
|
||||
predictions[int(item_original_id)] = scaled_pred # type: ignore
|
||||
|
||||
# Sort the predictions in descending order of scores
|
||||
sorted_predictions = dict(
|
||||
sorted(predictions.items(), key=lambda item: item[1], reverse=True)
|
||||
)
|
||||
|
||||
return sorted_predictions
|
||||
|
||||
def get_group_recommendations(
|
||||
self, top_k: Optional[int] = None
|
||||
) -> Union[int, List[int]]:
|
||||
"""
|
||||
Final Recommendation List: Get recommendations for the group based on aggregated scores.
|
||||
|
||||
Args:
|
||||
top_k: The number of recommendations to return.
|
||||
If None, returns all recommendations sorted by score.
|
||||
If 1, returns only the top recommendation as a single item ID.
|
||||
If > 1, returns the top k recommendations as a list of item IDs.
|
||||
|
||||
Returns:
|
||||
If top_k is 1, a single item ID. Otherwise, a list of item IDs.
|
||||
"""
|
||||
if self._aggregated_scores is None:
|
||||
raise ValueError(
|
||||
"You must call setup_recommendation before getting recommendations"
|
||||
)
|
||||
|
||||
sorted_items = list(self._aggregated_scores.items())
|
||||
|
||||
# Return results based on top_k parameter
|
||||
if top_k is None:
|
||||
# Return all items as a list of item IDs
|
||||
return [item_id for item_id, _ in sorted_items]
|
||||
elif top_k == 1:
|
||||
# Return only the top item ID
|
||||
if sorted_items:
|
||||
return sorted_items[0][0]
|
||||
return None # type: ignore
|
||||
else:
|
||||
# Return top k item IDs
|
||||
return [
|
||||
item_id for item_id, _ in sorted_items[: min(top_k, len(sorted_items))]
|
||||
]
|
||||
|
||||
def get_top_recommendation(self) -> int:
|
||||
"""
|
||||
Get the top recommendation for the group.
|
||||
|
||||
Returns:
|
||||
The item ID with the highest aggregated score across all group members.
|
||||
"""
|
||||
if self._top_recommendation is None:
|
||||
self._top_recommendation = self.get_group_recommendations(top_k=1)
|
||||
return self._top_recommendation # type: ignore
|
||||
|
||||
def get_recommendation_scores(self) -> Dict[int, float]:
|
||||
"""
|
||||
Get the aggregated scores for all items across the group.
|
||||
|
||||
Returns:
|
||||
A dictionary with item IDs as keys and their aggregated scores as values.
|
||||
"""
|
||||
if self._aggregated_scores is None:
|
||||
raise ValueError(
|
||||
"You must call setup_recommendation before getting recommendation scores"
|
||||
)
|
||||
return self._aggregated_scores.copy()
|
||||
|
||||
def get_aggregation_strategy(self) -> Optional[AggregationStrategy]:
|
||||
"""
|
||||
Get the current aggregation strategy.
|
||||
|
||||
Returns:
|
||||
The aggregation strategy being used, or None if not set.
|
||||
"""
|
||||
return self._aggregation_strategy
|
||||
|
||||
def get_group_members(self) -> Optional[List[Union[str, int]]]:
|
||||
"""
|
||||
Get the current group members.
|
||||
|
||||
Returns:
|
||||
List of group member IDs, or None if not set.
|
||||
"""
|
||||
return self._members.copy() if self._members else None
|
||||
|
||||
def get_individual_predictions(
|
||||
self,
|
||||
) -> Optional[Dict[Union[str, int], Dict[int, float]]]:
|
||||
"""
|
||||
Get the individual predictions for all group members.
|
||||
|
||||
Returns:
|
||||
Dictionary mapping user IDs to their individual predictions, or None if not available.
|
||||
"""
|
||||
return self._group_predictions.copy() if self._group_predictions else None
|
||||
@@ -0,0 +1,57 @@
|
||||
import pandas as pd
|
||||
from typing import Optional
|
||||
|
||||
from .generic_recommender import GenericRecommender
|
||||
|
||||
|
||||
class Recommender(GenericRecommender):
|
||||
def __init__(self, dataset_metadata, model, top_n: int = 10):
|
||||
super(Recommender, self).__init__(dataset_metadata, model, top_n)
|
||||
|
||||
def get_predictions(
|
||||
self,
|
||||
user_id: int,
|
||||
target_item_id: list,
|
||||
):
|
||||
predictions = self.model.predict(user_id, target_item_id)
|
||||
return predictions
|
||||
|
||||
def recommend(self, user_id: int, target_item_id: list):
|
||||
"""
|
||||
Generate recommendations on specific itemId and userId
|
||||
:param user_id: list, user Ids
|
||||
:param target_item_id: list, item Ids
|
||||
:param rated_items: list, of rated interactions.
|
||||
:return: data.frame [userId, itemId, rank], recommendations ranking for the specified pairs of userId and itemId.
|
||||
"""
|
||||
predictions = self.get_predictions(user_id, target_item_id)
|
||||
|
||||
return self.rank_prediction(user_id, target_item_id, predictions)
|
||||
|
||||
def recommend_user(
|
||||
self, user_id: Optional[int] = None, user_ratings: Optional[pd.DataFrame] = None
|
||||
):
|
||||
"""
|
||||
Get recommendations for a user.
|
||||
:param user_id: int, a user Id
|
||||
:param user_ratings: list, interactions on the user
|
||||
:return: dataframe [userId, itemId, rank], recommendations ranking for the specified userId.
|
||||
"""
|
||||
if user_ratings is None:
|
||||
if user_id is None:
|
||||
raise ValueError("Either 'user_id' or 'user_ratings' must be provided.")
|
||||
user_ratings = self.get_rated(user_id=user_id)
|
||||
|
||||
if user_ratings is None:
|
||||
return pd.DataFrame(
|
||||
columns=["userId", "itemId", "rank"]
|
||||
) # Return empty recommendations
|
||||
|
||||
if user_id is None:
|
||||
raise ValueError(
|
||||
"Could not determine user_id from the provided user_ratings."
|
||||
)
|
||||
|
||||
unrated_item_id = self.get_unrated(user_ratings["itemId"])
|
||||
|
||||
return self.recommend(user_id=user_id, target_item_id=unrated_item_id)
|
||||
Reference in New Issue
Block a user