public code v1
This commit is contained in:
@@ -0,0 +1,15 @@
|
||||
from .splitter import Splitter
|
||||
from .model_evaluator import ModelEvaluator
|
||||
from .explainer_evaluator import ExplanationEvaluator
|
||||
from .evaluation_pipelines import (
|
||||
run_evaluation_with_proper_split,
|
||||
run_leave_one_out_evaluation,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"Splitter",
|
||||
"ModelEvaluator",
|
||||
"ExplanationEvaluator",
|
||||
"run_evaluation_with_proper_split",
|
||||
"run_leave_one_out_evaluation",
|
||||
]
|
||||
@@ -0,0 +1,251 @@
|
||||
import time
|
||||
from typing import Dict
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from pygrex.data_reader.data_reader import DataReader
|
||||
from pygrex.evaluator import Splitter, ModelEvaluator
|
||||
|
||||
|
||||
def run_leave_one_out_evaluation(
    data_reader: DataReader, model, top_n: int = 10
) -> Dict:
    """
    Evaluate ``model`` with a leave-one-out protocol.

    One interaction per user is held out via ``Splitter.split_leave_n_out``,
    the model is fitted on the remaining data, top-N recommendations are
    generated for the held-out users and scored with ``ModelEvaluator``.

    :param data_reader: DataReader holding the full interaction dataset.
    :param model: recommender exposing ``fit(train_dr)``; prediction goes
        through ``generate_recommendations_batch``.
    :param top_n: length of the recommendation list that is evaluated.
    :return: dict with the keys "Hit Ratio", "NDCG" and "evaluation_time"
        (wall-clock seconds for the whole run).
    """
    print("Starting leave-one-out evaluation...")
    started_at = time.time()

    # 1. Hold out exactly one interaction per user (n=1).
    train_dr, test_df = Splitter.split_leave_n_out(data_reader, n=1)
    print(f"Split completed: {len(test_df)} test interactions")

    # Keep only test rows whose user AND item are both present in training.
    known_users = set(train_dr.dataset["userId"].unique())
    known_items = set(train_dr.dataset["itemId"].unique())
    size_before_filter = len(test_df)
    is_known = test_df["userId"].isin(known_users) & test_df["itemId"].isin(
        known_items
    )
    test_df = test_df[is_known]
    print(
        f"Filtered test set: {len(test_df)} interactions remaining from {size_before_filter}"
    )

    # 2. Fit the model on the training portion and time it.
    print("Training model on reduced dataset...")
    fit_started_at = time.time()
    model.fit(train_dr)
    fit_seconds = time.time() - fit_started_at
    print(f"Model training completed in {fit_seconds:.2f} seconds")

    # 3. Produce the ranked lists for every test user and time it.
    print("Generating recommendations...")
    rec_started_at = time.time()
    recommendations = generate_recommendations_batch(model, train_dr, test_df, top_n)
    rec_seconds = time.time() - rec_started_at
    print(f"Recommendations generated in {rec_seconds:.2f} seconds")

    # 4. Score the ranked lists against the held-out interactions.
    evaluator = ModelEvaluator(test_df, top_n=top_n)
    metrics = {
        "Hit Ratio": evaluator.cal_hit_ratio(recommendations),
        "NDCG": evaluator.cal_ndcg(recommendations),  # standard NDCG, not eNDCG
    }

    elapsed = time.time() - started_at
    print(f"Total evaluation time: {elapsed:.2f} seconds")

    metrics["evaluation_time"] = elapsed
    return metrics
|
||||
|
||||
|
||||
def generate_recommendations_batch(
    model,
    train_dr: DataReader,
    test_df: pd.DataFrame,
    top_n: int,
    max_candidates: int = 10000,
) -> pd.DataFrame:
    """
    Generate top-N recommendations for every user that appears in ``test_df``.

    For each test user the candidate pool is every training item the user has
    not interacted with in the training data. Pools larger than
    ``max_candidates`` are randomly down-sampled, but the user's held-out test
    items are always kept in the pool: dropping them would make a hit
    impossible and bias the metrics downwards.

    :param model: recommender; ``recommend`` / ``predict_batch`` are used when
        present, otherwise per-item ``predict`` is used.
    :param train_dr: DataReader whose ``dataset`` has 'userId' and 'itemId'.
    :param test_df: held-out interactions with 'userId' and 'itemId' columns.
    :param top_n: number of recommendations to produce per user.
    :param max_candidates: upper bound on randomly sampled candidates per user
        (held-out items are kept on top of the random fill if necessary).
    :return: DataFrame with columns ['userId', 'itemId', 'rank', 'score'];
        users whose generation raised are reported and skipped.
    """
    columns = ["userId", "itemId", "rank", "score"]
    train = train_dr.dataset
    all_items = set(train["itemId"].unique())

    # Build both per-user lookups in a single pass each, instead of filtering
    # the whole training frame again for every user.
    seen_by_user = {}
    for uid, iid in zip(train["userId"], train["itemId"]):
        seen_by_user.setdefault(uid, set()).add(iid)
    held_out_by_user = {}
    for uid, iid in zip(test_df["userId"], test_df["itemId"]):
        held_out_by_user.setdefault(uid, set()).add(iid)

    recommendations = []
    test_users = test_df["userId"].unique()
    print(f"Generating recommendations for {len(test_users)} users...")

    # The dispatch decision does not depend on the user, so take it once.
    use_efficient = hasattr(model, "predict_batch") or hasattr(model, "recommend")

    for i, user_id in enumerate(test_users):
        if i % 100 == 0:  # Progress indicator
            print(f"Processing user {i}/{len(test_users)}")

        # Candidate items are the ones the user has not seen in training.
        candidate_items = list(all_items - seen_by_user.get(user_id, set()))

        if len(candidate_items) > max_candidates:
            held_out = held_out_by_user.get(user_id, set())
            must_keep = [item for item in candidate_items if item in held_out]
            pool = [item for item in candidate_items if item not in held_out]
            n_fill = max(max_candidates - len(must_keep), 0)
            sampled = (
                np.random.choice(pool, n_fill, replace=False).tolist()
                if n_fill
                else []
            )
            candidate_items = must_keep + sampled

        try:
            if use_efficient:
                user_recs = generate_recommendations_efficient(
                    model, user_id, candidate_items, top_n
                )
            else:
                # Fall back to individual predictions (slower)
                user_recs = generate_recommendations_individual(
                    model, user_id, candidate_items, top_n
                )
            recommendations.extend(user_recs)
        except Exception as e:
            # Best effort: one failing user must not abort the whole run.
            print(f"Error generating recommendations for user {user_id}: {e}")
            continue

    if recommendations:
        return pd.DataFrame(recommendations, columns=columns)

    # Return empty DataFrame with correct structure
    return pd.DataFrame(columns=columns)
|
||||
|
||||
|
||||
def generate_recommendations_efficient(
    model, user_id: int, candidate_items: list, top_n: int
) -> list:
    """
    Build a user's top-N list with the fastest API the model offers.

    Preference order: ``model.recommend(user_id, candidate_items, top_n)``
    (expected to yield ``(item_id, score)`` pairs, best first), then
    ``model.predict_batch([(user_id, item_id), ...])`` (expected to return one
    score per pair). If neither exists, or the chosen call raises, the
    per-item fallback ``generate_recommendations_individual`` is used.

    :param model: recommender object.
    :param user_id: user to recommend for.
    :param candidate_items: items that may be recommended.
    :param top_n: maximum number of recommendations returned.
    :return: list of ``(user_id, item_id, rank, score)`` tuples, rank from 1.
    """
    if hasattr(model, "recommend"):
        method = "recommend"
    elif hasattr(model, "predict_batch"):
        method = "predict_batch"
    else:
        return generate_recommendations_individual(
            model, user_id, candidate_items, top_n
        )

    try:
        if method == "recommend":
            # Enforce the top-N contract even if the model returns more.
            ranked = list(model.recommend(user_id, candidate_items, top_n))[:top_n]
        else:
            pairs = [(user_id, item_id) for item_id in candidate_items]
            scores = model.predict_batch(pairs)
            ranked = sorted(
                zip(candidate_items, scores), key=lambda pair: pair[1], reverse=True
            )[:top_n]

        return [
            (user_id, item_id, rank, score)
            for rank, (item_id, score) in enumerate(ranked, 1)
        ]
    except Exception as e:
        # Best effort: report why the fast path failed, then fall back.
        print(
            f"Efficient recommendation via {method} failed for user {user_id}: {e}; "
            "falling back to individual predictions"
        )
        return generate_recommendations_individual(
            model, user_id, candidate_items, top_n
        )
|
||||
|
||||
|
||||
def generate_recommendations_individual(
    model, user_id: int, candidate_items: list, top_n: int
) -> list:
    """
    Fall back to individual predictions (slower but works with any model).

    Calls ``model.predict(user_id, item_id)`` once per candidate. Candidates
    whose prediction raises are reported and skipped. The remaining items are
    ordered by descending score (ties keep candidate order) and cut to
    ``top_n``.

    :param model: object exposing ``predict(user_id, item_id)``.
    :param user_id: user to recommend for.
    :param candidate_items: items that may be recommended.
    :param top_n: maximum number of recommendations returned.
    :return: list of ``(user_id, item_id, rank, score)`` tuples, rank from 1.
    """
    scored = []
    for item_id in candidate_items:
        try:
            scored.append((item_id, model.predict(user_id, item_id)))
        except Exception as e:
            # Skip items that cause prediction errors
            print(f"Prediction error for user {user_id}, item {item_id}: {e}")

    # Stable sort: equal scores keep their candidate order.
    scored.sort(key=lambda pair: pair[1], reverse=True)

    return [
        (user_id, item_id, rank, score)
        for rank, (item_id, score) in enumerate(scored[:top_n], 1)
    ]
|
||||
|
||||
|
||||
def run_evaluation_with_proper_split(
    data_reader: DataReader, model, test_size: float = 0.2, top_n: int = 10
) -> Dict:
    """
    Evaluate ``model`` on a fractional per-user train/test split.

    A fraction ``test_size`` of interactions is sampled into the test set via
    ``Splitter.split_leave_n_out(..., frac=test_size)``; the model is fitted on
    the remainder and scored with ``ModelEvaluator``.

    :param data_reader: DataReader holding the full interaction dataset.
    :param model: recommender exposing ``fit(train_dr)``.
    :param test_size: fraction passed to the splitter as ``frac``.
    :param top_n: length of the recommendation list that is evaluated.
    :return: dict with the keys "Hit Ratio", "NDCG", "evaluation_time",
        "test_interactions" and "total_recommendations".
    """
    print(f"Starting evaluation with {test_size * 100}% test split...")
    started_at = time.time()

    # 1. Split data into train/test
    train_dr, test_df = Splitter.split_leave_n_out(data_reader, frac=test_size)
    print(f"Split completed: {len(test_df)} test interactions")

    # 2. Drop test rows whose user or item never occurs in the training set
    known_users = set(train_dr.dataset["userId"].unique())
    known_items = set(train_dr.dataset["itemId"].unique())
    size_before_filter = len(test_df)
    is_known = test_df["userId"].isin(known_users) & test_df["itemId"].isin(
        known_items
    )
    test_df = test_df[is_known]
    print(
        f"Filtered test set: {len(test_df)} interactions remaining from {size_before_filter}"
    )

    # 3. Train model
    print("Training model...")
    model.fit(train_dr)

    # 4. Generate recommendations
    print("Generating recommendations...")
    recommendations = generate_recommendations_batch(model, train_dr, test_df, top_n)

    # 5. Evaluate
    evaluator = ModelEvaluator(test_df, top_n=top_n)
    metrics = {
        "Hit Ratio": evaluator.cal_hit_ratio(recommendations),
        "NDCG": evaluator.cal_ndcg(recommendations),
    }

    elapsed = time.time() - started_at
    print(f"Evaluation completed in {elapsed:.2f} seconds")

    metrics["evaluation_time"] = elapsed
    metrics["test_interactions"] = len(test_df)
    metrics["total_recommendations"] = len(recommendations)
    return metrics
|
||||
@@ -0,0 +1,68 @@
|
||||
from typing import Dict, Any
|
||||
|
||||
from pygrex.utils import calculate_gild_for_explanations
|
||||
|
||||
|
||||
class ExplanationEvaluator:
    """
    Single entry point for scoring the output of an explainer.

    Given the result dictionary produced by an explainer, it reports two
    quality metrics: fidelity (read from the result) and diversity (GILD,
    computed from the result's "details" entry).
    """

    def __init__(self):
        """Create the evaluator; no state is kept on the instance."""

    def evaluate(
        self, explanation_results: Dict[str, Any], explainer_type: str
    ) -> Dict[str, float]:
        """
        Compute every supported metric for one explanation result.

        Args:
            explanation_results: The dictionary returned by an explainer's
                `find_explanation` method.
            explainer_type: A string identifier for the explainer used
                (e.g., "LORE4Groups", "EXPGRS").

        Returns:
            A dictionary with the keys "fidelity" and "gild". Both are 0.0
            when ``explanation_results`` is empty or None.
        """
        if explanation_results:
            return {
                "fidelity": self._calculate_fidelity(explanation_results),
                "gild": self._calculate_gild(explanation_results, explainer_type),
            }
        return {"fidelity": 0.0, "gild": 0.0}

    def _calculate_fidelity(self, explanation_results: Dict[str, Any]) -> float:
        """
        Read the fidelity score out of the explanation results.

        The explainer computes fidelity itself; this method only standardizes
        retrieval and falls back to 0.0 when the key is absent.
        """
        return explanation_results.get("fidelity", 0.0)

    def _calculate_gild(
        self, explanation_results: Dict[str, Any], explainer_type: str
    ) -> float:
        """
        Compute the GILD diversity score of the explanations.

        Delegates to ``calculate_gild_for_explanations`` with the "details"
        entry of the results; returns 0.0 when that entry is missing or empty.
        """
        details = explanation_results.get("details", {})
        return (
            calculate_gild_for_explanations(details, explainer_type)
            if details
            else 0.0
        )
|
||||
@@ -0,0 +1,179 @@
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
|
||||
class ModelEvaluator:
    """
    Top-N ranking metrics (hit ratio and nDCG) against a held-out test set.

    :param test_set: dataframe with at least the columns ['userId', 'itemId'];
        every row is treated as a positive (relevant) item for that user.
    :param top_n: only recommendations with rank <= top_n are evaluated.
    :param discount_function: rank discount for DCG, one of ``disc_functions``.
    """

    disc_functions = ["log", "linear"]

    def __init__(self, test_set, top_n: int = 10, discount_function: str = "log"):
        self.test_set = test_set
        self._top_n = top_n
        assert discount_function in self.disc_functions, "Wrong Discount Function."
        self._discount_function = discount_function
        self.num_users = self.test_set.userId.nunique()

    @property
    def top_n(self):
        """Cut-off rank; recommendations ranked below it are ignored."""
        return self._top_n

    @top_n.setter
    def top_n(self, top_n: int):
        self._top_n = top_n

    @property
    def discount_function(self):
        """Name of the rank discount used by ``cal_dcg``."""
        return self._discount_function

    @discount_function.setter
    def discount_function(self, discount_function: str):
        assert discount_function in self.disc_functions, "Wrong Discount Function."
        self._discount_function = discount_function

    def cal_hit_ratio(self, recommendations):
        """
        Hit Ratio: per user, the share of test items found in the top-N list,
        averaged over all users of the test set.

        :param recommendations: dataframe, columns = ['userId', 'itemId', 'rank']
        :return: hit rate.
        """
        hits_per_user = self.count_positives(self.get_hits(recommendations))
        # Right-merge onto the per-user test counts so that users without any
        # hit are kept; their hit count comes out as NA and is set to 0.
        per_user = hits_per_user.merge(
            self.count_positives(self.test_set),
            on="userId",
            suffixes=("_true", ""),
            how="right",
        ).fillna(0)
        return (per_user.positive_true / per_user.positive).mean()

    def get_hits(self, recommendations):
        """
        Find which items in the test set have a hit on the recommendations.

        :param recommendations: dataframe, columns = ['userId', 'itemId', 'rank']
        :return: dataframe with the top-N recommendations that are in the test set.
        """
        top_n_recommendations = self.filter_to_top_n(recommendations)
        return pd.merge(top_n_recommendations, self.test_set, on=["userId", "itemId"])

    def filter_to_top_n(self, dataset):
        """
        Drop rows with rank > top_n; they are not used for evaluation.

        :param dataset: dataframe, columns = ['userId', 'itemId', 'rank']
        :return: dataframe, columns = ['userId', 'itemId', 'rank']
        """
        return dataset[dataset["rank"] <= self.top_n]

    def cal_ndcg(self, recommendations):
        r"""
        Normalized Discounted Cumulative Gain at N (nDCG@N): the ratio of the
        DCG of the recommendation list to the DCG of the ideal ranking (iDCG),
        averaged over all users of the test set.

        With the default "log" discount and a constant gain of 1 per hit:
            DCG = \sum_{i \in hits} \frac{1}{\log_2(rank_i + 1)} \qquad
            nDCG = \frac{DCG}{IDCG}
        With the "linear" discount each hit contributes 1 / rank_i instead.

        Ref: Y. Wang, L. Wang, Y. Li, D. He, T.-Y. Liu, and W. Chen.
        A theoretical analysis of NDCG type ranking measures.

        :param recommendations: dataframe, columns = ['userId', 'itemId', 'rank']
        :return: nDCG
        """
        dcg = self.cal_dcg(self.get_hits(recommendations))
        idcg = self.cal_idcg()

        # Left-join on iDCG so users in the test set without hits score 0.
        scores = idcg.merge(dcg, on="userId", how="left").fillna(0)
        scores["ndcg"] = scores["dcg"] / scores["idcg"]
        return scores["ndcg"].mean()

    def cal_dcg(self, hits):
        """
        Discounted Cumulative Gain per user.

        The gain of every hit is a constant 1; only the rank discount varies.
        The input frame is not modified.

        :param hits: dataframe, columns = ['userId', 'itemId', 'rank']
        :return: dataframe, columns = ['userId', 'dcg'], one row per user in hits
        """
        # astype(float) keeps the arithmetic numeric even for an empty,
        # object-typed frame.
        ranks = hits["rank"].astype(float)
        if self.discount_function == "log":
            gains = np.log(2) / np.log(ranks + 1)
        else:  # "linear" (the value is validated on assignment)
            gains = 1 / ranks

        # Group by the userId column itself so each sum stays attached to its
        # own user regardless of row order.
        return gains.groupby(hits["userId"]).sum().rename("dcg").reset_index()

    def cal_idcg(self):
        """
        The ideal DCG: the DCG of the best possible ranking, i.e. every test
        item of a user ranked first (positions 1..k, capped at top_n).

        :return: dataframe, columns = ['userId', 'idcg']
        """
        ideal = self.test_set.copy()
        # Rank each user's test items 1..k independently of row order.
        ideal["rank"] = ideal.groupby("userId").cumcount() + 1
        ideal = self.filter_to_top_n(ideal)
        return self.cal_dcg(ideal).rename(columns={"dcg": "idcg"})

    @staticmethod
    def count_positives(dataset):
        """
        Count the items per user.

        :param dataset: dataframe, columns = ['userId', 'itemId', ...]
        :return: dataframe, columns = ['userId', 'positive']
        """
        # reset_index keeps every count paired with its own userId; pairing
        # unique() with the groupby result is only right for sorted input.
        return (
            dataset.groupby("userId")["itemId"].count().rename("positive").reset_index()
        )
|
||||
|
||||
|
||||
# if __name__ == '__main__':
|
||||
# recoms = pd.DataFrame({
|
||||
# 'userId': [1, 1, 1, 2, 2, 2, 3, 3, 3],
|
||||
# 'itemId': [1, 2, 3, 4, 1, 2, 2, 3, 4],
|
||||
# 'rank': [1, 2, 3, 1, 2, 3, 1, 2, 3]
|
||||
# })
|
||||
|
||||
# test = pd.DataFrame({
|
||||
# 'userId': [1, 1, 2, 3],
|
||||
# 'itemId': [1, 4, 1, 5]
|
||||
# })
|
||||
|
||||
# eval = ModelEvaluator(test_set=test, top_n=2)
|
||||
|
||||
# assert eval.num_users == 3, 'number of users'
|
||||
# assert eval.top_n == 2, 'number of top n'
|
||||
# eval.top_n = 3
|
||||
# assert eval.top_n == 3, 'changing of top n'
|
||||
|
||||
# print(eval.cal_hit_ratio(recoms))
|
||||
# print(eval.cal_ndcg(recoms))
|
||||
@@ -0,0 +1,169 @@
|
||||
import sys
|
||||
import random
|
||||
import pandas as pd
|
||||
import copy
|
||||
|
||||
from pygrex.data_reader.data_reader import DataReader
|
||||
|
||||
|
||||
def fix_data_reader_mappings(source: DataReader, target: DataReader):
    """
    Copy the user/item counts and ID mappings from ``source`` onto ``target``.

    The values are assigned by reference (no copies are made), so both readers
    share the same mapping objects afterwards.

    :param source: reader to take the counts and mappings from.
    :param target: reader that receives them; modified in place.
    :return: ``target``.
    """
    shared_attributes = (
        # user / item counts
        "_num_user",
        "_num_item",
        # original ID mappings
        "original_user_id",
        "original_item_id",
        "new_user_id",
        "new_item_id",
    )
    for attribute in shared_attributes:
        setattr(target, attribute, getattr(source, attribute))
    return target
|
||||
|
||||
|
||||
class Splitter:
    """
    Train/test splitting strategies for recommender evaluation.

    Every method takes a DataReader whose ``dataset`` attribute is a dataframe
    with the columns ['userId', 'itemId', 'rating', 'timestamp'].
    """

    def __init__(self):
        pass

    @staticmethod
    def split_leave_latest_out(data: DataReader, n_latest: int = 1):
        """
        Leave N latest interactions out train/test split.

        Ref:
            Campos, Pedro G., Fernando Díez, and Iván Cantador. "Time-aware recommender systems: a comprehensive survey and
            analysis of existing evaluation protocols." User Modeling and User-Adapted Interaction 24.1-2 (2014): 67-119.

        :param data: DataReader with the full dataset.
        :param n_latest: int, number of latest interactions per user to put in the test set.
        :returns: train as DataReader, test as dataframe
        """
        # Rank each user's interactions by timestamp, newest first; ties are
        # broken by row order (method="first").
        rank_latest = data.dataset.groupby(["userId"])["timestamp"].rank(
            method="first", ascending=False
        )

        # The n_latest newest interactions per user go to the test set ...
        test = data.dataset[rank_latest <= n_latest]
        # ... and everything older stays in train.
        train = DataReader(dataframe=data.dataset.copy())
        train.dataset = data.dataset[rank_latest > n_latest]

        train = fix_data_reader_mappings(data, train)

        return train, test

    @staticmethod
    def split_leave_n_out(data: DataReader, n: int = 1, frac: float | None = None):
        """
        Leave N randomly sampled interactions per user out train/test split.

        Ref:
            Shani, Guy, and Asela Gunawardana. "Evaluating recommendation systems." Recommender systems handbook. Springer,
            Boston, MA, 2011. 257-297.

        :param data: DataReader with the full dataset.
        :param n: int, number of interactions per user to put in the test set
            (used when ``frac`` is None).
        :param frac: float, fraction of each user's interactions to put in the
            test set; takes precedence over ``n`` when given.
        :returns: train as DataReader, test as dataframe
        :raises SystemExit: if some user has fewer than ``n`` ratings or
            ``frac`` is greater than 1.
        """
        min_nr_ratings_user = min(data.dataset["userId"].value_counts())

        if min_nr_ratings_user < n:
            sys.exit(
                "split_leave_n_out: There are users with less ratings than n (required number of interactions "
                "in the test set)."
            )

        if frac is not None and frac > 1:
            sys.exit("f (i.e.) fraction should be smaller than 1.")

        # Group by user id and draw a random sample of interactions per user.
        grouped = data.dataset.groupby(["userId"])
        if frac is not None:
            test = grouped.sample(frac=frac)
        else:
            test = grouped.sample(n=n)

        test = test.reset_index(drop=True)
        # Anti-join: train is every dataset row that has no match in test.
        train_pd = pd.merge(
            data.dataset,
            test,
            on=list(data.dataset.columns),
            how="outer",
            indicator=True,
        )
        train_pd = train_pd[train_pd["_merge"] == "left_only"]
        train_pd = train_pd.drop(columns="_merge")

        train = copy.deepcopy(data)
        train.dataset = train_pd
        train = fix_data_reader_mappings(data, train)
        # Sanity check: the split must partition the dataset. Fully duplicated
        # rows would break this, because the merge matches on all columns.
        assert test.shape[0] + train_pd.shape[0] == data.dataset.shape[0]

        return train, test

    def rel_plus_n(
        self,
        data,
        negative_sample_size: int = 99,
        splitting: str = "latest",
        n: int = 1,
    ):
        """
        RelPlusN: We build the users test set by extracting one relevant random item ($HR_u$) from the entire set of
        rated items. Then a set of random items with unknown relevance ($NR_u$), is extracted for each user $u$, where $u$
        had no previous interaction with these items. Finally, for each item $i$ in $HR_u$, the algorithm requests a ranking
        of the top-$N$ items from the set $\\{i\\} \\cup NR_u$, on which the evaluation is performed. The evaluation metrics
        are averaged over all the items in $HR_u$ and later over all the users.

        Ref:
            - Paolo Cremonesi, Yehuda Koren, and Roberto Turrin. 2010. Performance of Recommender Algorithms on Top-n
            Recommendation Tasks. In Proceedings of the Fourth ACM Conference on Recommender Systems (RecSys '10).
            - Xiangnan He, Lizi Liao, Hanwang Zhang, Liqiang Nie, Xia Hu, and Tat-Seng Chua. 2017. Neural Collaborative
            Filtering. In Proceedings of the 26th International Conference on World Wide Web (WWW '17).

        :param data: DataReader with the full dataset.
        :param negative_sample_size: how many negative items to sample per user.
        :param splitting: either "latest" for leave n latest out, or "n" for leave n out.
        :param n: how many interactions to leave out per user.
        :returns: train as DataReader, and the test dataframe extended with
            the sampled negative (userId, itemId) rows.
        :raises SystemExit: if ``splitting`` is neither "latest" nor "n".
        """
        if splitting == "latest":
            train, test = self.split_leave_latest_out(data, n)
        elif splitting == "n":
            train, test = self.split_leave_n_out(data, n)
        else:
            sys.exit('splitting can be either "latest" or "n". ')

        neg_sample = self.sample_negative(data, negative_sample_size)

        return train, pd.concat([test, neg_sample], ignore_index=True)

    @staticmethod
    def sample_negative(data, negative_sample_size):
        """
        Sample, for every user, items the user never interacted with.

        :param data: object whose ``dataset`` dataframe has 'userId' and 'itemId'.
        :param negative_sample_size: number of negative items drawn per user.
        :return: dataframe, columns = ['userId', 'itemId'], with
            ``negative_sample_size`` rows per user.
        :raises ValueError: (from ``random.sample``) if a user has fewer
            un-interacted items than ``negative_sample_size``.
        """
        item_catalogue = set(data.dataset["itemId"])
        interacted_by_user = data.dataset.groupby("userId")["itemId"].apply(set)

        user_ids = []
        item_ids = []
        for user_id, interacted_items in interacted_by_user.items():
            # random.sample needs a sequence: passing a set raises TypeError
            # on Python >= 3.11, so draw from a sorted list instead.
            negative_items = sorted(item_catalogue - interacted_items)
            for item_id in random.sample(negative_items, negative_sample_size):
                user_ids.append(int(user_id))
                item_ids.append(int(item_id))

        return pd.DataFrame.from_dict({"userId": user_ids, "itemId": item_ids})
|
||||
Reference in New Issue
Block a user