public code v1
This commit is contained in:
@@ -0,0 +1,11 @@
|
||||
from .model_based_emf import EMFExplainer
|
||||
from .model_based_als_explain import ALSExplainer
|
||||
from .post_hoc_association_rules import ARPostHocExplainer
|
||||
from .post_hoc_knn import KNNPostHocExplainer
|
||||
|
||||
# Names re-exported as the public API of this package; each one is imported
# from its own sibling module above.
__all__ = [
    "EMFExplainer",
    "ALSExplainer",
    "ARPostHocExplainer",
    "KNNPostHocExplainer",
]
|
||||
@@ -0,0 +1,49 @@
|
||||
from tqdm.auto import tqdm
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict, Any
|
||||
|
||||
|
||||
class Explainer(ABC):
    """
    Base class for objects that attach an explanation to each recommendation.

    Subclasses implement :meth:`explain_recommendation_to_user`;
    :meth:`explain_recommendations` applies it to every row of the
    recommendations table.
    """

    def __init__(self, model, recommendations, data):
        """
        :param model: the recommender being explained; stored unchanged for subclasses
        :param recommendations: pandas DataFrame with ``userId`` and ``itemId`` columns,
            one row per recommendation
        :param data: object exposing ``dataset`` (a DataFrame with at least ``userId``
            and ``itemId`` columns), ``num_item`` and ``num_user``
        """
        self.model = model
        self.recommendations = recommendations
        self.dataset = data.dataset
        self.num_items = data.num_item
        self.num_users = data.num_user
        # Group once up front so per-user lookups do not rescan the dataset.
        self.users = self.dataset.groupby(by="userId")

    def explain_recommendations(self):
        """
        Compute an explanation for every recommendation.

        The result is written to an ``explanations`` column of
        ``self.recommendations`` (the stored frame is modified in place).

        :return: ``self.recommendations`` with the added ``explanations`` column
        """
        # Iterate the two id columns directly: iterrows() would build a Series
        # per row and upcast mixed-dtype rows, pushing ids through float.
        pairs = zip(self.recommendations["userId"], self.recommendations["itemId"])
        progress = tqdm(
            pairs,
            total=self.recommendations.shape[0],
            desc="Computing explanations: ",
        )
        explanations = [
            self.explain_recommendation_to_user(int(user_id), int(item_id))
            for user_id, item_id in progress
        ]

        self.recommendations["explanations"] = explanations
        return self.recommendations

    def get_user_items(self, user_id):
        """
        Items Ids rated by a user.

        :param user_id: the user
        :return: array of the ``itemId`` values of that user's rows in the dataset
        :raises KeyError: if the user has no row in the dataset
        """
        return self.users.get_group(user_id).itemId.values

    @abstractmethod
    def explain_recommendation_to_user(
        self, user_id: int, item_id: int
    ) -> Dict[str, Any]:
        """
        Generates an explanation for a single user-item recommendation.

        This method must be implemented by any subclass.

        :param user_id: the user receiving the recommendation
        :param item_id: the recommended item
        :return: explanation payload; its keys are defined by the subclass
        """
        raise NotImplementedError
|
||||
@@ -0,0 +1,51 @@
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from .explainer import Explainer
|
||||
|
||||
|
||||
class ALSExplainer(Explainer):
    """
    Explains a recommendation by scoring how much each item the user already
    interacted with contributes to it, using the model's item factors.
    """

    def __init__(self, model, recommendations, data, number_of_contributions=10):
        """
        :param model: model exposing ``item_embedding()``, ``reg_term`` and ``latent_dim``
        :param recommendations: DataFrame of recommendations to explain
        :param data: dataset wrapper passed through to the base explainer
        :param number_of_contributions: how many top contributing items to return
        """
        super().__init__(model, recommendations, data)
        self.number_of_contributions = number_of_contributions

    def explain_recommendation_to_user(self, user_id: int, item_id: int):
        """
        Measuring the contribution of each item to the recommendation.

        With ``Y`` the item factors and ``Y_u`` the rows of the user's items,
        the score of a previously interacted item ``i`` for recommended item
        ``j`` is ``y_i^T (Y_u^T Y_u + reg_term * I)^-1 y_j``.

        :param user_id: the user receiving the recommendation
        :param item_id: the recommended item
        :return: dict with keys ``item`` and ``contribution``, each a pandas
            Series holding the top ``number_of_contributions`` entries sorted
            by decreasing contribution
        """
        # Fetch both once; the original re-queried them several times.
        user_items = self.get_user_items(user_id)
        # assumes item_embedding() returns a (num_items, latent_dim) array -- TODO confirm
        item_factors = np.asarray(self.model.item_embedding())

        # A boolean mask (rather than the raw ids) keeps repeated item ids from
        # being counted twice. Selecting rows gives the same product as
        # multiplying by a dense num_items x num_items 0/1 diagonal matrix,
        # without allocating that matrix.
        interacted = np.zeros(self.num_items, dtype=bool)
        interacted[user_items] = True
        rated_factors = item_factors[interacted]

        gram = np.matmul(rated_factors.T, rated_factors)
        gram = gram + self.model.reg_term * np.eye(self.model.latent_dim)

        if len(user_items) > 1:
            try:
                weight_mtr = np.linalg.inv(gram)
            except np.linalg.LinAlgError:
                # Singular system (e.g. no regularisation and too few items):
                # fall back to the pseudo-inverse instead of failing.
                weight_mtr = np.linalg.pinv(gram)
        else:
            weight_mtr = np.linalg.pinv(gram)

        # Only the rows of the user's own items are needed downstream.
        projected = np.matmul(item_factors[user_items], weight_mtr)
        sim_to_rec_id = projected.dot(item_factors[item_id, :])

        contribution = pd.DataFrame(
            {
                "item": user_items,
                "contribution": sim_to_rec_id,
            }
        ).sort_values(by=["contribution"], ascending=False)

        top = contribution.iloc[: self.number_of_contributions]
        return {
            "item": top["item"],
            "contribution": top["contribution"],
        }
|
||||
@@ -0,0 +1,28 @@
|
||||
from .explainer import Explainer
|
||||
|
||||
|
||||
class EMFExplainer(Explainer):
    """
    Explains a recommendation by how the user's similar users (taken from
    ``model.sim_users``) rated the recommended item.
    """

    def __init__(self, model, recommendations, data):
        """
        :param model: model exposing ``sim_users``, indexable by user id
        :param recommendations: DataFrame of recommendations to explain
        :param data: dataset wrapper passed through to the base explainer
        """
        super().__init__(model, recommendations, data)

    def explain_recommendation_to_user(self, user_id: int, item_id: int):
        """
        Count, per rating value, the similar users who rated the recommended item.

        :param user_id: the user receiving the recommendation
        :param item_id: recommendation
        :return: dict mapping each rating value to the number of similar-user
            rows with that rating on ``item_id``; empty when no similar user
            rated the item
        """
        ratings_on_item = self.dataset[self.dataset.itemId == item_id]
        similar_users = self.model.sim_users[user_id]
        neighbour_ratings = ratings_on_item[
            ratings_on_item.userId.isin(similar_users)
        ]

        counts = neighbour_ratings.groupby(by="rating").count()
        if counts.empty:
            return {}

        # Take the first count column by position; `row[0]` on a
        # label-indexed Series is deprecated positional access.
        return dict(counts.iloc[:, 0].items())
|
||||
@@ -0,0 +1,79 @@
|
||||
from typing import Any, Dict
|
||||
from mlxtend.preprocessing import TransactionEncoder
|
||||
from mlxtend.frequent_patterns import apriori, association_rules
|
||||
import pandas as pd
|
||||
|
||||
from .explainer import Explainer
|
||||
|
||||
|
||||
class ARPostHocExplainer(Explainer):
    """
    Post-hoc explainer based on association rules mined from the users' item
    sets: a recommended item is explained by the items the user already has
    that appear as antecedents of a rule whose consequent is that item.
    """

    def __init__(
        self,
        model,
        recommendations,
        data,
        min_support=0.1,
        max_len=2,
        metric="lift",
        min_threshold=0.1,
        min_confidence=0.1,
        min_lift=0.1,
    ):
        """
        :param model: the recommender being explained
        :param recommendations: DataFrame of recommendations to explain
        :param data: dataset wrapper passed through to the base explainer
        :param min_support: ``min_support`` given to ``apriori``
        :param max_len: ``max_len`` given to ``apriori``
        :param metric: ``metric`` given to ``association_rules``
        :param min_threshold: ``min_threshold`` given to ``association_rules``
        :param min_confidence: rules must have a confidence strictly above this
        :param min_lift: rules must have a lift strictly above this
        """
        super().__init__(model, recommendations, data)
        self.AR = None
        self.min_support = min_support
        self.max_len = max_len
        self.metric = metric
        self.min_threshold = min_threshold
        self.min_confidence = min_confidence
        self.min_lift = min_lift

        # Mined lazily on first use; see compute_association_rules().
        self.rules: pd.DataFrame | None = None

    def get_rules_for_getting(self, item_id: int) -> pd.DataFrame:
        """
        Return the rules whose consequent is ``item_id``, mining them first if needed.

        :param item_id: the recommended item
        :return: DataFrame with ``consequents``, ``antecedents`` and ``confidence`` columns
        """
        if self.rules is None:
            self.compute_association_rules()

        rules = self.rules
        if rules is None:
            # Keep the expected columns so callers can still filter on them.
            return pd.DataFrame(columns=["consequents", "antecedents", "confidence"])

        return rules[rules.consequents == item_id]

    def compute_association_rules(self):
        """
        Mine association rules from the per-user item sets and store them in
        ``self.rules`` (columns ``consequents``, ``antecedents``, ``confidence``).
        """
        rule_columns = ["consequents", "antecedents", "confidence"]

        # One transaction per user, taken from the groupby built by the base
        # class instead of re-filtering the whole dataset for every user.
        item_sets = [group.itemId.tolist() for _, group in self.users]

        te = TransactionEncoder()
        te_ary = te.fit(item_sets).transform(item_sets)

        # The te_ary object is a NumPy array, which is a valid input for a DataFrame.
        # Pylance may raise a false positive here due to incomplete type stubs for mlxtend.
        df = pd.DataFrame(te_ary.astype(bool), columns=te.columns_)  # type: ignore

        frequent_itemsets = apriori(
            df, min_support=self.min_support, use_colnames=True, max_len=self.max_len
        )
        if frequent_itemsets.empty:
            # No itemset reaches min_support: there are no rules to derive.
            self.rules = pd.DataFrame(columns=rule_columns)
            return

        # Honour the configured metric (it used to be hard-coded to "lift").
        rules = association_rules(
            frequent_itemsets, metric=self.metric, min_threshold=self.min_threshold
        )
        # Copy the filtered view so the column rewrites below act on an
        # independent frame rather than on a slice.
        rules = rules[
            (rules["confidence"] > self.min_confidence)
            & (rules["lift"] > self.min_lift)
        ].copy()

        # Keep one item per side. NOTE(review): with max_len > 2 a side can
        # hold several items and only the first one iterated is kept.
        rules["consequents"] = rules["consequents"].apply(lambda x: next(iter(x)))
        rules["antecedents"] = rules["antecedents"].apply(lambda x: next(iter(x)))

        self.rules = rules[rule_columns]

    def explain_recommendation_to_user(
        self, user_id: int, item_id: int
    ) -> Dict[str, Any]:
        """
        Explain ``item_id`` by the user's own items that are antecedents of it.

        :param user_id: the user receiving the recommendation
        :param item_id: the recommended item
        :return: dict with key ``antecedents`` holding the set of matching items
        """
        user_ratings = self.get_user_items(user_id)
        rules = self.get_rules_for_getting(item_id)
        explanations = rules[rules.antecedents.isin(user_ratings)]

        return {"antecedents": set(explanations.antecedents)}
|
||||
@@ -0,0 +1,46 @@
|
||||
from scipy import sparse
|
||||
from sklearn.metrics.pairwise import cosine_similarity
|
||||
import numpy as np
|
||||
from typing import Dict, Any
|
||||
|
||||
from .explainer import Explainer
|
||||
|
||||
|
||||
class KNNPostHocExplainer(Explainer):
    """
    Post-hoc explainer based on item-item cosine similarity: a recommended
    item is explained by the user's own items that are among its ``knn``
    most similar items.
    """

    def __init__(self, model, recommendations, data, knn=10):
        """
        :param model: the recommender being explained
        :param recommendations: DataFrame of recommendations to explain
        :param data: dataset wrapper passed through to the base explainer
        :param knn: number of nearest items kept per item
        """
        super().__init__(model, recommendations, data)

        self.knn = knn
        # Initialize as an empty dictionary to prevent subscripting None
        self.knn_items_dict: Dict[int, np.ndarray] = {}

    def get_nn_for_getting(self, item_id: int) -> np.ndarray:
        """
        Return the nearest items of ``item_id``, computing all neighbours on first use.

        :param item_id: the recommended item
        :return: array of neighbour item ids, or an empty array for an unknown item
        """
        # Check if the KNN dictionary has been computed
        if not self.knn_items_dict:
            self.compute_knn_items_for_all_items()

        # Return the neighbors for the item, or an empty array if not found
        return self.knn_items_dict.get(item_id, np.array([]))

    def compute_knn_items_for_all_items(self):
        """
        Fill ``self.knn_items_dict`` with the ``knn`` most similar items of
        every item, by cosine similarity of the item rows of the rating matrix.
        """
        # Assuming self.dataset has attributes itemId, userId, and rating
        # Keep the last rating of a repeated (item, user) pair: the sparse
        # constructor would otherwise sum the duplicates.
        interactions = self.dataset.drop_duplicates(
            subset=["itemId", "userId"], keep="last"
        )
        # Build the item x user matrix directly in sparse form rather than
        # allocating a dense num_items x num_users array first.
        ds = sparse.csr_matrix(
            (
                interactions.rating.to_numpy(dtype=np.float64),
                (interactions.itemId.to_numpy(), interactions.userId.to_numpy()),
            ),
            shape=(self.num_items, self.num_users),
        )

        sim_matrix = cosine_similarity(ds)
        # Push each item's self-similarity below every other value so an item
        # is ranked last among its own neighbours.
        min_val = sim_matrix.min() - 1
        np.fill_diagonal(sim_matrix, min_val)

        for i in range(self.num_items):
            self.knn_items_dict[i] = (-sim_matrix[i, :]).argsort()[: self.knn]

    def explain_recommendation_to_user(
        self, user_id: int, item_id: int
    ) -> Dict[str, Any]:
        """
        Explain ``item_id`` by the user's own items that are nearest neighbours of it.

        :param user_id: the user receiving the recommendation
        :param item_id: the recommended item
        :return: dict with key ``explanations`` holding the set of matching items
        """
        user_ratings = self.get_user_items(user_id)
        sim_items = self.get_nn_for_getting(item_id)
        explanations = set(sim_items) & set(user_ratings)

        return {"explanations": explanations}
|
||||
Reference in New Issue
Block a user