public code v1

This commit is contained in:
2026-05-22 10:02:10 +02:00
commit 46a9ecf065
166 changed files with 6982454 additions and 0 deletions
+11
View File
@@ -0,0 +1,11 @@
from .model_based_emf import EMFExplainer
from .model_based_als_explain import ALSExplainer
from .post_hoc_association_rules import ARPostHocExplainer
from .post_hoc_knn import KNNPostHocExplainer
# Public API of the package: the names exported by a star-import, i.e. the
# four explainer classes imported above.
__all__ = [
"EMFExplainer",
"ALSExplainer",
"ARPostHocExplainer",
"KNNPostHocExplainer",
]
+49
View File
@@ -0,0 +1,49 @@
from tqdm.auto import tqdm
from abc import ABC, abstractmethod
from typing import Dict, Any
class Explainer(ABC):
    """
    Abstract base class for recommendation explainers.

    Holds the model, the recommendations to explain and the interaction data,
    and drives the per-recommendation loop. Subclasses only implement
    :meth:`explain_recommendation_to_user`.
    """

    def __init__(self, model, recommendations, data):
        """
        :param model: the recommender model whose recommendations are explained
        :param recommendations: table with a ``userId`` and an ``itemId``
            column, one row per recommendation
        :param data: object exposing ``dataset`` (interaction table with at
            least ``userId`` and ``itemId`` columns), ``num_item`` and
            ``num_user``
        """
        self.model = model
        self.recommendations = recommendations
        self.dataset = data.dataset
        self.num_items = data.num_item
        self.num_users = data.num_user
        # Group once so that per-user lookups do not rescan the dataset.
        self.users = self.dataset.groupby(by="userId")

    def explain_recommendations(self):
        """
        Compute an explanation for every row of ``self.recommendations``.

        The explanations are stored in a new ``explanations`` column of
        ``self.recommendations`` (the table is modified in place).

        :return: ``self.recommendations`` with the added column
        """
        # Iterate the two id columns directly instead of ``iterrows()``:
        # ``iterrows`` builds one Series per row and upcasts mixed-dtype rows
        # to float, which can corrupt very large integer ids.
        id_pairs = zip(
            self.recommendations["userId"], self.recommendations["itemId"]
        )
        explanations = [
            self.explain_recommendation_to_user(int(user_id), int(item_id))
            for user_id, item_id in tqdm(
                id_pairs,
                total=len(self.recommendations),
                desc="Computing explanations: ",
            )
        ]
        self.recommendations["explanations"] = explanations
        return self.recommendations

    def get_user_items(self, user_id):
        """
        Item ids rated by a user.

        :param user_id: the user
        :return: array of the ``itemId`` values of the user's interactions
        :raises KeyError: if the user has no interaction in the dataset
        """
        return self.users.get_group(user_id).itemId.values

    @abstractmethod
    def explain_recommendation_to_user(
        self, user_id: int, item_id: int
    ) -> Dict[str, Any]:
        """
        Generates an explanation for a single user-item recommendation.

        This method must be implemented by any subclass.

        :param user_id: the user receiving the recommendation
        :param item_id: the recommended item
        :return: the explanation, as a dictionary
        """
        raise NotImplementedError
@@ -0,0 +1,51 @@
import numpy as np
import pandas as pd
from .explainer import Explainer
class ALSExplainer(Explainer):
    """
    Model-based explainer for a factorisation model exposing
    ``item_embedding()``, ``reg_term`` and ``latent_dim``.

    A recommendation is explained by how much each item the user already
    interacted with contributes to the recommended item's score.
    """

    def __init__(self, model, recommendations, data, number_of_contributions=10):
        """
        :param model: model exposing ``item_embedding()``, ``reg_term`` and
            ``latent_dim``
        :param recommendations: table with ``userId`` and ``itemId`` columns
        :param data: object exposing ``dataset``, ``num_item`` and ``num_user``
        :param number_of_contributions: maximum number of contributing items
            kept in each explanation
        """
        super(ALSExplainer, self).__init__(model, recommendations, data)
        self.number_of_contributions = number_of_contributions

    def explain_recommendation_to_user(self, user_id: int, item_id: int):
        """
        Measuring the contribution of each item to the recommendation.

        :param user_id: the user receiving the recommendation
        :param item_id: the recommended item
        :return: dictionary with two aligned pandas Series, ``"item"`` and
            ``"contribution"``, holding the user's most contributing items in
            decreasing order of contribution
        """
        user_items = self.get_user_items(user_id)
        item_factors = np.asarray(self.model.item_embedding())

        # Y^T C_u Y with a binary diagonal C_u is the Gram matrix of the rows
        # of Y the user interacted with. Computing it from those rows avoids
        # materialising a dense (num_items x num_items) diagonal matrix.
        # np.unique mirrors the 0/1 indicator: a repeated item counts once.
        rated_factors = item_factors[np.unique(user_items)]
        gram = rated_factors.T @ rated_factors
        gram = gram + self.model.reg_term * np.eye(self.model.latent_dim)

        if len(user_items) > 1:
            weight_mtr = np.linalg.inv(gram)
        else:
            weight_mtr = np.linalg.pinv(gram)

        # Only the user's own items are reported, so score just those rows.
        target_direction = weight_mtr @ item_factors[item_id, :]
        sim_to_rec_id = item_factors[user_items] @ target_direction

        contribution = pd.DataFrame(
            {"item": user_items, "contribution": sim_to_rec_id}
        ).sort_values(by=["contribution"], ascending=False)

        # ``.iloc`` makes the "first k rows after sorting" intent explicit and
        # ``["item"]`` avoids relying on attribute access for a column name.
        top_k = contribution.iloc[: self.number_of_contributions]
        return {
            "item": top_k["item"],
            "contribution": top_k["contribution"],
        }
@@ -0,0 +1,28 @@
from .explainer import Explainer
class EMFExplainer(Explainer):
    """
    Model-based explainer for a model exposing ``sim_users``, a mapping from a
    user id to that user's similar users.

    A recommendation is explained by how the user's similar users rated the
    recommended item.
    """

    def __init__(self, model, recommendations, data):
        """
        :param model: model exposing ``sim_users``
        :param recommendations: table with ``userId`` and ``itemId`` columns
        :param data: object exposing ``dataset`` (with ``userId``, ``itemId``
            and ``rating`` columns), ``num_item`` and ``num_user``
        """
        super(EMFExplainer, self).__init__(model, recommendations, data)

    def explain_recommendation_to_user(self, user_id: int, item_id: int):
        """
        Count the ratings that similar users gave to the recommended item.

        :param user_id: the user receiving the recommendation
        :param item_id: recommendation
        :return: dictionary mapping each rating value to the number of similar
            users who gave that rating to the item; empty when none of them
            rated it
        """
        ratings_on_item = self.dataset[self.dataset.itemId == item_id]
        similar_users = self.model.sim_users[user_id]
        similar_users_ratings_on_item = ratings_on_item[
            ratings_on_item.userId.isin(similar_users)
        ]
        explanation_df = similar_users_ratings_on_item.groupby(by="rating").count()
        # Take the first count column by position with ``.iloc``. The previous
        # ``row[0]`` relied on integer keys falling back to positional access
        # on a label-indexed Series, which pandas has deprecated.
        counts = explanation_df.iloc[:, 0]
        return {rating: count for rating, count in counts.items()}
@@ -0,0 +1,79 @@
from typing import Any, Dict
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules
import pandas as pd
from .explainer import Explainer
class ARPostHocExplainer(Explainer):
    """
    Post-hoc explainer based on association rules mined from the interactions.

    A recommended item is explained by the items the user interacted with
    that appear as antecedent of a rule whose consequent is that item.
    """

    def __init__(
        self,
        model,
        recommendations,
        data,
        min_support=0.1,
        max_len=2,
        metric="lift",
        min_threshold=0.1,
        min_confidence=0.1,
        min_lift=0.1,
    ):
        """
        :param model: the recommender model (not used for mining the rules)
        :param recommendations: table with ``userId`` and ``itemId`` columns
        :param data: object exposing ``dataset``, ``num_item`` and ``num_user``
        :param min_support: minimum support passed to ``apriori``
        :param max_len: maximum itemset length passed to ``apriori``
        :param metric: metric passed to ``association_rules``
        :param min_threshold: threshold on ``metric`` for ``association_rules``
        :param min_confidence: rules must have a confidence strictly above it
        :param min_lift: rules must have a lift strictly above it
        """
        super(ARPostHocExplainer, self).__init__(model, recommendations, data)
        self.AR = None
        self.min_support = min_support
        self.max_len = max_len
        self.metric = metric
        self.min_threshold = min_threshold
        self.min_confidence = min_confidence
        self.min_lift = min_lift
        # Rules are mined lazily on first use.
        self.rules: pd.DataFrame | None = None

    def get_rules_for_getting(self, item_id: int) -> pd.DataFrame:
        """
        Rules whose consequent is ``item_id``, mining the rules on first call.

        :param item_id: the recommended item
        :return: rows of the rule table with ``consequents == item_id``
        """
        if self.rules is None:
            self.compute_association_rules()
        rules = self.rules
        if rules is None:
            # Only reachable if an override of compute_association_rules does
            # not set self.rules; keep the columns the caller reads.
            return pd.DataFrame(columns=["consequents", "antecedents", "confidence"])
        return rules[rules.consequents == item_id]

    def compute_association_rules(self):
        """
        Mine the association rules and store them in ``self.rules``.

        ``self.rules`` ends up with the columns ``consequents``,
        ``antecedents`` and ``confidence``. It is empty when no itemset
        reaches ``min_support``.
        """
        # One transaction per user; a single groupby pass replaces one full
        # dataset scan per user.
        item_sets = [
            items.tolist() for _, items in self.dataset.groupby("userId")["itemId"]
        ]
        te = TransactionEncoder()
        te_ary = te.fit(item_sets).transform(item_sets)
        # The te_ary object is a NumPy array, which is a valid input for a DataFrame.
        # Pylance may raise a false positive here due to incomplete type stubs for mlxtend.
        df = pd.DataFrame(te_ary.astype(bool), columns=te.columns_)  # type: ignore
        frequent_itemsets = apriori(
            df, min_support=self.min_support, use_colnames=True, max_len=self.max_len
        )
        if frequent_itemsets.empty:
            # association_rules rejects an empty input; no frequent itemset
            # simply means there is no rule to explain with.
            self.rules = pd.DataFrame(
                columns=["consequents", "antecedents", "confidence"]
            )
            return
        # Honour the configured metric (it used to be hard-coded to "lift").
        rules = association_rules(
            frequent_itemsets, metric=self.metric, min_threshold=self.min_threshold
        )
        # Copy so the column assignments below do not write into a slice.
        rules = rules[
            (rules["confidence"] > self.min_confidence)
            & (rules["lift"] > self.min_lift)
        ].copy()
        # Each side is reduced to one item: with the default max_len=2 every
        # rule has exactly one antecedent and one consequent.
        rules["consequents"] = rules["consequents"].apply(lambda x: next(iter(x)))
        rules["antecedents"] = rules["antecedents"].apply(lambda x: next(iter(x)))
        self.rules = rules[["consequents", "antecedents", "confidence"]]

    def explain_recommendation_to_user(
        self, user_id: int, item_id: int
    ) -> Dict[str, Any]:
        """
        Explain a recommendation by the user's items that imply it.

        :param user_id: the user receiving the recommendation
        :param item_id: the recommended item
        :return: ``{"antecedents": set}`` with the user's items that are the
            antecedent of a rule leading to ``item_id``
        """
        user_ratings = self.get_user_items(user_id)
        rules = self.get_rules_for_getting(item_id)
        explanations = rules[rules.antecedents.isin(user_ratings)]
        return {"antecedents": set(explanations.antecedents)}
+46
View File
@@ -0,0 +1,46 @@
from scipy import sparse
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from typing import Dict, Any
from .explainer import Explainer
class KNNPostHocExplainer(Explainer):
    """
    Post-hoc explainer based on item-item cosine similarity.

    A recommended item is explained by the items the user interacted with
    that are among its ``knn`` most similar items.
    """

    def __init__(self, model, recommendations, data, knn=10):
        """
        :param model: the recommender model (not used for the similarities)
        :param recommendations: table with ``userId`` and ``itemId`` columns
        :param data: object exposing ``dataset`` (with ``userId``, ``itemId``
            and ``rating`` columns), ``num_item`` and ``num_user``
        :param knn: number of nearest neighbours kept per item
        """
        super(KNNPostHocExplainer, self).__init__(model, recommendations, data)
        self.knn = knn
        # Initialize as an empty dictionary to prevent subscripting None
        self.knn_items_dict: Dict[int, np.ndarray] = {}

    def get_nn_for_getting(self, item_id: int) -> np.ndarray:
        """
        Nearest neighbours of an item, computing all neighbours on first call.

        :param item_id: the item
        :return: array of neighbour item ids, or an empty array if the item
            is unknown
        """
        # Check if the KNN dictionary has been computed
        if not self.knn_items_dict:
            self.compute_knn_items_for_all_items()
        # Return the neighbors for the item, or an empty array if not found
        return self.knn_items_dict.get(item_id, np.array([]))

    def compute_knn_items_for_all_items(self):
        """
        Fill ``self.knn_items_dict`` with the ``knn`` most similar items of
        every item, by cosine similarity of the items' rating vectors.
        """
        # Build the item x user matrix directly in sparse form rather than
        # allocating a dense (num_items x num_users) array first.
        # csr_matrix sums duplicate coordinates, so keep only the last rating
        # of each (item, user) pair, as a dense assignment would.
        interactions = self.dataset.drop_duplicates(
            subset=["itemId", "userId"], keep="last"
        )
        ds = sparse.csr_matrix(
            (
                interactions.rating.to_numpy(dtype=float),
                (interactions.itemId.to_numpy(), interactions.userId.to_numpy()),
            ),
            shape=(self.num_items, self.num_users),
        )
        sim_matrix = cosine_similarity(ds)
        # Push each item's self-similarity below every other value so that an
        # item is never ranked among its own nearest neighbours.
        min_val = sim_matrix.min() - 1
        for i in range(self.num_items):
            sim_matrix[i, i] = min_val
            ranked = (-sim_matrix[i, :]).argsort()
            # Copy the slice so the full-length ranking can be freed.
            self.knn_items_dict[i] = ranked[: self.knn].copy()

    def explain_recommendation_to_user(
        self, user_id: int, item_id: int
    ) -> Dict[str, Any]:
        """
        Explain a recommendation by the user's items that are similar to it.

        :param user_id: the user receiving the recommendation
        :param item_id: the recommended item
        :return: ``{"explanations": set}`` with the user's items that are
            among the nearest neighbours of ``item_id``
        """
        user_ratings = self.get_user_items(user_id)
        sim_items = self.get_nn_for_getting(item_id)
        explanations = set(sim_items) & set(user_ratings)
        return {"explanations": explanations}