public code v1

2026-05-22 10:02:10 +02:00
commit 46a9ecf065
166 changed files with 6982454 additions and 0 deletions
@@ -0,0 +1,18 @@
+from .individual.model_based_emf import EMFExplainer
+from .individual.model_based_als_explain import ALSExplainer
+from .individual.post_hoc_association_rules import ARPostHocExplainer
+from .individual.post_hoc_knn import KNNPostHocExplainer
+from .groups.rule_based_group_rec_explainer import RuleBasedGroupRecExplainer
+from .groups.sliding_window_explainer import SlidingWindowExplainer
+from .groups.lore4groups_explainer import LORE4GroupsExplainer
+
+
+# Names re-exported by this package: the explainers imported from the
+# `.individual` sub-package first, followed by those from `.groups`.
+__all__ = [
+    "EMFExplainer",
+    "ALSExplainer",
+    "ARPostHocExplainer",
+    "KNNPostHocExplainer",
+    "RuleBasedGroupRecExplainer",
+    "SlidingWindowExplainer",
+    "LORE4GroupsExplainer",
+]
@@ -0,0 +1,10 @@
+from .rule_based_group_rec_explainer import RuleBasedGroupRecExplainer
+from .sliding_window_explainer import SlidingWindowExplainer
+from .lore4groups_explainer import LORE4GroupsExplainer
+
+
+# Names re-exported by this package (the three explainers imported above).
+__all__ = [
+    "RuleBasedGroupRecExplainer",
+    "SlidingWindowExplainer",
+    "LORE4GroupsExplainer",
+]
@@ -0,0 +1,731 @@
+import pandas as pd
+import numpy as np
+import re
+import logging
+import traceback
+from collections import Counter
+from typing import Dict, Set, List, Optional, Any, Tuple, Union
+from sklearn.tree import DecisionTreeClassifier, _tree
+
+# Item and user identifiers may be given either as strings or as integers.
+ItemId = Union[str, int]
+UserId = Union[str, int]
+# A factual rule is a list of human-readable condition strings.
+FactualRule = List[str]
+# A counterfactual set is a list of alternative paths, each a list of
+# condition strings.
+CounterfactualSet = List[List[str]]
+# An explanation pairs a factual rule with a counterfactual set; either part
+# may be None.
+Explanation = Tuple[Optional[FactualRule], Optional[CounterfactualSet]]
+
+
+class LORE4GroupsExplainer:
+    """
+    Enhanced LORE4Groups explainer that incorporates genre information
+    and stores decision trees for visualization
+    """
+
+    def __init__(
+        self,
+        item_profiles: Dict[str, Set[str]],
+        item_label_matrix: pd.DataFrame,
+        config: Dict,
+        genre_profiles: Optional[Dict[str, Set[str]]] = None,
+    ):
+        """
+        Store the item data and the LORE4Groups parameters.
+
+        Args:
+            item_profiles: Mapping from item id to its set of tags. Keys are
+                normalised to strings.
+            item_label_matrix: Item-by-label matrix. A "like" column, when
+                present, is kept at the end of ``all_labels``.
+            config: Configuration dictionary; the section
+                ``config["explainer"]["lore4groups"]`` becomes ``self.params``.
+            genre_profiles: Optional mapping from item id to its set of
+                genres. Keys are normalised to strings; a missing or empty
+                mapping results in an empty dictionary.
+        """
+        self.item_profiles = {
+            str(item_key): tags for item_key, tags in item_profiles.items()
+        }
+        self.item_label_matrix = item_label_matrix
+        self.params = config["explainer"]["lore4groups"]
+
+        # Genre information is optional.
+        self.genre_profiles = {}
+        if genre_profiles:
+            self.genre_profiles = {
+                str(item_key): genres for item_key, genres in genre_profiles.items()
+            }
+
+        # Every column except "like" first; "like" (if any) goes last so the
+        # target stays reachable without being mixed in among the features.
+        columns = item_label_matrix.columns.tolist()
+        labels = [name for name in columns if name != "like"]
+        if "like" in columns:
+            labels.append("like")
+        self.all_labels = labels
+
+    def _enhanced_jaccard_similarity(self, item1_id: ItemId, item2_id: ItemId) -> float:
+        """
+        Jaccard similarity of two items over their tags and genres combined.
+
+        Each genre is turned into a pseudo-tag ``genre_<lowercased name>`` and
+        merged with the item's tags before the sets are compared. Returns 0.0
+        when either item has no features at all.
+        """
+
+        def combined_features(item_id):
+            # Profiles are keyed by the string form of the item id.
+            key = str(item_id)
+            tags = self.item_profiles.get(key, set())
+            genres = self.genre_profiles.get(key, set())
+            return tags.union({f"genre_{g.lower()}" for g in genres})
+
+        first = combined_features(item1_id)
+        second = combined_features(item2_id)
+        if not (first and second):
+            return 0.0
+
+        union_size = len(first.union(second))
+        if union_size <= 0:
+            return 0.0
+        shared_size = len(first.intersection(second))
+        return shared_size / union_size
+
+    def _jaccard_similarity(self, item1_id: ItemId, item2_id: ItemId) -> float:
+        """
+        Tag-only Jaccard similarity of two items (kept for compatibility).
+
+        Returns 0.0 when either item has no tags.
+        """
+        first = self.item_profiles.get(str(item1_id), set())
+        second = self.item_profiles.get(str(item2_id), set())
+        if not (first and second):
+            return 0.0
+
+        union_size = len(first.union(second))
+        if union_size <= 0:
+            return 0.0
+        return len(first.intersection(second)) / union_size
+
+    def _get_enhanced_similar_examples(
+        self,
+        user_id_consecutive: UserId,
+        target_item_id: ItemId,
+        user_hist: Set[ItemId],
+        dataset: pd.DataFrame,
+        model=None,
+        data_reader=None,
+    ) -> Tuple[pd.DataFrame, Dict[str, Any]]:
+        """
+        Build the local training set used to explain one user/item pair.
+
+        The items of ``user_hist`` most similar to ``target_item_id`` (tags
+        plus genres) are looked up in ``dataset`` for the given user. When both
+        ``model`` and ``data_reader`` are supplied, similar items without a
+        rating receive a predicted one. Ratings are then binarised into a
+        "like" target and every item is encoded as 0/1 tag and genre features.
+
+        Args:
+            user_id_consecutive: User id as stored in ``dataset["userId"]``.
+            target_item_id: Item whose recommendation is being explained.
+            user_hist: Items the user interacted with; candidates for the
+                local neighbourhood.
+            dataset: Ratings frame with "userId", "itemId" and "rating" columns.
+            model: Optional object exposing ``predict(user, item)``.
+            data_reader: Optional object exposing ``get_original_user_id`` and
+                ``get_original_item_id``.
+
+        Returns:
+            ``(examples, metadata)``. Both are empty (``pd.DataFrame()``,
+            ``{}``) when no similar item passes the similarity threshold, fewer
+            than two examples are available, or the "like" target cannot be
+            split into two usable classes.
+        """
+
+        # 1. Rank the user's history by enhanced similarity to the target item
+        similarities = sorted(
+            (
+                (seen_id, self._enhanced_jaccard_similarity(target_item_id, seen_id))
+                for seen_id in user_hist
+            ),
+            key=lambda pair: pair[1],
+            reverse=True,
+        )
+
+        sim_th = self.params.get("similarity_threshold", 0.0)
+        top_similar_items = {
+            seen_id
+            for seen_id, score in similarities[: self.params["n_similar_for_tree"]]
+            if score >= sim_th
+        }
+
+        if not top_similar_items:
+            return pd.DataFrame(), {}
+
+        # 2. Build the local dataset
+        top_similar_items_int = [int(i) for i in top_similar_items]
+
+        # Ratings the user already gave to the similar items
+        local_df = dataset[
+            (dataset["userId"] == user_id_consecutive)
+            & (dataset["itemId"].isin(top_similar_items_int))
+        ].copy()
+
+        rated_items = set(local_df["itemId"])
+        items_to_predict = [
+            item for item in top_similar_items_int if item not in rated_items
+        ]
+
+        # Fill in predictions for the similar items the user has not rated
+        if model and data_reader and items_to_predict:
+            try:
+                orig_user_id = data_reader.get_original_user_id(
+                    int(user_id_consecutive)
+                )
+                predicted_ratings = []
+
+                for item_id_consecutive in items_to_predict:
+                    orig_item_id = data_reader.get_original_item_id(
+                        int(item_id_consecutive)
+                    )
+                    pred = model.predict(orig_user_id, orig_item_id)
+                    predicted_ratings.append(
+                        {
+                            "userId": user_id_consecutive,
+                            "itemId": item_id_consecutive,
+                            "rating": float(pred),
+                        }
+                    )
+
+                if predicted_ratings:
+                    pred_df = pd.DataFrame(predicted_ratings)
+                    local_df = pd.concat([local_df, pred_df], ignore_index=True)
+
+            except Exception:
+                # Best effort: keep going with the observed ratings only, but
+                # report the failure through logging instead of printing the
+                # traceback straight to stderr.
+                logging.exception(
+                    "Rating prediction failed for user %s; "
+                    "continuing with observed ratings only",
+                    user_id_consecutive,
+                )
+
+        # At least two examples are needed to have two classes
+        if len(local_df) < 2:
+            return pd.DataFrame(), {}
+
+        # 3. Binarise ratings, falling back to a mean-based threshold
+        rating_threshold = self.params["rating_threshold_for_like"]
+
+        threshold_info = {
+            "was_overridden": False,
+            "original_threshold": rating_threshold,
+            "final_threshold": rating_threshold,
+        }
+
+        local_df["like"] = (local_df["rating"] >= rating_threshold).astype(int)
+        like_counts = local_df["like"].value_counts()
+
+        if len(like_counts) < 2:
+            # Configured threshold yields a single class: split at the mean
+            mean_rating = local_df["rating"].mean()
+            local_df["like"] = (local_df["rating"] >= mean_rating).astype(int)
+            threshold_info["was_overridden"] = True
+            threshold_info["final_threshold"] = mean_rating
+            like_counts = local_df["like"].value_counts()
+            if len(like_counts) < 2:
+                return pd.DataFrame(), {}
+
+        # Severe imbalance (>90% one class) is only tolerated when the
+        # minority class still has at least two examples
+        min_class_ratio = like_counts.min() / len(local_df)
+        if min_class_ratio < 0.1 and like_counts.min() < 2:
+            return pd.DataFrame(), {}
+
+        # 4. Construct the enhanced feature matrix (including genres)
+        feature_labels = [label for label in self.all_labels if label != "like"]
+
+        examples = []
+        genre_features_used = set()
+
+        for _, row in local_df.iterrows():
+            item_id = str(int(row["itemId"]))
+            tags = self.item_profiles.get(item_id, set())
+            genres = self.genre_profiles.get(item_id, set())
+
+            # Base example with the target variables
+            example = {
+                "movie_id": item_id,
+                "rating": row["rating"],
+                "like": int(row["like"]),
+            }
+
+            # Tag features (excluding 'like')
+            for label in feature_labels:
+                example[label] = 1 if label in tags else 0
+
+            # Genre features are discovered dynamically, item by item
+            for genre in genres:
+                genre_feature = f"genre_{genre.lower()}"
+                example[genre_feature] = 1
+                genre_features_used.add(genre_feature)
+
+                # Register the new column so it becomes a model feature
+                if genre_feature not in feature_labels:
+                    feature_labels.append(genre_feature)
+
+            examples.append(example)
+
+        # Examples built before a genre was first seen lack that column
+        for example in examples:
+            for genre_feature in genre_features_used:
+                if genre_feature not in example:
+                    example[genre_feature] = 0
+
+        final_df = pd.DataFrame(examples)
+
+        # Final validation
+        if final_df["like"].nunique() < 2:
+            return pd.DataFrame(), {}
+
+        # Metadata consumed by tree training and visualization
+        metadata = {
+            "feature_labels": [label for label in feature_labels if label != "like"],
+            "genre_features": list(genre_features_used),
+            "similarity_scores": dict(similarities[:5]),  # Top 5 similarities
+            "target_item_genres": self.genre_profiles.get(str(target_item_id), set()),
+            "rating_threshold": threshold_info["final_threshold"],
+            "threshold_info": threshold_info,
+        }
+
+        return final_df, metadata
+
+    def _get_factual_path_for_item(
+        self,
+        clf: DecisionTreeClassifier,
+        x_item: pd.DataFrame,
+        metadata: Dict[str, Any],
+    ) -> Optional[List[str]]:
+        """
+        Describe the route a single item takes through the fitted tree.
+
+        Every internal node on the item's decision path contributes one rule:
+        genre features are phrased as "Has genre" / "Does NOT have genre",
+        all other features as ``<name> <= <threshold>`` or
+        ``<name> > <threshold>`` with the threshold shown to two decimals.
+
+        Returns:
+            The list of rules, or None when ``metadata`` carries no feature
+            labels or the path contains no split.
+        """
+        feature_labels = metadata.get("feature_labels", [])
+        if not feature_labels:
+            return None
+
+        # Node ids visited by the first (and only) row of x_item
+        path_matrix = clf.decision_path(x_item)
+        first_row = slice(path_matrix.indptr[0], path_matrix.indptr[1])  # type: ignore
+        visited = path_matrix.indices[first_row]  # type: ignore
+
+        tree = clf.tree_
+        rules = []
+
+        # Pair each node with its successor; the final node is the leaf and
+        # therefore never appears as the first element of a pair.
+        for node_id, child_node_id in zip(visited[:-1], visited[1:]):
+            feature_idx = tree.feature[node_id]  # type: ignore
+            if feature_idx == _tree.TREE_UNDEFINED:  # type: ignore
+                continue
+
+            feature_name = feature_labels[feature_idx]
+            # Left child means the "<= threshold" test held for the item
+            went_left = child_node_id == tree.children_left[node_id]  # type: ignore
+
+            if feature_name.startswith("genre_"):
+                genre_name = feature_name.replace("genre_", "").title()
+                if went_left:
+                    rules.append(f"Does NOT have genre: `{genre_name}`")
+                else:
+                    rules.append(f"Has genre: `{genre_name}`")
+            else:
+                threshold = tree.threshold[node_id]  # type: ignore
+                comparator = "<=" if went_left else ">"
+                rules.append(f"{feature_name} {comparator} {threshold:.2f}")
+
+        return rules if rules else None
+
+    def _train_enhanced_decision_tree(
+        self,
+        user_id_consecutive: UserId,
+        item_id: ItemId,
+        user_hist: Set[ItemId],
+        dataset: pd.DataFrame,
+        model=None,
+        data_reader=None,
+    ) -> Tuple[Optional[DecisionTreeClassifier], Dict[str, Any]]:
+        """
+        Fit a local decision tree for one user around one item.
+
+        The training examples come from ``_get_enhanced_similar_examples``.
+
+        Returns:
+            ``(classifier, metadata)`` on success; the metadata is extended
+            with the classifier, feature importances, training size and class
+            distribution. ``(None, {})`` is returned when there are no
+            examples, either class has fewer than two examples, no feature
+            varies across the examples, or fitting fails.
+        """
+
+        df_examples, metadata = self._get_enhanced_similar_examples(
+            user_id_consecutive, item_id, user_hist, dataset, model, data_reader
+        )
+
+        if df_examples.empty:
+            return None, {}
+
+        like_counts = df_examples["like"].value_counts()
+
+        # Both classes must be present with at least two examples each
+        if len(like_counts) < 2 or like_counts.min() < 2:
+            return None, {}
+
+        feature_labels = metadata.get("feature_labels", [])
+        X = df_examples[feature_labels]
+        y = df_examples["like"]
+
+        # A tree cannot split on features that are constant everywhere
+        feature_variances = X.var()
+        if (feature_variances == 0).all():
+            return None, {}
+
+        clf = DecisionTreeClassifier(
+            max_depth=5,  # Slightly deeper to accommodate genre features
+            min_samples_split=max(4, len(df_examples) // 4),
+            min_samples_leaf=2,
+            random_state=42,
+            class_weight="balanced",
+        )
+
+        try:
+            clf.fit(X, y)
+
+            # Enhanced feature importance analysis
+            feature_importance = list(zip(feature_labels, clf.feature_importances_))
+            important_features = [
+                (f, imp) for f, imp in feature_importance if imp > 0.001
+            ]
+            genre_important_features = [
+                (f, imp) for f, imp in important_features if f.startswith("genre_")
+            ]
+
+            # Add classifier and feature info to metadata
+            metadata.update(
+                {
+                    "classifier": clf,
+                    "feature_importance": dict(feature_importance),
+                    "important_features": important_features,
+                    "genre_important_features": genre_important_features,
+                    "training_data_size": len(df_examples),
+                    "class_distribution": like_counts.to_dict(),
+                }
+            )
+
+            return clf, metadata
+
+        except Exception:
+            # The caller treats (None, {}) as "no explanation for this user";
+            # record the reason instead of discarding it silently.
+            logging.exception(
+                "Decision tree training failed for user %s and item %s",
+                user_id_consecutive,
+                item_id,
+            )
+            return None, {}
+
+    def _get_enhanced_explanation_path(
+        self,
+        clf: DecisionTreeClassifier,
+        x_item: pd.DataFrame,
+        metadata: Dict[str, Any],
+    ) -> Optional[List[str]]:
+        """
+        Describe the item's decision path, but only for "like" leaves.
+
+        Returns None when the tree never saw class 1 or when the leaf reached
+        by the item holds no class-1 weight. Otherwise returns one rule per
+        split on the path (possibly an empty list); genre features are phrased
+        as "Has genre" / "Does NOT have genre", other features as
+        ``<name> <= <threshold>`` or ``<name> > <threshold>``.
+        """
+
+        if 1 not in clf.classes_:
+            return None
+
+        tree = clf.tree_
+        leaf_id = clf.apply(x_item)[0]  # type: ignore
+        positive_idx = np.where(clf.classes_ == 1)[0]
+        if not positive_idx.size or tree.value[leaf_id][0][positive_idx[0]] == 0:  # type: ignore
+            return None
+
+        # Node ids visited by the first (and only) row of x_item
+        path_matrix = clf.decision_path(x_item)
+        first_row = slice(path_matrix.indptr[0], path_matrix.indptr[1])  # type: ignore
+        visited = path_matrix.indices[first_row]  # type: ignore
+
+        feature_labels = metadata.get("feature_labels", [])
+        rules = []
+
+        # The last visited node is the leaf, so it is only ever a successor
+        for node_id, next_node_id in zip(visited[:-1], visited[1:]):
+            feature_idx = tree.feature[node_id]  # type: ignore
+            if feature_idx == _tree.TREE_UNDEFINED:  # type: ignore
+                continue
+
+            feature_name = feature_labels[feature_idx]
+            went_left = next_node_id == tree.children_left[node_id]  # type: ignore
+
+            if feature_name.startswith("genre_"):
+                genre_name = feature_name.replace("genre_", "").title()
+                if went_left:
+                    rules.append(f"Does NOT have genre: `{genre_name}`")
+                else:
+                    rules.append(f"Has genre: `{genre_name}`")
+            else:
+                # Regular tag features keep the raw threshold value
+                threshold = tree.threshold[node_id]  # type: ignore
+                comparator = "<=" if went_left else ">"
+                rules.append(f"{feature_name} {comparator} {threshold}")
+
+        return rules
+
+    def _generate_enhanced_individual_explanation(
+        self, clf: DecisionTreeClassifier, item_id: ItemId, metadata: Dict[str, Any]
+    ) -> Optional[Explanation]:
+        """
+        Produce the (factual rule, counterfactual set) pair for one item.
+
+        The item's row of ``item_label_matrix`` is extended with the genre
+        columns the tree was trained on, then pushed through the tree.
+
+        Returns:
+            ``(factual_rule, counterfactual_set)``, or None when the item is
+            not in the label matrix, a training feature is missing from the
+            item row, or either part of the explanation cannot be derived.
+        """
+        item_key = str(item_id)
+        if item_key not in self.item_label_matrix.index:
+            return None
+
+        enhanced_item_data = self.item_label_matrix.loc[[item_key]].copy()
+        feature_labels = metadata.get("feature_labels", [])
+
+        try:
+            # Genre pseudo-features carried by this particular item
+            own_genre_features = {
+                f"genre_{genre.lower()}"
+                for genre in self.genre_profiles.get(item_key, set())
+            }
+
+            for feature in feature_labels:
+                if feature in own_genre_features:
+                    enhanced_item_data[feature] = 1
+                elif (
+                    feature.startswith("genre_")
+                    and feature not in enhanced_item_data.columns
+                ):
+                    # Genre known to the tree but absent from this item
+                    enhanced_item_data[feature] = 0
+
+            # Keep exactly the training features, in training order
+            x_item = enhanced_item_data[feature_labels]
+
+        except KeyError:
+            return None
+
+        factual_rule = self._get_factual_path_for_item(clf, x_item, metadata)
+        if not factual_rule:
+            return None
+
+        counterfactual_set = self._get_counterfactual_paths(clf, x_item)
+        if not counterfactual_set:
+            return None
+
+        return (factual_rule, counterfactual_set)
+
+    def _get_counterfactual_paths(
+        self, clf: DecisionTreeClassifier, x_item: pd.DataFrame
+    ) -> Optional[CounterfactualSet]:
+        """
+        Find the "dislike" paths of the tree that are closest to the item.
+
+        Every root-to-leaf path ending in a leaf with some class-0 weight is
+        collected. For each one the number of conditions the item violates is
+        counted, and the paths with the fewest violations are kept.
+
+        Conditions on genre features are phrased as "Has genre" /
+        "Does NOT have genre"; other conditions as ``<name> <op> <threshold>``
+        using the split's own threshold (0/1 features split at 0.5, so they
+        still read ``... 0.5``).
+
+        Returns:
+            A list of formatted paths, or None when the tree has no class-0
+            leaf or no path yields any formatted condition.
+        """
+        tree = clf.tree_
+        # Position of class 0 in the tree's value arrays (empty if absent);
+        # it does not change during the traversal, so look it up once.
+        negative_idx = np.where(clf.classes_ == 0)[0]
+        paths = []
+
+        def find_paths(node_id, current_path):
+            if tree.feature[node_id] == _tree.TREE_UNDEFINED:  # type: ignore
+                # Leaf: keep the path if any class-0 weight ended up here
+                if negative_idx.size and tree.value[node_id][0][negative_idx[0]] > 0:
+                    paths.append(list(current_path))
+                return
+            feature_idx = tree.feature[node_id]  # type: ignore
+            threshold = tree.threshold[node_id]  # type: ignore
+            current_path.append((feature_idx, "<=", threshold))
+            find_paths(tree.children_left[node_id], current_path)  # type: ignore
+            current_path.pop()
+            current_path.append((feature_idx, ">", threshold))
+            find_paths(tree.children_right[node_id], current_path)  # type: ignore
+            current_path.pop()
+
+        find_paths(0, [])
+        if not paths:
+            return None
+
+        # Keep the paths that require the fewest feature changes
+        min_nf = float("inf")
+        counterfactuals = []
+        for path in paths:
+            nf = 0
+            for feature_idx, op, threshold in path:
+                if feature_idx < len(x_item.columns):
+                    item_val = x_item.iloc[0, feature_idx]
+                    satisfied = (op == "<=" and item_val <= threshold) or (
+                        op == ">" and item_val > threshold
+                    )
+                    if not satisfied:
+                        nf += 1
+            if nf < min_nf:
+                min_nf = nf
+                counterfactuals = [path]
+            elif nf == min_nf:
+                counterfactuals.append(path)
+
+        # Enhanced counterfactual formatting
+        formatted_counterfactuals = []
+        for cf_path in counterfactuals:
+            formatted_path = []
+            for idx, op, threshold in cf_path:
+                if idx < len(x_item.columns):
+                    feature_name = x_item.columns[idx]
+                    if feature_name.startswith("genre_"):
+                        genre_name = feature_name.replace("genre_", "").title()
+                        if op == "<=":
+                            formatted_path.append(
+                                f"Does NOT have genre: `{genre_name}`"
+                            )
+                        else:
+                            formatted_path.append(f"Has genre: `{genre_name}`")
+                    else:
+                        # Report the actual split value rather than assuming
+                        # 0.5; ":g" renders 0.5 as "0.5".
+                        formatted_path.append(f"{feature_name} {op} {threshold:g}")
+            if formatted_path:
+                formatted_counterfactuals.append(formatted_path)
+
+        return formatted_counterfactuals if formatted_counterfactuals else None
+
+    def _aggregate_factual_rules(
+        self, individual_explanations: Dict[UserId, List[str]], total_group_size: int
+    ) -> Dict[str, List[str]]:
+        """
+        Aggregate individual factual rules into a group consensus.
+
+        Each rule is counted once per member who holds it, contradictory rules
+        are dropped, and the rest are bucketed by support:
+
+        - "unanimous": held by every member of the group,
+        - "majority": held by more than half of the members,
+        - "minority": held by at least one member.
+
+        Args:
+            individual_explanations: Mapping from member id to that member's
+                list of rules.
+            total_group_size: Number of members in the group, including those
+                without an explanation.
+
+        Returns:
+            Dictionary with the keys "unanimous", "majority" and "minority";
+            every entry is formatted as ``"<rule> (<count>/<size> members)"``
+            and each list is sorted by rule text.
+        """
+
+        # Count members, not occurrences: a rule repeated inside one member's
+        # list must not inflate its support.
+        rule_counts = Counter(
+            rule
+            for member_rules in individual_explanations.values()
+            for rule in set(member_rules or ())
+        )
+
+        categorized_rules = {"unanimous": [], "majority": [], "minority": []}
+        if not rule_counts:
+            return categorized_rules
+
+        majority_threshold = (total_group_size // 2) + 1 if total_group_size > 1 else 1
+        cleaned_rules_set = self._clean_contradictory_rules(set(rule_counts))
+
+        for rule in sorted(cleaned_rules_set):
+            count = rule_counts[rule]
+            rule_with_support = f"{rule} ({count}/{total_group_size} members)"
+
+            if count == total_group_size:
+                categorized_rules["unanimous"].append(rule_with_support)
+            elif count >= majority_threshold:
+                categorized_rules["majority"].append(rule_with_support)
+            else:
+                # Every counted rule is held by at least one member
+                categorized_rules["minority"].append(rule_with_support)
+
+        return categorized_rules
+
+    def _clean_contradictory_rules(self, rules_set: Set[str]) -> Set[str]:
+        """
+        Drop every rule whose attribute is constrained in opposite directions.
+
+        A genre is contradictory when both "Has genre" and "Does NOT have
+        genre" rules exist for it. Any other attribute is contradictory when
+        it has both an upper-bound (``<=`` / ``<``) and a lower-bound
+        (``>`` / ``>=``) rule. All rules on a contradictory attribute are
+        removed; rules that cannot be parsed are kept unchanged.
+
+        Args:
+            rules_set: Rule strings as produced by the factual-path methods.
+
+        Returns:
+            A new set containing the non-contradictory rules.
+        """
+
+        def parse(rule):
+            """Return ((kind, attribute), operator), or None if unparseable."""
+            if "Has genre:" in rule or "Does NOT have genre:" in rule:
+                genre_match = re.search(r"`([^`]+)`", rule)
+                if genre_match is None:
+                    return None
+                op = "has" if "Has genre:" in rule else "not_has"
+                return ("genre", genre_match.group(1)), op
+
+            match = re.match(r"(.+?)\s*([<>]=?)\s*(\d+\.?\d*)", rule)
+            if match is None:
+                return None
+            return ("numeric", match.group(1).strip()), match.group(2)
+
+        # Parse each rule once so removal can compare attributes exactly; a
+        # substring test would also drop rules on attributes that merely
+        # contain the contradictory name (e.g. "action" vs "action_hero").
+        parsed = {rule: parse(rule) for rule in rules_set}
+
+        ops_by_attr = {}
+        for entry in parsed.values():
+            if entry is not None:
+                attr_key, op = entry
+                ops_by_attr.setdefault(attr_key, set()).add(op)
+
+        upper_bounds = {"<=", "<"}
+        lower_bounds = {">", ">="}
+        invalid_attrs = set()
+        for attr_key, ops in ops_by_attr.items():
+            if attr_key[0] == "genre":
+                contradictory = "has" in ops and "not_has" in ops
+            else:
+                contradictory = bool(ops & upper_bounds) and bool(ops & lower_bounds)
+            if contradictory:
+                invalid_attrs.add(attr_key)
+
+        return {
+            rule
+            for rule, entry in parsed.items()
+            if entry is None or entry[0] not in invalid_attrs
+        }
+
+    def find_explanation(
+        self,
+        recommended_items: List[ItemId],
+        members: List[UserId],
+        user_hist: Dict[UserId, Set[ItemId]],
+        dataset: pd.DataFrame,
+        model=None,
+        data_reader=None,
+    ) -> Dict[str, Any]:
+        """
+        Explain each recommended item for a group of members.
+
+        For every item a local decision tree is trained per member; the
+        individual factual rules are aggregated into a group rule, and the
+        first successful member's tree is kept for visualization.
+
+        Args:
+            recommended_items: Items recommended to the group.
+            members: Ids of the group members.
+            user_hist: Interaction history per member id.
+            dataset: Ratings frame passed on to tree training.
+            model: Optional rating predictor passed on to tree training.
+            data_reader: Object exposing ``get_new_user_id``; required.
+
+        Returns:
+            ``{"fidelity": <share of explained items>, "details": {...}}``
+            where ``details`` maps each explained item id to its explanation.
+
+        Raises:
+            ValueError: If ``data_reader`` is None.
+        """
+        if data_reader is None:
+            raise ValueError(
+                "A 'data_reader' object must be provided to find explanations."
+            )
+
+        if not recommended_items:
+            return {"fidelity": 0.0, "details": {}}
+
+        detailed_explanations = {}
+        explainable_count = 0
+        group_size = len(members)
+
+        for item_id in recommended_items:
+            rules_by_member = {}
+            counterfactuals_by_member = {}
+            representative_decision_path = None
+            threshold_info_for_item = None
+            viz_source = None  # (member id, classifier, metadata) of first success
+
+            for user_id in members:
+                clf, metadata = self._train_enhanced_decision_tree(
+                    data_reader.get_new_user_id(user_id),
+                    item_id,
+                    user_hist.get(user_id, set()),
+                    dataset,
+                    model,
+                    data_reader,
+                )
+                if not (clf and metadata):
+                    continue
+
+                # Remember the thresholding of the first trained tree
+                if threshold_info_for_item is None and "threshold_info" in metadata:
+                    threshold_info_for_item = metadata["threshold_info"]
+
+                explanation = self._generate_enhanced_individual_explanation(
+                    clf, item_id, metadata
+                )
+                if not explanation:
+                    continue
+
+                factual_rule, counterfactual_set = explanation
+                rules_by_member[user_id] = factual_rule
+                counterfactuals_by_member[user_id] = counterfactual_set
+
+                if representative_decision_path is None:
+                    representative_decision_path = factual_rule
+                if viz_source is None:
+                    viz_source = (user_id, clf, metadata)
+
+            factual_set = self._aggregate_factual_rules(rules_by_member, group_size)
+
+            if not (representative_decision_path and factual_set):
+                continue
+
+            explainable_count += 1
+
+            item_explanation = {
+                "decision_path": representative_decision_path,
+                "group_factual_rule": factual_set,
+                "individual_counterfactuals": counterfactuals_by_member,
+            }
+
+            if threshold_info_for_item:
+                item_explanation["threshold_info"] = threshold_info_for_item
+
+            # Attach the visualization data of the first successful member
+            if viz_source is not None:
+                _, viz_clf, viz_metadata = viz_source
+                item_explanation["decision_tree"] = viz_clf
+                item_explanation["feature_names"] = viz_metadata.get(
+                    "feature_labels", []
+                )
+                item_explanation["tree_metadata"] = viz_metadata
+                item_explanation["item_genres"] = self.genre_profiles.get(
+                    str(item_id), set()
+                )
+
+            detailed_explanations[item_id] = item_explanation
+
+        if recommended_items:
+            fidelity = explainable_count / len(recommended_items)
+        else:
+            fidelity = 0.0
+
+        logging.info(
+            f"Enhanced fidelity for {members}: {fidelity:.3f} ({explainable_count}/{len(recommended_items)})"
+        )
+
+        return {
+            "fidelity": fidelity,
+            "details": detailed_explanations,
+        }
@@ -0,0 +1,314 @@
+"""Rule-based group recommendation explainer module."""
+
+from typing import Dict, List, Optional, Set, Union
+import logging
+
+from pygrex.data_reader.data_reader import DataReader
+from pygrex.utils.association_rules import AssociationRules
+
+# Type aliases for better readability
+ItemId = Union[str, int]
+MemberId = Union[str, int]
+UserHistory = Dict[MemberId, Set[ItemId]]
+
+logger = logging.getLogger(__name__)
+
+
+class RuleBasedGroupRecExplainer:
+    """
+    A class to explain group recommendations using rule-based methods.
+
+    This class provides methods to generate explanations for group recommendations
+    based on association rules and user interaction history.
+    """
+
+    def __init__(
+        self,
+        rules: AssociationRules,
+        data: DataReader,
+        pool_recommendations: Optional[Union[List[ItemId], ItemId]] = None,
+        members: Optional[List[MemberId]] = None,
+        user_history: Optional[UserHistory] = None,
+        min_members_threshold: int = 1,
+    ) -> None:
+        """
+        Initialize the RuleBasedGroupRecExplainer.
+
+        Args:
+            rules: An instance of AssociationRules containing the rules for explanations.
+            pool_recommendations: A list of item IDs to explain, or a single item ID.
+            members: A list of member IDs in the group.
+            user_history: A dictionary mapping member IDs to sets of item IDs
+                         they have interacted with.
+            min_members_threshold: Minimum number of members that must satisfy
+                                 the rule for it to be considered valid.
+
+        Raises:
+            ValueError: If min_members_threshold is less than 1.
+        """
+        if min_members_threshold < 1:
+            raise ValueError("min_members_threshold must be at least 1")
+
+        self.rules = rules
+        self.members = members or []
+        self.min_members_threshold = min_members_threshold
+        self.user_history = user_history or {}
+        self.data = data
+
+        # Normalize pool_recommendations to always be a list
+        self.pool_recommendations = self._normalize_recommendations(
+            pool_recommendations
+        )
+
+    def _normalize_recommendations(
+        self, recommendations: Optional[Union[List[ItemId], ItemId]]
+    ) -> List[ItemId]:
+        """
+        Normalize recommendations input to a list format.
+
+        Args:
+            recommendations: Single item ID, list of item IDs, or None.
+
+        Returns:
+            List of item IDs.
+        """
+        if recommendations is None:
+            return []
+
+        if isinstance(recommendations, (str, int)):
+            return [recommendations]
+
+        return recommendations
+
+    def _is_rule_satisfied_by_member(
+        self, member: MemberId, antecedent: Set[ItemId]
+    ) -> bool:
+        """
+        Check if a member satisfies the rule's antecedent.
+
+        Args:
+            member: The member ID to check.
+            antecedent: The set of items that form the rule's antecedent.
+
+        Returns:
+            True if the member's history contains all items in the antecedent.
+        """
+
+        member_history = self.user_history.get(member, set())
+        member_history_str = {str(item) for item in member_history}
+
+        x = member_history_str.issuperset(antecedent)
+        return x
+
+    def _count_satisfied_members(self, antecedent: Set[ItemId]) -> int:
+        """
+        Count how many members satisfy the given antecedent.
+
+        Args:
+            antecedent: The set of items that form the rule's antecedent.
+
+        Returns:
+            Number of members whose history satisfies the antecedent.
+        """
+        return sum(
+            1
+            for member in self.members
+            if self._is_rule_satisfied_by_member(member, antecedent)
+        )
+
+    def _find_applicable_rules(self, item_id: ItemId):
+        """
+        Find rules that have the given item in their consequents.
+
+        Args:
+            item_id: The item ID to find rules for.
+
+        Returns:
+            DataFrame containing applicable rules.
+        """
+        item_id = self.data.get_new_item_id(item_id)  # type: ignore
+
+        applicable_rules = self.rules[  # type: ignore
+            self.rules["consequents"].apply(lambda x: str(item_id) in x)  # type: ignore
+        ]
+
+        return applicable_rules
+
+    def find_explanation(self) -> float:
+        """
+        Generate explanations for the group recommendations based on the rules.
+
+        Returns:
+            The fidelity of the explanations, which is the ratio of explained
+            recommendations to total recommendations in the pool.
+        """
+        if not self.pool_recommendations:
+            logger.warning("No recommendations to explain")
+            return 0.0
+
+        explained_count = 0
+        total_recommendations = len(self.pool_recommendations)
+
+        for item_id in self.pool_recommendations:
+            if self._can_explain_item(item_id):
+                explained_count += 1
+
+        fidelity = explained_count / total_recommendations
+        logger.info(
+            f"Explained {explained_count}/{total_recommendations} recommendations "
+            f"(fidelity: {fidelity:.3f})"
+        )
+
+        return fidelity
+
+    def _can_explain_item(self, item_id: ItemId) -> bool:
+        """
+        Check if an item can be explained by any rule.
+
+        Args:
+            item_id: The item ID to check.
+
+        Returns:
+            True if at least one rule can explain the item.
+        """
+        applicable_rules = self._find_applicable_rules(item_id)
+
+        for _, rule in applicable_rules.iterrows():
+            antecedent = rule["antecedents"]
+            satisfied_count = self._count_satisfied_members(antecedent)
+
+            if satisfied_count >= self.min_members_threshold:
+                logger.debug(f"Rule fired for item {item_id}")
+                return True
+
+        return False
+
+    def get_explanation_details(self) -> Dict[ItemId, List[Dict]]:
+        """
+        Get detailed explanations for each recommendation.
+
+        Returns:
+            Dictionary mapping item IDs to lists of applicable rule details.
+        """
+        explanations = {}
+
+        for item_id in self.pool_recommendations:
+            item_explanations = []
+            applicable_rules = self._find_applicable_rules(item_id)
+
+            for _, rule in applicable_rules.iterrows():
+                antecedent = rule["antecedents"]
+                satisfied_count = self._count_satisfied_members(antecedent)
+
+                if satisfied_count >= self.min_members_threshold:
+                    item_explanations.append(
+                        {
+                            "antecedent": antecedent,
+                            "consequent": rule["consequents"],
+                            "satisfied_members": satisfied_count,
+                            "confidence": rule.get("confidence", "N/A"),
+                            "support": rule.get("support", "N/A"),
+                        }
+                    )
+
+            explanations[item_id] = item_explanations
+
+        return explanations
+
+    def compute_group_fidelity_advanced(self) -> float:
+        """
+        Compute group fidelity using advanced conditions.
+
+        This method implements a more sophisticated fidelity calculation where:
+        - Condition 1: Each member of the group must have seen at least one item from the antecedent
+        - Condition 2: Each item in the antecedent must have been seen by at least one member
+
+        Returns:
+            The fidelity score as a float between 0 and 1.
+        """
+        if not self.pool_recommendations:
+            logger.warning("No recommendations to explain")
+            return 0.0
+
+        if not self.members:
+            logger.warning("No group members defined")
+            return 0.0
+
+        explained_count = 0
+        total_recommendations = len(self.pool_recommendations)
+
+        # Convert member IDs to set for faster lookup
+        members_set = set(self.members)
+
+        # Get all items seen by any group member
+        all_seen_items = set()
+        for member in members_set:
+            member_history = self.user_history.get(member, set())
+            # Convert to strings for consistency with rules
+            member_history_str = {str(item) for item in member_history}
+            all_seen_items.update(member_history_str)
+
+        for item_id in self.pool_recommendations:
+            if self._can_explain_item_advanced(item_id, members_set, all_seen_items):
+                explained_count += 1
+
+        fidelity = explained_count / total_recommendations
+        logger.info(
+            f"Advanced explanation: {explained_count}/{total_recommendations} recommendations "
+            f"(fidelity: {fidelity:.3f})"
+        )
+
+        return fidelity
+
+    def _can_explain_item_advanced(
+        self, item_id: ItemId, members_set: Set[MemberId], all_seen_items: Set[str]
+    ) -> bool:
+        """
+        Check if an item can be explained using advanced conditions.
+
+        Args:
+            item_id: The item ID to check.
+            members_set: Set of group member IDs.
+            all_seen_items: Set of all items seen by any group member.
+
+        Returns:
+            True if the item can be explained by at least one rule satisfying both conditions.
+        """
+        applicable_rules = self._find_applicable_rules(item_id)
+
+        for _, rule in applicable_rules.iterrows():
+            antecedent = rule["antecedents"]
+
+            # Condition 1: Each member must have seen at least one item from the antecedent
+            cond1 = all(
+                self._member_has_antecedent_item(member, antecedent)
+                for member in members_set
+            )
+
+            # Condition 2: Each item in the antecedent must have been seen by at least one member
+            cond2 = antecedent.issubset(all_seen_items)
+
+            if cond1 and cond2:
+                logger.debug(f"Advanced rule fired for item {item_id}")
+                return True
+
+        return False
+
+    def _member_has_antecedent_item(
+        self, member: MemberId, antecedent: Set[ItemId]
+    ) -> bool:
+        """
+        Check if a member has seen at least one item from the antecedent.
+
+        Args:
+            member: The member ID to check.
+            antecedent: The set of items in the rule's antecedent.
+
+        Returns:
+            True if the member has seen at least one item from the antecedent.
+        """
+        member_history = self.user_history.get(member, set())
+        member_history_str = {str(item) for item in member_history}
+
+        # Check if there's any intersection between member history and antecedent
+        return len(antecedent.intersection(member_history_str)) > 0
@@ -0,0 +1,434 @@
+import itertools
+from typing import Dict, List, Sequence, Union
+
+from pygrex.data_reader import DataReader, GroupInteractionHandler
+from pygrex.models import RecommenderModel
+from pygrex.recommender import GroupRecommender
+from pygrex.utils import SlidingWindowRanker, SlidingWindow, AggregationStrategy
+
+
+class SlidingWindowExplainer:
+    """
+    Stratigi, M., Bikakis, N., Stefanidis, K.: Counterfactual explanations for group
+    recommendations. In: Proceedings of the 27th International Workshop on Design,
+    Optimization, Languages and Analytical Processing of Big Data (DOLAP 2025).
+
+    A class that uses a sliding window approach to find counterfactual explanations
+    for group recommendation systems.
+
+    This class helps identify which items, if removed from the group's interaction history,
+    would cause a specific target item to no longer appear in the group recommendations.
+    """
+
+    def __init__(
+        self,
+        config,
+        data: DataReader,
+        group_handler: GroupInteractionHandler,
+        members: List[Union[str, int]],
+        target_item: Union[str, int],
+        model: RecommenderModel,
+        aggregation_strategy: AggregationStrategy = AggregationStrategy.AVG_PREDICTIONS,
+        window_size=3,
+    ):
+        """
+        Initialize the SlidingWindowExplainer.
+
+        Args:
+            config: Configuration object with model parameters
+            data: DataReader object containing the dataset
+            group_handler: Object that handles group data modifications
+            members: List of user IDs in the group
+            target_item: The item ID for which explanation is sought
+            model: Recommender model to use for predictions,
+            aggregation_strategy: Strategy to aggregate individual recommendations,
+            window_size: Size of the sliding window
+        """
+        self.cfg = config
+        self.data = data
+        self.group_handler = group_handler
+        self.members = members
+        self.target_item = target_item
+        self.model = model
+        self.aggregation_strategy = aggregation_strategy
+        self.window_size = window_size
+
+        # Results tracking
+        self.explanations_found: Dict[int, Dict] = {}
+        self.calls = 0
+        self.max_calls = 1000
+        self.item_metrics = {}
+
+    def set_sliding_window(self, sliding_window):
+        """Set the sliding window object if not provided during initialization."""
+        self.sliding_window = sliding_window
+
+    def set_item_metrics(self, metrics: Dict[Union[str, int], Dict[str, float]]):
+        """Store the pre-calculated metric scores for all items."""
+        self.item_metrics = metrics
+
+    def find_explanation(
+        self,
+        items_rated_by_group: List[Union[str, int]],
+        group_predictions: Dict,
+        top_recommendation: Union[str, int],
+        ranking_weights: Dict[str, float],
+    ) -> Dict[int, Dict]:
+        """
+        Find counterfactual explanations using the full, encapsulated process.
+
+        Args:
+            items_rated_by_group: All items rated by any member of the group.
+            group_predictions: The original individual predictions from the recommender.
+            top_recommendation: The original top recommended item.
+            ranking_weights: The weights from the UI for each ranking component.
+
+        Returns:
+            A dictionary of found explanations, including their justification metrics.
+        """
+
+        self.calls = 0
+        ranker = SlidingWindowRanker(config={})
+        ranker.set_group_recommender_values(group_predictions, top_recommendation)
+        ranked_items, self.item_metrics = ranker.generate_ranked_items(
+            all_rated_items=items_rated_by_group,
+            data=self.data,
+            group_members=self.members,
+            component_weights=ranking_weights,
+        )
+
+        sliding_window = SlidingWindow(
+            sequence=ranked_items, window_size=self.window_size
+        )
+
+        found = 0
+        while True:
+            # Get the sliding window
+            big_window = sliding_window.get_next_window()
+
+            # Check exit conditions
+            if big_window is None or found > 0 or self.calls >= self.max_calls:
+                break
+
+            # Count calls and windows
+            self.calls += 1
+
+            # Test if removing this window affects recommendations
+            if self._test_window_removal(big_window, self.target_item):
+                # A counterfactual explanation has been found
+                found += 1
+                # Look for minimal subsets within this window
+                self._find_minimal_subset(big_window, self.target_item)
+
+        if found == 0:
+            print("Explanation could not be found")
+
+        return self.explanations_found
+
+    def _test_window_removal(
+        self, item_ids: List[Union[str, int]], original_group_rec: Union[str, int]
+    ) -> bool:
+        """
+        Test if removing the given items affects the group recommendation.
+
+        Args:
+            item_ids: List of item IDs to remove from group interactions
+            original_group_rec: The original recommendation to compare against
+
+        Returns:
+            bool: True if removing these items changes recommendations, False otherwise
+        """
+
+        # Get new recommendations after removing items
+        group_recommendation = self._get_recommendations_after_removal(item_ids)
+
+        # Check if target item is still in recommendations
+
+        return original_group_rec not in group_recommendation
+
+    def _get_recommendations_after_removal(
+        self, item_ids: List[Union[str, int]], top_n: int = 10
+    ) -> Sequence[Union[str, int]]:
+        """
+        Get group recommendations after removing specified items from interaction history.
+
+        Args:
+            item_ids: List of item IDs to remove from group interactions
+            top_n: Number of top recommendations to return
+
+        Returns:
+            List of recommended item IDs
+        """
+        # Create modified dataset with items removed
+        changed_data = self.group_handler.create_modified_dataset(
+            original_data=self.data.dataset,
+            group_ids=self.members,
+            item_ids=item_ids,
+            data=self.data,
+        )
+
+        # Create new DataReader and retrain model
+        data_retrained = self._create_data_reader_and_prepare(changed_data)
+        model_retrained = self._retrain_model(data_retrained)
+
+        # Set up recommender with new model and data
+        group_recommender = GroupRecommender(data_retrained)
+        group_recommender.setup_recommendation(
+            model_retrained,
+            self.members,
+            data_retrained,
+            aggregation_strategy=self.aggregation_strategy,
+        )
+        recommendations = group_recommender.get_group_recommendations(top_n)
+
+        if not isinstance(recommendations, list):
+            return []
+
+        return recommendations
+
+    def _create_data_reader_and_prepare(self, changed_data):
+        """
+        Create and prepare a new DataReader with modified data.
+
+        Args:
+            changed_data: DataFrame with modified dataset
+
+        Returns:
+            DataReader: A new DataReader object with the modified dataset
+        """
+        data_retrained = DataReader(
+            filepath_or_buffer=None,
+            sep=None,
+            names=None,
+            skiprows=0,
+            dataframe=changed_data,
+        )
+
+        # Fix for potential dataset issue in original code
+        # data_retrained.dataset = data_retrained.dataset.iloc[1:].reset_index(drop=True)
+
+        # Prepare data
+        data_retrained.make_consecutive_ids_in_dataset()
+        data_retrained.binarize(binary_threshold=1)
+
+        return data_retrained
+
+    def _retrain_model(self, data):
+        """
+        Retrain the recommendation model with modified data.
+
+        Args:
+            data: Prepared DataReader object with modified dataset
+
+        Returns:
+            Retrained model
+        """
+        self.model.fit(data)
+        return self.model
+
+    def _find_minimal_subset(
+        self, big_window: List[Union[str, int]], original_group_rec: Union[str, int]
+    ) -> None:
+        """
+        Find minimal subset of items that act as counterfactual explanation.
+
+        Args:
+            big_window: List of item IDs to search within
+            original_group_rec: The original recommendation to compare against
+
+        """
+        found_subset = 0
+
+        # Try combinations of different lengths
+        for length in range(1, len(big_window) + 1):
+            if found_subset > 0 or self.calls > self.max_calls:
+                break
+
+            combinations = itertools.combinations(big_window, length)
+            for item_combo in combinations:
+                if found_subset > 0 or self.calls > self.max_calls:
+                    break
+
+                subset_items = list(item_combo)
+                self.calls += 1
+
+                # Get recommendations after removing this subset
+                new_recommendations = self._get_recommendations_after_removal(
+                    subset_items
+                )
+
+                # Check if this is a counterfactual explanation
+                if original_group_rec not in new_recommendations:
+                    found_subset += 1
+                    self._record_explanation(
+                        subset_items, original_group_rec, new_recommendations[0]
+                    )
+
+    def _record_explanation(
+        self,
+        explanation_items: List[Union[str, int]],
+        original_rec: Union[str, int],
+        new_rec: Union[str, int],
+    ) -> None:
+        """
+        Record and display found explanation.
+
+        Args:
+            explanation_items: Items that form the counterfactual explanation
+            original_rec: Original recommendation
+            new_rec: New top recommendation after removing explanation items
+        """
+        print(
+            f"If the group had not interacted with these items {explanation_items},\n"
+            f"the item of interest {original_rec} would not have appeared on the recommendation list;\n"
+            f"instead, {new_rec} would have been recommended."
+        )
+        # 	print("")
+        #   print(f"Explanation: {explanation_items} : found at call: {self.calls}")
+
+        # Calculate metrics for the explanation
+        item_intensity = self._calculate_item_intensity(explanation_items)
+        user_intensity = self._calculate_user_intensity(explanation_items)
+        explanation_metrics = {
+            item: self.item_metrics.get(item, {}) for item in explanation_items
+        }
+
+        self.explanations_found[self.calls] = {
+            "items": explanation_items,
+            "new_rec": new_rec,
+            "metrics": explanation_metrics,
+        }
+
+        exp_size = len(explanation_items)
+
+        #   print(f"{exp_size}\t{self.calls}\t{item_intensity}\t{user_intensity}")
+
+    def _calculate_item_intensity(self, items: List[Union[str, int]]) -> List[float]:
+        """
+        Calculate average item intensity for explanation items.
+
+        Args:
+            items: List of item IDs in the explanation
+
+        Returns:
+            List of average intensity scores for each item
+        """
+
+        return self._calculate_average_item_intensity_score(
+            items, self.members, self.data
+        )
+
+    def _calculate_user_intensity(self, items: List[Union[str, int]]) -> List[float]:
+        """
+        Calculate user intensity score for explanation items.
+
+        Args:
+            items: List of item IDs in the explanation
+
+        Returns:
+            List of intensity scores for each user
+        """
+        return self._calculate_user_intensity_score(items, self.members, self.data)
+
+    @staticmethod
+    def _calculate_average_item_intensity_score(
+        explanation: List[Union[str, int]],
+        members: List[Union[str, int]],
+        data: DataReader,
+    ) -> List[float]:
+        """
+        Calculate the average item intensity for a counterfactual explanation.
+
+        Average item intensity is defined as the average number of interactions
+        between group members and each item in the explanation.
+
+        Args:
+            explanation: The counterfactual explanation items.
+            members: User IDs of the group members.
+            data: DataReader object containing the dataset and ID mapping methods.
+
+        Returns:
+            list: Average intensity for each item in the explanation.
+        """
+        internal_group_ids = []
+        # Convert user IDs to internal representation
+        for user_id in members:
+            new_user_id = data.get_new_user_id(user_id)
+            if isinstance(new_user_id, list):
+                if new_user_id:  # Check that the list is not empty
+                    internal_group_ids.append(int(new_user_id[0]))
+            else:
+                internal_group_ids.append(int(new_user_id))
+
+        group_size = len(members)
+        item_intensities = []
+
+        for item_id in explanation:
+            # Convert item ID to internal representation
+            internal_item_id = data.get_new_item_id(item_id)
+
+            # Count interactions between this item and group members
+            interactions_count = len(
+                data.dataset[
+                    (data.dataset.itemId == internal_item_id)
+                    & (data.dataset.userId.isin(internal_group_ids))
+                ]
+            )
+
+            # Calculate average intensity
+            average_intensity = interactions_count / group_size
+            item_intensities.append(average_intensity)
+
+        return item_intensities
+
+    @staticmethod
+    def _calculate_user_intensity_score(
+        explanation_items: List[Union[str, int]],
+        members: List[Union[str, int]],
+        data: DataReader,
+    ) -> List[float]:
+        """
+        Calculate the interaction intensity for each user based on their interactions with items in an explanation.
+
+        Interaction intensity represents how much a user has interacted with the items in the explanation,
+        normalized by the total number of explanation items.
+
+        Args
+            explanation_items : List of item IDs in the explanation
+            members : List of user IDs to calculate intensity for
+            data : DataReader object containing the dataset and ID mapping methods
+
+        Returns
+            List of interaction intensities for each user (same order as members)
+            Values range from 0 to 1, where:
+            - 0 means no interaction with any explanation item
+            - 1 means interaction with all explanation items
+
+        Notes
+            Intensity is calculated as: (number of user interactions with explanation items) / (number of explanation items)
+        """
+        # Convert external item IDs to internal IDs
+        internal_item_ids = [
+            data.get_new_item_id(item_id) for item_id in explanation_items
+        ]
+
+        user_intensities = []
+        num_explanation_items = len(explanation_items)
+
+        for member in members:
+            # Convert external user ID to internal ID
+            internal_user_id = data.get_new_user_id(member)
+
+            # Count interactions between this user and explanation items
+            user_interactions_count = len(
+                data.dataset[
+                    (data.dataset.itemId.isin(internal_item_ids))
+                    & (data.dataset.userId == internal_user_id)
+                ]
+            )
+
+            # Calculate intensity as proportion of explanation items the user interacted with
+            intensity = user_interactions_count / num_explanation_items
+            user_intensities.append(intensity)
+
+        return user_intensities
@@ -0,0 +1,11 @@
+from .model_based_emf import EMFExplainer
+from .model_based_als_explain import ALSExplainer
+from .post_hoc_association_rules import ARPostHocExplainer
+from .post_hoc_knn import KNNPostHocExplainer
+
+__all__ = [
+    "EMFExplainer",
+    "ALSExplainer",
+    "ARPostHocExplainer",
+    "KNNPostHocExplainer",
+]
@@ -0,0 +1,49 @@
+from tqdm.auto import tqdm
+
+from abc import ABC, abstractmethod
+from typing import Dict, Any
+
+
+class Explainer(ABC):
+    def __init__(self, model, recommendations, data):
+        self.model = model
+        self.recommendations = recommendations
+        self.dataset = data.dataset
+        self.num_items = data.num_item
+        self.num_users = data.num_user
+        self.users = self.dataset.groupby(by="userId")
+
+    def explain_recommendations(self):
+        explanations = []
+
+        with tqdm(
+            total=self.recommendations.shape[0], desc="Computing explanations: "
+        ) as pbar:
+            for _, row in self.recommendations.iterrows():
+                explanations.append(
+                    self.explain_recommendation_to_user(
+                        int(row.userId), int(row.itemId)
+                    )
+                )
+                pbar.update()
+
+        self.recommendations["explanations"] = explanations
+        return self.recommendations
+
+    def get_user_items(self, user_id):
+        """
+        Items Ids rated by a user.
+        :param user_id: the user
+        :return: list
+        """
+        return self.users.get_group(user_id).itemId.values
+
+    @abstractmethod
+    def explain_recommendation_to_user(
+        self, user_id: int, item_id: int
+    ) -> Dict[str, Any]:
+        """
+        Generates an explanation for a single user-item recommendation.
+        This method must be implemented by any subclass.
+        """
+        raise NotImplementedError
@@ -0,0 +1,51 @@
+import numpy as np
+import pandas as pd
+
+from .explainer import Explainer
+
+
+class ALSExplainer(Explainer):
+    def __init__(self, model, recommendations, data, number_of_contributions=10):
+        super(ALSExplainer, self).__init__(model, recommendations, data)
+        self.number_of_contributions = number_of_contributions
+
+    def explain_recommendation_to_user(self, user_id: int, item_id: int):
+        """
+        Measuring the contribution of each item to the recommendation.
+        :param model:
+        :param item_id:
+        :param user_id:
+        :return: returns a dataframe with the contribution to the recommendation of each previously interacted with item.
+        """
+
+        current_interactions = np.zeros(self.num_items)
+        current_interactions[self.get_user_items(user_id)] = 1
+
+        c_u = np.diag(current_interactions)
+
+        y_t = self.model.item_embedding().transpose()
+        temp = np.matmul(y_t, c_u)
+        temp = np.matmul(temp, self.model.item_embedding())
+        temp = temp + np.diag([self.model.reg_term] * self.model.latent_dim)
+
+        if len(self.get_user_items(user_id)) > 1:
+            weight_mtr = np.linalg.inv(temp)
+        else:
+            weight_mtr = np.linalg.pinv(temp)
+
+        temp = np.matmul(self.model.item_embedding(), weight_mtr)
+
+        sim_to_rec_id = temp.dot(self.model.item_embedding()[item_id, :])
+
+        sim_to_rec_id = sim_to_rec_id[self.get_user_items(user_id)]
+
+        contribution = {
+            "item": self.get_user_items(user_id),
+            "contribution": sim_to_rec_id,
+        }
+        contribution = pd.DataFrame(contribution)
+        contribution = contribution.sort_values(by=["contribution"], ascending=False)
+        return {
+            "item": contribution.item[: self.number_of_contributions],
+            "contribution": contribution.contribution[: self.number_of_contributions],
+        }
@@ -0,0 +1,28 @@
+from .explainer import Explainer
+
+
+class EMFExplainer(Explainer):
+    def __init__(self, model, recommendations, data):
+        super(EMFExplainer, self).__init__(model, recommendations, data)
+
+    def explain_recommendation_to_user(self, user_id: int, item_id: int):
+        """
+        Measuring the contribution of each item to the recommendation.
+        :param user_id:
+        :param item_id: recommendation
+        :return: returns a dataframe with the contribution to the recommendation of each previously interacted with item.
+        """
+
+        ratings_on_item = self.dataset[self.dataset.itemId == item_id]
+        similar_users = self.model.sim_users[user_id]
+        similar_users_ratings_on_item = ratings_on_item[
+            ratings_on_item.userId.isin(similar_users)
+        ]
+
+        explanation_df = similar_users_ratings_on_item.groupby(by="rating").count()
+        explanation = {}
+
+        for index, row in explanation_df.iterrows():
+            explanation[index] = row[0]
+
+        return explanation
@@ -0,0 +1,79 @@
+from typing import Any, Dict
+from mlxtend.preprocessing import TransactionEncoder
+from mlxtend.frequent_patterns import apriori, association_rules
+import pandas as pd
+
+from .explainer import Explainer
+
+
+class ARPostHocExplainer(Explainer):
+    """
+    Post-hoc explainer that justifies a recommendation with association rules
+    mined (apriori) from the per-user item sets of the dataset.
+    """
+
+    def __init__(
+        self,
+        model,
+        recommendations,
+        data,
+        min_support=0.1,
+        max_len=2,
+        metric="lift",
+        min_threshold=0.1,
+        min_confidence=0.1,
+        min_lift=0.1,
+    ):
+        """
+        :param model: forwarded to the ``Explainer`` base class.
+        :param recommendations: forwarded to the ``Explainer`` base class.
+        :param data: forwarded to the ``Explainer`` base class.
+        :param min_support: minimum support passed to ``apriori``.
+        :param max_len: maximum itemset length passed to ``apriori``.
+        :param metric: metric passed to ``association_rules``.
+        :param min_threshold: threshold on ``metric`` passed to
+            ``association_rules``.
+        :param min_confidence: rules must have confidence strictly above this.
+        :param min_lift: rules must have lift strictly above this.
+        """
+        super(ARPostHocExplainer, self).__init__(model, recommendations, data)
+        self.AR = None
+        self.min_support = min_support
+        self.max_len = max_len
+        self.metric = metric
+        self.min_threshold = min_threshold
+        self.min_confidence = min_confidence
+        self.min_lift = min_lift
+
+        # Computed lazily on the first call to get_rules_for_getting().
+        self.rules: pd.DataFrame | None = None
+
+    def get_rules_for_getting(self, item_id: int) -> pd.DataFrame:
+        """
+        Return the mined rules whose consequent is ``item_id``.
+
+        The rules are mined on first use and cached in ``self.rules``.
+
+        :param item_id: item that must appear as the rule consequent.
+        :return: rows of ``self.rules`` with ``consequents == item_id``.
+        """
+        if self.rules is None:
+            self.compute_association_rules()
+
+        if self.rules is not None:
+            return self.rules[self.rules.consequents == item_id]
+
+        # Only reached if compute_association_rules() left self.rules unset.
+        return pd.DataFrame()
+
+    def compute_association_rules(self):
+        """
+        Mine association rules from the dataset and store them in
+        ``self.rules`` with columns ``consequents``, ``antecedents`` and
+        ``confidence``.
+
+        When apriori finds no frequent itemsets, ``self.rules`` is set to an
+        empty frame with those columns.
+        """
+        # One transaction (list of item ids) per user, built in a single
+        # groupby pass instead of re-filtering the whole dataset per user.
+        item_sets = [
+            list(items)
+            for _, items in self.dataset.groupby("userId", sort=False)["itemId"]
+        ]
+
+        te = TransactionEncoder()
+        te_ary = te.fit(item_sets).transform(item_sets)
+
+        # The te_ary object is a NumPy array, which is a valid input for a DataFrame.
+        # Pylance may raise a false positive here due to incomplete type stubs for mlxtend.
+        df = pd.DataFrame(te_ary.astype(bool), columns=te.columns_)  # type: ignore
+
+        frequent_itemsets = apriori(
+            df, min_support=self.min_support, use_colnames=True, max_len=self.max_len
+        )
+
+        rule_columns = ["consequents", "antecedents", "confidence"]
+
+        # association_rules() rejects an empty itemset table; report "no
+        # rules" instead so explanations simply come back empty.
+        if frequent_itemsets.empty:
+            self.rules = pd.DataFrame(columns=rule_columns)
+            return
+
+        # Honour the metric chosen at construction time (default "lift").
+        rules = association_rules(
+            frequent_itemsets, metric=self.metric, min_threshold=self.min_threshold
+        )
+        # .copy() so the column rewrites below act on an independent frame
+        # rather than on a filtered view of the mlxtend result.
+        rules = rules[
+            (rules["confidence"] > self.min_confidence)
+            & (rules["lift"] > self.min_lift)
+        ].copy()
+
+        # Each side is a frozenset; keep a single member. With the default
+        # max_len=2 every side holds exactly one item.
+        rules["consequents"] = rules["consequents"].apply(lambda x: next(iter(x)))
+        rules["antecedents"] = rules["antecedents"].apply(lambda x: next(iter(x)))
+
+        self.rules = rules[rule_columns]
+
+    def explain_recommendation_to_user(
+        self, user_id: int, item_id: int
+    ) -> Dict[str, Any]:
+        """
+        Explain ``item_id`` by the user's items that are rule antecedents of it.
+
+        :param user_id: user the recommendation is for.
+        :param item_id: recommendation
+        :return: ``{"antecedents": set_of_item_ids}`` holding the antecedents
+            of rules that conclude ``item_id`` and are contained in
+            ``self.get_user_items(user_id)``.
+        """
+        user_ratings = self.get_user_items(user_id)
+        rules = self.get_rules_for_getting(item_id)
+        explanations = rules[rules.antecedents.isin(user_ratings)]
+
+        return {"antecedents": set(explanations.antecedents)}
@@ -0,0 +1,46 @@
+from scipy import sparse
+from sklearn.metrics.pairwise import cosine_similarity
+import numpy as np
+from typing import Dict, Any
+
+from .explainer import Explainer
+
+
+class KNNPostHocExplainer(Explainer):
+    """
+    Post-hoc explainer that links a recommended item to the user's own items
+    through item-item cosine similarity over the rating matrix.
+    """
+
+    def __init__(self, model, recommendations, data, knn=10):
+        """
+        :param model: forwarded to the ``Explainer`` base class.
+        :param recommendations: forwarded to the ``Explainer`` base class.
+        :param data: forwarded to the ``Explainer`` base class.
+        :param knn: number of nearest neighbour items kept per item.
+        """
+        super(KNNPostHocExplainer, self).__init__(model, recommendations, data)
+
+        self.knn = knn
+        # Initialize as an empty dictionary to prevent subscripting None;
+        # filled lazily by compute_knn_items_for_all_items().
+        self.knn_items_dict: Dict[int, np.ndarray] = {}
+
+    def get_nn_for_getting(self, item_id: int) -> np.ndarray:
+        """
+        Return the nearest neighbour item indices of ``item_id``.
+
+        :param item_id: item whose neighbours are requested.
+        :return: array of neighbour item indices, or an empty array when the
+            item has no entry.
+        """
+        # Check if the KNN dictionary has been computed
+        if not self.knn_items_dict:
+            self.compute_knn_items_for_all_items()
+
+        # Return the neighbors for the item, or an empty array if not found
+        return self.knn_items_dict.get(item_id, np.array([]))
+
+    def compute_knn_items_for_all_items(self):
+        """
+        Fill ``self.knn_items_dict`` with, for every item index, the indices
+        of its most cosine-similar items (most similar first, the item itself
+        excluded).
+        """
+        # NOTE(review): assumes self.dataset is a pandas DataFrame whose
+        # itemId/userId columns are integer matrix indices - confirm against
+        # the Explainer base class.
+        # Keep only the last rating of each (item, user) pair: the sparse
+        # constructor would sum duplicates, whereas an indexed assignment
+        # into a dense matrix keeps the last write.
+        interactions = self.dataset.drop_duplicates(
+            subset=["itemId", "userId"], keep="last"
+        )
+
+        # Build the item x user matrix directly in sparse form; a dense
+        # num_items x num_users intermediate is never materialised.
+        ds = sparse.csr_matrix(
+            (
+                interactions.rating.to_numpy(dtype=float),
+                (interactions.itemId.to_numpy(), interactions.userId.to_numpy()),
+            ),
+            shape=(self.num_items, self.num_users),
+        )
+        sim_matrix = cosine_similarity(ds)
+
+        # A value strictly below every similarity, so each item ranks last
+        # in its own neighbour ordering.
+        min_val = sim_matrix.min() - 1
+        np.fill_diagonal(sim_matrix, min_val)
+
+        # The item itself is always last, so capping at num_items - 1 keeps
+        # it out of its own neighbour list even when knn >= num_items.
+        neighbours_per_item = min(self.knn, self.num_items - 1)
+
+        for i in range(self.num_items):
+            knn_to_item_i = (-sim_matrix[i, :]).argsort()[:neighbours_per_item]
+            self.knn_items_dict[i] = knn_to_item_i
+
+    def explain_recommendation_to_user(
+        self, user_id: int, item_id: int
+    ) -> Dict[str, Any]:
+        """
+        Explain ``item_id`` by the user's items that are among its neighbours.
+
+        :param user_id: user the recommendation is for.
+        :param item_id: recommendation
+        :return: ``{"explanations": set_of_item_ids}`` holding the
+            intersection of the item's nearest neighbours and
+            ``self.get_user_items(user_id)``.
+        """
+        user_ratings = self.get_user_items(user_id)
+        sim_items = self.get_nn_for_getting(item_id)
+        explanations = set(sim_items) & set(user_ratings)
+
+        return {"explanations": explanations}