public code v1

2026-05-22 10:02:10 +02:00
commit 46a9ecf065
166 changed files with 6982454 additions and 0 deletions
@@ -0,0 +1,7 @@
+import yaml
+from box import Box
+
+# Load the project configuration once, at import time. The path is relative
+# to the current working directory of the running process.
+# The encoding is pinned to UTF-8 so that decoding the YAML file does not
+# depend on the platform's locale default.
+with open("configs/config.yml", "r", encoding="utf-8") as yml_file:
+    full_cfg = yaml.safe_load(yml_file)
+
+# Only the "base" section is exposed; it is shallow-copied into a Box so the
+# settings can be read with attribute access. With default_box=True and
+# default_box_attr=None a missing key yields None instead of raising.
+cfg = Box({**full_cfg["base"]}, default_box=True, default_box_attr=None)
@@ -0,0 +1,11 @@
+from .data_reader import DataReader
+from .user_item_rating_dataset import UserItemRatingDataset
+from .group_interaction_handler import GroupInteractionHandler
+from .user_item_dict import UserItemDict
+
+# Public API of this package: the names re-exported by the imports above.
+__all__ = [
+    "DataReader",
+    "UserItemRatingDataset",
+    "GroupInteractionHandler",
+    "UserItemDict",
+]
@@ -0,0 +1,416 @@
+from typing import List, Optional, Union, cast
+import numpy as np
+import pandas as pd
+import warnings
+
+
+class DataReader:
+    """
+    Load, validate and re-index a table of user-item ratings.
+
+    The reader wraps a pandas DataFrame that must provide the columns
+    'userId', 'itemId' and 'rating'. The data comes either from a CSV file,
+    which is read eagerly by the constructor, or from a DataFrame passed in
+    directly. After `make_consecutive_ids_in_dataset` has been called, the
+    reader also holds lookup tables between original and consecutive IDs.
+    """
+
+    # Columns that every dataset must contain; enforced by the `dataset` setter.
+    _REQUIRED_COLUMNS = ("userId", "itemId", "rating")
+
+    def __init__(
+        self,
+        filepath_or_buffer: Optional[str] = None,
+        sep: Optional[str] = None,
+        names: Optional[List[str]] = None,
+        skiprows: int = 0,
+        dataframe: Optional[pd.DataFrame] = None,
+    ) -> None:
+        """
+        Initialize the DataReader with either a DataFrame or file parameters.
+
+        Args:
+            filepath_or_buffer (Optional[str]): Path to the CSV file or buffer.
+            sep (Optional[str]): Separator used in the CSV file.
+            names (Optional[List[str]]): List of column names for the CSV file.
+            skiprows (int, optional): Number of rows to skip in the CSV file. Defaults to 0.
+            dataframe (Optional[pd.DataFrame], optional): A DataFrame to use directly. Defaults to None.
+
+        Raises:
+            ValueError: If neither `dataframe` nor a complete set of file parameters
+                is provided, or if the data fail the validation of the `dataset` setter.
+            FileNotFoundError: If the file cannot be found when loading from file.
+            pd.errors.ParserError: If the CSV file cannot be parsed when loading from file.
+
+        Note:
+            If `dataframe` is provided, it takes precedence, and file-related parameters
+            are ignored but stored for reference. A warning is issued in this case.
+            The data must contain the columns 'userId', 'itemId' and 'rating'.
+        """
+        if dataframe is None and (not filepath_or_buffer or not sep or not names):
+            raise ValueError(
+                "Must provide either a DataFrame or valid file parameters."
+            )
+
+        self.filepath_or_buffer = filepath_or_buffer
+        self.sep = sep
+        self.names = names
+        self.skiprows = skiprows
+        self._dataset: Optional[pd.DataFrame] = None
+        self._raw_dataset: Optional[pd.DataFrame] = None
+        self._num_user: Optional[int] = None
+        self._num_item: Optional[int] = None
+        self.original_user_id: Optional[pd.DataFrame] = None
+        self.original_item_id: Optional[pd.DataFrame] = None
+        self.new_user_id: Optional[pd.DataFrame] = None
+        self.new_item_id: Optional[pd.DataFrame] = None
+
+        if dataframe is not None:
+            if any(param is not None for param in (filepath_or_buffer, sep, names)):
+                warnings.warn(
+                    "DataFrame provided; file parameters (filepath_or_buffer, sep, names) are ignored.",
+                    UserWarning,
+                )
+            self.dataset = dataframe
+            return
+
+        # The guard at the top guarantees that all file parameters are set here,
+        # so the file is loaded eagerly. Only the read itself sits in the `try`
+        # so validation errors from the setter are not mistaken for I/O errors.
+        try:
+            loaded_df = pd.read_csv(
+                filepath_or_buffer=self.filepath_or_buffer,
+                sep=self.sep,
+                names=self.names,
+                skiprows=self.skiprows,
+                engine="python",
+            )
+        except FileNotFoundError as e:
+            raise FileNotFoundError(
+                f"File not found: {self.filepath_or_buffer}"
+            ) from e
+        except pd.errors.ParserError as e:
+            raise pd.errors.ParserError(f"Failed to parse CSV: {str(e)}") from e
+
+        # The setter validates the frame, snapshots the raw copy and counts
+        # the unique users and items.
+        self.dataset = loaded_df
+
+    @property
+    def dataset(self) -> pd.DataFrame:
+        """
+        Get the dataset DataFrame.
+
+        Raises:
+            ValueError: If no dataset has been loaded or set.
+        """
+        if self._dataset is None:
+            raise ValueError("Dataset is not loaded or is not valid.")
+        return self._dataset
+
+    @dataset.setter
+    def dataset(self, new_data: pd.DataFrame) -> None:
+        """
+        Set the dataset and compute the number of unique users and items.
+
+        The frame is stored as-is (not copied); non-numeric essential columns
+        are converted in place on the frame that was passed in.
+
+        Args:
+            new_data (pd.DataFrame): The new dataset to set.
+
+        Raises:
+            ValueError: If the DataFrame is None, empty, lacks required columns,
+                       or contains invalid data types/missing values.
+        """
+        if new_data is None:
+            raise ValueError("DataFrame cannot be None")
+        if new_data.empty:
+            raise ValueError("DataFrame cannot be empty")
+
+        # Fail with the documented ValueError instead of a bare KeyError
+        # when an essential column is absent.
+        missing = [
+            col for col in self._REQUIRED_COLUMNS if col not in new_data.columns
+        ]
+        if missing:
+            raise ValueError(
+                f"DataFrame is missing required column(s): {', '.join(missing)}"
+            )
+
+        # Validate data types
+        for col in self._REQUIRED_COLUMNS:
+            if not pd.api.types.is_numeric_dtype(new_data[col]):
+                warnings.warn(
+                    f"Column '{col}' is not numeric. Attempting conversion.",
+                    UserWarning,
+                )
+                try:
+                    new_data[col] = pd.to_numeric(new_data[col])
+                except (ValueError, TypeError) as e:
+                    # to_numeric raises TypeError for unparseable objects
+                    raise ValueError(
+                        f"Column '{col}' cannot be converted to a numeric type."
+                    ) from e
+
+        # Check for missing values in essential columns
+        if new_data[list(self._REQUIRED_COLUMNS)].isnull().any().any():
+            raise ValueError(
+                "DataFrame contains missing values in essential columns (userId, itemId, rating)."
+            )
+
+        self._dataset = new_data
+        self._raw_dataset = new_data.copy()
+        self._num_user = int(self._dataset["userId"].nunique())
+        self._num_item = int(self._dataset["itemId"].nunique())
+        # Reset id mappings as they are now invalid for the new dataset
+        self.original_user_id = None
+        self.original_item_id = None
+        self.new_user_id = None
+        self.new_item_id = None
+
+    def get_raw_dataset(self) -> pd.DataFrame:
+        """
+        Get the raw dataset, i.e. the copy taken when the dataset was last set.
+
+        Returns:
+            pd.DataFrame: The raw dataset.
+
+        Raises:
+            ValueError: If the raw dataset is not set.
+        """
+        if self._raw_dataset is None:
+            raise ValueError(
+                "Raw dataset is not set. Load data from file or set a DataFrame first."
+            )
+        return self._raw_dataset
+
+    @staticmethod
+    def _create_id_mapping(column: pd.Series, new_column_name: str) -> pd.DataFrame:
+        """
+        Create a mapping for consecutive IDs.
+
+        IDs are numbered in order of first appearance in `column`.
+
+        Args:
+            column (pd.Series): The column to map.
+            new_column_name (str): The name of the new column for consecutive IDs.
+
+        Returns:
+            pd.DataFrame: A DataFrame with the original and mapped IDs.
+
+        Raises:
+            ValueError: If the column is empty.
+        """
+        if column.empty:
+            raise ValueError("Cannot create ID mapping for an empty column")
+        unique_values = column.drop_duplicates().reset_index(drop=True)
+        mapping = pd.DataFrame(
+            {column.name: unique_values, new_column_name: np.arange(len(unique_values))}
+        )
+        return mapping
+
+    def make_consecutive_ids_in_dataset(self) -> None:
+        """
+        Map user and item IDs to consecutive integers starting from 0 in a deterministic way.
+
+        The internal dataset is replaced by a re-indexed copy, and the lookup
+        tables between original and new IDs are stored on the instance.
+
+        Raises:
+            ValueError: If no dataset has been loaded or set.
+        """
+        if self._dataset is None:
+            raise ValueError("Dataset must be loaded or set before mapping IDs")
+
+        dataset = self.dataset.copy()
+
+        # Get unique IDs and SORT them to ensure the mapping is identical every time.
+        sorted_unique_users = sorted(dataset["userId"].unique())
+        sorted_unique_items = sorted(dataset["itemId"].unique())
+
+        # Create user ID mapping from the sorted list
+        user_id_mapping = pd.DataFrame(
+            {
+                "userId": sorted_unique_users,
+                "new_userId": range(len(sorted_unique_users)),
+            }
+        )
+        user_lookup = user_id_mapping.set_index("userId")
+        dataset["userId"] = dataset["userId"].map(user_lookup["new_userId"])
+
+        # Create item ID mapping from the sorted list
+        item_id_mapping = pd.DataFrame(
+            {
+                "itemId": sorted_unique_items,
+                "new_itemId": range(len(sorted_unique_items)),
+            }
+        )
+        item_lookup = item_id_mapping.set_index("itemId")
+        dataset["itemId"] = dataset["itemId"].map(item_lookup["new_itemId"])
+
+        # Store mappings for lookups
+        self.original_user_id = user_id_mapping.set_index("new_userId")
+        self.original_item_id = item_id_mapping.set_index("new_itemId")
+        self.new_user_id = user_lookup
+        self.new_item_id = item_lookup
+
+        # Update the internal dataset
+        dataset["userId"] = dataset["userId"].astype(int)
+        dataset["itemId"] = dataset["itemId"].astype(int)
+        self._dataset = dataset
+
+        # New IDs are exactly 0..n-1, so the counts equal the number of unique
+        # IDs; len() also keeps them plain Python ints like the setter does.
+        self._num_user = len(sorted_unique_users)
+        self._num_item = len(sorted_unique_items)
+
+    def binarize(
+        self, binary_threshold: float = 1, inplace: bool = True
+    ) -> Optional[pd.DataFrame]:
+        """
+        Binarize ratings into 0 or 1 based on a threshold (implicit feedback).
+
+        A rating becomes 1 only when it is strictly greater than the threshold.
+
+        Args:
+            binary_threshold (float, optional): Threshold for binarization. Defaults to 1.
+            inplace (bool, optional): If True, modify the dataset in-place. If False, return a new DataFrame.
+                                     Defaults to True.
+
+        Returns:
+            Optional[pd.DataFrame]: The binarized dataset if inplace=False, else None.
+
+        Raises:
+            ValueError: If the dataset is not set or binary_threshold is invalid.
+
+        Example:
+            Ratings [0.5, 2.0, 3.0] with threshold=1.0 -> [0, 1, 1]
+        """
+        if self._dataset is None:
+            raise ValueError("Dataset must be loaded or set before binarization")
+        # NumPy scalars are accepted too; they are not all subclasses of int/float.
+        if not isinstance(binary_threshold, (int, float, np.integer, np.floating)):
+            raise ValueError("binary_threshold must be a number")
+
+        dataset = self._dataset if inplace else self._dataset.copy()
+        dataset["rating"] = (dataset["rating"] > binary_threshold).astype(int)
+
+        return None if inplace else dataset
+
+    @property
+    def num_user(self) -> int:
+        """
+        Get the number of unique users.
+
+        Returns:
+            int: Number of unique users.
+
+        Raises:
+            ValueError: If the dataset is not set.
+        """
+        if self._num_user is None:
+            raise ValueError("Dataset must be loaded or set to compute num_user")
+        return self._num_user
+
+    @property
+    def num_item(self) -> int:
+        """
+        Get the number of unique items.
+
+        Returns:
+            int: Number of unique items.
+
+        Raises:
+            ValueError: If the dataset is not set.
+        """
+        if self._num_item is None:
+            raise ValueError("Dataset must be loaded or set to compute num_item")
+        return self._num_item
+
+    @staticmethod
+    def _coerce_original_ids(ids):
+        """Convert string IDs (a single value or list elements) to integers."""
+        if isinstance(ids, str):
+            return int(ids)
+        if isinstance(ids, list):
+            # Convert element-wise so lists mixing str and int work as well.
+            return [int(x) if isinstance(x, str) else x for x in ids]
+        return ids
+
+    @staticmethod
+    def _lookup_ids(mapping: pd.DataFrame, ids, column: str, label: str):
+        """Look up one ID or several IDs in `mapping[column]`; unknown IDs raise ValueError."""
+        try:
+            if isinstance(ids, (int, np.integer)):
+                return int(mapping.loc[ids, column])  # type: ignore
+            series = cast(pd.Series, mapping.loc[ids, column])
+            return series.tolist()
+        except KeyError as e:
+            raise ValueError(f"{label} ID(s) not found: {e}") from e
+
+    def get_original_user_id(self, u: Union[int, List[int]]) -> Union[int, List[int]]:
+        """
+        Get the original user ID(s) from the new (consecutive) ID(s).
+
+        Args:
+            u (Union[int, List[int]]): New user ID(s).
+
+        Returns:
+            Union[int, List[int]]: Original user ID(s).
+
+        Raises:
+            ValueError: If ID mapping is not set or if any ID is not found.
+        """
+        if self.original_user_id is None:
+            raise ValueError(
+                "ID mapping not set. Call make_consecutive_ids_in_dataset first"
+            )
+        return self._lookup_ids(self.original_user_id, u, "userId", "User")
+
+    def get_original_item_id(self, i: Union[int, List[int]]) -> Union[int, List[int]]:
+        """
+        Get the original item ID(s) from the new (consecutive) ID(s).
+
+        Args:
+            i (Union[int, List[int]]): New item ID(s).
+
+        Returns:
+            Union[int, List[int]]: Original item ID(s).
+
+        Raises:
+            ValueError: If ID mapping is not set or if any ID is not found.
+        """
+        if self.original_item_id is None:
+            raise ValueError(
+                "ID mapping not set. Call make_consecutive_ids_in_dataset first"
+            )
+        return self._lookup_ids(self.original_item_id, i, "itemId", "Item")
+
+    def get_new_user_id(
+        self, u: Union[Union[str, int], List[Union[str, int]]]
+    ) -> Union[int, List[int]]:
+        """
+        Get the new (consecutive) user ID(s) from the original ID(s).
+
+        String IDs are converted to integers before the lookup.
+
+        Args:
+            u: Original user ID(s).
+
+        Returns:
+            New user ID(s).
+
+        Raises:
+            ValueError: If ID mapping is not set, if a string ID is not an
+                integer literal, or if any ID is not found.
+        """
+        if self.new_user_id is None:
+            raise ValueError(
+                "ID mapping not set. Call make_consecutive_ids_in_dataset first"
+            )
+        return self._lookup_ids(
+            self.new_user_id, self._coerce_original_ids(u), "new_userId", "User"
+        )
+
+    def get_new_item_id(
+        self, i: Union[Union[str, int], List[Union[str, int]]]
+    ) -> Union[int, List[int]]:
+        """
+        Get the new (consecutive) item ID(s) from the original ID(s).
+
+        String IDs are converted to integers before the lookup.
+
+        Args:
+            i: Original item ID(s).
+
+        Returns:
+            New item ID(s).
+
+        Raises:
+            ValueError: If ID mapping is not set, if a string ID is not an
+                integer literal, or if any ID is not found.
+        """
+        if self.new_item_id is None:
+            raise ValueError(
+                "ID mapping not set. Call make_consecutive_ids_in_dataset first"
+            )
+        return self._lookup_ids(
+            self.new_item_id, self._coerce_original_ids(i), "new_itemId", "Item"
+        )
@@ -0,0 +1,289 @@
+from typing import List, Optional, Union
+import numpy as np
+import pandas as pd
+from pathlib import Path
+
+from pygrex.data_reader.data_reader import DataReader
+
+
+class GroupInteractionHandler:
+    """
+    Read group definitions from files and query group interactions.
+
+    A group is written as member IDs joined by underscores ("id1_id2_id3").
+    The query methods translate original IDs to the consecutive IDs of a
+    `DataReader`, so the reader's ID mapping must already exist.
+    """
+
+    def __init__(self, filepath_or_buffer: Union[str, Path, List[Union[str, Path]]]):
+        """
+        Initialize the GroupInteractionHandler.
+
+        Args:
+            filepath_or_buffer: Path to directory containing group files, path to a
+                single file, or list of file paths
+        """
+        if isinstance(filepath_or_buffer, (str, Path)):
+            path = Path(filepath_or_buffer)
+            if path.is_dir():
+                # iterdir() yields entries in arbitrary order; sorting makes
+                # lookups by partial file name reproducible across runs.
+                self.filepath_or_buffer = sorted(
+                    str(file) for file in path.iterdir() if file.is_file()
+                )
+            else:
+                self.filepath_or_buffer = [str(path)]
+        else:
+            # Normalise every entry of the list through Path, then store as str
+            self.filepath_or_buffer = [str(Path(p)) for p in filepath_or_buffer]
+
+    def _get_group_filepath(self, filename: str) -> str:
+        """
+        Get a specific group file path by matching the filename.
+
+        Paths whose file name contains `filename` are preferred, so a directory
+        that happens to contain the text does not shadow the intended file.
+        If no file name matches, the whole path is searched as a fallback.
+
+        Args:
+            filename (str): The name (or part of the name) of the file to search for.
+
+        Returns:
+            str: The resolved path of the first match.
+
+        Raises:
+            ValueError: If the first matching path does not exist on disk.
+            ValueError: If no known path contains `filename`.
+        """
+        name_matches = [
+            p for p in self.filepath_or_buffer if filename in Path(p).name
+        ]
+        candidates = name_matches or [
+            p for p in self.filepath_or_buffer if filename in p
+        ]
+        if not candidates:
+            raise ValueError(
+                f"Error: No file found containing '{filename}' in its name."
+            )
+
+        path = Path(candidates[0]).resolve()
+        if not path.exists():
+            raise ValueError(f"Error: File does not exist: {path}")
+        return str(path)
+
+    def read_groups(self, filename: str) -> List[str]:
+        """
+        Method to read group IDs from a specified file.
+
+        Each non-blank line of the file is one group ID; surrounding
+        whitespace is stripped and blank lines are skipped.
+
+        Args:
+            filename (str): Name of the file containing group IDs.
+
+        Returns:
+            List[str]: List of group IDs.
+
+        Raises:
+            ValueError: If `filename` is empty or no matching file is found.
+        """
+        if not filename:
+            raise ValueError("Groups path not specified in configuration")
+
+        path = Path(self._get_group_filepath(filename))
+        stripped = (
+            line.strip() for line in path.read_text(encoding="utf-8").splitlines()
+        )
+        # Blank lines would become empty group IDs that cannot be parsed later.
+        return [line for line in stripped if line]
+
+    def parse_group_members(self, group: str) -> List[int]:
+        """
+        Parse group ID to get member IDs.
+
+        Unlike `get_group_members`, this accepts only strings and raises for
+        an empty string.
+
+        Args:
+            group: Group ID string
+
+        Returns:
+            List of member IDs
+
+        Raises:
+            ValueError: If any part of the group ID is not an integer
+        """
+        return [int(m) for m in group.strip().split("_")]
+
+    def get_group_members(self, group: Union[List[Union[int, str]], str]) -> List[int]:
+        """
+        Get group members from a group ID string or list.
+
+        Args:
+            group: Group ID string in format "id1_id2_id3" or list of IDs
+
+        Returns:
+            List of member IDs as integers; empty for a blank string
+
+        Raises:
+            ValueError: If any member ID cannot be converted to an integer
+            TypeError: If group is neither a string nor a list
+        """
+        if isinstance(group, list):
+            return [int(member) for member in group]
+
+        if not isinstance(group, str):
+            raise TypeError(f"Expected string or list, got {type(group).__name__}")
+
+        group = group.strip()
+        if not group:
+            return []
+
+        try:
+            return [int(member) for member in group.split("_")]
+        except ValueError as e:
+            raise ValueError(f"Invalid member ID in group: {str(e)}") from e
+
+    @staticmethod
+    def _to_new_ids(ids, lookup) -> list:
+        """Map each original ID through `lookup`, unboxing NumPy integers to int first."""
+        return [
+            lookup(int(x) if isinstance(x, (int, np.integer)) else x) for x in ids
+        ]
+
+    def create_modified_dataset(
+        self,
+        original_data: Union[pd.DataFrame, DataReader],
+        group_ids: List[Union[int, str]],
+        item_ids: List[Union[int, str]],
+        data: Optional[DataReader] = None,
+    ) -> pd.DataFrame:
+        """
+        Creates a modified dataset by removing interactions between specified groups and items.
+
+        A row is dropped only when its user is one of `group_ids` AND its item
+        is one of `item_ids`.
+
+        Args:
+            original_data: Either a pandas DataFrame or a DataReader object containing the dataset
+            group_ids: List of original user IDs whose interactions are considered for removal
+            item_ids: List of original item IDs to consider for removal
+            data: DataReader providing the ID mapping; required if original_data is a DataFrame
+
+        Returns:
+            pd.DataFrame: A pandas DataFrame with the specified interactions removed
+
+        Raises:
+            ValueError: If input data types are incorrect
+        """
+        # Determine the data source and target dataset
+        if isinstance(original_data, DataReader):
+            data_reader = original_data
+            dataset = original_data.dataset
+        elif isinstance(original_data, pd.DataFrame) and isinstance(data, DataReader):
+            data_reader = data
+            dataset = original_data
+        else:
+            raise ValueError(
+                "Either original_data must be a DataReader or data must be provided as a DataReader"
+            )
+
+        # Convert IDs to internal representation
+        new_group_ids = self._to_new_ids(group_ids, data_reader.get_new_user_id)
+        new_item_ids = self._to_new_ids(item_ids, data_reader.get_new_item_id)
+
+        # Create mask for rows to keep (inverse of rows to drop)
+        mask = ~(dataset.itemId.isin(new_item_ids) & dataset.userId.isin(new_group_ids))
+
+        return dataset[mask]
+
+    def get_rated_items_by_all_group_members(
+        self, group: List[Union[int, str]], original_data: DataReader
+    ) -> np.ndarray:
+        """
+        Get all items rated by any member of the group (union of rated items).
+
+        Args:
+            group: List of original user IDs
+            original_data: Data object with mapping methods
+
+        Returns:
+            np.ndarray: Array of original item IDs rated by any group member
+        """
+        new_group = self._to_new_ids(group, original_data.get_new_user_id)
+
+        # Get unique items rated by any group member
+        dataset = original_data.dataset
+        group_items = dataset[dataset.userId.isin(new_group)]["itemId"].unique()
+
+        # Convert back to original item IDs
+        original_ids = original_data.get_original_item_id(group_items.tolist())
+        return np.array(original_ids)
+
+    def get_common_rated_items(
+        self, group: List[Union[int, str]], original_data: DataReader
+    ) -> np.ndarray:
+        """
+        Get items rated by all members of the group (intersection of rated items).
+
+        Args:
+            group: List of original user IDs
+            original_data: DataReader object with mapping methods
+
+        Returns:
+            np.ndarray: Array of original item IDs rated by all group members,
+                ordered by their consecutive item ID; empty for an empty group
+        """
+        new_group = self._to_new_ids(group, original_data.get_new_user_id)
+        if not new_group:
+            return np.array([])
+
+        # Collect the set of items each member has rated
+        dataset = original_data.dataset
+        rated_items_per_member = [
+            set(dataset.loc[dataset.userId == user_id, "itemId"].unique())
+            for user_id in new_group
+        ]
+
+        # Intersect, then sort so the result order does not depend on set iteration
+        common_items = sorted(set.intersection(*rated_items_per_member))
+
+        # Convert back to original item IDs
+        original_ids = original_data.get_original_item_id(
+            np.array(common_items).tolist()
+        )
+        return np.array(original_ids)
+
+    def get_items_for_group_recommendation(
+        self, data: pd.DataFrame, item_ids: np.ndarray, group: List[int]
+    ) -> np.ndarray:
+        """
+        Get items for group recommendation (those not interacted with by any group member).
+
+        No ID translation happens here: `group` and `item_ids` must use the
+        same ID space as the 'userId' and 'itemId' columns of `data`.
+
+        Args:
+            data: DataFrame with interaction data
+            item_ids: Array of all item IDs
+            group: List of group member IDs
+
+        Returns:
+            Sorted array of unique item IDs not interacted with by any group member
+        """
+        item_ids_group = data.loc[data.userId.isin(group), "itemId"]
+        return np.setdiff1d(item_ids, item_ids_group)
+
+    def get_group_preferences(
+        self, group: List[Union[int, str]], data_reader: DataReader
+    ) -> pd.DataFrame:
+        """
+        Get all preferences (ratings) by all members of the group.
+
+        Args:
+            group: List of original user IDs
+            data_reader: DataReader object with the dataset
+
+        Returns:
+            pd.DataFrame: Copy of the dataset rows belonging to the group members
+        """
+        new_group = self._to_new_ids(group, data_reader.get_new_user_id)
+
+        # Get all interactions by group members
+        dataset = data_reader.dataset
+        return dataset[dataset.userId.isin(new_group)].copy()
@@ -0,0 +1,36 @@
+from torch.utils.data import Dataset
+import torch
+import numpy as np
+
+
+class UserItemDict(Dataset):
+    """PyTorch Dataset that serves one dense rating vector per user."""
+
+    def __init__(self, data, expl_matrix, expl):
+        """
+        args:
+
+            data: DataFrame with 'userId', 'itemId' and 'rating' columns
+            expl_matrix: matrix indexed as [user, :] that is added to the
+                rating vector when `expl` is truthy
+            expl: flag that switches the addition of `expl_matrix` on
+        """
+        # One entry per user: the rated item ids and the matching ratings
+        self.users_dict = {
+            user: {'items': list(rows.itemId), 'rating': list(rows.rating)}
+            for user, rows in data.groupby('userId')
+        }
+        self.n_items = data.itemId.nunique()
+        self.n_users = data.userId.nunique()
+        self.expl_matrix = expl_matrix
+        self.expl = expl
+
+    def __getitem__(self, index):
+        # `index` is used as the userId key; item ids are used as positions
+        # in a vector of length n_items.
+        entry = self.users_dict[index]
+        dense = np.zeros(self.n_items)
+        dense[entry['items']] = entry['rating']
+        if not self.expl:
+            return torch.tensor(dense)
+        return torch.tensor(dense) + self.expl_matrix[index, :]
+
+    def __len__(self):
+        return self.n_users
+
@@ -0,0 +1,21 @@
+from torch.utils.data import Dataset
+
+
+class UserItemRatingDataset(Dataset):
+    """PyTorch Dataset over three parallel <user, item, rating> tensors."""
+
+    def __init__(self, user_tensor, item_tensor, target_tensor):
+        """
+        args:
+
+            user_tensor: user of each <user, item> pair
+            item_tensor: item of each <user, item> pair
+            target_tensor: torch.Tensor, the corresponding rating for <user, item> pair
+        """
+        self.user_tensor, self.item_tensor, self.target_tensor = (
+            user_tensor,
+            item_tensor,
+            target_tensor,
+        )
+
+    def __getitem__(self, index):
+        # The sample is the index-th entry of each tensor, in user/item/target order
+        user = self.user_tensor[index]
+        item = self.item_tensor[index]
+        target = self.target_tensor[index]
+        return user, item, target
+
+    def __len__(self):
+        # Length is the first dimension of the user tensor
+        n_samples = self.user_tensor.size(0)
+        return n_samples
@@ -0,0 +1,15 @@
+from .splitter import Splitter
+from .model_evaluator import ModelEvaluator
+from .explainer_evaluator import ExplanationEvaluator
+from .evaluation_pipelines import (
+    run_evaluation_with_proper_split,
+    run_leave_one_out_evaluation,
+)
+
+# Public API of this package: the names re-exported by the imports above.
+__all__ = [
+    "Splitter",
+    "ModelEvaluator",
+    "ExplanationEvaluator",
+    "run_evaluation_with_proper_split",
+    "run_leave_one_out_evaluation",
+]
@@ -0,0 +1,251 @@
+import time
+from typing import Dict
+import pandas as pd
+import numpy as np
+from pygrex.data_reader.data_reader import DataReader
+from pygrex.evaluator import Splitter, ModelEvaluator
+
+
+def run_leave_one_out_evaluation(
+    data_reader: DataReader, model, top_n: int = 10
+) -> Dict:
+    print("Starting leave-one-out evaluation...")
+    start_time = time.time()
+
+    # 1. Proper leave-one-out split (one item per user)
+    train_dr, test_df = Splitter.split_leave_n_out(
+        data_reader, n=1
+    )  # n=1 for true leave-one-out
+    print(f"Split completed: {len(test_df)} test interactions")
+
+    train_users = set(train_dr.dataset["userId"].unique())
+    train_items = set(train_dr.dataset["itemId"].unique())
+
+    original_test_len = len(test_df)
+    test_df = test_df[
+        test_df["userId"].isin(train_users) & test_df["itemId"].isin(train_items)
+    ]
+    print(
+        f"Filtered test set: {len(test_df)} interactions remaining from {original_test_len}"
+    )
+
+    # 2. Train model on training data
+    print("Training model on reduced dataset...")
+    train_start = time.time()
+    model.fit(train_dr)
+    train_time = time.time() - train_start
+    print(f"Model training completed in {train_time:.2f} seconds")
+
+    # 3. Generate recommendations efficiently
+    print("Generating recommendations...")
+    rec_start = time.time()
+    recommendations = generate_recommendations_batch(model, train_dr, test_df, top_n)
+    rec_time = time.time() - rec_start
+    print(f"Recommendations generated in {rec_time:.2f} seconds")
+
+    # 4. Use the existing Evaluator class
+    evaluator = ModelEvaluator(test_df, top_n=top_n)
+
+    # Calculate metrics
+    hit_ratio = evaluator.cal_hit_ratio(recommendations)
+    ndcg = evaluator.cal_ndcg(recommendations)
+
+    total_time = time.time() - start_time
+    print(f"Total evaluation time: {total_time:.2f} seconds")
+
+    return {
+        "Hit Ratio": hit_ratio,
+        "NDCG": ndcg,  # Using standard NDCG instead of eNDCG for now
+        "evaluation_time": total_time,
+    }
+
+
+def generate_recommendations_batch(
+    model, train_dr: DataReader, test_df: pd.DataFrame, top_n: int
+) -> pd.DataFrame:
+    """
+    Generate recommendations in batch mode for efficiency.
+    Returns DataFrame with columns: ['userId', 'itemId', 'rank', 'score']
+    """
+    all_items = set(train_dr.dataset["itemId"].unique())
+    recommendations = []
+
+    test_users = test_df["userId"].unique()
+    print(f"Generating recommendations for {len(test_users)} users...")
+
+    for i, user_id in enumerate(test_users):
+        if i % 100 == 0:  # Progress indicator
+            print(f"Processing user {i}/{len(test_users)}")
+
+        # Get items the user has already interacted with
+        user_items = set(
+            train_dr.dataset[train_dr.dataset["userId"] == user_id]["itemId"]
+        )
+
+        # Candidate items (unseen items)
+        candidate_items = list(all_items - user_items)
+
+        # For efficiency, limit candidates if there are too many
+        if len(candidate_items) > 10000:  # Adjust this threshold based on your needs
+            candidate_items = np.random.choice(
+                candidate_items, 10000, replace=False
+            ).tolist()
+
+        # Generate predictions - try to use batch prediction if available
+        try:
+            # Check if model has batch prediction capability
+            if hasattr(model, "predict_batch") or hasattr(model, "recommend"):
+                user_recs = generate_recommendations_efficient(
+                    model, user_id, candidate_items, top_n
+                )
+            else:
+                # Fall back to individual predictions (slower)
+                user_recs = generate_recommendations_individual(
+                    model, user_id, candidate_items, top_n
+                )
+
+            recommendations.extend(user_recs)
+
+        except Exception as e:
+            print(f"Error generating recommendations for user {user_id}: {e}")
+            continue
+
+    # Convert to DataFrame
+    if recommendations:
+        rec_df = pd.DataFrame(
+            recommendations, columns=["userId", "itemId", "rank", "score"]
+        )
+    else:
+        # Return empty DataFrame with correct structure
+        rec_df = pd.DataFrame(columns=["userId", "itemId", "rank", "score"])
+
+    return rec_df
+
+
+def generate_recommendations_efficient(
+    model, user_id: int, candidate_items: list, top_n: int
+) -> list:
+    """
+    Try to use efficient recommendation methods if available.
+    """
+    recommendations = []
+
+    # Try different efficient methods based on model type
+    if hasattr(model, "recommend"):
+        # Some models have a recommend method
+        try:
+            recs = model.recommend(user_id, candidate_items, top_n)
+            for rank, (item_id, score) in enumerate(recs, 1):
+                recommendations.append((user_id, item_id, rank, score))
+        except Exception:
+            # Fall back to individual predictions
+            return generate_recommendations_individual(
+                model, user_id, candidate_items, top_n
+            )
+
+    elif hasattr(model, "predict_batch"):
+        # Batch prediction if available
+        try:
+            user_items_batch = [(user_id, item_id) for item_id in candidate_items]
+            scores = model.predict_batch(user_items_batch)
+
+            # Sort by score and get top-N
+            scored_items = list(zip(candidate_items, scores))
+            scored_items.sort(key=lambda x: x[1], reverse=True)
+
+            for rank, (item_id, score) in enumerate(scored_items[:top_n], 1):
+                recommendations.append((user_id, item_id, rank, score))
+        except Exception:
+            return generate_recommendations_individual(
+                model, user_id, candidate_items, top_n
+            )
+
+    else:
+        return generate_recommendations_individual(
+            model, user_id, candidate_items, top_n
+        )
+
+    return recommendations
+
+
+def generate_recommendations_individual(
+    model, user_id: int, candidate_items: list, top_n: int
+) -> list:
+    """
+    Fall back to individual predictions (slower but works with any model).
+    """
+    predictions = []
+
+    # Batch the individual predictions for better performance
+    batch_size = 100
+    for i in range(0, len(candidate_items), batch_size):
+        batch_items = candidate_items[i : i + batch_size]
+
+        for item_id in batch_items:
+            try:
+                score = model.predict(user_id, item_id)
+                predictions.append((item_id, score))
+            except Exception as e:
+                print(f"Prediction error for user {user_id}, item {item_id}: {e}")
+                # Skip items that cause prediction errors
+                continue
+
+    # Sort by score and get top-N
+    predictions.sort(key=lambda x: x[1], reverse=True)
+    top_predictions = predictions[:top_n]
+
+    recommendations = []
+    for rank, (item_id, score) in enumerate(top_predictions, 1):
+        recommendations.append((user_id, item_id, rank, score))
+
+    return recommendations
+
+
+def run_evaluation_with_proper_split(
+    data_reader: DataReader, model, test_size: float = 0.2, top_n: int = 10
+) -> Dict:
+    """
+    Alternative evaluation using a proper train/test split instead of leave-one-out.
+    """
+    print(f"Starting evaluation with {test_size * 100}% test split...")
+    start_time = time.time()
+
+    # 1. Split data into train/test
+    train_dr, test_df = Splitter.split_leave_n_out(data_reader, frac=test_size)
+    print(f"Split completed: {len(test_df)} test interactions")
+
+    # 2. Filter test set to ensure all users/items exist in the training set
+    train_users = set(train_dr.dataset["userId"].unique())
+    train_items = set(train_dr.dataset["itemId"].unique())
+
+    original_test_len = len(test_df)
+    test_df = test_df[
+        test_df["userId"].isin(train_users) & test_df["itemId"].isin(train_items)
+    ]
+    print(
+        f"Filtered test set: {len(test_df)} interactions remaining from {original_test_len}"
+    )
+
+    # 2. Train model
+    print("Training model...")
+    model.fit(train_dr)
+
+    # 3. Generate recommendations
+    print("Generating recommendations...")
+    recommendations = generate_recommendations_batch(model, train_dr, test_df, top_n)
+
+    # 4. Evaluate
+    evaluator = ModelEvaluator(test_df, top_n=top_n)
+    hit_ratio = evaluator.cal_hit_ratio(recommendations)
+    ndcg = evaluator.cal_ndcg(recommendations)
+
+    total_time = time.time() - start_time
+    print(f"Evaluation completed in {total_time:.2f} seconds")
+
+    return {
+        "Hit Ratio": hit_ratio,
+        "NDCG": ndcg,
+        "evaluation_time": total_time,
+        "test_interactions": len(test_df),
+        "total_recommendations": len(recommendations),
+    }
@@ -0,0 +1,68 @@
+from typing import Dict, Any
+
+from pygrex.utils import calculate_gild_for_explanations
+
+
+class ExplanationEvaluator:
+    """
+    A unified evaluator for different explanation methods.
+
+    This class takes the results generated by an explainer and calculates
+    a standard set of quality metrics, such as Fidelity and Diversity (GILD).
+    """
+
+    def __init__(self):
+        """Initializes the ExplanationEvaluator."""
+        # This class is stateless, so __init__ is simple.
+        pass
+
+    def evaluate(
+        self, explanation_results: Dict[str, Any], explainer_type: str
+    ) -> Dict[str, float]:
+        """
+        Calculates all relevant metrics for a given explanation result.
+
+        Args:
+            explanation_results: The dictionary returned by an explainer's
+                                 `find_explanation` method.
+            explainer_type: A string identifier for the explainer used
+                            (e.g., "LORE4Groups", "EXPGRS").
+
+        Returns:
+            A dictionary containing the calculated metric scores.
+        """
+        if not explanation_results:
+            return {"fidelity": 0.0, "gild": 0.0}
+
+        fidelity = self._calculate_fidelity(explanation_results)
+        gild = self._calculate_gild(explanation_results, explainer_type)
+
+        return {"fidelity": fidelity, "gild": gild}
+
+    def _calculate_fidelity(self, explanation_results: Dict[str, Any]) -> float:
+        """
+        Extracts the fidelity score from the explanation results.
+
+        Fidelity is computed by the explainer itself, as it's the ratio of
+        items it was able to explain. This method standardizes its retrieval.
+        """
+        return explanation_results.get("fidelity", 0.0)
+
+    def _calculate_gild(
+        self, explanation_results: Dict[str, Any], explainer_type: str
+    ) -> float:
+        """
+        Calculates the Gaussian Inter-List Diversity (GILD) of the explanations.
+
+        This is a wrapper around the utility function that handles the details.
+        It uses the 'details' part of the explanation results.
+        """
+        explanation_details = explanation_results.get("details", {})
+        if not explanation_details:
+            return 0.0
+
+        # The GILD function is now called from a central, logical place.
+        gild_score = calculate_gild_for_explanations(
+            explanation_details, explainer_type
+        )
+        return gild_score
@@ -0,0 +1,179 @@
+import numpy as np
+import pandas as pd
+
+
+class ModelEvaluator:
+    disc_functions = ["log", "linear"]
+
+    def __init__(self, test_set, top_n: int = 10, discount_function: str = "log"):
+        self.test_set = test_set
+        self._top_n = top_n
+        assert discount_function in self.disc_functions, "Wrong Discount Function."
+        self._discount_function = discount_function
+        self.num_users = self.test_set.userId.nunique()
+
+    @property
+    def top_n(self):
+        return self._top_n
+
+    @top_n.setter
+    def top_n(self, top_n: int):
+        self._top_n = top_n
+
+    @property
+    def discount_function(self):
+        return self._discount_function
+
+    @discount_function.setter
+    def discount_function(self, discount_function: str):
+        assert discount_function in self.disc_functions, "Wrong Discount Function."
+        self._discount_function = discount_function
+
+    def cal_hit_ratio(self, recommendations):
+        """
+        Hit Ratio
+        :param recommendations: dataframe, columns = ['userId', 'itemId', 'rank']
+        :return: hit rate.
+        """
+        test_in_top_n = self.get_hits(recommendations)
+        # count hits per user
+        hits_per_user = self.count_positives(test_in_top_n)
+        # merge with the entire list of positive items for user
+        hits_per_user = hits_per_user.merge(
+            self.count_positives(self.test_set),
+            on="userId",
+            suffixes=("_true", ""),
+            how="right",
+        )
+        # if there are users with 0 hits the merge will have NA.
+        hits_per_user = hits_per_user.fillna(0)
+        # get the hit rate per user
+        hit_rate = hits_per_user.positive_true / hits_per_user.positive
+        # average
+        hit_rate = hit_rate.mean()
+        return hit_rate
+
+    def get_hits(self, recommendations):
+        """
+        Find which items in the test set have a hit on the recommendations.
+        :param recommendations: dataframe, columns = ['userId', 'itemId', 'rank']
+        :return: dataframe, removing the rows missing in the test set.
+        """
+        # check whether there are top_n items per user
+        top_n_recommendations = self.filter_to_top_n(recommendations)
+        # find the hits
+        test_in_top_n = pd.merge(
+            top_n_recommendations, self.test_set, on=["userId", "itemId"]
+        )
+        return test_in_top_n
+
+    def filter_to_top_n(self, dataset):
+        """
+        if rank > top_n, we do not use it for evaluation
+        :param dataset: dataframe, columns = ['userId', 'itemId', 'rank']
+        :return: dataframe, columns = ['userId', 'itemId', 'rank']
+        """
+        return dataset[dataset["rank"] <= self.top_n]
+
+    def cal_ndcg(self, recommendations):
+        r"""
+        For evaluating the top-N recommendation list, we also provide the normalized Discounted Cumulative Gain at N
+        recommendation (nDCG@N)  computed as the ratio of the Discounted Cumulative Gain(DCG) with the ideal Discounted
+        Cumulative Gain(IDCG):
+         DGC_{pos} = rel_1 + \sum_{i=2}^{pos} \frac{rel_i}{\log_2i} \qquad \qquad
+        IDGC_{pos} = rel_1 + \sum_{i=2}^{|h|-1} \frac{rel_i}{\log_2i} \\
+        nDCG_{pos} = \frac{DCG}{IDCG}
+        where pos denotes the position up to which relevance is accumulated, and $rel_i$ is the relevance of the recommended item at position \textit{i}.
+        Ref: Y. Wang, L. Wang, Y. Li, D. He, T.-Y. Liu, and W. Chen.
+            A theoretical analysis of ndcgtype ranking measures.
+        :param recommendations: dataframe, columns = ['userId', 'itemId', 'rank']
+        :return: nDCG
+        """
+        # get hits
+        hits = self.get_hits(recommendations)
+
+        DCG = self.cal_dcg(hits)
+        iDCG = self.cal_idcg()
+
+        # join to check if there are users in the test without hits
+        nDCG = iDCG.merge(DCG, on="userId", how="left")
+        nDCG = nDCG.fillna(0)
+        # normalize
+        nDCG["ndcg"] = nDCG["dcg"] / nDCG["idcg"]
+
+        return nDCG["ndcg"].mean()
+
+    def cal_dcg(self, hits):
+        """
+        Discounted Comulative Gain
+        :param hits: recommendations: dataframe, columns = ['userId', 'itemId', 'rank']
+        :return: DCG
+        """
+        # todo: the gain so far is set to a constant.
+
+        if self.discount_function == "log":
+            hits["discounted_gain"] = np.log(2) / np.log(hits["rank"] + 1)
+        elif self.discount_function == "linear":
+            hits["discounted_gain"] = 1 / hits["rank"]
+
+        DCG = hits.groupby("userId")["discounted_gain"].sum()
+
+        return pd.DataFrame(
+            {"userId": hits["userId"].unique(), "dcg": DCG}
+        ).reset_index(drop=True)
+
+    def cal_idcg(self):
+        """
+        the Ideal DCG, is the DCG for the best ranking possible (i.e. all true positives were recommended first).
+        :return: iDCG
+        """
+        # create a fake ranking for test set items.
+        # We assume that the items in the test set are all on the Top-N list.
+        count_positives = self.count_positives(self.test_set)
+        ideal_rank = [i for x in count_positives["positive"] for i in (range(1, x + 1))]
+        test_ideal_ranking = self.test_set.copy()
+        test_ideal_ranking["rank"] = ideal_rank
+        # Filter to have at most top-N items.
+        test_ideal_ranking = self.filter_to_top_n(test_ideal_ranking)
+        # get the dcg for the ideal ranking
+        idcg = self.cal_dcg(test_ideal_ranking)
+        idcg = idcg.rename(columns={"dcg": "idcg"})
+        return idcg
+
+    @staticmethod
+    def count_positives(dataset):
+        """
+        Returns the positives count.
+        :param dataset: dataframe, columns = ['userId', 'itemId', 'rank']
+        :return: dataframe, columns = ['userId', 'positive']
+        """
+        users_with_positives = dataset.userId.unique()
+        positives_per_user = dataset.groupby("userId")["itemId"].count()
+        positives_per_user = pd.DataFrame(
+            {"userId": users_with_positives, "positive": positives_per_user}
+        )
+
+        return positives_per_user.reset_index(drop=True)
+
+
+# if __name__ == '__main__':
+#    recoms = pd.DataFrame({
+#        'userId': [1, 1, 1, 2, 2, 2, 3, 3, 3],
+#        'itemId': [1, 2, 3, 4, 1, 2, 2, 3, 4],
+#        'rank': [1, 2, 3, 1, 2, 3, 1, 2, 3]
+#    })
+
+#    test = pd.DataFrame({
+#        'userId': [1, 1, 2, 3],
+#        'itemId': [1, 4, 1, 5]
+#    })
+
+#    eval = ModelEvaluator(test_set=test, top_n=2)
+
+#    assert eval.num_users == 3, 'number of users'
+#    assert eval.top_n == 2, 'number of top n'
+#    eval.top_n = 3
+#    assert eval.top_n == 3, 'changing of top n'
+
+#   print(eval.cal_hit_ratio(recoms))
+#   print(eval.cal_ndcg(recoms))
@@ -0,0 +1,169 @@
+import sys
+import random
+import pandas as pd
+import copy
+
+from pygrex.data_reader.data_reader import DataReader
+
+
+def fix_data_reader_mappings(source: DataReader, target: DataReader):
+    target._num_user = source._num_user
+    target._num_item = source._num_item
+    #  Copy over the original ID mappings
+    target.original_user_id = source.original_user_id
+    target.original_item_id = source.original_item_id
+    target.new_user_id = source.new_user_id
+    target.new_item_id = source.new_item_id
+    return target
+
+
+class Splitter:
+    """
+    Super Splitting Class.
+    args:
+        data: DataReader object, which contains in its dataset attribute 4 columns = ['userId', 'itemId', 'rating', 'timestamp']
+    """
+
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def split_leave_latest_out(data: DataReader, n_latest: int = 1):
+        """
+        Leave N latest interactions out train/test split.
+        Ref:
+        Campos, Pedro G., Fernando Díez, and Iván Cantador. "Time-aware recommender systems: a comprehensive survey and
+        analysis of existing evaluation protocols." User Modeling and User-Adapted Interaction 24.1-2 (2014): 67-119.
+        :param data:
+        :param n_latest: int, number of latest interactions to be in the the test set.
+        :returns train as DataReader, test as data.frames
+        """
+
+        # group items by suer id and rank them by timestamp
+        rank_latest = data.dataset.groupby(["userId"])["timestamp"].rank(
+            method="first", ascending=False
+        )
+
+        # keep in test items that are ranked higher than n_latest
+        test = data.dataset[rank_latest <= n_latest]
+        # keep in train the rest
+        train = DataReader(dataframe=data.dataset.copy())
+        train.dataset = data.dataset[rank_latest > n_latest]
+
+        train = fix_data_reader_mappings(data, train)
+
+        return train, test
+
+    @staticmethod
+    def split_leave_n_out(data: DataReader, n: int = 1, frac: float | None = None):
+        """
+        Leave N latest interactions out train/test split.
+        Ref:
+        Shani, Guy, and Asela Gunawardana. "Evaluating recommendation systems." Recommender systems handbook. Springer,
+        Boston, MA, 2011. 257-297.
+        :param data:
+        :param n int, number of interactions to be in the the test set.
+        :param frac float, fraction.
+        :returns dataframe train and test
+        """
+        min_nr_ratings_user = min(data.dataset["userId"].value_counts())
+
+        if min_nr_ratings_user < n:
+            sys.exit(
+                "split_leave_n_out: There are users with less ratings than n (required number of interactions "
+                "in the test set)."
+            )
+
+        if frac is not None and frac > 1:
+            sys.exit("f (i.e.) fraction should be smaller than 1.")
+
+        # group items by user id and extraxt a random number of items per user
+        grouped = data.dataset.groupby(["userId"])
+        if frac is not None:
+            test = grouped.sample(frac=frac)
+        else:
+            test = grouped.sample(n=n)
+
+        test = test.reset_index(drop=True)
+        train_pd = pd.merge(
+            data.dataset,
+            test,
+            on=list(data.dataset.columns),
+            how="outer",
+            indicator=True,
+        )
+        train_pd = train_pd[train_pd["_merge"] == "left_only"]
+        train_pd = train_pd.drop(columns="_merge")
+
+        train = copy.deepcopy(data)
+        train.dataset = train_pd
+        train = fix_data_reader_mappings(data, train)
+        assert test.shape[0] + train_pd.shape[0] == data.dataset.shape[0]
+
+        return train, test
+
+    def rel_plus_n(
+        self,
+        data,
+        negative_sample_size: int = 99,
+        splitting: str = "latest",
+        n: int = 1,
+    ):
+        """
+        RelPlusN: We build the users test set by extracting one relevant random item ($HR_u$) from the entire set of
+        rated items. Then  a set of random items with unknown relevance ($NR_u$), is extracted for each user $u$, where $u$
+        had no previous interaction with these items. Finally, for each item $i$ in $HR_u$, the algorithm requests a ranking
+        of the top-$N$ items from the set $ {i} cup NR_u$, on which the evaluation is performed. The evaluation metrics
+        are averaged over all the items in $HR_u$ and later over all the users. In the following, all experiments have been
+        conducted according to this protocol.
+        Ref:
+        - Paolo Cremonesi, Yehuda Koren, and Roberto Turrin. 2010.   Performance of Recommender Algorithms on Top-n
+        Recommendation Tasks. InProceedings ofthe Fourth ACM Conference on Recommender Systems (RecSys ’10).
+        - Xiangnan He, Lizi Liao, Hanwang Zhang, Liqiang Nie, Xia Hu, and Tat-Seng Chua. 2017. Neural Collaborative
+        Filtering. In Proceedings of the 26th InternationalConference on World Wide Web (WWW ’17).
+        :param data
+        :param negative_sample_size how many negative items to compute
+        :param splitting either latest for leave n latest out, or n for leave n out
+        :param n how many to leave out
+
+        """
+
+        if splitting == "latest":
+            train, test = self.split_leave_latest_out(data, n)
+        elif splitting == "n":
+            train, test = self.split_leave_n_out(data, n)
+        else:
+            sys.exit('splitting can be either "latest" or "n". ')
+
+        neg_sample = self.sample_negative(data, negative_sample_size)
+
+        return train, pd.concat([test, neg_sample], ignore_index=True)
+
+    @staticmethod
+    def sample_negative(data, negative_sample_size):
+        """return all negative items"""
+
+        item_catalogue = set(data.dataset["itemId"])
+
+        interact_status = (
+            data.dataset.groupby("userId")["itemId"]
+            .apply(set)
+            .reset_index()
+            .rename(columns={"itemId": "interacted_items"})
+        )
+        interact_status["negative_items"] = interact_status["interacted_items"].apply(
+            lambda x: item_catalogue - x
+        )
+        interact_status["negative_samples"] = interact_status["negative_items"].apply(
+            lambda x: random.sample(x, negative_sample_size)
+        )
+        interact_status = interact_status[["userId", "negative_samples"]]
+
+        userId = []
+        itemId = []
+        for row in interact_status.itertuples():
+            for i in range(negative_sample_size):
+                userId.append(int(row.userId))
+                itemId.append(int(row.negative_samples[i]))
+
+        return pd.DataFrame.from_dict({"userId": userId, "itemId": itemId})
@@ -0,0 +1,18 @@
+from .individual.model_based_emf import EMFExplainer
+from .individual.model_based_als_explain import ALSExplainer
+from .individual.post_hoc_association_rules import ARPostHocExplainer
+from .individual.post_hoc_knn import KNNPostHocExplainer
+from .groups.rule_based_group_rec_explainer import RuleBasedGroupRecExplainer
+from .groups.sliding_window_explainer import SlidingWindowExplainer
+from .groups.lore4groups_explainer import LORE4GroupsExplainer
+
+
+__all__ = [
+    "EMFExplainer",
+    "ALSExplainer",
+    "ARPostHocExplainer",
+    "KNNPostHocExplainer",
+    "RuleBasedGroupRecExplainer",
+    "SlidingWindowExplainer",
+    "LORE4GroupsExplainer",
+]
@@ -0,0 +1,10 @@
+from .rule_based_group_rec_explainer import RuleBasedGroupRecExplainer
+from .sliding_window_explainer import SlidingWindowExplainer
+from .lore4groups_explainer import LORE4GroupsExplainer
+
+
+__all__ = [
+    "RuleBasedGroupRecExplainer",
+    "SlidingWindowExplainer",
+    "LORE4GroupsExplainer",
+]
@@ -0,0 +1,731 @@
+import pandas as pd
+import numpy as np
+import re
+import logging
+import traceback
+from collections import Counter
+from typing import Dict, Set, List, Optional, Any, Tuple, Union
+from sklearn.tree import DecisionTreeClassifier, _tree
+
+# Identifiers may arrive as strings or integers; the explainer normalises
+# them with str()/int() at the point of use.
+ItemId = Union[str, int]
+UserId = Union[str, int]
+# A factual rule is the list of human-readable conditions along one tree path.
+FactualRule = List[str]
+# A counterfactual set is a list of alternative paths, each a list of conditions.
+CounterfactualSet = List[List[str]]
+# An explanation pairs a factual rule with its counterfactual set.
+Explanation = Tuple[Optional[FactualRule], Optional[CounterfactualSet]]
+
+
+class LORE4GroupsExplainer:
+    """
+    Enhanced LORE4Groups explainer that incorporates genre information
+    and stores decision trees for visualization
+    """
+
+    def __init__(
+        self,
+        item_profiles: Dict[str, Set[str]],
+        item_label_matrix: pd.DataFrame,
+        config: Dict,
+        genre_profiles: Optional[Dict[str, Set[str]]] = None,
+    ):
+        self.item_profiles = {str(k): v for k, v in item_profiles.items()}
+        self.item_label_matrix = item_label_matrix
+        self.params = config["explainer"]["lore4groups"]
+
+        # NEW: Store genre information
+        self.genre_profiles = (
+            {str(k): v for k, v in genre_profiles.items()} if genre_profiles else {}
+        )
+
+        all_columns = item_label_matrix.columns.tolist()
+        self.all_labels = [col for col in all_columns if col != "like"]
+
+        # Add 'like' back for target variable access (but not as feature)
+        if "like" in all_columns:
+            self.all_labels.append("like")
+
+    def _enhanced_jaccard_similarity(self, item1_id: ItemId, item2_id: ItemId) -> float:
+        """Enhanced Jaccard similarity that considers both tags and genres"""
+        # Get regular tags
+        tags1 = self.item_profiles.get(str(item1_id), set())
+        tags2 = self.item_profiles.get(str(item2_id), set())
+
+        # Get genres and add them as features
+        genres1 = self.genre_profiles.get(str(item1_id), set())
+        genres2 = self.genre_profiles.get(str(item2_id), set())
+
+        # Combine tags and genres for enhanced similarity
+        features1 = tags1.union({f"genre_{g.lower()}" for g in genres1})
+        features2 = tags2.union({f"genre_{g.lower()}" for g in genres2})
+
+        if not features1 or not features2:
+            return 0.0
+
+        union_len = len(features1.union(features2))
+        intersection_len = len(features1.intersection(features2))
+
+        return intersection_len / union_len if union_len > 0 else 0.0
+
+    def _jaccard_similarity(self, item1_id: ItemId, item2_id: ItemId) -> float:
+        """Original jaccard similarity (kept for compatibility)"""
+        tags1 = self.item_profiles.get(str(item1_id), set())
+        tags2 = self.item_profiles.get(str(item2_id), set())
+        if not tags1 or not tags2:
+            return 0.0
+        union_len = len(tags1.union(tags2))
+        return len(tags1.intersection(tags2)) / union_len if union_len > 0 else 0.0
+
+    def _get_enhanced_similar_examples(
+        self,
+        user_id_consecutive: UserId,
+        target_item_id: ItemId,
+        user_hist: Set[ItemId],
+        dataset: pd.DataFrame,
+        model=None,
+        data_reader=None,
+    ) -> Tuple[pd.DataFrame, Dict[str, Any]]:
+        """Enhanced version that returns both DataFrame and metadata for visualization"""
+
+        # 1. Find all similar items using enhanced similarity
+        similarities = [
+            (seen_id, self._enhanced_jaccard_similarity(target_item_id, seen_id))
+            for seen_id in user_hist
+        ]
+        similarities = sorted(similarities, key=lambda x: x[1], reverse=True)
+
+        sim_th = self.params.get("similarity_threshold", 0.0)
+        top_similar_items_str = {
+            item[0]
+            for item in similarities[: self.params["n_similar_for_tree"]]
+            if item[1] >= sim_th
+        }
+
+        if not top_similar_items_str:
+            return pd.DataFrame(), {}
+
+        # 2. Build the local dataset
+        top_similar_items_int = [int(i) for i in top_similar_items_str]
+
+        # Get existing ratings for similar items
+        local_df = dataset[
+            (dataset["userId"] == user_id_consecutive)
+            & (dataset["itemId"].isin(top_similar_items_int))
+        ].copy()
+
+        rated_items = set(local_df["itemId"])
+        items_to_predict = [
+            item for item in top_similar_items_int if item not in rated_items
+        ]
+
+        # Add predictions for unrated items
+        if model and data_reader and items_to_predict:
+            try:
+                orig_user_id = data_reader.get_original_user_id(
+                    int(user_id_consecutive)
+                )
+                predicted_ratings = []
+
+                for item_id_consecutive in items_to_predict:
+                    orig_item_id = data_reader.get_original_item_id(
+                        int(item_id_consecutive)
+                    )
+                    pred = model.predict(orig_user_id, orig_item_id)
+                    predicted_ratings.append(
+                        {
+                            "userId": user_id_consecutive,
+                            "itemId": item_id_consecutive,
+                            "rating": float(pred),
+                        }
+                    )
+
+                if predicted_ratings:
+                    pred_df = pd.DataFrame(predicted_ratings)
+                    local_df = pd.concat([local_df, pred_df], ignore_index=True)
+
+            except Exception:
+                traceback.print_exc()
+
+        # Check minimum samples requirement
+        if len(local_df) < 2:
+            return pd.DataFrame(), {}
+
+        # 3. Apply thresholding with fallbacks
+        rating_threshold = self.params["rating_threshold_for_like"]
+
+        threshold_info = {
+            "was_overridden": False,
+            "original_threshold": rating_threshold,
+            "final_threshold": rating_threshold,
+        }
+
+        local_df["like"] = (local_df["rating"] >= rating_threshold).astype(int)
+
+        # Apply fallback thresholds if needed
+        like_counts = local_df["like"].value_counts()
+
+        if len(like_counts) < 2:
+            # Try mean-based threshold
+            mean_rating = local_df["rating"].mean()
+            local_df["like"] = (local_df["rating"] >= mean_rating).astype(int)
+            threshold_info["was_overridden"] = True
+            threshold_info["final_threshold"] = mean_rating
+            like_counts = local_df["like"].value_counts()
+            if len(like_counts) < 2:
+                return pd.DataFrame(), {}
+
+        # Check for severe imbalance (>90% one class)
+        min_class_ratio = like_counts.min() / len(local_df)
+        if min_class_ratio < 0.1:
+            if like_counts.min() < 2:
+                return pd.DataFrame(), {}
+
+        # 4. Construct the enhanced feature matrix (including genres)
+        feature_labels = [label for label in self.all_labels if label != "like"]
+
+        examples = []
+        genre_features_used = set()
+
+        for idx, row in local_df.iterrows():
+            item_id = str(int(row["itemId"]))
+            tags = self.item_profiles.get(item_id, set())
+            genres = self.genre_profiles.get(item_id, set())
+
+            # Create base example with target variables
+            example = {
+                "movie_id": item_id,
+                "rating": row["rating"],
+                "like": int(row["like"]),
+            }
+
+            # Add tag features (excluding 'like')
+            for label in feature_labels:
+                example[label] = 1 if label in tags else 0
+
+            # Add genre features dynamically
+            for genre in genres:
+                genre_feature = f"genre_{genre.lower()}"
+                example[genre_feature] = 1
+                genre_features_used.add(genre_feature)
+
+                # Also add to feature_labels if not already there
+                if genre_feature not in feature_labels:
+                    feature_labels.append(genre_feature)
+
+            examples.append(example)
+
+        # Ensure all examples have all genre features
+        for example in examples:
+            for genre_feature in genre_features_used:
+                if genre_feature not in example:
+                    example[genre_feature] = 0
+
+        final_df = pd.DataFrame(examples)
+
+        # Final validation
+        if final_df["like"].nunique() < 2:
+            return pd.DataFrame(), {}
+
+        # Prepare metadata for visualization
+        metadata = {
+            "feature_labels": [label for label in feature_labels if label != "like"],
+            "genre_features": list(genre_features_used),
+            "similarity_scores": dict(similarities[:5]),  # Top 5 similarities
+            "target_item_genres": self.genre_profiles.get(str(target_item_id), set()),
+            "rating_threshold": threshold_info["final_threshold"],
+            "threshold_info": threshold_info,
+        }
+
+        return final_df, metadata
+
+    def _get_factual_path_for_item(
+        self,
+        clf: DecisionTreeClassifier,
+        x_item: pd.DataFrame,
+        metadata: Dict[str, Any],
+    ) -> Optional[List[str]]:
+        """
+        Traces the specific path an item takes through the decision tree
+        and returns the corresponding factual rule set.
+        """
+        feature_labels = metadata.get("feature_labels", [])
+        if not feature_labels:
+            return None
+
+        # 1. Get the sequence of nodes the item travels through
+        node_indicator = clf.decision_path(x_item)
+        node_index = node_indicator.indices[  # type: ignore
+            node_indicator.indptr[0] : node_indicator.indptr[  # type: ignore
+                1
+            ]
+        ]
+
+        rules = []
+        tree = clf.tree_
+
+        # 2. Iterate through the path to build the rules
+        # We stop at the second to last node because the last one is the leaf
+        for i in range(len(node_index) - 1):
+            node_id = node_index[i]
+            child_node_id = node_index[i + 1]
+
+            # Ensure this is not a leaf node
+            if tree.feature[node_id] != _tree.TREE_UNDEFINED:  # type: ignore
+                feature_name = feature_labels[tree.feature[node_id]]  # type: ignore
+                threshold = tree.threshold[node_id]  # type: ignore
+
+                # 3. Determine if the path went left or right to form the rule
+                if child_node_id == tree.children_left[node_id]:  # type: ignore
+                    # Path went left (True condition for <= threshold)
+                    rule = f"{feature_name} <= {threshold:.2f}"
+                else:
+                    # Path went right (False condition for <= threshold)
+                    rule = f"{feature_name} > {threshold:.2f}"
+
+                # Use the same enhanced formatting as before for consistency
+                if feature_name.startswith("genre_"):
+                    genre_name = feature_name.replace("genre_", "").title()
+                    if child_node_id == tree.children_left[node_id]:  # type: ignore
+                        rules.append(f"Does NOT have genre: `{genre_name}`")
+                    else:
+                        rules.append(f"Has genre: `{genre_name}`")
+                else:
+                    rules.append(rule)
+
+        return rules if rules else None
+
+    def _train_enhanced_decision_tree(
+        self,
+        user_id_consecutive: UserId,
+        item_id: ItemId,
+        user_hist: Set[ItemId],
+        dataset: pd.DataFrame,
+        model=None,
+        data_reader=None,
+    ) -> Tuple[Optional[DecisionTreeClassifier], Dict[str, Any]]:
+        """Enhanced tree training that returns both classifier and metadata"""
+
+        df_examples, metadata = self._get_enhanced_similar_examples(
+            user_id_consecutive, item_id, user_hist, dataset, model, data_reader
+        )
+
+        if df_examples.empty:
+            return None, {}
+
+        like_counts = df_examples["like"].value_counts()
+
+        if len(like_counts) < 2 or like_counts.min() < 2:
+            return None, {}
+
+        feature_labels = metadata.get("feature_labels", [])
+        X = df_examples[feature_labels]
+        y = df_examples["like"]
+
+        # Verify feature matrix has variance
+        feature_variances = X.var()
+        if (feature_variances == 0).all():
+            return None, {}
+
+        clf = DecisionTreeClassifier(
+            max_depth=5,  # Slightly deeper to accommodate genre features
+            min_samples_split=max(4, len(df_examples) // 4),
+            min_samples_leaf=2,
+            random_state=42,
+            class_weight="balanced",
+        )
+
+        try:
+            clf.fit(X, y)
+
+            # Enhanced feature importance analysis
+            feature_importance = list(zip(feature_labels, clf.feature_importances_))
+            important_features = [
+                (f, imp) for f, imp in feature_importance if imp > 0.001
+            ]
+            genre_important_features = [
+                (f, imp) for f, imp in important_features if f.startswith("genre_")
+            ]
+
+            # Add classifier and feature info to metadata
+            metadata.update(
+                {
+                    "classifier": clf,
+                    "feature_importance": dict(feature_importance),
+                    "important_features": important_features,
+                    "genre_important_features": genre_important_features,
+                    "training_data_size": len(df_examples),
+                    "class_distribution": like_counts.to_dict(),
+                }
+            )
+
+            return clf, metadata
+
+        except Exception as _:
+            return None, {}
+
+    def _get_enhanced_explanation_path(
+        self,
+        clf: DecisionTreeClassifier,
+        x_item: pd.DataFrame,
+        metadata: Dict[str, Any],
+    ) -> Optional[List[str]]:
+        """Enhanced explanation path that provides better rule descriptions"""
+
+        if 1 not in clf.classes_:
+            return None
+
+        leaf_id = clf.apply(x_item)[0]  # type: ignore
+        class_index = np.where(clf.classes_ == 1)[0]
+        if not class_index.size or clf.tree_.value[leaf_id][0][class_index[0]] == 0:  # type: ignore
+            return None
+
+        node_indicator = clf.decision_path(x_item)
+        node_index = node_indicator.indices[  # type: ignore
+            node_indicator.indptr[0] : node_indicator.indptr[  # type: ignore
+                1
+            ]
+        ]
+
+        rules = []
+        feature_labels = metadata.get("feature_labels", [])
+
+        for i in range(len(node_index) - 1):  # Exclude leaf node
+            node_id = node_index[i]
+            next_node_id = node_index[i + 1]
+
+            if clf.tree_.feature[node_id] != _tree.TREE_UNDEFINED:  # type: ignore
+                feature_name = feature_labels[clf.tree_.feature[node_id]]  # type: ignore
+                threshold = clf.tree_.threshold[node_id]  # type: ignore
+
+                # Enhanced rule formatting based on feature type
+                if feature_name.startswith("genre_"):
+                    genre_name = feature_name.replace("genre_", "").title()
+                    if next_node_id == clf.tree_.children_left[node_id]:  # type: ignore
+                        rules.append(f"Does NOT have genre: `{genre_name}`")
+                    else:
+                        rules.append(f"Has genre: `{genre_name}`")
+                else:
+                    # Regular tag features
+                    if next_node_id == clf.tree_.children_left[node_id]:  # type: ignore
+                        rules.append(f"{feature_name} <= {threshold}")
+                    else:
+                        rules.append(f"{feature_name} > {threshold}")
+
+        return rules
+
+    def _generate_enhanced_individual_explanation(
+        self, clf: DecisionTreeClassifier, item_id: ItemId, metadata: Dict[str, Any]
+    ) -> Optional[Explanation]:
+        """Enhanced individual explanation generation"""
+
+        if str(item_id) not in self.item_label_matrix.index:
+            return None
+
+        x_item_full = self.item_label_matrix.loc[[str(item_id)]]
+        feature_labels = metadata.get("feature_labels", [])
+
+        try:
+            # For genre features, we need to dynamically add them to the item
+            item_genres = self.genre_profiles.get(str(item_id), set())
+
+            # Create enhanced item representation
+            enhanced_item_data = x_item_full.copy()
+
+            # Add genre features
+            for genre in item_genres:
+                genre_feature = f"genre_{genre.lower()}"
+                if genre_feature in feature_labels:
+                    enhanced_item_data[genre_feature] = 1
+
+            # Ensure all genre features exist (set to 0 if not present)
+            for feature in feature_labels:
+                if (
+                    feature.startswith("genre_")
+                    and feature not in enhanced_item_data.columns
+                ):
+                    enhanced_item_data[feature] = 0
+
+            # Select only the features used in training
+            x_item = enhanced_item_data[feature_labels]
+
+        except KeyError as _:
+            return None
+        # Get enhanced factual rule
+        # factual_rule = self._get_enhanced_explanation_path(clf, x_item, metadata)
+        factual_rule = self._get_factual_path_for_item(clf, x_item, metadata)
+
+        if not factual_rule:
+            return None
+
+        # Get counterfactuals (reuse existing method)
+        counterfactual_set = self._get_counterfactual_paths(clf, x_item)
+        if not counterfactual_set:
+            return None
+
+        return (factual_rule, counterfactual_set)
+
+    def _get_counterfactual_paths(
+        self, clf: DecisionTreeClassifier, x_item: pd.DataFrame
+    ) -> Optional[CounterfactualSet]:
+        """Original counterfactual path method (kept for compatibility)"""
+        tree = clf.tree_
+        paths = []
+
+        def find_paths(node_id, current_path):
+            if tree.feature[node_id] == _tree.TREE_UNDEFINED:  # type: ignore
+                class_index = np.where(clf.classes_ == 0)[0]
+                if class_index.size and tree.value[node_id][0][class_index[0]] > 0:
+                    paths.append(list(current_path))
+                return
+            feature_idx = tree.feature[node_id]  # type: ignore
+            threshold = tree.threshold[node_id]  # type: ignore
+            current_path.append((feature_idx, "<=", threshold))
+            find_paths(tree.children_left[node_id], current_path)  # type: ignore
+            current_path.pop()
+            current_path.append((feature_idx, ">", threshold))
+            find_paths(tree.children_right[node_id], current_path)  # type: ignore
+            current_path.pop()
+
+        find_paths(0, [])
+        if not paths:
+            return None
+
+        min_nf = float("inf")
+        counterfactuals = []
+        for path in paths:
+            nf = 0
+            for feature_idx, op, threshold in path:
+                if feature_idx < len(x_item.columns):
+                    item_val = x_item.iloc[0, feature_idx]
+                    if not (
+                        (op == "<=" and item_val <= threshold)
+                        or (op == ">" and item_val > threshold)
+                    ):
+                        nf += 1
+            if nf < min_nf:
+                min_nf = nf
+                counterfactuals = [path]
+            elif nf == min_nf:
+                counterfactuals.append(path)
+
+        # Enhanced counterfactual formatting
+        formatted_counterfactuals = []
+        for cf_path in counterfactuals:
+            formatted_path = []
+            for idx, op, _ in cf_path:
+                if idx < len(x_item.columns):
+                    feature_name = x_item.columns[idx]
+                    if feature_name.startswith("genre_"):
+                        genre_name = feature_name.replace("genre_", "").title()
+                        if op == "<=":
+                            formatted_path.append(
+                                f"Does NOT have genre: `{genre_name}`"
+                            )
+                        else:
+                            formatted_path.append(f"Has genre: `{genre_name}`")
+                    else:
+                        formatted_path.append(f"{feature_name} {op} 0.5")
+            if formatted_path:
+                formatted_counterfactuals.append(formatted_path)
+
+        return formatted_counterfactuals if formatted_counterfactuals else None
+
+    def _aggregate_factual_rules(
+        self, individual_explanations: Dict[UserId, List[str]], total_group_size: int
+    ) -> Dict[str, List[str]]:
+        """
+        Aggregates individual factual rules into a group consensus by finding
+        the rules supported by a majority of members.
+        """
+
+        # Flatten the list of all rules from all users into a single list
+        all_rules_flat = [
+            rule
+            for rules_list in individual_explanations.values()
+            for rule in rules_list
+        ]
+
+        if not all_rules_flat:
+            return {"unanimous": [], "majority": [], "minority": []}
+
+        # Count the occurrences of each rule
+        rule_counts = Counter(all_rules_flat)
+
+        majority_threshold = (total_group_size // 2) + 1 if total_group_size > 1 else 1
+        minority_threshold = 1
+        cleaned_rules_set = self._clean_contradictory_rules(set(rule_counts.keys()))
+        categorized_rules = {"unanimous": [], "majority": [], "minority": []}
+
+        for rule in sorted(list(cleaned_rules_set)):
+            count = rule_counts[rule]
+            rule_with_support = f"{rule} ({count}/{total_group_size} members)"
+
+            if count == total_group_size:
+                categorized_rules["unanimous"].append(rule_with_support)
+            elif count >= majority_threshold:
+                categorized_rules["majority"].append(rule_with_support)
+            elif count >= minority_threshold:
+                categorized_rules["minority"].append(rule_with_support)
+
+        return categorized_rules
+
+    def _clean_contradictory_rules(self, rules_set: Set[str]) -> Set[str]:
+        """Enhanced contradiction cleaning that handles genre rules"""
+        conditions_by_attr = {}
+
+        for rule in rules_set:
+            # Handle genre rules
+            if "Has genre:" in rule or "Does NOT have genre:" in rule:
+                genre_match = re.search(r"`([^`]+)`", rule)
+                if genre_match:
+                    genre = genre_match.group(1)
+                    attr = f"genre_{genre}"
+                    op = "has" if "Has genre:" in rule else "not_has"
+                    conditions_by_attr.setdefault(attr, set()).add(op)
+            else:
+                # Handle regular rules
+                match = re.match(r"(.+?)\s*([<>]=?)\s*(\d+\.?\d*)", rule)
+                if match:
+                    attr, op, _ = match.groups()
+                    conditions_by_attr.setdefault(attr.strip(), set()).add(op)
+
+        # Find contradictory attributes
+        invalid_attrs = set()
+        for attr, ops in conditions_by_attr.items():
+            if attr.startswith("genre_"):
+                # Genre contradiction: has and not_has same genre
+                if "has" in ops and "not_has" in ops:
+                    invalid_attrs.add(attr)
+            else:
+                # Numerical contradiction: <= and >
+                if any(op in ops for op in ["<=", "<"]) and any(
+                    op in ops for op in [">", ">="]
+                ):
+                    invalid_attrs.add(attr)
+
+        # Remove contradictory rules
+        clean_rules = set()
+        for rule in rules_set:
+            is_invalid = False
+            for invalid_attr in invalid_attrs:
+                if invalid_attr.startswith("genre_"):
+                    genre = invalid_attr.replace("genre_", "")
+                    if f"`{genre}`" in rule:
+                        is_invalid = True
+                        break
+                else:
+                    if invalid_attr in rule:
+                        is_invalid = True
+                        break
+
+            if not is_invalid:
+                clean_rules.add(rule)
+
+        return clean_rules
+
+    def find_explanation(
+        self,
+        recommended_items: List[ItemId],
+        members: List[UserId],
+        user_hist: Dict[UserId, Set[ItemId]],
+        dataset: pd.DataFrame,
+        model=None,
+        data_reader=None,
+    ) -> Dict[str, Any]:
+        """Enhanced explanation finding with tree storage for visualization"""
+        if data_reader is None:
+            raise ValueError(
+                "A 'data_reader' object must be provided to find explanations."
+            )
+
+        detailed_explanations = {}
+        explainable_count = 0
+
+        if not recommended_items:
+            return {"fidelity": 0.0, "details": {}}
+
+        for item_id in recommended_items:
+            all_individual_rules = {}
+            all_counterfactuals = {}
+            stored_classifiers = {}  # Store classifiers for visualization
+            stored_metadata = {}  # Store metadata for visualization
+            representative_decision_path = None
+            threshold_info_for_item = None
+
+            for user_id in members:
+                user_id_consecutive = data_reader.get_new_user_id(user_id)
+                clf, metadata = self._train_enhanced_decision_tree(
+                    user_id_consecutive,
+                    item_id,
+                    user_hist.get(user_id, set()),
+                    dataset,
+                    model,
+                    data_reader,
+                )
+
+                if clf and metadata:
+                    if threshold_info_for_item is None and "threshold_info" in metadata:
+                        threshold_info_for_item = metadata["threshold_info"]
+
+                    explanation = self._generate_enhanced_individual_explanation(
+                        clf, item_id, metadata
+                    )
+
+                    if explanation:
+                        r, phi = explanation
+                        all_individual_rules[user_id] = r
+                        all_counterfactuals[user_id] = phi
+
+                        if representative_decision_path is None:
+                            representative_decision_path = r
+                        # Store for visualization (use first successful classifier)
+                        if not stored_classifiers:
+                            stored_classifiers[user_id] = clf
+                            stored_metadata[user_id] = metadata
+
+            total_members_in_group = len(members)
+            factual_set = self._aggregate_factual_rules(
+                all_individual_rules, total_members_in_group
+            )
+
+            if representative_decision_path and factual_set:
+                explainable_count += 1
+
+                # Enhanced detailed explanations with visualization data
+                item_explanation = {
+                    "decision_path": representative_decision_path,
+                    "group_factual_rule": factual_set,
+                    "individual_counterfactuals": all_counterfactuals,
+                }
+
+                if threshold_info_for_item:
+                    item_explanation["threshold_info"] = threshold_info_for_item
+
+                # Add visualization data if available
+                if stored_classifiers:
+                    user_id_for_viz = list(stored_classifiers.keys())[0]
+                    item_explanation.update(
+                        {
+                            "decision_tree": stored_classifiers[user_id_for_viz],
+                            "feature_names": stored_metadata[user_id_for_viz].get(
+                                "feature_labels", []
+                            ),
+                            "tree_metadata": stored_metadata[user_id_for_viz],
+                            "item_genres": self.genre_profiles.get(str(item_id), set()),
+                        }
+                    )
+
+                detailed_explanations[item_id] = item_explanation
+
+        fidelity = (
+            explainable_count / len(recommended_items) if recommended_items else 0.0
+        )
+
+        group_explanations = {
+            "fidelity": fidelity,
+            "details": detailed_explanations,
+        }
+
+        logging.info(
+            f"Enhanced fidelity for {members}: {fidelity:.3f} ({explainable_count}/{len(recommended_items)})"
+        )
+
+        return group_explanations
@@ -0,0 +1,314 @@
+"""Rule-based group recommendation explainer module."""
+
+from typing import Dict, List, Optional, Set, Union
+import logging
+
+from pygrex.data_reader.data_reader import DataReader
+from pygrex.utils.association_rules import AssociationRules
+
+# Type aliases for better readability.
+# Item and member identifiers may be given either as strings or as integers.
+ItemId = Union[str, int]
+MemberId = Union[str, int]
+# Maps each member ID to the set of item IDs recorded in that member's history.
+UserHistory = Dict[MemberId, Set[ItemId]]
+
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+
+
+class RuleBasedGroupRecExplainer:
+    """
+    A class to explain group recommendations using rule-based methods.
+
+    This class provides methods to generate explanations for group recommendations
+    based on association rules and user interaction history.
+    """
+
+    def __init__(
+        self,
+        rules: AssociationRules,
+        data: DataReader,
+        pool_recommendations: Optional[Union[List[ItemId], ItemId]] = None,
+        members: Optional[List[MemberId]] = None,
+        user_history: Optional[UserHistory] = None,
+        min_members_threshold: int = 1,
+    ) -> None:
+        """
+        Initialize the RuleBasedGroupRecExplainer.
+
+        Args:
+            rules: An instance of AssociationRules containing the rules for explanations.
+            pool_recommendations: A list of item IDs to explain, or a single item ID.
+            members: A list of member IDs in the group.
+            user_history: A dictionary mapping member IDs to sets of item IDs
+                         they have interacted with.
+            min_members_threshold: Minimum number of members that must satisfy
+                                 the rule for it to be considered valid.
+
+        Raises:
+            ValueError: If min_members_threshold is less than 1.
+        """
+        if min_members_threshold < 1:
+            raise ValueError("min_members_threshold must be at least 1")
+
+        self.rules = rules
+        self.members = members or []
+        self.min_members_threshold = min_members_threshold
+        self.user_history = user_history or {}
+        self.data = data
+
+        # Normalize pool_recommendations to always be a list
+        self.pool_recommendations = self._normalize_recommendations(
+            pool_recommendations
+        )
+
+    def _normalize_recommendations(
+        self, recommendations: Optional[Union[List[ItemId], ItemId]]
+    ) -> List[ItemId]:
+        """
+        Normalize recommendations input to a list format.
+
+        Args:
+            recommendations: Single item ID, list of item IDs, or None.
+
+        Returns:
+            List of item IDs.
+        """
+        if recommendations is None:
+            return []
+
+        if isinstance(recommendations, (str, int)):
+            return [recommendations]
+
+        return recommendations
+
+    def _is_rule_satisfied_by_member(
+        self, member: MemberId, antecedent: Set[ItemId]
+    ) -> bool:
+        """
+        Check if a member satisfies the rule's antecedent.
+
+        Args:
+            member: The member ID to check.
+            antecedent: The set of items that form the rule's antecedent.
+
+        Returns:
+            True if the member's history contains all items in the antecedent.
+        """
+
+        member_history = self.user_history.get(member, set())
+        member_history_str = {str(item) for item in member_history}
+
+        x = member_history_str.issuperset(antecedent)
+        return x
+
+    def _count_satisfied_members(self, antecedent: Set[ItemId]) -> int:
+        """
+        Count how many members satisfy the given antecedent.
+
+        Args:
+            antecedent: The set of items that form the rule's antecedent.
+
+        Returns:
+            Number of members whose history satisfies the antecedent.
+        """
+        return sum(
+            1
+            for member in self.members
+            if self._is_rule_satisfied_by_member(member, antecedent)
+        )
+
+    def _find_applicable_rules(self, item_id: ItemId):
+        """
+        Find rules that have the given item in their consequents.
+
+        Args:
+            item_id: The item ID to find rules for.
+
+        Returns:
+            DataFrame containing applicable rules.
+        """
+        item_id = self.data.get_new_item_id(item_id)  # type: ignore
+
+        applicable_rules = self.rules[  # type: ignore
+            self.rules["consequents"].apply(lambda x: str(item_id) in x)  # type: ignore
+        ]
+
+        return applicable_rules
+
+    def find_explanation(self) -> float:
+        """
+        Generate explanations for the group recommendations based on the rules.
+
+        Returns:
+            The fidelity of the explanations, which is the ratio of explained
+            recommendations to total recommendations in the pool.
+        """
+        if not self.pool_recommendations:
+            logger.warning("No recommendations to explain")
+            return 0.0
+
+        explained_count = 0
+        total_recommendations = len(self.pool_recommendations)
+
+        for item_id in self.pool_recommendations:
+            if self._can_explain_item(item_id):
+                explained_count += 1
+
+        fidelity = explained_count / total_recommendations
+        logger.info(
+            f"Explained {explained_count}/{total_recommendations} recommendations "
+            f"(fidelity: {fidelity:.3f})"
+        )
+
+        return fidelity
+
+    def _can_explain_item(self, item_id: ItemId) -> bool:
+        """
+        Check if an item can be explained by any rule.
+
+        Args:
+            item_id: The item ID to check.
+
+        Returns:
+            True if at least one rule can explain the item.
+        """
+        applicable_rules = self._find_applicable_rules(item_id)
+
+        for _, rule in applicable_rules.iterrows():
+            antecedent = rule["antecedents"]
+            satisfied_count = self._count_satisfied_members(antecedent)
+
+            if satisfied_count >= self.min_members_threshold:
+                logger.debug(f"Rule fired for item {item_id}")
+                return True
+
+        return False
+
+    def get_explanation_details(self) -> Dict[ItemId, List[Dict]]:
+        """
+        Get detailed explanations for each recommendation.
+
+        Returns:
+            Dictionary mapping item IDs to lists of applicable rule details.
+        """
+        explanations = {}
+
+        for item_id in self.pool_recommendations:
+            item_explanations = []
+            applicable_rules = self._find_applicable_rules(item_id)
+
+            for _, rule in applicable_rules.iterrows():
+                antecedent = rule["antecedents"]
+                satisfied_count = self._count_satisfied_members(antecedent)
+
+                if satisfied_count >= self.min_members_threshold:
+                    item_explanations.append(
+                        {
+                            "antecedent": antecedent,
+                            "consequent": rule["consequents"],
+                            "satisfied_members": satisfied_count,
+                            "confidence": rule.get("confidence", "N/A"),
+                            "support": rule.get("support", "N/A"),
+                        }
+                    )
+
+            explanations[item_id] = item_explanations
+
+        return explanations
+
+    def compute_group_fidelity_advanced(self) -> float:
+        """
+        Compute group fidelity using advanced conditions.
+
+        This method implements a more sophisticated fidelity calculation where:
+        - Condition 1: Each member of the group must have seen at least one item from the antecedent
+        - Condition 2: Each item in the antecedent must have been seen by at least one member
+
+        Returns:
+            The fidelity score as a float between 0 and 1.
+        """
+        if not self.pool_recommendations:
+            logger.warning("No recommendations to explain")
+            return 0.0
+
+        if not self.members:
+            logger.warning("No group members defined")
+            return 0.0
+
+        explained_count = 0
+        total_recommendations = len(self.pool_recommendations)
+
+        # Convert member IDs to set for faster lookup
+        members_set = set(self.members)
+
+        # Get all items seen by any group member
+        all_seen_items = set()
+        for member in members_set:
+            member_history = self.user_history.get(member, set())
+            # Convert to strings for consistency with rules
+            member_history_str = {str(item) for item in member_history}
+            all_seen_items.update(member_history_str)
+
+        for item_id in self.pool_recommendations:
+            if self._can_explain_item_advanced(item_id, members_set, all_seen_items):
+                explained_count += 1
+
+        fidelity = explained_count / total_recommendations
+        logger.info(
+            f"Advanced explanation: {explained_count}/{total_recommendations} recommendations "
+            f"(fidelity: {fidelity:.3f})"
+        )
+
+        return fidelity
+
+    def _can_explain_item_advanced(
+        self, item_id: ItemId, members_set: Set[MemberId], all_seen_items: Set[str]
+    ) -> bool:
+        """
+        Check if an item can be explained using advanced conditions.
+
+        Args:
+            item_id: The item ID to check.
+            members_set: Set of group member IDs.
+            all_seen_items: Set of all items seen by any group member.
+
+        Returns:
+            True if the item can be explained by at least one rule satisfying both conditions.
+        """
+        applicable_rules = self._find_applicable_rules(item_id)
+
+        for _, rule in applicable_rules.iterrows():
+            antecedent = rule["antecedents"]
+
+            # Condition 1: Each member must have seen at least one item from the antecedent
+            cond1 = all(
+                self._member_has_antecedent_item(member, antecedent)
+                for member in members_set
+            )
+
+            # Condition 2: Each item in the antecedent must have been seen by at least one member
+            cond2 = antecedent.issubset(all_seen_items)
+
+            if cond1 and cond2:
+                logger.debug(f"Advanced rule fired for item {item_id}")
+                return True
+
+        return False
+
+    def _member_has_antecedent_item(
+        self, member: MemberId, antecedent: Set[ItemId]
+    ) -> bool:
+        """
+        Check if a member has seen at least one item from the antecedent.
+
+        Args:
+            member: The member ID to check.
+            antecedent: The set of items in the rule's antecedent.
+
+        Returns:
+            True if the member has seen at least one item from the antecedent.
+        """
+        member_history = self.user_history.get(member, set())
+        member_history_str = {str(item) for item in member_history}
+
+        # Check if there's any intersection between member history and antecedent
+        return len(antecedent.intersection(member_history_str)) > 0
@@ -0,0 +1,434 @@
+import itertools
+from typing import Dict, List, Sequence, Union
+
+from pygrex.data_reader import DataReader, GroupInteractionHandler
+from pygrex.models import RecommenderModel
+from pygrex.recommender import GroupRecommender
+from pygrex.utils import SlidingWindowRanker, SlidingWindow, AggregationStrategy
+
+
+class SlidingWindowExplainer:
+    """
+    Stratigi, M., Bikakis, N., Stefanidis, K.: Counterfactual explanations for group
+    recommendations. In: Proceedings of the 27th International Workshop on Design,
+    Optimization, Languages and Analytical Processing of Big Data (DOLAP 2025).
+
+    A class that uses a sliding window approach to find counterfactual explanations
+    for group recommendation systems.
+
+    This class helps identify which items, if removed from the group's interaction history,
+    would cause a specific target item to no longer appear in the group recommendations.
+    """
+
+    def __init__(
+        self,
+        config,
+        data: DataReader,
+        group_handler: GroupInteractionHandler,
+        members: List[Union[str, int]],
+        target_item: Union[str, int],
+        model: RecommenderModel,
+        aggregation_strategy: AggregationStrategy = AggregationStrategy.AVG_PREDICTIONS,
+        window_size=3,
+    ):
+        """
+        Initialize the SlidingWindowExplainer.
+
+        Args:
+            config: Configuration object with model parameters
+            data: DataReader object containing the dataset
+            group_handler: Object that handles group data modifications
+            members: List of user IDs in the group
+            target_item: The item ID for which explanation is sought
+            model: Recommender model to use for predictions,
+            aggregation_strategy: Strategy to aggregate individual recommendations,
+            window_size: Size of the sliding window
+        """
+        self.cfg = config
+        self.data = data
+        self.group_handler = group_handler
+        self.members = members
+        self.target_item = target_item
+        self.model = model
+        self.aggregation_strategy = aggregation_strategy
+        self.window_size = window_size
+
+        # Results tracking
+        self.explanations_found: Dict[int, Dict] = {}
+        self.calls = 0
+        self.max_calls = 1000
+        self.item_metrics = {}
+
+    def set_sliding_window(self, sliding_window) -> None:
+        """
+        Attach a sliding window object to this explainer.
+
+        Args:
+            sliding_window: The object to store as ``self.sliding_window``.
+        """
+        self.sliding_window = sliding_window
+
+    def set_item_metrics(self, metrics: Dict[Union[str, int], Dict[str, float]]) -> None:
+        """
+        Store the pre-calculated metric scores for all items.
+
+        Args:
+            metrics: Mapping from item ID to that item's named metric scores;
+                     it replaces whatever ``self.item_metrics`` held before.
+        """
+        self.item_metrics = metrics
+
+    def find_explanation(
+        self,
+        items_rated_by_group: List[Union[str, int]],
+        group_predictions: Dict,
+        top_recommendation: Union[str, int],
+        ranking_weights: Dict[str, float],
+    ) -> Dict[int, Dict]:
+        """
+        Find counterfactual explanations using the full, encapsulated process.
+
+        Args:
+            items_rated_by_group: All items rated by any member of the group.
+            group_predictions: The original individual predictions from the recommender.
+            top_recommendation: The original top recommended item.
+            ranking_weights: The weights from the UI for each ranking component.
+
+        Returns:
+            A dictionary of found explanations, including their justification metrics.
+        """
+
+        self.calls = 0
+        ranker = SlidingWindowRanker(config={})
+        ranker.set_group_recommender_values(group_predictions, top_recommendation)
+        ranked_items, self.item_metrics = ranker.generate_ranked_items(
+            all_rated_items=items_rated_by_group,
+            data=self.data,
+            group_members=self.members,
+            component_weights=ranking_weights,
+        )
+
+        sliding_window = SlidingWindow(
+            sequence=ranked_items, window_size=self.window_size
+        )
+
+        found = 0
+        while True:
+            # Get the sliding window
+            big_window = sliding_window.get_next_window()
+
+            # Check exit conditions
+            if big_window is None or found > 0 or self.calls >= self.max_calls:
+                break
+
+            # Count calls and windows
+            self.calls += 1
+
+            # Test if removing this window affects recommendations
+            if self._test_window_removal(big_window, self.target_item):
+                # A counterfactual explanation has been found
+                found += 1
+                # Look for minimal subsets within this window
+                self._find_minimal_subset(big_window, self.target_item)
+
+        if found == 0:
+            print("Explanation could not be found")
+
+        return self.explanations_found
+
+    def _test_window_removal(
+        self, item_ids: List[Union[str, int]], original_group_rec: Union[str, int]
+    ) -> bool:
+        """
+        Test if removing the given items affects the group recommendation.
+
+        Args:
+            item_ids: List of item IDs to remove from group interactions
+            original_group_rec: The original recommendation to compare against
+
+        Returns:
+            bool: True if removing these items changes recommendations, False otherwise
+        """
+
+        # Get new recommendations after removing items
+        group_recommendation = self._get_recommendations_after_removal(item_ids)
+
+        # Check if target item is still in recommendations
+
+        return original_group_rec not in group_recommendation
+
+    def _get_recommendations_after_removal(
+        self, item_ids: List[Union[str, int]], top_n: int = 10
+    ) -> Sequence[Union[str, int]]:
+        """
+        Get group recommendations after removing specified items from interaction history.
+
+        Args:
+            item_ids: List of item IDs to remove from group interactions
+            top_n: Number of top recommendations to return
+
+        Returns:
+            List of recommended item IDs
+        """
+        # Create modified dataset with items removed
+        changed_data = self.group_handler.create_modified_dataset(
+            original_data=self.data.dataset,
+            group_ids=self.members,
+            item_ids=item_ids,
+            data=self.data,
+        )
+
+        # Create new DataReader and retrain model
+        data_retrained = self._create_data_reader_and_prepare(changed_data)
+        model_retrained = self._retrain_model(data_retrained)
+
+        # Set up recommender with new model and data
+        group_recommender = GroupRecommender(data_retrained)
+        group_recommender.setup_recommendation(
+            model_retrained,
+            self.members,
+            data_retrained,
+            aggregation_strategy=self.aggregation_strategy,
+        )
+        recommendations = group_recommender.get_group_recommendations(top_n)
+
+        if not isinstance(recommendations, list):
+            return []
+
+        return recommendations
+
+    def _create_data_reader_and_prepare(self, changed_data):
+        """
+        Create and prepare a new DataReader with modified data.
+
+        Args:
+            changed_data: DataFrame with modified dataset
+
+        Returns:
+            DataReader: A new DataReader object with the modified dataset
+        """
+        data_retrained = DataReader(
+            filepath_or_buffer=None,
+            sep=None,
+            names=None,
+            skiprows=0,
+            dataframe=changed_data,
+        )
+
+        # Fix for potential dataset issue in original code
+        # data_retrained.dataset = data_retrained.dataset.iloc[1:].reset_index(drop=True)
+
+        # Prepare data
+        data_retrained.make_consecutive_ids_in_dataset()
+        data_retrained.binarize(binary_threshold=1)
+
+        return data_retrained
+
+    def _retrain_model(self, data):
+        """
+        Retrain the recommendation model with modified data.
+
+        Args:
+            data: Prepared DataReader object with modified dataset
+
+        Returns:
+            Retrained model
+        """
+        self.model.fit(data)
+        return self.model
+
+    def _find_minimal_subset(
+        self, big_window: List[Union[str, int]], original_group_rec: Union[str, int]
+    ) -> None:
+        """
+        Find minimal subset of items that act as counterfactual explanation.
+
+        Args:
+            big_window: List of item IDs to search within
+            original_group_rec: The original recommendation to compare against
+
+        """
+        found_subset = 0
+
+        # Try combinations of different lengths
+        for length in range(1, len(big_window) + 1):
+            if found_subset > 0 or self.calls > self.max_calls:
+                break
+
+            combinations = itertools.combinations(big_window, length)
+            for item_combo in combinations:
+                if found_subset > 0 or self.calls > self.max_calls:
+                    break
+
+                subset_items = list(item_combo)
+                self.calls += 1
+
+                # Get recommendations after removing this subset
+                new_recommendations = self._get_recommendations_after_removal(
+                    subset_items
+                )
+
+                # Check if this is a counterfactual explanation
+                if original_group_rec not in new_recommendations:
+                    found_subset += 1
+                    self._record_explanation(
+                        subset_items, original_group_rec, new_recommendations[0]
+                    )
+
+    def _record_explanation(
+        self,
+        explanation_items: List[Union[str, int]],
+        original_rec: Union[str, int],
+        new_rec: Union[str, int],
+    ) -> None:
+        """
+        Record and display found explanation.
+
+        Args:
+            explanation_items: Items that form the counterfactual explanation
+            original_rec: Original recommendation
+            new_rec: New top recommendation after removing explanation items
+        """
+        print(
+            f"If the group had not interacted with these items {explanation_items},\n"
+            f"the item of interest {original_rec} would not have appeared on the recommendation list;\n"
+            f"instead, {new_rec} would have been recommended."
+        )
+        # 	print("")
+        #   print(f"Explanation: {explanation_items} : found at call: {self.calls}")
+
+        # Calculate metrics for the explanation
+        item_intensity = self._calculate_item_intensity(explanation_items)
+        user_intensity = self._calculate_user_intensity(explanation_items)
+        explanation_metrics = {
+            item: self.item_metrics.get(item, {}) for item in explanation_items
+        }
+
+        self.explanations_found[self.calls] = {
+            "items": explanation_items,
+            "new_rec": new_rec,
+            "metrics": explanation_metrics,
+        }
+
+        exp_size = len(explanation_items)
+
+        #   print(f"{exp_size}\t{self.calls}\t{item_intensity}\t{user_intensity}")
+
+    def _calculate_item_intensity(self, items: List[Union[str, int]]) -> List[float]:
+        """
+        Calculate average item intensity for explanation items.
+
+        Args:
+            items: List of item IDs in the explanation
+
+        Returns:
+            List of average intensity scores for each item
+        """
+
+        return self._calculate_average_item_intensity_score(
+            items, self.members, self.data
+        )
+
+    def _calculate_user_intensity(self, items: List[Union[str, int]]) -> List[float]:
+        """
+        Calculate user intensity score for explanation items.
+
+        Args:
+            items: List of item IDs in the explanation
+
+        Returns:
+            List of intensity scores for each user
+        """
+        return self._calculate_user_intensity_score(items, self.members, self.data)
+
+    @staticmethod
+    def _calculate_average_item_intensity_score(
+        explanation: List[Union[str, int]],
+        members: List[Union[str, int]],
+        data: DataReader,
+    ) -> List[float]:
+        """
+        Calculate the average item intensity for a counterfactual explanation.
+
+        Average item intensity is defined as the average number of interactions
+        between group members and each item in the explanation.
+
+        Args:
+            explanation: The counterfactual explanation items.
+            members: User IDs of the group members.
+            data: DataReader object containing the dataset and ID mapping methods.
+
+        Returns:
+            list: Average intensity for each item in the explanation.
+        """
+        internal_group_ids = []
+        # Convert user IDs to internal representation
+        for user_id in members:
+            new_user_id = data.get_new_user_id(user_id)
+            if isinstance(new_user_id, list):
+                if new_user_id:  # Check that the list is not empty
+                    internal_group_ids.append(int(new_user_id[0]))
+            else:
+                internal_group_ids.append(int(new_user_id))
+
+        group_size = len(members)
+        item_intensities = []
+
+        for item_id in explanation:
+            # Convert item ID to internal representation
+            internal_item_id = data.get_new_item_id(item_id)
+
+            # Count interactions between this item and group members
+            interactions_count = len(
+                data.dataset[
+                    (data.dataset.itemId == internal_item_id)
+                    & (data.dataset.userId.isin(internal_group_ids))
+                ]
+            )
+
+            # Calculate average intensity
+            average_intensity = interactions_count / group_size
+            item_intensities.append(average_intensity)
+
+        return item_intensities
+
+    @staticmethod
+    def _calculate_user_intensity_score(
+        explanation_items: List[Union[str, int]],
+        members: List[Union[str, int]],
+        data: DataReader,
+    ) -> List[float]:
+        """
+        Calculate the interaction intensity for each user based on their interactions with items in an explanation.
+
+        Interaction intensity represents how much a user has interacted with the items in the explanation,
+        normalized by the total number of explanation items.
+
+        Args
+            explanation_items : List of item IDs in the explanation
+            members : List of user IDs to calculate intensity for
+            data : DataReader object containing the dataset and ID mapping methods
+
+        Returns
+            List of interaction intensities for each user (same order as members)
+            Values range from 0 to 1, where:
+            - 0 means no interaction with any explanation item
+            - 1 means interaction with all explanation items
+
+        Notes
+            Intensity is calculated as: (number of user interactions with explanation items) / (number of explanation items)
+        """
+        # Convert external item IDs to internal IDs
+        internal_item_ids = [
+            data.get_new_item_id(item_id) for item_id in explanation_items
+        ]
+
+        user_intensities = []
+        num_explanation_items = len(explanation_items)
+
+        for member in members:
+            # Convert external user ID to internal ID
+            internal_user_id = data.get_new_user_id(member)
+
+            # Count interactions between this user and explanation items
+            user_interactions_count = len(
+                data.dataset[
+                    (data.dataset.itemId.isin(internal_item_ids))
+                    & (data.dataset.userId == internal_user_id)
+                ]
+            )
+
+            # Calculate intensity as proportion of explanation items the user interacted with
+            intensity = user_interactions_count / num_explanation_items
+            user_intensities.append(intensity)
+
+        return user_intensities
@@ -0,0 +1,11 @@
+from .model_based_emf import EMFExplainer
+from .model_based_als_explain import ALSExplainer
+from .post_hoc_association_rules import ARPostHocExplainer
+from .post_hoc_knn import KNNPostHocExplainer
+
+__all__ = [
+    "EMFExplainer",
+    "ALSExplainer",
+    "ARPostHocExplainer",
+    "KNNPostHocExplainer",
+]
@@ -0,0 +1,49 @@
+from tqdm.auto import tqdm
+
+from abc import ABC, abstractmethod
+from typing import Dict, Any
+
+
+class Explainer(ABC):
+    def __init__(self, model, recommendations, data):
+        self.model = model
+        self.recommendations = recommendations
+        self.dataset = data.dataset
+        self.num_items = data.num_item
+        self.num_users = data.num_user
+        self.users = self.dataset.groupby(by="userId")
+
+    def explain_recommendations(self):
+        explanations = []
+
+        with tqdm(
+            total=self.recommendations.shape[0], desc="Computing explanations: "
+        ) as pbar:
+            for _, row in self.recommendations.iterrows():
+                explanations.append(
+                    self.explain_recommendation_to_user(
+                        int(row.userId), int(row.itemId)
+                    )
+                )
+                pbar.update()
+
+        self.recommendations["explanations"] = explanations
+        return self.recommendations
+
+    def get_user_items(self, user_id):
+        """
+        Items Ids rated by a user.
+        :param user_id: the user
+        :return: list
+        """
+        return self.users.get_group(user_id).itemId.values
+
+    @abstractmethod
+    def explain_recommendation_to_user(
+        self, user_id: int, item_id: int
+    ) -> Dict[str, Any]:
+        """
+        Generates an explanation for a single user-item recommendation.
+        This method must be implemented by any subclass.
+        """
+        raise NotImplementedError
@@ -0,0 +1,51 @@
+import numpy as np
+import pandas as pd
+
+from .explainer import Explainer
+
+
+class ALSExplainer(Explainer):
+    def __init__(self, model, recommendations, data, number_of_contributions=10):
+        super(ALSExplainer, self).__init__(model, recommendations, data)
+        self.number_of_contributions = number_of_contributions
+
+    def explain_recommendation_to_user(self, user_id: int, item_id: int):
+        """
+        Measuring the contribution of each item to the recommendation.
+        :param model:
+        :param item_id:
+        :param user_id:
+        :return: returns a dataframe with the contribution to the recommendation of each previously interacted with item.
+        """
+
+        current_interactions = np.zeros(self.num_items)
+        current_interactions[self.get_user_items(user_id)] = 1
+
+        c_u = np.diag(current_interactions)
+
+        y_t = self.model.item_embedding().transpose()
+        temp = np.matmul(y_t, c_u)
+        temp = np.matmul(temp, self.model.item_embedding())
+        temp = temp + np.diag([self.model.reg_term] * self.model.latent_dim)
+
+        if len(self.get_user_items(user_id)) > 1:
+            weight_mtr = np.linalg.inv(temp)
+        else:
+            weight_mtr = np.linalg.pinv(temp)
+
+        temp = np.matmul(self.model.item_embedding(), weight_mtr)
+
+        sim_to_rec_id = temp.dot(self.model.item_embedding()[item_id, :])
+
+        sim_to_rec_id = sim_to_rec_id[self.get_user_items(user_id)]
+
+        contribution = {
+            "item": self.get_user_items(user_id),
+            "contribution": sim_to_rec_id,
+        }
+        contribution = pd.DataFrame(contribution)
+        contribution = contribution.sort_values(by=["contribution"], ascending=False)
+        return {
+            "item": contribution.item[: self.number_of_contributions],
+            "contribution": contribution.contribution[: self.number_of_contributions],
+        }
@@ -0,0 +1,28 @@
+from .explainer import Explainer
+
+
+class EMFExplainer(Explainer):
+    def __init__(self, model, recommendations, data):
+        super(EMFExplainer, self).__init__(model, recommendations, data)
+
+    def explain_recommendation_to_user(self, user_id: int, item_id: int):
+        """
+        Measuring the contribution of each item to the recommendation.
+        :param user_id:
+        :param item_id: recommendation
+        :return: returns a dataframe with the contribution to the recommendation of each previously interacted with item.
+        """
+
+        ratings_on_item = self.dataset[self.dataset.itemId == item_id]
+        similar_users = self.model.sim_users[user_id]
+        similar_users_ratings_on_item = ratings_on_item[
+            ratings_on_item.userId.isin(similar_users)
+        ]
+
+        explanation_df = similar_users_ratings_on_item.groupby(by="rating").count()
+        explanation = {}
+
+        for index, row in explanation_df.iterrows():
+            explanation[index] = row[0]
+
+        return explanation
@@ -0,0 +1,79 @@
+from typing import Any, Dict
+from mlxtend.preprocessing import TransactionEncoder
+from mlxtend.frequent_patterns import apriori, association_rules
+import pandas as pd
+
+from .explainer import Explainer
+
+
+class ARPostHocExplainer(Explainer):
+    def __init__(
+        self,
+        model,
+        recommendations,
+        data,
+        min_support=0.1,
+        max_len=2,
+        metric="lift",
+        min_threshold=0.1,
+        min_confidence=0.1,
+        min_lift=0.1,
+    ):
+        super(ARPostHocExplainer, self).__init__(model, recommendations, data)
+        self.AR = None
+        self.min_support = min_support
+        self.max_len = max_len
+        self.metric = metric
+        self.min_threshold = min_threshold
+        self.min_confidence = min_confidence
+        self.min_lift = min_lift
+
+        self.rules: pd.DataFrame | None = None
+
+    def get_rules_for_getting(self, item_id: int) -> pd.DataFrame:
+        if self.rules is None:
+            self.compute_association_rules()
+
+        if self.rules is not None:
+            return self.rules[self.rules.consequents == item_id]
+
+        return pd.DataFrame()
+
+    def compute_association_rules(self):
+        item_sets = [
+            [item for item in self.dataset[self.dataset.userId == user].itemId]
+            for user in self.dataset.userId.unique()
+        ]
+
+        te = TransactionEncoder()
+        te_ary = te.fit(item_sets).transform(item_sets)
+
+        # The te_ary object is a NumPy array, which is a valid input for a DataFrame.
+        # Pylance may raise a false positive here due to incomplete type stubs for mlxtend.
+        df = pd.DataFrame(te_ary.astype(bool), columns=te.columns_)  # type: ignore
+
+        frequent_itemsets = apriori(
+            df, min_support=self.min_support, use_colnames=True, max_len=self.max_len
+        )
+
+        rules = association_rules(
+            frequent_itemsets, metric="lift", min_threshold=self.min_threshold
+        )
+        rules = rules[
+            (rules["confidence"] > self.min_confidence)
+            & (rules["lift"] > self.min_lift)
+        ]
+
+        rules["consequents"] = rules["consequents"].apply(lambda x: list(x)[0])
+        rules["antecedents"] = rules["antecedents"].apply(lambda x: list(x)[0])
+
+        self.rules = rules[["consequents", "antecedents", "confidence"]]
+
+    def explain_recommendation_to_user(
+        self, user_id: int, item_id: int
+    ) -> Dict[str, Any]:
+        user_ratings = self.get_user_items(user_id)
+        rules = self.get_rules_for_getting(item_id)
+        explanations = rules[rules.antecedents.isin(user_ratings)]
+
+        return {"antecedents": set(explanations.antecedents)}
@@ -0,0 +1,46 @@
+from scipy import sparse
+from sklearn.metrics.pairwise import cosine_similarity
+import numpy as np
+from typing import Dict, Any
+
+from .explainer import Explainer
+
+
+class KNNPostHocExplainer(Explainer):
+    def __init__(self, model, recommendations, data, knn=10):
+        super(KNNPostHocExplainer, self).__init__(model, recommendations, data)
+
+        self.knn = knn
+        # Initialize as an empty dictionary to prevent subscripting None
+        self.knn_items_dict: Dict[int, np.ndarray] = {}
+
+    def get_nn_for_getting(self, item_id: int) -> np.ndarray:
+        # Check if the KNN dictionary has been computed
+        if not self.knn_items_dict:
+            self.compute_knn_items_for_all_items()
+
+        # Return the neighbors for the item, or an empty array if not found
+        return self.knn_items_dict.get(item_id, np.array([]))
+
+    def compute_knn_items_for_all_items(self):
+        ds = np.zeros((self.num_items, self.num_users))
+        # Assuming self.dataset has attributes itemId, userId, and rating
+        ds[self.dataset.itemId, self.dataset.userId] = self.dataset.rating
+
+        ds = sparse.csr_matrix(ds)
+        sim_matrix = cosine_similarity(ds)
+        min_val = sim_matrix.min() - 1
+
+        for i in range(self.num_items):
+            sim_matrix[i, i] = min_val
+            knn_to_item_i = (-sim_matrix[i, :]).argsort()[: self.knn]
+            self.knn_items_dict[i] = knn_to_item_i
+
+    def explain_recommendation_to_user(
+        self, user_id: int, item_id: int
+    ) -> Dict[str, Any]:
+        user_ratings = self.get_user_items(user_id)
+        sim_items = self.get_nn_for_getting(item_id)
+        explanations = set(sim_items) & set(user_ratings)
+
+        return {"explanations": explanations}
@@ -0,0 +1,23 @@
+from .als_model import ALS
+from .bpr_model import BPR
+from .gmf_model import GMFModel
+from .emf_model import EMFModel
+from .autoencoder_model import ExplAutoencoderTorch
+from .mlp_model import MLPModel
+from .emf_model import PyTorchModel
+from .knn_basic_model import KNNBasic
+from .svd_model import SVD
+from .recommender_model import RecommenderModel
+
+__all__ = [
+    "ALS",
+    "BPR",
+    "GMFModel",
+    "EMFModel",
+    "PyTorchModel",
+    "MLPModel",
+    "ExplAutoencoderTorch",
+    "KNNBasic",
+    "SVD",
+    "RecommenderModel",
+]
@@ -0,0 +1,31 @@
+import implicit
+
+from .mf_implicit_model import MFImplicitModel
+
+
+class ALS(MFImplicitModel):
+    def __init__(
+        self,
+        latent_dim,
+        reg_term,
+        epochs,
+        random_state=42,
+        num_users=None,
+        num_items=None,
+        **kwargs,
+    ):
+        super(ALS, self).__init__(
+            latent_dim=latent_dim,
+            reg_term=reg_term,
+            epochs=epochs,
+            learning_rate=None,
+            num_users=num_users,
+            num_items=num_items,
+        )
+
+        self.model = implicit.als.AlternatingLeastSquares(
+            factors=self.latent_dim,
+            regularization=self.reg_term,
+            iterations=self.epochs,
+            random_state=random_state,
+        )
@@ -0,0 +1,223 @@
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.optim
+from scipy import sparse
+from sklearn.metrics.pairwise import cosine_similarity
+from sklearn.preprocessing import MinMaxScaler
+from torch.utils.data import DataLoader
+from tqdm.auto import tqdm
+from typing import Optional, Union, List
+
+from pygrex.utils.torch_utils import use_cuda, use_optimizer
+from pygrex.data_reader import UserItemDict, DataReader
+from .recommender_model import RecommenderModel
+
+
+class ExplAutoencoderTorch(RecommenderModel, nn.Module):
+    def __init__(
+        self,
+        hidden_layer_features: int,
+        learning_rate: float,
+        positive_threshold: float,
+        weight_decay: float,
+        epochs: int,
+        knn: int,
+        cuda: bool,
+        optimizer_name: str,
+        expl: bool,
+        device_id: Optional[int] = None,
+    ):
+        super().__init__()
+        if optimizer_name not in ["sgd", "adam", "rmsprop"]:
+            raise Exception("Wrong optimizer.")
+        if cuda:
+            use_cuda(True, device_id if device_id is not None else 0)
+
+        self.positive_threshold = positive_threshold
+        self.weight_decay = weight_decay
+        self.knn = knn
+        self.learning_rate = learning_rate
+        self.epochs = epochs
+        self.use_gpu = cuda
+        self.optimizer_name = optimizer_name
+        self.hidden_layer_features = hidden_layer_features
+        self.expl = expl
+
+        self.dataset = None
+        self.data = None
+        self.embedding_user = None
+        self.embedding_item = None
+        self.optimizer: Optional[torch.optim.Optimizer] = None
+
+        self.explainability_matrix = None
+        self.sim_users = {}
+
+        self.criterion = nn.MSELoss()
+
+    def fit(self, data: DataReader):
+        self.data = data
+        self.dataset = data.dataset
+        num_items = self.data.num_item
+
+        self.encoder_hidden_layer = nn.Linear(
+            in_features=num_items, out_features=self.hidden_layer_features
+        )
+
+        self.decoder_output_layer = nn.Linear(
+            in_features=self.hidden_layer_features, out_features=num_items
+        )
+
+        self.compute_explainability()
+        optimizer = use_optimizer(
+            network=self,
+            weight_decay=self.weight_decay,
+            learning_rate=self.learning_rate,
+            optimizer_name=self.optimizer_name,
+        )
+
+        assert isinstance(optimizer, torch.optim.Optimizer)
+        self.optimizer = optimizer
+
+        with tqdm(total=self.epochs) as progress:
+            train_loader = self.instance_a_train_loader()
+            for epoch in range(self.epochs):
+                loss = self.train_an_epoch(train_loader)
+                progress.update(1)
+                progress.set_postfix({"loss": loss})
+
+    def compute_explainability(self):
+        assert self.dataset is not None
+        assert self.data is not None
+        ds = self.dataset.pivot(index="userId", columns="itemId", values="rating")
+        ds = ds.fillna(0)
+        ds = sparse.csr_matrix(ds)
+        sim_matrix = cosine_similarity(ds)
+        min_val = sim_matrix.min() - 1
+
+        for i in range(self.data.num_user):
+            sim_matrix[i, i] = min_val
+
+            knn_to_user_i = (-sim_matrix[i, :]).argsort()[: self.knn]
+            self.sim_users[i] = knn_to_user_i
+
+        self.explainability_matrix = np.zeros((self.data.num_user, self.data.num_item))
+
+        filter_dataset_on_threshold = self.dataset[
+            self.dataset["rating"] >= self.positive_threshold
+        ]
+
+        for i in range(self.data.num_user):
+            knn_to_user_i = self.sim_users[i]
+
+            rated_items_by_sim_users = filter_dataset_on_threshold[
+                filter_dataset_on_threshold["userId"].isin(knn_to_user_i)
+            ]
+
+            sim_scores = rated_items_by_sim_users.groupby(by="itemId")
+            sim_scores = sim_scores["rating"].sum()
+            sim_scores = sim_scores.reset_index()
+
+            self.explainability_matrix[i, sim_scores.itemId] = (
+                sim_scores.rating.to_list()
+            )
+
+        self.explainability_matrix = MinMaxScaler().fit_transform(
+            self.explainability_matrix
+        )
+
+        self.explainability_matrix = torch.from_numpy(self.explainability_matrix)
+
+    def instance_a_train_loader(self):
+        """instance train loader for one training epoch"""
+        assert self.dataset is not None
+        assert self.explainability_matrix is not None
+        self.user_item_dict = UserItemDict(
+            self.dataset, self.explainability_matrix, self.expl
+        )
+        return DataLoader(self.user_item_dict, shuffle=True)
+
+    def train_an_epoch(self, train_loader):
+        self.train()
+        cnt = 0
+        total_loss = 0
+        for batch_id, batch in enumerate(train_loader):
+            assert isinstance(batch[0], torch.Tensor)
+            rating = batch[0]
+            rating = rating.float()
+            loss = self.train_single_user(rating)
+            total_loss += loss
+            cnt += 1
+        return total_loss / cnt
+
+    def train_single_user(self, ratings):
+        if self.use_gpu:
+            ratings = ratings.cuda()
+
+        assert self.optimizer is not None
+        self.optimizer.zero_grad()
+        ratings_pred = self(ratings)
+        loss = self.criterion(ratings_pred, ratings)
+        loss.backward()
+        self.optimizer.step()
+        loss = loss.item()
+        return loss
+
+    def forward(self, user_adjusted_ratings):
+        activation = self.encoder_hidden_layer(user_adjusted_ratings)
+        code = torch.relu(activation)
+        activation = self.decoder_output_layer(code)
+        reconstructed_ratings = torch.relu(activation)
+        return reconstructed_ratings
+
+    def predict(
+        self, user_id: Union[int, List[int], str], item_id: Union[int, List[int], str]
+    ) -> list:
+        try:
+            if isinstance(user_id, str):
+                user_id = int(user_id)
+            elif isinstance(user_id, list):
+                user_id = [int(u) for u in user_id]
+            if isinstance(item_id, str):
+                item_id = int(item_id)
+            elif isinstance(item_id, list):
+                item_id = [int(i) for i in item_id]
+        except (ValueError, TypeError):
+            raise ValueError(
+                "User and item IDs must be integers or strings that can be converted to integers."
+            )
+
+        single_user = isinstance(user_id, int)
+        single_item = isinstance(item_id, int)
+
+        if isinstance(user_id, int):
+            user_id = [user_id]
+        if isinstance(item_id, int):
+            item_id = [item_id]
+
+        with torch.no_grad():
+            assert self.user_item_dict is not None, "The model has not been fitted yet."
+
+            # Collect ratings for all users
+            ratings_list = []
+            for uid in user_id:
+                rating = self.user_item_dict[uid]  # Pass scalar user_id to dict
+                ratings_list.append(rating)
+
+            rating = torch.stack(ratings_list)
+            rating = rating.float()
+            if self.use_gpu:
+                rating = rating.cuda()
+            pred = self.forward(rating).cpu()
+            predictions = pred[:, item_id].tolist()
+
+            # Flatten the nested list if it contains only one user's predictions
+            if single_user and single_item:
+                return (
+                    predictions[0][0]
+                    if isinstance(predictions[0], list)
+                    else predictions[0]
+                )
+            elif single_user:
+                return predictions[0]
+            return predictions
@@ -0,0 +1,25 @@
+import implicit
+
+from .mf_implicit_model import MFImplicitModel
+
+
+class BPR(MFImplicitModel):
+    """"""
+    def __init__(self,
+                 latent_dim,
+                 reg_term,
+                 learning_rate,
+                 epochs,
+                 **kwargs):
+
+        super(BPR, self).__init__(latent_dim=latent_dim,
+                                  reg_term=reg_term,
+                                  learning_rate=learning_rate,
+                                  epochs=epochs)
+
+        self.model = implicit.bpr.BayesianPersonalizedRanking(
+            factors=self.latent_dim,
+            learning_rate=self.learning_rate,
+            regularization=self.reg_term,
+            iterations=self.epochs
+        )
@@ -0,0 +1,391 @@
+import numpy as np
+import torch
+import torch.nn as nn
+from scipy import sparse
+from sklearn.metrics.pairwise import cosine_similarity
+from sklearn.preprocessing import MinMaxScaler
+from torch.utils.data import DataLoader
+from tqdm.auto import tqdm
+from typing import Union
+
+from pygrex.data_reader import UserItemRatingDataset, DataReader
+from pygrex.utils import EMFLoss
+from .py_torch_model import PyTorchModel
+from .recommender_model import RecommenderModel
+
+
+class EMFModel(RecommenderModel):
+    def __init__(
+        self,
+        learning_rate: float,
+        reg_term: float,
+        expl_reg_term: float,
+        positive_threshold: float,
+        latent_dim: int,
+        epochs: int,
+        knn: int,
+    ):
+        self.latent_dim = latent_dim
+        self.learning_rate = learning_rate
+        self.epochs = epochs
+
+        self.dataset = None
+        self.data = None
+        self.embedding_user = None
+        self.embedding_item = None
+        self.optimizer = None
+
+        self.reg_term = reg_term
+        self.expl_reg_term = expl_reg_term
+        self.positive_threshold = positive_threshold
+        self.knn = knn
+
+        self.explainability_matrix = None
+        self.sim_users = {}
+
+        self.affine_output = nn.Linear(in_features=self.latent_dim, out_features=1)
+
+        self.criterion = EMFLoss()
+
+    def fit(self, data: DataReader) -> None:
+        self.data = data
+        self.dataset = data.dataset
+
+        assert self.data is not None
+        num_users = self.data.num_user
+        num_items = self.data.num_item
+
+        self.embedding_user = np.random.uniform(
+            low=0, high=0.5 / self.latent_dim, size=(num_users, self.latent_dim)
+        )
+
+        self.embedding_item = np.random.uniform(
+            low=0, high=0.5 / self.latent_dim, size=(num_items, self.latent_dim)
+        )
+
+        self.compute_explainability()
+
+        with tqdm(total=self.epochs) as progress:
+            assert self.dataset is not None
+            for epoch in range(self.epochs):
+                self.dataset = self.dataset.sample(frac=1)
+                loss = []
+                for _, row in self.dataset.iterrows():
+                    user_id = int(row.userId)
+                    item_id = int(row.itemId)
+
+                    p_ui = self.predict(user_id, item_id)
+
+                    e_ui = row.rating - p_ui
+
+                    loss.append(e_ui**2)
+
+                    assert self.embedding_item is not None
+                    assert self.embedding_user is not None
+                    delta_u = 2 * e_ui * self.embedding_item[item_id, :]
+                    delta_u -= self.reg_term * self.embedding_user[user_id, :]
+                    temp = np.sign(
+                        self.embedding_item[item_id, :]
+                        - self.embedding_user[user_id, :]
+                    )
+                    assert self.explainability_matrix is not None
+                    temp *= (
+                        self.expl_reg_term
+                        * self.explainability_matrix[user_id, item_id]
+                    )
+                    delta_u -= temp
+
+                    delta_v = 2 * e_ui * self.embedding_user[user_id, :]
+                    delta_v -= self.reg_term * self.embedding_item[item_id, :]
+                    temp = np.sign(
+                        self.embedding_user[user_id, :]
+                        - self.embedding_item[item_id, :]
+                    )
+                    assert self.explainability_matrix is not None
+                    temp *= (
+                        self.expl_reg_term
+                        * self.explainability_matrix[user_id, item_id]
+                    )
+                    delta_v -= temp
+
+                    self.embedding_user[user_id, :] += self.learning_rate * delta_u
+                    self.embedding_item[item_id, :] += self.learning_rate * delta_v
+
+                progress.update(1)
+
+                progress.set_postfix({"MSE": sum(loss) / len(loss)})
+
+    def compute_explainability(self):
+        assert self.dataset is not None
+        ds = self.dataset.pivot(index="userId", columns="itemId", values="rating")
+        ds = ds.fillna(0)
+        ds = sparse.csr_matrix(ds)
+        sim_matrix = cosine_similarity(ds)
+        min_val = sim_matrix.min() - 1
+
+        assert self.data is not None
+        for i in range(self.data.num_user):
+            sim_matrix[i, i] = min_val
+
+            knn_to_user_i = (-sim_matrix[i, :]).argsort()[: self.knn]
+            self.sim_users[i] = knn_to_user_i
+
+        self.explainability_matrix = np.zeros((self.data.num_user, self.data.num_item))
+
+        filter_dataset_on_threshold = self.dataset[
+            self.dataset["rating"] >= self.positive_threshold
+        ]
+
+        for i in range(self.data.num_user):
+            knn_to_user_i = self.sim_users[i]
+
+            rated_items_by_sim_users = filter_dataset_on_threshold[
+                filter_dataset_on_threshold["userId"].isin(knn_to_user_i)
+            ]
+
+            sim_scores = rated_items_by_sim_users.groupby(by="itemId")
+            sim_scores = sim_scores["rating"].sum()
+            sim_scores = sim_scores.reset_index()
+
+            self.explainability_matrix[i, sim_scores.itemId.astype(int)] = (
+                sim_scores.rating.to_list()
+            )
+
+        self.explainability_matrix = MinMaxScaler().fit_transform(
+            self.explainability_matrix
+        )
+
+    def predict(
+        self, user_id: Union[int, str], item_id: Union[int, str]
+    ) -> Union[float, list]:
+        user_id_processed = user_id
+        item_id_processed = item_id
+
+        if isinstance(user_id_processed, np.ndarray):
+            user_id_processed = user_id_processed.tolist()
+        if isinstance(item_id_processed, np.ndarray):
+            item_id_processed = item_id_processed.tolist()
+
+        is_list_input = isinstance(user_id_processed, list) or isinstance(
+            item_id_processed, list
+        )
+
+        if is_list_input:
+            user_id_list = (
+                user_id_processed
+                if isinstance(user_id_processed, list)
+                else [user_id_processed]
+            )
+            item_id_list = (
+                item_id_processed
+                if isinstance(item_id_processed, list)
+                else [item_id_processed]
+            )
+            predictions = []
+            for u in user_id_list:
+                assert self.embedding_user is not None
+                assert self.embedding_item is not None
+                pred = [
+                    np.dot(
+                        self.embedding_user[int(u), :], self.embedding_item[int(i), :]
+                    )
+                    for i in item_id_list
+                ]
+                predictions.append(pred)
+            predictions_np = np.array(predictions)
+
+            if len(user_id_list) == 1 or len(item_id_list) == 1:
+                predictions_np = predictions_np.flatten()
+
+            return predictions_np.tolist()
+
+        else:
+            assert self.embedding_user is not None
+            assert self.embedding_item is not None
+            return np.dot(
+                self.embedding_user[int(user_id), :],
+                self.embedding_item[int(item_id), :],
+            )
+
+    def user_embedding(self):
+        return self.embedding_user
+
+    def item_embedding(self):
+        return self.embedding_item
+
+
+class EMFTorchModel(PyTorchModel):
+    def __init__(
+        self,
+        learning_rate: float,
+        reg_term: float,
+        expl_reg_term: float,
+        positive_threshold: float,
+        momentum: float,
+        weight_decay: float,
+        latent_dim: int,
+        epochs: int,
+        batch_size: int,
+        knn: int,
+        cuda: bool,
+        optimizer_name: str,
+        device_id=None,
+    ):
+        super().__init__(
+            learning_rate=learning_rate,
+            latent_dim=latent_dim,
+            epochs=epochs,
+            batch_size=batch_size,
+            cuda=cuda,
+            optimizer_name=optimizer_name,
+            device_id=device_id,
+        )
+
+        self.reg_term = reg_term
+        self.expl_reg_term = expl_reg_term
+        self.positive_threshold = positive_threshold
+        self.momentum = momentum
+        self.weight_decay = weight_decay
+        self.knn = knn
+
+        self.explainability_matrix = None
+        self.sim_users = {}
+
+        self.affine_output = nn.Linear(in_features=self.latent_dim, out_features=1)
+
+        self.criterion = EMFLoss()
+
+    def fit(self, data: DataReader) -> None:
+        self.data = data
+        self.dataset = data.dataset
+
+        assert self.data is not None
+        num_users = self.data.num_user
+        num_items = self.data.num_item
+
+        self.embedding_user = nn.Embedding(
+            num_embeddings=num_users, embedding_dim=self.latent_dim
+        )
+
+        self.embedding_item = nn.Embedding(
+            num_embeddings=num_items, embedding_dim=self.latent_dim
+        )
+
+        self.compute_explainability()
+
+        self.optimizer = torch.optim.SGD(
+            self.parameters(),
+            lr=self.learning_rate,
+            momentum=self.momentum,
+            weight_decay=self.weight_decay,
+        )
+
+        with tqdm(total=self.epochs) as progress:
+            for epoch in range(self.epochs):
+                train_loader = self.instance_a_train_loader(self.batch_size)
+                loss = self.train_an_epoch(train_loader)
+                progress.update(1)
+                progress.set_postfix({"loss": loss})
+
+    def compute_explainability(self):
+        assert self.dataset is not None
+        ds = self.dataset.pivot(index="userId", columns="itemId", values="rating")
+        ds = ds.fillna(0)
+        ds = sparse.csr_matrix(ds)
+        sim_matrix = cosine_similarity(ds)
+        min_val = sim_matrix.min() - 1
+
+        assert self.data is not None
+        for i in range(self.data.num_user):
+            sim_matrix[i, i] = min_val
+
+            knn_to_user_i = (-sim_matrix[i, :]).argsort()[: self.knn]
+            self.sim_users[i] = knn_to_user_i
+
+        self.explainability_matrix = np.zeros((self.data.num_user, self.data.num_item))
+
+        filter_dataset_on_threshold = self.dataset[
+            self.dataset["rating"] >= self.positive_threshold
+        ]
+
+        for i in range(self.data.num_user):
+            knn_to_user_i = self.sim_users[i]
+
+            rated_items_by_sim_users = filter_dataset_on_threshold[
+                filter_dataset_on_threshold["userId"].isin(knn_to_user_i)
+            ]
+
+            sim_scores = rated_items_by_sim_users.groupby(by="itemId")
+            sim_scores = sim_scores["rating"].sum()
+            sim_scores = sim_scores.reset_index()
+
+            self.explainability_matrix[i, sim_scores.itemId.astype(int)] = (
+                sim_scores.rating.to_list()
+            )
+
+        self.explainability_matrix = MinMaxScaler().fit_transform(
+            self.explainability_matrix
+        )
+
+        self.explainability_matrix = torch.from_numpy(self.explainability_matrix)
+
+    def instance_a_train_loader(self, batch_size):
+        assert self.dataset is not None
+        dataset = UserItemRatingDataset(
+            user_tensor=torch.LongTensor(self.dataset.userId.values),
+            item_tensor=torch.LongTensor(self.dataset.itemId.values),
+            target_tensor=torch.FloatTensor(self.dataset.rating.values),
+        )
+        return DataLoader(dataset, batch_size=batch_size, shuffle=True)
+
+    def train_an_epoch(self, train_loader):
+        self.train()
+        cnt = 0
+        total_loss = 0
+        for batch_id, batch in enumerate(train_loader):
+            assert isinstance(batch[0], torch.LongTensor)
+            user, item, rating = batch[0], batch[1], batch[2]
+            rating = rating.float()
+            loss = self.train_single_batch(user, item, rating)
+            total_loss += loss
+            cnt += 1
+        return total_loss / cnt
+
+    def train_single_batch(self, users, items, ratings):
+        if self.cuda is True:
+            users, items, ratings = users.cuda(), items.cuda(), ratings.cuda()
+
+        assert self.optimizer is not None
+        self.optimizer.zero_grad()
+
+        ratings_pred = self(users, items)
+
+        assert self.embedding_user is not None
+        user_embeddings = self.embedding_user(users)
+        assert self.embedding_item is not None
+        item_embeddings = self.embedding_item(items)
+
+        assert self.explainability_matrix is not None
+        loss = self.criterion(
+            ratings_pred=ratings_pred,
+            ratings=ratings,
+            u=user_embeddings,
+            v=item_embeddings,
+            reg_term=self.reg_term,
+            expl=self.explainability_matrix[users, items],
+            expl_reg_term=self.expl_reg_term,
+        )
+        loss.backward()
+        self.optimizer.step()
+        loss = loss.item()
+
+        return loss
+
+    def forward(self, user_indices, item_indices):
+        assert self.embedding_user is not None
+        user_embeddings = self.embedding_user(user_indices)
+        assert self.embedding_item is not None
+        item_embeddings = self.embedding_item(item_indices)
+        element_product = torch.mul(user_embeddings, item_embeddings)
+        rating = self.affine_output(element_product)
+        return rating
@@ -0,0 +1,165 @@
+import random
+import pandas as pd
+import torch
+import torch.nn as nn
+from torch.utils.data import DataLoader
+from torch.optim import Optimizer
+
+from tqdm.auto import tqdm
+
+from pygrex.data_reader import DataReader, UserItemRatingDataset
+from pygrex.utils.torch_utils import use_optimizer
+from .py_torch_model import PyTorchModel
+
+
+class GMFModel(PyTorchModel):
+    def __init__(
+        self,
+        learning_rate: float,
+        weight_decay: float,
+        latent_dim: int,
+        epochs: int,
+        num_negative: int,
+        batch_size: int,
+        cuda: bool,
+        optimizer_name: str,
+        device_id=None,
+    ):
+        super().__init__(
+            learning_rate=learning_rate,
+            latent_dim=latent_dim,
+            epochs=epochs,
+            batch_size=batch_size,
+            cuda=cuda,
+            optimizer_name=optimizer_name,
+            device_id=device_id,
+        )
+
+        self.negative_sample_size = num_negative
+        self.weight_decay = weight_decay
+        self.optimizer: Optimizer | None = None
+
+        self.affine_output = torch.nn.Linear(
+            in_features=self.latent_dim, out_features=1
+        )
+        self.logistic = torch.nn.Sigmoid()
+
+        self.criterion = nn.BCELoss()
+
+    def fit(self, data: DataReader):
+        optimizer = use_optimizer(
+            network=self,
+            weight_decay=self.weight_decay,
+            learning_rate=self.learning_rate,
+            optimizer_name=self.optimizer_name,
+        )
+
+        if not isinstance(optimizer, Optimizer):
+            raise TypeError(f"Expected an Optimizer, but got {type(optimizer)}")
+        self.optimizer = optimizer
+        dataset = data.dataset
+
+        num_users = data.num_user
+        num_items = data.num_item
+
+        self.embedding_user = torch.nn.Embedding(
+            num_embeddings=num_users, embedding_dim=self.latent_dim
+        )
+
+        self.embedding_item = torch.nn.Embedding(
+            num_embeddings=num_items, embedding_dim=self.latent_dim
+        )
+
+        self.negatives = self._sample_negative(dataset)
+
+        with tqdm(total=self.epochs) as progress:
+            for epoch in range(self.epochs):
+                train_loader = self.instance_a_train_loader(
+                    dataset, self.negative_sample_size, self.batch_size
+                )
+                loss = self.train_an_epoch(train_loader)
+                progress.update(1)
+                progress.set_postfix({"loss": loss})
+
+    def instance_a_train_loader(self, dataset, num_negatives, batch_size):
+        """instance train loader for one training epoch"""
+        users, items, ratings = [], [], []
+        train_ratings = pd.merge(
+            dataset, self.negatives[["userId", "negative_items"]], on="userId"
+        )
+        train_ratings["negatives"] = train_ratings["negative_items"].apply(
+            lambda x: random.sample(list(x), num_negatives)
+        )
+        user_ids = train_ratings["userId"].tolist()
+        item_ids = train_ratings["itemId"].tolist()
+        rating_values = train_ratings["rating"].tolist()
+        negatives_lists = train_ratings["negatives"].tolist()
+
+        for user, item, rating, negatives in zip(
+            user_ids, item_ids, rating_values, negatives_lists
+        ):
+            users.append(user)
+            items.append(item)
+            ratings.append(rating)
+            for neg_item in negatives:
+                users.append(user)
+                items.append(neg_item)
+                ratings.append(float(0))  # negative samples get 0 rating
+        # negative samples get 0 rating
+        dataset = UserItemRatingDataset(
+            user_tensor=torch.LongTensor(users),
+            item_tensor=torch.LongTensor(items),
+            target_tensor=torch.FloatTensor(ratings),
+        )
+        return DataLoader(dataset, batch_size=batch_size, shuffle=True)
+
+    def train_an_epoch(self, train_loader):
+        self.train()
+        cnt = 0
+        total_loss = 0
+        for batch_id, batch in enumerate(train_loader):
+            assert isinstance(batch[0], torch.LongTensor)
+            user, item, rating = batch[0], batch[1], batch[2]
+            rating = rating.float()
+            loss = self.train_single_batch(user, item, rating)
+            total_loss += loss
+            cnt += 1
+        return total_loss / cnt
+
+    def train_single_batch(self, users, items, ratings):
+        if self.cuda is True:
+            users, items, ratings = users.cuda(), items.cuda(), ratings.cuda()
+
+        if self.optimizer is None:
+            raise RuntimeError(
+                "Optimizer is not initialized. Call fit() before training."
+            )
+        self.optimizer.zero_grad()
+        ratings_pred = self(users, items)
+        loss = self.criterion(ratings_pred.view(-1), ratings)
+        loss.backward()
+        self.optimizer.step()
+        loss = loss.item()
+        return loss
+
+    def _sample_negative(self, ratings):
+        """return all negative items & 100 sampled negative items"""
+        interact_status = (
+            ratings.groupby("userId")["itemId"]
+            .apply(set)
+            .reset_index()
+            .rename(columns={"itemId": "interacted_items"})
+        )
+        self.item_catalogue = set(ratings.itemId)
+        interact_status["negative_items"] = interact_status["interacted_items"].apply(
+            lambda x: self.item_catalogue - x
+        )
+        return interact_status[["userId", "negative_items"]]
+
+    def forward(self, user_indices, item_indices):
+        user_embedding = self.embedding_user(user_indices)
+        item_embedding = self.embedding_item(item_indices)
+        element_product = torch.mul(user_embedding, item_embedding)
+        dot = self.affine_output(element_product)
+        rating = self.logistic(dot)
+        return rating
@@ -0,0 +1,22 @@
+import torch.nn as nn
+
+
+class Item2Vec(nn.Module):
+
+    def __init__(self, config):
+        super().__init__()
+        self.num_items = config['num_items']
+        self.latent_dim = config['latent_dim']
+        self.embedding = nn.Embedding(
+            num_embeddings=self.num_items,
+            embedding_dim=self.latent_dim)
+        self.fc = nn.Linear(
+            in_features=self.latent_dim,
+            out_features=self.num_items)
+
+    def forward(self, input_data):
+        embedding = self.embedding(input_data)
+        return self.fc(embedding)
+
+    def item_embedding(self):
+        return self.embedding.weight.detach()
@@ -0,0 +1,240 @@
+from typing import Optional, Union
+import numpy as np
+import scipy.sparse as sp
+
+from .recommender_model import RecommenderModel
+from pygrex.data_reader import DataReader
+
+
+class KNNBasic(RecommenderModel):
+    """
+    An improved K-Nearest Neighbors collaborative filtering model.
+
+    This version uses Pearson correlation similarity and improved neighbor selection
+    for better performance on sparse datasets like MovieLens.
+
+    Args:
+        k (int): Number of neighbors to consider. Default 50.
+        min_k (int): Minimum number of neighbors required for prediction. Default 3.
+        sim_options (dict): Similarity options. Default pearson, user-based.
+    """
+
+    def __init__(self, k: int = 50, min_k: int = 3, sim_options: Optional[dict] = None):
+        super().__init__()
+        self.k = k
+        self.min_k = min_k
+        self.sim_options = sim_options if sim_options is not None else {}
+
+        # Validate similarity options
+        if self.sim_options.get("user_based", True) is False:
+            raise NotImplementedError("Only the user-based approach is implemented.")
+
+        sim_name = self.sim_options.get("name", "pearson").lower()
+        if sim_name not in ["cosine", "pearson"]:
+            raise NotImplementedError(
+                "Only cosine and pearson similarity are implemented."
+            )
+
+        # Model attributes
+        self.trainset: Optional[sp.csr_matrix] = None
+        self.global_mean: float = 0
+        self.user_biases: Optional[np.ndarray] = None
+        self.item_biases: Optional[np.ndarray] = None
+        self.num_users: Optional[int] = None
+        self.num_items: Optional[int] = None
+
+        # For memory-efficient similarity computation
+        self.user_means: Optional[np.ndarray] = None
+
+    def fit(self, data: DataReader) -> None:
+        """
+        Trains the KNN model with improved memory efficiency.
+        """
+        print("Fitting the improved KNNBasic model...")
+        df = data.dataset
+        self.num_users = data.num_user
+        self.num_items = data.num_item
+
+        print(
+            f"Building ratings matrix for {self.num_users} users and {self.num_items} items..."
+        )
+
+        # 1. Build the sparse user-item ratings matrix
+        ratings = df["rating"].values
+        rows = df["userId"].values
+        cols = df["itemId"].values
+        self.trainset = sp.csr_matrix(
+            (ratings, (rows, cols)), shape=(self.num_users, self.num_items)
+        )
+
+        # 2. Calculate global mean and biases
+        print("Computing biases...")
+        self.global_mean = self.trainset.data.mean()
+
+        # User biases: bu = avg(ratings_u) - global_mean
+        user_sums = np.array(self.trainset.sum(axis=1)).flatten()
+        user_counts = np.diff(self.trainset.indptr)
+
+        with np.errstate(divide="ignore", invalid="ignore"):
+            user_avg_ratings = np.where(
+                user_counts > 0, user_sums / user_counts, self.global_mean
+            )
+        self.user_biases = np.where(
+            user_counts > 0, user_avg_ratings - self.global_mean, 0
+        )
+
+        # Item biases: bi = avg(ratings_i) - global_mean
+        item_sums = np.array(self.trainset.sum(axis=0)).flatten()
+        item_counts = np.diff(self.trainset.tocsc().indptr)
+
+        with np.errstate(divide="ignore", invalid="ignore"):
+            item_avg_ratings = np.where(
+                item_counts > 0, item_sums / item_counts, self.global_mean
+            )
+        self.item_biases = np.where(
+            item_counts > 0, item_avg_ratings - self.global_mean, 0
+        )
+
+        # Store user means for similarity computation
+        self.user_means = user_avg_ratings
+
+        print("Model fitting complete.")
+
+    def _compute_user_similarity(self, user1_id: int, user2_id: int) -> float:
+        """
+        Compute Pearson correlation similarity between two users.
+        This works better than cosine similarity for collaborative filtering.
+        """
+        assert self.trainset is not None
+        # Get rating vectors for both users
+        user1_ratings = self.trainset[user1_id].toarray().flatten()
+        user2_ratings = self.trainset[user2_id].toarray().flatten()
+
+        # Find commonly rated items
+        mask = (user1_ratings > 0) & (user2_ratings > 0)
+        n_common = np.sum(mask)
+
+        # Need at least 2 common ratings for correlation
+        if n_common < 2:
+            return 0.0
+
+        # Extract ratings for commonly rated items
+        u1_common = user1_ratings[mask]
+        u2_common = user2_ratings[mask]
+
+        # Mean-center the ratings
+        u1_mean = np.mean(u1_common)
+        u2_mean = np.mean(u2_common)
+
+        u1_centered = u1_common - u1_mean
+        u2_centered = u2_common - u2_mean
+
+        # Compute Pearson correlation
+        numerator = np.sum(u1_centered * u2_centered)
+        denom1 = np.sqrt(np.sum(u1_centered**2))
+        denom2 = np.sqrt(np.sum(u2_centered**2))
+
+        if denom1 == 0 or denom2 == 0:
+            return 0.0
+
+        correlation = numerator / (denom1 * denom2)
+
+        # Apply significance weighting based on number of common items
+        # More common items = more reliable similarity
+        significance_weight = min(n_common / 50.0, 1.0)  # Cap at 50 common items
+
+        return correlation * significance_weight
+
+    def _get_neighbors_for_item(self, user_id: int, item_id: int):
+        """
+        Get the top-k most similar users who have rated the given item.
+        """
+        # Find users who rated this item
+        assert self.trainset is not None
+        item_col = self.trainset[:, item_id]  # type: ignore
+        neighbor_candidates, _ = item_col.nonzero()
+
+        # Remove the target user if they're in the candidates
+        neighbor_candidates = neighbor_candidates[neighbor_candidates != user_id]
+
+        if len(neighbor_candidates) == 0:
+            return np.array([]), np.array([]), np.array([])
+
+        # Compute similarities
+        similarities = []
+        for neighbor_id in neighbor_candidates:
+            sim = self._compute_user_similarity(user_id, neighbor_id)
+            similarities.append((sim, neighbor_id))
+
+        # Sort by similarity and take top-k
+        similarities.sort(key=lambda x: x[0], reverse=True)
+        top_k = similarities[: min(self.k, len(similarities))]
+
+        if len(top_k) < self.min_k:
+            return np.array([]), np.array([]), np.array([])
+
+        # Extract data
+        neighbor_sims = np.array([sim for sim, _ in top_k])
+        neighbor_ids = np.array([nid for _, nid in top_k])
+        neighbor_ratings = np.array(
+            [self.trainset[nid, item_id] for nid in neighbor_ids]
+        )
+
+        return neighbor_sims, neighbor_ids, neighbor_ratings
+
+    def predict(self, user_id: Union[int, str], item_id: Union[int, str]) -> float:
+        """
+        Predict rating for a user-item pair using KNN.
+        """
+        if self.trainset is None:
+            raise RuntimeError("Model must be trained first using fit() method.")
+
+        assert self.num_users is not None
+        assert self.num_items is not None
+        assert self.user_biases is not None
+        assert self.item_biases is not None
+        user_id = int(user_id)
+        item_id = int(item_id)
+        # Handle out-of-bounds users/items
+        if user_id >= self.num_users or item_id >= self.num_items:
+            return self.global_mean
+
+        # 1. Calculate baseline estimate
+        baseline = (
+            self.global_mean + self.user_biases[user_id] + self.item_biases[item_id]
+        )
+
+        # 2. Get neighbors who rated this item
+        neighbor_sims, neighbor_ids, neighbor_ratings = self._get_neighbors_for_item(
+            user_id, item_id
+        )
+
+        if len(neighbor_ids) == 0:
+            return baseline
+
+        # 3. Calculate weighted prediction
+        neighbor_biases = self.user_biases[neighbor_ids]
+        neighbor_baselines = (
+            self.global_mean + neighbor_biases + self.item_biases[item_id]
+        )
+
+        deviations = neighbor_ratings - neighbor_baselines
+
+        # Only use neighbors with positive similarity
+        positive_mask = neighbor_sims > 0
+        if not np.any(positive_mask):
+            return baseline
+
+        neighbor_sims = neighbor_sims[positive_mask]
+        deviations = deviations[positive_mask]
+
+        numerator = np.sum(neighbor_sims * deviations)
+        denominator = np.sum(np.abs(neighbor_sims))
+
+        if denominator == 0:
+            return baseline
+
+        prediction = baseline + (numerator / denominator)
+
+        # Clip to valid rating range
+        return np.clip(prediction, 1.0, 5.0)
@@ -0,0 +1,136 @@
+import numpy as np
+import scipy
+from typing import Union, Protocol, runtime_checkable
+
+from implicit.recommender_base import RecommenderBase
+from .recommender_model import RecommenderModel
+from pygrex.data_reader import DataReader
+
+
+@runtime_checkable
+class FittableImplicitModel(Protocol):
+    user_factors: np.ndarray
+    item_factors: np.ndarray
+
+    def fit(self, item_user_data) -> None: ...
+
+
+class MFImplicitModel(RecommenderModel):
+    def __init__(
+        self,
+        latent_dim,
+        reg_term,
+        learning_rate,
+        epochs,
+        num_users=None,
+        num_items=None,
+    ):
+        self.latent_dim = latent_dim
+        self.reg_term = reg_term
+        self.learning_rate = learning_rate
+        self.epochs = epochs
+        self.model: Union[RecommenderBase, FittableImplicitModel, None] = None
+        self.total_users = num_users
+        self.total_items = num_items
+
+    def fit(self, data: DataReader) -> None:
+        if self.model is None:
+            raise RuntimeError(
+                "The model has not been initialized. Please use a specific subclass like ALS or BPR."
+            )
+        num_user_for_shape = data.dataset["userId"].max() + 1
+        num_item_for_shape = data.dataset["itemId"].max() + 1
+        self.total_users = num_user_for_shape
+        self.total_items = num_item_for_shape
+
+        item_user_data = self.rearrange_dataset(
+            ds=data.dataset,
+            num_user=num_user_for_shape,
+            num_item=num_item_for_shape,
+        ).T.tocsr()
+
+        self.model.fit(item_user_data)
+
+    @staticmethod
+    def rearrange_dataset(ds, num_user: int, num_item: int) -> scipy.sparse.csr_matrix:
+        """
+        Converts the dataset into a sparse matrix format for the implicit model.
+
+        Args:
+            ds: Dataset containing userId and itemId columns
+            num_user : Number of users in the dataset
+            num_item : Number of items in the dataset
+
+        Returns:
+            ds_mtr: Sparse matrix representation of the dataset
+        """
+
+        # Create sparse matrix directly from data
+        data = np.ones(len(ds))  # Array of 1s for each interaction
+        rows = ds["userId"].values  # User IDs as row indices
+        cols = ds["itemId"].values  # Item IDs as column indices
+
+        ds_mtr = scipy.sparse.csr_matrix(
+            (data, (rows, cols)), shape=(num_user, num_item)
+        )
+
+        return ds_mtr
+
+    def predict(
+        self, user_id: Union[str, int], item_id: Union[str, int, list, np.ndarray]
+    ) -> Union[float, list]:
+        """
+        Predict ratings for a user and one or more items using efficient vectorization.
+
+        Args:
+            user_id : User identifier
+            item_id : Item identifier or a list/array of item identifiers
+
+        Returns:
+            A single predicted score (float) or an array of scores (np.ndarray)
+        """
+        if not isinstance(self.model, FittableImplicitModel):
+            raise RuntimeError(
+                "The model has not been trained yet. Please call fit() first."
+            )
+        user_id = int(user_id)
+
+        # 1. Validate user_id
+        if not (0 <= user_id < self.model.user_factors.shape[0]):
+            raise ValueError(f"user_id {user_id} is out of bounds")
+
+        # 2. Unify input to always be a numpy array
+        is_single_item = not isinstance(item_id, (list, np.ndarray))
+        item_ids_arr = np.array(item_id, ndmin=1).astype(int)
+
+        # 3. Perform a single, vectorized bounds check for all items at once
+        max_item_id = self.model.item_factors.shape[0]
+        if not np.all((item_ids_arr >= 0) & (item_ids_arr < max_item_id)):
+            out_of_bounds_id = item_ids_arr[
+                (item_ids_arr < 0) | (item_ids_arr >= max_item_id)
+            ][0]
+            raise ValueError(f"item_id {out_of_bounds_id} is out of bounds")
+
+        # 4. Get all item vectors in a single, highly efficient operation
+        item_vectors = self.model.item_factors[item_ids_arr]
+        user_vector = self.model.user_factors[user_id]
+
+        # 5. Calculate all scores with one dot product
+        scores = user_vector.dot(item_vectors.T)
+
+        # 6. Return a single float if the input was a single item, otherwise the array
+        return scores[0].item() if is_single_item else scores.tolist()
+
+    def user_embedding(self) -> np.ndarray:
+        if not isinstance(self.model, FittableImplicitModel):
+            raise RuntimeError(
+                "The model has not been trained yet. Please call fit() first."
+            )
+        return self.model.user_factors
+
+    def item_embedding(self) -> np.ndarray:
+        if not isinstance(self.model, FittableImplicitModel):
+            raise RuntimeError(
+                "The model has not been trained yet. Please call fit() first."
+            )
+        return self.model.item_factors
@@ -0,0 +1,179 @@
+import random
+
+import pandas as pd
+import torch
+import torch.nn as nn
+from torch.utils.data import DataLoader
+from torch.optim import Optimizer
+from tqdm.auto import tqdm
+
+from pygrex.data_reader import DataReader, UserItemRatingDataset
+from pygrex.utils.torch_utils import use_optimizer
+from .py_torch_model import PyTorchModel
+
+
+class MLPModel(PyTorchModel):
+    def __init__(
+        self,
+        learning_rate: float,
+        weight_decay: float,
+        latent_dim: int,
+        epochs: int,
+        num_negative: int,
+        batch_size: int,
+        cuda: bool,
+        optimizer_name: str,
+        device_id=None,
+    ):
+        super().__init__(
+            learning_rate=learning_rate,
+            latent_dim=latent_dim,
+            epochs=epochs,
+            batch_size=batch_size,
+            cuda=cuda,
+            optimizer_name=optimizer_name,
+            device_id=device_id,
+        )
+
+        self.negative_sample_size = num_negative
+        self.weight_decay = weight_decay
+
+        # layer dim is 2*self.latent_dim since the embeddings will be concatenated
+        self.affine_output = torch.nn.Linear(
+            in_features=2 * self.latent_dim, out_features=1
+        )
+        self.logistic = torch.nn.Sigmoid()
+
+        self.criterion = nn.BCELoss()
+        self.optimizer: Optimizer | None = None
+
+    def fit(self, data: DataReader):
+        optimizer = use_optimizer(
+            network=self,
+            weight_decay=self.weight_decay,
+            learning_rate=self.learning_rate,
+            optimizer_name=self.optimizer_name,
+        )
+        if not isinstance(optimizer, Optimizer):
+            raise TypeError(f"Expected an Optimizer, but got {type(optimizer)}")
+        self.optimizer = optimizer
+
+        dataset = data.dataset
+
+        num_users = data.num_user
+        num_items = data.num_item
+
+        self.embedding_user = torch.nn.Embedding(
+            num_embeddings=num_users, embedding_dim=self.latent_dim
+        )
+
+        self.embedding_item = torch.nn.Embedding(
+            num_embeddings=num_items, embedding_dim=self.latent_dim
+        )
+
+        self.negatives = self._sample_negative(dataset)
+
+        with tqdm(total=self.epochs) as progress:
+            for epoch in range(self.epochs):
+                train_loader = self.instance_a_train_loader(
+                    dataset, self.negative_sample_size, self.batch_size
+                )
+                loss = self.train_an_epoch(train_loader)
+                progress.update(1)
+                progress.set_postfix({"loss": loss})
+
+    def instance_a_train_loader(self, dataset, num_negatives, batch_size):
+        """instance train loader for one training epoch"""
+        users, items, ratings = [], [], []
+        train_ratings = pd.merge(
+            dataset, self.negatives[["userId", "negative_items"]], on="userId"
+        )
+        train_ratings["negatives"] = train_ratings["negative_items"].apply(
+            lambda x: random.sample(list(x), num_negatives)
+        )
+        user_ids = train_ratings["userId"].tolist()
+        item_ids = train_ratings["itemId"].tolist()
+        rating_values = train_ratings["rating"].tolist()
+        negatives_lists = train_ratings["negatives"].tolist()
+
+        for user, item, rating, negatives in zip(
+            user_ids, item_ids, rating_values, negatives_lists
+        ):
+            users.append(user)
+            items.append(item)
+            ratings.append(rating)
+            for neg_item in negatives:
+                users.append(user)
+                items.append(neg_item)
+                ratings.append(float(0))  # negative samples get 0 rating
+
+        dataset = UserItemRatingDataset(
+            user_tensor=torch.LongTensor(users),
+            item_tensor=torch.LongTensor(items),
+            target_tensor=torch.FloatTensor(ratings),
+        )
+        return DataLoader(dataset, batch_size=batch_size, shuffle=True)
+
+    def train_an_epoch(self, train_loader):
+        self.train()
+        cnt = 0
+        total_loss = 0
+        for batch_id, batch in enumerate(train_loader):
+            assert isinstance(batch[0], torch.LongTensor)
+            user, item, rating = batch[0], batch[1], batch[2]
+            rating = rating.float()
+            loss = self.train_single_batch(user, item, rating)
+            total_loss += loss
+            cnt += 1
+        return total_loss / cnt
+
+    def train_single_batch(self, users, items, ratings):
+        if self.cuda is True:
+            users, items, ratings = users.cuda(), items.cuda(), ratings.cuda()
+        if self.optimizer is None:
+            raise RuntimeError(
+                "Optimizer is not initialized. Call fit() before training."
+            )
+
+        self.optimizer.zero_grad()
+        ratings_pred = self(users, items)
+        loss = self.criterion(ratings_pred.view(-1), ratings)
+        loss.backward()
+        self.optimizer.step()
+        loss = loss.item()
+        return loss
+
+    def _sample_negative(self, ratings):
+        """return all negative items & 100 sampled negative items"""
+        interact_status = (
+            ratings.groupby("userId")["itemId"]
+            .apply(set)
+            .reset_index()
+            .rename(columns={"itemId": "interacted_items"})
+        )
+        self.item_catalogue = set(ratings.itemId)
+        interact_status["negative_items"] = interact_status["interacted_items"].apply(
+            lambda x: self.item_catalogue - x
+        )
+        return interact_status[["userId", "negative_items"]]
+
+    def forward(self, user_indices, item_indices):
+        user_embedding = self.embedding_user(user_indices)
+        item_embedding = self.embedding_item(item_indices)
+
+        # Ensure embeddings are 2D [batch_size, embedding_dim]
+        if user_embedding.dim() == 3:
+            user_embedding = user_embedding.squeeze(1)
+        if item_embedding.dim() == 3:
+            item_embedding = item_embedding.squeeze(1)
+
+        # This is needed because cat does not support broadcasting.
+        if user_embedding.size(0) == 1 and item_embedding.size(0) > 1:
+            user_embedding = user_embedding.repeat(item_embedding.size(0), 1)
+        elif item_embedding.size(0) == 1 and user_embedding.size(0) > 1:
+            item_embedding = item_embedding.repeat(user_embedding.size(0), 1)
+
+        element_concat = torch.cat((user_embedding, item_embedding), 1)
+        concat = self.affine_output(element_concat)
+        rating = self.logistic(concat)
+        return rating
@@ -0,0 +1,69 @@
+import itertools
+from typing import Union
+import torch
+
+from pygrex.utils.torch_utils import use_cuda
+from .recommender_model import RecommenderModel
+from pygrex.data_reader import DataReader
+
+
+class PyTorchModel(RecommenderModel, torch.nn.Module):
+    """Meta Learner
+
+    Note: Subclass should implement self.model !
+    """
+
+    def __init__(
+        self,
+        learning_rate: float,
+        latent_dim: int,
+        epochs: int,
+        batch_size: int,
+        cuda: bool,
+        optimizer_name: str,
+        device_id: Union[int, None] = None,
+    ):
+        if optimizer_name not in ["sgd", "adam", "rmsprop"]:
+            raise Exception("Wrong optimizer.")
+
+        if cuda is True and device_id is not None:
+            use_cuda(True, device_id)
+
+        self.latent_dim = latent_dim
+        self.learning_rate = learning_rate
+        self.epochs = epochs
+        self.batch_size = batch_size
+        self._cuda = cuda
+        self.optimizer_name = optimizer_name
+
+        self.dataset = None
+        self.dataset_metadata = None
+        self.embedding_user = None
+        self.embedding_item = None
+        self.optimizer = None
+
+        super().__init__()
+
+    def fit(self, data: DataReader):
+        pass
+
+    def predict(self, user_id, item_id) -> list:
+        if isinstance(user_id, int):
+            user_id = [user_id]
+        if isinstance(item_id, int):
+            item_id = [item_id]
+        user_id = torch.LongTensor(user_id)
+        item_id = torch.LongTensor(item_id)
+        with torch.no_grad():
+            if self._cuda:
+                user_id = user_id.cuda()
+                item_id = item_id.cuda()
+            pred = self.forward(user_id, item_id).cpu().tolist()
+            pred = list(itertools.chain.from_iterable(pred))
+            return pred
+
+    def user_embedding(self):
+        return self.state_dict()["embedding_user.weight"].cpu().numpy()
+
+    def item_embedding(self):
+        return self.state_dict()["embedding_item.weight"].cpu().numpy()
@@ -0,0 +1,35 @@
+from abc import ABC, abstractmethod
+from typing import Union
+
+from pygrex.data_reader.data_reader import DataReader
+
+
+class RecommenderModel(ABC):
+    """
+    Abstract base class that defines the interface for recommendation models.
+    All model implementations should inherit from this class.
+    """
+
+    @abstractmethod
+    def predict(
+        self, user_id: Union[str, int], item_id: Union[str, int]
+    ) -> Union[float, list]:
+        """
+        Make predictions for a specific user on a list of items.
+
+        Args:
+            user_id: The ID of the user
+            item_ids: List of item IDs to predict ratings/scores for
+
+        Returns:
+            A dictionary mapping item IDs to predicted ratings/scores
+        """
+        pass
+
+    @abstractmethod
+    def fit(self, data: DataReader):
+        """
+        Train the model on data.
+        The specific parameters depend on the model implementation.
+        """
+        pass
@@ -0,0 +1,169 @@
+from math import sqrt
+import numpy as np
+from pygrex.data_reader.data_reader import DataReader
+from pygrex.models.recommender_model import RecommenderModel
+
+
+class SVD(RecommenderModel):
+    def __init__(
+        self,
+        n_factors=50,
+        n_epochs=25,
+        lr=0.007,
+        reg=0.1,
+        init_mean=0.0,
+        init_std=0.1,
+        random_state=42,
+        early_stopping=True,
+    ):
+        self.n_factors = n_factors
+        self.n_epochs = n_epochs
+        self.lr = lr
+        self.reg = reg
+        self.init_mean = init_mean
+        self.init_std = init_std
+        self.random_state = random_state
+        self.early_stopping = early_stopping
+
+        # Model parameters
+        self.user_factors = None
+        self.item_factors = None
+        self.user_biases = None
+        self.item_biases = None
+        self.global_mean = None
+
+        # Training history
+        self.training_rmse = []
+
+    def fit(self, data: DataReader, validation_data=None):
+        df = data.dataset
+        if data._num_user is None or data._num_item is None:
+            raise ValueError("The number of users and items cannot be None.")
+        num_users, num_items = data._num_user, data._num_item
+
+        # Initialize random number generator
+        rng = np.random.RandomState(self.random_state)
+
+        # Initialize parameters with better scaling
+        scale = 1.0 / sqrt(self.n_factors)
+        self.user_factors = rng.normal(
+            self.init_mean, scale, (num_users, self.n_factors)
+        )  # type: ignore
+        self.item_factors = rng.normal(
+            self.init_mean, scale, (num_items, self.n_factors)
+        )  # type: ignore
+        self.user_biases = np.zeros(num_users)
+        self.item_biases = np.zeros(num_items)
+        self.global_mean = df["rating"].mean()
+
+        # Convert to list of tuples for faster iteration
+        ratings_tuple = list(
+            df[["userId", "itemId", "rating"]].itertuples(index=False, name=None)
+        )
+
+        # Training loop with early stopping
+        best_rmse = float("inf")
+        patience = 3
+        patience_counter = 0
+
+        for epoch in range(self.n_epochs):
+            print(f"Epoch {epoch + 1}/{self.n_epochs}...")
+
+            # Shuffle training data
+            rng.shuffle(ratings_tuple)
+
+            # SGD updates
+            for user, item, rating in ratings_tuple:
+                # Predict rating
+                dot_product = np.dot(self.user_factors[user], self.item_factors[item])
+                prediction = (
+                    self.global_mean
+                    + self.user_biases[user]
+                    + self.item_biases[item]
+                    + dot_product
+                )
+
+                # Compute error
+                error = rating - prediction
+
+                # Update biases
+                self.user_biases[user] += self.lr * (
+                    error - self.reg * self.user_biases[user]
+                )
+                self.item_biases[item] += self.lr * (
+                    error - self.reg * self.item_biases[item]
+                )
+
+                # Update factors
+                uf_temp = self.user_factors[user].copy()
+                self.user_factors[user] += self.lr * (
+                    error * self.item_factors[item] - self.reg * self.user_factors[user]
+                )
+                self.item_factors[item] += self.lr * (
+                    error * uf_temp - self.reg * self.item_factors[item]
+                )
+
+            # Calculate training RMSE
+            if epoch % 5 == 0 or epoch == self.n_epochs - 1:
+                train_rmse = self.calculate_rmse(ratings_tuple)
+                self.training_rmse.append(train_rmse)
+                print(f"  Training RMSE: {train_rmse:.4f}")
+
+                # Early stopping
+                if self.early_stopping and validation_data is not None:
+                    val_rmse = self.calculate_rmse(validation_data)
+                    print(f"  Validation RMSE: {val_rmse:.4f}")
+
+                    if val_rmse < best_rmse:
+                        best_rmse = val_rmse
+                        patience_counter = 0
+                    else:
+                        patience_counter += 1
+
+                    if patience_counter >= patience:
+                        print(f"Early stopping at epoch {epoch + 1}")
+                        break
+
+        print("Fit complete.")
+
+    def calculate_rmse(self, ratings_data):
+        """Calculate RMSE for given ratings data."""
+        total_error = 0
+        count = 0
+
+        for user, item, rating in ratings_data:
+            prediction = self.predict(user, item)
+            total_error += (rating - prediction) ** 2
+            count += 1
+
+        return sqrt(total_error / count) if count > 0 else 0
+
+    def predict(self, user_id: int | str, item_id: int | str) -> float:
+        # Check that all model components are initialized
+        if (
+            self.user_factors is None
+            or self.item_factors is None
+            or self.user_biases is None
+            or self.item_biases is None
+            or self.global_mean is None
+        ):
+            raise RuntimeError("The model has not been trained yet.")
+
+        try:
+            user_id = int(user_id)
+            item_id = int(item_id)
+        except (ValueError, TypeError):
+            # If conversion fails, return the global mean rating
+            return self.global_mean
+
+        # Make prediction
+        dot_product = np.dot(self.user_factors[user_id], self.item_factors[item_id])
+        prediction = (
+            self.global_mean
+            + self.user_biases[user_id]
+            + self.item_biases[item_id]
+            + dot_product
+        )
+
+        # Clip to valid rating range
+        return np.clip(prediction, 1, 5)
@@ -0,0 +1,4 @@
+from .recommender import Recommender
+from .group_recommender import GroupRecommender
+
+__all__ = ["Recommender", "GroupRecommender"]
@@ -0,0 +1,72 @@
+import numpy as np
+import pandas as pd
+from tqdm.autonotebook import tqdm
+
+
+class GenericRecommender:
+    def __init__(self, dataset_metadata, model, top_n: int = 10):
+        self.top_n = top_n
+        self.dataset = dataset_metadata.dataset
+        self.model = model
+        self.catalogue = set(self.dataset["itemId"])
+
+    def recommend_all(self):
+        """
+        Get all recommendations.
+        :param top_n:
+        :return: recommendations for any user.
+        """
+
+        ratings = self.dataset.groupby("userId")
+
+        recommendations = pd.DataFrame({"userId": [], "itemId": [], "rank": []})
+
+        with tqdm(
+            total=self.dataset["userId"].nunique(), desc="Recommending for users: "
+        ) as pbar:
+            for user_id, user_ratings in ratings:
+                # Replace .append() with pd.concat() - pandas 2.2.x +
+                recommendations = pd.concat(
+                    [recommendations, self.recommend_user(user_id, user_ratings)],  # type: ignore
+                    ignore_index=True,
+                )
+                pbar.update()
+
+        return recommendations
+
+    def rank_prediction(self, user_id, target_item_id, predictions):
+        # Ensure predictions are flattened if they're 2D
+        if isinstance(predictions, np.ndarray) and predictions.ndim > 1:
+            predictions = predictions.flatten()
+        recommendations = pd.DataFrame(
+            {"userId": user_id, "itemId": target_item_id, "prediction": predictions}
+        )
+
+        recommendations["rank"] = recommendations["prediction"].rank(
+            method="first", ascending=False
+        )
+
+        recommendations.sort_values(["userId", "rank"], inplace=True)
+
+        recommendations = recommendations[recommendations["rank"] <= self.top_n]
+
+        return recommendations[["userId", "itemId", "rank"]]
+
+    def get_unrated(self, user_ratings):
+        """
+        Extract the set of items a user has not rated.
+        :param user_ratings: list, items rated.
+        :return: list, items not rated.
+        """
+        unrated_item_id = self.catalogue - set(user_ratings)
+        unrated_item_id = list(unrated_item_id)
+        return unrated_item_id
+
+    def get_rated(self, user_id):
+        """
+        Extract the set of items a user has not rated.
+        :param user_id: userId rated.
+        :return: list, rated items.
+        """
+        rated = self.dataset[self.dataset["userId"] == user_id]
+        return rated
@@ -0,0 +1,391 @@
+from typing import Dict, List, Union, Optional
+
+import numpy as np
+
+from pygrex.data_reader.data_reader import DataReader
+from pygrex.models.recommender_model import RecommenderModel
+from pygrex.utils.aggregation_strategy import ScoreAggregator, AggregationStrategy
+from pygrex.utils.scale import Scale
+
+
+class GroupRecommender:
+    """
+    A class to represent a group recommender system that follows the workflow:
+    1. Setup and Candidate Selection
+    2. Individual Preference Collection
+    3. Score Aggregation
+    4. Final Recommendation List
+    """
+
+    def __init__(self, data: DataReader):
+        """Initialize the group recommender with data.
+
+        Args:
+            data: The dataset containing user-item interactions.
+        """
+        self.data = data
+        self._group_predictions = None
+        self._members = None
+        self._item_pool = None
+        self._model = None
+        self._aggregation_strategy = None
+        self._score_aggregator = None
+        self._aggregated_scores = None
+        self._top_recommendation = None
+
+    def setup_recommendation(
+        self,
+        model: RecommenderModel,
+        members: List[Union[str, int]],
+        data: DataReader,
+        aggregation_strategy: AggregationStrategy,  # type: ignore
+        most_respected_person: Optional[Union[str, int]] = None,
+    ) -> None:
+        """
+        Setup and Candidate Selection: Initialize the group recommendation process.
+               Args:
+                   model: The recommendation model to use
+                   members: List of user IDs representing the group members
+                   data: DataReader object containing the dataset
+                   aggregation_strategy: Strategy for aggregating individual predictions
+                   most_respected_person: User ID of most respected person (required for MRP strategy)
+        """
+        self._members = members
+        self._model = model
+        self._aggregation_strategy = aggregation_strategy
+
+        # Initialize score aggregator
+        self._score_aggregator = ScoreAggregator(
+            most_respected_person=most_respected_person
+        )
+
+        # get all item IDs from the dataset
+        item_ids = data.dataset["itemId"].unique()
+
+        # Get items that no group member has interacted with
+        self._item_pool = self.get_non_interacted_items_for_recommendation(
+            self.data,
+            item_ids,  # type: ignore
+            members,  # type: ignore
+        )
+        
+        # Filter item_pool to only include IDs that are valid for the model
+        # This prevents out-of-bounds errors when the model was trained with a different
+        # number of items than what's currently in the dataset
+        max_item_id = self._get_max_valid_item_id(model)
+        # Convert to int array and filter out invalid IDs
+        item_pool_int = self._item_pool.astype(int)
+        valid_mask = (item_pool_int >= 0) & (item_pool_int < max_item_id)
+        self._item_pool = item_pool_int[valid_mask]
+
+        # Individual Preference Collection: Generate predictions for each group member
+        self._group_predictions = self._generate_group_predictions()
+
+        # Score Aggregation: Aggregate individual predictions into collective scores
+        self._aggregated_scores = self._aggregate_group_scores()
+
+    def _generate_group_predictions(self) -> Dict[Union[str, int], Dict[int, float]]:
+        """
+        Individual Preference Collection: Generate predictions for all group members.
+
+        Returns:
+            A dictionary with user IDs as keys and their predictions as values
+        """
+        if not self._members or self._model is None or self._item_pool is None:
+            raise ValueError(
+                "You must call setup_recommendation before generating predictions"
+            )
+
+        predictions = {}
+        for member in self._members:
+            user_pred = self.generate_recommendation(
+                self._model,
+                member,
+                self._item_pool,  # type: ignore
+                self.data,  # type: ignore
+            )
+            predictions[member] = user_pred
+
+        return predictions
+
+    def _aggregate_group_scores(self) -> Dict[int, float]:
+        """
+        Score Aggregation: Aggregate individual predictions into collective scores.
+
+        Returns:
+            Dictionary mapping item IDs to aggregated scores
+        """
+        if (
+            self._group_predictions is None
+            or self._score_aggregator is None
+            or self._aggregation_strategy is None
+        ):
+            raise ValueError(
+                "You must call setup_recommendation before aggregating scores"
+            )
+
+        # For Borda Count, we need to create rankings from predictions
+        rankings = None
+        if self._aggregation_strategy == AggregationStrategy.BORDA_COUNT:
+            rankings = self._create_rankings_from_predictions()
+
+        # Use ScoreAggregator to aggregate scores
+        aggregated_scores = self._score_aggregator.aggregate_scores(
+            evaluations=self._group_predictions,  # type: ignore
+            strategy=self._aggregation_strategy,
+            rankings=rankings,  # type: ignore
+        )
+
+        # Sort items by their aggregated scores in descending order
+        sorted_scores = dict(
+            sorted(aggregated_scores.items(), key=lambda x: x[1], reverse=True)
+        )
+
+        return sorted_scores  # type: ignore
+
+    def _create_rankings_from_predictions(self) -> Dict[Union[str, int], List[int]]:
+        """
+        Create rankings from predictions for Borda Count aggregation.
+
+        Returns:
+            Dictionary mapping user IDs to ranked lists of item IDs
+        """
+        if self._group_predictions is None:
+            raise ValueError("Group predictions not available")
+
+        rankings = {}
+        for user_id, predictions in self._group_predictions.items():
+            # Sort items by prediction score in descending order
+            sorted_items = sorted(predictions.items(), key=lambda x: x[1], reverse=True)
+            rankings[user_id] = [item_id for item_id, _ in sorted_items]
+
+        return rankings
+
+    def _get_max_valid_item_id(self, model: RecommenderModel) -> int:
+        """
+        Get the maximum valid item ID for the given model.
+        
+        Args:
+            model: The recommendation model
+            
+        Returns:
+            Maximum valid item ID (exclusive, so valid IDs are [0, max_item_id))
+        """
+        # For implicit models (MFImplicitModel), check item_factors shape
+        if hasattr(model, 'model') and model.model is not None:
+            if hasattr(model.model, 'item_factors'):
+                return model.model.item_factors.shape[0]
+        # Check if model has total_items attribute (set during fit)
+        if hasattr(model, 'total_items') and model.total_items is not None:
+            return model.total_items
+        # Fallback to data.num_item if model shape is not available
+        return self.data.num_item
+    
+    def get_non_interacted_items_for_recommendation(
+        self,
+        data: DataReader,
+        item_ids: List[Union[str, int]],
+        members: List[Union[str, int]],
+    ) -> np.ndarray:
+        """
+        Returns the list of item IDs that none of the specified group members have interacted with.
+
+        This method is typically used in recommendation systems to filter out items that have already
+        been interacted with by any member of the group, ensuring that recommendations focus on new or
+        unseen items.
+
+        Args:
+            data: The original dataset containing user-item interactions.
+            item_ids: A list of all available item IDs to consider.
+            members: A list of user IDs representing the group.
+
+        Returns:
+            np.ndarray: A list of item IDs that have not been interacted with by any member of the group.
+        """
+
+        consecutive_member_ids = [data.get_new_user_id(int(m)) for m in members]
+        consecutive_member_ids = [m for m in consecutive_member_ids if m is not None]
+
+        # Get all unique item IDs interacted with by users in the group
+        interacted_item_ids = data.dataset.loc[
+            data.dataset.userId.isin(consecutive_member_ids), "itemId"
+        ].unique()
+
+        # Use numpy set difference to get non-interacted item IDs
+        item_pool = np.setdiff1d(item_ids, interacted_item_ids, assume_unique=True)
+
+        return item_pool
+
+    def generate_recommendation(
+        self,
+        model: RecommenderModel,
+        member: Union[str, int],
+        item_pool: List[Union[str, int]],
+        data: DataReader,
+    ) -> Dict[int, float]:
+        """
+        Generate recommendations for a user based on the provided model.
+
+        Args:
+            model: A recommendation model that implements the RecommenderModel interface
+            member: The ID of the user
+            item_pool: List of item IDs to predict ratings/scores for
+            data: The dataset containing user-item interactions
+
+        Returns:
+            A dictionary mapping item IDs to predicted ratings/scores
+        """
+        member = int(member)
+        new_member_id = data.get_new_user_id(member)
+
+        if new_member_id is None:
+            return {}  # Return empty predictions for this user
+
+        # Additional safety check: filter item_pool to valid IDs before prediction
+        # This provides a second layer of protection in case filtering was missed earlier
+        max_valid_item_id = self._get_max_valid_item_id(model)
+        if isinstance(item_pool, np.ndarray):
+            item_pool = item_pool.astype(int)
+            item_pool = item_pool[(item_pool >= 0) & (item_pool < max_valid_item_id)]
+        elif isinstance(item_pool, list):
+            item_pool = [int(item) for item in item_pool if 0 <= int(item) < max_valid_item_id]
+        
+        if len(item_pool) == 0:
+            print(f"No valid items found for user {new_member_id}. Returning empty predictions.")
+            return {}  # Return empty predictions if no valid items
+
+        raw_predictions = model.predict(new_member_id, item_pool)  # type: ignore
+        if not isinstance(raw_predictions, (list, np.ndarray)):
+            raise TypeError(
+                f"Model's predict function returned an unexpected type: {type(raw_predictions)}"
+            )
+
+        # raw_predictions = []
+        # # Generate predictions for each item in the pool
+        # for item in item_pool:
+        #     item = int(item)
+        #     raw_predictions.append(model.predict(new_member_id, item))  # type: ignore
+
+        # Ensure raw_predictions is a numpy array
+        raw_predictions = np.array(raw_predictions)
+
+        # # Flatten the predictions if it's a 2D array (single user, multiple items)
+        # if raw_predictions.ndim == 2 and raw_predictions.shape[0] == 1:
+        #     raw_predictions = raw_predictions.flatten()
+
+        # # Check if the length of raw_predictions matches item_pool
+        # if len(raw_predictions) != len(item_pool):
+        #     raise ValueError(
+        #         "Mismatch between predictions and item IDs. Check the model's predict function."
+        #     )
+
+        # Apply scaling to normalize predictions to 1-5 range
+        scaled_linear = Scale.linear(
+            np.array(raw_predictions),
+            target_min=1,
+            target_max=5,
+        )
+        # Convert the scaled predictions into a dictionary with original item IDs as keys
+        predictions = {}
+        for item, scaled_pred in zip(item_pool, scaled_linear):
+            # Ensure item_id is treated as an integer
+            item_original_id = data.get_original_item_id(int(item))
+            if item_original_id is not None:
+                predictions[int(item_original_id)] = scaled_pred  # type: ignore
+
+        # Sort the predictions in descending order of scores
+        sorted_predictions = dict(
+            sorted(predictions.items(), key=lambda item: item[1], reverse=True)
+        )
+
+        return sorted_predictions
+
+    def get_group_recommendations(
+        self, top_k: Optional[int] = None
+    ) -> Union[int, List[int]]:
+        """
+        Final Recommendation List: Get recommendations for the group based on aggregated scores.
+
+        Args:
+            top_k: The number of recommendations to return.
+                  If None, returns all recommendations sorted by score.
+                  If 1, returns only the top recommendation as a single item ID.
+                  If > 1, returns the top k recommendations as a list of item IDs.
+
+        Returns:
+            If top_k is 1, a single item ID. Otherwise, a list of item IDs.
+        """
+        if self._aggregated_scores is None:
+            raise ValueError(
+                "You must call setup_recommendation before getting recommendations"
+            )
+
+        sorted_items = list(self._aggregated_scores.items())
+
+        # Return results based on top_k parameter
+        if top_k is None:
+            # Return all items as a list of item IDs
+            return [item_id for item_id, _ in sorted_items]
+        elif top_k == 1:
+            # Return only the top item ID
+            if sorted_items:
+                return sorted_items[0][0]
+            return None  # type: ignore
+        else:
+            # Return top k item IDs
+            return [
+                item_id for item_id, _ in sorted_items[: min(top_k, len(sorted_items))]
+            ]
+
+    def get_top_recommendation(self) -> int:
+        """
+        Get the top recommendation for the group.
+
+        Returns:
+            The item ID with the highest aggregated score across all group members.
+        """
+        if self._top_recommendation is None:
+            self._top_recommendation = self.get_group_recommendations(top_k=1)
+        return self._top_recommendation  # type: ignore
+
+    def get_recommendation_scores(self) -> Dict[int, float]:
+        """
+        Get the aggregated scores for all items across the group.
+
+        Returns:
+            A dictionary with item IDs as keys and their aggregated scores as values.
+        """
+        if self._aggregated_scores is None:
+            raise ValueError(
+                "You must call setup_recommendation before getting recommendation scores"
+            )
+        return self._aggregated_scores.copy()
+
+    def get_aggregation_strategy(self) -> Optional[AggregationStrategy]:
+        """
+        Get the current aggregation strategy.
+
+        Returns:
+            The aggregation strategy being used, or None if not set.
+        """
+        return self._aggregation_strategy
+
+    def get_group_members(self) -> Optional[List[Union[str, int]]]:
+        """
+        Get the current group members.
+
+        Returns:
+            List of group member IDs, or None if not set.
+        """
+        return self._members.copy() if self._members else None
+
+    def get_individual_predictions(
+        self,
+    ) -> Optional[Dict[Union[str, int], Dict[int, float]]]:
+        """
+        Get the individual predictions for all group members.
+
+        Returns:
+            Dictionary mapping user IDs to their individual predictions, or None if not available.
+        """
+        return self._group_predictions.copy() if self._group_predictions else None
@@ -0,0 +1,57 @@
+import pandas as pd
+from typing import Optional
+
+from .generic_recommender import GenericRecommender
+
+
+class Recommender(GenericRecommender):
+    def __init__(self, dataset_metadata, model, top_n: int = 10):
+        super(Recommender, self).__init__(dataset_metadata, model, top_n)
+
+    def get_predictions(
+        self,
+        user_id: int,
+        target_item_id: list,
+    ):
+        predictions = self.model.predict(user_id, target_item_id)
+        return predictions
+
+    def recommend(self, user_id: int, target_item_id: list):
+        """
+        Generate recommendations on specific itemId and userId
+        :param user_id: list, user Ids
+        :param target_item_id: list, item Ids
+        :param rated_items: list, of rated interactions.
+        :return: data.frame [userId, itemId, rank], recommendations ranking for the specified pairs of userId and itemId.
+        """
+        predictions = self.get_predictions(user_id, target_item_id)
+
+        return self.rank_prediction(user_id, target_item_id, predictions)
+
+    def recommend_user(
+        self, user_id: Optional[int] = None, user_ratings: Optional[pd.DataFrame] = None
+    ):
+        """
+        Get recommendations for a user.
+        :param user_id: int, a user Id
+        :param user_ratings: list, interactions on the user
+        :return: dataframe [userId, itemId, rank], recommendations ranking for the specified userId.
+        """
+        if user_ratings is None:
+            if user_id is None:
+                raise ValueError("Either 'user_id' or 'user_ratings' must be provided.")
+            user_ratings = self.get_rated(user_id=user_id)
+
+        if user_ratings is None:
+            return pd.DataFrame(
+                columns=["userId", "itemId", "rank"]
+            )  # Return empty recommendations
+
+        if user_id is None:
+            raise ValueError(
+                "Could not determine user_id from the provided user_ratings."
+            )
+
+        unrated_item_id = self.get_unrated(user_ratings["itemId"])
+
+        return self.recommend(user_id=user_id, target_item_id=unrated_item_id)
@@ -0,0 +1,17 @@
+from .aggregation_strategy import AggregationStrategy
+from .association_rules import AssociationRules
+from .scale import Scale
+from .sliding_window import SlidingWindow
+from .emp_loss import EMFLoss
+from .explanation_diversity import calculate_gild_for_explanations
+from .sliding_window_ranker import SlidingWindowRanker
+
+__all__ = [
+    "AggregationStrategy",
+    "AssociationRules",
+    "Scale",
+    "EMFLoss",
+    "calculate_gild_for_explanations",
+    "SlidingWindowRanker",
+    "SlidingWindow",
+]
@@ -0,0 +1,210 @@
+import numpy as np
+from typing import Dict, List, Union, Optional, TypeAlias
+from enum import Enum
+
+# Type aliases for better readability
+# Users and items may be identified by either a string or an integer.
+UserID: TypeAlias = Union[str, int]
+ItemID: TypeAlias = Union[str, int]
+# A single user's score for a single item.
+EvaluationScore: TypeAlias = float
+# The collective score of an item after aggregation.
+AggregatedScore: TypeAlias = float
+
+# Main data structure types
+# user_id -> {item_id: evaluation_score}
+UserEvaluations: TypeAlias = Dict[UserID, Dict[ItemID, EvaluationScore]]
+# user_id -> ordered list of item IDs
+UserRankings: TypeAlias = Dict[UserID, List[ItemID]]
+# item_id -> aggregated_score
+AggregatedScores: TypeAlias = Dict[ItemID, AggregatedScore]
+
+
+class AggregationStrategy(Enum):
+    """Enumeration of available aggregation strategies.
+
+    Each member selects how ScoreAggregator combines the group members'
+    per-item evaluations into one collective score.
+    """
+
+    # Individual Predictions
+    AVG_PREDICTIONS = "avg_predictions"  # average of the item's evaluations
+    LEAST_MISERY = "least_misery"  # minimum of the item's evaluations
+    MOST_PLEASURE = "most_pleasure"  # maximum of the item's evaluations
+    MOST_RESPECTED_PERSON = "most_respected_person"  # evaluation of one designated user
+
+    # Individual Preferences
+    ADDITIVE_UTILITARIAN = "additive_utilitarian"  # sum of the item's evaluations
+    MULTIPLICATIVE = "multiplicative"  # product of the item's evaluations
+    BORDA_COUNT = "borda_count"  # sum of scores derived from ranking positions
+
+
+class ScoreAggregator:
+    """
+    A class for aggregating individual predictions or preferences into collective scores.
+
+    Supports two main approaches:
+    1. Individual Predictions: AVG, LM, MP, MRP
+    2. Individual Preferences: AVG, ADD, MUL, BRC
+
+    Felfernig, A., Boratto, L., Stettinger, M., Tkali, M.: Group Recommender Systems:
+    An Introduction. Springer Publishing Company, Incorporated, 1st edn. (2018)
+
+    """
+
+    def __init__(self, most_respected_person: Optional[UserID] = None):
+        """
+        Initialize the ScoreAggregator.
+
+        Args:
+            most_respected_person: User ID of the most respected person (required for MRP strategy)
+        """
+        self.most_respected_person = most_respected_person
+
+    def aggregate_scores(
+        self,
+        evaluations: UserEvaluations,
+        strategy: AggregationStrategy,
+        rankings: Optional[UserRankings] = None,
+    ) -> AggregatedScores:
+        """
+        Aggregate individual evaluations into collective scores.
+
+        Args:
+            evaluations: Dictionary mapping user_id -> {item_id: evaluation_score}
+            strategy: Aggregation strategy to use
+            rankings: Dictionary mapping user_id -> [ordered_list_of_items] (required for Borda Count)
+
+        Returns:
+            Dictionary mapping item_id -> aggregated_score
+        """
+        if not evaluations:
+            return {}
+
+        # Get all items across all users
+        all_items: set[ItemID] = set()
+        for user_evals in evaluations.values():
+            all_items.update(user_evals.keys())
+
+        result: AggregatedScores = {}
+
+        for item in all_items:
+            if strategy == AggregationStrategy.AVG_PREDICTIONS:
+                result[item] = self._avg_predictions(evaluations, item)
+            elif strategy == AggregationStrategy.LEAST_MISERY:
+                result[item] = self._least_misery(evaluations, item)
+            elif strategy == AggregationStrategy.MOST_PLEASURE:
+                result[item] = self._most_pleasure(evaluations, item)
+            elif strategy == AggregationStrategy.MOST_RESPECTED_PERSON:
+                result[item] = self._most_respected_person(evaluations, item)
+            elif strategy == AggregationStrategy.ADDITIVE_UTILITARIAN:
+                result[item] = self._additive_utilitarian(evaluations, item)
+            elif strategy == AggregationStrategy.MULTIPLICATIVE:
+                result[item] = self._multiplicative(evaluations, item)
+            elif strategy == AggregationStrategy.BORDA_COUNT:
+                if rankings is None:
+                    raise ValueError("Rankings required for Borda Count strategy")
+                result[item] = self._borda_count(rankings, item)
+            else:
+                raise ValueError(f"Unknown aggregation strategy: {strategy}")
+
+        return result
+
+    def get_top_recommendation(
+        self,
+        evaluations: UserEvaluations,
+        strategy: AggregationStrategy,
+        rankings: Optional[UserRankings] = None,
+    ) -> ItemID:
+        """
+        Get the top recommended item based on aggregated scores.
+
+        Args:
+            evaluations: Dictionary mapping user_id -> {item_id: evaluation_score}
+            strategy: Aggregation strategy to use
+            rankings: Dictionary mapping user_id -> [ordered_list_of_items] (required for Borda Count)
+
+        Returns:
+            Item ID with highest aggregated score
+        """
+        aggregated_scores = self.aggregate_scores(evaluations, strategy, rankings)
+        return max(aggregated_scores.items(), key=lambda x: x[1])[0]
+
+    def _avg_predictions(
+        self, evaluations: UserEvaluations, item: ItemID
+    ) -> AggregatedScore:
+        """Average of item-specific evaluations."""
+        item_evals = [
+            user_evals.get(item, 0)
+            for user_evals in evaluations.values()
+            if item in user_evals
+        ]
+        return np.mean(item_evals) if item_evals else 0.0  # type: ignore
+
+    def _least_misery(
+        self, evaluations: UserEvaluations, item: ItemID
+    ) -> AggregatedScore:
+        """Minimum item-specific evaluation."""
+        item_evals = [
+            user_evals.get(item, 0)
+            for user_evals in evaluations.values()
+            if item in user_evals
+        ]
+        return min(item_evals) if item_evals else 0.0
+
+    def _most_pleasure(
+        self, evaluations: UserEvaluations, item: ItemID
+    ) -> AggregatedScore:
+        """Maximum item-specific evaluation."""
+        item_evals = [
+            user_evals.get(item, 0)
+            for user_evals in evaluations.values()
+            if item in user_evals
+        ]
+        return max(item_evals) if item_evals else 0.0
+
+    def _most_respected_person(
+        self, evaluations: UserEvaluations, item: ItemID
+    ) -> AggregatedScore:
+        """Item-evaluations of most respected user."""
+        if self.most_respected_person is None:
+            raise ValueError("Most respected person not specified")
+        if self.most_respected_person not in evaluations:
+            raise ValueError(
+                f"Most respected person '{self.most_respected_person}' not found in evaluations"
+            )
+        return evaluations[self.most_respected_person].get(item, 0.0)
+
+    def _avg_preferences(
+        self, evaluations: UserEvaluations, item: ItemID
+    ) -> AggregatedScore:
+        """Average of item-specific evaluations (same as avg_predictions)."""
+        return self._avg_predictions(evaluations, item)
+
+    def _additive_utilitarian(
+        self, evaluations: UserEvaluations, item: ItemID
+    ) -> AggregatedScore:
+        """Sum of item-specific evaluations."""
+        item_evals = [
+            user_evals.get(item, 0)
+            for user_evals in evaluations.values()
+            if item in user_evals
+        ]
+        return sum(item_evals)
+
+    def _multiplicative(
+        self, evaluations: UserEvaluations, item: ItemID
+    ) -> AggregatedScore:
+        """Multiplication of item-specific evaluations."""
+        item_evals = [
+            user_evals.get(item, 0)
+            for user_evals in evaluations.values()
+            if item in user_evals
+        ]
+        if not item_evals:
+            return 0.0
+        result = 1.0
+        for eval_score in item_evals:
+            result *= eval_score
+        return result
+
+    def _borda_count(self, rankings: UserRankings, item: ItemID) -> AggregatedScore:
+        """Sum of item-specific scores derived from item ranking."""
+        total_score = 0.0
+        for user_ranking in rankings.values():
+            if item in user_ranking:
+                # Score is based on position in ranking (higher position = higher score)
+                position = user_ranking.index(item)
+                score = len(user_ranking) - position - 1  # Reverse position for score
+                total_score += score
+        return total_score
@@ -0,0 +1,255 @@
+from mlxtend.preprocessing import TransactionEncoder
+from mlxtend.frequent_patterns import fpgrowth, association_rules
+import pandas as pd
+from pygrex.data_reader.data_reader import DataReader
+from typing import List, Optional, Union
+
+
+class AssociationRules:
+    """
+    A class to represent association rules mining for recommendation systems.
+
+    This class implements association rules mining using the FP-Growth algorithm
+    to discover frequent itemsets and generate association rules from user-item
+    interaction data. It can be used to find patterns in user behavior and
+    generate item recommendations based on item associations.
+    """
+
+    def __init__(
+        self,
+        data: DataReader,
+        min_support: float = 0.2,
+        min_confidence: float = 0.2,
+        rating_threshold: float = 4.0,
+    ) -> None:
+        """Initialize the association rules miner with data and parameters.
+
+        Args:
+            data: The DataReader object containing user-item interactions with ratings.
+            min_support: Minimum support threshold for frequent itemsets.
+                Must be between 0 and 1. Default is 0.2.
+            min_confidence: Minimum confidence threshold for association rules.
+                Must be between 0 and 1. Default is 0.2.
+            rating_threshold: Minimum rating threshold to consider an interaction
+                as positive. Default is 4.0.
+
+        Raises:
+            ValueError: If support, confidence, or rating_threshold values are invalid.
+        """
+        self._validate_parameters(min_support, min_confidence, rating_threshold)
+
+        self.data = data
+        self.min_support = min_support
+        self.min_confidence = min_confidence
+        self.rating_threshold = rating_threshold
+        self._frequent_itemsets: Optional[pd.DataFrame] = None
+        self._association_rules: Optional[pd.DataFrame] = None
+
+    def _validate_parameters(
+        self, min_support: float, min_confidence: float, rating_threshold: float
+    ) -> None:
+        """Validate initialization parameters.
+
+        Args:
+            min_support: Minimum support threshold to validate.
+            min_confidence: Minimum confidence threshold to validate.
+            rating_threshold: Rating threshold to validate.
+
+        Raises:
+            ValueError: If any parameter is invalid.
+        """
+        if not (0 < min_support <= 1):
+            raise ValueError("min_support must be between 0 and 1")
+        if not (0 < min_confidence <= 1):
+            raise ValueError("min_confidence must be between 0 and 1")
+        if rating_threshold < 0:
+            raise ValueError("rating_threshold must be non-negative")
+
+    def get_df_filtered_by_rating_threshold(self) -> pd.DataFrame:
+        df = self.data.dataset.copy()
+        # Filter interactions based on rating threshold
+        df_filtered = df[df["rating"] >= self.rating_threshold]
+
+        if df_filtered.empty:
+            raise ValueError(
+                f"No interactions found with rating >= {self.rating_threshold}"
+            )
+        return df_filtered
+
+    def _prepare_transactions(self) -> List[List[str]]:
+        """Prepare transaction data from the dataset.
+
+        Filters the dataset based on rating threshold and groups items
+        by user to create transaction lists.
+
+        Returns:
+            A list of transactions, where each transaction is a list of item IDs
+            that a user has positively interacted with.
+        """
+        df_filtered = self.get_df_filtered_by_rating_threshold()
+        # Group items by user to create transactions
+        transactions = df_filtered.groupby("userId")["itemId"].apply(list).tolist()
+
+        # Convert item IDs to strings for consistency
+        transactions = [
+            [str(item) for item in transaction] for transaction in transactions
+        ]
+
+        return transactions
+
+    def _mine_frequent_itemsets(
+        self, transactions: List[List[Union[str, int]]]
+    ) -> pd.DataFrame:
+        """Mine frequent itemsets using FP-Growth algorithm.
+
+        Args:
+            transactions: List of transactions to mine frequent itemsets from.
+
+        Returns:
+            DataFrame containing frequent itemsets with their support values.
+
+        Raises:
+            ValueError: If no frequent itemsets are found.
+        """
+        # Encode transactions into binary matrix
+        transaction_encoder = TransactionEncoder()
+        transaction_matrix = transaction_encoder.fit_transform(transactions)
+
+        df_encoded = pd.DataFrame(
+            transaction_matrix,  # type: ignore
+            columns=transaction_encoder.columns_,
+        )
+
+        # Apply FP-Growth to find frequent itemsets
+        frequent_itemsets = fpgrowth(
+            df_encoded, min_support=self.min_support, use_colnames=True
+        )
+
+        if frequent_itemsets.empty:
+            raise ValueError(
+                f"No frequent itemsets found with min_support={self.min_support}"
+            )
+
+        return frequent_itemsets
+
+    def _generate_association_rules(
+        self, frequent_itemsets: pd.DataFrame
+    ) -> pd.DataFrame:
+        """Generate association rules from frequent itemsets.
+
+        Args:
+            frequent_itemsets: DataFrame containing frequent itemsets.
+
+        Returns:
+            DataFrame containing association rules with their metrics.
+
+        Raises:
+            ValueError: If no association rules are found.
+        """
+        rules = association_rules(
+            frequent_itemsets, metric="confidence", min_threshold=self.min_confidence
+        )
+
+        if rules.empty:
+            raise ValueError(
+                f"No association rules found with min_confidence={self.min_confidence}"
+            )
+
+        return rules
+
+    def compute(self) -> pd.DataFrame:
+        """Compute association rules from the dataset.
+
+        This method performs the complete association rules mining process:
+        1. Prepares transactions from the dataset
+        2. Mines frequent itemsets using FP-Growth
+        3. Generates association rules from frequent itemsets
+
+        Returns:
+            DataFrame containing association rules with metrics including
+            antecedents, consequents, support, confidence, lift, etc.
+
+        Raises:
+            ValueError: If the dataset is empty, no transactions meet the
+                criteria, or no rules can be generated with the given parameters.
+        """
+        if self.data.dataset.empty:
+            raise ValueError("Dataset is empty")
+
+        # Prepare transactions
+        transactions = self._prepare_transactions()
+
+        if not transactions:
+            raise ValueError("No transactions found after filtering")
+
+        # Mine frequent itemsets
+        self._frequent_itemsets = self._mine_frequent_itemsets(transactions)  # type: ignore
+
+        # Generate association rules
+        self._association_rules = self._generate_association_rules(
+            self._frequent_itemsets
+        )
+
+        return self._association_rules
+
+    def get_frequent_itemsets(self) -> Optional[pd.DataFrame]:
+        """Get the computed frequent itemsets.
+
+        Returns:
+            DataFrame containing frequent itemsets if compute() has been called,
+            None otherwise.
+        """
+        return self._frequent_itemsets
+
+    def get_recommendations_for_items(
+        self, items: List[Union[str, int]], top_k: int = 10
+    ) -> pd.DataFrame:
+        """Get item recommendations based on association rules.
+
+        Args:
+            items: List of item IDs to get recommendations for.
+            top_k: Maximum number of recommendations to return. Default is 10.
+
+        Returns:
+            DataFrame containing recommended items sorted by confidence.
+
+        Raises:
+            RuntimeError: If compute() hasn't been called yet.
+            ValueError: If items list is empty.
+        """
+        if self._association_rules is None:
+            raise RuntimeError("Must call compute() before getting recommendations")
+
+        if not items:
+            raise ValueError("Items list cannot be empty")
+
+        items_set = set(str(item) for item in items)
+
+        # Filter rules where antecedents match the given items
+        matching_rules = self._association_rules[
+            self._association_rules["antecedents"].apply(
+                lambda x: items_set.issubset(set(str(item) for item in x))
+            )
+        ]
+
+        if matching_rules.empty:
+            return pd.DataFrame()
+
+        # Sort by confidence and return top_k recommendations
+        recommendations = matching_rules.nlargest(top_k, "confidence")
+
+        return recommendations[
+            ["antecedents", "consequents", "confidence", "lift", "support"]
+        ]
+
+    def __str__(self) -> str:
+        """Return string representation of the AssociationRules object."""
+        return (
+            f"AssociationRules(min_support={self.min_support}, "
+            f"min_confidence={self.min_confidence}, "
+            f"rating_threshold={self.rating_threshold})"
+        )
+
+    def __repr__(self) -> str:
+        """Return detailed string representation of the AssociationRules object."""
+        return self.__str__()
@@ -0,0 +1,17 @@
+import torch
+
+
+class EMFLoss(torch.nn.Module):
+    def __init__(self):
+        super(EMFLoss, self).__init__()
+
+    def forward(self, ratings_pred, ratings, u, v, reg_term, expl, expl_reg_term):
+
+        mse = (ratings - ratings_pred.view(-1)) ** 2
+        u_l2 = reg_term * torch.norm(u, 2, -1)
+        v_l2 = reg_term * torch.norm(v, 2, -1)
+        expl_constraint = expl_reg_term * torch.norm(u - v, 1, -1) * expl
+
+        loss = mse + u_l2 + v_l2 + expl_constraint
+
+        return loss.mean()
@@ -0,0 +1,80 @@
+from itertools import combinations
+import numpy as np
+
+
+def _get_explanation_feature_set(explanation, explainer_type, details=None):
+    """Helper to extract a consistent feature set from different explanation types."""
+    if explainer_type == "Sliding Window":
+        return set(explanation.get("items", []))
+    elif explainer_type == "EXPGRS":
+        if details is not None:
+            return set(details.get("antecedent", frozenset()))
+        else:
+            return set()
+    elif explainer_type == "LORE4Groups":
+        rules_data = explanation.get("group_factual_rule", {})
+        if isinstance(rules_data, dict):
+            return set(
+                rule for tier_rules in rules_data.values() for rule in tier_rules
+            )
+        elif isinstance(rules_data, list):
+            return set(rules_data)
+    return set()
+
+
+def calculate_gild_for_explanations(explanations_dict, explainer_type, use_median=True):
+    """Calculate Gaussian Inter-List Diversity (GILD) for a set of explanations."""
+
+    if not explanations_dict or len(explanations_dict) < 2:
+        return 0.0
+
+    feature_sets = []
+    if explainer_type == "EXPGRS":
+        for item_id, rules_list in explanations_dict.items():
+            if rules_list:
+                feature_sets.append(
+                    _get_explanation_feature_set(
+                        None, explainer_type, details=rules_list[0]
+                    )
+                )
+    elif explainer_type == "Sliding Window":
+        for call, exp_data in explanations_dict.items():
+            feature_sets.append(_get_explanation_feature_set(exp_data, explainer_type))
+    elif explainer_type == "LORE4Groups":
+        for item_id, exp_data in explanations_dict.items():
+            feature_sets.append(_get_explanation_feature_set(exp_data, explainer_type))
+
+    feature_sets = [fs for fs in feature_sets if fs]
+    if len(feature_sets) < 2:
+        return 0.0
+
+    # Calculate pairwise Jaccard distances
+    distances = []
+    for set1, set2 in combinations(feature_sets, 2):
+        intersection_len = len(set1.intersection(set2))
+        union_len = len(set1.union(set2))
+        jaccard_dist = 1.0 - (intersection_len / union_len) if union_len > 0 else 1.0
+        distances.append(jaccard_dist)
+
+    if not distances:
+        return 0.0
+
+    # Calculate sigma using paper's formula
+    k_choose_2 = len(distances)
+    if use_median:
+        reference_dist = np.median(distances)
+    else:
+        reference_dist = min(distances)
+
+    denominator = np.sqrt(2 * np.log(k_choose_2 - 1)) if k_choose_2 > 1 else 1.0
+    sigma = reference_dist / denominator if denominator > 0 else reference_dist
+    if sigma == 0:
+        sigma = 1e-9
+    kernel_distances_sum = 0.0
+    for d in distances:
+        kernel_distance = np.sqrt(2 - 2 * np.exp(-(d**2) / (2 * sigma**2)))
+        kernel_distances_sum += kernel_distance
+
+    gild = kernel_distances_sum / k_choose_2 if distances else 0
+
+    return gild
@@ -0,0 +1,138 @@
+from typing import List, Union, Optional
+import numpy as np
+from scipy import stats
+
+
+class Scale:
+    """
+    A class for scaling numerical values using different methods.
+
+    Methods:
+        quantile: Scale values using quantile-based ranking.
+        linear: Scale values linearly to a target range with outlier handling.
+    """
+
+    @staticmethod
+    def quantile(
+        raw_predictions: Union[List[float], np.ndarray],
+        target_min: float = 1,
+        target_max: float = 5,
+    ) -> np.ndarray:
+        """
+        Scale raw predictions to the target range using quantile-based ranking.
+
+        Args:
+            raw_predictions: The raw prediction values.
+            target_min: Minimum of the target range (default: 1).
+            target_max: Maximum of the target range (default: 5).
+
+        Returns:
+            numpy.ndarray: Scaled predictions.
+
+        Raises:
+            ValueError: If raw_predictions is empty.
+        """
+        if len(raw_predictions) == 0:
+            raise ValueError("Raw predictions array is empty.")
+
+        # Convert to numpy array if it's not already
+        raw_predictions = np.array(raw_predictions)
+
+        ranks = stats.rankdata(raw_predictions, method="average")
+        if len(raw_predictions) == 1:
+            # Handle single element case
+            scaled_predictions = np.array([(target_min + target_max) / 2])
+        else:
+            scaled_predictions = target_min + (ranks - 1) * (
+                target_max - target_min
+            ) / (len(raw_predictions) - 1)
+
+        # Ensure scaled predictions are within [target_min, target_max]
+        scaled_predictions = np.clip(scaled_predictions, target_min, target_max)
+
+        return scaled_predictions
+
+    @staticmethod
+    def linear(
+        raw_predictions: Union[List[float], np.ndarray],
+        target_min: float = 1,
+        target_max: float = 5,
+        ref_min: Optional[float] = None,
+        ref_max: Optional[float] = None,
+        handle_outliers: bool = True,
+    ) -> np.ndarray:
+        """
+        Scale raw predictions to the target range [target_min, target_max].
+
+        Args:
+            raw_predictions: The raw prediction values.
+            target_min: Minimum of the target range (default: 1).
+            target_max: Maximum of the target range (default: 5).
+            ref_min: Reference minimum for raw predictions. If None, will be calculated
+                     from the data or from outlier bounds if handle_outliers=True.
+            ref_max: Reference maximum for raw predictions. If None, will be calculated
+                     from the data or from outlier bounds if handle_outliers=True.
+            handle_outliers: Whether to handle outliers using IQR method (default: True).
+
+        Returns:
+            numpy.ndarray: Scaled predictions.
+
+        Raises:
+            ValueError: If raw_predictions is empty.
+        """
+        if len(raw_predictions) == 0:
+            raise ValueError("Raw predictions array is empty.")
+
+        # Convert to numpy array if it's not already
+        raw_predictions = np.array(raw_predictions)
+
+        # Handle single element case
+        if len(raw_predictions) == 1:
+            if ref_min is not None and ref_max is not None:
+                # Scale based on provided reference range
+                value = raw_predictions[0]
+                scaled_value = (
+                    target_min
+                    + (value - ref_min)
+                    * (target_max - target_min)
+                    / (ref_max - ref_min)
+                    if ref_max != ref_min
+                    else (target_min + target_max) / 2
+                )
+                scaled_value = np.clip(scaled_value, target_min, target_max)
+                return np.array([scaled_value])
+            else:
+                # Can't determine range from single value, return middle of target range
+                return np.array([(target_min + target_max) / 2])
+
+        clipped_predictions = raw_predictions.copy()
+
+        # Handle outliers if requested
+        if handle_outliers:
+            q1, q3 = np.percentile(raw_predictions, [25, 75])
+            iqr = q3 - q1
+            lower_bound = q1 - 1.5 * iqr
+            upper_bound = q3 + 1.5 * iqr
+            clipped_predictions = np.clip(raw_predictions, lower_bound, upper_bound)
+
+        # Determine min and max values
+        min_raw = np.min(clipped_predictions)
+        max_raw = np.max(clipped_predictions)
+
+        # Use provided reference bounds if given, otherwise use data bounds
+        actual_ref_min = ref_min if ref_min is not None else min_raw
+        actual_ref_max = ref_max if ref_max is not None else max_raw
+
+        # Scale to [target_min, target_max]
+        if actual_ref_max == actual_ref_min:
+            # Reference bounds are equal, return the middle of the target range
+            return np.full_like(raw_predictions, (target_min + target_max) / 2)
+        else:
+            scaled_predictions = target_min + (raw_predictions - actual_ref_min) * (
+                target_max - target_min
+            ) / (actual_ref_max - actual_ref_min)
+
+        # Ensure scaled predictions are within [target_min, target_max]
+        scaled_predictions = np.clip(scaled_predictions, target_min, target_max)
+
+        return scaled_predictions
@@ -0,0 +1,90 @@
+from typing import List, Optional, TypeVar, Generic, Iterator
+
+T = TypeVar("T")
+
+
+class SlidingWindow(Generic[T]):
+    """Class for creating and managing sliding windows over a sequence.
+
+    This class provides functionality to iterate through windows of a fixed size
+    over a sequence of items.
+    """
+
+    def __init__(self, sequence: List[T], window_size: int):
+        """Initialize the sliding window.
+
+        Args:
+            sequence: The sequence of items to slide over
+            window_size: The size of each window (must be positive)
+
+        Raises:
+            ValueError: If window_size is less than 1
+            TypeError: If sequence is not iterable
+        """
+        if window_size < 1:
+            raise ValueError("Window size must be at least 1")
+
+        if not hasattr(sequence, "__iter__"):
+            raise TypeError("Sequence must be iterable")
+
+        self.sequence = sequence
+        self.window_size = window_size
+        self.index = 0
+        self.max_index = len(sequence) - window_size + 1 if sequence else 0
+
+    def get_next_window(self) -> Optional[List[T]]:
+        """Return the next window and advance the current position.
+
+        Returns:
+            A list containing the next window of items, or None if all windows
+            have been processed.
+        """
+        if self.index >= self.max_index:
+            return None
+
+        window = self.sequence[self.index : self.index + self.window_size]
+        self.index += 1
+        return window
+
+    def reset(self) -> None:
+        """Reset the window position to the beginning of the sequence."""
+        self.index = 0
+
+    def has_next(self) -> bool:
+        """Check if there are more windows available.
+
+        Returns:
+            True if there are more windows, False otherwise.
+        """
+        return self.index < self.max_index
+
+    def __iter__(self) -> Iterator[List[T]]:
+        """Make the class iterable.
+
+        Returns:
+            An iterator over all windows in the sequence.
+        """
+        self.reset()
+        return self
+
+    def __next__(self) -> List[T]:
+        """Get the next window for iteration.
+
+        Returns:
+            The next window as a list.
+
+        Raises:
+            StopIteration: When all windows have been processed.
+        """
+        window = self.get_next_window()
+        if window is None:
+            raise StopIteration
+        return window
+
+    def __len__(self) -> int:
+        """Return the total number of windows.
+
+        Returns:
+            The number of complete windows in the sequence.
+        """
+        return max(0, self.max_index)
@@ -0,0 +1,631 @@
+import operator
+from typing import Any, Dict, List, Union, Optional
+
+import numpy as np
+import pandas as pd
+from scipy.signal import (
+    find_peaks,
+    peak_widths,
+)
+
+from pygrex.data_reader import DataReader
+
+
+class SlidingWindowRanker:
+    """
+    Stratigi, M., Bikakis, N., Stefanidis, K.: Counterfactual explanations for group
+    recommendations. In: Proceedings of the 27th International Workshop on Design,
+    Optimization, Languages and Analytical Processing of Big Data (DOLAP 2025)
+    """
+
+    def __init__(self, config: Dict[str, Any]):
+        """
+        Initialize the SlidingWindowRanker.
+
+        Args:
+            config: Configuration parameters for the evaluator
+        """
+        self.config = config
+        self.group_predictions: Optional[
+            Dict[Union[str, int], Dict[Union[str, int], float]]
+        ] = None
+        self.top_recommendation: Optional[Union[str, int]] = None
+
+    def set_group_recommender_values(
+        self,
+        group_predictions: Dict[Union[str, int], Dict[Union[str, int], float]],
+        top_recommendation: Union[str, int],
+    ) -> None:
+        """
+        Set group recommender values.
+
+        Args:
+            group_predictions: Dictionary mapping user IDs to their item predictions
+            top_recommendation: List of top recommended items for the group
+        """
+        self.group_predictions = group_predictions
+        self.top_recommendation = top_recommendation
+
+    def evaluate(self, data: DataReader) -> Dict[str, Any]:
+        """
+        Evaluate the data using the Stratigis evaluator.
+
+        Args:
+            data: DataReader object containing dataset and transformation methods
+
+        Returns:
+            Dictionary with evaluation metrics
+        """
+        # Implementation would go here
+        return {}
+
+    def calculate_item_popularity_score(
+        self, items: List[Union[str, int]], data: DataReader
+    ) -> Dict[Union[str, int], float]:
+        """
+        Calculate the normalized popularity of each item based on the number of interactions received.
+
+        Args:
+            items: List of item IDs
+            data: Data object containing the dataset and transformation methods
+
+        Returns:
+            Dictionary with item IDs as keys and normalized popularity (0-1) as values
+        """
+        # Calculate popularity (number of interactions) for each item
+        popularity_counts = {}
+        for item_id in items:
+            internal_item_id = data.get_new_item_id(item_id)
+            count = len(data.dataset[data.dataset["itemId"] == internal_item_id])
+            popularity_counts[item_id] = count
+
+        # Find min and max values for normalization
+        min_count = min(popularity_counts.values()) if popularity_counts else 0
+        max_count = max(popularity_counts.values()) if popularity_counts else 0
+
+        # Add 1% padding to the range
+        range_value = max_count - min_count
+        padded_range = range_value + (
+            range_value / 50
+        )  # Add 2% to range (1% on each end)
+        padded_min = min_count - (
+            range_value / 100
+        )  # Subtract 1% of range from minimum
+
+        if padded_range == 0:
+            padded_range = 1  # Avoid division by zero
+
+        # Normalize popularity values to [0,1]
+        popularity_mask = {}
+        for item_id, count in popularity_counts.items():
+            popularity_mask[item_id] = (count - padded_min) / padded_range
+
+        return popularity_mask
+
+    def calculate_relevance_mask(
+        self,
+        target_item_id: Union[str, int],
+    ) -> Dict[Union[str, int], float]:
+        """
+        Create a mapping between users and their prediction scores for a specific target item.
+
+        Args:
+            target_item_id :The ID of the item for which prediction scores are needed
+
+        Returns:
+            Dictionary mapping user IDs to their predicted scores for the target item
+            Note: Users without a prediction for the target item will have a value of 0
+
+        Examples
+            >>> user_preds = {'user1': {'item1': 4.5, 'item2': 3.2}, 'user2': {'item2': 2.8}}
+            >>> evaluator.set_group_recommender_values(user_preds,top_recommendation)
+            >>> evaluator.calculate_relevance_mask('item1')
+            {'user1': 4.5, 'user2': 0}
+        """
+
+        if self.group_predictions is None:
+            raise ValueError(
+                "User predictions not set. Call set_group_recommender_values first."
+            )
+
+        individual_predictions = {}
+
+        for user_id, predictions in self.group_predictions.items():
+            # Get the prediction for the target item if it exists, otherwise default to 0
+            individual_predictions[user_id] = predictions.get(target_item_id, 0)
+
+        return individual_predictions
+
+    def calculate_relevance_score(
+        self,
+        item_id: Union[str, int],
+        data: DataReader,
+        prediction_scores: Dict[Union[str, int], float],
+        members: List[Union[str, int]],
+        rating_scale: tuple = (0, 5),  # Default rating scale
+    ) -> float:
+        """
+        Calculate the normalized average prediction score for an item based on group members' predictions.
+
+        Agrs
+           item_id: ID of the item to calculate relevance for
+            data : DataReader object containing dataset and ID mapping methods
+            prediction_scores : Dictionary mapping user IDs to their prediction scores for items
+            members : List of user IDs in the group
+            rating_scale: Tuple indicating (min_rating, max_rating) for normalization
+
+        Returns
+            Normalized average prediction score in range [0,1]
+            Returns 0 if no users in the group have interacted with the item
+
+        Notes
+            1. Calculates the average prediction score for the item from group members
+            2. Normalizes the score to [0,1] range with 1% padding
+        """
+        total_score = 0
+        valid_users_count = 0
+        internal_item_id = data.get_new_item_id(item_id)
+
+        for user_id in members:
+            # Convert user ID to internal format
+            internal_user_id = (
+                data.get_new_user_id(int(user_id))
+                if isinstance(user_id, (int, np.integer))
+                else user_id
+            )
+
+            # Check if user has interacted with the item
+            user_item_data = data.dataset[
+                (data.dataset["userId"] == internal_user_id)
+                & (data.dataset["itemId"] == internal_item_id)
+            ]
+
+            if user_item_data.empty:
+                continue
+
+            # Get the prediction score for this user
+            if user_id in prediction_scores:
+                total_score += prediction_scores[user_id]
+                valid_users_count += 1
+
+        # Return 0 if no valid users found
+        if valid_users_count == 0:
+            return 0
+
+        # Calculate average score
+        average_score = total_score / valid_users_count
+
+        # Normalize to [0,1] with 1% padding
+        min_value, max_value = rating_scale
+        range_value = max_value - min_value
+        padded_range = range_value + (
+            range_value / 50
+        )  # Add 2% to range (1% on each end)
+        padded_min = min_value - (
+            range_value / 100
+        )  # Subtract 1% of range from minimum
+
+        if padded_range == 0:
+            return 0.0
+
+        normalized_score = (average_score - padded_min) / padded_range
+        return float(normalized_score)
+
+    def calculate_item_intensity_score(
+        self, item_id: Union[str, int], members: List[Union[str, int]], data: DataReader
+    ) -> float:
+        """
+        Calculate what proportion of group members have interacted with the specified item.
+
+        Args
+            item_id : ID of the item to calculate interaction rate for
+            members : List of user IDs in the group
+            data : DataReader object containing dataset and ID mapping methods
+
+        Returns
+            Proportion of group members who have interacted with the item (range [0,1])
+            0 means no group members have interacted with the item
+            1 means all group members have interacted with the item
+        """
+        # Convert item ID to internal format
+        if data is None:
+            print("Error: DataReader object is None. Cannot convert item_id.")
+            return 0.0, {user_id: 0.0 for user_id in members}, pd.DataFrame()
+        internal_item_id = data.get_new_item_id(item_id)
+
+        # Convert all user IDs to internal format
+        internal_members = [data.get_new_user_id(user_id) for user_id in members]
+
+        # Count how many users have interacted with the item
+        interaction_count = len(
+            data.dataset[
+                (data.dataset.itemId == internal_item_id)
+                & data.dataset.userId.isin(internal_members)
+            ]
+        )
+
+        # Calculate proportion of group members who interacted with item
+        if not members:
+            return 0  # Avoid division by zero if no members
+
+        interaction_rate = interaction_count / len(members)
+        return interaction_rate
+
+    def calculate_rating_score(
+        self,
+        item_id: Union[str, int],
+        members: List[Union[str, int]],
+        data: DataReader,
+        rating_scale: tuple = (0, 5),
+    ) -> float:
+        """
+        Calculate the normalized average rating given to an item by group members.
+
+        Args
+            item_id : ID of the item to calculate average rating for
+            data : DataReader object containing dataset and ID mapping methods
+            members : List of user IDs in the group
+            rating_scale: Tuple indicating (min_rating, max_rating) for normalization
+
+        Returns
+            Normalized average rating in range [0,1]
+
+        Notes
+            - Considers all group members in the denominator even if some haven't rated the item
+            - Normalizes the resulting average to [0,1] with 1% padding
+        """
+        # Convert item ID to internal format
+        if data is None:
+            print("Error: DataReader object is None. Cannot convert item_id.")
+            return 0.0
+        internal_item_id = data.get_new_item_id(item_id)
+
+        # Convert all user IDs to internal format
+        internal_members = [data.get_new_user_id(user_id) for user_id in members]
+
+        # Get ratings from users who have rated this item
+        rating_data = data.dataset[
+            (data.dataset.itemId == internal_item_id)
+            & data.dataset.userId.isin(internal_members)
+        ]
+
+        # Calculate average rating (sum of ratings divided by total group size)
+        if len(members) == 0:
+            return 0  # Avoid division by zero if no members
+
+        total_rating = rating_data["rating"].sum()
+        average_rating = total_rating / len(members)
+
+        # Normalize to [0,1] with 1% padding
+        min_value, max_value = rating_scale
+        range_value = max_value - min_value
+        padded_range = range_value + (
+            range_value / 50
+        )  # Add 2% to range (1% on each end)
+        padded_min = min_value - (
+            range_value / 100
+        )  # Subtract 1% of range from minimum
+
+        if padded_range == 0:
+            return 0.0
+
+        normalized_rating = (average_rating - padded_min) / padded_range
+        return float(normalized_rating)
+
+    def calculate_trending_score(
+        self,
+        members: List[Union[str, int]],
+        item_id: Union[str, int],
+        data: Optional[DataReader] = None,
+        peak_norm_min_height: float = 0.1,
+        peak_norm_min_prominence: float = 0.05,
+        peak_min_distance: int = 3,
+        peak_width_rel_height: float = 0.5,
+    ) -> tuple[float, Dict[Union[str, int], float], pd.DataFrame]:
+        """
+        Calculates a trending score for a user, using normalized data for hype period detection.
+
+        Args
+            members : List of user IDs in the group
+            item_id : ID of the item to calculate trending score for
+            data : DataReader object containing dataset and ID mapping methods
+            peak_norm_min_height : Minimum height of peaks in normalized data to consider as significant
+            peak_norm_min_prominence : Minimum prominence of peaks in normalized data
+            peak_min_distance : Minimum distance between peaks in months
+            peak_width_rel_height : Relative height for peak width calculation
+
+        Returns
+            tuple: (average_trending_score, individual_scores, hype_periods_for_item)
+                average_trending_score: Average trending score across all group members (0-1)
+                individual_scores: Dictionary mapping user IDs to their individual trending scores
+                hype_periods_for_item: DataFrame containing detected hype periods for the item
+        """
+
+        if not members:
+            print("Error: No group members provided for trending score calculation.")
+            return 0.0, {}, pd.DataFrame()
+
+        _df = pd.DataFrame()
+        if data is not None and isinstance(data, DataReader):
+            _df = data.dataset.copy()
+        else:
+            if data is not None:
+                print(
+                    f"Warning: data was provided but is not a DataReader object (type: {type(data)})."
+                )
+
+        if _df.empty:
+            print(
+                "Error: The DataFrame (_df) is empty. Cannot calculate score or plot."
+            )
+            return 0.0, {}, pd.DataFrame()
+
+        required_columns = [
+            "userId",
+            "itemId",
+            "rating",
+            "timestamp",
+        ]
+        missing_columns = [col for col in required_columns if col not in _df.columns]
+        if missing_columns:
+            print(
+                f"Error: Missing required columns in DataFrame: {', '.join(missing_columns)}"
+            )
+            return 0.0, {}, pd.DataFrame()
+
+        try:
+            if "timestamp_dt" not in _df.columns or _df["timestamp_dt"].isnull().all():
+                _df["timestamp_dt"] = pd.to_datetime(_df["timestamp"], unit="s")
+            if "year_month" not in _df.columns or _df["year_month"].isnull().all():
+                _df["year_month"] = _df["timestamp_dt"].dt.to_period("M")
+        except Exception as e:
+            print(f"Error during timestamp conversion or year-month extraction: {e}")
+            return 0.0, {}, pd.DataFrame()
+
+        if data is None:  # Should not happen if _df is not empty, but as a safeguard
+            return 0.0, {}, pd.DataFrame()
+
+        # Convert item ID to internal format
+        internal_item_id = data.get_new_item_id(item_id)
+
+        # Convert all user IDs to internal format
+        internal_members = [data.get_new_user_id(user_id) for user_id in members]
+
+        # Filter data for the specific item ID only
+        item_df = _df[_df["itemId"] == internal_item_id]
+        if item_df.empty:
+            return 0.0, {user_id: 0.0 for user_id in members}, pd.DataFrame()
+
+        # movie_ratings_per_month contains original rating counts
+        movie_ratings_per_month = (
+            item_df.groupby(["itemId", "year_month"], observed=False)
+            .size()
+            .reset_index(name="rating_count")
+        )
+
+        if movie_ratings_per_month.empty:
+            return 0.0, {user_id: 0.0 for user_id in members}, pd.DataFrame()
+
+        hype_periods_for_item = None
+
+        # Process the specific item for hype period detection
+        group_sorted = movie_ratings_per_month.sort_values("year_month").reset_index(
+            drop=True
+        )
+        original_ratings = group_sorted["rating_count"].to_numpy()
+
+        # Normalization Step
+        min_rating = np.min(original_ratings)
+        max_rating = np.max(original_ratings)
+
+        normalized_ratings = None
+        if (
+            max_rating > min_rating
+        ):  # Avoid division by zero if all ratings are the same
+            normalized_ratings = (original_ratings - min_rating) / (
+                max_rating - min_rating
+            )
+        elif len(original_ratings) > 0:
+            normalized_ratings = np.zeros_like(original_ratings, dtype=float)
+        else:  # No ratings for this item in group_sorted (should not happen if groupby is correct)
+            return 0.0, {user_id: 0.0 for user_id in members}, pd.DataFrame()
+
+        # Peak Detection on Normalized Data
+        peaks_indices, properties = find_peaks(
+            normalized_ratings,
+            height=peak_norm_min_height,
+            distance=peak_min_distance,
+            prominence=peak_norm_min_prominence,
+        )
+
+        hype_periods_list = []
+        if len(peaks_indices) > 0:
+            widths, _, left_ips, right_ips = peak_widths(
+                normalized_ratings, peaks_indices, rel_height=peak_width_rel_height
+            )
+
+            for i, peak_idx in enumerate(peaks_indices):
+                start_idx = max(0, int(round(left_ips[i])))
+                end_idx = min(len(group_sorted) - 1, int(round(right_ips[i])))
+
+                if start_idx <= end_idx:
+                    start_month = group_sorted.iloc[start_idx]["year_month"]
+                    end_month = group_sorted.iloc[end_idx]["year_month"]
+
+                    hype_periods_list.append(
+                        {
+                            "itemId": item_id,
+                            "hype_start_month": start_month,
+                            "hype_end_month": end_month,
+                            "peak_month": group_sorted.iloc[peak_idx]["year_month"],
+                            "peak_rating_count_original": original_ratings[peak_idx],
+                            "peak_rating_count_normalized": normalized_ratings[
+                                peak_idx
+                            ],
+                        }
+                    )
+
+        if hype_periods_list:
+            hype_periods_for_item = pd.DataFrame(hype_periods_list)
+        else:
+            return 0.0, {user_id: 0.0 for user_id in members}, pd.DataFrame()
+
+        # Calculate trending scores for each user in the group
+        individual_scores = {}
+        valid_scores = []
+
+        for idx, user_id in enumerate(internal_members):
+            user_ratings = item_df[item_df["userId"] == user_id].copy()
+
+            if user_ratings.empty:
+                individual_scores[members[idx]] = 0.0
+                continue
+
+            # Merge user ratings with hype periods
+            user_ratings_merged = pd.merge(
+                user_ratings, hype_periods_for_item, on="itemId", how="left"
+            )
+
+            user_ratings_merged["is_match"] = (
+                (
+                    user_ratings_merged["year_month"]
+                    >= user_ratings_merged["hype_start_month"]
+                )
+                & (
+                    user_ratings_merged["year_month"]
+                    <= user_ratings_merged["hype_end_month"]
+                )
+                & user_ratings_merged["hype_start_month"].notna()
+            )
+
+            if (
+                not user_ratings_merged.empty
+                and "is_match" in user_ratings_merged.columns
+            ):
+                is_event_trending = user_ratings_merged.groupby(
+                    ["userId", "itemId", "timestamp_dt"]
+                )["is_match"].any()
+                num_trending_ratings = is_event_trending.sum()
+                total_unique_rating_events = len(is_event_trending)
+            else:
+                num_trending_ratings = 0
+                total_unique_rating_events = len(
+                    user_ratings.drop_duplicates(
+                        subset=["userId", "itemId", "timestamp_dt"]
+                    )
+                )
+
+            if total_unique_rating_events == 0:
+                individual_scores[members[idx]] = 0.0
+            else:
+                trending_score = num_trending_ratings / total_unique_rating_events
+                individual_scores[members[idx]] = trending_score
+                valid_scores.append(trending_score)
+
+        # Calculate average trending score across all group members
+        # Include users with 0.0 scores (no ratings for the item) in the average
+        all_scores = [individual_scores[user_id] for user_id in members]
+        average_trending_score = sum(all_scores) / len(members) if members else 0.0
+
+        return average_trending_score, individual_scores, hype_periods_for_item
+
+    def generate_ranked_items(
+        self,
+        all_rated_items: List[Union[str, int]],
+        data: DataReader,
+        group_members: List[Union[str, int]],
+        component_weights: Optional[Dict[str, float]] = None,
+    ) -> tuple[List[Union[str, int]], Dict]:
+        """
+        Ranks items based on multiple scoring factors for a group of users.
+
+        Calculates a composite score for each item based on:
+        - Item popularity
+        - Group preference intensity
+        - Predicted ratings
+        - Relevance to the group
+        - Trends in the group
+
+        Args:
+            candidate_items: List of items that at least one group member has interacted with
+            data: The DataReader object containing user-item interactions
+            group_members: List of user identifiers in the group
+            component_weights: Optional dictionary with weights for each component
+                            (popularity, intensity, rating, relevance, trend)
+
+        Returns:
+            List of item IDs sorted in descending order by their composite scores
+        """
+        if self.group_predictions is None:
+            raise ValueError(
+                "User predictions not set. Call set_group_recommender_values first."
+            )
+        if self.top_recommendation is None:
+            raise ValueError(
+                "Top recommendation not set. Call set_group_recommender_values first."
+            )
+
+        # Default weights if not provided
+        if component_weights is None:
+            component_weights = {
+                "popularity": 1.0,
+                "intensity": 1.0,
+                "rating": 1.0,
+                "relevance": 1.0,
+                "trend": 1.0,
+            }
+
+        item_scores = {}
+        item_metric_details = {}
+        popularity_scores = self.calculate_item_popularity_score(all_rated_items, data)
+
+        relevance_mask = self.calculate_relevance_mask(self.top_recommendation)
+
+        for item_id in all_rated_items:
+            # Calculate individual score components
+
+            popularity_score = popularity_scores[item_id]
+
+            intensity_score = self.calculate_item_intensity_score(
+                item_id, group_members, data
+            )
+            rating_score = self.calculate_rating_score(item_id, group_members, data)
+            relevance_score = self.calculate_relevance_score(
+                item_id, data, relevance_mask, group_members
+            )
+
+            trending_score, _, _ = self.calculate_trending_score(
+                group_members,
+                item_id,
+                data,
+                0.3,
+                0.2,
+                9,
+                0.6,
+            )
+
+            composite_score = (
+                component_weights["popularity"] * popularity_score
+                + component_weights["intensity"] * intensity_score
+                + component_weights["rating"] * rating_score
+                + component_weights["relevance"] * relevance_score
+                + component_weights["trend"] * trending_score
+            )
+
+            item_metric_details[item_id] = {
+                "Popularity": popularity_score,
+                "Intensity": intensity_score,
+                "Rating": rating_score,
+                "Relevance": relevance_score,
+                "Trend": trending_score,
+                "Composite Score": composite_score,
+            }
+            item_scores[item_id] = composite_score
+
+        # Sort items by score in descending order
+        ranked_items = sorted(
+            item_scores.items(), key=operator.itemgetter(1), reverse=True
+        )
+
+        # Return the sorted item IDs and the detailed metrics
+        return [item_id for item_id, _ in ranked_items], item_metric_details
@@ -0,0 +1,55 @@
+"""
+Some handy functions for PyTorch model training ...
+"""
+
+import torch
+from torch.optim import Optimizer
+
+
+# Checkpoints
+def save_checkpoint(model, model_dir):
+    torch.save(model.state_dict(), model_dir)
+
+
+def resume_checkpoint(model, model_dir, device_id):
+    device = f"cuda:{device_id}"
+    state_dict = torch.load(model_dir, map_location=device)
+    model.load_state_dict(state_dict)
+
+
+# Hyper params
+def use_cuda(enabled, device_id=0):
+    if enabled:
+        assert torch.cuda.is_available(), "CUDA is not available"
+        torch.cuda.set_device(device_id)
+
+
+def use_optimizer(
+    optimizer_name: str,
+    network: torch.nn.Module,
+    learning_rate: float,
+    momentum: float = 0,
+    weight_decay: float = 0,
+    alpha: float = 0.99,
+) -> Optimizer:
+    if optimizer_name == "sgd":
+        optimizer = torch.optim.SGD(
+            network.parameters(),
+            lr=learning_rate,
+            momentum=momentum,
+            weight_decay=weight_decay,
+        )
+
+    elif optimizer_name == "adam":
+        optimizer = torch.optim.Adam(
+            network.parameters(), lr=learning_rate, weight_decay=weight_decay
+        )
+
+    elif optimizer_name == "rmsprop":
+        optimizer = torch.optim.RMSprop(
+            network.parameters(), lr=learning_rate, alpha=alpha, momentum=momentum
+        )
+    else:
+        raise ValueError(f"Optimizer '{optimizer_name}' is not supported")
+
+    return optimizer