public code v1

2026-05-22 10:02:10 +02:00
commit 46a9ecf065
166 changed files with 6982454 additions and 0 deletions
@@ -0,0 +1,162 @@
+import streamlit as st
+import pandas as pd
+import os
+from io import StringIO
+
+#  Library Imports
+from pygrex.data_reader import DataReader, GroupInteractionHandler
+
+#  Page Configuration
+st.set_page_config(page_title="Data Preparation", page_icon="📄", layout="wide")
+
+st.title("📄 Data Preparation")
+
+#  Default File Paths
+DEFAULT_RATINGS_PATH = "datasets/stratigis/ratings.csv"
+DEFAULT_GROUPS_PATH = "datasets/stratigis/groupsWithHighRatings5.txt"
+
+#  Session State Initialization
+if "data_loaded" not in st.session_state:
+    st.session_state.data_loaded = False
+    st.session_state.data_reader = None
+    st.session_state.group_handler = None
+    st.session_state.num_groups = 0
+
+#  DATA INPUT SECTION
+
+#  Ratings Input
+st.header("1. Ratings Data")
+st.markdown(
+    "You can upload your own ratings file or use the default **MovieLens 100k** dataset."
+)
+ratings_file_buffer = st.file_uploader(
+    "Upload Your Ratings Data (Optional)", type=["csv"]
+)
+
+#  Group Input
+st.header("2. Group Data")
+group_input_method = st.radio(
+    "Choose group input method:",
+    ("Enter groups manually", "Upload a group file"),
+    horizontal=True,
+)
+
+#  Load default group data for the text area
+default_group_text = ""
+if os.path.exists(DEFAULT_GROUPS_PATH) and ratings_file_buffer is None:
+    with open(DEFAULT_GROUPS_PATH, "r") as f:
+        default_group_text = f.read()
+
+if group_input_method == "Enter groups manually":
+    group_text_input = st.text_area(
+        "Enter group members (one group per line, members separated by '_')",
+        value=default_group_text,
+        height=150,
+    )
+else:
+    groups_file_buffer = st.file_uploader(
+        "Upload Your Group Data (Optional)", type=["txt"]
+    )
+
+#  Preprocessing Options
+st.header("3. Preprocessing")
+binarize_data = st.checkbox(
+    "Binarize ratings (for implicit feedback models)", value=True
+)
+if binarize_data:
+    binary_threshold = st.number_input(
+        "Rating threshold for binarization", min_value=0.0, value=1.0, step=0.5
+    )
+
+#  Main Loading Logic
+st.header("4. Load and Process")
+if st.button("Load and Process Data", type="primary"):
+    with st.spinner("Processing data..."):
+        try:
+            desired_columns = ["userId", "itemId", "rating", "timestamp"]
+            #  Determine which ratings file to use
+            if ratings_file_buffer:
+                ratings_df = pd.read_csv(
+                    StringIO(ratings_file_buffer.getvalue().decode("utf-8")),
+                    sep=",",
+                    usecols=lambda column: column in desired_columns,
+                )
+            else:
+                if not os.path.exists(DEFAULT_RATINGS_PATH):
+                    st.error(
+                        f"Default ratings file not found at: `{DEFAULT_RATINGS_PATH}`"
+                    )
+                    st.stop()
+                ratings_df = pd.read_csv(
+                    DEFAULT_RATINGS_PATH,
+                    sep=",",
+                    names=desired_columns,
+                    skiprows=1,
+                )
+            ratings_df = ratings_df[desired_columns]
+
+            #  Determine which group data to use and prepare it for the handler
+            temp_dir = "temp/group_data"
+            os.makedirs(temp_dir, exist_ok=True)
+            groups_filepath = os.path.join(temp_dir, "current_groups.txt")
+
+            if group_input_method == "Enter groups manually":
+                with open(groups_filepath, "w") as f:
+                    f.write(group_text_input)  # type: ignore
+                st.session_state.group_filename = os.path.basename(groups_filepath)
+            else:  # File upload method
+                if groups_file_buffer:  # type: ignore
+                    with open(groups_filepath, "wb") as f:
+                        f.write(groups_file_buffer.getbuffer())
+                    st.session_state.group_filename = groups_file_buffer.name
+                else:  # Fallback to default if no file is uploaded
+                    if not os.path.exists(DEFAULT_GROUPS_PATH):
+                        st.error(
+                            f"Default groups file not found at: `{DEFAULT_GROUPS_PATH}`"
+                        )
+                        st.stop()
+                    groups_filepath = DEFAULT_GROUPS_PATH
+                    st.session_state.group_filename = os.path.basename(groups_filepath)
+
+            #  Instantiate library classes and process data
+            data_reader = DataReader(dataframe=ratings_df)
+            group_handler = GroupInteractionHandler(filepath_or_buffer=groups_filepath)
+
+            if binarize_data:
+                data_reader.binarize(binary_threshold=binary_threshold)  # type: ignore
+            data_reader.make_consecutive_ids_in_dataset()
+
+            available_groups = group_handler.read_groups(
+                filename=st.session_state.group_filename
+            )
+
+            #  Store results in session state
+            st.session_state.data_reader = data_reader
+            st.session_state.group_handler = group_handler
+            st.session_state.num_groups = len(available_groups)
+            st.session_state.data_loaded = True
+
+            st.success("✅ Data loaded and processed successfully!")
+
+        except Exception as e:
+            st.error(f"An error occurred: {e}")
+            st.session_state.data_loaded = False
+
+
+#  Enhanced Data Summary
+if st.session_state.data_loaded:
+    st.markdown("")
+    st.header("Data Summary")
+
+    dr = st.session_state.data_reader
+
+    col1, col2 = st.columns(2)
+    with col1:
+        st.metric("👥 Unique Users", f"{dr.num_user:,}")  # type: ignore
+        st.metric("📦 Unique Items", f"{dr.num_item:,}")  # type: ignore
+    with col2:
+        st.metric("⭐ Total Ratings", f"{len(dr.get_raw_dataset()):,}")  # type: ignore
+        st.metric("👨‍👩‍👧‍👦 Number of Groups", f"{st.session_state.num_groups:,}")
+
+    with st.expander("Processed Ratings DataFrame Head:", expanded=True):
+        st.dataframe(dr.dataset.head(), hide_index=True)  # type: ignore
@@ -0,0 +1,956 @@
+import streamlit as st
+import time
+
+#  Library Imports
+from pygrex.models import (
+    ALS,
+    BPR,
+    ExplAutoencoderTorch,
+    EMFModel,
+    GMFModel,
+    MLPModel,
+    SVD,
+    KNNBasic,
+)
+from pygrex.evaluator import (
+    run_leave_one_out_evaluation,
+    run_evaluation_with_proper_split,
+)
+
+st.set_page_config(page_title="Model Training", page_icon="🧠", layout="wide")
+
+st.title("🧠 Model Selection & Training")
+
+#  Check if data is loaded
+if not st.session_state.get("data_loaded", False):
+    st.warning("⚠️ Please load data on the **📄 Data Preparation** page first.")
+    st.stop()  # Stop execution if no data is loaded
+
+#  Model Selection
+st.header("1. Select a Model")
+# As you add more models to your library, you can add them to this list.
+model_option = st.selectbox(
+    "Choose a recommendation model:",
+    ("ALS", "BPR", "Autoencoder", "EMF", "GMF", "MLP", "KNN", "SVD"),
+)
+
+#  Hyperparameter Configuration
+st.header("2. Configure Hyperparameters")
+model_params = {}
+
+if model_option == "ALS":
+    st.subheader("ALS (Alternating Least Squares) Parameters")
+
+    # Create columns for a cleaner layout
+    col1, col2, col3 = st.columns(3)
+    with col1:
+        latent_dim = st.number_input(
+            "Latent Dimensions (factors)",
+            min_value=1,
+            max_value=500,
+            value=100,
+            step=10,
+            help="The number of latent factors to compute.",
+        )
+    with col2:
+        reg_term = st.number_input(
+            "Regularization Term",
+            min_value=0.001,
+            max_value=1.0,
+            value=0.001,
+            step=0.001,
+            format="%.3f",
+            help="The regularization factor.",
+        )
+    with col3:
+        epochs = st.number_input(
+            "Epochs (iterations)",
+            min_value=1,
+            max_value=200,
+            value=10,
+            step=5,
+            help="The number of ALS iterations.",
+        )
+    model_params = {
+        "latent_dim": latent_dim,
+        "reg_term": reg_term,
+        "epochs": epochs,
+    }
+
+
+elif model_option == "BPR":
+    st.subheader("BPR (Bayesian Personalised Ranking) Parameters")
+
+    # First Row
+    col1_r1, col2_r1, col3_r1 = st.columns(3)
+    with col1_r1:
+        latent_dim = st.number_input(
+            "Latent Dimensions (factors)",
+            min_value=1,
+            max_value=500,
+            value=100,
+            step=10,
+            help="The number of latent factors to compute.",
+        )
+    with col2_r1:
+        reg_term = st.number_input(
+            "Regularization Term",
+            min_value=0.001,
+            max_value=1.0,
+            value=0.001,
+            step=0.001,
+            format="%.3f",
+            help="The regularization factor.",
+        )
+    with col3_r1:
+        epochs = st.number_input(
+            "Epochs (iterations)",
+            min_value=1,
+            max_value=200,
+            value=10,
+            step=5,
+            help="The number of ALS iterations.",
+        )
+
+    # Second Row
+    col1_r2, col2_r2, col3_r2 = st.columns(3)
+
+    with col1_r2:
+        learning_rate = st.number_input(
+            "Learning Rate",
+            min_value=0.0,
+            max_value=0.1,
+            value=0.01,
+            step=0.01,
+            format="%.2f",
+            help="The step size at each iteration while moving toward a minimum of the loss function.",
+        )
+    model_params = {
+        "latent_dim": latent_dim,
+        "reg_term": reg_term,
+        "epochs": epochs,
+        "learning_rate": learning_rate,
+    }
+
+elif model_option == "Autoencoder":
+    st.subheader("Autoencoder Parameters")
+    # First Row
+    col1_r1, col2_r1, col3_r1 = st.columns(3)
+
+    with col1_r1:
+        learning_rate = st.number_input(
+            "Learning Rate",
+            min_value=0.0001,
+            max_value=0.1,
+            value=0.005,
+            step=0.001,
+            format="%.4f",
+            help="The step size at each iteration while moving toward a minimum of the loss function.",
+        )
+
+    with col2_r1:
+        weight_decay = st.number_input(
+            "Weight Decay",
+            min_value=0.0000001,
+            max_value=0.0001,
+            value=0.0000001,
+            step=0.0000001,
+            format="%.7f",
+            help="The regularization factor to prevent overfitting by penalizing large weights.",
+        )
+
+    with col3_r1:
+        hidden_layer_features = st.number_input(
+            "Hidden Layer Features",
+            min_value=4,
+            max_value=128,
+            value=8,
+            step=4,
+            help="The number of features in the hidden layers of the neural network.",
+        )
+
+    # Second Row
+    col1_r2, col2_r2, col3_r2 = st.columns(3)
+
+    with col1_r2:
+        epochs = st.number_input(
+            "Epochs (iterations)",
+            min_value=1,
+            max_value=200,
+            value=30,
+            step=5,
+            help="The number of complete passes through the entire training dataset.",
+        )
+
+    with col2_r2:
+        cuda = st.checkbox(
+            "Use CUDA (GPU)",
+            value=False,
+            help="Check to use NVIDIA CUDA for GPU acceleration if available.",
+        )
+
+    with col3_r2:
+        optimizer_name = st.selectbox(
+            "Optimizer",
+            options=["adam", "sgd", "rmsprop"],
+            index=0,  # 'adam'
+            help="The optimization algorithm to use for training the model.",
+        )
+
+    # Third Row
+    col1_r3, col2_r3, col3_r3 = st.columns(3)
+
+    with col1_r3:
+        positive_threshold = st.number_input(
+            "Positive Threshold",
+            min_value=1,
+            max_value=5,
+            value=3,
+            step=1,
+            help="The minimum rating value considered as a 'positive' interaction.",
+        )
+
+    with col2_r3:
+        knn = st.number_input(
+            "K-Nearest Neighbors (KNN)",
+            min_value=1,
+            max_value=50,
+            value=10,
+            step=1,
+            help="The number of nearest neighbors to consider for KNN-based models.",
+        )
+
+    with col3_r3:
+        expl = st.checkbox(
+            "Enable Explanations",
+            value=True,
+            help="Check to enable model explanations or interpretability features.",
+        )
+    model_params = {
+        "learning_rate": learning_rate,
+        "weight_decay": weight_decay,
+        "hidden_layer_features": hidden_layer_features,
+        "epochs": epochs,
+        "cuda": cuda,
+        "optimizer_name": optimizer_name,
+        "positive_threshold": positive_threshold,
+        "knn": knn,
+        "expl": expl,
+    }
+
+elif model_option == "EMF":
+    st.subheader("EMF (Explainable Matrix Factorisation) Parameters")
+
+    # First Row
+    col1_r1, col2_r1, col3_r1 = st.columns(3)
+
+    with col1_r1:
+        learning_rate = st.number_input(
+            "Learning Rate",
+            min_value=0.0001,
+            max_value=0.1,
+            value=0.01,
+            step=0.001,
+            format="%.4f",
+            help="The step size at each iteration for the EMF model.",
+        )
+
+    with col2_r1:
+        reg_term = st.number_input(
+            "Regularization Term",
+            min_value=0.0001,
+            max_value=1.0,
+            value=0.001,
+            step=0.001,
+            format="%.4f",
+            help="The regularization factor for the main matrix factorization components.",
+        )
+
+    with col3_r1:
+        expl_reg_term = st.number_input(
+            "Explanation Regularization Term",
+            min_value=0.0,
+            max_value=1.0,
+            value=0.0,
+            step=0.001,
+            format="%.4f",
+            help="The regularization factor for the explanation components in EMF.",
+        )
+
+    # Second Row
+    col1_r2, col2_r2, col3_r2 = st.columns(3)
+
+    with col1_r2:
+        latent_dim = st.number_input(
+            "Latent Dimension",
+            min_value=10,
+            max_value=200,
+            value=80,
+            step=10,
+            help="The number of latent factors used in the matrix factorization.",
+        )
+
+    with col2_r2:
+        epochs = st.number_input(
+            "Epochs (iterations)",
+            min_value=1,
+            max_value=200,
+            value=10,
+            step=5,
+            help="The number of complete passes through the entire training dataset for EMF.",
+        )
+
+    with col3_r2:
+        positive_threshold = st.number_input(
+            "Positive Threshold",
+            min_value=1,
+            max_value=5,
+            value=3,
+            step=1,
+            help="The minimum rating value considered as a 'positive' interaction for EMF.",
+        )
+
+    # Third Row
+    col1_r3, col2_r3, col3_r3 = st.columns(3)
+
+    with col1_r3:
+        knn = st.number_input(
+            "K-Nearest Neighbors (KNN)",
+            min_value=1,
+            max_value=50,
+            value=10,
+            step=1,
+            help="The number of nearest neighbors to consider for KNN-based aspects of EMF.",
+        )
+    model_params = {
+        "learning_rate": learning_rate,
+        "reg_term": reg_term,
+        "expl_reg_term": expl_reg_term,
+        "latent_dim": latent_dim,
+        "epochs": epochs,
+        "positive_threshold": positive_threshold,
+        "knn": knn,
+    }
+
+elif model_option == "GMF":
+    st.subheader("GMF (Generalised Matrix Factorisation) Parameters")
+
+    # First Row
+    col1_r1, col2_r1, col3_r1 = st.columns(3)
+
+    with col1_r1:
+        learning_rate = st.number_input(
+            "Learning Rate",
+            min_value=0.0001,
+            max_value=0.1,
+            value=0.005,
+            step=0.001,
+            format="%.4f",
+            help="The step size at each iteration for the GMF model.",
+        )
+
+    with col2_r1:
+        weight_decay = st.number_input(
+            "Weight Decay",
+            min_value=0.0000001,
+            max_value=0.0001,
+            value=0.0000001,
+            step=0.0000001,
+            format="%.7f",
+            help="The regularization factor to prevent overfitting in GMF.",
+        )
+
+    with col3_r1:
+        latent_dim = st.number_input(
+            "Latent Dimension",
+            min_value=4,
+            max_value=128,
+            value=8,
+            step=4,
+            help="The number of latent factors for users and items in GMF.",
+        )
+
+    # Second Row
+    col1_r2, col2_r2, col3_r2 = st.columns(3)
+
+    with col1_r2:
+        epochs = st.number_input(
+            "Epochs (iterations)",
+            min_value=1,
+            max_value=200,
+            value=30,
+            step=5,
+            help="The number of complete passes through the training data for GMF.",
+        )
+
+    with col2_r2:
+        num_negative = st.number_input(
+            "Number of Negative Samples",
+            min_value=1,
+            max_value=100,
+            value=10,
+            step=1,
+            help="The number of negative samples per positive interaction during training.",
+        )
+
+    with col3_r2:
+        batch_size = st.number_input(
+            "Batch Size",
+            min_value=64,
+            max_value=4096,
+            value=1024,
+            step=64,
+            help="The number of samples per gradient update.",
+        )
+
+    # Third Row
+    col1_r3, col2_r3, col3_r3 = st.columns(3)
+
+    with col1_r3:
+        cuda = st.checkbox(
+            "Use CUDA (GPU)",
+            value=False,
+            help="Check to use NVIDIA CUDA for GPU acceleration if available for GMF.",
+        )
+
+    with col2_r3:
+        optimizer_name = st.selectbox(
+            "Optimizer",
+            options=["adam", "sgd", "rmsprop"],
+            index=0,  # 'adam'
+            help="The optimization algorithm to use for training the GMF model.",
+        )
+
+    # col3_r3 is left empty here if no further parameters for GMF
+    model_params = {
+        "learning_rate": learning_rate,
+        "weight_decay": weight_decay,
+        "latent_dim": latent_dim,
+        "epochs": epochs,
+        "num_negative": num_negative,
+        "batch_size": batch_size,
+        "cuda": cuda,
+        "optimizer_name": optimizer_name,
+    }
+
+elif model_option == "MLP":
+    st.subheader("MLP (Multi-Layer Perceptron) Parameters")
+
+    # First Row
+    col1_r1, col2_r1, col3_r1 = st.columns(3)
+
+    with col1_r1:
+        learning_rate = st.number_input(
+            "Learning Rate",
+            min_value=0.0001,
+            max_value=0.1,
+            value=0.005,
+            step=0.001,
+            format="%.4f",
+            help="The step size at each iteration for the MLP model.",
+        )
+
+    with col2_r1:
+        weight_decay = st.number_input(
+            "Weight Decay",
+            min_value=0.0000001,
+            max_value=0.0001,
+            value=0.0000001,
+            step=0.0000001,
+            format="%.7f",
+            help="The regularization factor to prevent overfitting in MLP.",
+        )
+
+    with col3_r1:
+        latent_dim = st.number_input(
+            "Latent Dimension",
+            min_value=4,
+            max_value=128,
+            value=8,
+            step=4,
+            help="The number of latent factors for users and items in MLP.",
+        )
+
+    # Second Row
+    col1_r2, col2_r2, col3_r2 = st.columns(3)
+
+    with col1_r2:
+        epochs = st.number_input(
+            "Epochs (iterations)",
+            min_value=1,
+            max_value=200,
+            value=30,
+            step=5,
+            help="The number of complete passes through the training data for MLP.",
+        )
+
+    with col2_r2:
+        num_negative = st.number_input(
+            "Number of Negative Samples",
+            min_value=1,
+            max_value=100,
+            value=10,
+            step=1,
+            help="The number of negative samples per positive interaction during MLP training.",
+        )
+
+    with col3_r2:
+        batch_size = st.number_input(
+            "Batch Size",
+            min_value=64,
+            max_value=4096,
+            value=1024,
+            step=64,
+            help="The number of samples per gradient update for MLP.",
+        )
+
+    # Third Row
+    col1_r3, col2_r3, col3_r3 = st.columns(3)
+
+    with col1_r3:
+        cuda = st.checkbox(
+            "Use CUDA (GPU)",
+            value=False,
+            help="Check to use NVIDIA CUDA for GPU acceleration if available for MLP.",
+        )
+
+    with col2_r3:
+        optimizer_name = st.selectbox(
+            "Optimizer",
+            options=["adam", "sgd", "rmsprop"],
+            index=0,  # 'adam'
+            help="The optimization algorithm to use for training the MLP model.",
+        )
+
+    # col3_r3 is left empty here if no further parameters for MLP
+    model_params = {
+        "learning_rate": learning_rate,
+        "weight_decay": weight_decay,
+        "latent_dim": latent_dim,
+        "epochs": epochs,
+        "num_negative": num_negative,
+        "batch_size": batch_size,
+        "cuda": cuda,
+        "optimizer_name": optimizer_name,
+    }
+
+elif model_option == "KNN":
+    st.subheader("KNN (K-Nearest Neighbors) Parameters")
+
+    # First Row
+    col1_r1, col2_r1, col3_r1 = st.columns(3)
+
+    with col1_r1:
+        k_neighbors = st.number_input(
+            "Number of Neighbors (k)",
+            min_value=1,
+            max_value=100,
+            value=50,
+            step=1,
+            help="The number of nearest neighbors to consider for making predictions.",
+        )
+
+    with col2_r1:
+        min_k_neighbors = st.number_input(
+            "Minimum Number of Neighbors",
+            min_value=1,
+            max_value=20,
+            value=3,
+            step=1,
+            help="The minimum number of neighbors required to make a prediction.",
+        )
+
+    with col3_r1:
+        similarity_type = st.selectbox(
+            "Similarity Metric",
+            options=["cosine", "pearson"],
+            index=1,  # 'pearson'
+            help="The similarity metric to use for finding nearest neighbors.",
+        )
+
+    # Second Row
+    col1_r2, col2_r2, col3_r2 = st.columns(3)
+
+    with col1_r2:
+        boolean_user_based = st.checkbox(
+            "User-Based Collaborative Filtering",
+            value=True,
+            help="Check to use user-based collaborative filtering; uncheck for item-based.",
+        )
+
+    # col2_r2 and col3_r2 is left empty here if no further parameters for KNN
+    model_params = {
+        "k_neighbors": k_neighbors,
+        "min_k_neighbors": min_k_neighbors,
+        "similarity_type": similarity_type,
+        "boolean_user_based": boolean_user_based,
+    }
+
+elif model_option == "SVD":
+    st.subheader("SVD Parameters")
+    N_FACTORS = 64
+    N_EPOCHS = 30
+    LEARNING_RATE = 0.005
+    REGULARIZATION = 0.08
+    RANDOM_STATE = 42
+    # First Row
+    col1_r1, col2_r1, col3_r1 = st.columns(3)
+    with col1_r1:
+        n_factors = st.number_input(
+            "Latent Dimensions (factors)",
+            min_value=1,
+            max_value=100,
+            value=64,
+            step=1,
+            help="The number of latent factors to compute.",
+        )
+
+    with col2_r1:
+        n_epochs = st.number_input(
+            "Epochs (iterations)",
+            min_value=1,
+            max_value=35,
+            value=30,
+            step=1,
+            help="The number of model iterations.",
+        )
+
+    with col3_r1:
+        learning_rater = st.number_input(
+            "Learning Rate",
+            min_value=0.001,
+            max_value=0.050,
+            value=0.005,
+            help="The step size at each iteration while moving toward a minimum of the loss function.",
+        )
+
+    # Second Row
+    col1_r2, col2_r2, col3_r2 = st.columns(3)
+
+    with col1_r2:
+        early_stopping = st.checkbox(
+            "Enable Early Stopping",
+            value=False,
+            help="Check to stop training when validation performance degrades.",
+        )
+    with col2_r2:
+        reg = st.number_input(
+            "Regularization Term",
+            min_value=0.01,
+            max_value=0.50,
+            value=0.08,
+            step=0.01,
+            format="%.3f",
+            help="The regularization factor.",
+        )
+    with col3_r2:
+        init_mean = st.number_input(
+            "Initialization Mean",
+            min_value=0.0,
+            max_value=0.5,
+            value=0.0,
+            step=0.01,
+            format="%.2f",
+            help="The mean for initializing latent factors.",
+        )
+    # Third Row
+
+    col1_r3, col2_r3, col3_r3 = st.columns(3)
+    with col1_r3:
+        init_std = st.number_input(
+            "Initialization Standard Deviation",
+            min_value=0.00,
+            max_value=0.5,
+            value=0.0,
+            step=0.01,
+            format="%.2f",
+            help="The standard deviation for initializing latent factors.",
+        )
+    with col2_r3:
+        random_state = st.number_input(
+            "Random State (Seed)",
+            min_value=1,
+            max_value=100,
+            value=42,
+            step=1,
+            help="The seed for random number generation to ensure reproducibility.",
+        )
+    # col3_r3 is left empty here if no further parameters for SVD
+    model_params = {
+        "n_factors": n_factors,
+        "n_epochs": n_epochs,
+        "learning_rater": learning_rater,
+        "reg": reg,
+        "init_mean": init_mean,
+        "init_std": init_std,
+        "random_state": random_state,
+        "early_stopping": early_stopping,
+    }
+else:
+    st.info(f"Configuration for **{model_option}** is not yet implemented.")
+    st.stop()
+
+#  Model Training
+st.header("3. Train the Model")
+
+if st.button("Train Model", type="primary"):
+    with st.spinner(f"Training **{model_option}** model... This may take a moment."):
+        try:
+            # Retrieve the data_reader object from session state
+            data_reader = st.session_state.data_reader
+            model = None
+            # 1. Instantiate the model with user-defined hyperparameters
+            if model_option == "ALS":
+                model = ALS(**model_params)
+            elif model_option == "BPR":
+                model = BPR(**model_params)
+            elif model_option == "Autoencoder":
+                autoencoder_params = {
+                    k: v
+                    for k, v in model_params.items()
+                    if k not in ["num_users", "num_items"]
+                }
+                model = ExplAutoencoderTorch(**autoencoder_params)
+            elif model_option == "EMF":
+                emf_params = {
+                    k: v
+                    for k, v in model_params.items()
+                    if k not in ["num_users", "num_items"]
+                }
+                model = EMFModel(**emf_params)
+            elif model_option == "GMF":
+                gmf_params = {
+                    k: v
+                    for k, v in model_params.items()
+                    if k not in ["num_users", "num_items"]
+                }
+                model = GMFModel(**gmf_params)
+            elif model_option == "MLP":
+                mlp_params = {
+                    k: v
+                    for k, v in model_params.items()
+                    if k not in ["num_users", "num_items"]
+                }
+                model = MLPModel(**mlp_params)
+            elif model_option == "KNN":
+                if "k_neighbors" in model_params:
+                    model_params["k"] = model_params.pop("k_neighbors")
+                knn_params = {
+                    k: v
+                    for k, v in model_params.items()
+                    if k not in ["num_users", "num_items"]
+                }
+                model = KNNBasic(**knn_params)
+            elif model_option == "SVD":
+                if "learning_rater" in model_params:
+                    model_params["lr"] = model_params.pop("learning_rater")
+                svd_params = {
+                    k: v
+                    for k, v in model_params.items()
+                    if k not in ["num_users", "num_items"]
+                }
+                model = SVD(**svd_params)
+            if model:
+                start_time = time.time()
+                # 2. Fit the model using the processed dataset
+                model.fit(data_reader)
+                end_time = time.time()
+                training_time = end_time - start_time
+                # 3. Store the trained model in session state for the next page
+                st.session_state.trained_model = model
+                st.session_state.model_name = model_option
+
+                st.success(
+                    f"✅ **{model_option}** model trained successfully in {training_time:.2f} seconds!"
+                )
+
+        except Exception as e:
+            st.error(f"An error occurred during model training: {e}")
+            if "trained_model" in st.session_state:
+                del st.session_state.trained_model
+
+if "trained_model" in st.session_state:
+    st.markdown("")
+    st.header("4. Offline Model Evaluation")
+
+    # Evaluation panel: lets the user pick an evaluation protocol, re-instantiates
+    # the selected model from `model_option` / `model_params`, runs the evaluation
+    # and renders the scores kept in `st.session_state`.
+    with st.expander("🔬 Run Model Evaluation", expanded=True):
+        st.markdown("""
+        Choose your evaluation method:
+        - **Leave-One-Out**: More thorough but slower (recommended for final evaluation)
+        - **Train/Test Split**: Faster and practical for iterative testing
+        
+        **Metrics Explained:**
+        - **Hit Ratio @10**: Percentage of users for whom we found at least one relevant item in top-10
+        - **NDCG @10**: Measures ranking quality - higher values mean better ranking of relevant items
+        """)
+
+        # Evaluation method selection
+        eval_method = st.radio(
+            "Select Evaluation Method:",
+            ["Train/Test Split (Fast)", "Leave-One-Out (Thorough)"],
+            index=0,
+        )
+
+        # Parameters
+        col1, col2 = st.columns(2)
+        with col1:
+            # Fallback value; the slider below replaces it for the split method.
+            test_size = 0.2
+            if eval_method == "Train/Test Split (Fast)":
+                test_size = st.slider("Test Set Size (%)", 10, 30, 20) / 100
+            eval_top_n = st.number_input("Top-N for evaluation", 1, 20, 10)
+
+        with col2:
+            if eval_method == "Leave-One-Out (Thorough)":
+                st.info("Leave-one-out will use 1 item per user for testing")
+
+        # Button key derived from the method label: spaces become "_" and the
+        # parentheses are dropped.
+        eval_button_key = "run_eval_" + eval_method.translate(
+            str.maketrans(" ", "_", "()")
+        )
+
+        if st.button("Run Evaluation", key=eval_button_key, type="primary"):
+            with st.spinner(
+                f"Running {eval_method.lower()} evaluation... Please wait."
+            ):
+                try:
+                    # Get the model configuration for re-instantiation
+                    model_name = st.session_state.model_name
+                    data_reader = st.session_state.data_reader
+
+                    # Work on a copy: renaming keys directly in `model_params`
+                    # would leak the evaluation-only names into the shared dict.
+                    eval_params = dict(model_params)
+                    if model_option == "KNN" and "k_neighbors" in eval_params:
+                        eval_params["k"] = eval_params.pop("k_neighbors")
+                    elif model_option == "SVD" and "learning_rater" in eval_params:
+                        # NOTE(review): "learning_rater" looks like a typo for
+                        # "learning_rate" - confirm against the code that builds
+                        # `model_params` before changing it.
+                        eval_params["lr"] = eval_params.pop("learning_rater")
+                    if model_option not in ("ALS", "BPR"):
+                        # Every model except ALS/BPR is rebuilt without these keys.
+                        eval_params.pop("num_users", None)
+                        eval_params.pop("num_items", None)
+
+                    # Re-instantiate model with same parameters
+                    if model_option == "ALS":
+                        eval_model = ALS(**eval_params)
+                    elif model_option == "BPR":
+                        eval_model = BPR(**eval_params)
+                    elif model_option == "Autoencoder":
+                        eval_model = ExplAutoencoderTorch(**eval_params)
+                    elif model_option == "EMF":
+                        eval_model = EMFModel(**eval_params)
+                    elif model_option == "GMF":
+                        eval_model = GMFModel(**eval_params)
+                    elif model_option == "MLP":
+                        eval_model = MLPModel(**eval_params)
+                    elif model_option == "KNN":
+                        eval_model = KNNBasic(**eval_params)
+                    elif model_option == "SVD":
+                        eval_model = SVD(**eval_params)
+                    else:
+                        st.error(f"Evaluation not implemented for {model_name}")
+                        st.stop()
+
+                    # Run the appropriate evaluation
+                    if eval_method == "Leave-One-Out (Thorough)":
+                        evaluation_scores = run_leave_one_out_evaluation(
+                            data_reader=data_reader,
+                            model=eval_model,
+                            top_n=eval_top_n,
+                        )
+                    else:  # Train/Test Split
+                        evaluation_scores = run_evaluation_with_proper_split(
+                            data_reader=data_reader,
+                            model=eval_model,
+                            test_size=test_size,
+                            top_n=eval_top_n,
+                        )
+
+                    # Store results together with the settings that produced
+                    # them, so the display below stays accurate after the
+                    # widgets are changed.
+                    st.session_state.evaluation_scores = evaluation_scores
+                    st.session_state.eval_method = eval_method
+                    st.session_state.eval_top_n = eval_top_n
+
+                except Exception as e:
+                    st.error(f"Evaluation failed: {str(e)}")
+                    st.exception(e)
+
+        # Display results if available
+        if "evaluation_scores" in st.session_state:
+            st.markdown("")
+            st.subheader("📊 Evaluation Results")
+
+            scores = st.session_state.evaluation_scores
+            method = st.session_state.get("eval_method", "")
+            # Top-N of the stored run; falls back to the widget value for
+            # scores saved before this key existed.
+            shown_top_n = st.session_state.get("eval_top_n", eval_top_n)
+
+            # Metrics display
+            col1, col2, col3 = st.columns(3)
+
+            with col1:
+                st.metric(
+                    label=f"Hit Ratio @{shown_top_n}",
+                    value=f"{scores.get('Hit Ratio', 0.0):.2%}",
+                    help=f"Percentage of test users for whom at least one relevant item was found in top-{shown_top_n}",
+                )
+
+            with col2:
+                # Scores may report the ranking metric under either key.
+                ndcg_value = scores.get("NDCG", scores.get("eNDCG", 0.0))
+                st.metric(
+                    label=f"NDCG @{shown_top_n}",
+                    value=f"{ndcg_value:.4f}",
+                    help="Normalized Discounted Cumulative Gain - measures ranking quality",
+                )
+
+            with col3:
+                st.metric(
+                    label="Evaluation Time",
+                    value=f"{scores.get('evaluation_time', 0):.1f}s",
+                    help="Time taken to complete the evaluation",
+                )
+
+            # Additional info
+            if "test_interactions" in scores:
+                st.info(
+                    f"📈 Evaluated on {scores['test_interactions']:,} test interactions using {method}"
+                )
+
+            # Performance interpretation
+            hit_ratio = scores.get("Hit Ratio", 0.0)
+            ndcg = ndcg_value
+
+            st.markdown("### 🎯 Performance Interpretation")
+
+            # Both metrics must clear a tier's thresholds to earn that tier.
+            if hit_ratio > 0.15 and ndcg > 0.08:
+                st.success(
+                    "🎉 Excellent performance! Your model shows strong recommendation capability."
+                )
+            elif hit_ratio > 0.08 and ndcg > 0.04:
+                st.success("✅ Good performance! Your model is working well.")
+            elif hit_ratio > 0.03 and ndcg > 0.02:
+                st.warning(
+                    "⚠️ Moderate performance. Consider tuning hyperparameters or trying a different model."
+                )
+            else:
+                st.error(
+                    "❌ Poor performance. The model may need significant improvements."
+                )
+
+    st.info("Navigate to the **🎯 Group Recommendation** page to continue.")
@@ -0,0 +1,156 @@
+import streamlit as st
+import pandas as pd
+
+from pygrex.recommender import GroupRecommender
+from pygrex.utils import AggregationStrategy
+
+st.set_page_config(page_title="Group Recommendation", page_icon="🎯", layout="wide")
+st.title("🎯 Group Recommendation")
+
+#  Session State Checks
+# Each prerequisite is a session-state entry paired with the hint shown when it
+# is missing or falsy; the page halts at the first unmet one.
+for _required_key, _hint in (
+    ("data_loaded", "⚠️ Please load data on the **📄 Data Preparation** page first."),
+    ("trained_model", "⚠️ Please train a model on the **🧠 Model Training** page first."),
+):
+    if not st.session_state.get(_required_key, False):
+        st.warning(_hint)
+        st.stop()
+
+#  Retrieve objects from session state
+data_reader, group_handler, model, model_name = (
+    st.session_state.data_reader,
+    st.session_state.group_handler,
+    st.session_state.trained_model,
+    st.session_state.model_name,
+)
+
+#  Recommendation Setup
+# Lets the user pick a group (read from the stored group file) and an
+# aggregation strategy; any failure in this section is reported and halts the page.
+st.header("1. Select a Group and Strategy")
+
+group_filename = st.session_state.group_filename
+
+# Bound up front so the code below can always reference it. Without this, an
+# empty selection left the name undefined and picking "Most Respected Person"
+# raised a NameError that was reported as a group-file read failure.
+group_members = []
+
+try:
+    available_groups = group_handler.read_groups(filename=group_filename)
+
+    col1, col2 = st.columns(2)
+    with col1:
+        selected_group_id = st.selectbox(
+            "Choose a group:",
+            options=available_groups,
+            help="These groups were loaded from your group data file.",
+        )
+
+    # Parse and display members of the selected group
+    if selected_group_id:
+        group_members = group_handler.parse_group_members(selected_group_id)
+        st.write("👥 **Group Members:**", ", ".join(map(str, group_members)))
+
+    with col2:
+        # Use the AggregationStrategy Enum to populate the selectbox
+        agg_strategy_enum = st.selectbox(
+            "Choose an aggregation strategy:",
+            options=list(AggregationStrategy),
+            format_func=lambda x: x.name.replace("_", " ").title(),
+            help="Select the method for combining individual member preferences.",
+        )
+
+    # Conditional Input for Most Respected Person
+    mrp_id = None
+    if agg_strategy_enum == AggregationStrategy.MOST_RESPECTED_PERSON:
+        # With no group selected the option list is empty and mrp_id stays None.
+        mrp_id = st.selectbox(
+            "Select the Most Respected Person:",
+            options=group_members,
+            help="This user's preferences will solely determine the group recommendation.",
+        )
+
+except Exception as e:
+    st.error(f"Could not read groups from file '{group_filename}'. Error: {e}")
+    st.stop()
+
+# Top-K Configuration
+# Length of the final recommendation list: 1 to 50, defaulting to 10.
+st.header("2. Specify Number of Recommendations")
+top_k = st.slider(
+    "Number of items to recommend (Top-K):",
+    1,
+    50,
+    10,
+    help="Adjust the slider to change the length of the final recommendation list.",
+)
+
+# Generate Recommendations
+# On click: build a GroupRecommender for the selected group, compute the top-K
+# list and keep both the recommender and the list in session state.
+st.header("3. Generate and View Recommendations")
+
+generate_clicked = st.button("Generate Group Recommendations", type="primary")
+
+if generate_clicked and not selected_group_id:
+    st.warning("Please select a group first.")
+elif generate_clicked:
+    with st.spinner("Generating recommendations..."):
+        try:
+            # 1. Instantiate the GroupRecommender
+            group_recommender = GroupRecommender(data=data_reader)
+
+            # 2. Setup the recommendation process
+            setup_kwargs = {
+                "model": model,
+                "members": group_members,  # type: ignore
+                "data": data_reader,
+                "aggregation_strategy": agg_strategy_enum,
+                "most_respected_person": mrp_id,
+            }
+            group_recommender.setup_recommendation(**setup_kwargs)
+
+            # 3. Get the final recommendation list
+            recommended_items = group_recommender.get_group_recommendations(
+                top_k=top_k
+            )
+
+            # Store the recommender instance for the explanation page
+            st.session_state.group_recommender = group_recommender
+            st.session_state.recommended_items = recommended_items
+
+            st.success("✅ Recommendations generated successfully!")
+
+        except Exception as e:
+            st.error(f"An error occurred while generating recommendations: {e}")
+
+
+#  Display Results
+# Renders the recommendation list kept in session state, together with the
+# aggregated score of each item and the per-user predictions behind it.
+if "recommended_items" in st.session_state:
+    st.markdown("")
+
+    # Tolerate a recommender that stored None instead of a list.
+    stored_items = st.session_state.recommended_items
+    shown_items = [] if stored_items is None else list(stored_items)
+
+    # Label the table with the size of the stored list, not the live slider
+    # value, which may have been moved since the list was generated.
+    if shown_items:
+        st.subheader(f"Top {len(shown_items)} Recommended Items")
+    else:
+        st.subheader("Recommended Items")
+
+    recommender = st.session_state.group_recommender
+    scores = recommender.get_recommendation_scores()
+
+    # Create a DataFrame for nice display; items without a score show 0.0
+    rec_data = [
+        {
+            "Rank": rank,
+            "Item ID": item_id,
+            "Aggregated Score": scores.get(item_id, 0.0),
+        }
+        for rank, item_id in enumerate(shown_items, start=1)
+    ]
+
+    if not rec_data:
+        st.info("No recommendations were generated for this group.")
+    else:
+        st.dataframe(pd.DataFrame(rec_data), use_container_width=True, hide_index=True)
+
+        #  Show detailed individual predictions
+        with st.expander("🔍 View Individual Predictions"):
+            individual_preds = recommender.get_individual_predictions()
+            if individual_preds:
+                # Convert to a more readable DataFrame
+                df_preds = pd.DataFrame(
+                    individual_preds
+                ).T  # Transpose to have users as rows
+                df_preds.index.name = "User ID"
+                st.write(
+                    "Predicted scores (1-5 scale) for each user on items in the candidate pool:"
+                )
+                # Only the first 10 rows are shown.
+                st.dataframe(df_preds.head(10))
+            else:
+                st.write("No individual predictions available.")
+
+    st.info(
+        "Navigate to the **💬 Explanation & Evaluation** page to analyze these recommendations."
+    )