{ "cells": [ { "cell_type": "markdown", "id": "7fa3d250", "metadata": {}, "source": [ "# Imports" ] }, { "cell_type": "code", "execution_count": 1, "id": "6b55c6e8", "metadata": {}, "outputs": [], "source": [ "# Local application/library specific imports\n", "from pygrex.config import cfg\n", "from pygrex.data_reader import DataReader, GroupInteractionHandler\n", "# from pygrex.evaluator import SlidingWindowEvaluator\n", "from pygrex.explain import SlidingWindowExplainer\n", "from pygrex.models import ALS\n", "from pygrex.recommender import GroupRecommender\n", "from pygrex.utils import SlidingWindow, AggregationStrategy\n", "from pygrex.evaluator import run_evaluation_with_proper_split\n", "\n", "\n", "import time\n", "import pandas as pd\n" ] }, { "cell_type": "code", "execution_count": 2, "id": "adbf9967", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "✅ Data preparation complete.\n", "\n", "--- Data Summary ---\n", "👥 Unique Users: 610\n", "📦 Unique Items: 9,724\n", "⭐ Total Ratings: 100,836\n", "👨‍👩‍👧‍👦 Number of Groups: 17\n", "\n", "Processed Ratings DataFrame Head:\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
userIditemIdratingtimestamp
0001964982703
1021964981247
2051964982224
30431964983815
40461964982931
\n", "
" ], "text/plain": [ " userId itemId rating timestamp\n", "0 0 0 1 964982703\n", "1 0 2 1 964981247\n", "2 0 5 1 964982224\n", "3 0 43 1 964983815\n", "4 0 46 1 964982931" ] }, "metadata": {}, "output_type": "display_data" } ],
"source": [
 "# Read the ratings file described by the `cfg.data.test` settings.\n",
 "data = DataReader(**cfg.data.test)\n",
 "# The return values of the two calls below are discarded, so they are relied\n",
 "# on to update `data` in place.\n",
 "# NOTE(review): binary_threshold=1 is a magic number; confirm its exact\n",
 "# meaning against DataReader.binarize before changing it.\n",
 "data.make_consecutive_ids_in_dataset()\n",
 "data.binarize(binary_threshold=1)\n",
 "\n",
 "# Read the file with the group ids\n",
 "GROUPS_FILE = \"groupsWithHighRatings5.txt\"\n",
 "group_handler = GroupInteractionHandler(**cfg.data.groups)\n",
 "available_groups = group_handler.read_groups(GROUPS_FILE)\n",
 "# Fail fast: the group recommendation cell indexes into `available_groups`.\n",
 "assert len(available_groups) > 0, f\"No groups were loaded from {GROUPS_FILE!r}\"\n",
 "print(\"✅ Data preparation complete.\\n\")\n",
 "\n",
 "# --- Display Data Summary ---\n",
 "print(\"--- Data Summary ---\")\n",
 "print(f\"👥 Unique Users: {data.num_user:,}\")\n",
 "print(f\"📦 Unique Items: {data.num_item:,}\")\n",
 "print(f\"⭐ Total Ratings: {len(data.get_raw_dataset()):,}\")\n",
 "print(f\"👨‍👩‍👧‍👦 Number of Groups: {len(available_groups):,}\")\n",
 "print(\"\\nProcessed Ratings DataFrame Head:\")\n",
 "display(data.dataset.head())"
] }, { "cell_type": "markdown", "id": "5fc94aef", "metadata": {}, "source": [ "## Step 2: Model Training & Evaluation\n", "\n", "With the data prepared, we now select and train a recommendation model. We will use **Alternating Least Squares (ALS)**, a matrix factorization technique for implicit feedback. After training, we will evaluate its performance using a train/test split to measure its Hit Ratio and NDCG." ] }, { "cell_type": "code", "execution_count": 3, "id": "8c13c283", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--- 2.1 Model Training ---\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\usuar\\miniconda3\\envs\\pygrex-exp-grs\\Lib\\site-packages\\implicit\\cpu\\als.py:95: RuntimeWarning: OpenBLAS is configured to use 8 threads. 
It is highly recommended to disable its internal threadpool by setting the environment variable 'OPENBLAS_NUM_THREADS=1' or by calling 'threadpoolctl.threadpool_limits(1, \"blas\")'. Having OpenBLAS use a threadpool can lead to severe performance issues here.\n", " check_blas_config()\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "4a2c30b182994b868f98ba8d9d2d7d8f", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/10 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RankItem IDAggregated Score
015434.636274
127574.582981
235644.504107
344414.488708
453794.341830
564754.279482
67434.268454
78194.225248
897484.178329
910644.147735
\n", "" ], "text/plain": [ " Rank Item ID Aggregated Score\n", "0 1 543 4.636274\n", "1 2 757 4.582981\n", "2 3 564 4.504107\n", "3 4 441 4.488708\n", "4 5 379 4.341830\n", "5 6 475 4.279482\n", "6 7 43 4.268454\n", "7 8 19 4.225248\n", "8 9 748 4.178329\n", "9 10 64 4.147735" ] }, "metadata": {}, "output_type": "display_data" } ],
"source": [
 "print(\"--- 3. Group Recommendation ---\")\n",
 "\n",
 "# Select a group and strategy\n",
 "# Fail fast with a clear message instead of a bare IndexError on an empty list.\n",
 "assert len(available_groups) > 0, \"No groups available - check the groups file loaded during data preparation.\"\n",
 "selected_group_id = available_groups[0]  # Let's use the first group as an example\n",
 "group_members = group_handler.parse_group_members(selected_group_id)\n",
 "aggregation_strategy = AggregationStrategy.AVG_PREDICTIONS  # Use the simple average strategy\n",
 "top_k = 10\n",
 "\n",
 "print(f\"Generating Top-{top_k} recommendations for group: {selected_group_id}\")\n",
 "print(f\"👥 Group Members: {group_members}\")\n",
 "print(f\"📊 Aggregation Strategy: {aggregation_strategy.name}\")\n",
 "\n",
 "# --- Generate Recommendations ---\n",
 "# 1. Instantiate the GroupRecommender\n",
 "group_recommender = GroupRecommender(data=data)\n",
 "\n",
 "# 2. Setup the recommendation process\n",
 "group_recommender.setup_recommendation(\n",
 "    model=model,\n",
 "    members=group_members,  # type: ignore\n",
 "    data=data,\n",
 "    aggregation_strategy=aggregation_strategy,\n",
 ")\n",
 "\n",
 "# 3. Get the final recommendation list\n",
 "recommended_items = group_recommender.get_group_recommendations(top_k=top_k)\n",
 "recommendation_scores = group_recommender.get_recommendation_scores()\n",
 "\n",
 "print(\"\\n✅ Recommendations generated successfully!\")\n",
 "\n",
 "# --- Display Results ---\n",
 "# One row per recommended item, ranked from 1. Items that have no entry in\n",
 "# `recommendation_scores` are shown with a score of 0.0.\n",
 "rec_data = [\n",
 "    {\n",
 "        \"Rank\": rank,\n",
 "        \"Item ID\": item_id,\n",
 "        \"Aggregated Score\": recommendation_scores.get(item_id, 0.0),\n",
 "    }\n",
 "    for rank, item_id in enumerate(recommended_items, start=1)  # type: ignore\n",
 "]\n",
 "\n",
 "# Pin the columns so the table keeps its header even when no items are returned.\n",
 "rec_df = pd.DataFrame(rec_data, columns=[\"Rank\", \"Item ID\", \"Aggregated Score\"])\n",
 "print(f\"\\nTop {top_k} Recommended Items:\")\n",
 "display(rec_df)"
] }, { "cell_type": "markdown", "id": "6268a2ed", "metadata": {}, "source": [ "## Step 4: Explanation (Sliding Window)\n", "\n", "Finally, we generate an explanation for one of the recommendations. We will use the **Sliding Window** method to find a counterfactual explanation. This method answers the question: *\"Which minimal set of items, if removed from the group's history, would cause our target item to disappear from the recommendation list?\"*\n" ] }, { "cell_type": "code", "execution_count": 8, "id": "367063db", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--- 4. Counterfactual Explanation (Sliding Window) ---\n", "Generating explanation for recommended item: 543\n", "Sliding Window Size: 3\n", "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "6c1d8da938db475ab005c2378f99feae", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/10 [00:00