# NOTE: file-viewer header residue ("257 lines, 9.5 KiB, Python") was captured here; it is not part of the test module.
import pytest
|
|
import numpy as np
|
|
import pandas as pd
|
|
from unittest.mock import MagicMock
|
|
|
|
# Import the class to be tested
|
|
from pygrex.data_reader.data_reader import DataReader
|
|
from pygrex.utils.sliding_window_ranker import SlidingWindowRanker
|
|
|
|
|
|
@pytest.fixture
def mock_data_reader():
    """Provide a ``MagicMock`` (spec'd on ``DataReader``) with a tiny dataset.

    The mock exposes:

    * ``dataset`` - a seven-row DataFrame with the columns ``userId``,
      ``itemId``, ``rating`` and ``timestamp``.
    * ``get_new_user_id`` / ``get_new_item_id`` - identity functions, so the
      IDs used in the tests are passed through unchanged.
    """
    # One tuple per interaction: (userId, itemId, rating, timestamp).
    interaction_rows = [
        (1, 101, 4.0, 1700000000),
        (1, 102, 3.5, 1700600000),
        (2, 101, 5.0, 1701200000),
        (2, 103, 2.0, 1701800000),
        (3, 102, 4.5, 1702400000),
        (3, 104, 3.0, 1703000000),
        (4, 105, 3.8, 1703600000),
    ]
    interactions = pd.DataFrame(
        interaction_rows,
        columns=["userId", "itemId", "rating", "timestamp"],
    )

    reader = MagicMock(spec=DataReader)
    reader.dataset = interactions

    # ID mapping is the identity: whatever ID goes in comes back out.
    reader.get_new_user_id = lambda user_id: user_id
    reader.get_new_item_id = lambda item_id: item_id

    return reader
|
|
|
|
|
|
@pytest.fixture
def evaluator():
    """Provide the ``SlidingWindowRanker`` under test, built from a one-key config."""
    return SlidingWindowRanker({"test_param": "test_value"})
|
|
|
|
|
|
@pytest.fixture
def group_predictions():
    """Provide predicted ratings as ``{user_id: {item_id: score}}``.

    Coverage is intentionally uneven: item 101 has a prediction for every
    user, item 105 only for user 4, and the other items for a subset of users.
    """
    predictions_by_user = {
        1: {101: 4.2, 102: 3.7, 103: 2.5, 104: 1.8},
        2: {101: 4.8, 102: 3.1, 103: 2.2},
        3: {101: 3.9, 102: 4.3, 104: 3.2},
        4: {101: 3.5, 105: 4.0},
    }
    return predictions_by_user
|
|
|
|
|
|
class TestSlidingWindowEvaluator:
    """Tests for ``SlidingWindowRanker`` driven by the module's fixtures."""

    def test_initialization(self, evaluator):
        """A new ranker keeps its config and starts with no group state."""
        expected_config = {"test_param": "test_value"}

        assert evaluator.config == expected_config
        assert evaluator.group_predictions is None
        assert evaluator.top_recommendation is None

    def test_set_group_recommender_values(self, evaluator, group_predictions):
        """The setter stores both the predictions and the top recommendation."""
        top_item = 101

        evaluator.set_group_recommender_values(group_predictions, top_item)

        assert evaluator.group_predictions == group_predictions
        assert evaluator.top_recommendation == top_item

    def test_calculate_item_popularity_score(self, evaluator, mock_data_reader):
        """Popularity scores order items by interaction count and lie in [0, 1]."""
        candidates = [101, 102, 103, 104, 105]

        # Interaction counts in the mock dataset:
        #   101 -> 2, 102 -> 2, 103 -> 1, 104 -> 1, 105 -> 1
        scores = evaluator.calculate_item_popularity_score(
            candidates, mock_data_reader
        )

        # Equal interaction counts (2 each) must give equal scores.
        assert scores[101] == scores[102]
        # Two interactions must outrank a single interaction.
        assert scores[101] > scores[103]

        # Every returned score is normalised to the unit interval.
        for _item, score in scores.items():
            assert 0 <= score <= 1

    def test_calculate_relevance_mask_with_predictions(
        self, evaluator, group_predictions
    ):
        """The mask holds each user's prediction, or 0 where none exists."""
        evaluator.set_group_recommender_values(group_predictions, 101)

        expectations = [
            # Item every user has a prediction for.
            (101, {1: 4.2, 2: 4.8, 3: 3.9, 4: 3.5}),
            # Item only user 4 has a prediction for.
            (105, {1: 0, 2: 0, 3: 0, 4: 4.0}),
            # Item nobody has a prediction for.
            (999, {1: 0, 2: 0, 3: 0, 4: 0}),
        ]
        for item_id, expected_mask in expectations:
            assert evaluator.calculate_relevance_mask(item_id) == expected_mask

    def test_calculate_relevance_mask_without_predictions(self, evaluator):
        """Without predictions set, asking for a mask raises ``ValueError``."""
        expect_failure = pytest.raises(ValueError, match="User predictions not set")
        with expect_failure:
            evaluator.calculate_relevance_mask(101)

    def test_calculate_relevance_score(
        self, evaluator, mock_data_reader, group_predictions
    ):
        """Relevance is in [0, 1] normally and 0 for the degenerate inputs."""
        evaluator.set_group_recommender_values(group_predictions, 101)
        predicted = {1: 4.2, 2: 4.8, 3: 3.9, 4: 3.5}
        group = [1, 2, 3, 4]

        # Item with interactions and predictions available.
        score = evaluator.calculate_relevance_score(
            101, mock_data_reader, predicted, group
        )
        assert 0 <= score <= 1

        # Empty group.
        score = evaluator.calculate_relevance_score(
            101, mock_data_reader, predicted, []
        )
        assert score == 0

        # Item 999 does not appear in the mock dataset at all.
        score = evaluator.calculate_relevance_score(
            999, mock_data_reader, predicted, group
        )
        assert score == 0

    def test_calculate_item_intensity_score(self, evaluator, mock_data_reader):
        """Intensity matches the share of group members who touched the item."""
        group = [1, 2, 3, 4]

        # Expected share of the four members with an interaction in the dataset:
        #   101 -> users 1 and 2 (2/4), 102 -> users 1 and 3 (2/4),
        #   105 -> user 4 only (1/4)
        expected_by_item = {101: 0.5, 102: 0.5, 105: 0.25}
        for item_id, expected in expected_by_item.items():
            observed = evaluator.calculate_item_intensity_score(
                item_id, group, mock_data_reader
            )
            assert observed == expected

        # An empty group yields zero intensity.
        observed = evaluator.calculate_item_intensity_score(101, [], mock_data_reader)
        assert observed == 0

    def test_calculate_rating_score(self, evaluator, mock_data_reader):
        """Rating score is in [0, 1] for a real group and 0 for an empty one."""
        group = [1, 2, 3, 4]

        # Item 101 is rated by user 1 (4.0) and user 2 (5.0) in the dataset.
        # Presumably averaged over all four members ((4.0 + 5.0) / 4 = 2.25)
        # before normalisation - only the resulting range is checked here.
        score = evaluator.calculate_rating_score(101, group, mock_data_reader)
        assert 0 <= score <= 1

        # Empty group.
        score = evaluator.calculate_rating_score(101, [], mock_data_reader)
        assert score == 0

    def test_generate_ranked_items(
        self, evaluator, mock_data_reader, group_predictions
    ):
        """Ranking returns a list plus a metrics dict, and needs predictions."""
        evaluator.set_group_recommender_values(group_predictions, 101)
        candidates = [101, 102, 103, 104, 105]
        group = [1, 2, 3, 4]

        # Default weights: the ranking must be a permutation of the candidates.
        ranking, details = evaluator.generate_ranked_items(
            candidates, mock_data_reader, group
        )
        assert isinstance(ranking, list)
        assert isinstance(details, dict)
        assert len(ranking) == len(candidates)
        assert set(ranking) == set(candidates)

        # Caller-supplied weights, passed as the fourth positional argument.
        weights = {
            "popularity": 2.0,
            "intensity": 0.5,
            "rating": 1.0,
            "relevance": 1.5,
            "trend": 0.0,
        }
        weighted_ranking, weighted_details = evaluator.generate_ranked_items(
            candidates, mock_data_reader, group, weights
        )
        assert isinstance(weighted_ranking, list)
        assert isinstance(weighted_details, dict)
        assert len(weighted_ranking) == len(candidates)

        # Clearing the predictions must make ranking fail loudly.
        evaluator.group_predictions = None
        with pytest.raises(ValueError, match="User predictions not set"):
            evaluator.generate_ranked_items(candidates, mock_data_reader, group)

    def test_evaluate_not_implemented(self, evaluator, mock_data_reader):
        """``evaluate`` currently acts as a placeholder returning an empty dict."""
        outcome = evaluator.evaluate(mock_data_reader)
        assert outcome == {}
|
|
|
|
|
|
# Additional tests for edge cases
|
|
|
|
|
|
def test_with_numpy_user_ids(evaluator, mock_data_reader):
    """Group members given as ``np.int64`` values are accepted by the scorers."""
    # Same users as plain ints 1..3, but wrapped as numpy integers.
    np_group = [np.int64(user_id) for user_id in (1, 2, 3)]

    # Neither call should raise, and both scores must stay in [0, 1].
    intensity_value = evaluator.calculate_item_intensity_score(
        101, np_group, mock_data_reader
    )
    assert 0 <= intensity_value <= 1

    rating_value = evaluator.calculate_rating_score(101, np_group, mock_data_reader)
    assert 0 <= rating_value <= 1
|
|
|
|
|
|
def test_with_different_rating_scale(evaluator, mock_data_reader):
    """Scores stay within [0, 1] when a 1-10 ``rating_scale`` is supplied."""
    group = [1, 2, 3, 4]
    one_to_ten = (1, 10)  # (minimum, maximum) of the alternative scale

    # Rating score normalised against the custom scale.
    rating_value = evaluator.calculate_rating_score(
        101, group, mock_data_reader, rating_scale=one_to_ten
    )
    assert 0 <= rating_value <= 1

    # Relevance score with predictions for only two of the four members.
    partial_predictions = {1: 8, 2: 9}
    relevance_value = evaluator.calculate_relevance_score(
        101, mock_data_reader, partial_predictions, group, rating_scale=one_to_ten
    )
    assert 0 <= relevance_value <= 1
|