public code v1

2026-05-22 10:02:10 +02:00
commit 46a9ecf065
166 changed files with 6982454 additions and 0 deletions
@@ -0,0 +1,138 @@
+from typing import List, Union, Optional
+import numpy as np
+from scipy import stats
+
+
class Scale:
    """
    A class for scaling numerical values using different methods.

    Methods:
        quantile: Scale values using quantile-based ranking.
        linear: Scale values linearly to a target range with outlier handling.
    """

    @staticmethod
    def _to_array(raw_predictions: Union[List[float], np.ndarray]) -> np.ndarray:
        """Validate that the input is non-empty and return it as a numpy array."""
        if len(raw_predictions) == 0:
            raise ValueError("Raw predictions array is empty.")
        # asarray avoids copying an existing ndarray; the input is never mutated.
        return np.asarray(raw_predictions)

    @staticmethod
    def quantile(
        raw_predictions: Union[List[float], np.ndarray],
        target_min: float = 1,
        target_max: float = 5,
    ) -> np.ndarray:
        """
        Scale raw predictions to the target range using quantile-based ranking.

        Each value is replaced by its (average-tie) rank, and the ranks are
        mapped linearly so that rank 1 lands on target_min and rank n lands on
        target_max. A single value maps to the middle of the target range.

        Args:
            raw_predictions: The raw prediction values.
            target_min: Minimum of the target range (default: 1).
            target_max: Maximum of the target range (default: 5).

        Returns:
            numpy.ndarray: Scaled predictions.

        Raises:
            ValueError: If raw_predictions is empty.
        """
        values = Scale._to_array(raw_predictions)
        count = len(values)

        if count == 1:
            # A lone value has no rank spread; place it mid-range.
            scaled_predictions = np.array([(target_min + target_max) / 2])
        else:
            ranks = stats.rankdata(values, method="average")
            scaled_predictions = target_min + (ranks - 1) * (
                target_max - target_min
            ) / (count - 1)

        # Ensure scaled predictions are within [target_min, target_max]
        return np.clip(scaled_predictions, target_min, target_max)

    @staticmethod
    def linear(
        raw_predictions: Union[List[float], np.ndarray],
        target_min: float = 1,
        target_max: float = 5,
        ref_min: Optional[float] = None,
        ref_max: Optional[float] = None,
        handle_outliers: bool = True,
    ) -> np.ndarray:
        """
        Scale raw predictions to the target range [target_min, target_max].

        Args:
            raw_predictions: The raw prediction values.
            target_min: Minimum of the target range (default: 1).
            target_max: Maximum of the target range (default: 5).
            ref_min: Reference minimum for raw predictions. If None, the minimum
                     of the data is used (after IQR clipping when
                     handle_outliers=True).
            ref_max: Reference maximum for raw predictions. If None, the maximum
                     of the data is used (after IQR clipping when
                     handle_outliers=True).
            handle_outliers: Whether to handle outliers using IQR method (default: True).

        Returns:
            numpy.ndarray: Scaled predictions, clipped to the target range. When
            the reference bounds coincide, every entry is the (float) middle of
            the target range.

        Raises:
            ValueError: If raw_predictions is empty.
        """
        values = Scale._to_array(raw_predictions)
        target_span = target_max - target_min
        midpoint = (target_min + target_max) / 2

        # Handle single element case
        if len(values) == 1:
            if ref_min is None or ref_max is None:
                # Can't determine range from single value, return middle of target range
                return np.array([midpoint])
            if ref_max == ref_min:
                scaled_value = midpoint
            else:
                # Scale based on provided reference range
                scaled_value = target_min + (values[0] - ref_min) * target_span / (
                    ref_max - ref_min
                )
            return np.array([np.clip(scaled_value, target_min, target_max)])

        # Clip to the 1.5 * IQR fences so extreme values do not stretch the
        # data-derived reference range.
        if handle_outliers:
            q1, q3 = np.percentile(values, [25, 75])
            iqr = q3 - q1
            bounded = np.clip(values, q1 - 1.5 * iqr, q3 + 1.5 * iqr)
        else:
            bounded = values

        # Use provided reference bounds if given, otherwise use data bounds
        actual_ref_min = ref_min if ref_min is not None else np.min(bounded)
        actual_ref_max = ref_max if ref_max is not None else np.max(bounded)

        if actual_ref_max == actual_ref_min:
            # Reference bounds are equal, return the middle of the target range.
            # An explicit float dtype is required: inheriting an integer input
            # dtype would truncate a fractional midpoint (e.g. 2.5 -> 2).
            return np.full(values.shape, midpoint, dtype=float)

        # The unclipped values are scaled; anything outside the reference range
        # saturates at the target bounds in the final clip.
        scaled_predictions = target_min + (values - actual_ref_min) * target_span / (
            actual_ref_max - actual_ref_min
        )

        # Ensure scaled predictions are within [target_min, target_max]
        return np.clip(scaled_predictions, target_min, target_max)