Source code for omnigenbench.src.metric.metric

# -*- coding: utf-8 -*-
# file: regression_metric.py
# time: 12:57 09/04/2024
# author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
# github: https://github.com/yangheng95
# huggingface: https://huggingface.co/yangheng
# google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
# Copyright (C) 2019-2024. All Rights Reserved.


import types
import warnings

import numpy as np
import sklearn.metrics as metrics

from ..abc.abstract_metric import OmniMetric



[docs]
def mcrmse(y_true, y_pred):
    """
    Compute Mean Column Root Mean Square Error (MCRMSE).

    MCRMSE is a multi-target regression metric that computes the RMSE for each target
    column and then takes the mean across all targets.

    Args:
        y_true (np.ndarray): Ground truth values with shape (n_samples, n_targets)
        y_pred (np.ndarray): Predicted values with shape (n_samples, n_targets)

    Returns:
        float: Mean Column Root Mean Square Error

    Raises:
        ValueError: If y_true and y_pred have different shapes

    Example:
        >>> y_true = np.array([[1, 2], [3, 4], [5, 6]])
        >>> y_pred = np.array([[1.1, 2.1], [2.9, 4.1], [5.2, 5.8]])
        >>> mcrmse(y_true, y_pred)
        0.1833...
    """
    if y_true.shape != y_pred.shape:
        raise ValueError("y_true and y_pred must have the same shape")
    mask = y_true != -100
    filtered_y_pred = y_pred[mask]
    filtered_y_true = y_true[mask]
    rmse_per_target = np.sqrt(np.mean((filtered_y_true - filtered_y_pred) ** 2, axis=0))
    mcrmse_value = np.mean(rmse_per_target)
    return mcrmse_value



setattr(metrics, "mcrmse", mcrmse)



[docs]
class Metric(OmniMetric):
    """
    A flexible metric class that provides access to all scikit-learn metrics
    and custom metrics for evaluation.

    This class dynamically wraps scikit-learn metrics and provides a unified
    interface for computing various evaluation metrics. It handles different
    input formats including HuggingFace trainer outputs and supports
    custom metric functions.

    Attributes:
        metric_func: Custom metric function if provided
        ignore_y: Value to ignore in predictions and true values
        kwargs: Additional keyword arguments for metric computation
        metrics: Dictionary of available metrics including custom ones

    Example:
        >>> from omnigenbench import Metric
        >>> metric = Metric(ignore_y=-100)
        >>> y_true = [0, 1, 2, 0, 1]
        >>> y_pred = [0, 1, 1, 0, 1]
        >>> result = metric.accuracy(y_true, y_pred)
        >>> print(result)
        {'accuracy': 0.8}
    """

    def __init__(self, metric_func=None, ignore_y=-100, *args, **kwargs):
        """
        Initialize the Metric class.

        Args:
            metric_func (callable, optional): Custom metric function to use
            ignore_y (int, optional): Value to ignore in predictions and true values. Defaults to -100
            *args: Additional positional arguments
            **kwargs: Additional keyword arguments for metric computation
        """
        super().__init__(metric_func, ignore_y, *args, **kwargs)
        self.kwargs = kwargs
        self.metrics = {"mcrmse": mcrmse}
        for key, value in metrics.__dict__.items():
            setattr(self, key, value)

    def __getattribute__(self, name):
        """
        Dynamically create metric computation methods.

        This method intercepts attribute access and creates wrapper functions
        for scikit-learn metrics, handling different input formats and
        preprocessing the data appropriately.

        Args:
            name (str): Name of the metric to access

        Returns:
            callable: Wrapper function for the requested metric
        """
        # Get the metric function
        metric_func = getattr(metrics, name, None)

        if metric_func and isinstance(metric_func, types.FunctionType):
            setattr(self, "compute", metric_func)
            # If the metric function exists, return a wrapper function

            def wrapper(y_true=None, y_score=None, *args, **kwargs):
                """
                Compute the metric, based on the true and predicted values.

                This wrapper handles different input formats including HuggingFace
                trainer outputs and performs necessary preprocessing.

                Args:
                    y_true: The true values or HuggingFace EvalPrediction object
                    y_score: The predicted values
                    ignore_y: The value to ignore in the predictions and true values in corresponding positions
                    *args: Additional positional arguments for the metric
                    **kwargs: Additional keyword arguments for the metric

                Returns:
                    dict: Dictionary containing the metric name and computed value

                Raises:
                    ValueError: If neither y_true nor y_score is provided
                """
                # This is an ugly method to handle the case when the predictions are in the form of a tuple
                # for huggingface trainers
                if y_true is not None and y_score is None:
                    if hasattr(y_true, "predictions"):
                        y_score = y_true.predictions
                    if hasattr(y_true, "label_ids"):
                        y_true = y_true.label_ids
                    if hasattr(y_true, "labels"):
                        y_true = y_true.labels
                    if len(y_score[0][1]) == np.max(y_true) + 1:
                        y_score = y_score[0]
                    else:
                        y_score = y_score[1]
                    y_score = np.argmax(y_score, axis=1)
                elif y_true is not None and y_score is not None:
                    pass  # y_true and y_score are provided
                else:
                    raise ValueError(
                        "Please provide the true and predicted values or a dictionary with 'y_true' and 'y_score'."
                    )

                y_true, y_score = Metric.flatten(y_true, y_score)
                y_true_mask_idx = np.where(y_true != self.ignore_y)
                if self.ignore_y is not None:
                    y_true = y_true[y_true_mask_idx]
                    try:
                        y_score = y_score[y_true_mask_idx]
                    except Exception as e:
                        warnings.warn(str(e))
                kwargs.update(self.kwargs)

                return {name: self.compute(y_true, y_score, *args, **kwargs)}

            return wrapper
        else:
            return super().__getattribute__(name)


[docs]
    def compute(self, y_true, y_score, *args, **kwargs):
        """
        Compute the metric, based on the true and predicted values.

        Args:
            y_true: The true values
            y_score: The predicted values
            *args: Additional positional arguments for the metric
            **kwargs: Additional keyword arguments for the metric

        Returns:
            The computed metric value

        Raises:
            NotImplementedError: If no metric function is provided and compute is not implemented
        """
        if self.metric_func is not None:
            kwargs.update(self.kwargs)
            return self.metric_func(y_true, y_score, *args, **kwargs)

        else:
            raise NotImplementedError(
                "Method compute() is not implemented in the child class."
            )