# Source code for omnigenbench.src.abc.abstract_metric

# -*- coding: utf-8 -*-
# file: abstract_metric.py
# time: 12:58 09/04/2024
# author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
# github: https://github.com/yangheng95
# huggingface: https://huggingface.co/yangheng
# google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
# Copyright (C) 2019-2025. All Rights Reserved.
import numpy as np
import sklearn.metrics as metrics

from ..misc.utils import env_meta_info



# [docs]
class OmniMetric:
    """
    Base class providing a unified interface for evaluation metrics in the OmniGenBench
    framework. It mirrors the ``sklearn.metrics`` namespace on every instance and defines
    the ``compute()`` contract that concrete metrics implement.

    **Design Philosophy**: Metric implementations are interchangeable behind a single
    ``compute(y_true, y_pred) -> dict`` interface. The contents of ``sklearn.metrics`` are
    exposed as instance attributes for convenient access without explicit imports.

    **Key Features**:

    - **Scikit-learn Integration**: Every non-dunder name in the ``sklearn.metrics``
      namespace (accuracy_score, f1_score, matthews_corrcoef, etc.) is set as an attribute
      on the instance, eliminating the need for separate metric imports.

    - **Masked Label Handling**: The ``ignore_y`` value (e.g. PyTorch's -100 ignore
      convention) is stored on the instance. The base class does not filter labels itself;
      ``compute()`` implementations are expected to drop labels equal to ``ignore_y``
      before calling the underlying metric, which is essential for tasks with
      variable-length outputs or padded sequences.

    - **Flexible Inputs**: ``flatten()`` converts anything ``numpy.asarray`` accepts
      (nested lists, numpy arrays, array-like objects) into 1D numpy arrays.

    - **Standardized Output**: ``compute()`` returns a dictionary mapping metric names to
      values for consistent logging and tracking.

    - **Custom Metric Support**: Easy extensibility through subclassing and implementing
      custom compute() methods for domain-specific metrics (e.g., Matthews Correlation
      Coefficient for imbalanced genomic datasets).

    **Common Genomic Use Cases**:

    - **Imbalanced Classification**: MCC and AUPRC for rare variant detection, where accuracy
      alone is misleading

    - **Multi-Label Prediction**: Hamming loss and F1-macro for transcription factor binding
      site prediction across hundreds of TFs

    - **Regression Tasks**: Spearman correlation for gene expression prediction, where rank
      order matters more than absolute values

    - **Token-Level Prediction**: Per-nucleotide metrics for secondary structure prediction
      and splice site detection

    **Subclass Implementations** (defined elsewhere in the package; the descriptions below
    are not verifiable from this module):

    - ``ClassificationMetric``: Classification metrics (accuracy, precision, recall, F1,
      MCC, AUROC, AUPRC)

    - ``RegressionMetric``: Regression-specific metrics (MSE, MAE, R², Spearman/Pearson
      correlation) for continuous predictions

    - ``RankingMetric``: Ranking and retrieval metrics (NDCG, MAP, Precision@K) for
      information retrieval tasks

    Attributes:
        metric_func (callable, optional): A callable metric function from sklearn.metrics.
            Stored as given; subclasses decide how to use it. If None, subclasses
            should implement their own compute() method.

        ignore_y (any, optional): A value in the ground truth labels to be ignored during
            metric computation. Commonly set to -100 (PyTorch's default ignore index) or
            None. Stored as given; applying it is the responsibility of compute()
            implementations.

        metadata: The value returned by ``env_meta_info()`` at construction time.

    Note:
        The base class cannot evaluate anything by itself: ``compute()`` raises
        ``NotImplementedError``. Use task-specific subclasses for actual evaluation:

        - Use ``ClassificationMetric`` for binary/multi-class/multi-label classification
        - Use ``RegressionMetric`` for continuous value prediction
        - Use ``RankingMetric`` for ranking and retrieval tasks
        - Subclass OmniMetric for custom metrics with specialized compute() implementations

    Example:
        >>> # Access scikit-learn metrics directly
        >>> metric = OmniMetric()
        >>> acc = metric.accuracy_score(y_true, y_pred)
        >>>
        >>> # Record the ignore value for masked tokens
        >>> metric = OmniMetric(ignore_y=-100)
    """

    def __init__(self, metric_func=None, ignore_y=None, *args, **kwargs):
        """
        Initializes the metric.

        Args:
            metric_func (callable, optional): A callable metric function from
                                            `sklearn.metrics`. If None, subclasses
                                            should implement their own compute method.
            ignore_y (any, optional): A value in the ground truth labels to be
                                    ignored during metric computation.
            *args: Additional positional arguments. Accepted for signature
                   compatibility with subclasses; unused here.
            **kwargs: Additional keyword arguments. Accepted for signature
                      compatibility with subclasses; unused here.

        Example:
            >>> # Initialize with a specific metric function
            >>> metric = OmniMetric(metrics.accuracy_score)

            >>> # Initialize with ignore value
            >>> metric = OmniMetric(ignore_y=-100)
        """
        self.metric_func = metric_func
        self.ignore_y = ignore_y

        # Expose the sklearn.metrics namespace as instance attributes. Module
        # dunders (__doc__, __name__, __file__, __builtins__, ...) describe the
        # sklearn module, not this object: copying them would shadow the
        # instance's own __doc__ and drag the whole builtins dict into
        # vars(self), so they are skipped.
        for name, member in vars(metrics).items():
            if name.startswith("__") and name.endswith("__"):
                continue
            setattr(self, name, member)

        self.metadata = env_meta_info()

    def compute(self, y_true, y_pred) -> dict:
        """
        Computes the metric. This method must be implemented by subclasses.

        Args:
            y_true: Ground truth labels.
            y_pred: Predicted labels.

        Returns:
            dict: A dictionary with the metric name as key and its value.

        Raises:
            NotImplementedError: Always, in this base class; subclasses override
                the method to provide the actual computation.

        Example:
            >>> # In a classification metric
            >>> result = metric.compute(y_true, y_pred)
            >>> print(result)  # {'accuracy': 0.85}
        """
        raise NotImplementedError(
            "Method compute() is not implemented in the child class. "
            "This function returns a dict containing the metric name and value. "
            "e.g. {'accuracy': 0.9}"
        )

    @staticmethod
    def flatten(y_true, y_pred):
        """
        Flattens the ground truth and prediction arrays into 1D numpy arrays.

        Args:
            y_true: Ground truth labels in any format that can be converted to numpy array.
            y_pred: Predicted labels in any format that can be converted to numpy array.

        Returns:
            tuple: A tuple of flattened `y_true` and `y_pred` as numpy arrays. The
            returned arrays are copies and never alias the inputs.

        Example:
            >>> y_true = [[1, 2], [3, 4]]
            >>> y_pred = [[1, 2], [3, 4]]
            >>> flat_true, flat_pred = OmniMetric.flatten(y_true, y_pred)
            >>> print(flat_true.shape)  # (4,)
        """
        # asarray avoids an up-front copy of inputs that are already ndarrays;
        # flatten() always returns a fresh array, so callers still receive
        # data that is independent of what they passed in.
        flat_true = np.asarray(y_true).flatten()
        flat_pred = np.asarray(y_pred).flatten()
        return flat_true, flat_pred