Source code for ts_benchmark.report.utils.leaderboard

# -*- coding: utf-8 -*-
import logging
from typing import List, Union

import numpy as np
import pandas as pd

logger = logging.getLogger(__name__)


def _fill_null_value(result_df: pd.DataFrame, fill_type: str) -> pd.DataFrame:
    """
    Fills NaN values in the benchmarking records.

    :param result_df: The records to be filled.
    :param fill_type: The type of the filling method, the allowed values are:

        - mean_value: Fill with the mean value of the non-NaN elements;

    :return: The filled DataFrame。
    """
    if fill_type == "mean_value":
        numeric_columns = result_df.select_dtypes(include=[np.number]).columns

        mean_val = result_df[numeric_columns].mean(axis=0, skipna=True)
        df_no_na = result_df.fillna(mean_val)
    else:
        raise ValueError(f"Unknown fill_type {fill_type}")

    return df_no_na


def _calculate_single_metric_result(
    full_metric_df: pd.DataFrame,
    metric_name: str,
    agg_type: str,
    nan_threshold: float,
    fill_type: str,
) -> pd.Series:
    """
    Calculates the leaderboard values for a single metric.

    :param full_metric_df: The full record data.
    :param metric_name: The name of the target metric.
    :param agg_type: Aggregation method, optional values include "mean", "median", "max".
    :param nan_threshold: The metric for any algorithm will be set to NaN if the ratio
        of NaN values from that algorithm exceeds this threshold.
    :param fill_type: Fill method, optional values include "mean_value".
    :return: The leaderboard values for a single metric.
    """
    metric_df = full_metric_df.copy()
    metric_df["model_and_params"] = (
        metric_df["model_name"] + ";" + metric_df["model_params"]
    )
    # todo:inf,-inf应该变成null
    metric_df = metric_df[[metric_name, "model_and_params", "file_name"]].pivot_table(
        values=metric_name,
        index="file_name",
        columns="model_and_params",
        aggfunc=np.nanmean,
        dropna=False,
    )
    threshold_count = float(nan_threshold) * len(metric_df)
    nan_count = metric_df.isna().sum(axis=0)
    metric_values = _fill_null_value(metric_df, fill_type).aggregate(agg_type, axis=0)
    metric_values[nan_count > threshold_count] = np.nan
    return metric_values


def _get_report_metrics(
    record_metrics: np.ndarray, report_metrics: np.ndarray
) -> np.ndarray:
    """
    Get the metrics to be included in the leaderboard.

    This function tries to find metrics specified in `report_metrics`.
    If any of the `report_metrics` does not exist in the records, a warning is
    logged and the metric is ignored.

    :param record_metrics: The list of metric names in the benchmarking records.
    :param report_metrics: The list of metrics that should be included in the leaderboard,
        each item in this list can be in either format:

        - exact names: When there exists ";" symbols in name (i.e. parametrized metrics),
          the name is compared with `record_metrics` using exact match;
        - stems: When there's no ";" symbol in the name, the name is considered as a
          stem name (i.e. metric name without parameters), and is compared with stem names
          in the `record_metrics`;

    :return: An ndarray of metric names that should be included in the leaderboard.
    """
    # a specified report metric may select multiple metrics with the same prefix
    log_metric_prefix = np.array([metric.split(";", 1)[0] for metric in record_metrics])
    matching_matrix = []
    for metric in report_metrics:
        if ";" in metric:
            # metric with parameters, use exact match
            matching_matrix.append(record_metrics == metric)
        else:
            # metric prefix, use prefix match
            matching_matrix.append(log_metric_prefix == metric)
    matching_matrix = np.stack(matching_matrix, axis=0)
    not_matching = ~matching_matrix.any(axis=1)
    if not_matching.any():
        logger.warning(
            "Report metrics %s not found in record files, ignoring.",
            list(report_metrics[not_matching]),
        )
    actual_report_metrics = record_metrics[matching_matrix.any(axis=0)]
    return actual_report_metrics


[docs] def get_leaderboard( log_data: pd.DataFrame, report_metrics: Union[str, List[str]], aggregate_type: str, fill_type: str, nan_threshold: float, ) -> pd.DataFrame: """ Generate a leaderboard from benchmarking records. :param log_data: Benchmarking records. :param report_metrics: The (list of) metrics that should be included in the leaderboard, each item can be in either format: - exact names: When there exists ";" symbols in name (i.e. parametrized metrics), the name is compared with `record_metrics` using exact match; - stems: When there's no ";" symbol in the name, the name is considered as a stem name (i.e. metric name without parameters), and is compared with stem names in the `record_metrics`; :param aggregate_type: Aggregation method, optional values include "mean", "median", "max". :param fill_type: Fill method, optional values include "mean_value". :param nan_threshold: The metric for any algorithm will be set to NaN if the ratio of NaN values from that algorithm exceeds this threshold. :return: The leaderboard in DataFrame format. """ if isinstance(report_metrics, str): report_metrics = [report_metrics] actual_report_metrics = _get_report_metrics( log_data.columns.values, np.array(report_metrics) ) final_result = [] for metric_name in actual_report_metrics: if log_data["strategy_args"].nunique() != 1: raise ValueError("strategy_args are inconsistent in the log file.") single_metric_result = _calculate_single_metric_result( log_data, metric_name, aggregate_type, nan_threshold, fill_type ) final_result.append(single_metric_result) result_df = pd.concat(final_result, axis=1).T.reset_index(drop=True) result_df.insert(0, "metric_name", actual_report_metrics) result_nan_count = result_df.isna().values.sum() if result_nan_count > 0: logger.info( "There are %d NaN values in the leaderboard due to a higher-than-threshold NaN ratio " "in the corresponding model+algorithm pairs.", result_nan_count, ) return result_df