Source code for ts_benchmark.evaluation.strategy.fixed_forecast

# -*- coding: utf-8 -*-
import time
from typing import List, Optional

import pandas as pd

from ts_benchmark.evaluation.metrics import regression_metrics
from ts_benchmark.evaluation.strategy.constants import FieldNames
from ts_benchmark.evaluation.strategy.forecasting import ForecastingStrategy
from ts_benchmark.models import ModelFactory
from ts_benchmark.utils.data_processing import split_before


class FixedForecast(ForecastingStrategy):
    """
    Forecasting strategy with a fixed prediction length.

    The model is fitted once on the leading part of a series and then asked
    to predict ``horizon`` steps, which are compared against the held-out
    tail of the series.

    Required strategy configs:

    - horizon (int): The length to predict, i.e. the length of the test
      series;
    - train_ratio_in_tv (float): The ratio of the training series when
      performing the train-validation split.

    All regression metrics are accepted.

    Besides the requested metrics, each result row carries the following
    fields (in this order):

    - FieldNames.FILE_NAME: The name of the series;
    - FieldNames.FIT_TIME: The training time;
    - FieldNames.INFERENCE_TIME: The inference time;
    - FieldNames.ACTUAL_DATA: The true test data, encoded as a string;
    - FieldNames.INFERENCE_DATA: The predicted data, encoded as a string;
    - FieldNames.LOG_INFO: Any log returned by the evaluator.
    """

    REQUIRED_CONFIGS = ["horizon", "train_ratio_in_tv"]

    def _execute(
        self,
        series: pd.DataFrame,
        meta_info: Optional[pd.Series],
        model_factory: ModelFactory,
        series_name: str,
    ) -> List:
        """
        Fit, forecast and evaluate a single series.

        :param series: The full series to split into train/validation and test parts.
        :param meta_info: Optional meta information of the series; its "length"
            entry is looked up, with ``len(series)`` passed as the fallback value.
        :param model_factory: Callable that builds a fresh model instance.
        :param series_name: The name of the series, also used to resolve configs.
        :return: The metric values produced by the evaluator followed by the
            extra fields listed in :attr:`field_names`.
        :raises ValueError: If ``horizon`` is not smaller than the data length.
        """
        model = model_factory()
        horizon = self._get_scalar_config_value("horizon", series_name)
        tv_ratio = self._get_scalar_config_value("train_ratio_in_tv", series_name)

        total_length = int(self._get_meta_info(meta_info, "length", len(series)))
        history_length = total_length - horizon
        if history_length <= 0:
            raise ValueError("The prediction step exceeds the data length")

        history, target = split_before(series, history_length)

        # The fit timer starts before the fit method is resolved, so the
        # attribute lookup is part of the measured fit time.
        fit_started = time.time()
        if hasattr(model, "forecast_fit"):
            fit = model.forecast_fit
        else:
            fit = model.fit
        fit(history, train_ratio_in_tv=tv_ratio)
        fit_finished = time.time()

        forecast = model.forecast(horizon, history)
        inference_finished = time.time()

        results, log_info = self.evaluator.evaluate_with_log(
            target.to_numpy(),
            forecast,
            # TODO: add configs to control scaling behavior
            self._get_eval_scaler(history, tv_ratio),
            history.values,
        )

        # Give the raw forecast the same labels as the ground truth before
        # both frames are encoded.
        forecast_frame = pd.DataFrame(
            forecast, columns=target.columns, index=target.index
        )
        encoded_actual = self._encode_data(target)
        encoded_forecast = self._encode_data(forecast_frame)

        fit_seconds = fit_finished - fit_started
        inference_seconds = inference_finished - fit_finished

        # Order must match the extra fields declared in ``field_names``.
        results += [
            series_name,
            fit_seconds,
            inference_seconds,
            encoded_actual,
            encoded_forecast,
            log_info,
        ]
        return results

    @staticmethod
    def accepted_metrics():
        """Return the names exported by the regression metrics module."""
        return regression_metrics.__all__

    @property
    def field_names(self) -> List[str]:
        """Names of all result fields: the evaluator's metrics, then the extras."""
        metric_fields = self.evaluator.metric_names
        extra_fields = [
            FieldNames.FILE_NAME,
            FieldNames.FIT_TIME,
            FieldNames.INFERENCE_TIME,
            FieldNames.ACTUAL_DATA,
            FieldNames.INFERENCE_DATA,
            FieldNames.LOG_INFO,
        ]
        return metric_fields + extra_fields