ahead
Top-level package for ahead.
1"""Top-level package for ahead.""" 2 3__author__ = """T. Moudiki""" 4__email__ = "thierry.moudiki@gmail.com" 5__version__ = "0.10.0" 6 7from .ARMAGARCH import ArmaGarch 8from .Basic import BasicForecaster 9from .DynamicRegressor import DynamicRegressor 10from .EAT import EAT 11from .FitForecast import FitForecaster 12from .Ridge2 import Ridge2Regressor 13from .VAR import VAR 14from .MLARCH import MLARCH 15 16 17__all__ = [ 18 "ArmaGarch", 19 "BasicForecaster", 20 "DynamicRegressor", 21 "EAT", 22 "FitForecaster", 23 "Ridge2Regressor", 24 "VAR", 25 "MLARCH" 26]
class ArmaGarch(object):
    # The docstring is a raw string because it contains the literal `/!\)`;
    # in a regular string `\)` is an invalid escape sequence, which newer
    # Python versions report with a warning at compile time.
    r"""ARMA(1, 1)-GARCH(1, 1) forecasting (with simulation)

    Parameters:

        h: an integer;
            forecasting horizon

        level: an integer;
            Confidence level for prediction intervals

        B: an integer;
            number of simulations for R's `stats::arima.sim`

        cl: an integer;
            the number of clusters for parallel execution (done in R /!\)

        dist: a string;
            distribution of innovations ("student" or "gaussian")

        seed: an integer;
            reproducibility seed

        date_formatting: a string;
            Currently:
            - "original": yyyy-mm-dd
            - "ms": milliseconds

    Attributes:

        fcast_: an object;
            raw result from fitting R's `ahead::armagarchf` through `rpy2`

        averages_: a list;
            mean forecast in a list

        ranges_: a list;
            lower and upper prediction intervals in a list

        output_dates_: a list;
            a list of output dates (associated to forecast)

        mean_: a numpy array
            contains series mean forecast as a numpy array

        lower_: a numpy array
            contains series lower bound forecast as a numpy array

        upper_: a numpy array
            contains series upper bound forecast as a numpy array

        result_df_: a data frame;
            contains 3 columns, mean forecast, lower + upper
            prediction intervals, and a date index

        sims_: a numpy array
            forecasting simulations

    """

    def __init__(
        self,
        h=5,
        level=95,
        B=250,
        cl=1,
        dist="student",
        seed=123,
        date_formatting="original",
    ):
        """Store the forecasting configuration and reset all result attributes."""

        # configuration supplied by the caller
        self.h = h
        self.level = level
        self.B = B
        self.cl = cl
        self.dist = dist
        self.seed = seed
        self.date_formatting = date_formatting
        self.input_df = None

        # results; they keep these placeholder values until `forecast` runs
        self.fcast_ = None
        self.averages_ = None
        self.ranges_ = None
        self.output_dates_ = []
        self.mean_ = []
        self.lower_ = []
        self.upper_ = []
        self.result_df_ = None
        self.sims_ = None

    def forecast(self, df):
        """Forecasting method from `ArmaGarch` class

        Parameters:

            df: a data frame;
                a data frame containing the input time series (see example)

        Returns:

            the instance itself, with the result attributes (`fcast_`,
            `averages_`, `ranges_`, `output_dates_`, `mean_`, `lower_`,
            `upper_`, `result_df_`, `sims_`) filled in

        """

        # NOTE(review): `init_forecasting_params`, `format_input` and
        # `get_forecast` are not defined in this class, which derives from
        # `object`; presumably they are meant to come from a shared base
        # class defined elsewhere -- confirm.

        # get input dates, output dates, number of series, series names, etc.
        self.init_forecasting_params(df)

        # obtain time series object -----
        self.format_input()

        self.get_forecast("armagarch")

        # result -----
        # the third element returned by the formatter replaces
        # `output_dates_` here
        (
            self.averages_,
            self.ranges_,
            self.output_dates_,
        ) = uv.format_univariate_forecast(
            date_formatting=self.date_formatting,
            output_dates=self.output_dates_,
            horizon=self.h,
            fcast=self.fcast_,
        )

        # array views of the named components of the raw forecast object
        self.mean_ = np.asarray(self.fcast_.rx2["mean"])
        self.lower_ = np.asarray(self.fcast_.rx2["lower"])
        self.upper_ = np.asarray(self.fcast_.rx2["upper"])

        self.result_df_ = umv.compute_result_df(self.averages_, self.ranges_)

        self.sims_ = np.asarray(self.fcast_.rx2["sims"])

        return self
ARMA(1, 1)-GARCH(1, 1) forecasting (with simulation)
Parameters:
h: an integer;
forecasting horizon
level: an integer;
Confidence level for prediction intervals
B: an integer;
number of simulations for R's `stats::arima.sim`
cl: an integer;
the number of clusters for parallel execution (done in R /!\)
dist: a string;
distribution of innovations ("student" or "gaussian")
seed: an integer;
reproducibility seed
date_formatting: a string;
Currently:
- "original": yyyy-mm-dd
- "ms": milliseconds
Attributes:
fcast_: an object;
raw result from fitting R's `ahead::armagarchf` through `rpy2`
averages_: a list;
mean forecast in a list
ranges_: a list;
lower and upper prediction intervals in a list
output_dates_: a list;
a list of output dates (associated to forecast)
mean_: a numpy array
contains series mean forecast as a numpy array
lower_: a numpy array
contains series lower bound forecast as a numpy array
upper_: a numpy array
contains series upper bound forecast as a numpy array
result_df_: a data frame;
contains 3 columns, mean forecast, lower + upper
prediction intervals, and a date index
sims_: a numpy array
forecasting simulations
class BasicForecaster(Base):
    """Basic forecasting functions for multivariate time series (mean, median, random walk)

    Parameters:

        h: an integer;
            forecasting horizon

        level: an integer;
            Confidence level for prediction intervals

        method: a string;
            Forecasting method, either "mean", "median", or random walk ("rw")

        type_pi: a string;
            Type of prediction interval (currently "gaussian",
            "bootstrap" (independent), "blockbootstrap" (circular),
            "movingblockbootstrap")

        block_length: an integer
            length of block for multivariate block bootstrap (`type_pi == blockbootstrap`
            or `type_pi == movingblockbootstrap`)

        B: an integer;
            Number of replications

        date_formatting: a string;
            Currently:
            - "original": yyyy-mm-dd
            - "ms": milliseconds

        seed: an integer;
            reproducibility seed

    Attributes:

        fcast_: an object;
            raw result returned by R's `ahead` package through `rpy2`
            (presumably `ahead::basicf` -- confirm against `get_forecast`)

        averages_: a list of lists;
            mean forecast in a list for each series

        ranges_: a list of lists;
            lower and upper prediction intervals in a list for each series

        output_dates_: a list;
            a list of output dates (associated to forecast)

        mean_: a numpy array
            contains series mean forecast as a numpy array

        lower_: a numpy array
            contains series lower bound forecast as a numpy array

        upper_: a numpy array
            contains series upper bound forecast as a numpy array

        result_dfs_: a tuple of data frames;
            each element of the tuple contains 3 columns,
            mean forecast, lower + upper prediction intervals,
            and a date index

        sims_: currently a tuple of numpy arrays
            filled only when `type_pi` is "bootstrap", "blockbootstrap" or
            "movingblockbootstrap"; holds `B` arrays of simulations

    Examples:

    ```python
    import pandas as pd
    from ahead import BasicForecaster

    # Data frame containing the time series
    dataset = {
    'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
    'series1' : [34, 30, 35.6, 33.3, 38.1],
    'series2' : [4, 5.5, 5.6, 6.3, 5.1],
    'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
    df = pd.DataFrame(dataset).set_index('date')
    print(df)

    # multivariate time series forecasting
    r1 = BasicForecaster(h = 5)
    r1.forecast(df)
    print(r1.result_dfs_)
    ```

    """

    def __init__(
        self,
        h=5,
        level=95,
        method="mean",
        type_pi="gaussian",
        block_length=5,
        B=100,
        date_formatting="original",
        seed=123,
    ):
        """Keep the configuration and clear every result attribute."""

        super().__init__(h=h, level=level, seed=seed)

        # configuration supplied by the caller
        self.method, self.type_pi = method, type_pi
        self.block_length, self.B = block_length, B
        self.date_formatting = date_formatting
        self.input_df = None

        # results; they stay empty until `forecast` has been called
        self.fcast_ = self.averages_ = self.ranges_ = None
        self.output_dates_ = []
        self.mean_ = self.lower_ = self.upper_ = None
        self.result_dfs_ = self.sims_ = None

    def forecast(self, df):
        """Forecasting method from `BasicForecaster` class

        Parameters:

            df: a data frame;
                a data frame containing the input time series (see example)

        Returns:

            the instance itself, with the result attributes filled in

        """

        block_kinds = ("blockbootstrap", "movingblockbootstrap")

        # input dates, output dates, number of series, series names, etc.
        self.init_forecasting_params(df)

        # time series object -----
        self.format_input()

        # block-based intervals cannot work without a block length
        if self.type_pi in block_kinds:
            assert (
                self.block_length is not None
            ), "For `type_pi in ('blockbootstrap', 'movingblockbootstrap')`, `block_length` must be not None"

        self.get_forecast()

        # result -----
        # the formatter's third return value is deliberately discarded
        averages, ranges, _ = mv.format_multivariate_forecast(
            n_series=self.n_series,
            date_formatting=self.date_formatting,
            output_dates=self.output_dates_,
            horizon=self.h,
            fcast=self.fcast_,
        )
        self.averages_ = averages
        self.ranges_ = ranges

        # array views of the named components of the raw forecast object
        for component in ("mean", "lower", "upper"):
            setattr(
                self,
                component + "_",
                np.asarray(self.fcast_.rx2[component]),
            )

        # one result data frame per series
        self.result_dfs_ = tuple(
            umv.compute_result_df(self.averages_[k], self.ranges_[k])
            for k in range(self.n_series)
        )

        # simulations exist only for the bootstrap-type intervals
        if self.type_pi in ("bootstrap",) + block_kinds:
            self.sims_ = tuple(
                np.asarray(self.fcast_.rx2["sims"][b]) for b in range(self.B)
            )

        return self
Basic forecasting functions for multivariate time series (mean, median, random walk)
Parameters:
h: an integer;
forecasting horizon
level: an integer;
Confidence level for prediction intervals
method: a string;
Forecasting method, either "mean", "median", or random walk ("rw")
type_pi: a string;
Type of prediction interval (currently "gaussian",
"bootstrap" (independent), "blockbootstrap" (circular),
"movingblockbootstrap")
block_length: an integer
length of block for multivariate block bootstrap (`type_pi == blockbootstrap`
or `type_pi == movingblockbootstrap`)
B: an integer;
Number of replications
date_formatting: a string;
Currently:
- "original": yyyy-mm-dd
- "ms": milliseconds
seed: an integer;
reproducibility seed
Attributes:
fcast_: an object;
raw result from fitting R's `ahead::basicf` through `rpy2`
averages_: a list of lists;
mean forecast in a list for each series
ranges_: a list of lists;
lower and upper prediction intervals in a list for each series
output_dates_: a list;
a list of output dates (associated to forecast)
mean_: a numpy array
contains series mean forecast as a numpy array
lower_: a numpy array
contains series lower bound forecast as a numpy array
upper_: a numpy array
contains series upper bound forecast as a numpy array
result_dfs_: a tuple of data frames;
each element of the tuple contains 3 columns,
mean forecast, lower + upper prediction intervals,
and a date index
sims_: currently a tuple of numpy arrays
for `type_pi` in ("bootstrap", "blockbootstrap", "movingblockbootstrap"): the simulations, one array per replication (`B` arrays)
Examples:
import pandas as pd
from ahead import BasicForecaster
# Data frame containing the time series
dataset = {
'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
'series1' : [34, 30, 35.6, 33.3, 38.1],
'series2' : [4, 5.5, 5.6, 6.3, 5.1],
'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
df = pd.DataFrame(dataset).set_index('date')
print(df)
# multivariate time series forecasting
r1 = BasicForecaster(h = 5)
r1.forecast(df)
print(r1.result_dfs_)
class DynamicRegressor(Base):
    """Dynamic Regression Model adapted from R's `forecast::nnetar`

    Parameters:

        h: an integer;
            forecasting horizon

        level: an integer;
            Confidence level for prediction intervals

        type_pi: a string;
            Type of prediction interval (currently "gaussian",
            ETS: "E", Arima: "A" or Theta: "T")

        date_formatting: a string;
            Currently:
            - "original": yyyy-mm-dd
            - "ms": milliseconds

    Attributes:

        fcast_: an object;
            raw result from fitting R's `ahead::dynrmf` through `rpy2`

        averages_: a list;
            mean forecast in a list

        ranges_: a list;
            lower and upper prediction intervals in a list

        output_dates_: a list;
            a list of output dates (associated to forecast)

        mean_: a numpy array
            contains series mean forecast as a numpy array

        lower_: a numpy array
            contains series lower bound forecast as a numpy array

        upper_: a numpy array
            contains series upper bound forecast as a numpy array

        result_df_: a data frame;
            contains 3 columns, mean forecast, lower + upper
            prediction intervals, and a date index

    Examples:

    ```python
    import pandas as pd
    from ahead import DynamicRegressor

    # Data frame containing the time series
    dataset = {
    'date' : ['2020-01-01', '2020-02-01', '2020-03-01', '2020-04-01', '2020-05-01'],
    'value' : [34, 30, 35.6, 33.3, 38.1]}

    df = pd.DataFrame(dataset).set_index('date')
    print(df)

    # univariate time series forecasting
    d1 = DynamicRegressor(h = 5)
    d1.forecast(df)
    print(d1.result_df_)
    ```

    """

    def __init__(self, h=5, level=95, type_pi="E", date_formatting="original"):
        """Keep the configuration and clear every result attribute."""

        super().__init__(h=h, level=level)

        # configuration supplied by the caller
        self.type_pi, self.date_formatting = type_pi, date_formatting
        self.input_df = None
        self.type_input = "univariate"

        # results; they stay empty until `forecast` has been called
        self.fcast_ = self.averages_ = self.ranges_ = None
        self.output_dates_ = []
        # three distinct lists on purpose: they must not alias each other
        self.mean_, self.lower_, self.upper_ = [], [], []
        self.result_df_ = None

    def forecast(self, df):
        """Forecasting method from `DynamicRegressor` class

        Parameters:

            df: a data frame;
                a data frame containing the input time series (see example)

        Returns:

            the instance itself, with the result attributes filled in

        """

        # input dates, output dates, number of series, series names, etc.
        self.init_forecasting_params(df)

        # time series object -----
        self.format_input()

        self.get_forecast("dynrm")

        # result -----
        # the formatter's third return value is deliberately discarded
        averages, ranges, _ = uv.format_univariate_forecast(
            date_formatting=self.date_formatting,
            output_dates=self.output_dates_,
            horizon=self.h,
            fcast=self.fcast_,
        )
        self.averages_ = averages
        self.ranges_ = ranges

        # array views of the named components of the raw forecast object
        for component in ("mean", "lower", "upper"):
            setattr(
                self,
                component + "_",
                np.asarray(self.fcast_.rx2[component]),
            )

        self.result_df_ = umv.compute_result_df(self.averages_, self.ranges_)

        return self
Dynamic Regression Model adapted from R's forecast::nnetar
Parameters:
h: an integer;
forecasting horizon
level: an integer;
Confidence level for prediction intervals
type_pi: a string;
Type of prediction interval (currently "gaussian",
ETS: "E", Arima: "A" or Theta: "T")
date_formatting: a string;
Currently:
- "original": yyyy-mm-dd
- "ms": milliseconds
Attributes:
fcast_: an object;
raw result from fitting R's `ahead::dynrmf` through `rpy2`
averages_: a list;
mean forecast in a list
ranges_: a list;
lower and upper prediction intervals in a list
output_dates_: a list;
a list of output dates (associated to forecast)
mean_: a numpy array
contains series mean forecast as a numpy array
lower_: a numpy array
contains series lower bound forecast as a numpy array
upper_: a numpy array
contains series upper bound forecast as a numpy array
result_df_: a data frame;
contains 3 columns, mean forecast, lower + upper
prediction intervals, and a date index
Examples:
import pandas as pd
from ahead import DynamicRegressor
# Data frame containing the time series
dataset = {
'date' : ['2020-01-01', '2020-02-01', '2020-03-01', '2020-04-01', '2020-05-01'],
'value' : [34, 30, 35.6, 33.3, 38.1]}
df = pd.DataFrame(dataset).set_index('date')
print(df)
# univariate time series forecasting
d1 = DynamicRegressor(h = 5)
d1.forecast(df)
print(d1.result_df_)
class EAT(Base):
    """Combinations of ETS (exponential smoothing), auto.arima and Theta models

    Parameters:

        h: an integer;
            forecasting horizon

        level: an integer;
            Confidence level for prediction intervals

        weights: a list;
            coefficients assigned to each method in the ensemble
            (three values; equal weights when left to `None`)

        type_pi: a string;
            Type of prediction interval (currently "gaussian",
            ETS: "E", Arima: "A" or Theta: "T")

        date_formatting: a string;
            Currently:
            - "original": yyyy-mm-dd
            - "ms": milliseconds

    Attributes:

        fcast_: an object;
            raw result from fitting R's `ahead::eatf` through `rpy2`

        averages_: a list;
            mean forecast in a list

        ranges_: a list;
            lower and upper prediction intervals in a list

        output_dates_: a list;
            a list of output dates (associated to forecast)

        mean_: a numpy array
            contains series mean forecast as a numpy array

        lower_: a numpy array
            contains series lower bound forecast as a numpy array

        upper_: a numpy array
            contains series upper bound forecast as a numpy array

        result_df_: a data frame;
            contains 3 columns, mean forecast, lower + upper
            prediction intervals, and a date index

    Examples:

    ```python
    import pandas as pd
    from ahead import EAT

    # Data frame containing the time series
    dataset = {
    'date' : ['2020-01-01', '2020-02-01', '2020-03-01', '2020-04-01', '2020-05-01'],
    'value' : [34, 30, 35.6, 33.3, 38.1]}

    df = pd.DataFrame(dataset).set_index('date')
    print(df)

    # univariate time series forecasting
    e1 = EAT(h = 5) # default, equal weights for each model=[1/3, 1/3, 1/3]
    e1.forecast(df)
    print(e1.result_df_)
    ```

    """

    def __init__(
        self,
        h=5,
        level=95,
        weights=None,
        type_pi="E",
        date_formatting="original",
    ):
        """Keep the configuration and clear every result attribute."""

        super().__init__(h=h, level=level)

        # a fresh list per instance when no weights are given
        weights = [1 / 3] * 3 if weights is None else weights

        assert len(weights) == 3, "must have 'len(weights) == 3'"

        # configuration supplied by the caller
        self.weights = weights
        self.type_pi, self.date_formatting = type_pi, date_formatting
        self.input_df = None
        self.type_input = "univariate"

        # results; they stay empty until `forecast` has been called
        self.fcast_ = self.averages_ = self.ranges_ = None
        self.output_dates_ = []
        # three distinct lists on purpose: they must not alias each other
        self.mean_, self.lower_, self.upper_ = [], [], []
        self.result_df_ = None

    def forecast(self, df):
        """Forecasting method from `EAT` class

        Parameters:

            df: a data frame;
                a data frame containing the input time series (see example)

        Returns:

            the instance itself, with the result attributes filled in

        """

        # input dates, output dates, number of series, series names, etc.
        self.init_forecasting_params(df)

        # time series object -----
        self.format_input()

        self.get_forecast("eat")

        # result -----
        # the formatter's third return value is deliberately discarded
        averages, ranges, _ = uv.format_univariate_forecast(
            date_formatting=self.date_formatting,
            output_dates=self.output_dates_,
            horizon=self.h,
            fcast=self.fcast_,
        )
        self.averages_ = averages
        self.ranges_ = ranges

        # array views of the named components of the raw forecast object
        for component in ("mean", "lower", "upper"):
            setattr(
                self,
                component + "_",
                np.asarray(self.fcast_.rx2[component]),
            )

        self.result_df_ = umv.compute_result_df(self.averages_, self.ranges_)

        return self
Combinations of ETS (exponential smoothing), auto.arima and Theta models
Parameters:
h: an integer;
forecasting horizon
level: an integer;
Confidence level for prediction intervals
weights: a list;
coefficients assigned to each method in the ensemble
type_pi: a string;
Type of prediction interval (currently "gaussian",
ETS: "E", Arima: "A" or Theta: "T")
date_formatting: a string;
Currently:
- "original": yyyy-mm-dd
- "ms": milliseconds
Attributes:
fcast_: an object;
raw result from fitting R's `ahead::eatf` through `rpy2`
averages_: a list;
mean forecast in a list
ranges_: a list;
lower and upper prediction intervals in a list
output_dates_: a list;
a list of output dates (associated to forecast)
mean_: a numpy array
contains series mean forecast as a numpy array
lower_: a numpy array
contains series lower bound forecast as a numpy array
upper_: a numpy array
contains series upper bound forecast as a numpy array
result_df_: a data frame;
contains 3 columns, mean forecast, lower + upper
prediction intervals, and a date index
Examples:
import pandas as pd
from ahead import EAT
# Data frame containing the time series
dataset = {
'date' : ['2020-01-01', '2020-02-01', '2020-03-01', '2020-04-01', '2020-05-01'],
'value' : [34, 30, 35.6, 33.3, 38.1]}
df = pd.DataFrame(dataset).set_index('date')
print(df)
# univariate time series forecasting
e1 = EAT(h = 5) # default, equal weights for each model=[1/3, 1/3, 1/3]
e1.forecast(df)
print(e1.result_df_)
class FitForecaster(Base):
    r"""Fit and forecast time series with uncertainty quantification

    See https://r-packages.techtonique.net/ahead/doc/manual.html#fitforecast

    Parameters:

        h: an integer or None;
            forecasting horizon. When None, `fit_forecast` sets it to
            `n_rows - floor(n_rows * pct_train)` and lets R pick its own
            default (R `NULL` is passed)

        level: an integer;
            Confidence level for prediction intervals

        pct_train: a float;
            forwarded to R's `fitforecast` as `pct_train`; also used to
            derive `h` when `h` is None

        pct_calibration: a float;
            forwarded to R's `fitforecast` as `pct_calibration`

        B: an integer;
            forwarded to R's `fitforecast` as `B`; also the number of
            simulations read back into `sims_`

        seed: an integer;
            reproducibility seed

        conformalize: a boolean;
            forwarded to R's `fitforecast` as `conformalize`

        type_calibration: a string;
            forwarded to R's `fitforecast` as `type_calibration`

        gap, agg, vol, type_sim:
            stored on the instance. NOTE(review): `fit_forecast` does not
            forward these to R -- confirm whether that is intended

        date_formatting: a string;
            Currently:
            - "original": yyyy-mm-dd
            - "ms": milliseconds

    Attributes:

        fcast_: an object;
            raw result from calling R's `fitforecast` through `rpy2`

        averages_: a list;
            mean forecast

        ranges_: a list;
            lower and upper prediction intervals

        output_dates_: a list;
            a list of output dates (associated to forecast)

        mean_: a numpy array
            contains series mean forecast as a numpy array

        lower_: a numpy array
            contains series lower bound forecast as a numpy array

        upper_: a numpy array
            contains series upper bound forecast as a numpy array

        result_df_: a data frame;
            result for a univariate input (None for a multivariate input)

        result_dfs_: a data frame or a tuple of data frames;
            same object as `result_df_` for a univariate input (kept for
            backward compatibility), one data frame per series otherwise

        sims_: a tuple of numpy arrays or None;
            simulations, when the R result contains a "sims" element

    Examples:

    ```python
    url = "https://raw.githubusercontent.com/Techtonique/"
    url += "datasets/main/time_series/univariate/"
    url += "a10.csv"

    df = pd.read_csv(url)
    df.index = pd.DatetimeIndex(df.date) # must have
    df.drop(columns=['date'], inplace=True)

    # univariate ts forecasting
    d1 = FitForecaster()

    print(d1)

    start = time()
    print(d1.fit_forecast(df))
    print(f"Elapsed: {time()-start} \n")

    print(f"after: {d1.mean_}")
    print(f"after: {d1.lower_}")
    print(f"after: {d1.upper_}")
    ```

    """

    def __init__(
        self,
        h=None,
        level=95,
        pct_train=0.9,
        pct_calibration=0.5,
        B=1000,
        seed=17223,
        conformalize=False,
        type_calibration="splitconformal",
        gap=3,
        agg="mean",
        vol="constant",
        type_sim="kde",
        date_formatting="original",
    ):

        super().__init__(
            h=h,
            level=level,
        )

        self.pct_train = pct_train
        self.pct_calibration = pct_calibration
        self.B = B
        self.seed = seed
        self.conformalize = conformalize
        self.type_calibration = type_calibration
        self.gap = gap
        self.agg = agg
        self.vol = vol
        self.type_sim = type_sim
        self.date_formatting = date_formatting
        self.input_df = None

        self.fcast_ = None
        self.averages_ = None
        self.ranges_ = None
        self.output_dates_ = []
        self.mean_ = []
        self.lower_ = []
        self.upper_ = []
        self.result_df_ = None
        # declared up front so that reading them before (or after a fit
        # without simulations) does not raise AttributeError
        self.result_dfs_ = None
        self.sims_ = None

    def fit_forecast(self, df, method="thetaf"):
        """Fit `method` on `df` and compute forecasts with prediction intervals

        Parameters:

            df: a data frame;
                a data frame containing the input time series

            method: a string;
                forecasting method, one of "thetaf", "arima", "ets", "te",
                "tbats", "tslm", "dynrmf", "ridge2f", "naive", "snaive"

        Returns:

            the fitted object itself (results are stored in the trailing
            underscore attributes)

        """

        assert method in (
            "thetaf",
            "arima",
            "ets",
            "te",
            "tbats",
            "tslm",
            "dynrmf",
            "ridge2f",
            "naive",
            "snaive",
        ), 'must have method in ("thetaf", "arima", "ets", "te", "tbats", "tslm", "dynrmf", "ridge2f", "naive", "snaive")'

        # keep it in this order: `h` (what is sent to R) must be captured
        # before `self.h` is filled in with the horizon derived from
        # `pct_train`
        h = None
        if self.h is not None:
            h = self.h
        else:
            self.h = df.shape[0] - int(np.floor(df.shape[0] * self.pct_train))

        # get input dates, output dates, number of series, series names, etc.
        self.init_forecasting_params(df)

        # obtain time series object -----
        self.format_input()

        self.method = method

        self.fcast_ = config.AHEAD_PACKAGE.fitforecast(
            y=self.input_ts_,
            h=rNULL if h is None else h,
            pct_train=self.pct_train,
            pct_calibration=self.pct_calibration,
            method=self.method,
            level=self.level,
            B=self.B,
            seed=self.seed,
            conformalize=self.conformalize,
            type_calibration=self.type_calibration,
        )

        # result -----
        is_multivariate = df.shape[1] > 1

        if is_multivariate:
            (
                self.averages_,
                self.ranges_,
                _,
            ) = mv.format_multivariate_forecast(
                n_series=self.n_series,
                date_formatting=self.date_formatting,
                output_dates=self.output_dates_,
                horizon=self.h,
                fcast=self.fcast_,
            )
        else:
            (
                self.averages_,
                self.ranges_,
                _,
            ) = uv.format_univariate_forecast(
                date_formatting=self.date_formatting,
                output_dates=self.output_dates_,
                horizon=self.h,
                fcast=self.fcast_,
            )

        self.mean_ = np.asarray(self.fcast_.rx2["mean"])
        self.lower_ = np.asarray(self.fcast_.rx2["lower"])
        self.upper_ = np.asarray(self.fcast_.rx2["upper"])

        if is_multivariate:
            # one data frame per series, the convention used by the
            # multivariate forecasters of this package
            self.result_df_ = None
            self.result_dfs_ = tuple(
                umv.compute_result_df(self.averages_[i], self.ranges_[i])
                for i in range(self.n_series)
            )
        else:
            self.result_df_ = umv.compute_result_df(
                self.averages_, self.ranges_
            )
            # historical attribute name, kept so existing callers still work
            self.result_dfs_ = self.result_df_

        if "sims" in list(self.fcast_.names):
            self.sims_ = tuple(
                np.asarray(self.fcast_.rx2["sims"][i]) for i in range(self.B)
            )
        else:
            # do not keep simulations from a previous fit around
            self.sims_ = None

        return self
Fit and forecast time series with uncertainty quantification
See https://r-packages.techtonique.net/ahead/doc/manual.html#fitforecast
Examples:
<div class="pdoc-code codehilite">
<pre><span></span><code> <span class="n">url</span> <span class="o">=</span> <span class="s2">"https://raw.githubusercontent.com/Techtonique/"</span>
<span class="n">url</span> <span class="o">+=</span> <span class="s2">"datasets/main/time_series/univariate/"</span>
<span class="n">url</span> <span class="o">+=</span> <span class="s2">"a10.csv"</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">url</span><span class="p">)</span>
<span class="n">df</span><span class="o">.</span><span class="n">index</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DatetimeIndex</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">date</span><span class="p">)</span> <span class="c1"># must have</span>
<span class="n">df</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">'date'</span><span class="p">],</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="c1"># univariate ts forecasting</span>
<span class="n">d1</span> <span class="o">=</span> <span class="n">FitForecaster</span><span class="p">()</span>
<span class="nb">print</span><span class="p">(</span><span class="n">d1</span><span class="p">)</span>
<span class="n">start</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span>
<span class="nb">print</span><span class="p">(</span><span class="n">d1</span><span class="o">.</span><span class="n">fit_forecast</span><span class="p">(</span><span class="n">df</span><span class="p">))</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Elapsed: </span><span class="si">{</span><span class="n">time</span><span class="p">()</span><span class="o">-</span><span class="n">start</span><span class="si">}</span><span class="s2"> </span><span class="se">\n</span><span class="s2">"</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"after: </span><span class="si">{</span><span class="n">d1</span><span class="o">.</span><span class="n">mean_</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"after: </span><span class="si">{</span><span class="n">d1</span><span class="o">.</span><span class="n">lower_</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"after: </span><span class="si">{</span><span class="n">d1</span><span class="o">.</span><span class="n">upper_</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
</code></pre>
</div>
class Ridge2Regressor(Base):
    """Random Vector functional link network model with 2 regularization parameters

    Parameters:

        h: an integer;
            forecasting horizon

        level: an integer;
            Confidence level for prediction intervals

        lags: an integer;
            Number of lags

        nb_hidden: an integer;
            Number of nodes in hidden layer

        nodes_sim: a string;
            Type of simulation for nodes in the hidden layer
            ("sobol", "halton", "unif")

        activation: a string;
            Activation function ("relu", "sigmoid", "tanh",
            "leakyrelu", "elu", "linear")

        a: a float;
            hyperparameter for activation function "leakyrelu", "elu"

        lambda_1: a float;
            Regularization parameter for original predictors

        lambda_2: a float;
            Regularization parameter for transformed predictors

        dropout: a float;
            dropout regularization parameter (dropping nodes in hidden layer)

        type_pi: a string;
            Type of prediction interval (currently "gaussian",
            "bootstrap", (circular) "blockbootstrap", "movingblockbootstrap", "rvinecopula",
            "conformal-split", "conformal-bootstrap", "conformal-block-bootstrap")

        block_length: an integer
            length of block for multivariate block bootstrap (`type_pi == blockbootstrap` or
            `type_pi == movingblockbootstrap`)

        margins: a string;
            distribution of residuals' marginals for `type_pi == rvinecopula`: "empirical" (default),
            "gaussian"

        B: an integer;
            Number of bootstrap replications for `type_pi == bootstrap`, "blockbootstrap",
            "movingblockbootstrap", or "rvinecopula"

        type_aggregation: a string;
            Type of aggregation, ONLY for bootstrapping; either "mean" or "median"

        centers: an integer;
            Number of clusters for `type_clustering`

        type_clustering: a string;
            "kmeans" (K-Means clustering) or "hclust" (Hierarchical clustering)

        cl: an integer;
            The number of clusters for parallel execution (done in R), for `type_pi == bootstrap`

        date_formatting: a string;
            Currently:
            - "original": yyyy-mm-dd
            - "ms": milliseconds

        seed: an integer;
            reproducibility seed for type_pi == 'bootstrap'

    Attributes:

        fcast_: an object;
            raw result from fitting R's `ahead::ridge2f` through `rpy2`

        averages_: a list of lists;
            mean forecast in a list for each series

        ranges_: a list of lists;
            lower and upper prediction intervals in a list for each series

        output_dates_: a list;
            a list of output dates (associated to forecast)

        mean_: a numpy array
            contains series mean forecast as a numpy array

        lower_: a numpy array
            contains series lower bound forecast as a numpy array

        upper_: a numpy array
            contains series upper bound forecast as a numpy array

        result_dfs_: a tuple of data frames;
            each element of the tuple contains 3 columns,
            mean forecast, lower + upper prediction intervals,
            and a date index

        sims_: currently a tuple of numpy arrays
            for `type_pi == bootstrap`, simulations for each series

    Examples:

    ```python
    import pandas as pd
    from ahead import Ridge2Regressor

    # Data frame containing the time series
    dataset = {
    'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
    'series1' : [34, 30, 35.6, 33.3, 38.1],
    'series2' : [4, 5.5, 5.6, 6.3, 5.1],
    'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
    df = pd.DataFrame(dataset).set_index('date')
    print(df)

    # multivariate time series forecasting
    r1 = Ridge2Regressor(h = 5)
    r1.forecast(df)
    print(r1.result_dfs_)
    ```

    """

    def __init__(
        self,
        h=5,
        level=95,
        lags=1,
        nb_hidden=5,
        nodes_sim="sobol",
        activation="relu",
        a=0.01,
        lambda_1=0.1,
        lambda_2=0.1,
        dropout=0,
        type_pi="gaussian",
        # can be NULL, but in R (use 0 in R instead of NULL for v0.7.0)
        block_length=3,
        margins="empirical",
        B=100,
        type_aggregation="mean",
        centers=2,
        type_clustering="kmeans",
        cl=1,
        date_formatting="original",
        seed=123,
    ):

        super().__init__(
            h=h,
            level=level,
            seed=seed,
        )

        self.lags = lags
        self.nb_hidden = nb_hidden
        self.nodes_sim = nodes_sim
        self.activation = activation
        self.a = a
        self.lambda_1 = lambda_1
        self.lambda_2 = lambda_2
        self.dropout = dropout
        self.type_pi = type_pi
        self.block_length = block_length
        self.margins = margins
        self.B = B
        self.type_aggregation = type_aggregation
        # can be NULL, but in R (use 0 in R instead of NULL for v0.7.0)
        self.centers = centers
        self.type_clustering = type_clustering
        self.cl = cl
        self.date_formatting = date_formatting
        self.seed = seed
        self.input_df = None
        self.type_input = "multivariate"

        # results, filled in by `forecast`
        self.fcast_ = None
        self.averages_ = None
        self.ranges_ = None
        self.output_dates_ = []
        self.mean_ = None
        self.lower_ = None
        self.upper_ = None
        self.result_dfs_ = None
        self.sims_ = None
        self.xreg_ = None

    def forecast(self, df, xreg=None):
        """Forecasting method from `Ridge2Regressor` class

        Parameters:

            df: a data frame;
                a data frame containing the input time series (see example)

            xreg: a numpy array or a data frame;
                external regressors.
                NOTE(review): `xreg` is not referenced anywhere in this
                method's body -- confirm how external regressors are
                expected to reach the R call

        Returns:

            the fitted object itself (results are stored in the trailing
            underscore attributes)

        """

        # get input dates, output dates, number of series, series names, etc.
        self.init_forecasting_params(df)

        # obtain time series object -----
        self.format_input()

        self.get_forecast("ridge2")

        # result -----
        (
            self.averages_,
            self.ranges_,
            _,
        ) = mv.format_multivariate_forecast(
            n_series=self.n_series,
            date_formatting=self.date_formatting,
            output_dates=self.output_dates_,
            horizon=self.h,
            fcast=self.fcast_,
        )

        self.mean_ = np.asarray(self.fcast_.rx2["mean"])
        self.lower_ = np.asarray(self.fcast_.rx2["lower"])
        self.upper_ = np.asarray(self.fcast_.rx2["upper"])

        # one result data frame per series
        self.result_dfs_ = tuple(
            umv.compute_result_df(self.averages_[i], self.ranges_[i])
            for i in range(self.n_series)
        )

        # simulations are only read back for simulation-based intervals
        if self.type_pi in (
            "bootstrap",
            "blockbootstrap",
            "movingblockbootstrap",
            "rvinecopula",
        ):
            self.sims_ = tuple(
                np.asarray(self.fcast_.rx2["sims"][i]) for i in range(self.B)
            )

        return self
Random Vector functional link network model with 2 regularization parameters
Parameters:
h: an integer;
forecasting horizon
level: an integer;
Confidence level for prediction intervals
lags: an integer;
Number of lags
nb_hidden: an integer;
Number of nodes in hidden layer
nodes_sim: a string;
Type of simulation for nodes in the hidden layer
("sobol", "halton", "unif")
activation: a string;
Activation function ("relu", "sigmoid", "tanh",
"leakyrelu", "elu", "linear")
a: a float;
hyperparameter for activation function "leakyrelu", "elu"
lambda_1: a float;
Regularization parameter for original predictors
lambda_2: a float;
Regularization parameter for transformed predictors
dropout: a float;
dropout regularization parameter (dropping nodes in hidden layer)
type_pi: a string;
Type of prediction interval (currently "gaussian",
"bootstrap", (circular) "blockbootstrap", "movingblockbootstrap", "rvinecopula",
"conformal-split", "conformal-bootstrap", "conformal-block-bootstrap")
block_length: an integer
length of block for multivariate block bootstrap (`type_pi == blockbootstrap` or
`type_pi == movingblockbootstrap`)
margins: a string;
distribution of residuals' marginals for `type_pi == rvinecopula`: "empirical" (default),
"gaussian"
B: an integer;
Number of bootstrap replications for `type_pi == bootstrap`, "blockbootstrap",
"movingblockbootstrap", or "rvinecopula"
type_aggregation: a string;
Type of aggregation, ONLY for bootstrapping; either "mean" or "median"
centers: an integer;
Number of clusters for `type_clustering`
type_clustering: a string;
"kmeans" (K-Means clustering) or "hclust" (Hierarchical clustering)
cl: an integer;
The number of clusters for parallel execution (done in R), for `type_pi == bootstrap`
date_formatting: a string;
Currently:
- "original": yyyy-mm-dd
- "ms": milliseconds
seed: an integer;
reproducibility seed for type_pi == 'bootstrap'
Attributes:
fcast_: an object;
raw result from fitting R's `ahead::ridge2f` through `rpy2`
averages_: a list of lists;
mean forecast in a list for each series
ranges_: a list of lists;
lower and upper prediction intervals in a list for each series
output_dates_: a list;
a list of output dates (associated to forecast)
mean_: a numpy array
contains series mean forecast as a numpy array
lower_: a numpy array
contains series lower bound forecast as a numpy array
upper_: a numpy array
contains series upper bound forecast as a numpy array
result_dfs_: a tuple of data frames;
each element of the tuple contains 3 columns,
mean forecast, lower + upper prediction intervals,
and a date index
sims_: currently a tuple of numpy arrays
for `type_pi == bootstrap`, simulations for each series
Examples:
import pandas as pd
from ahead import Ridge2Regressor
# Data frame containing the time series
dataset = {
'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
'series1' : [34, 30, 35.6, 33.3, 38.1],
'series2' : [4, 5.5, 5.6, 6.3, 5.1],
'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
df = pd.DataFrame(dataset).set_index('date')
print(df)
# multivariate time series forecasting
r1 = Ridge2Regressor(h = 5)
r1.forecast(df)
print(r1.result_dfs_)
class VAR(Base):
    """Vector AutoRegressive (VAR) forecaster for multivariate time series

    Parameters:

        h: an integer;
            forecasting horizon

        level: an integer;
            Confidence level for prediction intervals

        lags: an integer;
            the lag order

        type_VAR: a string;
            Type of deterministic regressors to include
            ("const", "trend", "both", "none")

        date_formatting: a string;
            Currently:
            - "original": yyyy-mm-dd
            - "ms": milliseconds

    Attributes:

        fcast_: an object;
            raw result from fitting R's `ahead::varf` through `rpy2`

        averages_: a list of lists;
            mean forecast in a list for each series

        ranges_: a list of lists;
            lower and upper prediction intervals in a list for each series

        output_dates_: a list;
            a list of output dates (associated to forecast)

        mean_: a numpy array
            contains series mean forecast as a numpy array

        lower_: a numpy array
            contains series lower bound forecast as a numpy array

        upper_: a numpy array
            contains series upper bound forecast as a numpy array

        result_dfs_: a tuple of data frames;
            each element of the tuple contains 3 columns,
            mean forecast, lower + upper prediction intervals,
            and a date index

    Examples:

    ```python
    import pandas as pd
    from ahead import VAR

    # Data frame containing the time series
    dataset = {
    'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
    'series1' : [34, 30, 35.6, 33.3, 38.1],
    'series2' : [4, 5.5, 5.6, 6.3, 5.1],
    'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
    df = pd.DataFrame(dataset).set_index('date')
    print(df)

    # multivariate time series forecasting
    v1 = VAR(h = 5, date_formatting = "original", type_VAR="none")
    v1.forecast(df)
    print(v1.result_dfs_)
    ```

    """

    def __init__(
        self, h=5, level=95, lags=1, type_VAR="none", date_formatting="original"
    ):
        # deterministic terms accepted for `type_VAR`
        allowed_types = ("const", "trend", "both", "none")
        assert (
            type_VAR in allowed_types
        ), "must have: type_VAR in ('const', 'trend', 'both', 'none')"

        super().__init__(h=h, level=level)

        # user-supplied configuration
        self.lags, self.type_VAR = lags, type_VAR
        self.date_formatting = date_formatting
        self.input_df = None

        # placeholders, filled in by `forecast`
        self.fcast_ = self.averages_ = self.ranges_ = None
        self.output_dates_ = []
        self.mean_ = self.lower_ = self.upper_ = None
        self.result_dfs_ = None

    def forecast(self, df):
        """Forecasting method from `VAR` class

        Parameters:

            df: a data frame;
                a data frame containing the input time series (see example)

        Returns:

            the fitted object itself (results are stored in the trailing
            underscore attributes)

        """

        # dates, number of series, series names, ... then the time series
        # object, then the R forecast itself
        self.init_forecasting_params(df)
        self.format_input()
        self.get_forecast("var")

        # mean forecasts and prediction intervals, per series
        formatted = mv.format_multivariate_forecast(
            fcast=self.fcast_,
            horizon=self.h,
            output_dates=self.output_dates_,
            date_formatting=self.date_formatting,
            n_series=self.n_series,
        )
        self.averages_, self.ranges_, _ = formatted

        r_forecast = self.fcast_
        self.mean_ = np.asarray(r_forecast.rx2["mean"])
        self.lower_ = np.asarray(r_forecast.rx2["lower"])
        self.upper_ = np.asarray(r_forecast.rx2["upper"])

        # one result data frame per series
        per_series = []
        for idx in range(self.n_series):
            per_series.append(
                umv.compute_result_df(self.averages_[idx], self.ranges_[idx])
            )
        self.result_dfs_ = tuple(per_series)

        return self
Vector AutoRegressive model
Parameters:
h: an integer;
forecasting horizon
level: an integer;
Confidence level for prediction intervals
lags: an integer;
the lag order
type_VAR: a string;
Type of deterministic regressors to include
("const", "trend", "both", "none")
date_formatting: a string;
Currently:
- "original": yyyy-mm-dd
- "ms": milliseconds
Attributes:
fcast_: an object;
raw result from fitting R's `ahead::varf` through `rpy2`
averages_: a list of lists;
mean forecast in a list for each series
ranges_: a list of lists;
lower and upper prediction intervals in a list for each series
output_dates_: a list;
a list of output dates (associated to forecast)
mean_: a numpy array
contains series mean forecast as a numpy array
lower_: a numpy array
contains series lower bound forecast as a numpy array
upper_: a numpy array
contains series upper bound forecast as a numpy array
result_dfs_: a tuple of data frames;
each element of the tuple contains 3 columns,
mean forecast, lower + upper prediction intervals,
and a date index
Examples:
import pandas as pd
from ahead import VAR
# Data frame containing the time series
dataset = {
'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
'series1' : [34, 30, 35.6, 33.3, 38.1],
'series2' : [4, 5.5, 5.6, 6.3, 5.1],
'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
df = pd.DataFrame(dataset).set_index('date')
print(df)
# multivariate time series forecasting
v1 = VAR(h = 5, date_formatting = "original", type_VAR="none")
v1.forecast(df)
print(v1.result_dfs_)
class MLARCH(Base):
    """Conformalized Forecasting using Machine Learning (and statistical) models with ARCH effects

    mean_model: `forecast::auto.arima` (main series)
    model_residuals: `forecast::thetaf` (residuals)
    fit_func: `ahead::ridge` (volatility)

    Parameters:

        h: an integer;
            forecasting horizon

        level: an integer;
            Confidence level for prediction intervals

        B: an integer;
            Number of replications/simulations used to build the
            prediction intervals (see `type_pi`)

        type_pi: a string;
            Type of conformal prediction interval ("surrogate", "bootstrap", "kde") for volatility modeling

        type_sim_conformalize: a string;
            Type of simulation for conformalization of standardized residuals ("block-bootstrap", "surrogate", "kde", "bootstrap", or "fitdistr")

        seed: an integer;
            reproducibility seed

        date_formatting: a string;
            Currently:
            - "original": yyyy-mm-dd
            - "ms": milliseconds

    Attributes:

        fcast_: an object;
            raw result from fitting R's `ahead::MLARCHf` through `rpy2`

        averages_: a list;
            mean forecast in a list

        ranges_: a list;
            lower and upper prediction intervals in a list

        output_dates_: a list;
            a list of output dates (associated to forecast)

        mean_: a numpy array
            contains series mean forecast as a numpy array

        lower_: a numpy array
            contains series lower bound forecast as a numpy array

        upper_: a numpy array
            contains series upper bound forecast as a numpy array

        result_df_: a data frame;
            contains 3 columns, mean forecast, lower + upper
            prediction intervals, and a date index

    Examples:

    ```python
    import pandas as pd
    from ahead import MLARCH

    # Data frame containing the time series
    dataset = {
    'date' : ['2020-01-01', '2020-02-01', '2020-03-01', '2020-04-01', '2020-05-01'],
    'value' : [34, 30, 35.6, 33.3, 38.1]}

    df = pd.DataFrame(dataset).set_index('date')
    print(df)

    # univariate time series forecasting
    e1 = MLARCH(h = 5)
    e1.forecast(df)
    print(e1.result_df_)
    ```

    """

    def __init__(
        self,
        h=5,
        level=95,
        B=100,
        type_pi="surrogate",
        type_sim_conformalize="block-bootstrap",
        seed=123,
        date_formatting="original",
    ):

        super().__init__(h=h, level=level)

        self.h = h
        self.level = level
        self.B = B
        self.type_pi = type_pi
        self.type_sim_conformalize = type_sim_conformalize
        self.seed = seed
        self.date_formatting = date_formatting
        self.input_df = None
        self.type_input = "univariate"

        # results, filled in by `forecast`
        self.fcast_ = None
        self.averages_ = None
        self.ranges_ = None
        self.output_dates_ = []
        self.mean_ = []
        self.lower_ = []
        self.upper_ = []
        self.result_df_ = None

    def forecast(self, df):
        """Forecasting method from `MLARCH` class

        Parameters:

            df: a data frame;
                a data frame containing the input time series (see example)

        Returns:

            the fitted object itself (results are stored in the trailing
            underscore attributes)

        """

        # get input dates, output dates, number of series, series names, etc.
        self.init_forecasting_params(df)

        # obtain time series object -----
        self.format_input()

        # the raw R result stays available as `self.fcast_`; it is no
        # longer dumped to stdout on every call
        self.get_forecast("MLARCH")

        # result -----
        (
            self.averages_,
            self.ranges_,
            _,
        ) = uv.format_univariate_forecast(
            date_formatting=self.date_formatting,
            output_dates=self.output_dates_,
            horizon=self.h,
            fcast=self.fcast_,
        )

        self.mean_ = np.asarray(self.fcast_.rx2["mean"])
        self.lower_ = np.asarray(self.fcast_.rx2["lower"])
        self.upper_ = np.asarray(self.fcast_.rx2["upper"])

        self.result_df_ = umv.compute_result_df(self.averages_, self.ranges_)

        return self
Conformalized Forecasting using Machine Learning (and statistical) models with ARCH effects
mean_model: `forecast::auto.arima` (main series)
model_residuals: `forecast::thetaf` (residuals)
fit_func: `ahead::ridge` (volatility)
Parameters:
h: an integer;
forecasting horizon
level: an integer;
Confidence level for prediction intervals
B: an integer;
Number of replications/simulations used to build the
prediction intervals (see `type_pi`)
type_pi: a string;
Type of conformal prediction interval ("surrogate", "bootstrap", "kde") for volatility modeling
type_sim_conformalize: a string;
Type of simulation for conformalization of standardized residuals ("block-bootstrap", "surrogate", "kde", "bootstrap", or "fitdistr")
date_formatting: a string;
Currently:
- "original": yyyy-mm-dd
- "ms": milliseconds
Attributes:
fcast_: an object;
raw result from fitting R's `ahead::MLARCHf` through `rpy2`
averages_: a list;
mean forecast in a list
ranges_: a list;
lower and upper prediction intervals in a list
output_dates_: a list;
a list of output dates (associated to forecast)
mean_: a numpy array
contains series mean forecast as a numpy array
lower_: a numpy array
contains series lower bound forecast as a numpy array
upper_: a numpy array
contains series upper bound forecast as a numpy array
result_df_: a data frame;
contains 3 columns, mean forecast, lower + upper
prediction intervals, and a date index
Examples:
import pandas as pd
from ahead import MLARCH
# Data frame containing the time series
dataset = {
'date' : ['2020-01-01', '2020-02-01', '2020-03-01', '2020-04-01', '2020-05-01'],
'value' : [34, 30, 35.6, 33.3, 38.1]}
df = pd.DataFrame(dataset).set_index('date')
print(df)
# univariate time series forecasting
e1 = MLARCH(h = 5)
e1.forecast(df)
print(e1.result_df_)