ahead

Top-level package for ahead.

View Source

 1"""Top-level package for ahead."""
 2
 3__author__ = """T. Moudiki"""
 4__email__ = "thierry.moudiki@gmail.com"
 5__version__ = "0.10.0"
 6
 7from .ARMAGARCH import ArmaGarch
 8from .Basic import BasicForecaster
 9from .DynamicRegressor import DynamicRegressor
10from .EAT import EAT
11from .FitForecast import FitForecaster
12from .Ridge2 import Ridge2Regressor
13from .VAR import VAR
14from .MLARCH import MLARCH
15
16
17__all__ = [
18    "ArmaGarch",
19    "BasicForecaster",
20    "DynamicRegressor",
21    "EAT",
22    "FitForecaster",
23    "Ridge2Regressor",
24    "VAR",
25    "MLARCH"
26]

class ArmaGarch: View Source

  9class ArmaGarch(object):
 10    """ARMA(1, 1)-GARCH(1, 1) forecasting (with simulation)
 11
 12    Parameters:
 13
 14        h: an integer;
 15            forecasting horizon
 16
 17        level: an integer;
 18            Confidence level for prediction intervals
 19
 20        B: an integer;
 21            number of simulations for R's `stats::arima.sim`
 22
 23        cl: an integer;
 24            the number of clusters for parallel execution (note: parallelism is handled on the R side)
 25
 26        dist: a string;
 27            distribution of innovations ("student" or "gaussian")
 28
 29        seed: an integer;
 30            reproducibility seed
 31
 32        date_formatting: a string;
 33            Currently:
 34            - "original": yyyy-mm-dd
 35            - "ms": milliseconds
 36
 37    Attributes:
 38
 39        fcast_: an object;
 40            raw result from fitting R's `ahead::armagarchf` through `rpy2`
 41
 42        averages_: a list;
 43            mean forecast in a list
 44
 45        ranges_: a list;
 46            lower and upper prediction intervals in a list
 47
 48        output_dates_: a list;
 49            a list of output dates (associated to forecast)
 50
 51        mean_: a numpy array
 52            contains series mean forecast as a numpy array
 53
 54        lower_: a numpy array
 55            contains series lower bound forecast as a numpy array
 56
 57        upper_: a numpy array
 58            contains series upper bound forecast as a numpy array
 59
 60        result_df_: a data frame;
 61            contains 3 columns, mean forecast, lower + upper
 62            prediction intervals, and a date index
 63
 64        sims_: a numpy array
 65            forecasting simulations
 66
 67    """
 68
 69    def __init__(
 70        self,
 71        h=5,
 72        level=95,
 73        B=250,
 74        cl=1,
 75        dist="student",
 76        seed=123,
 77        date_formatting="original",
 78    ):
 79
 80        self.h = h
 81        self.level = level
 82        self.B = B
 83        self.cl = cl
 84        self.dist = dist
 85        self.seed = seed
 86        self.date_formatting = date_formatting
 87        self.input_df = None
 88
 89        self.fcast_ = None
 90        self.averages_ = None
 91        self.ranges_ = None
 92        self.output_dates_ = []
 93        self.mean_ = []
 94        self.lower_ = []
 95        self.upper_ = []
 96        self.result_df_ = None
 97        self.sims_ = None
 98
 99    def forecast(self, df):
100        """Forecasting method from `ArmaGarch` class
101
102        Parameters:
103
104            df: a data frame;
105                a data frame containing the input time series (see example)
106
107        """
108
109        # get input dates, output dates, number of series, series names, etc.
110        self.init_forecasting_params(df)
111
112        # obtain time series object -----
113        self.format_input()
114
115        self.get_forecast("armagarch")
116
117        # result -----
118        (
119            self.averages_,
120            self.ranges_,
121            self.output_dates_,
122        ) = uv.format_univariate_forecast(
123            date_formatting=self.date_formatting,
124            output_dates=self.output_dates_,
125            horizon=self.h,
126            fcast=self.fcast_,
127        )
128
129        self.mean_ = np.asarray(self.fcast_.rx2["mean"])
130        self.lower_ = np.asarray(self.fcast_.rx2["lower"])
131        self.upper_ = np.asarray(self.fcast_.rx2["upper"])
132
133        self.result_df_ = umv.compute_result_df(self.averages_, self.ranges_)
134
135        self.sims_ = np.asarray(self.fcast_.rx2["sims"])
136
137        return self

ARMA(1, 1)-GARCH(1, 1) forecasting (with simulation)

Parameters:

h: an integer;
    forecasting horizon

level: an integer;
    Confidence level for prediction intervals

B: an integer;
    number of simulations for R's `stats::arima.sim`

cl: an integer;
    the number of clusters for parallel execution (note: parallelism is handled on the R side)

dist: a string;
    distribution of innovations ("student" or "gaussian")

seed: an integer;
    reproducibility seed

date_formatting: a string;
    Currently:
    - "original": yyyy-mm-dd
    - "ms": milliseconds

Attributes:

fcast_: an object;
    raw result from fitting R's `ahead::armagarchf` through `rpy2`

averages_: a list;
    mean forecast in a list

ranges_: a list;
    lower and upper prediction intervals in a list

output_dates_: a list;
    a list of output dates (associated to forecast)

mean_: a numpy array
    contains series mean forecast as a numpy array

lower_: a numpy array
    contains series lower bound forecast as a numpy array

upper_: a numpy array
    contains series upper bound forecast as a numpy array

result_df_: a data frame;
    contains 3 columns, mean forecast, lower + upper
    prediction intervals, and a date index

sims_: a numpy array
    forecasting simulations

class BasicForecaster(ahead.Base.Base.Base): View Source

 10class BasicForecaster(Base):
 11    """Basic forecasting functions for multivariate time series (mean, median, random walk)
 12
 13    Parameters:
 14
 15        h: an integer;
 16            forecasting horizon
 17
 18        level: an integer;
 19            Confidence level for prediction intervals
 20
 21        method: a string;
 22            Forecasting method, either "mean", "median", or random walk ("rw")
 23
 24        type_pi: a string;
 25            Type of prediction interval (currently "gaussian",
 26            "bootstrap" (independent), "blockbootstrap" (circular),
 27            "movingblockbootstrap")
 28
 29        block_length: an integer
 30            length of block for multivariate block bootstrap (`type_pi == blockbootstrap`
 31            or `type_pi == movingblockbootstrap`)
 32
 33        B: an integer;
 34            Number of replications
 35
 36        date_formatting: a string;
 37            Currently:
 38            - "original": yyyy-mm-dd
 39            - "ms": milliseconds
 40
 41        seed: an integer;
 42            reproducibility seed
 43
 44    Attributes:
 45
 46        fcast_: an object;
 47            raw result from fitting R's `ahead::basicf` through `rpy2`
 48
 49        averages_: a list of lists;
 50            mean forecast in a list for each series
 51
 52        ranges_: a list of lists;
 53            lower and upper prediction intervals in a list for each series
 54
 55        output_dates_: a list;
 56            a list of output dates (associated to forecast)
 57
 58        mean_: a numpy array
 59            contains series mean forecast as a numpy array
 60
 61        lower_: a numpy array
 62            contains series lower bound forecast as a numpy array
 63
 64        upper_: a numpy array
 65            contains series upper bound forecast as a numpy array
 66
 67        result_dfs_: a tuple of data frames;
 68            each element of the tuple contains 3 columns,
 69            mean forecast, lower + upper prediction intervals,
 70            and a date index
 71
 72        sims_: currently a tuple of numpy arrays
 73            for `type_pi == bootstrap`, simulations for each series
 74
 75    Examples:
 76
 77    ```python
 78    import pandas as pd
 79    from ahead import BasicForecaster
 80
 81    # Data frame containing the time series
 82    dataset = {
 83    'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
 84    'series1' : [34, 30, 35.6, 33.3, 38.1],
 85    'series2' : [4, 5.5, 5.6, 6.3, 5.1],
 86    'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
 87    df = pd.DataFrame(dataset).set_index('date')
 88    print(df)
 89
 90    # multivariate time series forecasting
 91    r1 = BasicForecaster(h = 5)
 92    r1.forecast(df)
 93    print(r1.result_dfs_)
 94    ```
 95
 96    """
 97
 98    def __init__(
 99        self,
100        h=5,
101        level=95,
102        method="mean",
103        type_pi="gaussian",
104        block_length=5,
105        B=100,
106        date_formatting="original",
107        seed=123,
108    ):
109
110        super().__init__(
111            h=h,
112            level=level,
113            seed=seed,
114        )
115
116        self.method = method
117        self.type_pi = type_pi
118        self.block_length = block_length
119        self.B = B
120        self.date_formatting = date_formatting
121        self.input_df = None
122
123        self.fcast_ = None
124        self.averages_ = None
125        self.ranges_ = None
126        self.output_dates_ = []
127        self.mean_ = None
128        self.lower_ = None
129        self.upper_ = None
130        self.result_dfs_ = None
131        self.sims_ = None
132
133    def forecast(self, df):
134        """Forecasting method from `BasicForecaster` class
135
136        Parameters:
137
138            df: a data frame;
139                a data frame containing the input time series (see example)
140
141        """
142
143        # get input dates, output dates, number of series, series names, etc.
144        self.init_forecasting_params(df)
145
146        # obtain time series object -----
147        self.format_input()
148
149        if self.type_pi in ("blockbootstrap", "movingblockbootstrap"):
150            assert (
151                self.block_length is not None
152            ), "For `type_pi in ('blockbootstrap', 'movingblockbootstrap')`, `block_length` must be not None"
153
154        self.get_forecast()
155
156        # result -----
157        (
158            self.averages_,
159            self.ranges_,
160            _,
161        ) = mv.format_multivariate_forecast(
162            n_series=self.n_series,
163            date_formatting=self.date_formatting,
164            output_dates=self.output_dates_,
165            horizon=self.h,
166            fcast=self.fcast_,
167        )
168
169        self.mean_ = np.asarray(self.fcast_.rx2["mean"])
170        self.lower_ = np.asarray(self.fcast_.rx2["lower"])
171        self.upper_ = np.asarray(self.fcast_.rx2["upper"])
172
173        self.result_dfs_ = tuple(
174            umv.compute_result_df(self.averages_[i], self.ranges_[i])
175            for i in range(self.n_series)
176        )
177
178        if self.type_pi in (
179            "bootstrap",
180            "blockbootstrap",
181            "movingblockbootstrap",
182        ):
183            self.sims_ = tuple(
184                np.asarray(self.fcast_.rx2["sims"][i]) for i in range(self.B)
185            )
186
187        return self

Basic forecasting functions for multivariate time series (mean, median, random walk)

Parameters:

h: an integer;
    forecasting horizon

level: an integer;
    Confidence level for prediction intervals

method: a string;
    Forecasting method, either "mean", "median", or random walk ("rw")

type_pi: a string;
    Type of prediction interval (currently "gaussian",
    "bootstrap" (independent), "blockbootstrap" (circular),
    "movingblockbootstrap")

block_length: an integer
    length of block for multivariate block bootstrap (`type_pi == blockbootstrap`
    or `type_pi == movingblockbootstrap`)

B: an integer;
    Number of replications

date_formatting: a string;
    Currently:
    - "original": yyyy-mm-dd
    - "ms": milliseconds

seed: an integer;
    reproducibility seed

Attributes:

fcast_: an object;
    raw result from fitting R's `ahead::basicf` through `rpy2`

averages_: a list of lists;
    mean forecast in a list for each series

ranges_: a list of lists;
    lower and upper prediction intervals in a list for each series

output_dates_: a list;
    a list of output dates (associated to forecast)

mean_: a numpy array
    contains series mean forecast as a numpy array

lower_: a numpy array
    contains series lower bound forecast as a numpy array

upper_: a numpy array
    contains series upper bound forecast as a numpy array

result_dfs_: a tuple of data frames;
    each element of the tuple contains 3 columns,
    mean forecast, lower + upper prediction intervals,
    and a date index

sims_: a tuple of numpy arrays;
    for `type_pi` in ("bootstrap", "blockbootstrap", "movingblockbootstrap"),
    the `B` simulations (one array per replication); `None` otherwise

Examples:

import pandas as pd
from ahead import BasicForecaster

# Data frame containing the time series
dataset = {
'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
'series1' : [34, 30, 35.6, 33.3, 38.1],
'series2' : [4, 5.5, 5.6, 6.3, 5.1],
'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
df = pd.DataFrame(dataset).set_index('date')
print(df)

# multivariate time series forecasting
r1 = BasicForecaster(h = 5)
r1.forecast(df)
print(r1.result_dfs_)

class DynamicRegressor(ahead.Base.Base.Base): View Source

 10class DynamicRegressor(Base):
 11    """Dynamic Regression Model adapted from R's `forecast::nnetar`
 12
 13    Parameters:
 14
 15        h: an integer;
 16            forecasting horizon
 17
 18        level: an integer;
 19            Confidence level for prediction intervals
 20
 21        type_pi: a string;
 22            Type of prediction interval (currently "gaussian",
 23            ETS: "E", Arima: "A" or Theta: "T")
 24
 25        date_formatting: a string;
 26            Currently:
 27            - "original": yyyy-mm-dd
 28            - "ms": milliseconds
 29
 30    Attributes:
 31
 32        fcast_: an object;
 33            raw result from fitting R's `ahead::dynrmf` through `rpy2`
 34
 35        averages_: a list;
 36            mean forecast in a list
 37
 38        ranges_: a list;
 39            lower and upper prediction intervals in a list
 40
 41        output_dates_: a list;
 42            a list of output dates (associated to forecast)
 43
 44        mean_: a numpy array
 45            contains series mean forecast as a numpy array
 46
 47        lower_: a numpy array
 48            contains series lower bound forecast as a numpy array
 49
 50        upper_: a numpy array
 51            contains series upper bound forecast as a numpy array
 52
 53        result_df_: a data frame;
 54            contains 3 columns, mean forecast, lower + upper
 55            prediction intervals, and a date index
 56
 57    Examples:
 58
 59    ```python
 60    import pandas as pd
 61    from ahead import DynamicRegressor
 62
 63    # Data frame containing the time series
 64    dataset = {
 65    'date' : ['2020-01-01', '2020-02-01', '2020-03-01', '2020-04-01', '2020-05-01'],
 66    'value' : [34, 30, 35.6, 33.3, 38.1]}
 67
 68    df = pd.DataFrame(dataset).set_index('date')
 69    print(df)
 70
 71    # univariate time series forecasting
 72    d1 = DynamicRegressor(h = 5)
 73    d1.forecast(df)
 74    print(d1.result_df_)
 75    ```
 76
 77    """
 78
 79    def __init__(self, h=5, level=95, type_pi="E", date_formatting="original"):
 80
 81        super().__init__(
 82            h=h,
 83            level=level,
 84        )
 85
 86        self.type_pi = type_pi
 87        self.date_formatting = date_formatting
 88        self.input_df = None
 89        self.type_input = "univariate"
 90
 91        self.fcast_ = None
 92        self.averages_ = None
 93        self.ranges_ = None
 94        self.output_dates_ = []
 95        self.mean_ = []
 96        self.lower_ = []
 97        self.upper_ = []
 98        self.result_df_ = None
 99
100    def forecast(self, df):
101        """Forecasting method from `DynamicRegressor` class
102
103        Parameters:
104
105            df: a data frame;
106                a data frame containing the input time series (see example)
107
108        """
109
110        # get input dates, output dates, number of series, series names, etc.
111        self.init_forecasting_params(df)
112
113        # obtain time series object -----
114        self.format_input()
115
116        self.get_forecast("dynrm")
117
118        # result -----
119        (
120            self.averages_,
121            self.ranges_,
122            _,
123        ) = uv.format_univariate_forecast(
124            date_formatting=self.date_formatting,
125            output_dates=self.output_dates_,
126            horizon=self.h,
127            fcast=self.fcast_,
128        )
129
130        self.mean_ = np.asarray(self.fcast_.rx2["mean"])
131        self.lower_ = np.asarray(self.fcast_.rx2["lower"])
132        self.upper_ = np.asarray(self.fcast_.rx2["upper"])
133
134        self.result_df_ = umv.compute_result_df(self.averages_, self.ranges_)
135
136        return self

Dynamic Regression Model adapted from R's forecast::nnetar

Parameters:

h: an integer;
    forecasting horizon

level: an integer;
    Confidence level for prediction intervals

type_pi: a string;
    Type of prediction interval; one of "gaussian",
    "E" (ETS, the default), "A" (Arima) or "T" (Theta)

date_formatting: a string;
    Currently:
    - "original": yyyy-mm-dd
    - "ms": milliseconds

Attributes:

fcast_: an object;
    raw result from fitting R's `ahead::dynrmf` through `rpy2`

averages_: a list;
    mean forecast in a list

ranges_: a list;
    lower and upper prediction intervals in a list

output_dates_: a list;
    a list of output dates (associated to forecast)

mean_: a numpy array
    contains series mean forecast as a numpy array

lower_: a numpy array
    contains series lower bound forecast as a numpy array

upper_: a numpy array
    contains series upper bound forecast as a numpy array

result_df_: a data frame;
    contains 3 columns, mean forecast, lower + upper
    prediction intervals, and a date index

Examples:

import pandas as pd
from ahead import DynamicRegressor

# Data frame containing the time series
dataset = {
'date' : ['2020-01-01', '2020-02-01', '2020-03-01', '2020-04-01', '2020-05-01'],
'value' : [34, 30, 35.6, 33.3, 38.1]}

df = pd.DataFrame(dataset).set_index('date')
print(df)

# univariate time series forecasting
d1 = DynamicRegressor(h = 5)
d1.forecast(df)
print(d1.result_df_)

class EAT(ahead.Base.Base.Base): View Source

 10class EAT(Base):
 11    """Combinations of ETS (exponential smoothing), auto.arima and Theta models
 12
 13    Parameters:
 14
 15        h: an integer;
 16            forecasting horizon
 17
 18        level: an integer;
 19            Confidence level for prediction intervals
 20
 21        weights: a list;
 22            coefficients assigned to each method in the ensemble
 23
 24        type_pi: a string;
 25            Type of prediction interval (currently "gaussian",
 26            ETS: "E", Arima: "A" or Theta: "T")
 27
 28        date_formatting: a string;
 29            Currently:
 30            - "original": yyyy-mm-dd
 31            - "ms": milliseconds
 32
 33    Attributes:
 34
 35        fcast_: an object;
 36            raw result from fitting R's `ahead::eatf` through `rpy2`
 37
 38        averages_: a list;
 39            mean forecast in a list
 40
 41        ranges_: a list;
 42            lower and upper prediction intervals in a list
 43
 44        output_dates_: a list;
 45            a list of output dates (associated to forecast)
 46
 47        mean_: a numpy array
 48            contains series mean forecast as a numpy array
 49
 50        lower_: a numpy array
 51            contains series lower bound forecast as a numpy array
 52
 53        upper_: a numpy array
 54            contains series upper bound forecast as a numpy array
 55
 56        result_df_: a data frame;
 57            contains 3 columns, mean forecast, lower + upper
 58            prediction intervals, and a date index
 59
 60    Examples:
 61
 62    ```python
 63    import pandas as pd
 64    from ahead import EAT
 65
 66    # Data frame containing the time series
 67    dataset = {
 68    'date' : ['2020-01-01', '2020-02-01', '2020-03-01', '2020-04-01', '2020-05-01'],
 69    'value' : [34, 30, 35.6, 33.3, 38.1]}
 70
 71    df = pd.DataFrame(dataset).set_index('date')
 72    print(df)
 73
 74    # univariate time series forecasting
 75    e1 = EAT(h = 5) # default, equal weights for each model=[1/3, 1/3, 1/3]
 76    e1.forecast(df)
 77    print(e1.result_df_)
 78    ```
 79
 80    """
 81
 82    def __init__(
 83        self,
 84        h=5,
 85        level=95,
 86        weights=None,
 87        type_pi="E",
 88        date_formatting="original",
 89    ):
 90
 91        super().__init__(h=h, level=level)
 92
 93        if weights is None:
 94            weights = [1 / 3, 1 / 3, 1 / 3]
 95
 96        assert len(weights) == 3, "must have 'len(weights) == 3'"
 97
 98        self.weights = weights
 99        self.type_pi = type_pi
100        self.date_formatting = date_formatting
101        self.input_df = None
102        self.type_input = "univariate"
103
104        self.fcast_ = None
105        self.averages_ = None
106        self.ranges_ = None
107        self.output_dates_ = []
108        self.mean_ = []
109        self.lower_ = []
110        self.upper_ = []
111        self.result_df_ = None
112
113    def forecast(self, df):
114        """Forecasting method from `EAT` class
115
116        Parameters:
117
118            df: a data frame;
119                a data frame containing the input time series (see example)
120
121        """
122
123        # get input dates, output dates, number of series, series names, etc.
124        self.init_forecasting_params(df)
125
126        # obtain time series object -----
127        self.format_input()
128
129        self.get_forecast("eat")
130
131        # result -----
132        (
133            self.averages_,
134            self.ranges_,
135            _,
136        ) = uv.format_univariate_forecast(
137            date_formatting=self.date_formatting,
138            output_dates=self.output_dates_,
139            horizon=self.h,
140            fcast=self.fcast_,
141        )
142
143        self.mean_ = np.asarray(self.fcast_.rx2["mean"])
144        self.lower_ = np.asarray(self.fcast_.rx2["lower"])
145        self.upper_ = np.asarray(self.fcast_.rx2["upper"])
146
147        self.result_df_ = umv.compute_result_df(self.averages_, self.ranges_)
148
149        return self

Combinations of ETS (exponential smoothing), auto.arima and Theta models

Parameters:

h: an integer;
    forecasting horizon

level: an integer;
    Confidence level for prediction intervals

weights: a list;
    3 coefficients, one assigned to each method in the ensemble;
    defaults to equal weights [1/3, 1/3, 1/3]

type_pi: a string;
    Type of prediction interval; one of "gaussian",
    "E" (ETS, the default), "A" (Arima) or "T" (Theta)

date_formatting: a string;
    Currently:
    - "original": yyyy-mm-dd
    - "ms": milliseconds

Attributes:

fcast_: an object;
    raw result from fitting R's `ahead::eatf` through `rpy2`

averages_: a list;
    mean forecast in a list

ranges_: a list;
    lower and upper prediction intervals in a list

output_dates_: a list;
    a list of output dates (associated to forecast)

mean_: a numpy array
    contains series mean forecast as a numpy array

lower_: a numpy array
    contains series lower bound forecast as a numpy array

upper_: a numpy array
    contains series upper bound forecast as a numpy array

result_df_: a data frame;
    contains 3 columns, mean forecast, lower + upper
    prediction intervals, and a date index

Examples:

import pandas as pd
from ahead import EAT

# Data frame containing the time series
dataset = {
'date' : ['2020-01-01', '2020-02-01', '2020-03-01', '2020-04-01', '2020-05-01'],
'value' : [34, 30, 35.6, 33.3, 38.1]}

df = pd.DataFrame(dataset).set_index('date')
print(df)

# univariate time series forecasting
e1 = EAT(h = 5) # default, equal weights for each model=[1/3, 1/3, 1/3]
e1.forecast(df)
print(e1.result_df_)

class FitForecaster(ahead.Base.Base.Base): View Source

 12class FitForecaster(Base):
 13    """Fit and forecast time series with uncertainty quantification
 14
 15    See https://r-packages.techtonique.net/ahead/doc/manual.html#fitforecast
 16
 17    Examples:
 18
 19    ```python
 20    url = "https://raw.githubusercontent.com/Techtonique/"
 21    url += "datasets/main/time_series/univariate/"
 22    url += "a10.csv"
 23
 24    df = pd.read_csv(url)
 25    df.index = pd.DatetimeIndex(df.date) # must have
 26    df.drop(columns=['date'], inplace=True)
 27
 28    # univariate ts forecasting
 29    d1 = FitForecaster()
 30
 31    print(d1)
 32
 33    start = time()
 34    print(d1.fit_forecast(df))
 35    print(f"Elapsed: {time()-start} \n")
 36
 37    print(f"after: {d1.mean_}")
 38    print(f"after: {d1.lower_}")
 39    print(f"after: {d1.upper_}")
 40    ```
 41
 42    """
 43
 44    def __init__(
 45        self,
 46        h=None,
 47        level=95,
 48        pct_train=0.9,
 49        pct_calibration=0.5,
 50        B=1000,
 51        seed=17223,
 52        conformalize=False,
 53        type_calibration="splitconformal",
 54        gap=3,
 55        agg="mean",
 56        vol="constant",
 57        type_sim="kde",
 58        date_formatting="original",
 59    ):
 60
 61        super().__init__(
 62            h=h,
 63            level=level,
 64        )
 65
 66        self.pct_train = pct_train
 67        self.pct_calibration = pct_calibration
 68        self.B = B
 69        self.seed = seed
 70        self.conformalize = conformalize
 71        self.type_calibration = type_calibration
 72        self.gap = gap
 73        self.agg = agg
 74        self.vol = vol
 75        self.type_sim = type_sim
 76        self.date_formatting = date_formatting
 77        self.input_df = None
 78
 79        self.fcast_ = None
 80        self.averages_ = None
 81        self.ranges_ = None
 82        self.output_dates_ = []
 83        self.mean_ = []
 84        self.lower_ = []
 85        self.upper_ = []
 86        self.result_df_ = None
 87
 88    def fit_forecast(self, df, method="thetaf"):
 89
 90        assert method in (
 91            "thetaf",
 92            "arima",
 93            "ets",
 94            "te",
 95            "tbats",
 96            "tslm",
 97            "dynrmf",
 98            "ridge2f",
 99            "naive",
100            "snaive",
101        ), 'must have method in ("thetaf", "arima", "ets", "te", "tbats", "tslm", "dynrmf", "ridge2f", "naive", "snaive")'
102
103        # keep it in this order
104        h = None
105        if self.h is not None:
106            h = self.h
107        else:
108            self.h = df.shape[0] - int(np.floor(df.shape[0] * self.pct_train))
109
110        # get input dates, output dates, number of series, series names, etc.
111        self.init_forecasting_params(df)
112
113        # obtain time series object -----
114        self.format_input()
115
116        self.method = method
117
118        self.fcast_ = config.AHEAD_PACKAGE.fitforecast(
119            y=self.input_ts_,
120            h=rNULL if h is None else h,
121            pct_train=self.pct_train,
122            pct_calibration=self.pct_calibration,
123            method=self.method,
124            level=self.level,
125            B=self.B,
126            seed=self.seed,
127            conformalize=self.conformalize,
128            type_calibration=self.type_calibration,
129        )
130
131        # result -----
132        if df.shape[1] > 1:
133            (
134                self.averages_,
135                self.ranges_,
136                _,
137            ) = mv.format_multivariate_forecast(
138                n_series=self.n_series,
139                date_formatting=self.date_formatting,
140                output_dates=self.output_dates_,
141                horizon=self.h,
142                fcast=self.fcast_,
143            )
144        else:
145            (
146                self.averages_,
147                self.ranges_,
148                _,
149            ) = uv.format_univariate_forecast(
150                date_formatting=self.date_formatting,
151                output_dates=self.output_dates_,
152                horizon=self.h,
153                fcast=self.fcast_,
154            )
155
156        self.mean_ = np.asarray(self.fcast_.rx2["mean"])
157        self.lower_ = np.asarray(self.fcast_.rx2["lower"])
158        self.upper_ = np.asarray(self.fcast_.rx2["upper"])
159
160        self.result_dfs_ = umv.compute_result_df(self.averages_, self.ranges_)
161
162        if "sims" in list(self.fcast_.names):
163            self.sims_ = tuple(
164                np.asarray(self.fcast_.rx2["sims"][i]) for i in range(self.B)
165            )
166
167        return self

Fit and forecast time series with uncertainty quantification

See https://r-packages.techtonique.net/ahead/doc/manual.html#fitforecast

Examples:

import pandas as pd
from time import time
from ahead import FitForecaster

url = "https://raw.githubusercontent.com/Techtonique/"
url += "datasets/main/time_series/univariate/"
url += "a10.csv"

df = pd.read_csv(url)
df.index = pd.DatetimeIndex(df.date) # must have
df.drop(columns=['date'], inplace=True)

# univariate ts forecasting
d1 = FitForecaster()

print(d1)

start = time()
print(d1.fit_forecast(df))
print(f"Elapsed: {time()-start} \n")

print(f"after: {d1.mean_}")
print(f"after: {d1.lower_}")
print(f"after: {d1.upper_}")

class Ridge2Regressor(ahead.Base.Base.Base): View Source

 18class Ridge2Regressor(Base):
 19    """Random Vector functional link network model with 2 regularization parameters
 20
 21    Parameters:
 22
 23        h: an integer;
 24            forecasting horizon
 25
 26        level: an integer;
 27            Confidence level for prediction intervals
 28
 29        lags: an integer;
 30            Number of lags
 31
 32        nb_hidden: an integer;
 33            Number of nodes in hidden layer
 34
 35        nodes_sim: a string;
 36            Type of simulation for nodes in the hidden layer
 37            ("sobol", "halton", "unif")
 38
 39        activation: a string;
 40            Activation function ("relu", "sigmoid", "tanh",
 41            "leakyrelu", "elu", "linear")
 42
 43        a: a float;
 44            hyperparameter for activation function "leakyrelu", "elu"
 45
 46        lambda_1: a float;
 47            Regularization parameter for original predictors
 48
 49        lambda_2: a float;
 50            Regularization parameter for transformed predictors
 51
 52        dropout: a float;
 53            dropout regularization parameter (dropping nodes in hidden layer)
 54
 55        type_pi: a string;
 56            Type of prediction interval (currently "gaussian",
 57            "bootstrap", (circular) "blockbootstrap", "movingblockbootstrap", "rvinecopula", 
 58            "conformal-split", "conformal-bootstrap", "conformal-block-bootstrap")
 59
 60        block_length: an integer
 61            length of block for multivariate block bootstrap (`type_pi == blockbootstrap` or
 62            `type_pi == movingblockbootstrap`)
 63
 64        margins: a string;
 65            distribution of residuals' marginals for `type_pi == rvinecopula`: "empirical" (default),
 66            "gaussian"
 67
 68        B: an integer;
 69            Number of bootstrap replications for `type_pi == bootstrap`, "blockbootstrap",
 70            "movingblockbootstrap", or "rvinecopula"
 71
 72        type_aggregation: a string;
 73            Type of aggregation, ONLY for bootstrapping; either "mean" or "median"
 74
 75        centers: an integer;
 76            Number of clusters for `type_clustering`
 77
 78        type_clustering: a string;
 79            "kmeans" (K-Means clustering) or "hclust" (Hierarchical clustering)
 80
 81        cl: an integer;
 82            The number of clusters for parallel execution (done in R), for `type_pi == bootstrap`
 83
 84        date_formatting: a string;
 85            Currently:
 86            - "original": yyyy-mm-dd
 87            - "ms": milliseconds
 88
 89        seed: an integer;
 90            reproducibility seed for type_pi == 'bootstrap'
 91
 92    Attributes:
 93
 94        fcast_: an object;
 95            raw result from fitting R's `ahead::ridge2f` through `rpy2`
 96
 97        averages_: a list of lists;
 98            mean forecast in a list for each series
 99
100        ranges_: a list of lists;
101            lower and upper prediction intervals in a list for each series
102
103        output_dates_: a list;
104            a list of output dates (associated to forecast)
105
106        mean_: a numpy array
107            contains series mean forecast as a numpy array
108
109        lower_: a numpy array
110            contains series lower bound forecast as a numpy array
111
112        upper_: a numpy array
113            contains series upper bound forecast as a numpy array
114
115        result_dfs_: a tuple of data frames;
116            each element of the tuple contains 3 columns,
117            mean forecast, lower + upper prediction intervals,
118            and a date index
119
120        sims_: currently a tuple of numpy arrays
121            set when `type_pi` is "bootstrap", "blockbootstrap", "movingblockbootstrap" or "rvinecopula": one array of simulations per replication (`B` arrays)
122
123    Examples:
124
125    ```python
126    import pandas as pd
127    from ahead import Ridge2Regressor
128
129    # Data frame containing the time series
130    dataset = {
131    'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
132    'series1' : [34, 30, 35.6, 33.3, 38.1],
133    'series2' : [4, 5.5, 5.6, 6.3, 5.1],
134    'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
135    df = pd.DataFrame(dataset).set_index('date')
136    print(df)
137
138    # multivariate time series forecasting
139    r1 = Ridge2Regressor(h = 5)
140    r1.forecast(df)
141    print(r1.result_dfs_)
142    ```
143
144    """
145
146    def __init__(
147        self,
148        h=5,
149        level=95,
150        lags=1,
151        nb_hidden=5,
152        nodes_sim="sobol",
153        activation="relu",
154        a=0.01,
155        lambda_1=0.1,
156        lambda_2=0.1,
157        dropout=0,
158        type_pi="gaussian",
159        # can be NULL, but in R (use 0 in R instead of NULL for v0.7.0)
160        block_length=3,
161        margins="empirical",
162        B=100,
163        type_aggregation="mean",
164        centers=2,
165        type_clustering="kmeans",
166        cl=1,
167        date_formatting="original",
168        seed=123,
169    ):
170
171        super().__init__(
172            h=h,
173            level=level,
174            seed=seed,
175        )
176
177        self.lags = lags
178        self.nb_hidden = nb_hidden
179        self.nodes_sim = nodes_sim
180        self.activation = activation
181        self.a = a
182        self.lambda_1 = lambda_1
183        self.lambda_2 = lambda_2
184        self.dropout = dropout
185        self.type_pi = type_pi
186        self.block_length = block_length
187        self.margins = margins
188        self.B = B
189        self.type_aggregation = type_aggregation
190        # can be NULL, but in R (use 0 in R instead of NULL for v0.7.0)
191        self.centers = centers
192        self.type_clustering = type_clustering
193        self.cl = cl
194        self.date_formatting = date_formatting
195        self.seed = seed
196        self.input_df = None
197        self.type_input = "multivariate"
198
199        self.fcast_ = None
200        self.averages_ = None
201        self.ranges_ = None
202        self.output_dates_ = []
203        self.mean_ = None
204        self.lower_ = None
205        self.upper_ = None
206        self.result_dfs_ = None
207        self.sims_ = None
208        self.xreg_ = None
209
210    def forecast(self, df, xreg=None):
211        """Forecasting method from `Ridge2Regressor` class
212
213        Parameters:
214
215            df: a data frame;
216                a data frame containing the input time series (see example)
217
218            xreg: a numpy array or a data frame;
219                external regressors
220
221        """
222
223        # get input dates, output dates, number of series, series names, etc.
224        self.init_forecasting_params(df)
225
226        # obtain time series object -----
227        self.format_input()
228
229        self.get_forecast("ridge2")
230
231        # result -----
232        (
233            self.averages_,
234            self.ranges_,
235            _,
236        ) = mv.format_multivariate_forecast(
237            n_series=self.n_series,
238            date_formatting=self.date_formatting,
239            output_dates=self.output_dates_,
240            horizon=self.h,
241            fcast=self.fcast_,
242        )
243
244        self.mean_ = np.asarray(self.fcast_.rx2["mean"])
245        self.lower_ = np.asarray(self.fcast_.rx2["lower"])
246        self.upper_ = np.asarray(self.fcast_.rx2["upper"])
247
248        self.result_dfs_ = tuple(
249            umv.compute_result_df(self.averages_[i], self.ranges_[i])
250            for i in range(self.n_series)
251        )
252
253        if self.type_pi in (
254            "bootstrap",
255            "blockbootstrap",
256            "movingblockbootstrap",
257            "rvinecopula",
258        ):
259            self.sims_ = tuple(
260                np.asarray(self.fcast_.rx2["sims"][i]) for i in range(self.B)
261            )
262
263        return self

Random Vector functional link network model with 2 regularization parameters

Parameters:

h: an integer;
    forecasting horizon

level: an integer;
    Confidence level for prediction intervals

lags: an integer;
    Number of lags

nb_hidden: an integer;
    Number of nodes in hidden layer

nodes_sim: a string;
    Type of simulation for nodes in the hidden layer
    ("sobol", "halton", "unif")

activation: a string;
    Activation function ("relu", "sigmoid", "tanh",
    "leakyrelu", "elu", "linear")

a: a float;
    hyperparameter for activation function "leakyrelu", "elu"

lambda_1: a float;
    Regularization parameter for original predictors

lambda_2: a float;
    Regularization parameter for transformed predictors

dropout: a float;
    dropout regularization parameter (dropping nodes in hidden layer)

type_pi: a string;
    Type of prediction interval (currently "gaussian",
    "bootstrap", (circular) "blockbootstrap", "movingblockbootstrap", "rvinecopula", 
    "conformal-split", "conformal-bootstrap", "conformal-block-bootstrap")

block_length: an integer
    length of block for multivariate block bootstrap (`type_pi == blockbootstrap` or
    `type_pi == movingblockbootstrap`)

margins: a string;
    distribution of residuals' marginals for `type_pi == rvinecopula`: "empirical" (default),
    "gaussian"

B: an integer;
    Number of bootstrap replications for `type_pi == bootstrap`, "blockbootstrap",
    "movingblockbootstrap", or "rvinecopula"

type_aggregation: a string;
    Type of aggregation, ONLY for bootstrapping; either "mean" or "median"

centers: an integer;
    Number of clusters for `type_clustering`

type_clustering: a string;
    "kmeans" (K-Means clustering) or "hclust" (Hierarchical clustering)

cl: an integer;
    The number of clusters for parallel execution (done in R), for `type_pi == bootstrap`

date_formatting: a string;
    Currently:
    - "original": yyyy-mm-dd
    - "ms": milliseconds

seed: an integer;
    reproducibility seed for type_pi == 'bootstrap'

Attributes:

fcast_: an object;
    raw result from fitting R's `ahead::ridge2f` through `rpy2`

averages_: a list of lists;
    mean forecast in a list for each series

ranges_: a list of lists;
    lower and upper prediction intervals in a list for each series

output_dates_: a list;
    a list of output dates (associated to forecast)

mean_: a numpy array
    contains series mean forecast as a numpy array

lower_: a numpy array
    contains series lower bound forecast as a numpy array

upper_: a numpy array
    contains series upper bound forecast as a numpy array

result_dfs_: a tuple of data frames;
    each element of the tuple contains 3 columns,
    mean forecast, lower + upper prediction intervals,
    and a date index

sims_: currently a tuple of numpy arrays
    set when `type_pi` is "bootstrap", "blockbootstrap", "movingblockbootstrap" or "rvinecopula": one array of simulations per replication (`B` arrays)

Examples:

import pandas as pd
from ahead import Ridge2Regressor

# Data frame containing the time series
dataset = {
'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
'series1' : [34, 30, 35.6, 33.3, 38.1],
'series2' : [4, 5.5, 5.6, 6.3, 5.1],
'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
df = pd.DataFrame(dataset).set_index('date')
print(df)

# multivariate time series forecasting
r1 = Ridge2Regressor(h = 5)
r1.forecast(df)
print(r1.result_dfs_)

class VAR(ahead.Base.Base.Base): View Source

 10class VAR(Base):
 11    """Vector AutoRegressive model
 12
 13    Parameters:
 14
 15        h: an integer;
 16            forecasting horizon
 17
 18        level: an integer;
 19            Confidence level for prediction intervals
 20
 21        lags: an integer;
 22            the lag order
 23
 24        type_VAR: a string;
 25            Type of deterministic regressors to include
 26            ("const", "trend", "both", "none")
 27
 28        date_formatting: a string;
 29            Currently:
 30            - "original": yyyy-mm-dd
 31            - "ms": milliseconds
 32
 33    Attributes:
 34
 35        fcast_: an object;
 36            raw result from fitting R's `ahead::varf` through `rpy2`
 37
 38        averages_: a list of lists;
 39            mean forecast in a list for each series
 40
 41        ranges_: a list of lists;
 42            lower and upper prediction intervals in a list for each series
 43
 44        output_dates_: a list;
 45            a list of output dates (associated to forecast)
 46
 47        mean_: a numpy array
 48            contains series mean forecast as a numpy array
 49
 50        lower_: a numpy array
 51            contains series lower bound forecast as a numpy array
 52
 53        upper_: a numpy array
 54            contains series upper bound forecast as a numpy array
 55
 56        result_dfs_: a tuple of data frames;
 57            each element of the tuple contains 3 columns,
 58            mean forecast, lower + upper prediction intervals,
 59            and a date index
 60
 61    Examples:
 62
 63    ```python
 64    import pandas as pd
 65    from ahead import VAR
 66
 67    # Data frame containing the time series
 68    dataset = {
 69    'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
 70    'series1' : [34, 30, 35.6, 33.3, 38.1],
 71    'series2' : [4, 5.5, 5.6, 6.3, 5.1],
 72    'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
 73    df = pd.DataFrame(dataset).set_index('date')
 74    print(df)
 75
 76    # multivariate time series forecasting
 77    v1 = VAR(h = 5, date_formatting = "original", type_VAR="none")
 78    v1.forecast(df)
 79    print(v1.result_dfs_)
 80    ```
 81
 82    """
 83
 84    def __init__(
 85        self, h=5, level=95, lags=1, type_VAR="none", date_formatting="original"
 86    ):  # type_VAR = "const", "trend", "both", "none"
 87
 88        assert type_VAR in (
 89            "const",
 90            "trend",
 91            "both",
 92            "none",
 93        ), "must have: type_VAR in ('const', 'trend', 'both', 'none')"
 94
 95        super().__init__(
 96            h=h,
 97            level=level,
 98        )
 99
100        self.lags = lags
101        self.type_VAR = type_VAR
102        self.date_formatting = date_formatting
103        self.input_df = None
104
105        self.fcast_ = None
106        self.averages_ = None
107        self.ranges_ = None
108        self.output_dates_ = []
109        self.mean_ = None
110        self.lower_ = None
111        self.upper_ = None
112        self.result_dfs_ = None
113
114    def forecast(self, df):
115        """Forecasting method from `VAR` class
116
117        Parameters:
118
119            df: a data frame;
120                a data frame containing the input time series (see example)
121
122        """
123
124        # get input dates, output dates, number of series, series names, etc.
125        self.init_forecasting_params(df)
126
127        # obtain time series object -----
128        self.format_input()
129
130        self.get_forecast("var")
131
132        # result -----
133        (
134            self.averages_,
135            self.ranges_,
136            _,
137        ) = mv.format_multivariate_forecast(
138            n_series=self.n_series,
139            date_formatting=self.date_formatting,
140            output_dates=self.output_dates_,
141            horizon=self.h,
142            fcast=self.fcast_,
143        )
144
145        self.mean_ = np.asarray(self.fcast_.rx2["mean"])
146        self.lower_ = np.asarray(self.fcast_.rx2["lower"])
147        self.upper_ = np.asarray(self.fcast_.rx2["upper"])
148
149        self.result_dfs_ = tuple(
150            umv.compute_result_df(self.averages_[i], self.ranges_[i])
151            for i in range(self.n_series)
152        )
153
154        return self

Vector AutoRegressive model

Parameters:

h: an integer;
    forecasting horizon

level: an integer;
    Confidence level for prediction intervals

lags: an integer;
    the lag order

type_VAR: a string;
    Type of deterministic regressors to include
    ("const", "trend", "both", "none")

date_formatting: a string;
    Currently:
    - "original": yyyy-mm-dd
    - "ms": milliseconds

Attributes:

fcast_: an object;
    raw result from fitting R's `ahead::varf` through `rpy2`

averages_: a list of lists;
    mean forecast in a list for each series

ranges_: a list of lists;
    lower and upper prediction intervals in a list for each series

output_dates_: a list;
    a list of output dates (associated to forecast)

mean_: a numpy array
    contains series mean forecast as a numpy array

lower_: a numpy array
    contains series lower bound forecast as a numpy array

upper_: a numpy array
    contains series upper bound forecast as a numpy array

result_dfs_: a tuple of data frames;
    each element of the tuple contains 3 columns,
    mean forecast, lower + upper prediction intervals,
    and a date index

Examples:

import pandas as pd
from ahead import VAR

# Data frame containing the time series
dataset = {
'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
'series1' : [34, 30, 35.6, 33.3, 38.1],
'series2' : [4, 5.5, 5.6, 6.3, 5.1],
'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
df = pd.DataFrame(dataset).set_index('date')
print(df)

# multivariate time series forecasting
v1 = VAR(h = 5, date_formatting = "original", type_VAR="none")
v1.forecast(df)
print(v1.result_dfs_)

class MLARCH(ahead.Base.Base.Base): View Source

 10class MLARCH(Base):
 11    """Conformalized Forecasting using Machine Learning (and statistical) models with ARCH effects
 12
 13    mean_model: `forecast::auto.arima` (main series)
 14    model_residuals: `forecast::thetaf` (residuals)
 15    fit_func: `ahead::ridge` (volatility)
 16
 17    
 18
 19    Parameters:
 20
 21        h: an integer;
 22            forecasting horizon
 23
 24        level: an integer;
 25            Confidence level for prediction intervals
 26
 27        B: an integer;
 28            Number of simulations (replications) used to build the prediction
 29            intervals (see `type_pi` and `type_sim_conformalize`)
 30
 31        type_pi: a string;
 32            Type of conformal prediction interval ("surrogate", "bootstrap", "kde") for volatility modeling
 33
 34        type_sim_conformalize: a string;
 35            Type of simulation for conformalization of standardized residuals ("block-bootstrap", "surrogate", "kde", "bootstrap", or "fitdistr")
 36
 37        date_formatting: a string;
 38            Currently:
 39            - "original": yyyy-mm-dd
 40            - "ms": milliseconds
 41
 42    Attributes:
 43
 44        fcast_: an object;
 45            raw result from fitting R's `ahead::MLARCHf` through `rpy2`
 46
 47        averages_: a list;
 48            mean forecast in a list
 49
 50        ranges_: a list;
 51            lower and upper prediction intervals in a list
 52
 53        output_dates_: a list;
 54            a list of output dates (associated to forecast)
 55
 56        mean_: a numpy array
 57            contains series mean forecast as a numpy array
 58
 59        lower_: a numpy array
 60            contains series lower bound forecast as a numpy array
 61
 62        upper_: a numpy array
 63            contains series upper bound forecast as a numpy array
 64
 65        result_df_: a data frame;
 66            contains 3 columns, mean forecast, lower + upper
 67            prediction intervals, and a date index
 68
 69    Examples:
 70
 71    ```python
 72    import pandas as pd
 73    from ahead import MLARCH
 74
 75    # Data frame containing the time series
 76    dataset = {
 77    'date' : ['2020-01-01', '2020-02-01', '2020-03-01', '2020-04-01', '2020-05-01'],
 78    'value' : [34, 30, 35.6, 33.3, 38.1]}
 79
 80    df = pd.DataFrame(dataset).set_index('date')
 81    print(df)
 82
 83    # univariate time series forecasting
 84    e1 = MLARCH(h = 5) 
 85    e1.forecast(df)
 86    print(e1.result_df_)
 87    ```
 88
 89    """
 90
 91    def __init__(
 92        self,
 93        h=5,
 94        level=95,
 95        B=100,
 96        type_pi="surrogate",
 97        type_sim_conformalize="block-bootstrap",
 98        seed=123,
 99        date_formatting="original",
100    ):
101
102        super().__init__(h=h, level=level)
103        
104        self.h = h
105        self.level = level
106        self.B = B
107        self.type_pi = type_pi
108        self.type_sim_conformalize = type_sim_conformalize  
109        self.seed = seed
110        self.date_formatting = date_formatting
111        self.input_df = None
112        self.type_input = "univariate"
113
114        self.fcast_ = None
115        self.averages_ = None
116        self.ranges_ = None
117        self.output_dates_ = []
118        self.mean_ = []
119        self.lower_ = []
120        self.upper_ = []
121        self.result_df_ = None
122
123    def forecast(self, df):
124        """Forecasting method from `MLARCH` class
125
126        Parameters:
127
128            df: a data frame;
129                a data frame containing the input time series (see example)
130
131        """
132
133        # get input dates, output dates, number of series, series names, etc.
134        self.init_forecasting_params(df)
135
136        # obtain time series object -----
137        self.format_input()
138
139        self.get_forecast("MLARCH")
140
141        print(f"MLARCH: {self.fcast_}")
142
143        # result -----
144        (
145            self.averages_,
146            self.ranges_,
147            _,
148        ) = uv.format_univariate_forecast(
149            date_formatting=self.date_formatting,
150            output_dates=self.output_dates_,
151            horizon=self.h,
152            fcast=self.fcast_,
153        )
154
155        self.mean_ = np.asarray(self.fcast_.rx2["mean"])
156        self.lower_ = np.asarray(self.fcast_.rx2["lower"])
157        self.upper_ = np.asarray(self.fcast_.rx2["upper"])
158
159        self.result_df_ = umv.compute_result_df(self.averages_, self.ranges_)
160
161        return self

Conformalized Forecasting using Machine Learning (and statistical) models with ARCH effects

- mean_model: `forecast::auto.arima` (main series)
- model_residuals: `forecast::thetaf` (residuals)
- fit_func: `ahead::ridge` (volatility)

Parameters:

h: an integer;
    forecasting horizon

level: an integer;
    Confidence level for prediction intervals

B: an integer;
    Number of simulations (replications) used to build the prediction
    intervals (see `type_pi` and `type_sim_conformalize`)

type_pi: a string;
    Type of conformal prediction interval ("surrogate", "bootstrap", "kde") for volatility modeling

type_sim_conformalize: a string;
    Type of simulation for conformalization of standardized residuals ("block-bootstrap", "surrogate", "kde", "bootstrap", or "fitdistr")

seed: an integer;
    reproducibility seed

date_formatting: a string;
    Currently:
    - "original": yyyy-mm-dd
    - "ms": milliseconds

Attributes:

fcast_: an object;
    raw result from fitting R's `ahead::MLARCHf` through `rpy2`

averages_: a list;
    mean forecast in a list

ranges_: a list;
    lower and upper prediction intervals in a list

output_dates_: a list;
    a list of output dates (associated to forecast)

mean_: a numpy array
    contains series mean forecast as a numpy array

lower_: a numpy array
    contains series lower bound forecast as a numpy array

upper_: a numpy array
    contains series upper bound forecast as a numpy array

result_df_: a data frame;
    contains 3 columns, mean forecast, lower + upper
    prediction intervals, and a date index

Examples:

import pandas as pd
from ahead import MLARCH

# Data frame containing the time series
dataset = {
'date' : ['2020-01-01', '2020-02-01', '2020-03-01', '2020-04-01', '2020-05-01'],
'value' : [34, 30, 35.6, 33.3, 38.1]}

df = pd.DataFrame(dataset).set_index('date')
print(df)

# univariate time series forecasting
e1 = MLARCH(h = 5) 
e1.forecast(df)
print(e1.result_df_)