ahead.Base.Base
import numpy as np
import matplotlib.pyplot as plt

from rpy2.robjects import numpy2ri, r
from rpy2.robjects.vectors import FloatVector

from subprocess import Popen, PIPE
from .. import config
from ..utils.univariate import compute_y_ts
from ..utils.multivariate import compute_y_mts
from ..utils.unimultivariate import compute_input_dates, compute_output_dates


class Base(object):
    """Shared state, R dispatch and plotting for the forecasting models.

    `get_forecast` forwards the stored configuration to a function of
    `config.AHEAD_PACKAGE` chosen from `self.method`; `plot` draws the
    stored results. Some attributes read by `get_forecast` (`cl`, `dist`,
    `block_length`, `nb_hidden`, `nodes_sim`, `activation`, `a`,
    `lambda_1`, `lambda_2`, `dropout`, `margins`, `centers`, `type_VAR`)
    are never assigned in this class; presumably subclasses set them
    -- TODO confirm.
    """

    def __init__(self, h=5, level=95, date_formatting="ms", seed=123):
        """Store the forecasting configuration and check that R is available.

        Parameters:

        h: {integer}
            forwarded as `h` to the R forecasting functions and to
            `compute_output_dates` (presumably the forecasting horizon)

        level: {integer}
            forwarded as `level` to the R forecasting functions
            (presumably the level of the prediction intervals)

        date_formatting: {string}
            stored as-is; not read anywhere else in this class

        seed: {integer}
            forwarded as `seed` to the R functions that accept one

        Raises:

        ImportError
            if `which R` cannot be launched or exits with a non-zero status
        """
        self.h = h
        self.level = level
        self.date_formatting = date_formatting
        # The `seed` argument is kept: it used to be reset to None further
        # down, which silently discarded the caller's value.
        self.seed = seed
        self.frequency = None
        self.series_names = None
        self.n_series = None
        self.type_input = "univariate"  # (or "multivariate")
        self.B = None
        self.input_df = None
        self.input_dates = None
        self.method = None
        self.weights = None
        self.type_pi = None
        self.type_conformalize = None
        self.type_sim_conformalize = None
        self.type_aggregation = None
        self.type_clustering = None
        self.lags = None
        self.lags_ = None  # used for VAR

        self.input_ts_ = None  # input time series
        self.mean_ = None
        self.lower_ = None
        self.upper_ = None
        self.sims_ = None
        self.output_dates_ = None
        self.result_dfs_ = None

        try:
            # `communicate()` drains both pipes and the context manager
            # closes them, so the probe does not leak file descriptors.
            with Popen(["which", "R"], stdout=PIPE, stderr=PIPE) as proc:
                proc.communicate()
        except OSError:
            # `which` itself could not be started: treat R as unavailable.
            r_is_installed = False
        else:
            r_is_installed = proc.returncode == 0

        if not r_is_installed:
            raise ImportError("R is not installed! \n" + config.USAGE_MESSAGE)

    def format_input(self):
        """Build `self.input_ts_` from `self.input_df` and `self.frequency`."""
        # NOTE(review): `shape[1] > 0` holds for any input with at least one
        # column, so `compute_y_ts` is only reached for a column-less input;
        # `> 1` may have been intended -- confirm before changing.
        if self.input_df.shape[1] > 0:
            self.input_ts_ = compute_y_mts(self.input_df, self.frequency)
        else:
            self.input_ts_ = compute_y_ts(self.input_df, self.frequency)

    def init_forecasting_params(self, df):
        """Record the input data frame and the values derived from it.

        Parameters:

        df: {data frame}
            input series; must expose `columns` and `shape`
        """
        self.input_df = df
        self.series_names = df.columns
        self.n_series = len(self.series_names)
        self.input_dates = compute_input_dates(df)
        # NOTE(review): `len(df.shape) > 0` is true for anything with at least
        # one dimension, so `type_input` always ends up "multivariate" here;
        # confirm whether a test on the number of columns was intended.
        self.type_input = "multivariate" if len(df.shape) > 0 else "univariate"
        self.output_dates_, self.frequency = compute_output_dates(df, self.h)

    def getsims(self, input_tuple, ix):
        """Gather column `ix` of every simulation into one array.

        Parameters:

        input_tuple: {sequence of data frames}
            one frame per simulation

        ix: {integer}
            positional index of the column to extract from each frame

        Returns:

        array in which column `j` holds column `ix` of `input_tuple[j]`
        """
        columns = [sim.iloc[:, ix].values for sim in input_tuple]
        return np.asarray(columns).T

    def _xreg_to_r_matrix(self, xreg):
        """Store `xreg` in `self.xreg_` and return it as a row-major R matrix."""
        try:
            self.xreg_ = xreg.values
        except AttributeError:
            # No `.values` attribute: work on a copy of `xreg` itself.
            self.xreg_ = config.DEEP_COPY(xreg)

        # A one-dimensional input becomes a single-column matrix.
        n_cols = self.xreg_.shape[1] if len(self.xreg_.shape) > 1 else 1

        numpy2ri.activate()

        return r.matrix(
            FloatVector(self.xreg_.flatten()),
            byrow=True,
            nrow=self.xreg_.shape[0],
            ncol=n_cols,
        )

    def _ridge2_kwargs(self, xreg=None):
        """Collect the keyword arguments for `ridge2f`; `xreg` only if given."""
        kwargs = {}
        if xreg is not None:
            kwargs["xreg"] = self._xreg_to_r_matrix(xreg)
        kwargs.update(
            h=self.h,
            level=self.level,
            lags=self.lags,
            nb_hidden=self.nb_hidden,
            nodes_sim=self.nodes_sim,
            activ=self.activation,
            a=self.a,
            lambda_1=self.lambda_1,
            lambda_2=self.lambda_2,
            dropout=self.dropout,
            type_pi=self.type_pi,
            margins=self.margins,
            # can be NULL, but in R (use 0 in R instead of NULL for v0.7.0)
            block_length=self.block_length,
            B=self.B,
            type_aggregation=self.type_aggregation,
            # can be NULL, but in R (use 0 in R instead of NULL for v0.7.0)
            centers=self.centers,
            type_clustering=self.type_clustering,
            cl=self.cl,
            seed=self.seed,
        )
        return kwargs

    def _mlarch_kwargs(self):
        """Collect the keyword arguments for `mlarchf`, dropping None values."""
        valid_type_pi = ("surrogate", "bootstrap", "kde")
        type_pi = self.type_pi if self.type_pi in valid_type_pi else "surrogate"
        valid_type_sim = (
            "surrogate",
            "block-bootstrap",
            "bootstrap",
            "kde",
            "fitdistr",
        )
        type_sim_conformalize = (
            self.type_sim_conformalize
            if self.type_sim_conformalize in valid_type_sim
            else "surrogate"
        )

        mlarch_args = dict(
            y=self.input_ts_,
            h=self.h,
            mean_model=getattr(self, "mean_model", None),
            model_residuals=getattr(self, "model_residuals", None),
            fit_func=getattr(self, "fit_func", None),
            predict_func=getattr(self, "predict_func", None),
            type_pi=type_pi,
            type_sim_conformalize=type_sim_conformalize,
            ml_method=getattr(self, "ml_method", None),
            level=self.level,
            B=self.B,
            ml=True,
            stat_model=getattr(self, "stat_model", None),
            seed=self.seed,
        )
        # Remove keys with value None
        return {k: v for k, v in mlarch_args.items() if v is not None}

    def get_forecast(self, method=None, xreg=None):
        """Call the R function matching `self.method`; store it in `self.fcast_`.

        Parameters:

        method: {string}
            when not None, replaces `self.method` before dispatching

        xreg: {array-like}
            only read when the method is "ridge2"; passed to `ridge2f` as an
            R matrix when not None

        Raises:

        ValueError
            if neither `method` nor `self.method` is set

        A method name matching none of the branches leaves `self.fcast_`
        untouched.
        """
        if method is not None:
            self.method = method

        if self.method is None:
            # Previously surfaced as an AttributeError from `None.lower()`.
            raise ValueError(
                "no forecasting method set: pass `method` or set `self.method`"
            )

        if self.method == "armagarch":
            self.fcast_ = config.AHEAD_PACKAGE.armagarchf(
                y=self.input_ts_,
                h=self.h,
                level=self.level,
                B=self.B,
                cl=self.cl,
                dist=self.dist,
                seed=self.seed,
            )

        elif self.method in ("mean", "median", "rw"):
            self.fcast_ = config.AHEAD_PACKAGE.basicf(
                self.input_ts_,
                h=self.h,
                level=self.level,
                method=self.method,
                type_pi=self.type_pi,
                block_length=self.block_length,
                B=self.B,
                seed=self.seed,
            )

        elif self.method == "dynrm":
            self.fcast_ = config.AHEAD_PACKAGE.dynrmf(
                y=self.input_ts_,
                h=self.h,
                level=self.level,
                type_pi=self.type_pi,
            )

        elif self.method == "eat":
            self.fcast_ = config.AHEAD_PACKAGE.eatf(
                y=self.input_ts_,
                h=self.h,
                level=self.level,
                type_pi=self.type_pi,
                weights=config.FLOATVECTOR(self.weights),
            )

        elif self.method == "ridge2":
            self.fcast_ = config.AHEAD_PACKAGE.ridge2f(
                self.input_ts_, **self._ridge2_kwargs(xreg)
            )

        elif self.method == "var":
            self.fcast_ = config.AHEAD_PACKAGE.varf(
                self.input_ts_,
                h=self.h,
                level=self.level,
                lags=self.lags,
                type_VAR=self.type_VAR,
            )

        elif self.method.lower() == "mlarch":
            self.fcast_ = config.AHEAD_PACKAGE.mlarchf(**self._mlarch_kwargs())

    def plot(self, series, type_axis="dates", type_plot="pi"):
        """Plot time series forecast

        Parameters:

        series: {integer} or {string}
            series index or name

        type_axis: {string}
            "dates" labels the x axis with formatted dates, "numeric" with
            integer positions

        type_plot: {string}
            "pi" draws the mean forecast with its lower/upper band,
            "spaghetti" draws every simulation in `self.sims_`

        Raises:

        ValueError
            if `type_axis` is neither "numeric" nor "dates"
        """
        assert all(
            [
                self.mean_ is not None,
                self.lower_ is not None,
                self.upper_ is not None,
                self.output_dates_ is not None,
            ]
        ), "model forecasting must be obtained first (with `forecast` method)"

        if isinstance(series, str):
            assert (
                series in self.series_names
            ), f"series {series} doesn't exist in the input dataset"
            series_idx = self.input_df.columns.get_loc(series)
        else:
            # numpy integers (e.g. from `np.arange`) are accepted as well
            assert isinstance(series, (int, np.integer)) and (
                0 <= series < self.n_series
            ), f"check series index (< {self.n_series})"
            series_idx = series

        forecast_df = self.result_dfs_[series_idx]
        y_test = list(forecast_df["mean"].values)
        y_all = list(self.input_df.iloc[:, series_idx]) + y_test
        n_points_all = len(y_all)
        n_points_train = self.input_df.shape[0]

        if type_axis == "numeric":
            x_all = list(range(n_points_all))
            x_test = list(range(n_points_train, n_points_all))
        elif type_axis == "dates":  # use dates
            x_train = [date.strftime("%Y-%m-%d") for date in self.input_dates]
            x_test = [date.strftime("%Y-%m-%d") for date in self.output_dates_]
            x_all = np.concatenate((x_train, x_test), axis=None)
        else:
            # Without this the x values below would be undefined.
            raise ValueError(
                f"type_axis must be 'numeric' or 'dates', got {type_axis!r}"
            )

        if type_plot == "pi":
            _, ax = plt.subplots()
            ax.plot(x_all, y_all, "-")
            ax.plot(x_test, y_test, "-", color="orange")
            ax.fill_between(
                x_test,
                forecast_df["lower"].values,
                forecast_df["upper"].values,
                alpha=0.2,
                color="orange",
            )
            plt.title(
                f"prediction intervals for {series}",
                loc="left",
                fontsize=12,
                fontweight=0,
                color="black",
            )
            plt.show()

        elif type_plot == "spaghetti":
            palette = plt.get_cmap("Set1")
            sims_ix = self.getsims(self.sims_, series_idx)
            plt.plot(x_all, y_all, "-")
            for col_ix in range(
                sims_ix.shape[1]
            ):  # avoid this when there are thousands of simulations
                plt.plot(
                    x_test,
                    sims_ix[:, col_ix],
                    "-",
                    # cycle through the palette: an index past its last
                    # entry would otherwise always map to the same colour
                    color=palette(col_ix % palette.N),
                    linewidth=1,
                    alpha=0.9,
                )
            plt.plot(x_all, y_all, "-", color="black")
            plt.plot(x_test, y_test, "-", color="blue")
            # Add titles
            plt.title(
                f"{self.B} simulations of {series}",
                loc="left",
                fontsize=12,
                fontweight=0,
                color="black",
            )
            plt.xlabel("Time")
            plt.ylabel("Values")
            # Show the graph
            plt.show()
class
Base:
class Base(object):
    """Shared state, R dispatch and plotting for the forecasting models.

    `get_forecast` forwards the stored configuration to a function of
    `config.AHEAD_PACKAGE` chosen from `self.method`; `plot` draws the
    stored results. Some attributes read by `get_forecast` (`cl`, `dist`,
    `block_length`, `nb_hidden`, `nodes_sim`, `activation`, `a`,
    `lambda_1`, `lambda_2`, `dropout`, `margins`, `centers`, `type_VAR`)
    are never assigned in this class; presumably subclasses set them
    -- TODO confirm.
    """

    def __init__(self, h=5, level=95, date_formatting="ms", seed=123):
        """Store the forecasting configuration and check that R is available.

        Parameters:

        h: {integer}
            forwarded as `h` to the R forecasting functions and to
            `compute_output_dates` (presumably the forecasting horizon)

        level: {integer}
            forwarded as `level` to the R forecasting functions
            (presumably the level of the prediction intervals)

        date_formatting: {string}
            stored as-is; not read anywhere else in this class

        seed: {integer}
            forwarded as `seed` to the R functions that accept one

        Raises:

        ImportError
            if `which R` cannot be launched or exits with a non-zero status
        """
        self.h = h
        self.level = level
        self.date_formatting = date_formatting
        # The `seed` argument is kept: it used to be reset to None further
        # down, which silently discarded the caller's value.
        self.seed = seed
        self.frequency = None
        self.series_names = None
        self.n_series = None
        self.type_input = "univariate"  # (or "multivariate")
        self.B = None
        self.input_df = None
        self.input_dates = None
        self.method = None
        self.weights = None
        self.type_pi = None
        self.type_conformalize = None
        self.type_sim_conformalize = None
        self.type_aggregation = None
        self.type_clustering = None
        self.lags = None
        self.lags_ = None  # used for VAR

        self.input_ts_ = None  # input time series
        self.mean_ = None
        self.lower_ = None
        self.upper_ = None
        self.sims_ = None
        self.output_dates_ = None
        self.result_dfs_ = None

        try:
            # `communicate()` drains both pipes and the context manager
            # closes them, so the probe does not leak file descriptors.
            with Popen(["which", "R"], stdout=PIPE, stderr=PIPE) as proc:
                proc.communicate()
        except OSError:
            # `which` itself could not be started: treat R as unavailable.
            r_is_installed = False
        else:
            r_is_installed = proc.returncode == 0

        if not r_is_installed:
            raise ImportError("R is not installed! \n" + config.USAGE_MESSAGE)

    def format_input(self):
        """Build `self.input_ts_` from `self.input_df` and `self.frequency`."""
        # NOTE(review): `shape[1] > 0` holds for any input with at least one
        # column, so `compute_y_ts` is only reached for a column-less input;
        # `> 1` may have been intended -- confirm before changing.
        if self.input_df.shape[1] > 0:
            self.input_ts_ = compute_y_mts(self.input_df, self.frequency)
        else:
            self.input_ts_ = compute_y_ts(self.input_df, self.frequency)

    def init_forecasting_params(self, df):
        """Record the input data frame and the values derived from it.

        Parameters:

        df: {data frame}
            input series; must expose `columns` and `shape`
        """
        self.input_df = df
        self.series_names = df.columns
        self.n_series = len(self.series_names)
        self.input_dates = compute_input_dates(df)
        # NOTE(review): `len(df.shape) > 0` is true for anything with at least
        # one dimension, so `type_input` always ends up "multivariate" here;
        # confirm whether a test on the number of columns was intended.
        self.type_input = "multivariate" if len(df.shape) > 0 else "univariate"
        self.output_dates_, self.frequency = compute_output_dates(df, self.h)

    def getsims(self, input_tuple, ix):
        """Gather column `ix` of every simulation into one array.

        Parameters:

        input_tuple: {sequence of data frames}
            one frame per simulation

        ix: {integer}
            positional index of the column to extract from each frame

        Returns:

        array in which column `j` holds column `ix` of `input_tuple[j]`
        """
        columns = [sim.iloc[:, ix].values for sim in input_tuple]
        return np.asarray(columns).T

    def _xreg_to_r_matrix(self, xreg):
        """Store `xreg` in `self.xreg_` and return it as a row-major R matrix."""
        try:
            self.xreg_ = xreg.values
        except AttributeError:
            # No `.values` attribute: work on a copy of `xreg` itself.
            self.xreg_ = config.DEEP_COPY(xreg)

        # A one-dimensional input becomes a single-column matrix.
        n_cols = self.xreg_.shape[1] if len(self.xreg_.shape) > 1 else 1

        numpy2ri.activate()

        return r.matrix(
            FloatVector(self.xreg_.flatten()),
            byrow=True,
            nrow=self.xreg_.shape[0],
            ncol=n_cols,
        )

    def _ridge2_kwargs(self, xreg=None):
        """Collect the keyword arguments for `ridge2f`; `xreg` only if given."""
        kwargs = {}
        if xreg is not None:
            kwargs["xreg"] = self._xreg_to_r_matrix(xreg)
        kwargs.update(
            h=self.h,
            level=self.level,
            lags=self.lags,
            nb_hidden=self.nb_hidden,
            nodes_sim=self.nodes_sim,
            activ=self.activation,
            a=self.a,
            lambda_1=self.lambda_1,
            lambda_2=self.lambda_2,
            dropout=self.dropout,
            type_pi=self.type_pi,
            margins=self.margins,
            # can be NULL, but in R (use 0 in R instead of NULL for v0.7.0)
            block_length=self.block_length,
            B=self.B,
            type_aggregation=self.type_aggregation,
            # can be NULL, but in R (use 0 in R instead of NULL for v0.7.0)
            centers=self.centers,
            type_clustering=self.type_clustering,
            cl=self.cl,
            seed=self.seed,
        )
        return kwargs

    def _mlarch_kwargs(self):
        """Collect the keyword arguments for `mlarchf`, dropping None values."""
        valid_type_pi = ("surrogate", "bootstrap", "kde")
        type_pi = self.type_pi if self.type_pi in valid_type_pi else "surrogate"
        valid_type_sim = (
            "surrogate",
            "block-bootstrap",
            "bootstrap",
            "kde",
            "fitdistr",
        )
        type_sim_conformalize = (
            self.type_sim_conformalize
            if self.type_sim_conformalize in valid_type_sim
            else "surrogate"
        )

        mlarch_args = dict(
            y=self.input_ts_,
            h=self.h,
            mean_model=getattr(self, "mean_model", None),
            model_residuals=getattr(self, "model_residuals", None),
            fit_func=getattr(self, "fit_func", None),
            predict_func=getattr(self, "predict_func", None),
            type_pi=type_pi,
            type_sim_conformalize=type_sim_conformalize,
            ml_method=getattr(self, "ml_method", None),
            level=self.level,
            B=self.B,
            ml=True,
            stat_model=getattr(self, "stat_model", None),
            seed=self.seed,
        )
        # Remove keys with value None
        return {k: v for k, v in mlarch_args.items() if v is not None}

    def get_forecast(self, method=None, xreg=None):
        """Call the R function matching `self.method`; store it in `self.fcast_`.

        Parameters:

        method: {string}
            when not None, replaces `self.method` before dispatching

        xreg: {array-like}
            only read when the method is "ridge2"; passed to `ridge2f` as an
            R matrix when not None

        Raises:

        ValueError
            if neither `method` nor `self.method` is set

        A method name matching none of the branches leaves `self.fcast_`
        untouched.
        """
        if method is not None:
            self.method = method

        if self.method is None:
            # Previously surfaced as an AttributeError from `None.lower()`.
            raise ValueError(
                "no forecasting method set: pass `method` or set `self.method`"
            )

        if self.method == "armagarch":
            self.fcast_ = config.AHEAD_PACKAGE.armagarchf(
                y=self.input_ts_,
                h=self.h,
                level=self.level,
                B=self.B,
                cl=self.cl,
                dist=self.dist,
                seed=self.seed,
            )

        elif self.method in ("mean", "median", "rw"):
            self.fcast_ = config.AHEAD_PACKAGE.basicf(
                self.input_ts_,
                h=self.h,
                level=self.level,
                method=self.method,
                type_pi=self.type_pi,
                block_length=self.block_length,
                B=self.B,
                seed=self.seed,
            )

        elif self.method == "dynrm":
            self.fcast_ = config.AHEAD_PACKAGE.dynrmf(
                y=self.input_ts_,
                h=self.h,
                level=self.level,
                type_pi=self.type_pi,
            )

        elif self.method == "eat":
            self.fcast_ = config.AHEAD_PACKAGE.eatf(
                y=self.input_ts_,
                h=self.h,
                level=self.level,
                type_pi=self.type_pi,
                weights=config.FLOATVECTOR(self.weights),
            )

        elif self.method == "ridge2":
            self.fcast_ = config.AHEAD_PACKAGE.ridge2f(
                self.input_ts_, **self._ridge2_kwargs(xreg)
            )

        elif self.method == "var":
            self.fcast_ = config.AHEAD_PACKAGE.varf(
                self.input_ts_,
                h=self.h,
                level=self.level,
                lags=self.lags,
                type_VAR=self.type_VAR,
            )

        elif self.method.lower() == "mlarch":
            self.fcast_ = config.AHEAD_PACKAGE.mlarchf(**self._mlarch_kwargs())

    def plot(self, series, type_axis="dates", type_plot="pi"):
        """Plot time series forecast

        Parameters:

        series: {integer} or {string}
            series index or name

        type_axis: {string}
            "dates" labels the x axis with formatted dates, "numeric" with
            integer positions

        type_plot: {string}
            "pi" draws the mean forecast with its lower/upper band,
            "spaghetti" draws every simulation in `self.sims_`

        Raises:

        ValueError
            if `type_axis` is neither "numeric" nor "dates"
        """
        assert all(
            [
                self.mean_ is not None,
                self.lower_ is not None,
                self.upper_ is not None,
                self.output_dates_ is not None,
            ]
        ), "model forecasting must be obtained first (with `forecast` method)"

        if isinstance(series, str):
            assert (
                series in self.series_names
            ), f"series {series} doesn't exist in the input dataset"
            series_idx = self.input_df.columns.get_loc(series)
        else:
            # numpy integers (e.g. from `np.arange`) are accepted as well
            assert isinstance(series, (int, np.integer)) and (
                0 <= series < self.n_series
            ), f"check series index (< {self.n_series})"
            series_idx = series

        forecast_df = self.result_dfs_[series_idx]
        y_test = list(forecast_df["mean"].values)
        y_all = list(self.input_df.iloc[:, series_idx]) + y_test
        n_points_all = len(y_all)
        n_points_train = self.input_df.shape[0]

        if type_axis == "numeric":
            x_all = list(range(n_points_all))
            x_test = list(range(n_points_train, n_points_all))
        elif type_axis == "dates":  # use dates
            x_train = [date.strftime("%Y-%m-%d") for date in self.input_dates]
            x_test = [date.strftime("%Y-%m-%d") for date in self.output_dates_]
            x_all = np.concatenate((x_train, x_test), axis=None)
        else:
            # Without this the x values below would be undefined.
            raise ValueError(
                f"type_axis must be 'numeric' or 'dates', got {type_axis!r}"
            )

        if type_plot == "pi":
            _, ax = plt.subplots()
            ax.plot(x_all, y_all, "-")
            ax.plot(x_test, y_test, "-", color="orange")
            ax.fill_between(
                x_test,
                forecast_df["lower"].values,
                forecast_df["upper"].values,
                alpha=0.2,
                color="orange",
            )
            plt.title(
                f"prediction intervals for {series}",
                loc="left",
                fontsize=12,
                fontweight=0,
                color="black",
            )
            plt.show()

        elif type_plot == "spaghetti":
            palette = plt.get_cmap("Set1")
            sims_ix = self.getsims(self.sims_, series_idx)
            plt.plot(x_all, y_all, "-")
            for col_ix in range(
                sims_ix.shape[1]
            ):  # avoid this when there are thousands of simulations
                plt.plot(
                    x_test,
                    sims_ix[:, col_ix],
                    "-",
                    # cycle through the palette: an index past its last
                    # entry would otherwise always map to the same colour
                    color=palette(col_ix % palette.N),
                    linewidth=1,
                    alpha=0.9,
                )
            plt.plot(x_all, y_all, "-", color="black")
            plt.plot(x_test, y_test, "-", color="blue")
            # Add titles
            plt.title(
                f"{self.B} simulations of {series}",
                loc="left",
                fontsize=12,
                fontweight=0,
                color="black",
            )
            plt.xlabel("Time")
            plt.ylabel("Values")
            # Show the graph
            plt.show()