techtonique_apis.techto_forecast
1import tempfile 2import datetime as dt 3import numpy as np 4import pandas as pd 5 6# import seaborn as sns 7from xlwings import func, arg, ret 8from .techtonique_apis import TechtoniqueAPI 9 10api = TechtoniqueAPI() 11 12 13def excel_date_to_datetime(excel_serial): 14 # Excel's day 0 is 1899-12-30 15 return pd.to_datetime("1899-12-30") + pd.to_timedelta( 16 excel_serial, unit="D" 17 ) 18 19 20@func 21@arg( 22 "df", 23 index=False, 24 doc="Excel range with columns 'date' and one or more series.", 25) 26@arg("base_model", doc='Forecasting model (default: "RidgeCV")') 27@arg("n_hidden_features", doc="Number of hidden features (default: 5)") 28@arg("lags", doc="Number of lags (default: 25)") 29@arg("type_pi", doc='Prediction interval type (default: "kde")') 30@arg("replications", doc="Number of simulation replications (default: 10)") 31@arg("h", doc="Forecast horizon (default: 5)") 32@arg( 33 "return_sims", 34 doc="If TRUE, return simulation matrix; else, return forecast summary (default: FALSE)", 35) 36@arg( 37 "series_choice", 38 doc="(Optional) Name of the series to forecast if multiple are present", 39) 40@ret(index=False, doc="Forecast or simulation results as a table for Excel") 41def techto_forecast( 42 df: pd.DataFrame, 43 base_model: str = "RidgeCV", 44 n_hidden_features: int = 5, 45 lags: int = 25, 46 type_pi: str = "kde", 47 replications: int = 10, 48 h: int = 5, 49 return_sims: bool = False, 50 series_choice: str = None, 51) -> pd.DataFrame: 52 """Forecasting: pass a time series as a DataFrame from Excel, return forecast. 53 54 Excel/xlwings custom function: Forecast a time series passed as a DataFrame from Excel, using the Techtonique API. 55 56 Parameters 57 ---------- 58 59 df : pd.DataFrame 60 The input time series data as a DataFrame (from Excel range). 61 62 base_model : str, default "RidgeCV" 63 The base model to use for forecasting. 64 65 n_hidden_features : int, default 5 66 Number of hidden features for the model. 67 68 lags : int, default 25 69 Number of lags to use in the model. 70 71 type_pi : str, default "kde" 72 Type of prediction interval ("kde" or other supported types). 73 74 replications : int, default 10 75 Number of simulation replications. 76 77 h : int, default 5 78 Forecast horizon (number of periods ahead to forecast). 79 80 return_sims : bool, default False 81 If True, return the simulation matrix; otherwise, return the forecast summary bounds. 82 83 series_choice : str, optional 84 If provided, specifies which series to forecast from the DataFrame. 85 86 Returns 87 ------- 88 89 pd.DataFrame 90 The forecast results or simulation matrix as a DataFrame for Excel. 91 92 """ 93 # Convert Excel serial dates to datetime if needed 94 if pd.api.types.is_numeric_dtype(df["date"]): 95 df["date"] = df["date"].apply(excel_date_to_datetime) 96 97 with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp: 98 df.to_csv(tmp.name, index=False) 99 result = api.forecasting( 100 file_path=tmp.name, 101 base_model=base_model, 102 n_hidden_features=n_hidden_features, 103 lags=lags, 104 type_pi=type_pi, 105 replications=replications, 106 h=h, 107 ) 108 output_dates = result["date"] 109 if df.shape[1] == 2: # univariate time series 110 res_df = pd.DataFrame(result.pop("sims")) 111 if return_sims: 112 res2_df = pd.DataFrame([]) 113 res2_df["date"] = output_dates 114 sims_df = res_df.transpose() 115 sims_df.columns = [(i + 1) for i in range(sims_df.shape[1])] 116 return pd.concat([res2_df, sims_df], axis=1) 117 return pd.DataFrame(result) 118 else: # multivariate time series 119 n_series = len(result["mean"][0]) 120 series_names = [col for col in df.columns if col != "date"] 121 if len(series_names) != n_series: 122 series_names = [f"series_{i+1}" for i in range(n_series)] 123 124 # If series_choice is provided and valid, extract its index and return as univariate 125 if series_choice is not None and series_choice in series_names: 126 idx = series_names.index(series_choice) 127 # Extract only the selected series from each stat 128 summary_df = pd.DataFrame( 129 { 130 "date": output_dates, 131 "mean": [x[idx] for x in result["mean"]], 132 "lower": [x[idx] for x in result["lower"]], 133 "upper": [x[idx] for x in result["upper"]], 134 } 135 ) 136 if return_sims: 137 # sims: shape (replications, horizon, n_series) 138 # list of replications, each is list of horizon, each is list of n_series 139 sims = result["sims"] 140 # For each replication, extract the selected series only 141 sims_selected = [ 142 [h[idx] for h in rep] 143 # shape: (replications, horizon) 144 for rep in sims 145 ] 146 sims_df = pd.DataFrame(sims_selected).transpose() 147 sims_df.columns = [ 148 f"sim_{i+1}_{series_choice}" 149 for i in range(sims_df.shape[1]) 150 ] 151 res2_df = pd.DataFrame({"date": output_dates}) 152 return pd.concat([res2_df, sims_df], axis=1) 153 return summary_df 154 155 # Otherwise, return the full multivariate summary as before 156 summary_data = {"date": output_dates} 157 for stat in ["mean", "lower", "upper"]: 158 for s in range(n_series): 159 summary_data[f"{stat}_{series_names[s]}"] = [ 160 x[s] for x in result[stat] 161 ] 162 summary_df = pd.DataFrame(summary_data) 163 if return_sims: 164 # sims: shape (replications, horizon, n_series) 165 sims = result["sims"] 166 flat = [] 167 for rep in sims: 168 for s in range(n_series): 169 flat.append([h[s] for h in rep]) 170 sims_df = pd.DataFrame(flat).transpose() 171 colnames = [] 172 for r in range(len(sims)): 173 for s in range(n_series): 174 colnames.append(f"sim_{r+1}_{series_names[s]}") 175 sims_df.columns = colnames 176 sims_df.insert(0, "date", output_dates) 177 return sims_df 178 return summary_df
21@func 22@arg( 23 "df", 24 index=False, 25 doc="Excel range with columns 'date' and one or more series.", 26) 27@arg("base_model", doc='Forecasting model (default: "RidgeCV")') 28@arg("n_hidden_features", doc="Number of hidden features (default: 5)") 29@arg("lags", doc="Number of lags (default: 25)") 30@arg("type_pi", doc='Prediction interval type (default: "kde")') 31@arg("replications", doc="Number of simulation replications (default: 10)") 32@arg("h", doc="Forecast horizon (default: 5)") 33@arg( 34 "return_sims", 35 doc="If TRUE, return simulation matrix; else, return forecast summary (default: FALSE)", 36) 37@arg( 38 "series_choice", 39 doc="(Optional) Name of the series to forecast if multiple are present", 40) 41@ret(index=False, doc="Forecast or simulation results as a table for Excel") 42def techto_forecast( 43 df: pd.DataFrame, 44 base_model: str = "RidgeCV", 45 n_hidden_features: int = 5, 46 lags: int = 25, 47 type_pi: str = "kde", 48 replications: int = 10, 49 h: int = 5, 50 return_sims: bool = False, 51 series_choice: str = None, 52) -> pd.DataFrame: 53 """Forecasting: pass a time series as a DataFrame from Excel, return forecast. 54 55 Excel/xlwings custom function: Forecast a time series passed as a DataFrame from Excel, using the Techtonique API. 56 57 Parameters 58 ---------- 59 60 df : pd.DataFrame 61 The input time series data as a DataFrame (from Excel range). 62 63 base_model : str, default "RidgeCV" 64 The base model to use for forecasting. 65 66 n_hidden_features : int, default 5 67 Number of hidden features for the model. 68 69 lags : int, default 25 70 Number of lags to use in the model. 71 72 type_pi : str, default "kde" 73 Type of prediction interval ("kde" or other supported types). 74 75 replications : int, default 10 76 Number of simulation replications. 77 78 h : int, default 5 79 Forecast horizon (number of periods ahead to forecast). 80 81 return_sims : bool, default False 82 If True, return the simulation matrix; otherwise, return the forecast summary bounds. 83 84 series_choice : str, optional 85 If provided, specifies which series to forecast from the DataFrame. 86 87 Returns 88 ------- 89 90 pd.DataFrame 91 The forecast results or simulation matrix as a DataFrame for Excel. 92 93 """ 94 # Convert Excel serial dates to datetime if needed 95 if pd.api.types.is_numeric_dtype(df["date"]): 96 df["date"] = df["date"].apply(excel_date_to_datetime) 97 98 with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp: 99 df.to_csv(tmp.name, index=False) 100 result = api.forecasting( 101 file_path=tmp.name, 102 base_model=base_model, 103 n_hidden_features=n_hidden_features, 104 lags=lags, 105 type_pi=type_pi, 106 replications=replications, 107 h=h, 108 ) 109 output_dates = result["date"] 110 if df.shape[1] == 2: # univariate time series 111 res_df = pd.DataFrame(result.pop("sims")) 112 if return_sims: 113 res2_df = pd.DataFrame([]) 114 res2_df["date"] = output_dates 115 sims_df = res_df.transpose() 116 sims_df.columns = [(i + 1) for i in range(sims_df.shape[1])] 117 return pd.concat([res2_df, sims_df], axis=1) 118 return pd.DataFrame(result) 119 else: # multivariate time series 120 n_series = len(result["mean"][0]) 121 series_names = [col for col in df.columns if col != "date"] 122 if len(series_names) != n_series: 123 series_names = [f"series_{i+1}" for i in range(n_series)] 124 125 # If series_choice is provided and valid, extract its index and return as univariate 126 if series_choice is not None and series_choice in series_names: 127 idx = series_names.index(series_choice) 128 # Extract only the selected series from each stat 129 summary_df = pd.DataFrame( 130 { 131 "date": output_dates, 132 "mean": [x[idx] for x in result["mean"]], 133 "lower": [x[idx] for x in result["lower"]], 134 "upper": [x[idx] for x in result["upper"]], 135 } 136 ) 137 if return_sims: 138 # sims: shape (replications, horizon, n_series) 139 # list of replications, each is list of horizon, each is list of n_series 140 sims = result["sims"] 141 # For each replication, extract the selected series only 142 sims_selected = [ 143 [h[idx] for h in rep] 144 # shape: (replications, horizon) 145 for rep in sims 146 ] 147 sims_df = pd.DataFrame(sims_selected).transpose() 148 sims_df.columns = [ 149 f"sim_{i+1}_{series_choice}" 150 for i in range(sims_df.shape[1]) 151 ] 152 res2_df = pd.DataFrame({"date": output_dates}) 153 return pd.concat([res2_df, sims_df], axis=1) 154 return summary_df 155 156 # Otherwise, return the full multivariate summary as before 157 summary_data = {"date": output_dates} 158 for stat in ["mean", "lower", "upper"]: 159 for s in range(n_series): 160 summary_data[f"{stat}_{series_names[s]}"] = [ 161 x[s] for x in result[stat] 162 ] 163 summary_df = pd.DataFrame(summary_data) 164 if return_sims: 165 # sims: shape (replications, horizon, n_series) 166 sims = result["sims"] 167 flat = [] 168 for rep in sims: 169 for s in range(n_series): 170 flat.append([h[s] for h in rep]) 171 sims_df = pd.DataFrame(flat).transpose() 172 colnames = [] 173 for r in range(len(sims)): 174 for s in range(n_series): 175 colnames.append(f"sim_{r+1}_{series_names[s]}") 176 sims_df.columns = colnames 177 sims_df.insert(0, "date", output_dates) 178 return sims_df 179 return summary_df
Forecasting: pass a time series as a DataFrame from Excel, return forecast.
Excel/xlwings custom function: Forecast a time series passed as a DataFrame from Excel, using the Techtonique API.
Parameters
df : pd.DataFrame The input time series data as a DataFrame (from Excel range).
base_model : str, default "RidgeCV" The base model to use for forecasting.
n_hidden_features : int, default 5 Number of hidden features for the model.
lags : int, default 25 Number of lags to use in the model.
type_pi : str, default "kde" Type of prediction interval ("kde" or other supported types).
replications : int, default 10 Number of simulation replications.
h : int, default 5 Forecast horizon (number of periods ahead to forecast).
return_sims : bool, default False If True, return the simulation matrix; otherwise, return the forecast summary bounds.
series_choice : str, optional If provided, specifies which series to forecast from the DataFrame.
Returns
pd.DataFrame The forecast results or simulation matrix as a DataFrame for Excel.