techtonique_apis.techto_forecast

  1import tempfile
  2import datetime as dt
  3import numpy as np
  4import pandas as pd
  5# import seaborn as sns
  6from xlwings import func, arg, ret
  7from .techtonique_apis import TechtoniqueAPI
  8
  9
 10api = TechtoniqueAPI()
 11
 12
 13def excel_date_to_datetime(excel_serial):
 14    # Excel's day 0 is 1899-12-30
 15    return pd.to_datetime('1899-12-30') + pd.to_timedelta(excel_serial, unit='D')
 16
 17
 18@func
 19@arg("df", index=False, doc="Excel range with columns 'date' and one or more series.")
 20@arg("base_model", doc='Forecasting model (default: "RidgeCV")')
 21@arg("n_hidden_features", doc="Number of hidden features (default: 5)")
 22@arg("lags", doc="Number of lags (default: 25)")
 23@arg("type_pi", doc='Prediction interval type (default: "kde")')
 24@arg("replications", doc="Number of simulation replications (default: 10)")
 25@arg("h", doc="Forecast horizon (default: 5)")
 26@arg("return_sims", doc="If TRUE, return simulation matrix; else, return forecast summary (default: FALSE)")
 27@arg("series_choice", doc="(Optional) Name of the series to forecast if multiple are present")
 28@ret(index=False, doc="Forecast or simulation results as a table for Excel")
 29def techto_forecast(
 30    df: pd.DataFrame,
 31    base_model: str = "RidgeCV",
 32    n_hidden_features: int = 5,
 33    lags: int = 25,
 34    type_pi: str = "kde",
 35    replications: int = 10,
 36    h: int = 5,
 37    return_sims: bool = False,
 38    series_choice: str = None
 39) -> pd.DataFrame:
 40    """Forecasting: pass a time series as a DataFrame from Excel, return forecast.
 41
 42    Excel/xlwings custom function: Forecast a time series passed as a DataFrame from Excel, using the Techtonique API.
 43
 44    Parameters
 45    ----------
 46
 47    df : pd.DataFrame
 48        The input time series data as a DataFrame (from Excel range).
 49
 50    base_model : str, default "RidgeCV"
 51        The base model to use for forecasting.
 52
 53    n_hidden_features : int, default 5
 54        Number of hidden features for the model.
 55
 56    lags : int, default 25
 57        Number of lags to use in the model.
 58
 59    type_pi : str, default "kde"
 60        Type of prediction interval ("kde" or other supported types).
 61
 62    replications : int, default 10
 63        Number of simulation replications.
 64
 65    h : int, default 5
 66        Forecast horizon (number of periods ahead to forecast).
 67
 68    return_sims : bool, default False
 69        If True, return the simulation matrix; otherwise, return the forecast summary bounds.
 70
 71    series_choice : str, optional
 72        If provided, specifies which series to forecast from the DataFrame.
 73
 74    Returns
 75    -------
 76
 77    pd.DataFrame
 78        The forecast results or simulation matrix as a DataFrame for Excel.
 79
 80    """
 81    # Convert Excel serial dates to datetime if needed
 82    if pd.api.types.is_numeric_dtype(df['date']):
 83        df['date'] = df['date'].apply(excel_date_to_datetime)
 84
 85    with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp:
 86        df.to_csv(tmp.name, index=False)
 87        result = api.forecasting(
 88            file_path=tmp.name,
 89            base_model=base_model,
 90            n_hidden_features=n_hidden_features,
 91            lags=lags,
 92            type_pi=type_pi,
 93            replications=replications,
 94            h=h,
 95        )
 96    output_dates = result["date"]
 97    if df.shape[1] == 2:  # univariate time series
 98        res_df = pd.DataFrame(result.pop('sims'))
 99        if return_sims:
100            res2_df = pd.DataFrame([])
101            res2_df["date"] = output_dates
102            sims_df = res_df.transpose()
103            sims_df.columns = [(i + 1) for i in range(sims_df.shape[1])]
104            return pd.concat([res2_df, sims_df], axis=1)
105        return pd.DataFrame(result)
106    else:  # multivariate time series
107        n_series = len(result["mean"][0])
108        series_names = [col for col in df.columns if col != "date"]
109        if len(series_names) != n_series:
110            series_names = [f"series_{i+1}" for i in range(n_series)]
111
112        # If series_choice is provided and valid, extract its index and return as univariate
113        if series_choice is not None and series_choice in series_names:
114            idx = series_names.index(series_choice)
115            # Extract only the selected series from each stat
116            summary_df = pd.DataFrame({
117                "date": output_dates,
118                "mean": [x[idx] for x in result["mean"]],
119                "lower": [x[idx] for x in result["lower"]],
120                "upper": [x[idx] for x in result["upper"]],
121            })
122            if return_sims:
123                # sims: shape (replications, horizon, n_series)
124                # list of replications, each is list of horizon, each is list of n_series
125                sims = result["sims"]
126                # For each replication, extract the selected series only
127                sims_selected = [[h[idx] for h in rep]
128                                 for rep in sims]  # shape: (replications, horizon)
129                sims_df = pd.DataFrame(sims_selected).transpose()
130                sims_df.columns = [
131                    f"sim_{i+1}_{series_choice}" for i in range(sims_df.shape[1])]
132                res2_df = pd.DataFrame({"date": output_dates})
133                return pd.concat([res2_df, sims_df], axis=1)
134            return summary_df
135
136        # Otherwise, return the full multivariate summary as before
137        summary_data = {"date": output_dates}
138        for stat in ["mean", "lower", "upper"]:
139            for s in range(n_series):
140                summary_data[f"{stat}_{series_names[s]}"] = [x[s]
141                                                             for x in result[stat]]
142        summary_df = pd.DataFrame(summary_data)
143        if return_sims:
144            # sims: shape (replications, horizon, n_series)
145            sims = result["sims"]
146            flat = []
147            for rep in sims:
148                for s in range(n_series):
149                    flat.append([h[s] for h in rep])
150            sims_df = pd.DataFrame(flat).transpose()
151            colnames = []
152            for r in range(len(sims)):
153                for s in range(n_series):
154                    colnames.append(f"sim_{r+1}_{series_names[s]}")
155            sims_df.columns = colnames
156            sims_df.insert(0, "date", output_dates)
157            return sims_df
158        return summary_df
def excel_date_to_datetime(excel_serial):
14def excel_date_to_datetime(excel_serial):
15    # Excel's day 0 is 1899-12-30
16    return pd.to_datetime('1899-12-30') + pd.to_timedelta(excel_serial, unit='D')
@func
@arg('df', index=False, doc="Excel range with columns 'date' and one or more series.")
@arg('base_model', doc='Forecasting model (default: "RidgeCV")')
@arg('n_hidden_features', doc='Number of hidden features (default: 5)')
@arg('lags', doc='Number of lags (default: 25)')
@arg('type_pi', doc='Prediction interval type (default: "kde")')
@arg('replications', doc='Number of simulation replications (default: 10)')
@arg('h', doc='Forecast horizon (default: 5)')
@arg('return_sims', doc='If TRUE, return simulation matrix; else, return forecast summary (default: FALSE)')
@arg('series_choice', doc='(Optional) Name of the series to forecast if multiple are present')
@ret(index=False, doc='Forecast or simulation results as a table for Excel')
def techto_forecast( df: pandas.core.frame.DataFrame, base_model: str = 'RidgeCV', n_hidden_features: int = 5, lags: int = 25, type_pi: str = 'kde', replications: int = 10, h: int = 5, return_sims: bool = False, series_choice: str = None) -> pandas.core.frame.DataFrame:
 19@func
 20@arg("df", index=False, doc="Excel range with columns 'date' and one or more series.")
 21@arg("base_model", doc='Forecasting model (default: "RidgeCV")')
 22@arg("n_hidden_features", doc="Number of hidden features (default: 5)")
 23@arg("lags", doc="Number of lags (default: 25)")
 24@arg("type_pi", doc='Prediction interval type (default: "kde")')
 25@arg("replications", doc="Number of simulation replications (default: 10)")
 26@arg("h", doc="Forecast horizon (default: 5)")
 27@arg("return_sims", doc="If TRUE, return simulation matrix; else, return forecast summary (default: FALSE)")
 28@arg("series_choice", doc="(Optional) Name of the series to forecast if multiple are present")
 29@ret(index=False, doc="Forecast or simulation results as a table for Excel")
 30def techto_forecast(
 31    df: pd.DataFrame,
 32    base_model: str = "RidgeCV",
 33    n_hidden_features: int = 5,
 34    lags: int = 25,
 35    type_pi: str = "kde",
 36    replications: int = 10,
 37    h: int = 5,
 38    return_sims: bool = False,
 39    series_choice: str = None
 40) -> pd.DataFrame:
 41    """Forecasting: pass a time series as a DataFrame from Excel, return forecast.
 42
 43    Excel/xlwings custom function: Forecast a time series passed as a DataFrame from Excel, using the Techtonique API.
 44
 45    Parameters
 46    ----------
 47
 48    df : pd.DataFrame
 49        The input time series data as a DataFrame (from Excel range).
 50
 51    base_model : str, default "RidgeCV"
 52        The base model to use for forecasting.
 53
 54    n_hidden_features : int, default 5
 55        Number of hidden features for the model.
 56
 57    lags : int, default 25
 58        Number of lags to use in the model.
 59
 60    type_pi : str, default "kde"
 61        Type of prediction interval ("kde" or other supported types).
 62
 63    replications : int, default 10
 64        Number of simulation replications.
 65
 66    h : int, default 5
 67        Forecast horizon (number of periods ahead to forecast).
 68
 69    return_sims : bool, default False
 70        If True, return the simulation matrix; otherwise, return the forecast summary bounds.
 71
 72    series_choice : str, optional
 73        If provided, specifies which series to forecast from the DataFrame.
 74
 75    Returns
 76    -------
 77
 78    pd.DataFrame
 79        The forecast results or simulation matrix as a DataFrame for Excel.
 80
 81    """
 82    # Convert Excel serial dates to datetime if needed
 83    if pd.api.types.is_numeric_dtype(df['date']):
 84        df['date'] = df['date'].apply(excel_date_to_datetime)
 85
 86    with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp:
 87        df.to_csv(tmp.name, index=False)
 88        result = api.forecasting(
 89            file_path=tmp.name,
 90            base_model=base_model,
 91            n_hidden_features=n_hidden_features,
 92            lags=lags,
 93            type_pi=type_pi,
 94            replications=replications,
 95            h=h,
 96        )
 97    output_dates = result["date"]
 98    if df.shape[1] == 2:  # univariate time series
 99        res_df = pd.DataFrame(result.pop('sims'))
100        if return_sims:
101            res2_df = pd.DataFrame([])
102            res2_df["date"] = output_dates
103            sims_df = res_df.transpose()
104            sims_df.columns = [(i + 1) for i in range(sims_df.shape[1])]
105            return pd.concat([res2_df, sims_df], axis=1)
106        return pd.DataFrame(result)
107    else:  # multivariate time series
108        n_series = len(result["mean"][0])
109        series_names = [col for col in df.columns if col != "date"]
110        if len(series_names) != n_series:
111            series_names = [f"series_{i+1}" for i in range(n_series)]
112
113        # If series_choice is provided and valid, extract its index and return as univariate
114        if series_choice is not None and series_choice in series_names:
115            idx = series_names.index(series_choice)
116            # Extract only the selected series from each stat
117            summary_df = pd.DataFrame({
118                "date": output_dates,
119                "mean": [x[idx] for x in result["mean"]],
120                "lower": [x[idx] for x in result["lower"]],
121                "upper": [x[idx] for x in result["upper"]],
122            })
123            if return_sims:
124                # sims: shape (replications, horizon, n_series)
125                # list of replications, each is list of horizon, each is list of n_series
126                sims = result["sims"]
127                # For each replication, extract the selected series only
128                sims_selected = [[h[idx] for h in rep]
129                                 for rep in sims]  # shape: (replications, horizon)
130                sims_df = pd.DataFrame(sims_selected).transpose()
131                sims_df.columns = [
132                    f"sim_{i+1}_{series_choice}" for i in range(sims_df.shape[1])]
133                res2_df = pd.DataFrame({"date": output_dates})
134                return pd.concat([res2_df, sims_df], axis=1)
135            return summary_df
136
137        # Otherwise, return the full multivariate summary as before
138        summary_data = {"date": output_dates}
139        for stat in ["mean", "lower", "upper"]:
140            for s in range(n_series):
141                summary_data[f"{stat}_{series_names[s]}"] = [x[s]
142                                                             for x in result[stat]]
143        summary_df = pd.DataFrame(summary_data)
144        if return_sims:
145            # sims: shape (replications, horizon, n_series)
146            sims = result["sims"]
147            flat = []
148            for rep in sims:
149                for s in range(n_series):
150                    flat.append([h[s] for h in rep])
151            sims_df = pd.DataFrame(flat).transpose()
152            colnames = []
153            for r in range(len(sims)):
154                for s in range(n_series):
155                    colnames.append(f"sim_{r+1}_{series_names[s]}")
156            sims_df.columns = colnames
157            sims_df.insert(0, "date", output_dates)
158            return sims_df
159        return summary_df

Forecasting: pass a time series as a DataFrame from Excel, return forecast.

Excel/xlwings custom function: Forecast a time series passed as a DataFrame from Excel, using the Techtonique API.

Parameters

df : pd.DataFrame The input time series data as a DataFrame (from Excel range).

base_model : str, default "RidgeCV" The base model to use for forecasting.

n_hidden_features : int, default 5 Number of hidden features for the model.

lags : int, default 25 Number of lags to use in the model.

type_pi : str, default "kde" Type of prediction interval ("kde" or other supported types).

replications : int, default 10 Number of simulation replications.

h : int, default 5 Forecast horizon (number of periods ahead to forecast).

return_sims : bool, default False If True, return the simulation matrix; otherwise, return the forecast summary bounds.

series_choice : str, optional If provided, specifies which series to forecast from the DataFrame.

Returns

pd.DataFrame The forecast results or simulation matrix as a DataFrame for Excel.