techtonique_apis.techto_forecast

  1import tempfile
  2import datetime as dt
  3import numpy as np
  4import pandas as pd
  5
  6# import seaborn as sns
  7from xlwings import func, arg, ret
  8from .techtonique_apis import TechtoniqueAPI
  9
 10api = TechtoniqueAPI()
 11
 12
 13def excel_date_to_datetime(excel_serial):
 14    # Excel's day 0 is 1899-12-30
 15    return pd.to_datetime("1899-12-30") + pd.to_timedelta(
 16        excel_serial, unit="D"
 17    )
 18
 19
 20@func
 21@arg(
 22    "df",
 23    index=False,
 24    doc="Excel range with columns 'date' and one or more series.",
 25)
 26@arg("base_model", doc='Forecasting model (default: "RidgeCV")')
 27@arg("n_hidden_features", doc="Number of hidden features (default: 5)")
 28@arg("lags", doc="Number of lags (default: 25)")
 29@arg("type_pi", doc='Prediction interval type (default: "kde")')
 30@arg("replications", doc="Number of simulation replications (default: 10)")
 31@arg("h", doc="Forecast horizon (default: 5)")
 32@arg(
 33    "return_sims",
 34    doc="If TRUE, return simulation matrix; else, return forecast summary (default: FALSE)",
 35)
 36@arg(
 37    "series_choice",
 38    doc="(Optional) Name of the series to forecast if multiple are present",
 39)
 40@ret(index=False, doc="Forecast or simulation results as a table for Excel")
 41def techto_forecast(
 42    df: pd.DataFrame,
 43    base_model: str = "RidgeCV",
 44    n_hidden_features: int = 5,
 45    lags: int = 25,
 46    type_pi: str = "kde",
 47    replications: int = 10,
 48    h: int = 5,
 49    return_sims: bool = False,
 50    series_choice: str = None,
 51) -> pd.DataFrame:
 52    """Forecasting: pass a time series as a DataFrame from Excel, return forecast.
 53
 54    Excel/xlwings custom function: Forecast a time series passed as a DataFrame from Excel, using the Techtonique API.
 55
 56    Parameters
 57    ----------
 58
 59    df : pd.DataFrame
 60        The input time series data as a DataFrame (from Excel range).
 61
 62    base_model : str, default "RidgeCV"
 63        The base model to use for forecasting.
 64
 65    n_hidden_features : int, default 5
 66        Number of hidden features for the model.
 67
 68    lags : int, default 25
 69        Number of lags to use in the model.
 70
 71    type_pi : str, default "kde"
 72        Type of prediction interval ("kde" or other supported types).
 73
 74    replications : int, default 10
 75        Number of simulation replications.
 76
 77    h : int, default 5
 78        Forecast horizon (number of periods ahead to forecast).
 79
 80    return_sims : bool, default False
 81        If True, return the simulation matrix; otherwise, return the forecast summary bounds.
 82
 83    series_choice : str, optional
 84        If provided, specifies which series to forecast from the DataFrame.
 85
 86    Returns
 87    -------
 88
 89    pd.DataFrame
 90        The forecast results or simulation matrix as a DataFrame for Excel.
 91
 92    """
 93    # Convert Excel serial dates to datetime if needed
 94    if pd.api.types.is_numeric_dtype(df["date"]):
 95        df["date"] = df["date"].apply(excel_date_to_datetime)
 96
 97    with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp:
 98        df.to_csv(tmp.name, index=False)
 99        result = api.forecasting(
100            file_path=tmp.name,
101            base_model=base_model,
102            n_hidden_features=n_hidden_features,
103            lags=lags,
104            type_pi=type_pi,
105            replications=replications,
106            h=h,
107        )
108    output_dates = result["date"]
109    if df.shape[1] == 2:  # univariate time series
110        res_df = pd.DataFrame(result.pop("sims"))
111        if return_sims:
112            res2_df = pd.DataFrame([])
113            res2_df["date"] = output_dates
114            sims_df = res_df.transpose()
115            sims_df.columns = [(i + 1) for i in range(sims_df.shape[1])]
116            return pd.concat([res2_df, sims_df], axis=1)
117        return pd.DataFrame(result)
118    else:  # multivariate time series
119        n_series = len(result["mean"][0])
120        series_names = [col for col in df.columns if col != "date"]
121        if len(series_names) != n_series:
122            series_names = [f"series_{i+1}" for i in range(n_series)]
123
124        # If series_choice is provided and valid, extract its index and return as univariate
125        if series_choice is not None and series_choice in series_names:
126            idx = series_names.index(series_choice)
127            # Extract only the selected series from each stat
128            summary_df = pd.DataFrame(
129                {
130                    "date": output_dates,
131                    "mean": [x[idx] for x in result["mean"]],
132                    "lower": [x[idx] for x in result["lower"]],
133                    "upper": [x[idx] for x in result["upper"]],
134                }
135            )
136            if return_sims:
137                # sims: shape (replications, horizon, n_series)
138                # list of replications, each is list of horizon, each is list of n_series
139                sims = result["sims"]
140                # For each replication, extract the selected series only
141                sims_selected = [
142                    [h[idx] for h in rep]
143                    # shape: (replications, horizon)
144                    for rep in sims
145                ]
146                sims_df = pd.DataFrame(sims_selected).transpose()
147                sims_df.columns = [
148                    f"sim_{i+1}_{series_choice}"
149                    for i in range(sims_df.shape[1])
150                ]
151                res2_df = pd.DataFrame({"date": output_dates})
152                return pd.concat([res2_df, sims_df], axis=1)
153            return summary_df
154
155        # Otherwise, return the full multivariate summary as before
156        summary_data = {"date": output_dates}
157        for stat in ["mean", "lower", "upper"]:
158            for s in range(n_series):
159                summary_data[f"{stat}_{series_names[s]}"] = [
160                    x[s] for x in result[stat]
161                ]
162        summary_df = pd.DataFrame(summary_data)
163        if return_sims:
164            # sims: shape (replications, horizon, n_series)
165            sims = result["sims"]
166            flat = []
167            for rep in sims:
168                for s in range(n_series):
169                    flat.append([h[s] for h in rep])
170            sims_df = pd.DataFrame(flat).transpose()
171            colnames = []
172            for r in range(len(sims)):
173                for s in range(n_series):
174                    colnames.append(f"sim_{r+1}_{series_names[s]}")
175            sims_df.columns = colnames
176            sims_df.insert(0, "date", output_dates)
177            return sims_df
178        return summary_df
def excel_date_to_datetime(excel_serial):
14def excel_date_to_datetime(excel_serial):
15    # Excel's day 0 is 1899-12-30
16    return pd.to_datetime("1899-12-30") + pd.to_timedelta(
17        excel_serial, unit="D"
18    )
@func
@arg('df', index=False, doc="Excel range with columns 'date' and one or more series.")
@arg('base_model', doc='Forecasting model (default: "RidgeCV")')
@arg('n_hidden_features', doc='Number of hidden features (default: 5)')
@arg('lags', doc='Number of lags (default: 25)')
@arg('type_pi', doc='Prediction interval type (default: "kde")')
@arg('replications', doc='Number of simulation replications (default: 10)')
@arg('h', doc='Forecast horizon (default: 5)')
@arg('return_sims', doc='If TRUE, return simulation matrix; else, return forecast summary (default: FALSE)')
@arg('series_choice', doc='(Optional) Name of the series to forecast if multiple are present')
@ret(index=False, doc='Forecast or simulation results as a table for Excel')
def techto_forecast( df: pandas.DataFrame, base_model: str = 'RidgeCV', n_hidden_features: int = 5, lags: int = 25, type_pi: str = 'kde', replications: int = 10, h: int = 5, return_sims: bool = False, series_choice: str = None) -> pandas.DataFrame:
 21@func
 22@arg(
 23    "df",
 24    index=False,
 25    doc="Excel range with columns 'date' and one or more series.",
 26)
 27@arg("base_model", doc='Forecasting model (default: "RidgeCV")')
 28@arg("n_hidden_features", doc="Number of hidden features (default: 5)")
 29@arg("lags", doc="Number of lags (default: 25)")
 30@arg("type_pi", doc='Prediction interval type (default: "kde")')
 31@arg("replications", doc="Number of simulation replications (default: 10)")
 32@arg("h", doc="Forecast horizon (default: 5)")
 33@arg(
 34    "return_sims",
 35    doc="If TRUE, return simulation matrix; else, return forecast summary (default: FALSE)",
 36)
 37@arg(
 38    "series_choice",
 39    doc="(Optional) Name of the series to forecast if multiple are present",
 40)
 41@ret(index=False, doc="Forecast or simulation results as a table for Excel")
 42def techto_forecast(
 43    df: pd.DataFrame,
 44    base_model: str = "RidgeCV",
 45    n_hidden_features: int = 5,
 46    lags: int = 25,
 47    type_pi: str = "kde",
 48    replications: int = 10,
 49    h: int = 5,
 50    return_sims: bool = False,
 51    series_choice: str = None,
 52) -> pd.DataFrame:
 53    """Forecasting: pass a time series as a DataFrame from Excel, return forecast.
 54
 55    Excel/xlwings custom function: Forecast a time series passed as a DataFrame from Excel, using the Techtonique API.
 56
 57    Parameters
 58    ----------
 59
 60    df : pd.DataFrame
 61        The input time series data as a DataFrame (from Excel range).
 62
 63    base_model : str, default "RidgeCV"
 64        The base model to use for forecasting.
 65
 66    n_hidden_features : int, default 5
 67        Number of hidden features for the model.
 68
 69    lags : int, default 25
 70        Number of lags to use in the model.
 71
 72    type_pi : str, default "kde"
 73        Type of prediction interval ("kde" or other supported types).
 74
 75    replications : int, default 10
 76        Number of simulation replications.
 77
 78    h : int, default 5
 79        Forecast horizon (number of periods ahead to forecast).
 80
 81    return_sims : bool, default False
 82        If True, return the simulation matrix; otherwise, return the forecast summary bounds.
 83
 84    series_choice : str, optional
 85        If provided, specifies which series to forecast from the DataFrame.
 86
 87    Returns
 88    -------
 89
 90    pd.DataFrame
 91        The forecast results or simulation matrix as a DataFrame for Excel.
 92
 93    """
 94    # Convert Excel serial dates to datetime if needed
 95    if pd.api.types.is_numeric_dtype(df["date"]):
 96        df["date"] = df["date"].apply(excel_date_to_datetime)
 97
 98    with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp:
 99        df.to_csv(tmp.name, index=False)
100        result = api.forecasting(
101            file_path=tmp.name,
102            base_model=base_model,
103            n_hidden_features=n_hidden_features,
104            lags=lags,
105            type_pi=type_pi,
106            replications=replications,
107            h=h,
108        )
109    output_dates = result["date"]
110    if df.shape[1] == 2:  # univariate time series
111        res_df = pd.DataFrame(result.pop("sims"))
112        if return_sims:
113            res2_df = pd.DataFrame([])
114            res2_df["date"] = output_dates
115            sims_df = res_df.transpose()
116            sims_df.columns = [(i + 1) for i in range(sims_df.shape[1])]
117            return pd.concat([res2_df, sims_df], axis=1)
118        return pd.DataFrame(result)
119    else:  # multivariate time series
120        n_series = len(result["mean"][0])
121        series_names = [col for col in df.columns if col != "date"]
122        if len(series_names) != n_series:
123            series_names = [f"series_{i+1}" for i in range(n_series)]
124
125        # If series_choice is provided and valid, extract its index and return as univariate
126        if series_choice is not None and series_choice in series_names:
127            idx = series_names.index(series_choice)
128            # Extract only the selected series from each stat
129            summary_df = pd.DataFrame(
130                {
131                    "date": output_dates,
132                    "mean": [x[idx] for x in result["mean"]],
133                    "lower": [x[idx] for x in result["lower"]],
134                    "upper": [x[idx] for x in result["upper"]],
135                }
136            )
137            if return_sims:
138                # sims: shape (replications, horizon, n_series)
139                # list of replications, each is list of horizon, each is list of n_series
140                sims = result["sims"]
141                # For each replication, extract the selected series only
142                sims_selected = [
143                    [h[idx] for h in rep]
144                    # shape: (replications, horizon)
145                    for rep in sims
146                ]
147                sims_df = pd.DataFrame(sims_selected).transpose()
148                sims_df.columns = [
149                    f"sim_{i+1}_{series_choice}"
150                    for i in range(sims_df.shape[1])
151                ]
152                res2_df = pd.DataFrame({"date": output_dates})
153                return pd.concat([res2_df, sims_df], axis=1)
154            return summary_df
155
156        # Otherwise, return the full multivariate summary as before
157        summary_data = {"date": output_dates}
158        for stat in ["mean", "lower", "upper"]:
159            for s in range(n_series):
160                summary_data[f"{stat}_{series_names[s]}"] = [
161                    x[s] for x in result[stat]
162                ]
163        summary_df = pd.DataFrame(summary_data)
164        if return_sims:
165            # sims: shape (replications, horizon, n_series)
166            sims = result["sims"]
167            flat = []
168            for rep in sims:
169                for s in range(n_series):
170                    flat.append([h[s] for h in rep])
171            sims_df = pd.DataFrame(flat).transpose()
172            colnames = []
173            for r in range(len(sims)):
174                for s in range(n_series):
175                    colnames.append(f"sim_{r+1}_{series_names[s]}")
176            sims_df.columns = colnames
177            sims_df.insert(0, "date", output_dates)
178            return sims_df
179        return summary_df

Forecasting: pass a time series as a DataFrame from Excel, return forecast.

Excel/xlwings custom function: Forecast a time series passed as a DataFrame from Excel, using the Techtonique API.

Parameters

df : pd.DataFrame The input time series data as a DataFrame (from Excel range).

base_model : str, default "RidgeCV" The base model to use for forecasting.

n_hidden_features : int, default 5 Number of hidden features for the model.

lags : int, default 25 Number of lags to use in the model.

type_pi : str, default "kde" Type of prediction interval ("kde" or other supported types).

replications : int, default 10 Number of simulation replications.

h : int, default 5 Forecast horizon (number of periods ahead to forecast).

return_sims : bool, default False If True, return the simulation matrix; otherwise, return the forecast summary bounds.

series_choice : str, optional If provided, specifies which series to forecast from the DataFrame.

Returns

pd.DataFrame The forecast results or simulation matrix as a DataFrame for Excel.