Title: | Forecasting Using State Space Models |
---|---|
Description: | Functions implementing Single Source of Error state space models for purposes of time series analysis and forecasting. The package includes ADAM (Svetunkov, 2023, <https://openforecast.org/adam/>), Exponential Smoothing (Hyndman et al., 2008, <doi: 10.1007/978-3-540-71918-2>), SARIMA (Svetunkov & Boylan, 2019 <doi: 10.1080/00207543.2019.1600764>), Complex Exponential Smoothing (Svetunkov & Kourentzes, 2018, <doi: 10.13140/RG.2.2.24986.29123>), Simple Moving Average (Svetunkov & Petropoulos, 2018 <doi: 10.1080/00207543.2017.1380326>) and several simulation functions. It also allows dealing with intermittent demand based on the iETS framework (Svetunkov & Boylan, 2019, <doi: 10.13140/RG.2.2.35897.06242>). |
Authors: | Ivan Svetunkov [aut, cre] (Senior Lecturer at Centre for Marketing Analytics and Forecasting, Lancaster University, UK) |
Maintainer: | Ivan Svetunkov <[email protected]> |
License: | LGPL-2.1 |
Version: | 4.1.1.41004 |
Built: | 2024-11-22 09:35:20 UTC |
Source: | https://github.com/config-i1/smooth |
Function produces error measures for the provided object and the holdout values of the
response variable. Note that instead of parameters x
, test
, the function
accepts the vector of values in holdout
. Also, the parameters d
and D
are not supported - MASE is always calculated via division by first differences.
## S3 method for class 'smooth' accuracy(object, holdout = NULL, ...) ## S3 method for class 'smooth.forecast' accuracy(object, holdout = NULL, ...)
## S3 method for class 'smooth' accuracy(object, holdout = NULL, ...) ## S3 method for class 'smooth.forecast' accuracy(object, holdout = NULL, ...)
object |
The estimated model or a forecast from the estimated model generated via
either |
holdout |
The vector of values of the response variable in the holdout (test) set.
If not provided, then the function will return the in-sample error measures. If the
|
... |
Other variables passed to the |
The function is a wrapper for the measures function and is implemented for convenience.
Ivan Svetunkov, [email protected]
y <- rnorm(100, 100, 10) ourModel <- adam(y, holdout=TRUE, h=10) accuracy(ourModel)
y <- rnorm(100, 100, 10) ourModel <- adam(y, holdout=TRUE, h=10) accuracy(ourModel)
Function constructs an advanced Single Source of Error model, based on ETS taxonomy and ARIMA elements
adam(data, model = "ZXZ", lags = c(frequency(data)), orders = list(ar = c(0), i = c(0), ma = c(0), select = FALSE), constant = FALSE, formula = NULL, regressors = c("use", "select", "adapt"), occurrence = c("none", "auto", "fixed", "general", "odds-ratio", "inverse-odds-ratio", "direct"), distribution = c("default", "dnorm", "dlaplace", "ds", "dgnorm", "dlnorm", "dinvgauss", "dgamma"), loss = c("likelihood", "MSE", "MAE", "HAM", "LASSO", "RIDGE", "MSEh", "TMSE", "GTMSE", "MSCE"), outliers = c("ignore", "use", "select"), level = 0.99, h = 0, holdout = FALSE, persistence = NULL, phi = NULL, initial = c("optimal", "backcasting", "complete"), arma = NULL, ic = c("AICc", "AIC", "BIC", "BICc"), bounds = c("usual", "admissible", "none"), silent = TRUE, ...) ## S3 method for class 'adam' simulate(object, nsim = 1, seed = NULL, obs = nobs(object), ...) auto.adam(data, model = "ZXZ", lags = c(frequency(data)), orders = list(ar = c(3, 3), i = c(2, 1), ma = c(3, 3), select = TRUE), formula = NULL, regressors = c("use", "select", "adapt"), occurrence = c("none", "auto", "fixed", "general", "odds-ratio", "inverse-odds-ratio", "direct"), distribution = c("dnorm", "dlaplace", "ds", "dgnorm", "dlnorm", "dinvgauss", "dgamma"), outliers = c("ignore", "use", "select"), level = 0.99, h = 0, holdout = FALSE, persistence = NULL, phi = NULL, initial = c("optimal", "backcasting", "complete"), arma = NULL, ic = c("AICc", "AIC", "BIC", "BICc"), bounds = c("usual", "admissible", "none"), silent = TRUE, parallel = FALSE, ...) ## S3 method for class 'adam' sm(object, model = "YYY", lags = NULL, orders = list(ar = c(0), i = c(0), ma = c(0), select = FALSE), constant = FALSE, formula = NULL, regressors = c("use", "select", "adapt"), data = NULL, persistence = NULL, phi = NULL, initial = c("optimal", "backcasting"), arma = NULL, ic = c("AICc", "AIC", "BIC", "BICc"), bounds = c("usual", "admissible", "none"), silent = TRUE, ...)
adam(data, model = "ZXZ", lags = c(frequency(data)), orders = list(ar = c(0), i = c(0), ma = c(0), select = FALSE), constant = FALSE, formula = NULL, regressors = c("use", "select", "adapt"), occurrence = c("none", "auto", "fixed", "general", "odds-ratio", "inverse-odds-ratio", "direct"), distribution = c("default", "dnorm", "dlaplace", "ds", "dgnorm", "dlnorm", "dinvgauss", "dgamma"), loss = c("likelihood", "MSE", "MAE", "HAM", "LASSO", "RIDGE", "MSEh", "TMSE", "GTMSE", "MSCE"), outliers = c("ignore", "use", "select"), level = 0.99, h = 0, holdout = FALSE, persistence = NULL, phi = NULL, initial = c("optimal", "backcasting", "complete"), arma = NULL, ic = c("AICc", "AIC", "BIC", "BICc"), bounds = c("usual", "admissible", "none"), silent = TRUE, ...) ## S3 method for class 'adam' simulate(object, nsim = 1, seed = NULL, obs = nobs(object), ...) auto.adam(data, model = "ZXZ", lags = c(frequency(data)), orders = list(ar = c(3, 3), i = c(2, 1), ma = c(3, 3), select = TRUE), formula = NULL, regressors = c("use", "select", "adapt"), occurrence = c("none", "auto", "fixed", "general", "odds-ratio", "inverse-odds-ratio", "direct"), distribution = c("dnorm", "dlaplace", "ds", "dgnorm", "dlnorm", "dinvgauss", "dgamma"), outliers = c("ignore", "use", "select"), level = 0.99, h = 0, holdout = FALSE, persistence = NULL, phi = NULL, initial = c("optimal", "backcasting", "complete"), arma = NULL, ic = c("AICc", "AIC", "BIC", "BICc"), bounds = c("usual", "admissible", "none"), silent = TRUE, parallel = FALSE, ...) ## S3 method for class 'adam' sm(object, model = "YYY", lags = NULL, orders = list(ar = c(0), i = c(0), ma = c(0), select = FALSE), constant = FALSE, formula = NULL, regressors = c("use", "select", "adapt"), data = NULL, persistence = NULL, phi = NULL, initial = c("optimal", "backcasting"), arma = NULL, ic = c("AICc", "AIC", "BIC", "BICc"), bounds = c("usual", "admissible", "none"), silent = TRUE, ...)
data |
Vector, containing data needed to be forecasted. If a matrix (or
data.frame / data.table) is provided, then the first column is used as a
response variable, while the rest of the matrix is used as a set of explanatory
variables. |
model |
The type of ETS model. The first letter stands for the type of
the error term ("A" or "M"), the second (and sometimes the third as well) is for
the trend ("N", "A", "Ad", "M" or "Md"), and the last one is for the type of
seasonality ("N", "A" or "M"). In case of several lags, the seasonal components
are assumed to be the same. The model is then printed out as
ETS(M,Ad,M)[m1,m2,...], where m1, m2, ... are the lags specified by the
Also, Keep in mind that model selection with "Z" components uses Branch and Bound algorithm and may skip some models that could have slightly smaller information criteria. If you want to do a exhaustive search, you would need to list all the models to check as a vector. The default value is set to |
lags |
Defines lags for the corresponding components. All components
count, starting from level, so ETS(M,M,M) model for monthly data will have
|
orders |
The order of ARIMA to be included in the model. This should be passed
either as a vector (in which case the non-seasonal ARIMA is assumed) or as a list of
a type |
constant |
Logical, determining, whether the constant is needed in the model or not.
This is mainly needed for ARIMA part of the model, but can be used for ETS as well. In
case of pure regression, this is completely ignored (use |
formula |
Formula to use in case of explanatory variables. If |
regressors |
The variable defines what to do with the provided explanatory
variables:
|
occurrence |
The type of model used in probability estimation. Can be
The type of model used in the occurrence is equal to the one provided in the
Also, a model produced using oes or alm function can be used here. |
distribution |
what density function to assume for the error term. The full
name of the distribution should be provided, starting with the letter "d" -
"density". The names align with the names of distribution functions in R.
For example, see dnorm. For detailed explanation of available
distributions, see vignette in greybox package: |
loss |
The type of Loss Function used in optimization.
In case of LASSO / RIDGE, the variables are not normalised prior to the estimation, but the parameters are divided by the mean values of explanatory variables. Note that model selection and combination works properly only for the default
Furthermore, just for fun the absolute and half analogues of multistep estimators
are available: Last but not least, user can provide their own function here as well, making sure
that it accepts parameters
|
outliers |
Defines what to do with outliers: |
level |
What confidence level to use for detection of outliers. The default is 99%. The specific bounds of confidence interval depend on the distribution used in the model. |
h |
The forecast horizon. Mainly needed for the multistep loss functions. |
holdout |
Logical. If |
persistence |
Persistence vector |
phi |
Value of damping parameter. If |
initial |
Can be either character or a list, or a vector of initial states.
If it is character, then it can be If a use provides a list of values, it is recommended to use the named one and
to provide the initial components that are available. For example:
|
arma |
Either the named list or a vector with AR / MA parameters ordered lag-wise.
The number of elements should correspond to the specified orders e.g.
|
ic |
The information criterion to use in the model selection / combination procedure. |
bounds |
The type of bounds for the persistence to use in the model
estimation. Can be either |
silent |
Specifies, whether to provide the progress of the function or not.
If |
... |
Other non-documented parameters. For example,
You can also pass parameters to the optimiser in order to fine tune its work:
You can read more about these parameters by running the function
nloptr.print.options.
It is also possible to regulate what smoother to use to get initial seasonal indices
from the msdecompose function via the |
object |
The model previously estimated using |
nsim |
Number of series to generate from the model. |
seed |
Random seed used in simulation of data. |
obs |
Number of observations to produce in the simulated data. |
parallel |
If TRUE, the estimation of ADAM models is done in parallel (used in |
Function estimates ADAM in a form of the Single Source of Error state space model of the following type:
Where is the Bernoulli distributed random variable (in case of
normal data it equals to 1 for all observations),
is the state
vector and
is the vector of lags,
is the vector of
exogenous variables. w(.) is the measurement function, r(.) is the error
function, f(.) is the transition function, g(.) is the persistence
function and
is the vector of parameters for exogenous variables.
Finally,
is the error term.
The implemented model allows introducing several seasonal states and supports
intermittent data via the occurrence
variable.
The error term can follow different distributions, which
are regulated via the
distribution
parameter. This includes:
default
- Normal distribution is used for the Additive error models,
Gamma is used for the Multiplicative error models.
dnorm - Normal distribution,
dlaplace - Laplace distribution,
ds - S distribution,
dgnorm - Generalised Normal distribution,
dlnorm - Log-Normal distribution,
dgamma - Gamma distribution,
dinvgauss - Inverse Gaussian distribution,
For some more information about the model and its implementation, see the
vignette: vignette("adam","smooth")
. The more detailed explanation
of ADAM is provided by Svetunkov (2021).
The function auto.adam()
tries out models with the specified
distributions and returns the one with the most suitable one based on selected
information criterion.
sm.adam method estimates the scale model for the already estimated adam. In order for ADAM to take the SM model into account, the latter needs to be recorded in the former, amending the likelihood and the number of degrees of freedom. This can be done using implant method.
Object of class "adam" is returned. It contains the list of the following values:
model
- the name of the constructed model,
timeElapsed
- the time elapsed for the estimation of the model,
data
- the in-sample part of the data used for the training of the model. Includes
the actual values in the first column,
holdout
- the holdout part of the data, excluded for purposes of model evaluation,
fitted
- the vector of fitted values,
residuals
- the vector of residuals,
forecast
- the point forecast for h steps ahead (by default NA is returned). NOTE
that these do not always correspond to the conditional expectations for ETS models. See ADAM
textbook, Section 6.4. for details (https://openforecast.org/adam/ETSTaxonomyMaths.html),
states
- the matrix of states with observations in rows and states in columns,
persisten
- the vector of smoothing parameters,
phi
- the value of damping parameter,
transition
- the transition matrix,
measurement
- the measurement matrix with observations in rows and state elements
in columns,
initial
- the named list of initial values, including level, trend, seasonal, ARIMA
and xreg components,
initialEstimated
- the named vector, defining which of the initials were estimated in
the model,
initialType
- the type of initialisation used ("optimal" / "complete" / "provided"),
orders
- the orders of ARIMA used in the estimation,
constant
- the value of the constant (if it was included),
arma
- the list of AR / MA parameters used in the model,
nParam
- the matrix of the estimated / provided parameters,
occurrence
- the oes model used for the occurrence part of the model,
formula
- the formula used for the explanatory variables expansion,
loss
- the type of loss function used in the estimation,
lossValue
- the value of that loss function,
logLik
- the value of the log-likelihood,
distribution
- the distribution function used in the calculation of the likelihood,
scale
- the value of the scale parameter,
lambda
- the value of the parameter used in LASSO / dalaplace / dt,
B
- the vector of all estimated parameters,
lags
- the vector of lags used in the model construction,
lagsAll
- the vector of the internal lags used in the model,
profile
- the matrix with the profile used in the construction of the model,
profileInitial
- the matrix with the initial profile (for the before the sample values),
call
- the call used in the evaluation,
bounds
- the type of bounds used in the process,
res
- result of the model estimation, the output of the nloptr()
function, explaining
how optimisation went,
other
- the list with other parameters, such as shape for distributions or ARIMA
polynomials.
Ivan Svetunkov, [email protected]
Svetunkov I. (2023) Smooth forecasting with the smooth package in R. arXiv:2301.01790. doi:10.48550/arXiv.2301.01790.
Svetunkov I. (2015 - Inf) "smooth" package for R - series of posts about the underlying models and how to use them: https://openforecast.org/category/r-en/smooth/.
Svetunkov, I. (2023). Forecasting and Analytics with the Augmented Dynamic Adaptive Model (ADAM) (1st ed.). Chapman and Hall/CRC. doi:10.1201/9781003452652, online version: https://openforecast.org/adam/.
Snyder, R. D., 1985. Recursive Estimation of Dynamic Linear Models. Journal of the Royal Statistical Society, Series B (Methodological) 47 (2), 272-276.
Hyndman, R.J., Koehler, A.B., Ord, J.K., and Snyder, R.D. (2008) Forecasting with exponential smoothing: the state space approach, Springer-Verlag. doi:10.1007/978-3-540-71918-2.
Svetunkov Ivan and Boylan John E. (2017). Multiplicative State-Space Models for Intermittent Time Series. Working Paper of Department of Management Science, Lancaster University, 2017:4 , 1-43.
Teunter R., Syntetos A., Babai Z. (2011). Intermittent demand: Linking forecasting to inventory obsolescence. European Journal of Operational Research, 214, 606-615.
Croston, J. (1972) Forecasting and stock control for intermittent demands. Operational Research Quarterly, 23(3), 289-303.
Syntetos, A., Boylan J. (2005) The accuracy of intermittent demand estimates. International Journal of Forecasting, 21(2), 303-314.
Kolassa, S. (2011) Combining exponential smoothing forecasts using Akaike weights. International Journal of Forecasting, 27, pp 238 - 251.
Taylor, J.W. and Bunn, D.W. (1999) A Quantile Regression Approach to Generating Prediction Intervals. Management Science, Vol 45, No 2, pp 225-237.
Lichtendahl Kenneth C., Jr., Grushka-Cockayne Yael, Winkler Robert L., (2013) Is It Better to Average Probabilities or Quantiles? Management Science 59(7):1594-1611. DOI: doi:10.1287/mnsc.1120.1667
### The main examples are provided in the adam vignette, check it out via: ## Not run: vignette("adam","smooth") # Model selection using a specified pool of models ourModel <- adam(rnorm(100,100,10), model=c("ANN","ANA","AAA"), lags=c(5,10)) adamSummary <- summary(ourModel) xtable(adamSummary) forecast(ourModel) par(mfcol=c(3,4)) plot(ourModel, c(1:11)) # Model combination using a specified pool ourModel <- adam(rnorm(100,100,10), model=c("ANN","AAN","MNN","CCC"), lags=c(5,10)) # ADAM ARIMA ourModel <- adam(rnorm(100,100,10), model="NNN", lags=c(1,4), orders=list(ar=c(1,0),i=c(1,0),ma=c(1,1))) # Fit ADAM to the data ourModel <- adam(rnorm(100,100,10), model="AAdN") # Simulate the data x <- simulate(ourModel) # Automatic selection of appropriate distribution and orders of ADAM ETS+ARIMA ourModel <- auto.adam(rnorm(100,100,10), model="ZZN", lags=c(1,4), orders=list(ar=c(2,2),ma=c(2,2),select=TRUE))
### The main examples are provided in the adam vignette, check it out via: ## Not run: vignette("adam","smooth") # Model selection using a specified pool of models ourModel <- adam(rnorm(100,100,10), model=c("ANN","ANA","AAA"), lags=c(5,10)) adamSummary <- summary(ourModel) xtable(adamSummary) forecast(ourModel) par(mfcol=c(3,4)) plot(ourModel, c(1:11)) # Model combination using a specified pool ourModel <- adam(rnorm(100,100,10), model=c("ANN","AAN","MNN","CCC"), lags=c(5,10)) # ADAM ARIMA ourModel <- adam(rnorm(100,100,10), model="NNN", lags=c(1,4), orders=list(ar=c(1,0),i=c(1,0),ma=c(1,1))) # Fit ADAM to the data ourModel <- adam(rnorm(100,100,10), model="AAdN") # Simulate the data x <- simulate(ourModel) # Automatic selection of appropriate distribution and orders of ADAM ETS+ARIMA ourModel <- auto.adam(rnorm(100,100,10), model="ZZN", lags=c(1,4), orders=list(ar=c(2,2),ma=c(2,2),select=TRUE))
Function estimates CES in state space form with information potential equal to errors with different seasonality types and chooses the one with the lowest IC value.
auto.ces(y, models = c("none", "simple", "full"), initial = c("backcasting", "optimal"), ic = c("AICc", "AIC", "BIC", "BICc"), loss = c("likelihood", "MSE", "MAE", "HAM", "MSEh", "TMSE", "GTMSE", "MSCE"), h = 10, holdout = FALSE, cumulative = FALSE, interval = c("none", "parametric", "likelihood", "semiparametric", "nonparametric"), level = 0.95, bounds = c("admissible", "none"), silent = c("all", "graph", "legend", "output", "none"), xreg = NULL, regressors = c("use", "select"), initialX = NULL, ...)
auto.ces(y, models = c("none", "simple", "full"), initial = c("backcasting", "optimal"), ic = c("AICc", "AIC", "BIC", "BICc"), loss = c("likelihood", "MSE", "MAE", "HAM", "MSEh", "TMSE", "GTMSE", "MSCE"), h = 10, holdout = FALSE, cumulative = FALSE, interval = c("none", "parametric", "likelihood", "semiparametric", "nonparametric"), level = 0.95, bounds = c("admissible", "none"), silent = c("all", "graph", "legend", "output", "none"), xreg = NULL, regressors = c("use", "select"), initialX = NULL, ...)
y |
Vector or ts object, containing data needed to be forecasted. |
models |
The vector containing several types of seasonality that should be used in CES selection. See ces for more details about the possible types of seasonal models. |
initial |
Can be either character or a vector of initial states. If it
is character, then it can be |
ic |
The information criterion used in the model selection procedure. |
loss |
The type of Loss Function used in optimization. There are also available analytical approximations for multistep functions:
Finally, just for fun the absolute and half analogues of multistep estimators
are available: |
h |
Length of forecasting horizon. |
holdout |
If |
cumulative |
If |
interval |
Type of interval to construct. This can be:
The parameter also accepts |
level |
Confidence level. Defines width of prediction interval. |
bounds |
What type of bounds to use in the model estimation. The first letter can be used instead of the whole word. |
silent |
If |
xreg |
The vector (either numeric or time series) or the matrix (or
data.frame) of exogenous variables that should be included in the model. If
matrix included than columns should contain variables and rows - observations.
Note that |
regressors |
The variable defines what to do with the provided xreg:
|
initialX |
The vector of initial parameters for exogenous variables.
Ignored if |
... |
Other non-documented parameters. For example |
The function estimates several Complex Exponential Smoothing in the state space 2 described in Svetunkov, Kourentzes (2015) with the information potential equal to the approximation error using different types of seasonality and chooses the one with the lowest value of information criterion.
For some more information about the model and its implementation, see the
vignette: vignette("ces","smooth")
Object of class "smooth" is returned. See ces for details.
Ivan Svetunkov, [email protected]
Svetunkov, I., Kourentzes, N. (February 2015). Complex exponential smoothing. Working Paper of Department of Management Science, Lancaster University 2015:1, 1-31.
Svetunkov I., Kourentzes N. (2017) Complex Exponential Smoothing for Time Series Forecasting. Not yet published.
y <- ts(rnorm(100,10,3),frequency=12) # CES with and without holdout auto.ces(y,h=20,holdout=TRUE) auto.ces(y,h=20,holdout=FALSE) # Selection between "none" and "full" seasonalities auto.ces(AirPassengers,h=8,holdout=TRUE, models=c("n","f"),interval="p",level=0.8,ic="AIC") ourModel <- auto.ces(AirPassengers,interval="sp") summary(ourModel) forecast(ourModel) plot(forecast(ourModel))
y <- ts(rnorm(100,10,3),frequency=12) # CES with and without holdout auto.ces(y,h=20,holdout=TRUE) auto.ces(y,h=20,holdout=FALSE) # Selection between "none" and "full" seasonalities auto.ces(AirPassengers,h=8,holdout=TRUE, models=c("n","f"),interval="p",level=0.8,ic="AIC") ourModel <- auto.ces(AirPassengers,interval="sp") summary(ourModel) forecast(ourModel) plot(forecast(ourModel))
Function selects the order of GUM model based on information criteria, using fancy branch and bound mechanism.
auto.gum(y, orders = 3, lags = frequency(y), type = c("additive", "multiplicative", "select"), initial = c("backcasting", "optimal"), ic = c("AICc", "AIC", "BIC", "BICc"), loss = c("likelihood", "MSE", "MAE", "HAM", "MSEh", "TMSE", "GTMSE", "MSCE"), h = 10, holdout = FALSE, cumulative = FALSE, interval = c("none", "parametric", "likelihood", "semiparametric", "nonparametric"), level = 0.95, bounds = c("restricted", "admissible", "none"), silent = c("all", "graph", "legend", "output", "none"), xreg = NULL, regressors = c("use", "select"), initialX = NULL, ...)
auto.gum(y, orders = 3, lags = frequency(y), type = c("additive", "multiplicative", "select"), initial = c("backcasting", "optimal"), ic = c("AICc", "AIC", "BIC", "BICc"), loss = c("likelihood", "MSE", "MAE", "HAM", "MSEh", "TMSE", "GTMSE", "MSCE"), h = 10, holdout = FALSE, cumulative = FALSE, interval = c("none", "parametric", "likelihood", "semiparametric", "nonparametric"), level = 0.95, bounds = c("restricted", "admissible", "none"), silent = c("all", "graph", "legend", "output", "none"), xreg = NULL, regressors = c("use", "select"), initialX = NULL, ...)
y |
Vector or ts object, containing data needed to be forecasted. |
orders |
The value of the max order to check. This is the upper bound of orders, but the real orders could be lower than this because of the increasing number of parameters in the models with higher orders. |
lags |
The value of the maximum lag to check. This should usually be a maximum frequency of the data. |
type |
Type of model. Can either be |
initial |
Can be either character or a vector of initial states. If it
is character, then it can be |
ic |
The information criterion used in the model selection procedure. |
loss |
The type of Loss Function used in optimization. There are also available analytical approximations for multistep functions:
Finally, just for fun the absolute and half analogues of multistep estimators
are available: |
h |
Length of forecasting horizon. |
holdout |
If |
cumulative |
If |
interval |
Type of interval to construct. This can be:
The parameter also accepts |
level |
Confidence level. Defines width of prediction interval. |
bounds |
What type of bounds to use in the model estimation. The first letter can be used instead of the whole word. |
silent |
If |
xreg |
The vector (either numeric or time series) or the matrix (or
data.frame) of exogenous variables that should be included in the model. If
matrix included than columns should contain variables and rows - observations.
Note that |
regressors |
The variable defines what to do with the provided xreg:
|
initialX |
The vector of initial parameters for exogenous variables.
Ignored if |
... |
Other non-documented parameters. For example |
The function checks several GUM models (see gum documentation) and selects the best one based on the specified information criterion.
The resulting model can be complicated and not straightforward, because GUM
allows capturing hidden orders that no ARIMA model can. It is advised to use
initial="b"
, because optimising GUM of arbitrary order is not a simple
task.
For some more information about the model and its implementation, see the
vignette: vignette("gum","smooth")
Object of class "smooth" is returned. See gum for details.
Ivan Svetunkov, [email protected]
Snyder, R. D., 1985. Recursive Estimation of Dynamic Linear Models. Journal of the Royal Statistical Society, Series B (Methodological) 47 (2), 272-276.
Hyndman, R.J., Koehler, A.B., Ord, J.K., and Snyder, R.D. (2008) Forecasting with exponential smoothing: the state space approach, Springer-Verlag. doi:10.1007/978-3-540-71918-2.
Svetunkov Ivan and Boylan John E. (2017). Multiplicative State-Space Models for Intermittent Time Series. Working Paper of Department of Management Science, Lancaster University, 2017:4 , 1-43.
Teunter R., Syntetos A., Babai Z. (2011). Intermittent demand: Linking forecasting to inventory obsolescence. European Journal of Operational Research, 214, 606-615.
Croston, J. (1972) Forecasting and stock control for intermittent demands. Operational Research Quarterly, 23(3), 289-303.
Syntetos, A., Boylan J. (2005) The accuracy of intermittent demand estimates. International Journal of Forecasting, 21(2), 303-314.
x <- rnorm(50,100,3) # The best GUM model for the data ourModel <- auto.gum(x,orders=2,lags=4,h=18,holdout=TRUE,interval="np") summary(ourModel) forecast(ourModel) plot(forecast(ourModel))
x <- rnorm(50,100,3) # The best GUM model for the data ourModel <- auto.gum(x,orders=2,lags=4,h=18,holdout=TRUE,interval="np") summary(ourModel) forecast(ourModel) plot(forecast(ourModel))
Function selects the best State Space ARIMA based on information criteria, using fancy branch and bound mechanism. The resulting model can be not optimal in IC meaning, but it is usually reasonable.
auto.ssarima(y, orders = list(ar = c(3, 3), i = c(2, 1), ma = c(3, 3)), lags = c(1, frequency(y)), combine = FALSE, fast = TRUE, constant = NULL, initial = c("backcasting", "optimal"), ic = c("AICc", "AIC", "BIC", "BICc"), loss = c("likelihood", "MSE", "MAE", "HAM", "MSEh", "TMSE", "GTMSE", "MSCE"), h = 10, holdout = FALSE, cumulative = FALSE, interval = c("none", "parametric", "likelihood", "semiparametric", "nonparametric"), level = 0.95, bounds = c("admissible", "none"), silent = c("all", "graph", "legend", "output", "none"), xreg = NULL, regressors = c("use", "select"), initialX = NULL, ...)
auto.ssarima(y, orders = list(ar = c(3, 3), i = c(2, 1), ma = c(3, 3)), lags = c(1, frequency(y)), combine = FALSE, fast = TRUE, constant = NULL, initial = c("backcasting", "optimal"), ic = c("AICc", "AIC", "BIC", "BICc"), loss = c("likelihood", "MSE", "MAE", "HAM", "MSEh", "TMSE", "GTMSE", "MSCE"), h = 10, holdout = FALSE, cumulative = FALSE, interval = c("none", "parametric", "likelihood", "semiparametric", "nonparametric"), level = 0.95, bounds = c("admissible", "none"), silent = c("all", "graph", "legend", "output", "none"), xreg = NULL, regressors = c("use", "select"), initialX = NULL, ...)
y |
Vector or ts object, containing data needed to be forecasted. |
orders |
List of maximum orders to check, containing vector variables
|
lags |
Defines lags for the corresponding orders (see examples). The
length of |
combine |
If |
fast |
If |
constant |
If |
initial |
Can be either character or a vector of initial states. If it
is character, then it can be |
ic |
The information criterion used in the model selection procedure. |
loss |
The type of Loss Function used in optimization. There are also available analytical approximations for multistep functions:
Finally, just for fun the absolute and half analogues of multistep estimators
are available: |
h |
Length of forecasting horizon. |
holdout |
If |
cumulative |
If |
interval |
Type of interval to construct. This can be:
The parameter also accepts |
level |
Confidence level. Defines width of prediction interval. |
bounds |
What type of bounds to use in the model estimation. The first letter can be used instead of the whole word. |
silent |
If |
xreg |
The vector (either numeric or time series) or the matrix (or
data.frame) of exogenous variables that should be included in the model. If
matrix included than columns should contain variables and rows - observations.
Note that |
regressors |
The variable defines what to do with the provided xreg:
|
initialX |
The vector of initial parameters for exogenous variables.
Ignored if |
... |
Other non-documented parameters. For example |
The function constructs bunch of ARIMAs in Single Source of Error state space form (see ssarima documentation) and selects the best one based on information criterion. The mechanism is described in Svetunkov & Boylan (2019).
Due to the flexibility of the model, multiple seasonalities can be used. For example, something crazy like this can be constructed: SARIMA(1,1,1)(0,1,1)[24](2,0,1)[24*7](0,0,1)[24*30], but the estimation may take a lot of time... It is recommended to use auto.msarima in cases with more than one seasonality and high frequencies.
For some more information about the model and its implementation, see the
vignette: vignette("ssarima","smooth")
Object of class "smooth" is returned. See ssarima for details.
Ivan Svetunkov, [email protected]
Snyder, R. D., 1985. Recursive Estimation of Dynamic Linear Models. Journal of the Royal Statistical Society, Series B (Methodological) 47 (2), 272-276.
Hyndman, R.J., Koehler, A.B., Ord, J.K., and Snyder, R.D. (2008) Forecasting with exponential smoothing: the state space approach, Springer-Verlag. doi:10.1007/978-3-540-71918-2.
Svetunkov Ivan and Boylan John E. (2017). Multiplicative State-Space Models for Intermittent Time Series. Working Paper of Department of Management Science, Lancaster University, 2017:4 , 1-43.
Teunter R., Syntetos A., Babai Z. (2011). Intermittent demand: Linking forecasting to inventory obsolescence. European Journal of Operational Research, 214, 606-615.
Croston, J. (1972) Forecasting and stock control for intermittent demands. Operational Research Quarterly, 23(3), 289-303.
Syntetos, A., Boylan J. (2005) The accuracy of intermittent demand estimates. International Journal of Forecasting, 21(2), 303-314.
Svetunkov, I., & Boylan, J. E. (2019). State-space ARIMA for supply-chain forecasting. International Journal of Production Research, 0(0), 1–10. doi:10.1080/00207543.2019.1600764
x <- rnorm(118,100,3) # The best ARIMA for the data ourModel <- auto.ssarima(x,orders=list(ar=c(2,1),i=c(1,1),ma=c(2,1)),lags=c(1,12), h=18,holdout=TRUE,interval="np") # The other one using optimised states auto.ssarima(x,orders=list(ar=c(3,2),i=c(2,1),ma=c(3,2)),lags=c(1,12), initial="o",h=18,holdout=TRUE) # And now combined ARIMA auto.ssarima(x,orders=list(ar=c(3,2),i=c(2,1),ma=c(3,2)),lags=c(1,12), combine=TRUE,h=18,holdout=TRUE) summary(ourModel) forecast(ourModel) plot(forecast(ourModel))
x <- rnorm(118,100,3) # The best ARIMA for the data ourModel <- auto.ssarima(x,orders=list(ar=c(2,1),i=c(1,1),ma=c(2,1)),lags=c(1,12), h=18,holdout=TRUE,interval="np") # The other one using optimised states auto.ssarima(x,orders=list(ar=c(3,2),i=c(2,1),ma=c(3,2)),lags=c(1,12), initial="o",h=18,holdout=TRUE) # And now combined ARIMA auto.ssarima(x,orders=list(ar=c(3,2),i=c(2,1),ma=c(3,2)),lags=c(1,12), combine=TRUE,h=18,holdout=TRUE) summary(ourModel) forecast(ourModel) plot(forecast(ourModel))
Function estimates CES in state space form with information potential equal to errors and returns several variables.
ces(y, seasonality = c("none", "simple", "partial", "full"), initial = c("backcasting", "optimal"), a = NULL, b = NULL, ic = c("AICc", "AIC", "BIC", "BICc"), loss = c("likelihood", "MSE", "MAE", "HAM", "MSEh", "TMSE", "GTMSE", "MSCE"), h = 10, holdout = FALSE, cumulative = FALSE, interval = c("none", "parametric", "likelihood", "semiparametric", "nonparametric"), level = 0.95, bounds = c("admissible", "none"), silent = c("all", "graph", "legend", "output", "none"), xreg = NULL, regressors = c("use", "select"), initialX = NULL, ...)
ces(y, seasonality = c("none", "simple", "partial", "full"), initial = c("backcasting", "optimal"), a = NULL, b = NULL, ic = c("AICc", "AIC", "BIC", "BICc"), loss = c("likelihood", "MSE", "MAE", "HAM", "MSEh", "TMSE", "GTMSE", "MSCE"), h = 10, holdout = FALSE, cumulative = FALSE, interval = c("none", "parametric", "likelihood", "semiparametric", "nonparametric"), level = 0.95, bounds = c("admissible", "none"), silent = c("all", "graph", "legend", "output", "none"), xreg = NULL, regressors = c("use", "select"), initialX = NULL, ...)
y |
Vector or ts object, containing data needed to be forecasted. |
seasonality |
The type of seasonality used in CES. Can be: |
initial |
Can be either character or a vector of initial states. If it
is character, then it can be |
a |
First complex smoothing parameter. Should be a complex number. NOTE! CES is very sensitive to a and b values so it is advised either to leave them alone, or to use values from previously estimated model. |
b |
Second complex smoothing parameter. Can be real if
|
ic |
The information criterion used in the model selection procedure. |
loss |
The type of Loss Function used in optimization. There are also available analytical approximations for multistep functions:
Finally, just for fun the absolute and half analogues of multistep estimators
are available: |
h |
Length of forecasting horizon. |
holdout |
If |
cumulative |
If |
interval |
Type of interval to construct. This can be:
The parameter also accepts |
level |
Confidence level. Defines width of prediction interval. |
bounds |
What type of bounds to use in the model estimation. The first letter can be used instead of the whole word. |
silent |
If |
xreg |
The vector (either numeric or time series) or the matrix (or
data.frame) of exogenous variables that should be included in the model. If
matrix included than columns should contain variables and rows - observations.
Note that |
regressors |
The variable defines what to do with the provided xreg:
|
initialX |
The vector of initial parameters for exogenous variables.
Ignored if |
... |
Other non-documented parameters. For example parameter
|
The function estimates Complex Exponential Smoothing in the state space 2 described in Svetunkov, Kourentzes (2017) with the information potential equal to the approximation error. The estimation of initial states of xt is done using backcast.
For some more information about the model and its implementation, see the
vignette: vignette("ces","smooth")
Object of class "smooth" is returned. It contains the list of the following values:
model
- type of constructed model.
timeElapsed
- time elapsed for the construction of the model.
states
- the matrix of the components of CES. The included
minimum is "level" and "potential". In the case of seasonal model the
seasonal component is also included. In the case of exogenous variables the
estimated coefficients for the exogenous variables are also included.
a
- complex smoothing parameter in the form a0 + ia1
b
- smoothing parameter for the seasonal component. Can either
be real (if seasonality="P"
) or complex (if seasonality="F"
)
in a form b0 + ib1.
persistence
- persistence vector. This is the place, where
smoothing parameters live.
transition
- transition matrix of the model.
measurement
- measurement vector of the model.
initialType
- Type of the initial values used.
initial
- the initial values of the state vector (non-seasonal).
nParam
- table with the number of estimated / provided parameters.
If a previous model was reused, then its initials are reused and the number of
provided parameters will take this into account.
fitted
- the fitted values of CES.
forecast
- the point forecast of CES.
lower
- the lower bound of prediction interval. When
interval="none"
then NA is returned.
upper
- the upper bound of prediction interval. When
interval="none"
then NA is returned.
residuals
- the residuals of the estimated model.
errors
- The matrix of 1 to h steps ahead errors. Only returned when the
multistep losses are used and semiparametric interval is needed.
s2
- variance of the residuals (taking degrees of
freedom into account).
interval
- type of interval asked by user.
level
- confidence level for interval.
cumulative
- whether the produced forecast was cumulative or not.
y
- The data provided in the call of the function.
holdout
- the holdout part of the original data.
xreg
- provided vector or matrix of exogenous variables. If
regressors="s"
, then this value will contain only selected exogenous
variables.
exogenous variables were estimated as well.
initialX
- initial values for parameters of exogenous variables.
ICs
- values of information criteria of the model. Includes
AIC, AICc, BIC and BICc.
logLik
- log-likelihood of the function.
lossValue
- Cost function value.
loss
- Type of loss function used in the estimation.
FI
- Fisher Information. Equal to NULL if FI=FALSE
or when FI
is not provided at all.
accuracy
- vector of accuracy measures for the holdout sample. In
case of non-intermittent data includes: MPE, MAPE, SMAPE, MASE, sMAE,
RelMAE, sMSE and Bias coefficient (based on complex numbers). In case of
intermittent data the set of errors will be: sMSE, sPIS, sCE (scaled
cumulative error) and Bias coefficient. This is available only when
holdout=TRUE
.
B
- the vector of all the estimated parameters.
Ivan Svetunkov, [email protected]
Svetunkov, I., Kourentzes, N. (February 2015). Complex exponential smoothing. Working Paper of Department of Management Science, Lancaster University 2015:1, 1-31.
Svetunkov I., Kourentzes N. (2017) Complex Exponential Smoothing for Time Series Forecasting. Not yet published.
y <- rnorm(100,10,3) ces(y,h=20,holdout=TRUE) ces(y,h=20,holdout=FALSE) y <- 500 - c(1:100)*0.5 + rnorm(100,10,3) ces(y,h=20,holdout=TRUE,interval="p",bounds="a") ces(BJsales,h=8,holdout=TRUE,seasonality="s",interval="sp",level=0.8) ces(AirPassengers,h=18,holdout=TRUE,seasonality="s",interval="sp") ces(AirPassengers,h=18,holdout=TRUE,seasonality="p",interval="np") ces(AirPassengers,h=18,holdout=TRUE,seasonality="f",interval="p")
y <- rnorm(100,10,3) ces(y,h=20,holdout=TRUE) ces(y,h=20,holdout=FALSE) y <- 500 - c(1:100)*0.5 + rnorm(100,10,3) ces(y,h=20,holdout=TRUE,interval="p",bounds="a") ces(BJsales,h=8,holdout=TRUE,seasonality="s",interval="sp",level=0.8) ces(AirPassengers,h=18,holdout=TRUE,seasonality="s",interval="sp") ces(AirPassengers,h=18,holdout=TRUE,seasonality="p",interval="np") ces(AirPassengers,h=18,holdout=TRUE,seasonality="f",interval="p")
Function constructs centered moving average based on state space SMA
cma(y, order = NULL, silent = TRUE, ...)
cma(y, order = NULL, silent = TRUE, ...)
y |
Vector or ts object, containing data needed to be smoothed. |
order |
Order of centered moving average. If |
silent |
If |
... |
Nothing. Needed only for the transition to the new name of variables. |
If the order is odd, then the function constructs SMA(order) and shifts it back in time. Otherwise an AR(order+1) model is constructed with the preset parameters:
This then corresponds to the centered MA with 0.5 weight for the first observation and 0.5 weight for an additional one. e.g. if this is monthly data and we use order=12, then half of the first January and half of the new one is taken.
This is not a forecasting tool. This is supposed to smooth the time series in order to find trend. So don't expect any forecasts from this function!
Object of class "smooth" is returned. It contains the list of the following values:
model
- the name of the estimated model.
timeElapsed
- time elapsed for the construction of the model.
order
- order of the moving average.
nParam
- table with the number of estimated / provided parameters.
If a previous model was reused, then its initials are reused and the number of
provided parameters will take this into account.
fitted
- the fitted values, shifted in time.
forecast
- NAs, because this function does not produce forecasts.
residuals
- the residuals of the SMA / AR model.
s2
- variance of the residuals (taking degrees of freedom into
account) of the SMA / AR model.
y
- the original data.
ICs
- values of information criteria from the respective SMA or
AR model. Includes AIC, AICc, BIC and BICc.
logLik
- log-likelihood of the SMA / AR model.
lossValue
- Cost function value (for the SMA / AR model).
loss
- Type of loss function used in the estimation.
Ivan Svetunkov, [email protected]
Svetunkov I. (2023) Smooth forecasting with the smooth package in R. arXiv:2301.01790. doi:10.48550/arXiv.2301.01790.
Svetunkov I. (2015 - Inf) "smooth" package for R - series of posts about the underlying models and how to use them: https://openforecast.org/category/r-en/smooth/.
# CMA of specific order ourModel <- cma(rnorm(118,100,3),order=12) # CMA of arbitrary order ourModel <- cma(rnorm(118,100,3)) summary(ourModel)
# CMA of specific order ourModel <- cma(rnorm(118,100,3),order=12) # CMA of arbitrary order ourModel <- cma(rnorm(118,100,3)) summary(ourModel)
Function constructs ETS model and returns forecast, fitted values, errors and matrix of states. It is a wrapper of adam function.
es(y, model = "ZZZ", lags = c(frequency(y)), persistence = NULL, phi = NULL, initial = c("optimal", "backcasting", "complete"), initialSeason = NULL, ic = c("AICc", "AIC", "BIC", "BICc"), loss = c("likelihood", "MSE", "MAE", "HAM", "MSEh", "TMSE", "GTMSE", "MSCE"), h = 10, holdout = FALSE, bounds = c("usual", "admissible", "none"), silent = TRUE, xreg = NULL, regressors = c("use", "select"), initialX = NULL, ...) es_old(y, model = "ZZZ", persistence = NULL, phi = NULL, initial = c("optimal", "backcasting"), initialSeason = NULL, ic = c("AICc", "AIC", "BIC", "BICc"), loss = c("likelihood", "MSE", "MAE", "HAM", "MSEh", "TMSE", "GTMSE", "MSCE"), h = 10, holdout = FALSE, cumulative = FALSE, interval = c("none", "parametric", "likelihood", "semiparametric", "nonparametric"), level = 0.95, bounds = c("usual", "admissible", "none"), silent = c("all", "graph", "legend", "output", "none"), xreg = NULL, regressors = c("use", "select"), initialX = NULL, ...)
es(y, model = "ZZZ", lags = c(frequency(y)), persistence = NULL, phi = NULL, initial = c("optimal", "backcasting", "complete"), initialSeason = NULL, ic = c("AICc", "AIC", "BIC", "BICc"), loss = c("likelihood", "MSE", "MAE", "HAM", "MSEh", "TMSE", "GTMSE", "MSCE"), h = 10, holdout = FALSE, bounds = c("usual", "admissible", "none"), silent = TRUE, xreg = NULL, regressors = c("use", "select"), initialX = NULL, ...) es_old(y, model = "ZZZ", persistence = NULL, phi = NULL, initial = c("optimal", "backcasting"), initialSeason = NULL, ic = c("AICc", "AIC", "BIC", "BICc"), loss = c("likelihood", "MSE", "MAE", "HAM", "MSEh", "TMSE", "GTMSE", "MSCE"), h = 10, holdout = FALSE, cumulative = FALSE, interval = c("none", "parametric", "likelihood", "semiparametric", "nonparametric"), level = 0.95, bounds = c("usual", "admissible", "none"), silent = c("all", "graph", "legend", "output", "none"), xreg = NULL, regressors = c("use", "select"), initialX = NULL, ...)
y |
Vector or ts object, containing data needed to be forecasted. |
model |
The type of ETS model. The first letter stands for the type of
the error term ("A" or "M"), the second (and sometimes the third as well) is for
the trend ("N", "A", "Ad", "M" or "Md"), and the last one is for the type of
seasonality ("N", "A" or "M"). So, the function accepts words with 3 or 4
characters: The parameter Also Keep in mind that model selection with "Z" components uses Branch and Bound algorithm and may skip some models that could have slightly smaller information criteria. |
lags |
Defines lags for the corresponding components. All components
count, starting from level, so ETS(M,M,M) model for monthly data will have
|
persistence |
Persistence vector |
phi |
Value of damping parameter. If |
initial |
Can be either character or a vector of initial states.
If it is character, then it can be |
initialSeason |
Vector of initial values for seasonal components. If
|
ic |
The information criterion used in the model selection procedure. |
loss |
The type of Loss Function used in optimization. There are also available analytical approximations for multistep functions:
Finally, just for fun the absolute and half analogues of multistep estimators
are available: |
h |
Length of forecasting horizon. |
holdout |
If |
bounds |
What type of bounds to use in the model estimation. The first letter can be used instead of the whole word. |
silent |
If |
xreg |
The vector (either numeric or time series) or the matrix (or
data.frame) of exogenous variables that should be included in the model. If
matrix included than columns should contain variables and rows - observations.
Note that |
regressors |
The variable defines what to do with the provided xreg:
|
initialX |
The vector of initial parameters for exogenous variables.
Ignored if |
... |
Other non-documented parameters. For example |
cumulative |
If |
interval |
Type of interval to construct. This can be:
The parameter also accepts |
level |
Confidence level. Defines width of prediction interval. |
Function estimates ETS in a form of the Single Source of Error state space model of the following type:
Where is the Bernoulli distributed random variable (in case of
normal data it equals to 1 for all observations),
is the state
vector and
is the vector of lags,
is the vector of
exogenous variables. w(.) is the measurement function, r(.) is the error
function, f(.) is the transition function, g(.) is the persistence
function and h(.) is the explanatory variables function.
is the
vector of parameters for exogenous variables,
is the
transitionX
matrix and is the
persistenceX
matrix.
Finally, is the error term.
For the details see Hyndman et al.(2008).
For some more information about the model and its implementation, see the
vignette: vignette("es","smooth")
.
Also, there are posts about the functions of the package smooth on the website of Ivan Svetunkov: https://openforecast.org/category/r-en/smooth/ - they explain the underlying models and how to use the functions.
Object of class "adam" is returned. It contains the list of the following values for classical ETS models:
model
- type of constructed model.
formula
- mathematical formula, describing interactions between
components of es() and exogenous variables.
timeElapsed
- time elapsed for the construction of the model.
states
- matrix of the components of ETS.
persistence
- persistence vector. This is the place, where
smoothing parameters live.
phi
- value of damping parameter.
transition
- transition matrix of the model.
measurement
- measurement vector of the model.
initialType
- type of the initial values used.
initial
- initial values of the state vector (non-seasonal).
initialSeason
- initial values of the seasonal part of state vector.
nParam
- table with the number of estimated / provided parameters.
If a previous model was reused, then its initials are reused and the number of
provided parameters will take this into account.
fitted
- fitted values of ETS. In case of the intermittent model, the
fitted are multiplied by the probability of occurrence.
forecast
- the point forecast for h steps ahead (by default NA is returned). NOTE
that these do not always correspond to the conditional expectations. See ADAM
textbook, Section 4.4. for details (https://openforecast.org/adam/ETSTaxonomyMaths.html),
lower
- lower bound of prediction interval. When interval="none"
then NA is returned.
upper
- higher bound of prediction interval. When interval="none"
then NA is returned.
residuals
- residuals of the estimated model.
errors
- trace forecast in-sample errors, returned as a matrix. Only returned when the
multistep losses are used and semiparametric interval is needed.
s2
- variance of the residuals (taking degrees of freedom into account).
This is an unbiased estimate of variance.
interval
- type of interval asked by user.
level
- confidence level for interval.
cumulative
- whether the produced forecast was cumulative or not.
y
- original data.
holdout
- holdout part of the original data.
xreg
- provided vector or matrix of exogenous variables. If regressors="s"
,
then this value will contain only selected exogenous variables.
initialX
- initial values for parameters of exogenous variables.
ICs
- values of information criteria of the model. Includes AIC, AICc, BIC and BICc.
logLik
- concentrated log-likelihood of the function.
lossValue
- loss function value.
loss
- type of loss function used in the estimation.
FI
- Fisher Information. Equal to NULL if FI=FALSE
or when FI
is not provided at all.
accuracy
- vector of accuracy measures for the holdout sample. In
case of non-intermittent data includes: MPE, MAPE, SMAPE, MASE, sMAE,
RelMAE, sMSE and Bias coefficient (based on complex numbers). In case of
intermittent data the set of errors will be: sMSE, sPIS, sCE (scaled
cumulative error) and Bias coefficient. This is available only when
holdout=TRUE
.
B
- the vector of all the estimated parameters.
If combination of forecasts is produced (using model="CCC"
), then a
shorter list of values is returned:
model
,
timeElapsed
,
initialType
,
fitted
,
forecast
,
lower
,
upper
,
residuals
,
s2
- variance of additive error of combined one-step-ahead forecasts,
interval
,
level
,
cumulative
,
y
,
holdout
,
ICs
- combined ic,
ICw
- ic weights used in the combination,
loss
,
xreg
,
accuracy
.
Ivan Svetunkov, [email protected]
Snyder, R. D., 1985. Recursive Estimation of Dynamic Linear Models. Journal of the Royal Statistical Society, Series B (Methodological) 47 (2), 272-276.
Hyndman, R.J., Koehler, A.B., Ord, J.K., and Snyder, R.D. (2008) Forecasting with exponential smoothing: the state space approach, Springer-Verlag. doi:10.1007/978-3-540-71918-2.
Svetunkov Ivan and Boylan John E. (2017). Multiplicative State-Space Models for Intermittent Time Series. Working Paper of Department of Management Science, Lancaster University, 2017:4 , 1-43.
Teunter R., Syntetos A., Babai Z. (2011). Intermittent demand: Linking forecasting to inventory obsolescence. European Journal of Operational Research, 214, 606-615.
Croston, J. (1972) Forecasting and stock control for intermittent demands. Operational Research Quarterly, 23(3), 289-303.
Syntetos, A., Boylan J. (2005) The accuracy of intermittent demand estimates. International Journal of Forecasting, 21(2), 303-314.
Kolassa, S. (2011) Combining exponential smoothing forecasts using Akaike weights. International Journal of Forecasting, 27, pp 238 - 251.
Taylor, J.W. and Bunn, D.W. (1999) A Quantile Regression Approach to Generating Prediction Intervals. Management Science, Vol 45, No 2, pp 225-237.
Lichtendahl Kenneth C., Jr., Grushka-Cockayne Yael, Winkler Robert L., (2013) Is It Better to Average Probabilities or Quantiles? Management Science 59(7):1594-1611. DOI: doi:10.1287/mnsc.1120.1667
# See how holdout and trace parameters influence the forecast es(BJsales,model="AAdN",h=8,holdout=FALSE,loss="MSE") es(AirPassengers,model="MAM",h=18,holdout=TRUE,loss="TMSE") # Model selection example es(BJsales,model="ZZN",ic="AIC",h=8,holdout=FALSE,bounds="a") # Model selection. Compare AICc of these two models: es(AirPassengers,"ZZZ",h=10,holdout=TRUE) es(AirPassengers,"MAdM",h=10,holdout=TRUE) # Model selection, excluding multiplicative trend es(AirPassengers,model="ZXZ",h=8,holdout=TRUE) # Combination example es(BJsales,model="CCN",h=8,holdout=TRUE) # Model selection using a specified pool of models ourModel <- es(AirPassengers,model=c("ANN","AAM","AMdA"),h=18) # Produce forecast and prediction interval forecast(ourModel, h=18, interval="parametric") # Semiparametric interval example forecast(ourModel, h=18, interval="semiparametric") # This will be the same model as in previous line but estimated on new portion of data es(BJsales,model=ourModel,h=18,holdout=FALSE)
# See how holdout and trace parameters influence the forecast es(BJsales,model="AAdN",h=8,holdout=FALSE,loss="MSE") es(AirPassengers,model="MAM",h=18,holdout=TRUE,loss="TMSE") # Model selection example es(BJsales,model="ZZN",ic="AIC",h=8,holdout=FALSE,bounds="a") # Model selection. Compare AICc of these two models: es(AirPassengers,"ZZZ",h=10,holdout=TRUE) es(AirPassengers,"MAdM",h=10,holdout=TRUE) # Model selection, excluding multiplicative trend es(AirPassengers,model="ZXZ",h=8,holdout=TRUE) # Combination example es(BJsales,model="CCN",h=8,holdout=TRUE) # Model selection using a specified pool of models ourModel <- es(AirPassengers,model=c("ANN","AAM","AMdA"),h=18) # Produce forecast and prediction interval forecast(ourModel, h=18, interval="parametric") # Semiparametric interval example forecast(ourModel, h=18, interval="semiparametric") # This will be the same model as in previous line but estimated on new portion of data es(BJsales,model=ourModel,h=18,holdout=FALSE)
Function produces conditional expectation (point forecasts) and prediction intervals for the estimated model.
## S3 method for class 'adam' forecast(object, h = 10, newdata = NULL, occurrence = NULL, interval = c("none", "prediction", "confidence", "simulated", "approximate", "semiparametric", "nonparametric", "empirical", "complete"), level = 0.95, side = c("both", "upper", "lower"), cumulative = FALSE, nsim = NULL, scenarios = FALSE, ...) ## S3 method for class 'smooth' forecast(object, h = 10, interval = c("parametric", "semiparametric", "nonparametric", "none"), level = 0.95, side = c("both", "upper", "lower"), ...) ## S3 method for class 'oes' forecast(object, h = 10, ...) ## S3 method for class 'msdecompose' forecast(object, h = 10, interval = c("parametric", "semiparametric", "nonparametric", "none"), level = 0.95, model = NULL, ...)
## S3 method for class 'adam' forecast(object, h = 10, newdata = NULL, occurrence = NULL, interval = c("none", "prediction", "confidence", "simulated", "approximate", "semiparametric", "nonparametric", "empirical", "complete"), level = 0.95, side = c("both", "upper", "lower"), cumulative = FALSE, nsim = NULL, scenarios = FALSE, ...) ## S3 method for class 'smooth' forecast(object, h = 10, interval = c("parametric", "semiparametric", "nonparametric", "none"), level = 0.95, side = c("both", "upper", "lower"), ...) ## S3 method for class 'oes' forecast(object, h = 10, ...) ## S3 method for class 'msdecompose' forecast(object, h = 10, interval = c("parametric", "semiparametric", "nonparametric", "none"), level = 0.95, model = NULL, ...)
object |
Time series model for which forecasts are required. |
h |
Forecast horizon. |
newdata |
The new data needed in order to produce forecasts. |
occurrence |
The vector containing the future occurrence variable (values in [0,1]), if it is known. |
interval |
What type of mechanism to use for interval construction.
the recommended option is |
level |
Confidence level. Defines width of prediction interval. |
side |
Defines, whether to provide |
cumulative |
If |
nsim |
Number of iterations to do in cases of |
scenarios |
Binary, defining whether to return scenarios produced via
simulations or not. Only works if |
... |
|
model |
The type of ETS model to fit on the decomposed trend. Only applicable to
"msdecompose" class. This is then returned in parameter "esmodel". If |
By default the function will generate conditional expectations from the estimated model and will also produce a variety of prediction intervals based on user preferences.
Returns object of class "smooth.forecast", which contains:
model
- the estimated model (ES / CES / GUM / SSARIMA).
method
- the name of the estimated model (ES / CES / GUM / SSARIMA).
forecast
aka mean
- point forecasts of the model
(conditional mean).
lower
- lower bound of prediction interval.
upper
- upper bound of prediction interval.
level
- confidence level.
interval
- binary variable (whether interval were produced or not).
scenarios
- in case of forecast.adam()
and
interval="simulated"
returns matrix with scenarios (future paths) that were
used in simulations.
Ivan Svetunkov, [email protected]
Hyndman, R.J., Koehler, A.B., Ord, J.K., and Snyder, R.D. (2008) Forecasting with exponential smoothing: the state space approach, Springer-Verlag.
ourModel <- ces(rnorm(100,0,1),h=10) forecast(ourModel,h=10) forecast(ourModel,h=10,interval=TRUE) plot(forecast(ourModel,h=10,interval=TRUE))
ourModel <- ces(rnorm(100,0,1),h=10) forecast(ourModel,h=10) forecast(ourModel,h=10,interval=TRUE) plot(forecast(ourModel,h=10,interval=TRUE))
Function constructs Generalised Univariate Model, estimating matrices F, w, vector g and initial parameters.
gum(y, orders = c(1, 1), lags = c(1, frequency(y)), type = c("additive", "multiplicative"), persistence = NULL, transition = NULL, measurement = rep(1, sum(orders)), initial = c("optimal", "backcasting"), ic = c("AICc", "AIC", "BIC", "BICc"), loss = c("likelihood", "MSE", "MAE", "HAM", "MSEh", "TMSE", "GTMSE", "MSCE"), h = 10, holdout = FALSE, cumulative = FALSE, interval = c("none", "parametric", "likelihood", "semiparametric", "nonparametric"), level = 0.95, bounds = c("restricted", "admissible", "none"), silent = c("all", "graph", "legend", "output", "none"), xreg = NULL, regressors = c("use", "select"), initialX = NULL, ...) ges(...)
gum(y, orders = c(1, 1), lags = c(1, frequency(y)), type = c("additive", "multiplicative"), persistence = NULL, transition = NULL, measurement = rep(1, sum(orders)), initial = c("optimal", "backcasting"), ic = c("AICc", "AIC", "BIC", "BICc"), loss = c("likelihood", "MSE", "MAE", "HAM", "MSEh", "TMSE", "GTMSE", "MSCE"), h = 10, holdout = FALSE, cumulative = FALSE, interval = c("none", "parametric", "likelihood", "semiparametric", "nonparametric"), level = 0.95, bounds = c("restricted", "admissible", "none"), silent = c("all", "graph", "legend", "output", "none"), xreg = NULL, regressors = c("use", "select"), initialX = NULL, ...) ges(...)
y |
Vector or ts object, containing data needed to be forecasted. |
orders |
Order of the model. Specified as vector of number of states
with different lags. For example, |
lags |
Defines lags for the corresponding orders. If, for example,
|
type |
Type of model. Can either be |
persistence |
Persistence vector |
transition |
Transition matrix |
measurement |
Measurement vector |
initial |
Can be either character or a vector of initial states. If it
is character, then it can be |
ic |
The information criterion used in the model selection procedure. |
loss |
The type of Loss Function used in optimization. There are also available analytical approximations for multistep functions:
Finally, just for fun the absolute and half analogues of multistep estimators
are available: |
h |
Length of forecasting horizon. |
holdout |
If |
cumulative |
If |
interval |
Type of interval to construct. This can be:
The parameter also accepts |
level |
Confidence level. Defines width of prediction interval. |
bounds |
What type of bounds to use in the model estimation. The first letter can be used instead of the whole word. |
silent |
If |
xreg |
The vector (either numeric or time series) or the matrix (or
data.frame) of exogenous variables that should be included in the model. If
matrix included than columns should contain variables and rows - observations.
Note that |
regressors |
The variable defines what to do with the provided xreg:
|
initialX |
The vector of initial parameters for exogenous variables.
Ignored if |
... |
Other non-documented parameters. For example parameter
|
The function estimates the Single Source of Error state space model of the following type:
Where is the Bernoulli distributed random variable (in case of
normal data equal to 1),
is the state vector (defined using
orders
) and is the vector of
lags
, is the
vector of exogenous parameters.
is the
measurement
vector,
is the
transition
matrix, is the
persistence
vector, is the vector of parameters for exogenous variables,
is the
transitionX
matrix and is the
persistenceX
matrix. Finally, is the error term.
For some more information about the model and its implementation, see the
vignette: vignette("gum","smooth")
Object of class "smooth" is returned. It contains:
model
- name of the estimated model.
timeElapsed
- time elapsed for the construction of the model.
states
- matrix of fuzzy components of GUM, where rows
correspond to time and cols
to states.
initialType
- Type of the initial values used.
initial
- initial values of state vector (extracted from
states
).
nParam
- table with the number of estimated / provided parameters.
If a previous model was reused, then its initials are reused and the number of
provided parameters will take this into account.
measurement
- matrix w.
transition
- matrix F.
persistence
- persistence vector. This is the place, where
smoothing parameters live.
fitted
- fitted values.
forecast
- point forecast.
lower
- lower bound of prediction interval. When
interval="none"
then NA is returned.
upper
- higher bound of prediction interval. When
interval="none"
then NA is returned.
residuals
- the residuals of the estimated model.
errors
- matrix of 1 to h steps ahead errors. Only returned when the
multistep losses are used and semiparametric interval is needed.
s2
- variance of the residuals (taking degrees of freedom
into account).
interval
- type of interval asked by user.
level
- confidence level for interval.
cumulative
- whether the produced forecast was cumulative or not.
y
- original data.
holdout
- holdout part of the original data.
xreg
- provided vector or matrix of exogenous variables. If
regressors="s"
, then this value will contain only selected exogenous variables.
initialX
- initial values for parameters of exogenous variables.
ICs
- values of information criteria of the model. Includes
AIC, AICc, BIC and BICc.
logLik
- log-likelihood of the function.
lossValue
- Cost function value.
loss
- Type of loss function used in the estimation.
FI
- Fisher Information. Equal to NULL if FI=FALSE
or
when FI
variable is not provided at all.
accuracy
- vector of accuracy measures for the holdout sample.
In case of non-intermittent data includes: MPE, MAPE, SMAPE, MASE, sMAE,
RelMAE, sMSE and Bias coefficient (based on complex numbers). In case of
intermittent data the set of errors will be: sMSE, sPIS, sCE (scaled
cumulative error) and Bias coefficient. This is available only when
holdout=TRUE
.
B
- the vector of all the estimated parameters.
Ivan Svetunkov, [email protected]
Svetunkov I. (2023) Smooth forecasting with the smooth package in R. arXiv:2301.01790. doi:10.48550/arXiv.2301.01790.
Svetunkov I. (2015 - Inf) "smooth" package for R - series of posts about the underlying models and how to use them: https://openforecast.org/category/r-en/smooth/.
Taylor, J.W. and Bunn, D.W. (1999) A Quantile Regression Approach to Generating Prediction Intervals. Management Science, Vol 45, No 2, pp 225-237.
Lichtendahl Kenneth C., Jr., Grushka-Cockayne Yael, Winkler Robert L., (2013) Is It Better to Average Probabilities or Quantiles? Management Science 59(7):1594-1611. DOI: doi:10.1287/mnsc.1120.1667
# Something simple: gum(rnorm(118,100,3),orders=c(1),lags=c(1),h=18,holdout=TRUE,bounds="a",interval="p") # A more complicated model with seasonality ourModel <- gum(rnorm(118,100,3),orders=c(2,1),lags=c(1,4),h=18,holdout=TRUE) # Redo previous model on a new data and produce prediction interval gum(rnorm(118,100,3),model=ourModel,h=18,interval="sp") # Produce something crazy with optimal initials (not recommended) gum(rnorm(118,100,3),orders=c(1,1,1),lags=c(1,3,5),h=18,holdout=TRUE,initial="o") # Simpler model estiamted using trace forecast error loss function and its analytical analogue gum(rnorm(118,100,3),orders=c(1),lags=c(1),h=18,holdout=TRUE,bounds="n",loss="TMSE") gum(rnorm(118,100,3),orders=c(1),lags=c(1),h=18,holdout=TRUE,bounds="n",loss="aTMSE") # Introduce exogenous variables gum(rnorm(118,100,3),orders=c(1),lags=c(1),h=18,holdout=TRUE,xreg=c(1:118)) # Or select the most appropriate one gum(rnorm(118,100,3),orders=c(1),lags=c(1),h=18,holdout=TRUE,xreg=c(1:118),regressors="s") summary(ourModel) forecast(ourModel) plot(forecast(ourModel))
# Something simple: gum(rnorm(118,100,3),orders=c(1),lags=c(1),h=18,holdout=TRUE,bounds="a",interval="p") # A more complicated model with seasonality ourModel <- gum(rnorm(118,100,3),orders=c(2,1),lags=c(1,4),h=18,holdout=TRUE) # Redo previous model on a new data and produce prediction interval gum(rnorm(118,100,3),model=ourModel,h=18,interval="sp") # Produce something crazy with optimal initials (not recommended) gum(rnorm(118,100,3),orders=c(1,1,1),lags=c(1,3,5),h=18,holdout=TRUE,initial="o") # Simpler model estiamted using trace forecast error loss function and its analytical analogue gum(rnorm(118,100,3),orders=c(1),lags=c(1),h=18,holdout=TRUE,bounds="n",loss="TMSE") gum(rnorm(118,100,3),orders=c(1),lags=c(1),h=18,holdout=TRUE,bounds="n",loss="aTMSE") # Introduce exogenous variables gum(rnorm(118,100,3),orders=c(1),lags=c(1),h=18,holdout=TRUE,xreg=c(1:118)) # Or select the most appropriate one gum(rnorm(118,100,3),orders=c(1),lags=c(1),h=18,holdout=TRUE,xreg=c(1:118),regressors="s") summary(ourModel) forecast(ourModel) plot(forecast(ourModel))
Functions to check if an object is of the specified class
Functions to check if an object is of the specified class
is.smooth(x) is.smoothC(x) is.msarima(x) is.oes(x) is.oesg(x) is.smooth.sim(x) is.smooth.forecast(x) is.adam(x) is.adam.sim(x) is.msdecompose(x) is.msdecompose.forecast(x)
is.smooth(x) is.smoothC(x) is.msarima(x) is.oes(x) is.oesg(x) is.smooth.sim(x) is.smooth.forecast(x) is.adam(x) is.adam.sim(x) is.msdecompose(x) is.msdecompose.forecast(x)
x |
The object to check. |
The list of functions includes:
is.smooth()
tests if the object was produced by a smooth function
(e.g. es / ces / ssarima /
gum / sma / msarima);
is.msarima()
tests if the object was produced by the
msarima function;
is.smoothC()
tests if the object was produced by a combination
function (currently applies only to smoothCombine);
is.oes()
tests if the object was produced by oes
function;
is.smooth.sim()
tests if the object was produced by simulate functions
(e.g. sim.es / sim.ces / sim.ssarima
/ sim.sma / sim.gum);
is.smooth.forecast()
checks if the forecast was produced from a smooth
function using forecast() function.
The list of functions includes:
is.adam()
tests if the object was produced by a adam function
is.adam.sim()
tests if the object was produced by sim.adam() function (not implemented yet);
TRUE
if this is the specified class and FALSE
otherwise.
TRUE
if this is the specified class and FALSE
otherwise.
Ivan Svetunkov, [email protected]
ourModel <- msarima(rnorm(100,100,10)) is.smooth(ourModel) is.msarima(ourModel) ourModel <- adam(rnorm(100,100,10)) is.adam(ourModel)
ourModel <- msarima(rnorm(100,100,10)) is.smooth(ourModel) is.msarima(ourModel) ourModel <- adam(rnorm(100,100,10)) is.adam(ourModel)
Function constructs Multiple Seasonal State Space ARIMA, estimating AR, MA terms and initial states. It is a wrapper of adam function.
msarima(y, orders = list(ar = c(0), i = c(1), ma = c(1)), lags = c(1), constant = FALSE, AR = NULL, MA = NULL, model = NULL, initial = c("optimal", "backcasting", "complete"), ic = c("AICc", "AIC", "BIC", "BICc"), loss = c("likelihood", "MSE", "MAE", "HAM", "MSEh", "TMSE", "GTMSE", "MSCE"), h = 10, holdout = FALSE, bounds = c("usual", "admissible", "none"), silent = TRUE, xreg = NULL, regressors = c("use", "select", "adapt"), initialX = NULL, ...) auto.msarima(y, orders = list(ar = c(3, 3), i = c(2, 1), ma = c(3, 3)), lags = c(1, frequency(y)), initial = c("optimal", "backcasting", "complete"), ic = c("AICc", "AIC", "BIC", "BICc"), loss = c("likelihood", "MSE", "MAE", "HAM", "MSEh", "TMSE", "GTMSE", "MSCE"), h = 10, holdout = FALSE, bounds = c("usual", "admissible", "none"), silent = TRUE, xreg = NULL, regressors = c("use", "select", "adapt"), initialX = NULL, ...) msarima_old(y, orders = list(ar = c(0), i = c(1), ma = c(1)), lags = c(1), constant = FALSE, AR = NULL, MA = NULL, initial = c("backcasting", "optimal"), ic = c("AICc", "AIC", "BIC", "BICc"), loss = c("likelihood", "MSE", "MAE", "HAM", "MSEh", "TMSE", "GTMSE", "MSCE"), h = 10, holdout = FALSE, cumulative = FALSE, interval = c("none", "parametric", "likelihood", "semiparametric", "nonparametric"), level = 0.95, bounds = c("admissible", "none"), silent = c("all", "graph", "legend", "output", "none"), xreg = NULL, regressors = c("use", "select"), initialX = NULL, ...)
msarima(y, orders = list(ar = c(0), i = c(1), ma = c(1)), lags = c(1), constant = FALSE, AR = NULL, MA = NULL, model = NULL, initial = c("optimal", "backcasting", "complete"), ic = c("AICc", "AIC", "BIC", "BICc"), loss = c("likelihood", "MSE", "MAE", "HAM", "MSEh", "TMSE", "GTMSE", "MSCE"), h = 10, holdout = FALSE, bounds = c("usual", "admissible", "none"), silent = TRUE, xreg = NULL, regressors = c("use", "select", "adapt"), initialX = NULL, ...) auto.msarima(y, orders = list(ar = c(3, 3), i = c(2, 1), ma = c(3, 3)), lags = c(1, frequency(y)), initial = c("optimal", "backcasting", "complete"), ic = c("AICc", "AIC", "BIC", "BICc"), loss = c("likelihood", "MSE", "MAE", "HAM", "MSEh", "TMSE", "GTMSE", "MSCE"), h = 10, holdout = FALSE, bounds = c("usual", "admissible", "none"), silent = TRUE, xreg = NULL, regressors = c("use", "select", "adapt"), initialX = NULL, ...) msarima_old(y, orders = list(ar = c(0), i = c(1), ma = c(1)), lags = c(1), constant = FALSE, AR = NULL, MA = NULL, initial = c("backcasting", "optimal"), ic = c("AICc", "AIC", "BIC", "BICc"), loss = c("likelihood", "MSE", "MAE", "HAM", "MSEh", "TMSE", "GTMSE", "MSCE"), h = 10, holdout = FALSE, cumulative = FALSE, interval = c("none", "parametric", "likelihood", "semiparametric", "nonparametric"), level = 0.95, bounds = c("admissible", "none"), silent = c("all", "graph", "legend", "output", "none"), xreg = NULL, regressors = c("use", "select"), initialX = NULL, ...)
y |
Vector or ts object, containing data needed to be forecasted. |
orders |
List of orders, containing vector variables |
lags |
Defines lags for the corresponding orders (see examples above).
The length of |
constant |
If |
AR |
Vector or matrix of AR parameters. The order of parameters should be lag-wise. This means that first all the AR parameters of the firs lag should be passed, then for the second etc. AR of another msarima can be passed here. |
MA |
Vector or matrix of MA parameters. The order of parameters should be lag-wise. This means that first all the MA parameters of the firs lag should be passed, then for the second etc. MA of another msarima can be passed here. |
model |
Previously estimated MSARIMA model. |
initial |
Can be either character or a vector of initial states.
If it is character, then it can be |
ic |
The information criterion used in the model selection procedure. |
loss |
The type of Loss Function used in optimization. There are also available analytical approximations for multistep functions:
Finally, just for fun the absolute and half analogues of multistep estimators
are available: |
h |
Length of forecasting horizon. |
holdout |
If |
bounds |
What type of bounds to use in the model estimation. The first letter can be used instead of the whole word. |
silent |
If |
xreg |
The vector (either numeric or time series) or the matrix (or
data.frame) of exogenous variables that should be included in the model. If
matrix included than columns should contain variables and rows - observations.
Note that |
regressors |
The variable defines what to do with the provided xreg:
|
initialX |
The vector of initial parameters for exogenous variables.
Ignored if |
... |
Other non-documented parameters. see adam for details.
|
cumulative |
If |
interval |
Type of interval to construct. This can be:
The parameter also accepts |
level |
Confidence level. Defines width of prediction interval. |
The model, implemented in this function differs from the one in ssarima function (Svetunkov & Boylan, 2019), but it is more efficient and better fitting the data (which might be a limitation).
The basic ARIMA(p,d,q) used in the function has the following form:
where is the actual values,
is the error term,
are the parameters for AR and MA respectively and
is
the constant. In case of non-zero differences
acts as drift.
This model is then transformed into ARIMA in the Single Source of Error State space form (based by Snyder, 1985, but in a slightly different formulation):
Where is the Bernoulli distributed random variable (in case of
normal data equal to 1),
is the state vector (defined based on
orders
) and is the vector of
lags
, is the
vector of exogenous parameters.
is the
measurement
vector,
is the
transition
matrix, is the
persistence
vector, is the vector of parameters for exogenous variables,
is the
transitionX
matrix and is the
persistenceX
matrix. The main difference from ssarima
function is that this implementation skips zero polynomials, substantially
decreasing the dimension of the transition matrix. As a result, this
function works faster than ssarima on high frequency data,
and it is more accurate.
Due to the flexibility of the model, multiple seasonalities can be used. For
example, something crazy like this can be constructed:
SARIMA(1,1,1)(0,1,1)[24](2,0,1)[24*7](0,0,1)[24*30], but the estimation may
take some time... Still this should be estimated in finite time (not like
with ssarima
).
The auto.msarima
function constructs several ARIMA models in Single
Source of Error state space form based on adam
function (see
adam documentation) and selects the best one based on the
selected information criterion.
For some additional details see the vignettes: vignette("adam","smooth")
and vignette("ssarima","smooth")
Object of class "adam" is returned. It contains the list of the following values:
model
- the name of the estimated model.
timeElapsed
- time elapsed for the construction of the model.
states
- the matrix of the fuzzy components of msarima, where
rows
correspond to time and cols
to states.
transition
- matrix F.
persistence
- the persistence vector. This is the place, where
smoothing parameters live.
measurement
- measurement vector of the model.
AR
- the matrix of coefficients of AR terms.
I
- the matrix of coefficients of I terms.
MA
- the matrix of coefficients of MA terms.
constant
- the value of the constant term.
initialType
- Type of the initial values used.
initial
- the initial values of the state vector (extracted
from states
).
nParam
- table with the number of estimated / provided parameters.
If a previous model was reused, then its initials are reused and the number of
provided parameters will take this into account.
fitted
- the fitted values.
forecast
- the point forecast.
lower
- the lower bound of prediction interval. When
interval="none"
then NA is returned.
upper
- the higher bound of prediction interval. When
interval="none"
then NA is returned.
residuals
- the residuals of the estimated model.
errors
- The matrix of 1 to h steps ahead errors. Only returned when the
multistep losses are used and semiparametric interval is needed.
s2
- variance of the residuals (taking degrees of freedom into
account).
interval
- type of interval asked by user.
level
- confidence level for interval.
cumulative
- whether the produced forecast was cumulative or not.
y
- the original data.
holdout
- the holdout part of the original data.
xreg
- provided vector or matrix of exogenous variables. If
regressors="s"
, then this value will contain only selected exogenous
variables.
initialX
- initial values for parameters of exogenous
variables.
ICs
- values of information criteria of the model. Includes
AIC, AICc, BIC and BICc.
logLik
- log-likelihood of the function.
lossValue
- Cost function value.
loss
- Type of loss function used in the estimation.
FI
- Fisher Information. Equal to NULL if FI=FALSE
or when FI
is not provided at all.
accuracy
- vector of accuracy measures for the holdout sample.
In case of non-intermittent data includes: MPE, MAPE, SMAPE, MASE, sMAE,
RelMAE, sMSE and Bias coefficient (based on complex numbers). In case of
intermittent data the set of errors will be: sMSE, sPIS, sCE (scaled
cumulative error) and Bias coefficient. This is available only when
holdout=TRUE
.
B
- the vector of all the estimated parameters.
Ivan Svetunkov, [email protected]
Snyder, R. D., 1985. Recursive Estimation of Dynamic Linear Models. Journal of the Royal Statistical Society, Series B (Methodological) 47 (2), 272-276.
Hyndman, R.J., Koehler, A.B., Ord, J.K., and Snyder, R.D. (2008) Forecasting with exponential smoothing: the state space approach, Springer-Verlag. doi:10.1007/978-3-540-71918-2.
Svetunkov, I., & Boylan, J. E. (2019). State-space ARIMA for supply-chain forecasting. International Journal of Production Research, 0(0), 1–10. doi:10.1080/00207543.2019.1600764
adam, orders,
es, auto.ssarima
x <- rnorm(118,100,3) # The simple call of ARIMA(1,1,1): ourModel <- msarima(x,orders=c(1,1,1),lags=1,h=18,holdout=TRUE) # Example of SARIMA(2,0,0)(1,0,0)[4] msarima(x,orders=list(ar=c(2,1)),lags=c(1,4),h=18,holdout=TRUE) # SARIMA of a peculiar order on AirPassengers data ourModel <- msarima(AirPassengers,orders=list(ar=c(1,0,3),i=c(1,0,1),ma=c(0,1,2)), lags=c(1,6,12),h=10,holdout=TRUE) # ARIMA(1,1,1) with Mean Squared Trace Forecast Error msarima(x,orders=c(1,1,1),lags=1,h=18,holdout=TRUE,loss="TMSE") plot(forecast(ourModel, h=18, interval="approximate")) # The best ARIMA for the data ourModel <- auto.msarima(x,orders=list(ar=c(2,1),i=c(1,1),ma=c(2,1)),lags=c(1,12), h=18,holdout=TRUE) # The other one using optimised states auto.msarima(x,orders=list(ar=c(3,2),i=c(2,1),ma=c(3,2)),lags=c(1,12), h=18,holdout=TRUE) # And now combined ARIMA auto.msarima(x,orders=list(ar=c(3,2),i=c(2,1),ma=c(3,2)),lags=c(1,12), combine=TRUE,h=18,holdout=TRUE) plot(forecast(ourModel, h=18, interval="simulated"))
x <- rnorm(118,100,3) # The simple call of ARIMA(1,1,1): ourModel <- msarima(x,orders=c(1,1,1),lags=1,h=18,holdout=TRUE) # Example of SARIMA(2,0,0)(1,0,0)[4] msarima(x,orders=list(ar=c(2,1)),lags=c(1,4),h=18,holdout=TRUE) # SARIMA of a peculiar order on AirPassengers data ourModel <- msarima(AirPassengers,orders=list(ar=c(1,0,3),i=c(1,0,1),ma=c(0,1,2)), lags=c(1,6,12),h=10,holdout=TRUE) # ARIMA(1,1,1) with Mean Squared Trace Forecast Error msarima(x,orders=c(1,1,1),lags=1,h=18,holdout=TRUE,loss="TMSE") plot(forecast(ourModel, h=18, interval="approximate")) # The best ARIMA for the data ourModel <- auto.msarima(x,orders=list(ar=c(2,1),i=c(1,1),ma=c(2,1)),lags=c(1,12), h=18,holdout=TRUE) # The other one using optimised states auto.msarima(x,orders=list(ar=c(3,2),i=c(2,1),ma=c(3,2)),lags=c(1,12), h=18,holdout=TRUE) # And now combined ARIMA auto.msarima(x,orders=list(ar=c(3,2),i=c(2,1),ma=c(3,2)),lags=c(1,12), combine=TRUE,h=18,holdout=TRUE) plot(forecast(ourModel, h=18, interval="simulated"))
Function decomposes multiple seasonal time series into components using the principles of classical decomposition.
msdecompose(y, lags = c(12), type = c("additive", "multiplicative"), smoother = c("ma", "lowess", "supsmu"), ...)
msdecompose(y, lags = c(12), type = c("additive", "multiplicative"), smoother = c("ma", "lowess", "supsmu"), ...)
y |
Vector or ts object, containing data needed to be smoothed. |
lags |
Vector of lags, corresponding to the frequencies in the data. |
type |
The type of decomposition. If |
smoother |
The type of function used in the smoother of the data to
extract the trend and in seasonality smoothing. |
... |
Other parameters passed to smoothers. Only works with lowess/supsmu. |
The function applies centred moving averages based on filter
function and order specified in lags
variable in order to smooth the
original series and obtain level, trend and seasonal components of the series.
The object of the class "msdecompose" is return, containing:
y
- the original time series.
initial
- the estimates of the initial level and trend.
trend
- the long term trend in the data.
seasonal
- the list of seasonal parameters.
lags
- the provided lags.
type
- the selected type of the decomposition.
yName
- the name of the provided data.
Ivan Svetunkov, [email protected]
Svetunkov I. (2023) Smooth forecasting with the smooth package in R. arXiv:2301.01790. doi:10.48550/arXiv.2301.01790.
Svetunkov I. (2015 - Inf) "smooth" package for R - series of posts about the underlying models and how to use them: https://openforecast.org/category/r-en/smooth/.
# Decomposition of multiple frequency data ## Not run: ourModel <- msdecompose(forecast::taylor, lags=c(48,336), type="m") ourModel <- msdecompose(AirPassengers, lags=c(12), type="m") plot(ourModel) plot(forecast(ourModel, model="AAN", h=12))
# Decomposition of multiple frequency data ## Not run: ourModel <- msdecompose(forecast::taylor, lags=c(48,336), type="m") ourModel <- msdecompose(AirPassengers, lags=c(12), type="m") plot(ourModel) plot(forecast(ourModel, model="AAN", h=12))
This function extracts covariance matrix of 1 to h steps ahead forecast errors for
adam()
, ssarima()
, gum()
, sma()
, es()
and
ces()
models.
multicov(object, type = c("analytical", "empirical", "simulated"), h = 10, nsim = 1000, ...) ## S3 method for class 'smooth' multicov(object, type = c("analytical", "empirical", "simulated"), h = 10, nsim = 1000, ...)
multicov(object, type = c("analytical", "empirical", "simulated"), h = 10, nsim = 1000, ...) ## S3 method for class 'smooth' multicov(object, type = c("analytical", "empirical", "simulated"), h = 10, nsim = 1000, ...)
object |
Model estimated using one of the functions of smooth package. |
type |
What method to use in order to produce covariance matrix:
|
h |
Forecast horizon to use in the calculations. |
nsim |
Number of iterations to produce in the simulation. Only needed if
|
... |
Other parameters passed to simulate function (if |
The function returns either scalar (if it is a non-smooth model) or the matrix of (h x h) size with variances and covariances of 1 to h steps ahead forecast errors.
Scalar in cases of non-smooth functions. (h x h) matrix otherwise.
Ivan Svetunkov, [email protected]
x <- rnorm(100,0,1) # A simple example with a 5x5 covariance matrix ourModel <- ces(x, h=5) multicov(ourModel)
x <- rnorm(100,0,1) # A simple example with a 5x5 covariance matrix ourModel <- ces(x, h=5) multicov(ourModel)
Function returns the occurrence part of iETS model with the specified probability update and model types.
oes(y, model = "MNN", persistence = NULL, initial = "o", initialSeason = NULL, phi = NULL, occurrence = c("fixed", "general", "odds-ratio", "inverse-odds-ratio", "direct", "auto", "none"), ic = c("AICc", "AIC", "BIC", "BICc"), h = 10, holdout = FALSE, bounds = c("usual", "admissible", "none"), silent = c("all", "graph", "legend", "output", "none"), xreg = NULL, regressors = c("use", "select"), initialX = NULL, ...)
oes(y, model = "MNN", persistence = NULL, initial = "o", initialSeason = NULL, phi = NULL, occurrence = c("fixed", "general", "odds-ratio", "inverse-odds-ratio", "direct", "auto", "none"), ic = c("AICc", "AIC", "BIC", "BICc"), h = 10, holdout = FALSE, bounds = c("usual", "admissible", "none"), silent = c("all", "graph", "legend", "output", "none"), xreg = NULL, regressors = c("use", "select"), initialX = NULL, ...)
y |
Either numeric vector or time series vector. |
model |
The type of ETS model used for the estimation. Normally this should
be |
persistence |
Persistence vector |
initial |
Can be either character or a vector of initial states. If it
is character, then it can be |
initialSeason |
The vector of the initial seasonal components. If |
phi |
The value of the dampening parameter. Used only for damped-trend models. |
occurrence |
The type of model used in probability estimation. Can be
|
ic |
The information criteria to use in case of model selection. |
h |
The forecast horizon. |
holdout |
If |
bounds |
What type of bounds to use in the model estimation. The first letter can be used instead of the whole word. |
silent |
If |
xreg |
The vector (either numeric or time series) or the matrix (or
data.frame) of exogenous variables that should be included in the model. If
matrix included than columns should contain variables and rows - observations.
Note that |
regressors |
The variable defines what to do with the provided xreg:
|
initialX |
The vector of initial parameters for exogenous variables.
Ignored if |
... |
The parameters passed to the optimiser, such as |
The function estimates probability of demand occurrence, using the selected ETS state space models.
For the details about the model and its implementation, see the respective
vignette: vignette("oes","smooth")
The object of class "occurrence" is returned. It contains following list of values:
model
- the type of the estimated ETS model;
timeElapsed
- the time elapsed for the construction of the model;
fitted
- the fitted values for the probability;
fittedModel
- the fitted values of the underlying ETS model, where applicable
(only for occurrence=c("o","i","d"));
forecast
- the forecast of the probability for h
observations ahead;
forecastModel
- the forecast of the underlying ETS model, where applicable
(only for occurrence=c("o","i","d"));
lower
- the lower bound of the interval if interval!="none"
;
upper
- the upper bound of the interval if interval!="none"
;
lowerModel
- the lower bound of the interval of the underlying ETS model
if interval!="none"
;
upperModel
- the upper bound of the interval of the underlying ETS model
if interval!="none"
;
states
- the values of the state vector;
logLik
- the log-likelihood value of the model;
nParam
- the number of parameters in the model (the matrix is returned);
residuals
- the residuals of the model;
y
- actual values of occurrence (zeros and ones).
persistence
- the vector of smoothing parameters;
phi
- the value of the damped trend parameter;
initial
- initial values of the state vector;
initialSeason
- the matrix of initials seasonal states;
occurrence
- the type of the occurrence model;
updateX
- boolean, defining, if the states of exogenous variables were
estimated as well.
initialX
- initial values for parameters of exogenous variables.
persistenceX
- persistence vector g for exogenous variables.
transitionX
- transition matrix F for exogenous variables.
accuracy
- The error measures for the forecast (in case of holdout=TRUE
).
B
- the vector of all the estimated parameters (in case of "odds-ratio",
"inverse-odds-ratio" and "direct" models).
Ivan Svetunkov, [email protected]
Svetunkov Ivan and Boylan John E. (2017). Multiplicative State-Space Models for Intermittent Time Series. Working Paper of Department of Management Science, Lancaster University, 2017:4 , 1-43.
Teunter R., Syntetos A., Babai Z. (2011). Intermittent demand: Linking forecasting to inventory obsolescence. European Journal of Operational Research, 214, 606-615.
Croston, J. (1972) Forecasting and stock control for intermittent demands. Operational Research Quarterly, 23(3), 289-303.
Syntetos, A., Boylan J. (2005) The accuracy of intermittent demand estimates. International Journal of Forecasting, 21(2), 303-314.
y <- rpois(100,0.1) oes(y, occurrence="auto") oes(y, occurrence="f")
y <- rpois(100,0.1) oes(y, occurrence="auto") oes(y, occurrence="f")
Function returns the general occurrence model of the of iETS model.
oesg(y, modelA = "MNN", modelB = "MNN", persistenceA = NULL, persistenceB = NULL, phiA = NULL, phiB = NULL, initialA = "o", initialB = "o", initialSeasonA = NULL, initialSeasonB = NULL, ic = c("AICc", "AIC", "BIC", "BICc"), h = 10, holdout = FALSE, bounds = c("usual", "admissible", "none"), silent = c("all", "graph", "legend", "output", "none"), xregA = NULL, xregB = NULL, initialXA = NULL, initialXB = NULL, regressorsA = c("use", "select"), regressorsB = c("use", "select"), ...)
oesg(y, modelA = "MNN", modelB = "MNN", persistenceA = NULL, persistenceB = NULL, phiA = NULL, phiB = NULL, initialA = "o", initialB = "o", initialSeasonA = NULL, initialSeasonB = NULL, ic = c("AICc", "AIC", "BIC", "BICc"), h = 10, holdout = FALSE, bounds = c("usual", "admissible", "none"), silent = c("all", "graph", "legend", "output", "none"), xregA = NULL, xregB = NULL, initialXA = NULL, initialXB = NULL, regressorsA = c("use", "select"), regressorsB = c("use", "select"), ...)
y |
Either numeric vector or time series vector. |
modelA |
The type of the ETS for the model A. |
modelB |
The type of the ETS for the model B. |
persistenceA |
The persistence vector |
persistenceB |
The persistence vector |
phiA |
The value of the dampening parameter in the model A. Used only for damped-trend models. |
phiB |
The value of the dampening parameter in the model B. Used only for damped-trend models. |
initialA |
Either |
initialB |
Either |
initialSeasonA |
The vector of the initial seasonal components for the
model A. If |
initialSeasonB |
The vector of the initial seasonal components for the
model B. If |
ic |
Information criteria to use in case of model selection. |
h |
Forecast horizon. |
holdout |
If |
bounds |
What type of bounds to use in the model estimation. The first letter can be used instead of the whole word. |
silent |
If |
xregA |
The vector or the matrix of exogenous variables, explaining some parts of occurrence variable of the model A. |
xregB |
The vector or the matrix of exogenous variables, explaining some parts of occurrence variable of the model B. |
initialXA |
The vector of initial parameters for exogenous variables in the model
A. Ignored if |
initialXB |
The vector of initial parameters for exogenous variables in the model
B. Ignored if |
regressorsA |
Variable defines what to do with the provided |
regressorsB |
Similar to the |
... |
The parameters passed to the optimiser, such as |
The function estimates probability of demand occurrence, based on the iETS_G state-space model. It involves the estimation and modelling of the two simultaneous state space equations. Thus two parts for the model type, persistence, initials etc.
For the details about the model and its implementation, see the respective
vignette: vignette("oes","smooth")
The model is based on:
,
where a_t and b_t are the parameters of the Beta distribution and are modelled using separate ETS models.
The object of class "occurrence" is returned. It contains following list of values:
modelA
- the model A of the class oes, that contains the output similar
to the one from the oes()
function;
modelB
- the model B of the class oes, that contains the output similar
to the one from the oes()
function.
B
- the vector of all the estimated parameters.
Ivan Svetunkov, [email protected]
y <- rpois(100,0.1) oesg(y, modelA="MNN", modelB="ANN")
y <- rpois(100,0.1) oesg(y, modelA="MNN", modelB="ANN")
These functions allow extracting orders and lags for ssarima()
, gum()
and sma()
,
type of model from es()
and ces()
and name of model.
orders(object, ...) lags(object, ...) modelName(object, ...) modelType(object, ...)
orders(object, ...) lags(object, ...) modelName(object, ...) modelType(object, ...)
object |
Model estimated using one of the functions of smooth package. |
... |
Currently nothing is accepted via ellipsis. |
orders()
and lags()
are useful only for SSARIMA, GUM and SMA. They return NA
for other functions.
This can also be applied to arima()
, Arima()
and auto.arima()
functions from stats and forecast packages.
modelType()
is useful only for ETS and CES. They return NA
for other functions.
This can also be applied to ets()
function from forecast package. errorType
extracts the type of error from the model (either additive or multiplicative). Finally, modelName
returns the name of the fitted model. For example, "ARIMA(0,1,1)". This is purely descriptive and
can also be applied to non-smooth classes, so that, for example, you can easily extract the name
of the fitted AR model from ar()
function from stats
package.
Either vector, scalar or list with values is returned.
orders()
in case of ssarima returns list of values:
ar
- AR orders.
i
- I orders.
ma
- MA orders.
lags()
returns the vector of lags of the model.
All the other functions return strings of character.
Ivan Svetunkov, [email protected]
x <- rnorm(100,0,1) # Just as example. orders and lags do not return anything for ces() and es(). But modelType() does. ourModel <- ces(x, h=10) orders(ourModel) lags(ourModel) modelType(ourModel) modelName(ourModel) # And as another example it does the opposite for gum() and ssarima() ourModel <- gum(x, h=10, orders=c(1,1), lags=c(1,4)) orders(ourModel) lags(ourModel) modelType(ourModel) modelName(ourModel) # Finally these values can be used for simulate functions or original functions. ourModel <- auto.ssarima(x) ssarima(x, orders=orders(ourModel), lags=lags(ourModel), constant=ourModel$constant) sim.ssarima(orders=orders(ourModel), lags=lags(ourModel), constant=ourModel$constant) ourModel <- es(x) es(x, model=modelType(ourModel)) sim.es(model=modelType(ourModel))
x <- rnorm(100,0,1) # Just as example. orders and lags do not return anything for ces() and es(). But modelType() does. ourModel <- ces(x, h=10) orders(ourModel) lags(ourModel) modelType(ourModel) modelName(ourModel) # And as another example it does the opposite for gum() and ssarima() ourModel <- gum(x, h=10, orders=c(1,1), lags=c(1,4)) orders(ourModel) lags(ourModel) modelType(ourModel) modelName(ourModel) # Finally these values can be used for simulate functions or original functions. ourModel <- auto.ssarima(x) ssarima(x, orders=orders(ourModel), lags=lags(ourModel), constant=ourModel$constant) sim.ssarima(orders=orders(ourModel), lags=lags(ourModel), constant=ourModel$constant) ourModel <- es(x) es(x, model=modelType(ourModel)) sim.es(model=modelType(ourModel))
The function produces diagnostics plots for a smooth
model
## S3 method for class 'adam' plot(x, which = c(1, 2, 4, 6), level = 0.95, legend = FALSE, ask = prod(par("mfcol")) < length(which) && dev.interactive(), lowess = TRUE, ...) ## S3 method for class 'smooth' plot(x, which = c(1, 2, 4, 6), level = 0.95, legend = FALSE, ask = prod(par("mfcol")) < length(which) && dev.interactive(), lowess = TRUE, ...) ## S3 method for class 'msdecompose' plot(x, which = c(1, 2, 4, 6), level = 0.95, legend = FALSE, ask = prod(par("mfcol")) < length(which) && dev.interactive(), lowess = TRUE, ...)
## S3 method for class 'adam' plot(x, which = c(1, 2, 4, 6), level = 0.95, legend = FALSE, ask = prod(par("mfcol")) < length(which) && dev.interactive(), lowess = TRUE, ...) ## S3 method for class 'smooth' plot(x, which = c(1, 2, 4, 6), level = 0.95, legend = FALSE, ask = prod(par("mfcol")) < length(which) && dev.interactive(), lowess = TRUE, ...) ## S3 method for class 'msdecompose' plot(x, which = c(1, 2, 4, 6), level = 0.95, legend = FALSE, ask = prod(par("mfcol")) < length(which) && dev.interactive(), lowess = TRUE, ...)
x |
Estimated smooth model. |
which |
Which of the plots to produce. The possible options (see details for explanations):
|
level |
Confidence level. Defines width of confidence interval. Used in plots (2), (3), (7), (8), (9), (10) and (11). |
legend |
If |
ask |
Logical; if |
lowess |
Logical; if |
... |
The parameters passed to the plot functions. Recommended to use with separate plots. |
The list of produced plots includes:
Actuals vs Fitted values. Allows analysing, whether there are any issues in the fit. Does the variability of actuals increase with the increase of fitted values? Is the relation well captured? They grey line on the plot corresponds to the perfect fit of the model.
Standardised residuals vs Fitted. Plots the points and the confidence bounds
(red lines) for the specified confidence level
. Useful for the analysis of outliers;
Studentised residuals vs Fitted. This is similar to the previous plot, but with the residuals divided by the scales with the leave-one-out approach. Should be more sensitive to outliers;
Absolute residuals vs Fitted. Useful for the analysis of heteroscedasticity;
Squared residuals vs Fitted - similar to (3), but with squared values;
Q-Q plot with the specified distribution. Can be used in order to see if the
residuals follow the assumed distribution. The type of distribution depends on the one used
in the estimation (see distribution
parameter in alm);
ACF of the residuals. Are the residuals autocorrelated? See acf for details;
Fitted over time. Plots actuals (black line), fitted values (purple line), point forecast (blue line) and prediction interval (grey lines). Can be used in order to make sure that the model did not miss any important events over time;
Standardised residuals vs Time. Useful if you want to see, if there is autocorrelation or if there is heteroscedasticity in time. This also shows, when the outliers happen;
Studentised residuals vs Time. Similar to previous, but with studentised residuals;
PACF of the residuals. No, really, are they autocorrelated? See pacf function from stats package for details;
Plot of the states of the model. It is not recommended to produce this plot together with the others, because there might be several states, which would cause the creation of a different canvas. In case of "msdecompose", this will produce the decomposition of the series into states on a different canvas;
Absolute standardised residuals vs Fitted. Similar to the previous, but with absolute values. This is more relevant to the models where scale is calculated as an absolute value of something (e.g. Laplace);
Squared standardised residuals vs Fitted. This is an additional plot needed to diagnose
heteroscedasticity in a model with varying scale. The variance on this plot will be constant if
the adequate model for scale
was constructed. This is more appropriate for normal and
the related distributions.
Which of the plots to produce, is specified via the which
parameter.
The function produces the number of plots, specified in the parameter which
.
Ivan Svetunkov, [email protected]
ourModel <- es(c(rnorm(50,100,10),rnorm(50,120,10)), "ANN", h=10) par(mfcol=c(3,4)) plot(ourModel, c(1:11)) plot(ourModel, 12)
ourModel <- es(c(rnorm(50,100,10),rnorm(50,120,10)), "ANN", h=10) par(mfcol=c(3,4)) plot(ourModel, c(1:11)) plot(ourModel, 12)
Function estimates Prediction Likelihood Score for the provided model
pls(object, holdout = NULL, ...) ## S3 method for class 'smooth' pls(object, holdout = NULL, ...)
pls(object, holdout = NULL, ...) ## S3 method for class 'smooth' pls(object, holdout = NULL, ...)
object |
The model estimated using smooth functions. This thing also accepts other models (e.g. estimated using functions from forecast package), but may not always work properly with them. |
holdout |
The values for the holdout part of the sample. If the model was fitted
on the data with the |
... |
Parameters passed to multicov function. The function is called in order to get the covariance matrix of 1 to h steps ahead forecast errors. |
Prediction likelihood score (PLS) is based on either normal or log-normal distribution of errors. This is extracted from the provided model. The likelihood based on the distribution of 1 to h steps ahead forecast errors is used in the process.
A value of the log-likelihood.
Ivan Svetunkov, [email protected]
distribution. IEEE Signal Processing Letters. 13 (5): 300-303. doi:10.1109/LSP.2006.870353 - this is not yet used in the function.
Snyder, R. D., Ord, J. K., Beaumont, A., 2012. Forecasting the intermittent demand for slow-moving inventories: A modelling approach. International Journal of Forecasting 28 (2), 485-496.
Kolassa, S., 2016. Evaluating predictive count data distributions in retail sales forecasting. International Journal of Forecasting 32 (3), 788-803..
# Generate data, apply es() with the holdout parameter and calculate PLS x <- rnorm(100,0,1) ourModel <- es(x, h=10, holdout=TRUE) pls(ourModel, type="a") pls(ourModel, type="e") pls(ourModel, type="s", obs=100, nsim=100)
# Generate data, apply es() with the holdout parameter and calculate PLS x <- rnorm(100,0,1) ourModel <- es(x, h=10, holdout=TRUE) pls(ourModel, type="a") pls(ourModel, type="e") pls(ourModel, type="s", obs=100, nsim=100)
reapply
function generates the parameters based on the values in the provided
object and then reapplies the same model with those parameters to the data, getting
the fitted paths and updated states. reforecast
function uses those values
in order to produce forecasts for the h
steps ahead.
reapply(object, nsim = 1000, bootstrap = FALSE, heuristics = NULL, ...) reforecast(object, h = 10, newdata = NULL, occurrence = NULL, interval = c("prediction", "confidence", "none"), level = 0.95, side = c("both", "upper", "lower"), cumulative = FALSE, nsim = 100, ...)
reapply(object, nsim = 1000, bootstrap = FALSE, heuristics = NULL, ...) reforecast(object, h = 10, newdata = NULL, occurrence = NULL, interval = c("prediction", "confidence", "none"), level = 0.95, side = c("both", "upper", "lower"), cumulative = FALSE, nsim = 100, ...)
object |
Model estimated using one of the functions of smooth package. |
nsim |
Number of paths to generate (number of simulations to do). |
bootstrap |
The logical, which determines, whether to use bootstrap for the covariance matrix of parameters or not. |
heuristics |
The value for proportion to use for heuristic estimation of the
standard deviation of parameters. If |
... |
Other parameters passed to |
h |
Forecast horizon. |
newdata |
The new data needed in order to produce forecasts. |
occurrence |
The vector containing the future occurrence variable (values in [0,1]), if it is known. |
interval |
What type of mechanism to use for interval construction. The options
include |
level |
Confidence level. Defines width of prediction interval. |
side |
Defines, whether to provide |
cumulative |
If |
The main motivation of the function is to take the randomness due to the in-sample estimation of parameters into account when fitting the model and to propagate this randomness to the forecasts. The methods can be considered as a special case of recursive bootstrap.
reapply()
returns object of the class "reapply", which contains:
timeElapsed
- Time elapsed for the code execution;
y
- The actual values;
states
- The array of states of the model;
refitted
- The matrix with fitted values, where columns correspond
to different paths;
fitted
- The vector of fitted values (conditional mean);
model
- The name of the constructed model;
transition
- The array of transition matrices;
measurement
- The array of measurement matrices;
persistence
- The matrix of persistence vectors (paths in columns);
profile
- The array of profiles obtained by the end of each fit.
reforecast()
returns the object of the class forecast.smooth,
which contains in addition to the standard list the variable paths
- all
simulated trajectories with h in rows, simulated future paths for each state in
columns and different states (obtained from reapply()
function) in the
third dimension.
Ivan Svetunkov, [email protected]
x <- rnorm(100,0,1) # Just as example. orders and lags do not return anything for ces() and es(). But modelType() does. ourModel <- adam(x, "ANN") refittedModel <- reapply(ourModel, nsim=50) plot(refittedModel) ourForecast <- reforecast(ourModel, nsim=50)
x <- rnorm(100,0,1) # Just as example. orders and lags do not return anything for ces() and es(). But modelType() does. ourModel <- adam(x, "ANN") refittedModel <- reapply(ourModel, nsim=50) plot(refittedModel) ourForecast <- reforecast(ourModel, nsim=50)
The function extracts 1 to h steps ahead forecast errors from the model.
rmultistep(object, h = 10, ...)
rmultistep(object, h = 10, ...)
object |
Model estimated using one of the forecasting functions. |
h |
The forecasting horizon to use. |
... |
Currently nothing is accepted via ellipsis. |
The errors correspond to the error term epsilon_t in the ETS models. Don't forget that different models make different assumptions about epsilon_t and / or 1+epsilon_t.
The matrix with observations in rows and h steps ahead values in columns. So, the first row corresponds to the forecast produced from the 0th observation from 1 to h steps ahead.
Ivan Svetunkov, [email protected]
x <- rnorm(100,0,1) ourModel <- adam(x) rmultistep(ourModel, h=13)
x <- rnorm(100,0,1) ourModel <- adam(x) rmultistep(ourModel, h=13)
Function generates data using CES with Single Source of Error as a data generating process.
sim.ces(seasonality = c("none", "simple", "partial", "full"), obs = 10, nsim = 1, frequency = 1, a = NULL, b = NULL, initial = NULL, randomizer = c("rnorm", "rt", "rlaplace", "rs"), probability = 1, ...)
sim.ces(seasonality = c("none", "simple", "partial", "full"), obs = 10, nsim = 1, frequency = 1, a = NULL, b = NULL, initial = NULL, randomizer = c("rnorm", "rt", "rlaplace", "rs"), probability = 1, ...)
seasonality |
The type of seasonality used in CES. Can be: |
obs |
Number of observations in each generated time series. |
nsim |
Number of series to generate (number of simulations to do). |
frequency |
Frequency of generated data. In cases of seasonal models must be greater than 1. |
a |
First complex smoothing parameter. Should be a complex number. NOTE! CES is very sensitive to a and b values so it is advised to use values from previously estimated model. |
b |
Second complex smoothing parameter. Can be real if
|
initial |
A matrix with initial values for CES. In case with
|
randomizer |
Type of random number generator function used for error
term. Defaults are: |
probability |
Probability of occurrence, used for intermittent data generation. This can be a vector, implying that probability varies in time (in TSB or Croston style). |
... |
Additional parameters passed to the chosen randomizer. All the
parameters should be passed in the order they are used in chosen randomizer.
For example, passing just |
For the information about the function, see the vignette:
vignette("simulate","smooth")
List of the following values is returned:
model
- Name of CES model.
a
- Value of complex smoothing parameter a. If nsim>1
, then
this is a vector.
b
- Value of complex smoothing parameter b. If seasonality="n"
or seasonality="s"
, then this is equal to NULL. If nsim>1
,
then this is a vector.
initial
- Initial values of CES in a form of matrix. If nsim>1
,
then this is an array.
data
- Time series vector (or matrix if nsim>1
) of the generated
series.
states
- Matrix (or array if nsim>1
) of states. States are in
columns, time is in rows.
residuals
- Error terms used in the simulation. Either vector or matrix,
depending on nsim
.
occurrence
- Values of occurrence variable. Once again, can be either
a vector or a matrix...
logLik
- Log-likelihood of the constructed model.
Ivan Svetunkov, [email protected]
Svetunkov, I., Kourentzes, N. (February 2015). Complex exponential smoothing. Working Paper of Department of Management Science, Lancaster University 2015:1, 1-31.
Svetunkov I., Kourentzes N. (2017) Complex Exponential Smoothing for Time Series Forecasting. Not yet published.
sim.es, sim.ssarima,
ces, Distributions
# Create 120 observations from CES(n). Generate 100 time series of this kind. x <- sim.ces("n",obs=120,nsim=100) # Generate similar thing for seasonal series of CES(s)_4 x <- sim.ces("s",frequency=4,obs=80,nsim=100) # Estimate model and then generate 10 time series from it ourModel <- ces(rnorm(100,100,5)) simulate(ourModel,nsim=10)
# Create 120 observations from CES(n). Generate 100 time series of this kind. x <- sim.ces("n",obs=120,nsim=100) # Generate similar thing for seasonal series of CES(s)_4 x <- sim.ces("s",frequency=4,obs=80,nsim=100) # Estimate model and then generate 10 time series from it ourModel <- ces(rnorm(100,100,5)) simulate(ourModel,nsim=10)
Function generates data using ETS with Single Source of Error as a data generating process.
sim.es(model = "ANN", obs = 10, nsim = 1, frequency = 1, persistence = NULL, phi = 1, initial = NULL, initialSeason = NULL, bounds = c("usual", "admissible", "restricted"), randomizer = c("rnorm", "rlnorm", "rt", "rlaplace", "rs"), probability = 1, ...)
sim.es(model = "ANN", obs = 10, nsim = 1, frequency = 1, persistence = NULL, phi = 1, initial = NULL, initialSeason = NULL, bounds = c("usual", "admissible", "restricted"), randomizer = c("rnorm", "rlnorm", "rt", "rlaplace", "rs"), probability = 1, ...)
model |
Type of ETS model according to [Hyndman et. al., 2008]
taxonomy. Can consist of 3 or 4 chars: |
obs |
Number of observations in each generated time series. |
nsim |
Number of series to generate (number of simulations to do). |
frequency |
Frequency of generated data. In cases of seasonal models must be greater than 1. |
persistence |
Persistence vector, which includes all the smoothing
parameters. Must correspond to the chosen model. The maximum length is 3:
level, trend and seasonal smoothing parameters. If |
phi |
Value of damping parameter. If trend is not chosen in the model, the parameter is ignored. |
initial |
Vector of initial states of level and trend. The maximum
length is 2. If |
initialSeason |
Vector of initial states for seasonal coefficients.
Should have length equal to |
bounds |
Type of bounds to use for persistence vector if values are
generated. |
randomizer |
Type of random number generator function used for error
term. Defaults are: |
probability |
Probability of occurrence, used for intermittent data generation. This can be a vector, implying that probability varies in time (in TSB or Croston style). |
... |
Additional parameters passed to the chosen randomizer. All the
parameters should be passed in the order they are used in chosen randomizer.
For example, passing just |
For the information about the function, see the vignette:
vignette("simulate","smooth")
List of the following values is returned:
model
- Name of ETS model.
data
- Time series vector (or matrix if nsim>1
) of the generated
series.
states
- Matrix (or array if nsim>1
) of states. States are in
columns, time is in rows.
persistence
- Vector (or matrix if nsim>1
) of smoothing
parameters used in the simulation.
phi
- Value of damping parameter used in time series generation.
initial
- Vector (or matrix) of initial values.
initialSeason
- Vector (or matrix) of initial seasonal coefficients.
probability
- vector of probabilities used in the simulation.
intermittent
- type of the intermittent model used.
residuals
- Error terms used in the simulation. Either vector or matrix,
depending on nsim
.
occurrence
- Values of occurrence variable. Once again, can be either
a vector or a matrix...
logLik
- Log-likelihood of the constructed model.
Ivan Svetunkov, [email protected]
Snyder, R. D., 1985. Recursive Estimation of Dynamic Linear Models. Journal of the Royal Statistical Society, Series B (Methodological) 47 (2), 272-276.
Hyndman, R.J., Koehler, A.B., Ord, J.K., and Snyder, R.D. (2008) Forecasting with exponential smoothing: the state space approach, Springer-Verlag. doi:10.1007/978-3-540-71918-2.
# Create 40 observations of quarterly data using AAA model with errors from normal distribution ETSAAA <- sim.es(model="AAA",frequency=4,obs=40,randomizer="rnorm",mean=0,sd=100) # Create 50 series of quarterly data using AAA model # with 40 observations each with errors from normal distribution ETSAAA <- sim.es(model="AAA",frequency=4,obs=40,randomizer="rnorm",mean=0,sd=100,nsim=50) # Create 50 series of quarterly data using AAdA model # with 40 observations each with errors from normal distribution # and smoothing parameters lying in the "admissible" range. ETSAAA <- sim.es(model="AAA",phi=0.9,frequency=4,obs=40,bounds="admissible", randomizer="rnorm",mean=0,sd=100,nsim=50) # Create 60 observations of monthly data using ANN model # with errors from beta distribution ETSANN <- sim.es(model="ANN",persistence=c(1.5),frequency=12,obs=60, randomizer="rbeta",shape1=1.5,shape2=1.5) plot(ETSANN$states) # Create 60 observations of monthly data using MAM model # with errors from uniform distribution ETSMAM <- sim.es(model="MAM",persistence=c(0.3,0.2,0.1),initial=c(2000,50), phi=0.8,frequency=12,obs=60,randomizer="runif",min=-0.5,max=0.5) # Create 80 observations of quarterly data using MMM model # with predefined initial values and errors from the normal distribution ETSMMM <- sim.es(model="MMM",persistence=c(0.1,0.1,0.1),initial=c(2000,1), initialSeason=c(1.1,1.05,0.9,.95),frequency=4,obs=80,mean=0,sd=0.01) # Generate intermittent data using AAdN iETSAAdN <- sim.es("AAdN",obs=30,frequency=1,probability=0.1,initial=c(3,0),phi=0.8) # Generate iETS(MNN) with TSB style probabilities oETSMNN <- sim.oes("MNN",obs=50,occurrence="d",persistence=0.2,initial=1, randomizer="rlnorm",meanlog=0,sdlog=0.3) iETSMNN <- sim.es("MNN",obs=50,frequency=12,persistence=0.2,initial=4, probability=oETSMNN$probability)
# Create 40 observations of quarterly data using AAA model with errors from normal distribution ETSAAA <- sim.es(model="AAA",frequency=4,obs=40,randomizer="rnorm",mean=0,sd=100) # Create 50 series of quarterly data using AAA model # with 40 observations each with errors from normal distribution ETSAAA <- sim.es(model="AAA",frequency=4,obs=40,randomizer="rnorm",mean=0,sd=100,nsim=50) # Create 50 series of quarterly data using AAdA model # with 40 observations each with errors from normal distribution # and smoothing parameters lying in the "admissible" range. ETSAAA <- sim.es(model="AAA",phi=0.9,frequency=4,obs=40,bounds="admissible", randomizer="rnorm",mean=0,sd=100,nsim=50) # Create 60 observations of monthly data using ANN model # with errors from beta distribution ETSANN <- sim.es(model="ANN",persistence=c(1.5),frequency=12,obs=60, randomizer="rbeta",shape1=1.5,shape2=1.5) plot(ETSANN$states) # Create 60 observations of monthly data using MAM model # with errors from uniform distribution ETSMAM <- sim.es(model="MAM",persistence=c(0.3,0.2,0.1),initial=c(2000,50), phi=0.8,frequency=12,obs=60,randomizer="runif",min=-0.5,max=0.5) # Create 80 observations of quarterly data using MMM model # with predefined initial values and errors from the normal distribution ETSMMM <- sim.es(model="MMM",persistence=c(0.1,0.1,0.1),initial=c(2000,1), initialSeason=c(1.1,1.05,0.9,.95),frequency=4,obs=80,mean=0,sd=0.01) # Generate intermittent data using AAdN iETSAAdN <- sim.es("AAdN",obs=30,frequency=1,probability=0.1,initial=c(3,0),phi=0.8) # Generate iETS(MNN) with TSB style probabilities oETSMNN <- sim.oes("MNN",obs=50,occurrence="d",persistence=0.2,initial=1, randomizer="rlnorm",meanlog=0,sdlog=0.3) iETSMNN <- sim.es("MNN",obs=50,frequency=12,persistence=0.2,initial=4, probability=oETSMNN$probability)
Function generates data using GUM with Single Source of Error as a data generating process.
sim.gum(orders = c(1), lags = c(1), obs = 10, nsim = 1, frequency = 1, measurement = NULL, transition = NULL, persistence = NULL, initial = NULL, randomizer = c("rnorm", "rt", "rlaplace", "rs"), probability = 1, ...)
sim.gum(orders = c(1), lags = c(1), obs = 10, nsim = 1, frequency = 1, measurement = NULL, transition = NULL, persistence = NULL, initial = NULL, randomizer = c("rnorm", "rt", "rlaplace", "rs"), probability = 1, ...)
orders |
Order of the model. Specified as vector of number of states
with different lags. For example, |
lags |
Defines lags for the corresponding orders. If, for example,
|
obs |
Number of observations in each generated time series. |
nsim |
Number of series to generate (number of simulations to do). |
frequency |
Frequency of generated data. In cases of seasonal models must be greater than 1. |
measurement |
Measurement vector |
transition |
Transition matrix |
persistence |
Persistence vector |
initial |
Vector of initial values for state matrix. If |
randomizer |
Type of random number generator function used for error
term. Defaults are: |
probability |
Probability of occurrence, used for intermittent data generation. This can be a vector, implying that probability varies in time (in TSB or Croston style). |
... |
Additional parameters passed to the chosen randomizer. All the
parameters should be passed in the order they are used in chosen randomizer.
For example, passing just |
For the information about the function, see the vignette:
vignette("simulate","smooth")
List of the following values is returned:
model
- Name of GUM model.
measurement
- Matrix w.
transition
- Matrix F.
persistence
- Persistence vector. This is the place, where
smoothing parameters live.
initial
- Initial values of GUM in a form of matrix. If nsim>1
,
then this is an array.
data
- Time series vector (or matrix if nsim>1
) of the generated
series.
states
- Matrix (or array if nsim>1
) of states. States are in
columns, time is in rows.
residuals
- Error terms used in the simulation. Either vector or matrix,
depending on nsim
.
occurrence
- Values of occurrence variable. Once again, can be either
a vector or a matrix...
logLik
- Log-likelihood of the constructed model.
Ivan Svetunkov, [email protected]
Svetunkov I. (2023) Smooth forecasting with the smooth package in R. arXiv:2301.01790. doi:10.48550/arXiv.2301.01790.
Svetunkov I. (2015 - Inf) "smooth" package for R - series of posts about the underlying models and how to use them: https://openforecast.org/category/r-en/smooth/.
sim.es, sim.ssarima,
sim.ces, gum, Distributions
# Create 120 observations from GUM(1[1]). Generate 100 time series of this kind. x <- sim.gum(orders=c(1),lags=c(1),obs=120,nsim=100) # Generate similar thing for seasonal series of GUM(1[1],1[4]]) x <- sim.gum(orders=c(1,1),lags=c(1,4),frequency=4,obs=80,nsim=100,transition=c(1,0,0.9,0.9)) # Estimate model and then generate 10 time series from it ourModel <- gum(rnorm(100,100,5)) simulate(ourModel,nsim=10)
# Create 120 observations from GUM(1[1]). Generate 100 time series of this kind. x <- sim.gum(orders=c(1),lags=c(1),obs=120,nsim=100) # Generate similar thing for seasonal series of GUM(1[1],1[4]]) x <- sim.gum(orders=c(1,1),lags=c(1,4),frequency=4,obs=80,nsim=100,transition=c(1,0,0.9,0.9)) # Estimate model and then generate 10 time series from it ourModel <- gum(rnorm(100,100,5)) simulate(ourModel,nsim=10)
Function generates data using ETS with Single Source of Error as a data generating process for the demand occurrence. As the main output it produces probabilities of occurrence.
sim.oes(model = "MNN", obs = 10, nsim = 1, frequency = 1, occurrence = c("odds-ratio", "inverse-odds-ratio", "direct", "general"), bounds = c("usual", "admissible", "restricted"), randomizer = c("rlnorm", "rinvgauss", "rgamma", "rnorm"), persistence = NULL, phi = 1, initial = NULL, initialSeason = NULL, modelB = model, persistenceB = persistence, phiB = phi, initialB = initial, initialSeasonB = initialSeason, ...)
sim.oes(model = "MNN", obs = 10, nsim = 1, frequency = 1, occurrence = c("odds-ratio", "inverse-odds-ratio", "direct", "general"), bounds = c("usual", "admissible", "restricted"), randomizer = c("rlnorm", "rinvgauss", "rgamma", "rnorm"), persistence = NULL, phi = 1, initial = NULL, initialSeason = NULL, modelB = model, persistenceB = persistence, phiB = phi, initialB = initial, initialSeasonB = initialSeason, ...)
model |
Type of ETS model according to [Hyndman et. al., 2008]
taxonomy. Can consist of 3 or 4 chars: |
obs |
Number of observations in each generated time series. |
nsim |
Number of series to generate (number of simulations to do). |
frequency |
Frequency of generated data. In cases of seasonal models must be greater than 1. |
occurrence |
Type of occurrence model. See |
bounds |
Type of bounds to use for persistence vector if values are
generated. |
randomizer |
Type of random number generator function used for error
term. It is advised to use |
persistence |
Persistence vector, which includes all the smoothing
parameters. Must correspond to the chosen model. The maximum length is 3:
level, trend and seasonal smoothing parameters. If |
phi |
Value of damping parameter. If trend is not chosen in the model, the parameter is ignored. |
initial |
Vector of initial states of level and trend. The maximum
length is 2. If |
initialSeason |
Vector of initial states for seasonal coefficients.
Should have length equal to |
modelB |
Type of model B. This is used in |
persistenceB |
The persistence vector for the model B. |
phiB |
Value of damping parameter for the model B. |
initialB |
Vector of initial states of level and trend for the model B. |
initialSeasonB |
Vector of initial states for seasonal coefficients for the model B. |
... |
Additional parameters passed to the chosen randomizer. All the parameters should be passed in the order they are used in chosen randomizer. Both model A and model B share the same parameters for the randomizer. |
For the information about the function, see the vignette:
vignette("simulate","smooth")
List of the following values is returned:
model
- Name of ETS model.
modelA
- Model A, generated using sim.es()
function;
modelB
- Model B, generated using sim.es()
function;
probability
- The value of probability generated by the model;
occurrence
- Type of occurrence used in the model;
logLik
- Log-likelihood of the constructed model.
Ivan Svetunkov, [email protected]
Snyder, R. D., 1985. Recursive Estimation of Dynamic Linear Models. Journal of the Royal Statistical Society, Series B (Methodological) 47 (2), 272-276.
Hyndman, R.J., Koehler, A.B., Ord, J.K., and Snyder, R.D. (2008) Forecasting with exponential smoothing: the state space approach, Springer-Verlag. doi:10.1007/978-3-540-71918-2.
# This example uses rinvgauss function from statmod package. oETSMNNIG <- sim.oes(model="MNN",frequency=12,obs=60, randomizer="rinvgauss",mean=1,dispersion=0.5) # A simpler example with log normal distribution oETSMNNlogN <- sim.oes(model="MNN",frequency=12,obs=60,initial=1, randomizer="rlnorm",meanlog=0,sdlog=0.1)
# This example uses rinvgauss function from statmod package. oETSMNNIG <- sim.oes(model="MNN",frequency=12,obs=60, randomizer="rinvgauss",mean=1,dispersion=0.5) # A simpler example with log normal distribution oETSMNNlogN <- sim.oes(model="MNN",frequency=12,obs=60,initial=1, randomizer="rlnorm",meanlog=0,sdlog=0.1)
Function generates data using SMA in a Single Source of Error state space model as a data generating process.
sim.sma(order = NULL, obs = 10, nsim = 1, frequency = 1, initial = NULL, randomizer = c("rnorm", "rt", "rlaplace", "rs"), probability = 1, ...)
sim.sma(order = NULL, obs = 10, nsim = 1, frequency = 1, initial = NULL, randomizer = c("rnorm", "rt", "rlaplace", "rs"), probability = 1, ...)
order |
Order of the modelled series. If omitted, then a random order from 1 to 100 is selected. |
obs |
Number of observations in each generated time series. |
nsim |
Number of series to generate (number of simulations to do). |
frequency |
Frequency of generated data. In cases of seasonal models must be greater than 1. |
initial |
Vector of initial states for the model. If |
randomizer |
Type of random number generator function used for error
term. Defaults are: |
probability |
Probability of occurrence, used for intermittent data generation. This can be a vector, implying that probability varies in time (in TSB or Croston style). |
... |
Additional parameters passed to the chosen randomizer. All the
parameters should be passed in the order they are used in chosen randomizer.
For example, passing just |
For the information about the function, see the vignette:
vignette("simulate","smooth")
List of the following values is returned:
model
- Name of SMA model.
data
- Time series vector (or matrix if nsim>1
) of the generated
series.
states
- Matrix (or array if nsim>1
) of states. States are in
columns, time is in rows.
initial
- Vector (or matrix) of initial values.
probability
- vector of probabilities used in the simulation.
intermittent
- type of the intermittent model used.
residuals
- Error terms used in the simulation. Either vector or matrix,
depending on nsim
.
occurrence
- Values of occurrence variable. Once again, can be either
a vector or a matrix...
logLik
- Log-likelihood of the constructed model.
Ivan Svetunkov, [email protected]
Snyder, R. D., 1985. Recursive Estimation of Dynamic Linear Models. Journal of the Royal Statistical Society, Series B (Methodological) 47 (2), 272-276.
Hyndman, R.J., Koehler, A.B., Ord, J.K., and Snyder, R.D. (2008) Forecasting with exponential smoothing: the state space approach, Springer-Verlag. doi:10.1007/978-3-540-71918-2.
# Create 40 observations of quarterly data using AAA model with errors from normal distribution sma10 <- sim.sma(order=10,frequency=4,obs=40,randomizer="rnorm",mean=0,sd=100)
# Create 40 observations of quarterly data using AAA model with errors from normal distribution sma10 <- sim.sma(order=10,frequency=4,obs=40,randomizer="rnorm",mean=0,sd=100)
Function generates data using SSARIMA with Single Source of Error as a data generating process.
sim.ssarima(orders = list(ar = 0, i = 1, ma = 1), lags = 1, obs = 10, nsim = 1, frequency = 1, AR = NULL, MA = NULL, constant = FALSE, initial = NULL, bounds = c("admissible", "none"), randomizer = c("rnorm", "rt", "rlaplace", "rs"), probability = 1, ...)
sim.ssarima(orders = list(ar = 0, i = 1, ma = 1), lags = 1, obs = 10, nsim = 1, frequency = 1, AR = NULL, MA = NULL, constant = FALSE, initial = NULL, bounds = c("admissible", "none"), randomizer = c("rnorm", "rt", "rlaplace", "rs"), probability = 1, ...)
orders |
List of orders, containing vector variables |
lags |
Defines lags for the corresponding orders (see examples above).
The length of |
obs |
Number of observations in each generated time series. |
nsim |
Number of series to generate (number of simulations to do). |
frequency |
Frequency of generated data. In cases of seasonal models must be greater than 1. |
AR |
Vector or matrix of AR parameters. The order of parameters should be lag-wise. This means that first all the AR parameters of the firs lag should be passed, then for the second etc. AR of another ssarima can be passed here. |
MA |
Vector or matrix of MA parameters. The order of parameters should be lag-wise. This means that first all the MA parameters of the firs lag should be passed, then for the second etc. MA of another ssarima can be passed here. |
constant |
If |
initial |
Vector of initial values for state matrix. If |
bounds |
Type of bounds to use for AR and MA if values are generated.
|
randomizer |
Type of random number generator function used for error
term. Defaults are: |
probability |
Probability of occurrence, used for intermittent data generation. This can be a vector, implying that probability varies in time (in TSB or Croston style). |
... |
Additional parameters passed to the chosen randomizer. All the
parameters should be passed in the order they are used in chosen randomizer.
For example, passing just |
For the information about the function, see the vignette:
vignette("simulate","smooth")
List of the following values is returned:
model
- Name of SSARIMA model.
AR
- Value of AR parameters. If nsim>1
, then this is a
matrix.
MA
- Value of MA parameters. If nsim>1
, then this is a
matrix.
constant
- Value of constant term. If nsim>1
, then this
is a vector.
initial
- Initial values of SSARIMA. If nsim>1
, then this
is a matrix.
data
- Time series vector (or matrix if nsim>1
) of the
generated series.
states
- Matrix (or array if nsim>1
) of states. States
are in columns, time is in rows.
residuals
- Error terms used in the simulation. Either vector or
matrix, depending on nsim
.
occurrence
- Values of occurrence variable. Once again, can be
either a vector or a matrix...
logLik
- Log-likelihood of the constructed model.
Ivan Svetunkov, [email protected]
Snyder, R. D., 1985. Recursive Estimation of Dynamic Linear Models. Journal of the Royal Statistical Society, Series B (Methodological) 47 (2), 272-276.
Hyndman, R.J., Koehler, A.B., Ord, J.K., and Snyder, R.D. (2008) Forecasting with exponential smoothing: the state space approach, Springer-Verlag. doi:10.1007/978-3-540-71918-2.
Svetunkov, I., & Boylan, J. E. (2019). State-space ARIMA for supply-chain forecasting. International Journal of Production Research, 0(0), 1–10. doi:10.1080/00207543.2019.1600764
sim.es, ssarima,
Distributions, orders
# Create 120 observations from ARIMA(1,1,1) with drift. Generate 100 time series of this kind. x <- sim.ssarima(ar.orders=1,i.orders=1,ma.orders=1,obs=120,nsim=100,constant=TRUE) # Generate similar thing for seasonal series of SARIMA(1,1,1)(0,0,2)_4 x <- sim.ssarima(ar.orders=c(1,0),i.orders=c(1,0),ma.orders=c(1,2),lags=c(1,4), frequency=4,obs=80,nsim=100,constant=FALSE) # Generate 10 series of high frequency data from SARIMA(1,0,2)_1(0,1,1)_7(1,0,1)_30 x <- sim.ssarima(ar.orders=c(1,0,1),i.orders=c(0,1,0),ma.orders=c(2,1,1),lags=c(1,7,30), obs=360,nsim=10)
# Create 120 observations from ARIMA(1,1,1) with drift. Generate 100 time series of this kind. x <- sim.ssarima(ar.orders=1,i.orders=1,ma.orders=1,obs=120,nsim=100,constant=TRUE) # Generate similar thing for seasonal series of SARIMA(1,1,1)(0,0,2)_4 x <- sim.ssarima(ar.orders=c(1,0),i.orders=c(1,0),ma.orders=c(1,2),lags=c(1,4), frequency=4,obs=80,nsim=100,constant=FALSE) # Generate 10 series of high frequency data from SARIMA(1,0,2)_1(0,1,1)_7(1,0,1)_30 x <- sim.ssarima(ar.orders=c(1,0,1),i.orders=c(0,1,0),ma.orders=c(2,1,1),lags=c(1,7,30), obs=360,nsim=10)
Function constructs state space simple moving average of predefined order
sma(y, order = NULL, ic = c("AICc", "AIC", "BIC", "BICc"), h = 10, holdout = FALSE, silent = TRUE, fast = TRUE, ...) sma_old(y, order = NULL, ic = c("AICc", "AIC", "BIC", "BICc"), h = 10, holdout = FALSE, cumulative = FALSE, interval = c("none", "parametric", "likelihood", "semiparametric", "nonparametric"), level = 0.95, silent = c("all", "graph", "legend", "output", "none"), ...)
sma(y, order = NULL, ic = c("AICc", "AIC", "BIC", "BICc"), h = 10, holdout = FALSE, silent = TRUE, fast = TRUE, ...) sma_old(y, order = NULL, ic = c("AICc", "AIC", "BIC", "BICc"), h = 10, holdout = FALSE, cumulative = FALSE, interval = c("none", "parametric", "likelihood", "semiparametric", "nonparametric"), level = 0.95, silent = c("all", "graph", "legend", "output", "none"), ...)
y |
Vector or ts object, containing data needed to be forecasted. |
order |
Order of simple moving average. If |
ic |
The information criterion used in the model selection procedure. |
h |
Length of forecasting horizon. |
holdout |
If |
silent |
If |
fast |
if |
... |
Other non-documented parameters. For example parameter
|
cumulative |
If |
interval |
Type of interval to construct. This can be:
The parameter also accepts |
level |
Confidence level. Defines width of prediction interval. |
The function constructs AR model in the Single Source of Error state space form based on the idea that:
which is AR(n) process, that can be modelled using:
Where is a state vector.
For some more information about the model and its implementation, see the
vignette: vignette("sma","smooth")
Object of class "smooth" is returned. It contains the list of the following values:
model
- the name of the estimated model.
timeElapsed
- time elapsed for the construction of the model.
states
- the matrix of the fuzzy components of ssarima, where
rows
correspond to time and cols
to states.
transition
- matrix F.
persistence
- the persistence vector. This is the place, where
smoothing parameters live.
measurement
- measurement vector of the model.
order
- order of moving average.
initial
- Initial state vector values.
initialType
- Type of initial values used.
nParam
- table with the number of estimated / provided parameters.
If a previous model was reused, then its initials are reused and the number of
provided parameters will take this into account.
fitted
- the fitted values.
forecast
- the point forecast.
lower
- the lower bound of prediction interval. When
interval=FALSE
then NA is returned.
upper
- the higher bound of prediction interval. When
interval=FALSE
then NA is returned.
residuals
- the residuals of the estimated model.
errors
- The matrix of 1 to h steps ahead errors. Only returned when the
multistep losses are used and semiparametric interval is needed.
s2
- variance of the residuals (taking degrees of freedom into
account).
interval
- type of interval asked by user.
level
- confidence level for interval.
cumulative
- whether the produced forecast was cumulative or not.
y
- the original data.
holdout
- the holdout part of the original data.
ICs
- values of information criteria of the model. Includes AIC,
AICc, BIC and BICc.
logLik
- log-likelihood of the function.
lossValue
- Cost function value.
loss
- Type of loss function used in the estimation.
accuracy
- vector of accuracy measures for the
holdout sample. Includes: MPE, MAPE, SMAPE, MASE, sMAE, RelMAE, sMSE and
Bias coefficient (based on complex numbers). This is available only when
holdout=TRUE
.
Ivan Svetunkov, [email protected]
Svetunkov I. (2023) Smooth forecasting with the smooth package in R. arXiv:2301.01790. doi:10.48550/arXiv.2301.01790.
Svetunkov I. (2015 - Inf) "smooth" package for R - series of posts about the underlying models and how to use them: https://openforecast.org/category/r-en/smooth/.
Svetunkov, I., & Petropoulos, F. (2017). Old dog, new tricks: a modelling view of simple moving averages. International Journal of Production Research, 7543(January), 1-14. doi:10.1080/00207543.2017.1380326
# SMA of specific order ourModel <- sma(rnorm(118,100,3), order=12, h=18, holdout=TRUE) # SMA of arbitrary order ourModel <- sma(rnorm(118,100,3), h=18, holdout=TRUE) plot(forecast(ourModel, h=18, interval="empirical"))
# SMA of specific order ourModel <- sma(rnorm(118,100,3), order=12, h=18, holdout=TRUE) # SMA of arbitrary order ourModel <- sma(rnorm(118,100,3), h=18, holdout=TRUE) plot(forecast(ourModel, h=18, interval="empirical"))
Package contains functions implementing Single Source of Error state space models for purposes of time series analysis and forecasting.
Package: | smooth |
Type: | Package |
Date: | 2016-01-27 - Inf |
License: | GPL-2 |
The following functions are included in the package:
es - Exponential Smoothing in Single Source of Errors State Space form.
ces - Complex Exponential Smoothing.
gum - Generalised Exponential Smoothing.
ssarima - SARIMA in state space framework.
auto.ces - Automatic selection between seasonal and non-seasonal CES.
auto.ssarima - Automatic selection of ARIMA orders.
sma - Simple Moving Average in state space form.
smoothCombine - the function that combines forecasts from es(), ces(), gum(), ssarima() and sma() functions.
cma - Centered Moving Average. This is for smoothing time series, not for forecasting.
sim.es - simulate time series using ETS as a model.
sim.ces - simulate time series using CES as a model.
sim.ssarima - simulate time series using SARIMA as a model.
sim.gum - simulate time series using GUM as a model.
sim.sma - simulate time series using SMA.
sim.oes - simulate time series based on occurrence part of ETS model.
oes - occurrence part of the intermittent state space model.
There are also several methods implemented in the package for the classes "smooth" and "smooth.sim":
orders - extracts orders of the fitted model.
lags - extracts lags of the fitted model.
modelType - extracts type of the fitted model.
forecast - produces forecast using provided model.
multicov - returns covariance matrix of multiple steps ahead forecast errors.
pls - returns Prediction Likelihood Score.
nparam - returns number of the estimated parameters.
fitted - extracts fitted values from provided model.
getResponse - returns actual values from the provided model.
residuals - extracts residuals of provided model.
plot - plots either states of the model or produced forecast (depending on what object is passed).
simulate - uses sim functions (sim.es, sim.ces, sim.ssarima, sim.gum, sim.sma and sim.oes) in order to simulate data using the provided object.
summary - provides summary of the object.
AICc, BICc - return, guess what...
Ivan Svetunkov, [email protected]
Snyder, R. D., 1985. Recursive Estimation of Dynamic Linear Models. Journal of the Royal Statistical Society, Series B (Methodological) 47 (2), 272-276.
Hyndman, R.J., Koehler, A.B., Ord, J.K., and Snyder, R.D. (2008) Forecasting with exponential smoothing: the state space approach, Springer-Verlag. doi:10.1007/978-3-540-71918-2.
Svetunkov Ivan and Boylan John E. (2017). Multiplicative State-Space Models for Intermittent Time Series. Working Paper of Department of Management Science, Lancaster University, 2017:4 , 1-43.
Teunter R., Syntetos A., Babai Z. (2011). Intermittent demand: Linking forecasting to inventory obsolescence. European Journal of Operational Research, 214, 606-615.
Croston, J. (1972) Forecasting and stock control for intermittent demands. Operational Research Quarterly, 23(3), 289-303.
Syntetos, A., Boylan J. (2005) The accuracy of intermittent demand estimates. International Journal of Forecasting, 21(2), 303-314.
Svetunkov, I., Kourentzes, N. (February 2015). Complex exponential smoothing. Working Paper of Department of Management Science, Lancaster University 2015:1, 1-31.
Svetunkov I., Kourentzes N. (2017) Complex Exponential Smoothing for Time Series Forecasting. Not yet published.
Svetunkov I. (2023) Smooth forecasting with the smooth package in R. arXiv:2301.01790. doi:10.48550/arXiv.2301.01790.
Svetunkov I. (2015 - Inf) "smooth" package for R - series of posts about the underlying models and how to use them: https://openforecast.org/category/r-en/smooth/.
Kolassa, S. (2011) Combining exponential smoothing forecasts using Akaike weights. International Journal of Forecasting, 27, pp 238 - 251.
Taylor, J.W. and Bunn, D.W. (1999) A Quantile Regression Approach to Generating Prediction Intervals. Management Science, Vol 45, No 2, pp 225-237.
Lichtendahl Kenneth C., Jr., Grushka-Cockayne Yael, Winkler Robert L., (2013) Is It Better to Average Probabilities or Quantiles? Management Science 59(7):1594-1611. DOI: doi:10.1287/mnsc.1120.1667
forecast, es,
ssarima, ces, gum
y <- ts(rnorm(100,10,3),frequency=12) adam(y,h=20,holdout=TRUE) es(y,h=20,holdout=TRUE) gum(y,h=20,holdout=TRUE) auto.ces(y,h=20,holdout=TRUE) auto.ssarima(y,h=20,holdout=TRUE)
y <- ts(rnorm(100,10,3),frequency=12) adam(y,h=20,holdout=TRUE) es(y,h=20,holdout=TRUE) gum(y,h=20,holdout=TRUE) auto.ces(y,h=20,holdout=TRUE) auto.ssarima(y,h=20,holdout=TRUE)
Function constructs ETS, SSARIMA, CES, GUM and SMA and combines their forecasts using IC weights.
smoothCombine(y, models = NULL, initial = c("optimal", "backcasting"), ic = c("AICc", "AIC", "BIC", "BICc"), loss = c("MSE", "MAE", "HAM", "MSEh", "TMSE", "GTMSE", "MSCE"), h = 10, holdout = FALSE, cumulative = FALSE, interval = c("none", "parametric", "likelihood", "semiparametric", "nonparametric"), level = 0.95, bins = 200, intervalCombine = c("quantile", "probability"), bounds = c("admissible", "none"), silent = c("all", "graph", "legend", "output", "none"), xreg = NULL, regressors = c("use", "select"), initialX = NULL, ...)
smoothCombine(y, models = NULL, initial = c("optimal", "backcasting"), ic = c("AICc", "AIC", "BIC", "BICc"), loss = c("MSE", "MAE", "HAM", "MSEh", "TMSE", "GTMSE", "MSCE"), h = 10, holdout = FALSE, cumulative = FALSE, interval = c("none", "parametric", "likelihood", "semiparametric", "nonparametric"), level = 0.95, bins = 200, intervalCombine = c("quantile", "probability"), bounds = c("admissible", "none"), silent = c("all", "graph", "legend", "output", "none"), xreg = NULL, regressors = c("use", "select"), initialX = NULL, ...)
y |
Vector or ts object, containing data needed to be forecasted. |
models |
List of the estimated smooth models to use in the
combination. If |
initial |
Can be |
ic |
The information criterion used in the model selection procedure. |
loss |
The type of Loss Function used in optimization. There are also available analytical approximations for multistep functions:
Finally, just for fun the absolute and half analogues of multistep estimators
are available: |
h |
Length of forecasting horizon. |
holdout |
If |
cumulative |
If |
interval |
Type of interval to construct. This can be:
The parameter also accepts |
level |
Confidence level. Defines width of prediction interval. |
bins |
The number of bins for the prediction interval. The lower value means faster work of the function, but less precise estimates of the quantiles. This needs to be an even number. |
intervalCombine |
How to average the prediction interval:
quantile-wise ( |
bounds |
What type of bounds to use in the model estimation. The first letter can be used instead of the whole word. |
silent |
If |
xreg |
The vector (either numeric or time series) or the matrix (or
data.frame) of exogenous variables that should be included in the model. If
matrix included than columns should contain variables and rows - observations.
Note that |
regressors |
The variable defines what to do with the provided xreg:
|
initialX |
The vector of initial parameters for exogenous variables.
Ignored if |
... |
This currently determines nothing.
|
The combination of these models using information criteria weights is possible because they are all formulated in Single Source of Error framework. Due to the the complexity of some of the models, the estimation process may take some time. So be patient.
The prediction interval are combined either probability-wise or quantile-wise (Lichtendahl et al., 2013), which may take extra time, because we need to produce all the distributions for all the models. This can be sped up with the smaller value for bins parameter, but the resulting interval may be imprecise.
Ivan Svetunkov, [email protected]
Snyder, R. D., 1985. Recursive Estimation of Dynamic Linear Models. Journal of the Royal Statistical Society, Series B (Methodological) 47 (2), 272-276.
Hyndman, R.J., Koehler, A.B., Ord, J.K., and Snyder, R.D. (2008) Forecasting with exponential smoothing: the state space approach, Springer-Verlag. doi:10.1007/978-3-540-71918-2.
Kolassa, S. (2011) Combining exponential smoothing forecasts using Akaike weights. International Journal of Forecasting, 27, pp 238 - 251.
Taylor, J.W. and Bunn, D.W. (1999) A Quantile Regression Approach to Generating Prediction Intervals. Management Science, Vol 45, No 2, pp 225-237.
Lichtendahl Kenneth C., Jr., Grushka-Cockayne Yael, Winkler Robert L., (2013) Is It Better to Average Probabilities or Quantiles? Management Science 59(7):1594-1611. DOI: doi:10.1287/mnsc.1120.1667
es, auto.ssarima,
auto.ces, auto.gum, sma
## Not run: ourModel <- smoothCombine(BJsales,interval="p") plot(ourModel) ## End(Not run) # models parameter accepts either previously estimated smoothCombine # or a manually formed list of smooth models estimated in sample: ## Not run: smoothCombine(BJsales,models=ourModel) ## Not run: models <- list(es(BJsales), sma(BJsales)) smoothCombine(BJsales,models=models) ## End(Not run)
## Not run: ourModel <- smoothCombine(BJsales,interval="p") plot(ourModel) ## End(Not run) # models parameter accepts either previously estimated smoothCombine # or a manually formed list of smooth models estimated in sample: ## Not run: smoothCombine(BJsales,models=ourModel) ## Not run: models <- list(es(BJsales), sma(BJsales)) smoothCombine(BJsales,models=models) ## End(Not run)
You need a description? So what?
sowhat(...)
sowhat(...)
... |
Any number of variables or string with a question. |
You need details? So what?
It doesn't return any value, only messages. So what?
Ivan Svetunkov, [email protected]
Nowwhat (to be implemented),
x <- rnorm(10000,0,1); sowhat(x); sowhat("What's the meaning of life?") sowhat("I don't have a girlfriend.")
x <- rnorm(10000,0,1); sowhat(x); sowhat("What's the meaning of life?") sowhat("I don't have a girlfriend.")
Function constructs State Space ARIMA, estimating AR, MA terms and initial states.
ssarima(y, orders = list(ar = c(0), i = c(1), ma = c(1)), lags = c(1), constant = FALSE, AR = NULL, MA = NULL, initial = c("backcasting", "optimal"), ic = c("AICc", "AIC", "BIC", "BICc"), loss = c("likelihood", "MSE", "MAE", "HAM", "MSEh", "TMSE", "GTMSE", "MSCE"), h = 10, holdout = FALSE, cumulative = FALSE, interval = c("none", "parametric", "likelihood", "semiparametric", "nonparametric"), level = 0.95, bounds = c("admissible", "none"), silent = c("all", "graph", "legend", "output", "none"), xreg = NULL, regressors = c("use", "select"), initialX = NULL, ...)
ssarima(y, orders = list(ar = c(0), i = c(1), ma = c(1)), lags = c(1), constant = FALSE, AR = NULL, MA = NULL, initial = c("backcasting", "optimal"), ic = c("AICc", "AIC", "BIC", "BICc"), loss = c("likelihood", "MSE", "MAE", "HAM", "MSEh", "TMSE", "GTMSE", "MSCE"), h = 10, holdout = FALSE, cumulative = FALSE, interval = c("none", "parametric", "likelihood", "semiparametric", "nonparametric"), level = 0.95, bounds = c("admissible", "none"), silent = c("all", "graph", "legend", "output", "none"), xreg = NULL, regressors = c("use", "select"), initialX = NULL, ...)
y |
Vector or ts object, containing data needed to be forecasted. |
orders |
List of orders, containing vector variables |
lags |
Defines lags for the corresponding orders (see examples above).
The length of |
constant |
If |
AR |
Vector or matrix of AR parameters. The order of parameters should be lag-wise. This means that first all the AR parameters of the firs lag should be passed, then for the second etc. AR of another ssarima can be passed here. |
MA |
Vector or matrix of MA parameters. The order of parameters should be lag-wise. This means that first all the MA parameters of the firs lag should be passed, then for the second etc. MA of another ssarima can be passed here. |
initial |
Can be either character or a vector of initial states. If it
is character, then it can be |
ic |
The information criterion used in the model selection procedure. |
loss |
The type of Loss Function used in optimization. There are also available analytical approximations for multistep functions:
Finally, just for fun the absolute and half analogues of multistep estimators
are available: |
h |
Length of forecasting horizon. |
holdout |
If |
cumulative |
If |
interval |
Type of interval to construct. This can be:
The parameter also accepts |
level |
Confidence level. Defines width of prediction interval. |
bounds |
What type of bounds to use in the model estimation. The first letter can be used instead of the whole word. |
silent |
If |
xreg |
The vector (either numeric or time series) or the matrix (or
data.frame) of exogenous variables that should be included in the model. If
matrix included than columns should contain variables and rows - observations.
Note that |
regressors |
The variable defines what to do with the provided xreg:
|
initialX |
The vector of initial parameters for exogenous variables.
Ignored if |
... |
Other non-documented parameters. Parameter
|
The model, implemented in this function, is discussed in Svetunkov & Boylan (2019).
The basic ARIMA(p,d,q) used in the function has the following form:
where is the actual values,
is the error term,
are the parameters for AR and MA respectively and
is
the constant. In case of non-zero differences
acts as drift.
This model is then transformed into ARIMA in the Single Source of Error State space form (proposed in Snyder, 1985):
Where is the Bernoulli distributed random variable (in case of
normal data equal to 1),
is the state vector (defined based on
orders
) and is the vector of
lags
, is the
vector of exogenous parameters.
is the
measurement
vector,
is the
transition
matrix, is the
persistence
vector, is the vector of parameters for exogenous variables,
is the
transitionX
matrix and is the
persistenceX
matrix.
Due to the flexibility of the model, multiple seasonalities can be used. For example, something crazy like this can be constructed: SARIMA(1,1,1)(0,1,1)[24](2,0,1)[24*7](0,0,1)[24*30], but the estimation may take some finite time... If you plan estimating a model with more than one seasonality, it is recommended to consider doing it using msarima.
The model selection for SSARIMA is done by the auto.ssarima function.
For some more information about the model and its implementation, see the
vignette: vignette("ssarima","smooth")
Object of class "smooth" is returned. It contains the list of the following values:
model
- the name of the estimated model.
timeElapsed
- time elapsed for the construction of the model.
states
- the matrix of the fuzzy components of ssarima, where
rows
correspond to time and cols
to states.
transition
- matrix F.
persistence
- the persistence vector. This is the place, where
smoothing parameters live.
measurement
- measurement vector of the model.
AR
- the matrix of coefficients of AR terms.
I
- the matrix of coefficients of I terms.
MA
- the matrix of coefficients of MA terms.
constant
- the value of the constant term.
initialType
- Type of the initial values used.
initial
- the initial values of the state vector (extracted
from states
).
nParam
- table with the number of estimated / provided parameters.
If a previous model was reused, then its initials are reused and the number of
provided parameters will take this into account.
fitted
- the fitted values.
forecast
- the point forecast.
lower
- the lower bound of prediction interval. When
interval="none"
then NA is returned.
upper
- the higher bound of prediction interval. When
interval="none"
then NA is returned.
residuals
- the residuals of the estimated model.
errors
- The matrix of 1 to h steps ahead errors. Only returned when the
multistep losses are used and semiparametric interval is needed.
s2
- variance of the residuals (taking degrees of freedom into
account).
interval
- type of interval asked by user.
level
- confidence level for interval.
cumulative
- whether the produced forecast was cumulative or not.
y
- the original data.
holdout
- the holdout part of the original data.
xreg
- provided vector or matrix of exogenous variables. If
regressors="s"
, then this value will contain only selected exogenous
variables.
initialX
- initial values for parameters of exogenous
variables.
ICs
- values of information criteria of the model. Includes
AIC, AICc, BIC and BICc.
logLik
- log-likelihood of the function.
lossValue
- Cost function value.
loss
- Type of loss function used in the estimation.
FI
- Fisher Information. Equal to NULL if FI=FALSE
or when FI
is not provided at all.
accuracy
- vector of accuracy measures for the holdout sample.
In case of non-intermittent data includes: MPE, MAPE, SMAPE, MASE, sMAE,
RelMAE, sMSE and Bias coefficient (based on complex numbers). In case of
intermittent data the set of errors will be: sMSE, sPIS, sCE (scaled
cumulative error) and Bias coefficient. This is available only when
holdout=TRUE
.
B
- the vector of all the estimated parameters.
Ivan Svetunkov, [email protected]
Taylor, J.W. and Bunn, D.W. (1999) A Quantile Regression Approach to Generating Prediction Intervals. Management Science, Vol 45, No 2, pp 225-237.
Lichtendahl Kenneth C., Jr., Grushka-Cockayne Yael, Winkler Robert L., (2013) Is It Better to Average Probabilities or Quantiles? Management Science 59(7):1594-1611. DOI: doi:10.1287/mnsc.1120.1667
Snyder, R. D., 1985. Recursive Estimation of Dynamic Linear Models. Journal of the Royal Statistical Society, Series B (Methodological) 47 (2), 272-276.
Hyndman, R.J., Koehler, A.B., Ord, J.K., and Snyder, R.D. (2008) Forecasting with exponential smoothing: the state space approach, Springer-Verlag. doi:10.1007/978-3-540-71918-2.
Svetunkov, I., & Boylan, J. E. (2019). State-space ARIMA for supply-chain forecasting. International Journal of Production Research, 0(0), 1–10. doi:10.1080/00207543.2019.1600764
auto.ssarima, orders,
msarima, auto.msarima,
sim.ssarima, adam
# ARIMA(1,1,1) fitted to some data ourModel <- ssarima(rnorm(118,100,3),orders=list(ar=c(1),i=c(1),ma=c(1)),lags=c(1),h=18, holdout=TRUE,interval="p") # The previous one is equivalent to: ourModel <- ssarima(rnorm(118,100,3),ar.orders=c(1),i.orders=c(1),ma.orders=c(1), lags=c(1),h=18,holdout=TRUE,interval="p") # Model with the same lags and orders, applied to a different data ssarima(rnorm(118,100,3),orders=orders(ourModel),lags=lags(ourModel),h=18,holdout=TRUE) # The same model applied to a different data ssarima(rnorm(118,100,3),model=ourModel,h=18,holdout=TRUE) # Example of SARIMA(2,0,0)(1,0,0)[4] ssarima(rnorm(118,100,3),orders=list(ar=c(2,1)),lags=c(1,4),h=18,holdout=TRUE) # SARIMA(1,1,1)(0,0,1)[4] with different initialisations ssarima(rnorm(118,100,3),orders=list(ar=c(1),i=c(1),ma=c(1,1)), lags=c(1,4),h=18,holdout=TRUE) ssarima(rnorm(118,100,3),orders=list(ar=c(1),i=c(1),ma=c(1,1)), lags=c(1,4),h=18,holdout=TRUE,initial="o") # SARIMA of a peculiar order on AirPassengers data ssarima(AirPassengers,orders=list(ar=c(1,0,3),i=c(1,0,1),ma=c(0,1,2)), lags=c(1,6,12),h=10,holdout=TRUE) # ARIMA(1,1,1) with Mean Squared Trace Forecast Error ssarima(rnorm(118,100,3),orders=list(ar=1,i=1,ma=1),lags=1,h=18,holdout=TRUE,loss="TMSE") ssarima(rnorm(118,100,3),orders=list(ar=1,i=1,ma=1),lags=1,h=18,holdout=TRUE,loss="aTMSE") # SARIMA(0,1,1) with exogenous variables ssarima(rnorm(118,100,3),orders=list(i=1,ma=1),h=18,holdout=TRUE,xreg=c(1:118)) summary(ourModel) forecast(ourModel) plot(forecast(ourModel))
# ARIMA(1,1,1) fitted to some data ourModel <- ssarima(rnorm(118,100,3),orders=list(ar=c(1),i=c(1),ma=c(1)),lags=c(1),h=18, holdout=TRUE,interval="p") # The previous one is equivalent to: ourModel <- ssarima(rnorm(118,100,3),ar.orders=c(1),i.orders=c(1),ma.orders=c(1), lags=c(1),h=18,holdout=TRUE,interval="p") # Model with the same lags and orders, applied to a different data ssarima(rnorm(118,100,3),orders=orders(ourModel),lags=lags(ourModel),h=18,holdout=TRUE) # The same model applied to a different data ssarima(rnorm(118,100,3),model=ourModel,h=18,holdout=TRUE) # Example of SARIMA(2,0,0)(1,0,0)[4] ssarima(rnorm(118,100,3),orders=list(ar=c(2,1)),lags=c(1,4),h=18,holdout=TRUE) # SARIMA(1,1,1)(0,0,1)[4] with different initialisations ssarima(rnorm(118,100,3),orders=list(ar=c(1),i=c(1),ma=c(1,1)), lags=c(1,4),h=18,holdout=TRUE) ssarima(rnorm(118,100,3),orders=list(ar=c(1),i=c(1),ma=c(1,1)), lags=c(1,4),h=18,holdout=TRUE,initial="o") # SARIMA of a peculiar order on AirPassengers data ssarima(AirPassengers,orders=list(ar=c(1,0,3),i=c(1,0,1),ma=c(0,1,2)), lags=c(1,6,12),h=10,holdout=TRUE) # ARIMA(1,1,1) with Mean Squared Trace Forecast Error ssarima(rnorm(118,100,3),orders=list(ar=1,i=1,ma=1),lags=1,h=18,holdout=TRUE,loss="TMSE") ssarima(rnorm(118,100,3),orders=list(ar=1,i=1,ma=1),lags=1,h=18,holdout=TRUE,loss="aTMSE") # SARIMA(0,1,1) with exogenous variables ssarima(rnorm(118,100,3),orders=list(i=1,ma=1),h=18,holdout=TRUE,xreg=c(1:118)) summary(ourModel) forecast(ourModel) plot(forecast(ourModel))