I've built a method to create an error correction model (ECM) that is an average of multiple ECMs. To do this, I'm leveraging the lm()
function in R to create multiple lm
objects that represent ECMs. I'm averaging the coefficients of each object to obtain the final model. The way the lm
objects represent ECMs is that I transform the data to the format necessary for an ECM before running lm()
on the data.
I also use backwards selection using AIC to eliminate variables I don't need. The process seems to work fine in creating an ECM. However, when I create a data frame with column names that match the coefficients in my model, I get an error saying a necessary variable is missing from the data. However, in the final model this variable was not included, so it shouldn't be necessary to predict on. So why is predict()
looking for that variable? What am I doing wrong?
#Load data
library(ecm)
data(Wilshire)
trn <- Wilshire[Wilshire$date<='2015-12-01',]
y <- trn$Wilshire5000
xeq <- xtr <- trn[c('CorpProfits', 'FedFundsRate', 'UnempRate')]
#Function to split data into k partitions and build k models, each on a (k-1)/k subset of the data
avelm <- function(formula, data, k = 5, seed = 5, ...) {
lmall <- lm(formula, data, ...)
modellist <- 1:k
set.seed(seed)
models <- lapply(modellist, function(i) {
tstIdx <- sample(nrow(data), 1/k * nrow(data))
trn <- data[-tstIdx, ]
lm(as.formula(formula), data = trn)
})
lmnames <- names(lmall$coefficients)
lmall$coefficients <- rowMeans(as.data.frame(sapply(models, function(m) coef(m))))
names(lmall$coefficients) <- lmnames
lmall$fitted.values <- predict(lmall, data)
target <- trimws(gsub("~.*$", "", formula))
lmall$residuals <- data[, target] - lmall$fitted.values
return(lmall)
}
#Function to create an ECM using backwards selection based on AIC (leveraged avelm function above)
aveecmback <- function (y, xeq, xtr, k = 5, seed = 5, ...) {
xeqnames <- names(xeq)
xeqnames <- paste0(xeqnames, "Lag1")
xeq <- as.data.frame(xeq)
xeq <- rbind(rep(NA, ncol(xeq)), xeq[1:(nrow(xeq) - 1), ])
xtrnames <- names(xtr)
xtrnames <- paste0("delta", xtrnames)
xtr <- as.data.frame(xtr)
xtr <- data.frame(apply(xtr, 2, diff, 1))
yLag1 <- y[1:(length(y) - 1)]
x <- cbind(xtr, xeq[complete.cases(xeq), ])
x <- cbind(x, yLag1)
names(x) <- c(xtrnames, xeqnames, "yLag1")
x$dy <- diff(y, 1)
formula <- "dy ~ ."
model <- avelm(formula, data = x, k = k, seed = seed, ...)
fullAIC <- partialAIC <- AIC(model)
while (partialAIC <= fullAIC) {
todrop <- rownames(drop1(model))[-grep("none|yLag1", rownames(drop1(model)))][which.min(drop1(model)$AIC[-grep("none|yLag1", rownames(drop1(model)))])]
formula <- paste0(formula, " - ", todrop)
model <- avelm(formula, data = x, seed = seed, ...)
partialAIC <- AIC(model)
if (partialAIC < fullAIC & length(rownames(drop1(model))) > 2) {
fullAIC <- partialAIC
}
}
return(model)
}
finalmodel <- aveecmback(y, xeq, xtr)
print(finalmodel)
Call:
lm(formula = formula, data = data)
Coefficients:
(Intercept) deltaCorpProfits deltaUnempRate CorpProfitsLag1 yLag1
-0.177771 0.012733 -1.204489 0.002046 -0.024294
#Create data frame to predict on
set.seed(2)
df <- data.frame(deltaCorpProfits=rnorm(5), deltaUnempRate=rnorm(5), CorpProfitsLag1=rnorm(5), yLag1=rnorm(5))
predict(finalmodel, df)
Error in eval(predvars, data, env) : object 'deltaFedFundsRate' not found