Rolling regression return multiple objects

2020-07-30 02:01发布

问题:

I am trying to build a rolling regression function based on the example here, but in addition to returning the predicted values, I would like to return some rolling model diagnostics (i.e. coefficients, t-values, and maybe R^2). I would like the results to be returned in discrete objects based on the type of result. The example provided in the link above successfully creates the rolling predictions, but I need some assistance packaging and writing out the rolling model diagnostics:

In the end, I would like the function to return four (4) objects:

  1. Predictions
  2. Coefficients
  3. T values
  4. R^2

Below is the code:

# library() stops with an error when a package is missing; require() would
# only return FALSE and let the script fail later at the first zoo/dynlm call.
library(zoo)
library(dynlm)

## Create some dummy data: y depends on the previous x, the previous y and
## Gaussian noise, so a model with lagged regressors fits it.
set.seed(12345)
n_obs <- 100L
x <- rnorm(n_obs, mean = 3, sd = 2)
y <- rep(NA_real_, n_obs)
y[1] <- x[1]
# The recursion on y[i - 1] keeps this an explicit loop; drawing the noise
# one value at a time preserves the random stream of the original script.
for (i in seq_len(n_obs)[-1]) {
  y[i] <- 1 + x[i - 1] + 0.5 * y[i - 1] + rnorm(1, mean = 0, sd = 0.5)
}
int <- seq_len(n_obs) # running row number stored alongside x and y
dummydata <- data.frame(int = int, x = x, y = y)
zoodata <- as.zoo(dummydata)


# Fit the dynamic regression on one rolling window and predict the
# observation that immediately follows that window.
#
# series: one window of rows with columns 'int', 'x' and 'y'; it is coerced
#         with as.zoo() before fitting.
# Returns a named numeric vector c(predicted, square.res). When no row of
# the global `zoodata` follows the window, the `if` has no else branch and
# the function returns NULL invisibly.
rolling.regression <- function(series) {
  mod <- dynlm(formula = y ~ L(y) + L(x), data = as.zoo(series)) # get model

  nextOb <- max(series[, "int"]) + 1 # first row that follows the window
  if (nextOb <= nrow(zoodata)) { # nothing follows the last row
    # 1) Make predictions
    predicted <- predict(
      mod,
      newdata = data.frame(x = zoodata[nextOb, "x"], y = zoodata[nextOb, "y"])
    )
    attributes(predicted) <- NULL
    # Name the element with `=`: writing `square.res <- ...` inside c() only
    # creates a local variable and leaves the second element unnamed.
    c(predicted = predicted,
      square.res = (predicted - zoodata[nextOb, "y"])^2)

    # 2) Extract coefficients
    #coefficients <- coef(mod)

    # 3) Extract rolling coefficient t values
    #tvalues <- ????(mod)

    # 4) Extract rolling R^2
    #rsq <-
  }
}

# Apply rolling.regression to every window of `rolling.window` consecutive
# rows. by.column = FALSE passes all columns of a window to FUN at once;
# FALSE is spelled out because `F` is an ordinary, reassignable variable.
rolling.window <- 20
results.z <- rollapply(zoodata, width = rolling.window,
                       FUN = rolling.regression,
                       by.column = FALSE, align = "right")

So after figuring out how to extract t values from the model (i.e. mod), what do I need to do to make the function return three (3) separate objects (i.e. Predictions, Coefficients, and T-values)?

I am fairly new to R, really new to functions, and extremely new to zoo, and I'm stuck.

Any assistance would be greatly appreciated.

回答1:

I hope I got you correctly, but here is a small edit of your function:

# Fit the dynamic regression on one rolling window, predict the observation
# that follows it, and report the model diagnostics alongside.
#
# series: one window of rows with columns 'int', 'x' and 'y'; it is coerced
#         with as.zoo() before fitting.
# Returns a named numeric vector with the elements predicted, square.res,
# coef_intercept, coef_Ly, coef_Lx, tValue_intercept, tValue_Ly, tValue_Lx
# and AdjR. When no row of the global `zoodata` follows the window, the `if`
# has no else branch and the function returns NULL invisibly.
rolling.regression <- function(series) {
  mod <- dynlm(formula = y ~ L(y) + L(x), data = as.zoo(series)) # get model

  nextOb <- max(series[, "int"]) + 1 # first row that follows the window
  if (nextOb <= nrow(zoodata)) { # nothing follows the last row
    # 1) Make predictions
    predicted <- predict(
      mod,
      newdata = data.frame(x = zoodata[nextOb, "x"], y = zoodata[nextOb, "y"])
    )
    attributes(predicted) <- NULL

    # Summarise the model once and reuse it; the full element name avoids
    # relying on partial matching of `$coef`.
    mod_summary <- summary(mod)
    coef_table <- mod_summary$coefficients # column 1: estimate, 3: t value

    #Solution 1; Quicker to write
    #     c(predicted = predicted,
    #       square.res = (predicted - zoodata[nextOb, "y"])^2,
    #       coef_table[, 1],
    #       coef_table[, 3],
    #       AdjR = mod_summary$adj.r.squared)

    #Solution 2; Get column names right
    # Rows 1 to 3 are read as intercept, L(y), L(x), following the order of
    # the terms in the model formula.
    c(predicted = predicted,
      square.res = (predicted - zoodata[nextOb, "y"])^2,
      coef_intercept = coef_table[1, 1],
      coef_Ly = coef_table[2, 1],
      coef_Lx = coef_table[3, 1],
      tValue_intercept = coef_table[1, 3],
      tValue_Ly = coef_table[2, 3],
      tValue_Lx = coef_table[3, 3],
      AdjR = mod_summary$adj.r.squared)
  }
}



# Apply rolling.regression to every window of `rolling.window` consecutive
# rows. by.column = FALSE passes all columns of a window to FUN at once;
# FALSE is spelled out because `F` is an ordinary, reassignable variable.
rolling.window <- 20
results.z <- rollapply(zoodata, width = rolling.window,
                       FUN = rolling.regression,
                       by.column = FALSE, align = "right")

    head(results.z)
   predicted square.res coef_intercept   coef_Ly  coef_Lx tValue_intercept tValue_Ly tValue_Lx      AdjR
20 10.849344   0.721452     0.26596465 0.5798046 1.049594       0.38309211  7.977627  13.59831 0.9140886
21 12.978791   2.713053     0.26262820 0.5796883 1.039882       0.37741499  7.993014  13.80632 0.9190757
22  9.814676  11.719999     0.08050796 0.5964808 1.073941       0.12523824  8.888657  15.01353 0.9340732
23  5.616781  15.013297     0.05084124 0.5984748 1.077133       0.08964998  9.881614  16.48967 0.9509550
24  3.763645   6.976454     0.26466039 0.5788949 1.068493       0.51810115 11.558724  17.22875 0.9542983
25  9.433157  31.772658     0.38577698 0.5812665 1.034862       0.70969330 10.728395  16.88175 0.9511061

To see how it works, make a small example with a regression:

# Toy data: y is twice x plus standard-normal noise.
x <- rnorm(1000)
y <- 2 * x + rnorm(1000)
# Ordinary least-squares fit of y on x.
reg <- lm(y ~ x)
# Print the coefficient table stored in the model summary.
summary(reg)$coefficients
              Estimate Std. Error    t value Pr(>|t|)
(Intercept) 0.02694322 0.03035502  0.8876033 0.374968
x           1.97572544 0.03177346 62.1816310 0.000000

As you can see, calling summary first and then getting the coefficients of it (coef(summary(reg)) works as well) gives you a table with estimates, standard errors, and t-values. So estimates are saved in column 1 of that table, t-values in column 3. And that's how I obtain them in the updated rolling.regression function.

EDIT

I updated my solution; now it also contains the adjusted R2. If you just want the ordinary R2, drop the "adj." prefix, i.e. use r.squared instead of adj.r.squared.

EDIT 2

A quick and dirty hack to name the columns automatically:

# Fit the dynamic regression on one rolling window, predict the observation
# that follows it, and report the model diagnostics with generated names.
#
# series: one window of rows with columns 'int', 'x' and 'y'; it is coerced
#         with as.zoo() before fitting.
# Returns a named numeric vector: predicted, square.res, AdjR, then one
# "Coef_*" and one "tValue_*" element per row of the coefficient table.
# When no row of the global `zoodata` follows the window, the `if` has no
# else branch and the function returns NULL invisibly.
rolling.regression <- function(series) {
  mod <- dynlm(formula = y ~ L(y) + L(x), data = as.zoo(series)) # get model

  nextOb <- max(series[, "int"]) + 1 # first row that follows the window
  if (nextOb <= nrow(zoodata)) { # nothing follows the last row
    # 1) Make predictions
    predicted <- predict(
      mod,
      newdata = data.frame(x = zoodata[nextOb, "x"], y = zoodata[nextOb, "y"])
    )
    attributes(predicted) <- NULL

    # Summarise the model once and reuse it; the full element name avoids
    # relying on partial matching of `$coef`.
    mod_summary <- summary(mod)
    coef_table <- mod_summary$coefficients # column 1: estimate, 3: t value

    # Get variable names: "Intercept", then "L1", "L2", ... for the other
    # rows. seq_len() yields no labels at all for a one-row table, whereas
    # 1:0 would have produced the two bogus labels "L1" and "L0".
    # NOTE(review): assumes the first row is the intercept - confirm if the
    # formula is ever changed to drop it.
    strVar <- c("Intercept", paste0("L", seq_len(nrow(coef_table) - 1L)))
    vec <- c(predicted = predicted,
             square.res = (predicted - zoodata[nextOb, "y"])^2,
             AdjR = mod_summary$adj.r.squared,
             coef_table[, 1],
             coef_table[, 3])
    # The first three elements are already named; relabel everything after.
    names(vec)[4:length(vec)] <- c(paste0("Coef_", strVar),
                                   paste0("tValue_", strVar))

    vec
  }
}