I'm running a rolling regression very similar to the following code:

library(PerformanceAnalytics)
library(quantmod)
data(managers)

FL <- as.formula(Next(HAM1)~HAM1+HAM2+HAM3+HAM4)
MyRegression <- function(df,开发者_JAVA百科FL) {
  df <- as.data.frame(df)
  model <- lm(FL,data=df[1:30,])
  predict(model,newdata=df[31,])
}

system.time(Result <- rollapply(managers, 31, FUN="MyRegression",FL,
    by.column = FALSE, align = "right", na.pad = TRUE))

I've got some extra processors, so I'm trying to find a way to parallelize the rolling window. If this was a non-rolling regression I could easily parallelize it using the apply family of functions...

The obvious one is to use lm.fit() instead of lm() so you don't incur all the overhead in processing the formula etc.

Update: So when I said obvious what I meant to say was blindingly obvious but deceptively difficult to implement!

After a bit of fiddling around, I came up with this

library(PerformanceAnalytics)
library(quantmod)
data(managers)

The first stage is to realise that the model matrix can be prebuilt, so we do that and convert it back to a Zoo object for use with rollapply():

mmat2 <- model.frame(Next(HAM1) ~ HAM1 + HAM2 + HAM3 + HAM4, data = managers, 
                     na.action = na.pass)
mmat2 <- cbind.data.frame(mmat2[,1], Intercept = 1, mmat2[,-1])
mmatZ <- as.zoo(mmat2)

Now we need a function that will employ lm.fit() to do the heavy lifting without having to create design matrices at each iteration:

MyRegression2 <- function(Z) {
    ## store value we want to predict for
    pred <- Z[31, -1, drop = FALSE]
    ## get rid of any rows with NA in training data
    Z <- Z[1:30, ][!rowSums(is.na(Z[1:30,])) > 0, ]
    ## Next() would lag and leave NA in row 30 for response
    ## but we precomputed model matrix, so drop last row still in Z
    Z <- Z[-nrow(Z),]
    ## fit the model
    fit <- lm.fit(Z[, -1, drop = FALSE], Z[,1])
    ## get things we need to predict, in case pivoting turned on in lm.fit
    p <- fit$rank
    p1 <- seq_len(p)
    piv <- fit$qr$pivot[p1]
    ## model coefficients
    beta <- fit$coefficients
    ## this gives the predicted value for row 31 of data passed in
    drop(pred[, piv, drop = FALSE] %*% beta[piv])
}

A comparison of timings:

> system.time(Result <- rollapply(managers, 31, FUN="MyRegression",FL,
+                                 by.column = FALSE, align = "right", 
+                                 na.pad = TRUE))
   user  system elapsed 
  0.925   0.002   1.020 
> 
> system.time(Result2 <- rollapply(mmatZ, 31, FUN = MyRegression2,
+                                  by.column = FALSE,  align = "right",
+                                  na.pad = TRUE))
   user  system elapsed 
  0.048   0.000   0.05

Which affords a pretty reasonable improvement over the original. And now check that the resulting objects are the same:

> all.equal(Result, Result2)
[1] TRUE

Enjoy!

New answer

I wrote a package, rollRegres, that does this much faster. It is ~ 58 times faster than Gavin Simpson's answer. Here is an example

# simulate data
set.seed(101)
n <- 10000
wdth <- 50
X <- matrix(rnorm(10 * n), n, 10)
y <- drop(X %*% runif(10)) + rnorm(n)
Z <- cbind(y, X)

# assign other function
lm_version <- function(Z, width = wdth) {
  pred <- Z[width + 1, -1, drop = FALSE]
  ## fit the model
  Z <- Z[-nrow(Z), ]
  fit <- lm.fit(Z[, -1, drop = FALSE], Z[,1])
  ## get things we need to predict, in case pivoting turned on in lm.fit
  p <- fit$rank
  p1 <- seq_len(p)
  piv <- fit$qr$pivot[p1]
  ## model coefficients
  beta <- fit$coefficients
  ## this gives the predicted value for next obs
  drop(pred[, piv, drop = FALSE] %*% beta[piv])
}

# show that they yield the same
library(rollRegres) # the new package
library(zoo)
all.equal(
  rollapply(Z, wdth + 1, FUN = lm_version,
            by.column = FALSE,  align = "right", fill = NA_real_),
  roll_regres.fit(
    x = X, y = y, width = wdth, do_compute = "1_step_forecasts"
    )$one_step_forecasts)
#R [1] TRUE

# benchmark
library(compiler)
lm_version <- cmpfun(lm_version)
microbenchmark::microbenchmark(
  newnew = roll_regres.fit(
    x = X, y = y, width = wdth, do_compute = "1_step_forecasts"),
  prev   = rollapply(Z, wdth + 1, FUN = lm_version,
                     by.column = FALSE,  align = "right", fill = NA_real_),
  times = 10)
#R Unit: milliseconds
#R   expr       min        lq      mean    median        uq       max neval
#R newnew  10.27279  10.48929  10.91631  11.04139  11.13877  11.87121    10
#R   prev 555.45898 565.02067 582.60309 582.22285 602.73091 605.39481    10

Old answer

You can reduce the run time by updating a decomposition. This yields an $\mathcal{O}(p^2)$ cost at each iteration instead of $\mathcal{O}(np^2 + p^3)$ where n is you window width. Below is a code to compare the two. It would likely be much faster doing it in C++ but the LINPACK dchud and dchdd are not included with R so you would have to write a package to do so. Further, I recall reading that you may do faster with other implementations than the LINPACK dchud and dchdd for the R update

library(SamplerCompare) # for LINPACK `chdd` and `chud`
roll_forcast <- function(X, y, width){
  n <- nrow(X)
  p <- ncol(X)
  out <- rep(NA_real_, n)

  is_first <- TRUE
  i <- width 
  while(i < n){
    if(is_first){
      is_first <- FALSE
      qr. <- qr(X[1:width, ])
      R <- qr.R(qr.)

      # Use X^T for the rest
      X <- t(X)

      XtY <- drop(tcrossprod(y[1:width], X[, 1:width]))
    } else {
      x_new <- X[, i]
      x_old <- X[, i - width]

      # update R 
      R <- .Fortran(
        "dchud", R, p, p, x_new, 0., 0L, 0L, 
        0., 0., numeric(p), numeric(p), 
        PACKAGE = "SamplerCompare")[[1]]

      # downdate R
      R <- .Fortran(
        "dchdd", R, p, p, x_old, 0., 0L, 0L, 
        0., 0., numeric(p), numeric(p), integer(1),
        PACKAGE = "SamplerCompare")[[1]]

      # update XtY
      XtY <- XtY + y[i] * x_new - y[i - width] * x_old
    }

    coef. <- .Internal(backsolve(R, XtY, p, TRUE, TRUE))
    coef. <- .Internal(backsolve(R, coef., p, TRUE, FALSE))

    i <- i + 1
    out[i] <- X[, i] %*% coef.
  }

  out
}

# simulate data
set.seed(101)
n <- 10000
wdth = 50
X <- matrix(rnorm(10 * n), n, 10)
y <- drop(X %*% runif(10)) + rnorm(n)
Z <- cbind(y, X)

# assign other function
lm_version <- function(Z, width = wdth) {
  pred <- Z[width + 1, -1, drop = FALSE]
  ## fit the model
  Z <- Z[-nrow(Z), ]
  fit <- lm.fit(Z[, -1, drop = FALSE], Z[,1])
  ## get things we need to predict, in case pivoting turned on in lm.fit
  p <- fit$rank
  p1 <- seq_len(p)
  piv <- fit$qr$pivot[p1]
  ## model coefficients
  beta <- fit$coefficients
  ## this gives the predicted value for row 31 of data passed in
  drop(pred[, piv, drop = FALSE] %*% beta[piv])
}

# show that they yield the same
library(zoo)
all.equal(
  rollapply(Z, wdth + 1, FUN = lm_version,  
            by.column = FALSE,  align = "right", fill = NA_real_),
  roll_forcast(X, y, wdth))
#R> [1] TRUE

# benchmark
library(compiler)
roll_forcast <- cmpfun(roll_forcast)
lm_version <- cmpfun(lm_version)
microbenchmark::microbenchmark(
  new =  roll_forcast(X, y, wdth),
  prev = rollapply(Z, wdth + 1, FUN = lm_version,  
                   by.column = FALSE,  align = "right", fill = NA_real_), 
  times = 10)
#R> Unit: milliseconds
#R> expr      min       lq     mean   median       uq      max neval cld
#R>  new 113.7637 115.4498 129.6562 118.6540 122.4930 230.3414    10  a 
#R> prev 639.6499 674.1677 682.1996 678.6195 686.8816 763.8034    10   b