MATH 6627 2011-12 Practicum in Statistical Consulting/Introduction to Splines R script

From Wiki1

Jump to: navigation, search
##
##   MATH 6627
##   Functions of time
##
##   A brief look at splines and non-parametric regression with smoothing using
##   mixed models.
##
##   March 25, 2009
##
##
##  Splines are functions on the reals that are piecewise-polynomial
##    over adjoining intervals. The the points between two intervals are
##    called knots. The polynomials can be required to be continuous or
##    to be 'smooth' of degree n (continuous derivatives up to the nth derivative)
##    at each knot.
##
##    Common splines in fun.R:
##    lsp:  linear splines, continuous at knots
##             lsp( time, knots = c(1,5,6))
##    qs:   quadratic spline, smooth at knots (continuous 1st derivative)
##             qs ( time, c(-1,1))
##    cs:   cubic spline, continuous 2nd derivatives at knots
##    gsp:  general spline:
##          different degrees of polynomials in each interval and
##          different degress of smoothness at each knot:
##             gsp( x, knots = c(2,5), degree = c(2,3,2), smooth = c(0,1))
##          is quadratic, cubic and quadratic over the intervals:
##           (-inf, 2), (2,5) and (5,inf) respectively, is continuous
##          at 2 (this means that there could be a 'kink' at 2) and is
##          smooth at 5
##          E.g. to define a cubic spline:  gsp( x, c(2,5), c(3,3,3), c(2,2))
##    sc: is used to generate hypothesis matrices for splines generated with
##          gsp. It can be used to estimate the value, or any derivative
##          of the spline at selected locations.
##          sp <- function(x) gsp(x, c(2,10), c(3,2,3), c(0,1))
##          fit <- lm( y ~ sp(x), dd)
##          sc( sp, c(10,10,10,10,20), D= c(0,1,1,2,2), type = c(1,0,1,2,1))
##              generates a matrix to estimate the:
##                  value of the fitted function at 10,
##                  left derivative at 10,
##                  right derivative at 10
##                  the derivative 'saltus' at 10 (right - left)
##                  the quadratic at 10
##          The matrix generated with 'sc' can then be combined with
##          rbind and/or cbind to construct a complete hypothesis matrix L, say
##          which can then be used with 'glh' or 'wald'
##          e.g. >  L <- cbind( 1, sc( sp, c(10,10,10,10,20), D= c(0,1,1,2,2), type = c(1,0,1,2,1)))
##               >  wald( fit, L)
##
##
##

## Illustration

    plot( 1,1, xlim = c(0,10), ylim = c(0,10), type = 'n')
    xy2 <- locator( 50, type = 'p')
    # xy <- locator( 50, type = 'p')
    xy <- as.data.frame(xy)
    fit.q <- lm( y ~ x + I(x^2), xy)

    pred <- expand.grid( x = seq( 0,10, .05))
    pred$q <- predict( fit.q, pred)
    lines( q ~ x, pred, col = 'green')

    fit.cube <- lm( y ~ x + I(x^2)+I(x^3), xy)   # why not poly(x,3)
    pred$cube <- predict( fit.cube, pred)
    lines( cube ~ x, pred, col = 'blue')

    fit.high <- lm( y ~ x + I(x^2)+I(x^3)+I(x^4)+I(x^5)+I(x^6), xy)   # why not poly(x,3)
    pred$high <- predict( fit.high, pred)
    lines( high ~ x, pred, col = 'purple')

    fit.sp <- lm( y ~ gsp( x, 6, c(2,2), 1), xy)
    pred$sp <- predict( fit.sp, pred)
    lines( sp ~ x, pred, col = 'red')

    fit.sp7 <- lm( y ~ gsp( x, 7, c(2,2), 1), xy)
    summary(fit.sp7)
    pred$sp7 <- predict( fit.sp7, pred)
    lines( sp7 ~ x, pred, col = 'red')

    sp <- function( x ) gsp( x, 7.5, c(2,2), 1)
    fit.sp75 <- lm( y ~ sp(x) , xy)
    summary(fit.sp75)
    pred$sp75 <- predict( fit.sp75, pred)
    lines( sp75 ~ x, pred, col = 'red', lty = 2)
    gsp( 0:10, 7.5, c(2,2), 1)

    matplot( pred$x, gsp( pred$x, 7.5, c(2,2), 1), type ='l', lwd =3)

    # hypothesis testing
    sc.mat <-
      sc( sp,  c(5, 7.5, 7.5, 7.5, 7.5, 7.5, 7.5, 8),
            D= c(2,   0,   1,   1,   2,   2,   2, 2),
        type = c(0,   0,   0,   1,   0,   1,   2, 0))
    sc.mat
    wald( fit.sp75)
    
    L <- cbind( 0, sc.mat )  # estimates f(7.5) - f(0)
    wald( fit.sp75, L)
    L [2,1] <- 1    # to estimate f(7.5)
    wald( fit.sp75, L)
    
    
    # smoothing spline

    library(nlme)
    xy$all = 1
    fit.sm <- lme( y ~ x, xy,
           random = list( all = pdIdent( ~ smsp(x,seq(-1,11,.5))-1)))

    pred$all <- 1
    pred$sm <- predict( fit.sm, pred, level = 1)
    lines( sm ~ x, pred, col = 'red', lwd = 2)



##
## Smoothing splines on the sunspot data
## Note: this data looks periodic but it does not
## have a fixed period. You could try to fit the wave
## with trigonometric functions but my guess is that
## you would fail.  Better suited to a time series with
## randomly changing period are ARIMA or spectral methods.
##
##



  data(sunspots)
  ?sunspots
  ds <- data.frame( year = rep(1749:1983,each = 12), sunspots = sunspots)
  ds$month <- 1:12
  ds$observatory <- factor( ifelse(ds$year > 1960, "Tokyo","Zurich"))
  
  library( car )
  some(ds)
  ds$time = ds$year + (ds$month - 1)/12

  library( lattice )
  xyplot( sunspots ~ time, ds)
  xyplot( sunspots ~ time, ds, type = 'l')
  ds$Time <- equal.count( ds$time, number = 6, overlap = .1)
  
  xyplot( sunspots ~ time | Time, ds, layout = c(1,6),
          scales = list( x = 'free'), type = 'l', strip = F)

  ds$all = 1

  library( nlme )
  range( ds$time)

  sp.basis <- smsp( ds$time, 1749:1984)
  
  
  matplot( ds$time, sp.basis[,1:5], xlim =c(1748,1760), type = 'l')
  fit <- lme( sunspots ~ 1, ds, random = list( all =
         pdIdent( ~ smsp( time, 1749:1984 )-1)))
  ds$fit.sp <- predict( fit, ds, level = 1)
  dim( sp( ds$time ) )


  fit.cs <- lm( sunspots ~ cs(time, seq(1750,1980,10)) , ds)
  ds$fit.cs <- predict( fit.cs, ds)

  fit.qs <- lm( sunspots ~ qs(time, seq(1750,1980,10)) , ds)
  ds$fit.qs <- predict( fit.qs, ds)


  summary(fit.cs)
  random = list( all =
         pdIdent( ~ smsp( time, 1749:1984 )-1)))

  xyplot( sunspots + fit.sp + fit.cs + fit.qs ~ time | Time, ds, layout = c(1,6),
          scales = list( x = 'free'), type = 'l', strip = F)


  
  
  fit.nls <- nls( sunspots ~ a0 + a1*time + b1*sin( alpha*time) +
             b2*cos( alpha * time), ds, start = list( a0 = 30, a1=0,
                           b1=10,b2=10, alpha = 1),
                           control = nls.control( maxiter = 200))

  
   summary(fit.nls)
     ds$ fit.nls <- predict( fit.nls)
     

  xyplot( sunspots + fit.sp + fit.cs + fit.qs + fit.nls~ time | Time, ds, layout = c(1,6),
          scales = list( x = 'free'), type = 'l', strip = F,
          auto.key = list( columns = 3))

  xyplot( fit.sp + fit.cs + fit.qs + fit.nls~ time | Time, ds, layout = c(1,6),
          scales = list( x = 'free'), type = 'l', strip = F,
          auto.key = list( columns = 3))


  install.packages('fEcofin')  # good source of data sets to play with
Personal tools