# MATH 6627 2011-12 Practicum in Statistical Consulting/Introduction to Splines R script

```##
##   MATH 6627
##   Functions of time
##
##   A brief look at splines and non-parametric regression with smoothing using
##   mixed models.
##
##   March 25, 2009
##
##
##  Splines are functions on the reals that are piecewise-polynomial
##    over adjoining intervals. The the points between two intervals are
##    called knots. The polynomials can be required to be continuous or
##    to be 'smooth' of degree n (continuous derivatives up to the nth derivative)
##    at each knot.
##
##    Common splines in fun.R:
##    lsp:  linear splines, continuous at knots
##             lsp( time, knots = c(1,5,6))
##    qs:   quadratic spline, smooth at knots (continuous 1st derivative)
##             qs ( time, c(-1,1))
##    cs:   cubic spline, continuous 2nd derivatives at knots
##    gsp:  general spline:
##          different degrees of polynomials in each interval and
##          different degress of smoothness at each knot:
##             gsp( x, knots = c(2,5), degree = c(2,3,2), smooth = c(0,1))
##           (-inf, 2), (2,5) and (5,inf) respectively, is continuous
##          at 2 (this means that there could be a 'kink' at 2) and is
##          smooth at 5
##          E.g. to define a cubic spline:  gsp( x, c(2,5), c(3,3,3), c(2,2))
##    sc: is used to generate hypothesis matrices for splines generated with
##          gsp. It can be used to estimate the value, or any derivative
##          of the spline at selected locations.
##          sp <- function(x) gsp(x, c(2,10), c(3,2,3), c(0,1))
##          fit <- lm( y ~ sp(x), dd)
##          sc( sp, c(10,10,10,10,20), D= c(0,1,1,2,2), type = c(1,0,1,2,1))
##              generates a matrix to estimate the:
##                  value of the fitted function at 10,
##                  left derivative at 10,
##                  right derivative at 10
##                  the derivative 'saltus' at 10 (right - left)
##          The matrix generated with 'sc' can then be combined with
##          rbind and/or cbind to construct a complete hypothesis matrix L, say
##          which can then be used with 'glh' or 'wald'
##          e.g. >  L <- cbind( 1, sc( sp, c(10,10,10,10,20), D= c(0,1,1,2,2), type = c(1,0,1,2,1)))
##               >  wald( fit, L)
##
##
##

## Illustration

plot( 1,1, xlim = c(0,10), ylim = c(0,10), type = 'n')
xy2 <- locator( 50, type = 'p')
# xy <- locator( 50, type = 'p')
xy <- as.data.frame(xy)
fit.q <- lm( y ~ x + I(x^2), xy)

pred <- expand.grid( x = seq( 0,10, .05))
pred\$q <- predict( fit.q, pred)
lines( q ~ x, pred, col = 'green')

fit.cube <- lm( y ~ x + I(x^2)+I(x^3), xy)   # why not poly(x,3)
pred\$cube <- predict( fit.cube, pred)
lines( cube ~ x, pred, col = 'blue')

fit.high <- lm( y ~ x + I(x^2)+I(x^3)+I(x^4)+I(x^5)+I(x^6), xy)   # why not poly(x,3)
pred\$high <- predict( fit.high, pred)
lines( high ~ x, pred, col = 'purple')

fit.sp <- lm( y ~ gsp( x, 6, c(2,2), 1), xy)
pred\$sp <- predict( fit.sp, pred)
lines( sp ~ x, pred, col = 'red')

fit.sp7 <- lm( y ~ gsp( x, 7, c(2,2), 1), xy)
summary(fit.sp7)
pred\$sp7 <- predict( fit.sp7, pred)
lines( sp7 ~ x, pred, col = 'red')

sp <- function( x ) gsp( x, 7.5, c(2,2), 1)
fit.sp75 <- lm( y ~ sp(x) , xy)
summary(fit.sp75)
pred\$sp75 <- predict( fit.sp75, pred)
lines( sp75 ~ x, pred, col = 'red', lty = 2)
gsp( 0:10, 7.5, c(2,2), 1)

matplot( pred\$x, gsp( pred\$x, 7.5, c(2,2), 1), type ='l', lwd =3)

# hypothesis testing
sc.mat <-
sc( sp,  c(5, 7.5, 7.5, 7.5, 7.5, 7.5, 7.5, 8),
D= c(2,   0,   1,   1,   2,   2,   2, 2),
type = c(0,   0,   0,   1,   0,   1,   2, 0))
sc.mat
wald( fit.sp75)

L <- cbind( 0, sc.mat )  # estimates f(7.5) - f(0)
wald( fit.sp75, L)
L [2,1] <- 1    # to estimate f(7.5)
wald( fit.sp75, L)

# smoothing spline

library(nlme)
xy\$all = 1
fit.sm <- lme( y ~ x, xy,
random = list( all = pdIdent( ~ smsp(x,seq(-1,11,.5))-1)))

pred\$all <- 1
pred\$sm <- predict( fit.sm, pred, level = 1)
lines( sm ~ x, pred, col = 'red', lwd = 2)

##
## Smoothing splines on the sunspot data
## Note: this data looks periodic but it does not
## have a fixed period. You could try to fit the wave
## with trigonometric functions but my guess is that
## you would fail.  Better suited to a time series with
## randomly changing period are ARIMA or spectral methods.
##
##

data(sunspots)
?sunspots
ds <- data.frame( year = rep(1749:1983,each = 12), sunspots = sunspots)
ds\$month <- 1:12
ds\$observatory <- factor( ifelse(ds\$year > 1960, "Tokyo","Zurich"))

library( car )
some(ds)
ds\$time = ds\$year + (ds\$month - 1)/12

library( lattice )
xyplot( sunspots ~ time, ds)
xyplot( sunspots ~ time, ds, type = 'l')
ds\$Time <- equal.count( ds\$time, number = 6, overlap = .1)

xyplot( sunspots ~ time | Time, ds, layout = c(1,6),
scales = list( x = 'free'), type = 'l', strip = F)

ds\$all = 1

library( nlme )
range( ds\$time)

sp.basis <- smsp( ds\$time, 1749:1984)

matplot( ds\$time, sp.basis[,1:5], xlim =c(1748,1760), type = 'l')
fit <- lme( sunspots ~ 1, ds, random = list( all =
pdIdent( ~ smsp( time, 1749:1984 )-1)))
ds\$fit.sp <- predict( fit, ds, level = 1)
dim( sp( ds\$time ) )

fit.cs <- lm( sunspots ~ cs(time, seq(1750,1980,10)) , ds)
ds\$fit.cs <- predict( fit.cs, ds)

fit.qs <- lm( sunspots ~ qs(time, seq(1750,1980,10)) , ds)
ds\$fit.qs <- predict( fit.qs, ds)

summary(fit.cs)
random = list( all =
pdIdent( ~ smsp( time, 1749:1984 )-1)))

xyplot( sunspots + fit.sp + fit.cs + fit.qs ~ time | Time, ds, layout = c(1,6),
scales = list( x = 'free'), type = 'l', strip = F)

fit.nls <- nls( sunspots ~ a0 + a1*time + b1*sin( alpha*time) +
b2*cos( alpha * time), ds, start = list( a0 = 30, a1=0,
b1=10,b2=10, alpha = 1),
control = nls.control( maxiter = 200))

summary(fit.nls)
ds\$ fit.nls <- predict( fit.nls)

xyplot( sunspots + fit.sp + fit.cs + fit.qs + fit.nls~ time | Time, ds, layout = c(1,6),
scales = list( x = 'free'), type = 'l', strip = F,
auto.key = list( columns = 3))

xyplot( fit.sp + fit.cs + fit.qs + fit.nls~ time | Time, ds, layout = c(1,6),
scales = list( x = 'free'), type = 'l', strip = F,
auto.key = list( columns = 3))

install.packages('fEcofin')  # good source of data sets to play with```