######################################################
## Dino P Christenson
## Ohio State University
## Intro to Survey Package in R for GISSR Course
## Replicating examples from Lumley: http://faculty.washington.edu/tlumley/survey/html/svydesign.html
######################################################

## Set WD
#setwd("[]")  # directory of your choice

## Load necessary libraries
library(survey)  # Lumley's package
library(SDaA) # Verbeke's data for reproduction of Lohr (esp useful to those of you who will go on for the GIS)
#library(foreign)  # FYI to load foreign formatted data 
## see PRISM intro for other starter hints: http://polisci.osu.edu/grads/dchristenson/papers/IntrotoUsingR.pdf

## Load Data 
#data() # find available data
data(api) 

## LMs assuming SRS
names(apipop) #take a look
lmfull<-glm(api00~ell+meals+mobility, data=apipop)
lmstrat<-glm(api00~ell+meals+mobility, data=apistrat)
lmclus1<-glm(api00~ell+meals+mobility, data=apiclus1)
lmclus2<-glm(api00~ell+meals+mobility, data=apiclus2)

summary(lmfull)
summary(lmstrat)
summary(lmclus1)
summary(lmclus2)

######################################################
## Taylor Series linearization examples
######################################################

##### Stratified sample
dstrat<-svydesign(id=~1,strata=~stype, weights=~pw, data=apistrat, fpc=~fpc)
dstrat  # design of survey
summary(dstrat)  # more detail
svymean(~api00+I(api00-api99), dstrat) # estimate mean api score
svytotal(~enroll, dstrat)  # estimate total enrollment
summary(svyglm(api00~ell+meals+mobility, design=dstrat))  # Regression of api on social disadvantage measures


## Let's repeat these analyses for cluster samples and then for cluster samples by replicate weights 

##### One-stage cluster sample
dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)

##### Two-stage cluster sample: weights computed from population sizes.
dclus2<-svydesign(id=~dnum+snum, fpc=~fpc1+fpc2, data=apiclus2)
summary(dclus2)
#svymean(~api00, dclus2)
#svytotal(~enroll, dclus2,na.rm=TRUE)

##### Stratified cluster sample (Lumley notes that in this case the data are not really sampled this way)
dstratclus<-svydesign(id=~dnum, strata=~stype, weights=~pw, data=apistrat,nest=TRUE)
summary(dstratclus)

##### PPS sampling without replacement
#neat example but not used in the presentation
#data(election)
#dpps<- svydesign(id=~1, weights=~wt, fpc=~p, data=election_pps, pps="brewer")

######################################################
## Replicate weight examples
######################################################

##### Convert one stage cluster sample above to JK1 jackknife
rclus1<-as.svrepdesign(dclus1)

##### Convert to bootstrap
bclus1<-as.svrepdesign(dclus1,type="bootstrap", replicates=100)

## Compare the results of the clusters
svymean(~api00, dclus1)
svytotal(~enroll, dclus1)

svymean(~api00, rclus1)
svytotal(~enroll, rclus1)

svymean(~api00, bclus1)
svytotal(~enroll, bclus1)


######################################################
## Some other examples from Lumley
######################################################

data(scd)
scddes<-svydesign(data=scd, prob=~1, id=~ambulance, strata=~ESA,nest=TRUE, fpc=rep(5,6))
scdnofpc<-svydesign(data=scd, prob=~1, id=~ambulance, strata=~ESA,nest=TRUE)

# convert to BRR replicate weights
scd2brr <- as.svrepdesign(scdnofpc, type="BRR")
scd2fay <- as.svrepdesign(scdnofpc, type="Fay",fay.rho=0.3)
# convert to JKn weights 
scd2jkn <- as.svrepdesign(scdnofpc, type="JKn")

# convert to JKn weights with finite population correction
scd2jknf <- as.svrepdesign(scddes, type="JKn")

## with user-supplied hadamard matrix
scd2brr1 <- as.svrepdesign(scdnofpc, type="BRR", hadamard.matrix=paley(11))

svyratio(~alive, ~arrests, design=scd2brr)
svyratio(~alive, ~arrests, design=scd2brr1)
svyratio(~alive, ~arrests, design=scd2fay)
svyratio(~alive, ~arrests, design=scd2jkn)
svyratio(~alive, ~arrests, design=scd2jknf)


### End ###