###################################################### ## Dino P Christenson ## Ohio State University ## Intro to Survey Package in R for GISSR Course ## Replicating examples from Lumley: http://faculty.washington.edu/tlumley/survey/html/svydesign.html ###################################################### ## Set WD #setwd("[]") # directory of your choice ## Load necessary libraries library(survey) # Lumley's package library(SDaA) # Verbeke's data for reproduction of Lohr (esp useful to those of you who will go on for the GIS) #library(foreign) # FYI to load foreign formatted data ## see PRISM intro for other starter hints: http://polisci.osu.edu/grads/dchristenson/papers/IntrotoUsingR.pdf ## Load Data #data() # find available data data(api) ## LMs assuming SRS names(apipop) #take a look lmfull<-glm(api00~ell+meals+mobility, data=apipop) lmstrat<-glm(api00~ell+meals+mobility, data=apistrat) lmclus1<-glm(api00~ell+meals+mobility, data=apiclus1) lmclus2<-glm(api00~ell+meals+mobility, data=apiclus2) summary(lmfull) summary(lmstrat) summary(lmclus1) summary(lmclus2) ###################################################### ## Taylor Series linearization examples ###################################################### ##### Stratified sample dstrat<-svydesign(id=~1,strata=~stype, weights=~pw, data=apistrat, fpc=~fpc) dstrat # design of survey summary(dstrat) # more detail svymean(~api00+I(api00-api99), dstrat) # estimate mean api score svytotal(~enroll, dstrat) # estimate total enrollment summary(svyglm(api00~ell+meals+mobility, design=dstrat)) # Regression of api on social disadvantage measures ## Let's repeat these analyses for cluster samples and then for cluster samples by replicate weights ##### One-stage cluster sample dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc) ##### Two-stage cluster sample: weights computed from population sizes. dclus2<-svydesign(id=~dnum+snum, fpc=~fpc1+fpc2, data=apiclus2) summary(dclus2) #svymean(~api00, dclus2) #svytotal(~enroll, dclus2,na.rm=TRUE) ##### Stratified cluster sample (Lumley notes that in this case the data are not really sampled this way) dstratclus<-svydesign(id=~dnum, strata=~stype, weights=~pw, data=apistrat,nest=TRUE) summary(dstratclus) ##### PPS sampling without replacement #neat example but not used in the presentation #data(election) #dpps<- svydesign(id=~1, weights=~wt, fpc=~p, data=election_pps, pps="brewer") ###################################################### ## Replicate weight examples ###################################################### ##### Convert one stage cluster sample above to JK1 jackknife rclus1<-as.svrepdesign(dclus1) ##### Convert to bootstrap bclus1<-as.svrepdesign(dclus1,type="bootstrap", replicates=100) ## Compare the results of the clusters svymean(~api00, dclus1) svytotal(~enroll, dclus1) svymean(~api00, rclus1) svytotal(~enroll, rclus1) svymean(~api00, bclus1) svytotal(~enroll, bclus1) ###################################################### ## Some other examples from Lumley ###################################################### data(scd) scddes<-svydesign(data=scd, prob=~1, id=~ambulance, strata=~ESA,nest=TRUE, fpc=rep(5,6)) scdnofpc<-svydesign(data=scd, prob=~1, id=~ambulance, strata=~ESA,nest=TRUE) # convert to BRR replicate weights scd2brr <- as.svrepdesign(scdnofpc, type="BRR") scd2fay <- as.svrepdesign(scdnofpc, type="Fay",fay.rho=0.3) # convert to JKn weights scd2jkn <- as.svrepdesign(scdnofpc, type="JKn") # convert to JKn weights with finite population correction scd2jknf <- as.svrepdesign(scddes, type="JKn") ## with user-supplied hadamard matrix scd2brr1 <- as.svrepdesign(scdnofpc, type="BRR", hadamard.matrix=paley(11)) svyratio(~alive, ~arrests, design=scd2brr) svyratio(~alive, ~arrests, design=scd2brr1) svyratio(~alive, ~arrests, design=scd2fay) svyratio(~alive, ~arrests, design=scd2jkn) svyratio(~alive, ~arrests, design=scd2jknf) ### End ###