> get.towns function() { n <- 1000 #sample size # first read in the date, and discard unneeded fields tp <- read.table("town_pops.txt", header = T, sep = ",") tp <- tp[, 4:10] names(tp) <- c("pop", "white", "black", "amer", "asian","other","hisp") Ssize <- tp$pop # strata sizes hisp.p <- tp$hisp/tp$pop #proportion hisp in each stratum var.within <- hisp.p * (1 - hisp.p) Hisp <- sum(tp$hisp)/sum(tp$pop) # prop hisp in whole pop weights <- Ssize/sum(Ssize) var.srs <- Hisp * (1 - Hisp) var.prop <- sum(weights * var.within) var.opt <- (sum(weights * sqrt(var.within)))^2 cat("1000 times variances:\n", "srs= ", round(var.srs, 4), "\nprop= ", round(var.prop, 4), "\noptimal= ", round(var.opt, 4),"\n") }Here is the output from the function:
> get.towns() 1000 times variances: srs= 0.075 prop= 0.0632 optimal= 0.0462Notice that the sample size (1000) has the same effect on each estimator.
I found myself getting confused about the differences between the "population of mortalities" (the 301 values) and the population (sizes).
I used the cut() function to form strata. It puts about 75 counties in each stratum. Perhaps it would be better to place the larger counties in a stratum consisting of a smaller number of counties. For this problem it did not seem worth the trouble, but perhaps I am wrong.
If you like playing with Splus, try to rewrite my function by replacing it by another for-loop.
function() { bc <- read.table("breast.cancer.data") names(bc) <- c("mort", "pop") # ###part (i) popmean <- mean(bc$pop) NN <- length(bc$pop) popvar <- sum((bc$pop - popmean)^2)/NN ) cat("Pop mean= ", round(mean(bc$pop), 1), " total mort= ",sum(bc$mort ), "\n") cat("Pop variance= ", round(popvar, 1), " pop std= ",round(sqrt(popvar), 1), "\n") # ###part (ii) srs100 <- 1:100 # dummy values for(i in 1:100) { srs100[i] <- (NN/25) * sum(sample(bc$mort, 25)) } cat("srs mean = ", mean(srs100), " srs sd = ",sqrt(var(srs100)), "\n") # ###part (iii) prop100 <- 1:100 # dummy values strata <- cut(bc$pop, c(0, quantile(bc$pop, (1:4/4)))) #form strata Strata.sizes <- as.vector(table(strata)) samp.sizes <- c(6, 6, 6, 7) ssam <- 1:4 #dummy values for(i in 1:100) { ssam[1] <- sum(sample(bc$mort[strata == 1], 6)) ssam[2] <- sum(sample(bc$mort[strata == 2], 6)) ssam[3] <- sum(sample(bc$mort[strata == 3], 6)) ssam[4] <- sum(sample(bc$mort[strata == 4], 7)) prop100[i] <- sum(Strata.sizes * ssam/samp.sizes) } cat("prop mean = ", mean(prop100), " prop sd=",sqrt(var(prop100)),"\n") # Now draw the histograms. par(mfrow = c(3, 1)) hist(bc$mort, nclass = 16) hist(srs100) hist(prop100) }