knitr::opts_chunk$set(echo = TRUE)
require(Sleuth3)
require(ggplot2)
require(tigerstats)
require(rstan)
finches <- case0201
The data consists of measurements of the beak depths of 178 finches, some measured before the draught, and some measured after the draught.
This is a bit of an overkill, but let’s see from what years the data is:
qplot(finches$Year, bins=3)
Now, let’s see how the beak depths differ across the years
ggplot(finches, aes(x=Depth)) +
geom_histogram(data=subset(finches, Year == 1976), fill = "red", alpha = 0.2) +
geom_histogram(data=subset(finches, Year == 1978), fill = "blue", alpha = 0.2)
Box plots and density plots can also be instructive
bwplot(Year ~ Depth, data = finches)
densityplot(~Depth, groups = Year, auto.key = TRUE, data = finches)
y1976 <- subset(finches, Year == 1976)$Depth
y1978 <- subset(finches, Year == 1978)$Depth
t.test(y1976, y1978, var.equal = F )
##
## Welch Two Sample t-test
##
## data: y1976 and y1978
## t = -4.5833, df = 172.98, p-value = 8.739e-06
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.9564436 -0.3806350
## sample estimates:
## mean of x mean of y
## 9.469663 10.138202
finch_model_stan <- "
data{
int<lower=0> N1; //number of points in example 1
int<lower=0> N2; //number of points in example 2
real sample1[N1];
real sample2[N2];
}
parameters{
real mu1;
real mu2;
real<lower=0> sigma1;
real<lower=0> sigma2;
}
model{
sample1 ~ normal(mu1, sigma1);
sample2 ~ normal(mu2, sigma2);
}"
adaptSteps = 1000 # Number of steps to "tune" the samplers.
burnInSteps = 5000 # Number of steps to "burn-in" the samplers.
nChains = 3 # Number of chains to run.
numSavedSteps=12000 # Total number of steps in chains to save.
thinSteps=10 # Number of steps to "thin" (1=keep every step).
finch_model <- stan_model(model_code = finch_model_stan, model_name = "finch_model")
finch_fit <- sampling(object=finch_model,
data = list(sample1=y1976, sample2=y1978,N1=length(y1976),N2=length(y1978)),
chains = nChains ,
iter = ( ceiling(numSavedSteps/nChains)*thinSteps
+burnInSteps ) ,
warmup = burnInSteps ,
thin = thinSteps ,
init = "random" )
##
## SAMPLING FOR MODEL 'finch_model' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 1.5e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.15 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 45000 [ 0%] (Warmup)
## Chain 1: Iteration: 4500 / 45000 [ 10%] (Warmup)
## Chain 1: Iteration: 5001 / 45000 [ 11%] (Sampling)
## Chain 1: Iteration: 9500 / 45000 [ 21%] (Sampling)
## Chain 1: Iteration: 14000 / 45000 [ 31%] (Sampling)
## Chain 1: Iteration: 18500 / 45000 [ 41%] (Sampling)
## Chain 1: Iteration: 23000 / 45000 [ 51%] (Sampling)
## Chain 1: Iteration: 27500 / 45000 [ 61%] (Sampling)
## Chain 1: Iteration: 32000 / 45000 [ 71%] (Sampling)
## Chain 1: Iteration: 36500 / 45000 [ 81%] (Sampling)
## Chain 1: Iteration: 41000 / 45000 [ 91%] (Sampling)
## Chain 1: Iteration: 45000 / 45000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.097298 seconds (Warm-up)
## Chain 1: 3.31313 seconds (Sampling)
## Chain 1: 3.41043 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'finch_model' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 7e-06 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.07 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 45000 [ 0%] (Warmup)
## Chain 2: Iteration: 4500 / 45000 [ 10%] (Warmup)
## Chain 2: Iteration: 5001 / 45000 [ 11%] (Sampling)
## Chain 2: Iteration: 9500 / 45000 [ 21%] (Sampling)
## Chain 2: Iteration: 14000 / 45000 [ 31%] (Sampling)
## Chain 2: Iteration: 18500 / 45000 [ 41%] (Sampling)
## Chain 2: Iteration: 23000 / 45000 [ 51%] (Sampling)
## Chain 2: Iteration: 27500 / 45000 [ 61%] (Sampling)
## Chain 2: Iteration: 32000 / 45000 [ 71%] (Sampling)
## Chain 2: Iteration: 36500 / 45000 [ 81%] (Sampling)
## Chain 2: Iteration: 41000 / 45000 [ 91%] (Sampling)
## Chain 2: Iteration: 45000 / 45000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 0.112358 seconds (Warm-up)
## Chain 2: 0.898386 seconds (Sampling)
## Chain 2: 1.01074 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'finch_model' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 4e-06 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.04 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 45000 [ 0%] (Warmup)
## Chain 3: Iteration: 4500 / 45000 [ 10%] (Warmup)
## Chain 3: Iteration: 5001 / 45000 [ 11%] (Sampling)
## Chain 3: Iteration: 9500 / 45000 [ 21%] (Sampling)
## Chain 3: Iteration: 14000 / 45000 [ 31%] (Sampling)
## Chain 3: Iteration: 18500 / 45000 [ 41%] (Sampling)
## Chain 3: Iteration: 23000 / 45000 [ 51%] (Sampling)
## Chain 3: Iteration: 27500 / 45000 [ 61%] (Sampling)
## Chain 3: Iteration: 32000 / 45000 [ 71%] (Sampling)
## Chain 3: Iteration: 36500 / 45000 [ 81%] (Sampling)
## Chain 3: Iteration: 41000 / 45000 [ 91%] (Sampling)
## Chain 3: Iteration: 45000 / 45000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 0.08641 seconds (Warm-up)
## Chain 3: 0.714046 seconds (Sampling)
## Chain 3: 0.800456 seconds (Total)
## Chain 3:
samples <- extract(finch_fit)
Let’s look at the posterior distribution for mu1
hist(samples$mu1)
(Are the variances equal?)
hist(samples$sigma1-samples$sigma2)
So what about the means?
hist(samples$mu1-samples$mu2)
mean(samples$mu1-samples$mu2 >= 0)
## [1] 0