scd.data <- read.csv('../data/scd_RAL_data_final.csv', na.strings="NA")
scd.data <- na.omit(scd.data)

#Filter out the WT's from the dataset
scd.data <- scd.data[!((1:length(scd.data[,1])) %in% grep("WT", as.character(scd.data$genotype))),]
#And get rid of the factor levels that are no longer used
scd.data$genotype <- droplevels(scd.data$genotype)

#####################################################
#1. Penetrance of ts2 comb differs among lines
#####################################################
penetrance.data <- data.frame(	genotype = scd.data$genotype, 
								ectopic_present = (scd.data$t2_teeth > 0)
							  )
							  
penetrance.data.tabulated <- data.frame(aggregate(penetrance.data[,2],
    by=list(genotype=penetrance.data$genotype), FUN=sum), 
        aggregate(penetrance.data[,2], 
        by=list(genotype=penetrance.data$genotype), FUN=length)[2])
        
penetrance.model <- glm(ectopic_present ~ genotype, 
    family = binomial(link="logit"), 
    data = penetrance.data)

chi2 <- penetrance.model$null.deviance - penetrance.model$deviance

#Will want to re-do this using mle2 or a Bayesian model, and then plot confidence intervals for the parameter estimates

###########
#Fit confidence intervals individually

library(MCMCpack)

penetrance.data.tabulated$estimate <- rep(0, 18)
penetrance.data.tabulated$ci.min <- rep(0,18)
penetrance.data.tabulated$ci.max <- rep(0,18)
penetrance.data.tabulated$p.est <- rep(0,18)
penetrance.data.tabulated$p.min <- rep(0,18)
penetrance.data.tabulated$p.max <- rep(0,18)

tunes <- c( 2.5, #1
			0.0008, #2
			2.5, #3
			0.0008, #4
			2.5, #5
			0.0008, #6
			2.5, #7
			0.0008, #8
			0.0008, #9
			0.0008, #10
			2.5, #11
			0.0008, #12
			2.5, #13
			2.5, #14
			2.5, #15
			2.5, #16
			2.5, #17
			0.0008 #18
			)

for (i in 1:18) {
	hits <- penetrance.data.tabulated[i,2]
	trials <- penetrance.data.tabulated[i,3]
	cur.genotype <- as.character(penetrance.data.tabulated[i,1])
	cur.data <- penetrance.data[as.character(penetrance.data$genotype)==cur.genotype,]
	cur.model <- MCMClogit(ectopic_present ~ 1, data=cur.data, burnin=3000, mcmc=100000, thin=100, tune=tunes[i], verbose=10000)
	penetrance.data.tabulated$estimate[i] <- summary(cur.model)$statistics[1]
	penetrance.data.tabulated$ci.min[i] <- summary(cur.model)$quantiles[1]
	penetrance.data.tabulated$ci.max[i] <- summary(cur.model)$quantiles[5]
	penetrance.data.tabulated$p.min[i] <- plogis(penetrance.data.tabulated$ci.min[i])
	penetrance.data.tabulated$p.max[i] <- plogis(penetrance.data.tabulated$ci.max[i])
	penetrance.data.tabulated$p.est[i] <- penetrance.data.tabulated[i,2] / penetrance.data.tabulated[i,3]
}

library(plotrix)
penetrance.data.tabulated <- penetrance.data.tabulated[order(penetrance.data.tabulated$p.est),]
penetrance.data.tabulated$genotype <- as.factor(as.character(penetrance.data.tabulated$genotype))

pdf(file="../outputs/Figure3_RAL.pdf", width=6, height=10)
par(mfrow=c(2,1))

par(mar=c(5,5,1,1))
plotCI(x=1:18, y=penetrance.data.tabulated$p.est, 
    ui=penetrance.data.tabulated$p.max, 
    li=penetrance.data.tabulated$p.min, 
    cex.lab = 1.5, pch = 20, lwd = 2, cex = 1.5,
    xlim = c(0.5,18.5), ylim = c(0, 1.1), xaxt = "n", xlab = NA, 
    ylab = expression(paste(italic(scd)^1, " penetrance")))
    
xlabels <- as.character(penetrance.data.tabulated$genotype)
#xlabels[1] <- "scd[1] stock"
xlabels[1] <- expression(italic(scd)^1)

axis(side=1, at=1:18, labels=xlabels, las=2, cex.axis=0.8)
text(x=0.7, y=1.05, "A", cex=2)
								
#####################################################
#2. Expressivity of ts2 comb differs among lines
#####################################################
ts2.size.data <- scd.data[scd.data$t2_teeth>0,c("genotype", "t2_teeth")]
ts2.size.model <- glm(t2_teeth ~ genotype, data=ts2.size.data)

ts2.confints <- confint(ts2.size.model)
penetrance.data.tabulated$ts2.tooth.est <- rep(0,18)
penetrance.data.tabulated$ts2.tooth.ci.min <- rep(0,18)
penetrance.data.tabulated$ts2.tooth.ci.max <- rep(0,18)
for (i in 1:18) {
	cur.genotype <- as.character(penetrance.data.tabulated$genotype[i])
	cur.genotype <- ifelse(cur.genotype=="5070", "Intercept", cur.genotype)
	confint.row <- grep(cur.genotype, names(ts2.confints[,1]))
	penetrance.data.tabulated$ts2.tooth.ci.min[i] <- ts2.confints[confint.row,1]
	penetrance.data.tabulated$ts2.tooth.ci.max[i] <- ts2.confints[confint.row,2]
	penetrance.data.tabulated$ts2.tooth.est[i] <- coef(ts2.size.model)[confint.row]
	print('ok1')
	if (cur.genotype == "Intercept") {
		print('ok2')
		intercept <- penetrance.data.tabulated$ts2.tooth.est[i]
	}
	else {
		print('ok3')
		penetrance.data.tabulated$ts2.tooth.ci.min[i] <- penetrance.data.tabulated$ts2.tooth.ci.min[i] + intercept
		penetrance.data.tabulated$ts2.tooth.ci.max[i] <- penetrance.data.tabulated$ts2.tooth.ci.max[i] + intercept
		penetrance.data.tabulated$ts2.tooth.est[i] <- penetrance.data.tabulated$ts2.tooth.est[i] + intercept
	}
}

par(mar=c(5,5,1,1))

plotCI(x = 1:18, y = penetrance.data.tabulated$ts2.tooth.est, 
    ui = penetrance.data.tabulated$ts2.tooth.ci.max, 
    li = penetrance.data.tabulated$ts2.tooth.ci.min, 
    xlim = c(0.5,18.5), ylim = c(0, 5), xaxt = "n", xlab = NA, 
    cex.lab = 1.5, pch = 20, lwd = 2, cex = 1.5,
    #ylab="scd[1] expressivity \n (mean ectopic sex comb tooth number)")
    ylab = expression(paste(italic(scd)^1, " expressivity")))
        
xlabels <- as.character(penetrance.data.tabulated$genotype)
xlabels[1] <- expression(italic(scd)^1)
axis(side=1, at=1:18, labels=xlabels, las=2, cex.axis=1.1)

text(x=0.7, y=4.77, "B", cex=2)

dev.off()

#Plot distributions by genotype
#Are they normal-ish?
#If so, ok							
library(lattice)								
histogram( ~ t2_teeth | genotype, data=scd.data)