#do_srd_analysis.R library(dplyr) library(ggplot2) library(data.table) library(RColorBrewer) library(ggrepel) library(tidyr) setwd("~/Box/iMac/Documents/Projects/SexRatioBias/Manuscript/SupplementalData/") colors = c(brewer.pal(n = 8, name = "Dark2")[4], "deepskyblue2") data = fread(file = "TableS10_do21_breeding_records.csv") #convert NA's to 0's in the offspring count columns data = data %>% replace_na(list(TOT_1 = 0, F_1 = 0, M_1 = 0, TOT_2 = 0, F_2=0, M_2=0, TOT_3=0, F_3=0, M_3=0)) ################################################ #add sex ratio info to the data table ################################################ data <- data %>% mutate(nF = F_1+F_2+F_3, nM = M_1+M_2+M_3) data <- data %>% mutate(n = nF+nM, sex_ratio=nF/(nF+nM)) lower = numeric(dim(data)[1]); upper = numeric(dim(data)[1]); p = numeric(dim(data)[1]) for (row in 1:dim(data)[1]) { out = binom.test(x = c(data$nF[row], data$nM[row]), alternative="two.sided") lower[row] = out$conf.int[1]; upper[row] = out$conf.int[2]; p[row] = out$p.value } data = mutate(data, lowerCI=lower, upperCI=upper, binomP=p) #determine how many tests are significant n.tests = dim(data)[1] #5315 (expect 266 sig tests by chance, observe 96) male_biased = data %>% filter(binomP < 0.05) %>% filter(sex_ratio < 0.5) #64 breeder pairs yield sig'ly male biased female_biased = data %>% filter(binomP < 0.05) %>% filter(sex_ratio > 0.5) #42 are sig'ly female biased ################################################ #exploratory plots for quick visualization of sex ratio variation #and to ensure ~ normality of the sex ratio across breeding pairs ################################################ #pdf(file = "Histogram_SR.pdf", height = 5, width = 5) par(las = 1) hist(data$sex_ratio, breaks = seq(0,1,0.025), xlab = "% Female", main = "", col = "gray75") abline(v = 0.5, lwd = 3, lty = 2, col = "black") #dev.off() qqnorm(data$sex_ratio) ################################################### #Does lineage provide any significant SR predictive power #YES! ################################################### #MALE LINEAGE anova(lm(sex_ratio ~ as.factor(M_LINEAGE), data = data, na.action = na.omit)) # Analysis of Variance Table # # Response: sex_ratio # Df Sum Sq Mean Sq F value Pr(>F) # as.factor(M_LINEAGE) 174 4.779 0.027466 1.2342 0.02364 * # Residuals 2353 52.363 0.022254 #FEMALE LINEAGE anova(lm(sex_ratio ~ as.factor(F_LINEAGE), data = data, na.action = na.omit)) # Analysis of Variance Table # # Response: sex_ratio # Df Sum Sq Mean Sq F value Pr(>F) # as.factor(F_LINEAGE) 174 4.733 0.027198 1.2211 0.02994 * # Residuals 2353 52.410 0.022274 ################################################## #determine by simulation whether there is extra-binomial variance for SR ################################################### #at the level of mating pairs obs.var = var(sr, na.rm = T) sim.var = numeric(1000) for (r in 1:1000) { print(r) sim.sr = numeric(dim(data)[1]) for (row in 1:dim(data)[1]) { x = rbinom(data$n[row], size = 1, prob = 0.5) sim.sr[row] = sum(x)/data$n[row] } sim.var[r] = var(sim.sr, na.rm = T) } #pdf(file = "VarSR_matingLevel.pdf", height=4,width = 5) par(las = 1) hist(sim.var, breaks = seq(0.02,0.03,0.00025), main = "", xlab = "Simulated Variance in SR", col = "gray75") abline(v=obs.var, lty = 2, lwd = 2, col = "black") #dev.off() #Conclusion: there is significantly LESS variance in sex ratio among mating pairs than expected #at the litter level obs.sr.litter = c() for (row in 1:dim(data)[1]) { #litter 1 n = sum(c(data$F_1[row], data$M_1[row]), na.rm = T) if (n > 0) {obs.sr.litter = c(obs.sr.litter, data$F_1[row]/n)} #litter 2 n = sum(c(data$F_2[row], data$M_2[row]), na.rm = T) if (n > 0) {obs.sr.litter = c(obs.sr.litter, data$F_2[row]/n)} #litter 3 n = sum(c(data$F_3[row], data$M_3[row]), na.rm = T) if (n > 0) {obs.sr.litter = c(obs.sr.litter, data$F_3[row]/n)} } obs.var = var(obs.sr.litter, na.rm = T) sim.var = numeric(1000) for (r in 1:1000) { print(r) sim.sr = numeric(dim(data)[1]*3) for (row in 1:dim(data)[1]) { #sim litter 1 n = sum(c(data$F_1[row], data$M_1[row]), na.rm = T) if (n > 0) { x = rbinom(n, size = 1, prob = 0.5) sim.sr[row] = sum(x)/n } #sim litter 2 n = sum(c(data$F_2[row], data$M_2[row]), na.rm = T) if (n > 0) { x = rbinom(n, size = 1, prob = 0.5) sim.sr[row] = sum(x)/n } #sim litter 3 n = sum(c(data$F_3[row], data$M_3[row]), na.rm = T) if (n > 0) { x = rbinom(n, size = 1, prob = 0.5) sim.sr[row] = sum(x)/n } } sim.var[r] = var(sim.sr, na.rm = T) } #pdf(file = "VarSR_litterLevel.pdf", height=4,width = 5) par(las = 1) hist(sim.var, breaks = seq(0,0.08,0.002), main = "", xlab = "Simulated Variance in SR", col = "gray75") abline(v=obs.var, lty = 2, lwd = 2, col = "black") #dev.off() #Conclusion: there is significantly LESS variance in sex ratio from litter to litter than expected ################################################################## #ask whether there's a significant bias in SR from generation to generation ################################################################## #wrangle the data into an easy format for analysis sr.by.gen = data %>% group_by(GENERATION) %>% summarize(nF=sum(nF), nM=sum(nM), n=sum(n)) sr.by.gen <- sr.by.gen %>% mutate(sex_ratio=nF/n) lower = numeric(dim(sr.by.gen)[1]); upper = numeric(dim(sr.by.gen)[1]); p = numeric(dim(sr.by.gen)[1]) for (row in 1:dim(sr.by.gen)[1]) { out = binom.test(x = c(sr.by.gen$nF[row], sr.by.gen$nM[row]), alternative="two.sided") lower[row] = out$conf.int[1]; upper[row] = out$conf.int[2]; p[row] = out$p.value } sr.by.gen <- mutate(sr.by.gen, lowerCI=lower, upperCI=upper, binomP=p) sig = rep(0, times = dim(sr.by.gen)[1]) sig[which(sr.by.gen$binomP < 0.05)] = 1 sr.by.gen <- mutate(sr.by.gen, sig=sig) #G10, G12, G15, and G16 are slightly male biased sr.by.gen$GENERATION=sub("G", "", sr.by.gen$GENERATION) sr.by.gen <- arrange(sr.by.gen, as.numeric(GENERATION)) #plot sex ratios over time #pdf(file = "SR_by_gen_DO6_22.pdf", width = 8, height = 6) p <- ggplot(sr.by.gen, mapping=aes(x=as.numeric(GENERATION), y = sex_ratio, color=as.factor(sig))) + geom_point(size = 3.5) + theme_bw(base_size = 16) + scale_color_manual(values = c("gray75", colors[2])) + geom_errorbar(aes(ymin = lowerCI, ymax = upperCI), width = 0.2) + ylab("Generation") + xlab("Fraction Females at Wean") + geom_hline(yintercept=0.5, linetype="dashed") + theme(legend.position = "none") p #dev.off() #conclusion: no consistent trend, but several generations do exhibit a significant male bias ################################################## #ask whether there's a sex bias per male lineage ################################################## #data wrangling: data.by.m.lin = data %>% filter(! is.na(M_LINEAGE)) %>% group_by(M_LINEAGE) %>% summarize(nF = sum(nF), nM = sum(nM), n = sum(n)) data.by.f.lin = data %>% filter(! is.na(F_LINEAGE)) %>% group_by(F_LINEAGE) %>% summarize(nF=sum(nF), nM=sum(nM), n = sum(n)) data.by.line <- bind_rows(data.by.m.lin, data.by.f.lin) data.by.line <- mutate(data.by.line, sex_ratio = nF/n) lower = numeric(dim(data.by.line)[1]); upper = numeric(dim(data.by.line)[1]); p = numeric(dim(data.by.line)[1]) for (row in 1:dim(data.by.line)[1]) { out = binom.test(x = c(data.by.line$nF[row], data.by.line$nM[row]), alternative="two.sided") lower[row] = out$conf.int[1]; upper[row] = out$conf.int[2]; p[row] = out$p.value } data.by.line <- mutate(data.by.line, lowerCI=lower, upperCI=upper, binomP=p) data.by.line <- arrange(data.by.line, M_LINEAGE, F_LINEAGE) sig = numeric(length(data.by.line$n)) sig[intersect(which(data.by.line$binomP < 0.05), which(data.by.line$sex_ratio < 0.5))] = 1 #19 male-biased lineages sig[intersect(which(data.by.line$binomP < 0.05), which(data.by.line$sex_ratio > 0.5))] = 2 #8 female-biased lineages data.by.line <- mutate(data.by.line, sig) #plot sex ratio by DO lineage pdf(file = "Figure9a.pdf", width = 11, height = 4) p <- ggplot(data = filter(data.by.line, ! is.na(M_LINEAGE)), aes(x = M_LINEAGE, y = sex_ratio, fill = as.factor(sig))) + geom_errorbar(aes(ymin = lowerCI, ymax = upperCI), width = 0.2) + geom_hline(yintercept=0.5, linetype="dashed") + geom_point(size = 2.5, shape = 21) + theme_bw(base_size = 16) + scale_fill_manual(values = c("gray75", colors[2], colors[1])) + xlab("Male DO Lineage") + ylab("Fraction Females at Wean") + theme(legend.position = "none") + xlim(-1, 176) p dev.off() pdf(file = "Figure9b.pdf", width = 11, height = 4) p <- ggplot(data = filter(data.by.line, ! is.na(F_LINEAGE)), aes(x = F_LINEAGE, y = sex_ratio, fill = as.factor(sig))) + geom_errorbar(aes(ymin = lowerCI, ymax = upperCI), width = 0.2) + geom_hline(yintercept=0.5, linetype="dashed") + geom_point(size = 2.5, shape = 21) + theme_bw(base_size = 16) + scale_fill_manual(values = c("gray75", colors[2], colors[1])) + xlab("Female DO Lineage") + ylab("Fraction Females at Wean") + theme(legend.position = "none") + xlim(-1, 176) p dev.off() ######################################################### #measure correlation between sex ratios of males and females from the same breeding lineages ######################################################### male = filter(data.by.line, ! is.na(M_LINEAGE)) female = filter(data.by.line, ! is.na(F_LINEAGE)) cor.test(male$sex_ratio, female$sex_ratio, method = "spearman") # Spearman's rank correlation rho # # data: male$sex_ratio and female$sex_ratio # S = 1001655, p-value = 0.1094 # alternative hypothesis: true rho is not equal to 0 # sample estimates: # rho # -0.1214234 #define color swatches and plot the data male.colors = rep("gray75", times = 175) male.colors[which(male$sig == 1)] = colors[2] male.colors[which(male$sig == 2)] = colors[1] female.colors = rep("gray75", times = 175) female.colors[which(female$sig == 1)] = colors[2] female.colors[which(female$sig == 2)] = colors[1] pdf(file = "FigureS13.pdf", height = 5, width = 5) par(las = 1) plot(x = male$sex_ratio, y = female$sex_ratio, pch = 21, col = male.colors, bg = female.colors, xlab = "Male Sex Ratio", ylab = "Female Sex Ratio", main = "", cex = 1.2) abline(lm(female$sex_ratio ~ male$sex_ratio), lty = 2, col = "gray75") dev.off()