#ReproPhenotypeAnalysis.R library(dplyr) library(ggplot2) library(data.table) library(RColorBrewer) library(ggrepel) colors = c("deepskyblue2", brewer.pal(n = 8, name = "Dark2")[4]) founder.strains = c("129S1/SvImJ", "A/J", "C57BL/6J", "CAST/EiJ", "NOD/ShiLtJ", "NZO/HlLtJ", "PWK/PhJ", "WSB/EiJ") ########################################################################### #load in the data ########################################################################### setwd("~/Box/iMac/Documents/Projects/SexRatioBias/Manuscript/SupplementalData/") phenos = fread("TableS6_cc_repro_phenotypes.csv") female.biased.strains = phenos$Strain[which(phenos$sig == 2)] male.biased.strains = phenos$Strain[which(phenos$sig == 1)] ############################################################################### #TESTIS WEIGHT PHENOTYPES ############################################################################### #remove parental strains from the table justCC = phenos %>% filter(! Strain %in% founder.strains) %>% group_by(Strain) %>% summarize(tw = mean(`Testis_Avg (mg)`), std.tw = mean(`Std_Testis_Avg (mg/g)`), n_samples = n(), bias=mean(sig), sex_ratio=mean(sex_ratio)) #look at overall distribution of the phenotypes hist(justCC$tw, breaks = seq(0,200, by=5)) #bimodal with peaks at ~75 and ~100 mg hist(justCC$std.tw, breaks = seq(0,6, by=0.5)) #normal-ish qqnorm(justCC$std.tw) hist(justCC$sex_ratio, breaks = seq(0,1,by=0.05)) #normal-ish qqnorm(justCC$sex_ratio) #create an indicator column marking strains with significant SRD sig = justCC$bias sig[which(justCC$bias == 2)] = 1 justCC <- justCC %>% mutate(sig, srd_dev = abs(0.5-sex_ratio)) hist(justCC$srd_dev, breaks = seq(0,0.3,by=0.05)) #not normal #contrast testis weight in SRD strains versus non SRD strains wilcox.test(tw ~ as.factor(sig), data = justCC) # Wilcoxon rank sum test with continuity correction # # data: tw by as.factor(sig) # W = 66, p-value = 0.4595 # alternative hypothesis: true location shift is not equal to 0 wilcox.test(std.tw ~ as.factor(sig), data = justCC) # Wilcoxon rank sum exact test # # data: std.tw by as.factor(sig) # W = 15, p-value = 0.6095 # alternative hypothesis: true location shift is not equal to 0 #plot testis weight phenotypes in CC and CC founders #reduce data complexity for plotting just testis weights tw = phenos %>% group_by(Strain) %>% summarize(tw = mean(`Testis_Avg (mg)`), sd = sd(`Testis_Avg (mg)`), count = n(), sex_ratio = mean(sex_ratio), sig = mean(sig)) shortname=unlist(strsplit(tw$Strain, split = "/")) shortname=shortname[seq(1,length(shortname),2)] tw <- tw %>% mutate(shortname=shortname) #define colors pt.colors = rep("gray75", times = length(tw$Strain)) pt.colors[which(tw$Strain %in% founder.strains)] = "white" pt.colors[which(tw$Strain %in% female.biased.strains)] = colors[2] pt.colors[which(tw$Strain %in% male.biased.strains)] = colors[1] tw <- tw %>% mutate(color = pt.colors) #plot distribution of testis weights across CC and parental strains sorted.tw = arrange(tw, tw) sorted.tw <- mutate(sorted.tw, StrainF = factor(Strain, Strain)) p <- ggplot(data = sorted.tw, aes(x = StrainF, y = tw)) + geom_errorbar(aes(x = StrainF, ymin = tw-sd, ymax = tw+sd), width = 0.2, size = 0.5) + geom_point(size = 3.5, shape = 21, aes(fill = factor(Strain))) + scale_fill_manual(values=pt.colors) + theme_bw(base_size = 16) + theme(legend.position = "none", axis.text.x = element_text(angle=90, vjust = 0.5, hjust = 1), axis.title.x = element_blank()) + ylab("Testis Weight (mg)") p #ask whether testis weight is correlated with sex ratio in the CC cor.test(sorted.tw$tw, abs(0.5-sorted.tw$sex_ratio), method = "spearman") # Spearman's rank correlation rho # # data: sorted.tw$tw and abs(0.5 - sorted.tw$sex_ratio) # S = 1925.1, p-value = 0.2742 # alternative hypothesis: true rho is not equal to 0 # sample estimates: # rho # -0.2500812 #plot testis weight as a function of SRD pdf(file = "Figure6a.pdf", height = 5, width = 5) p <- ggplot(data = sorted.tw, aes(x = tw, y = abs(0.5-sex_ratio))) + geom_smooth(method = "lm", se = FALSE, linetype = "dashed", size = 0.5, color = "black") + geom_point(size = 3.5, shape = 21, aes(fill = as.factor(sig))) + scale_fill_manual(values=c("gray75", colors)) + theme_bw(base_size = 16) + theme(legend.position = "none") + ylab("Deviation from Sex Ratio Parity") + xlab("Testis Weight (mg)") + geom_text_repel(aes(label = shortname), size = 3) p dev.off() #now look at standardized testis weights std.tw <- filter(phenos, ! is.na(`Std_Testis_Avg (mg/g)`)) %>% group_by(Strain) %>% summarize(std.tw = mean(`Std_Testis_Avg (mg/g)`), sd = sd(`Std_Testis_Avg (mg/g)`), count = n(), sex_ratio = mean(sex_ratio), sig = mean(sig)) shortname=unlist(strsplit(std.tw$Strain, split = "/")) shortname=shortname[seq(1,length(shortname),2)] std.tw <- std.tw %>% mutate(shortname=shortname) #define colors pt.colors = rep("gray75", times = length(std.tw$Strain)) pt.colors[which(std.tw$Strain %in% founder.strains)] = "white" pt.colors[which(std.tw$Strain %in% female.biased.strains)] = colors[2] pt.colors[which(std.tw$Strain %in% male.biased.strains)] = colors[1] std.tw <- std.tw %>% mutate(color = pt.colors) #sort data sorted.std.tw = arrange(std.tw, std.tw) sorted.std.tw <- mutate(sorted.std.tw, StrainF = factor(Strain, Strain)) pdf(file = "Figure6a_std.pdf", height = 5, width = 5) p <- ggplot(sorted.std.tw, aes(x = std.tw, y = abs(0.5-sex_ratio))) + geom_smooth(method = "lm", se = FALSE, linetype = "dashed", size = 0.5, color = "black") + geom_point(size = 3.5, shape = 21, aes(fill = factor(sig))) + scale_fill_manual(values=c("gray75", colors)) + theme_bw(base_size = 16) + theme(legend.position = "none") + xlab("Testis Weight/Body Weight (mg/g)") + ylab("Deviation from Sex Ratio Parity") + geom_text_repel(aes(label = shortname), size = 3) p dev.off() #ask whether standardized testis weight is correlated with sex ratio cor.test(sorted.std.tw$std.tw, abs(0.5-sorted.std.tw$sex_ratio), method = "spearman") # Spearman's rank correlation rho # # data: sorted.std.tw$std.tw and abs(0.5 - sorted.std.tw$sex_ratio) # S = 304, p-value = 0.2484 # alternative hypothesis: true rho is not equal to 0 # sample estimates: # rho # -0.3818182 #plot standardized testis weight (with error bars) across CC strains sorted.std.tw.noFounders = filter(sorted.std.tw, ! Strain %in% founder.strains) sorted.std.tw.noFounders <- mutate(sorted.std.tw.noFounders, StrainF = factor(Strain, Strain)) pt.colors = rep("gray75", times = length(sorted.std.tw.noFounders$Strain)) pt.colors[which(sorted.std.tw.noFounders$Strain %in% female.biased.strains)] = colors[2] pt.colors[which(sorted.std.tw.noFounders$Strain %in% male.biased.strains)] = colors[1] p <- ggplot(sorted.std.tw.noFounders, aes(x = StrainF, y = std.tw)) + geom_errorbar(aes(x = StrainF, ymin = std.tw-sd, ymax = std.tw+sd), width = 0.2, size = 0.5) + geom_point(size = 3.5, shape = 21, aes(fill = StrainF)) + scale_fill_manual(values=pt.colors) + theme_bw(base_size = 16) + theme(legend.position = "none", axis.text.x = element_text(angle=90, vjust = 0.5, hjust = 1), axis.title.x = element_blank()) + ylab("Testis Weight/Body Weight (mg/g)") p ########################################################################### #compare sperm density in CC and founders ########################################################################### justCC = phenos %>% filter(! Strain %in% founder.strains) %>% group_by(Strain) %>% mutate(`sp.den.x10^6` = `Sperm Density (cells/mL)`/1000000) %>% summarize(sp_den = mean(`sp.den.x10^6`), n_samples = n(), sig=mean(sig), sex_ratio=mean(sex_ratio)) #create indicator column sig = justCC$bias sig[which(justCC$bias == 2)] = 1 justCC <- justCC %>% mutate(sig, srd_dev = abs(0.5-sex_ratio)) #look at normality of sperm density measure hist(justCC$sp_den, seq(0,50,2)) #not normal kruskal.test(sp_den ~ srd_dev, data = justCC) # Kruskal-Wallis rank sum test # # data: sp_den by srd_dev # Kruskal-Wallis chi-squared = 18, df = 18, p-value = 0.4557 #look at correlation between sperm density and deviation from expected sex ratio cor.test(justCC$sp_den, justCC$srd_dev, method = "spearman") # Spearman's rank correlation rho # # data: justCC$sp_den and justCC$srd_dev # S = 983.93, p-value = 0.5762 # alternative hypothesis: true rho is not equal to 0 # sample estimates: # rho # 0.1369022 #look at correlation between sperm density and SRD: cor.test(justCC$sp_den, justCC$sex_ratio, method = "spearman") # Spearman's rank correlation rho # # data: justCC$sp_den and justCC$sex_ratio # S = 1008.9, p-value = 0.6393 # alternative hypothesis: true rho is not equal to 0 # sample estimates: # rho # 0.1149627 #summarize strain variation in sperm density sp.den = phenos %>% filter(! is.na(`Sperm Density (cells/mL)`)) %>% mutate(`sp.den.x10^6` = `Sperm Density (cells/mL)`/1000000) %>% group_by(Strain) %>% summarize(avg = mean(`sp.den.x10^6`), sd = sd(`sp.den.x10^6`), count = n(), sex_ratio = mean(sex_ratio), sig= mean(sig)) shortname=unlist(strsplit(sp.den$Strain, split = "/")) shortname=shortname[seq(1,length(shortname),2)] sp.den <- sp.den %>% mutate(shortname=shortname) sorted.sp.den = arrange(sp.den, avg) sorted.sp.den <- mutate(sorted.sp.den, StrainF = factor(Strain, Strain)) sorted.sp.den <- mutate(sorted.sp.den, lowerCI = avg-sd) sorted.sp.den$lowerCI[which(sorted.sp.den$lowerCI < 0)] = 0 #define colors pt.colors = rep("gray75", times = length(sorted.sp.den$Strain)) pt.colors[which(sorted.sp.den$Strain %in% founder.strains)] = "white" pt.colors[which(sorted.sp.den$Strain %in% female.biased.strains)] = colors[2] pt.colors[which(sorted.sp.den$Strain %in% male.biased.strains)] = colors[1] sorted.sp.den <- sorted.sp.den %>% mutate(colors = pt.colors) p <- ggplot(data = sorted.sp.den, aes(x = StrainF, y = avg)) + geom_errorbar(aes(x = StrainF, ymin = lowerCI, ymax = avg+sd), width = 0.2, size = 0.5) + geom_point(size = 3.5, shape = 21, aes(fill = factor(StrainF))) + scale_fill_manual(values=pt.colors) + theme_bw(base_size = 16) + theme(legend.position = "none", axis.text.x = element_text(angle=90, vjust = 0.5, hjust = 1), axis.title.x = element_blank()) + ylab("Sperm Density (x10^6 cells/mL)") p pdf(file = "Figure6b.pdf", height = 5, width = 5) p <- ggplot(sorted.sp.den, aes(x = avg, y = abs(0.5-sex_ratio))) + geom_smooth(method = "lm", se = FALSE, linetype = "dashed", size = 0.5, color = "black") + geom_point(size = 3.5, shape = 21, aes(fill = factor(sig))) + scale_fill_manual(values=c("gray75", colors)) + theme_bw(base_size = 16) + theme(legend.position = "none") + xlab("Sperm Density (x10^6 cells/mL)") + ylab("Deviation from Sex Ratio Parity") + geom_text_repel(aes(label = shortname), size = 3) p dev.off() #look at correlations between CC strain means and SRD: sorted.sp.den <- sorted.sp.den %>% filter(! Strain %in% founder.strains) cor.test(sorted.sp.den$avg, sorted.sp.den$sex_ratio, method = "spearman") # Spearman's rank correlation rho # # data: sorted.sp.den$avg and sorted.sp.den$sex_ratio # S = 1168.9, p-value = 0.6111 # alternative hypothesis: true rho is not equal to 0 # sample estimates: # rho # 0.1210982 cor.test(sorted.sp.den$avg, abs(0.5-sorted.sp.den$sex_ratio), method = "spearman") # Spearman's rank correlation rho # # data: sorted.sp.den$avg and abs(0.5 - sorted.sp.den$sex_ratio) # S = 1179.9, p-value = 0.6358 # alternative hypothesis: true rho is not equal to 0 # sample estimates: # rho # 0.1128244 ########################################################################### #compare sperm motility in CC and founders ########################################################################### #summarize strain variation in sperm mobility sp.mob = filter(phenos, ! is.na(`% motile sperm`)) %>% group_by(Strain) %>% summarize(avg = mean(`% motile sperm`), sd = sd(`% motile sperm`), count = n(), sex_ratio = mean(sex_ratio), sig = mean(sig)) shortname=unlist(strsplit(sp.mob$Strain, split = "/")) shortname=shortname[seq(1,length(shortname),2)] sp.mob <- sp.mob %>% mutate(shortname=shortname) #define colors pt.colors = rep("gray75", times = length(sp.mob$Strain)) pt.colors[which(sp.mob$Strain %in% founder.strains)] = "white" pt.colors[which(sp.mob$Strain %in% female.biased.strains)] = colors[2] pt.colors[which(sp.mob$Strain %in% male.biased.strains)] = colors[1] sorted.sp.mob = arrange(sp.mob, avg) sorted.sp.mob <- mutate(sorted.sp.mob, StrainF = factor(Strain, Strain)) p <- ggplot(data = sorted.sp.mob, aes(x = StrainF, y = avg)) + geom_errorbar(aes(x = StrainF, ymin = avg-sd, ymax = avg+sd), width = 0.2, size = 0.5) + geom_point(size = 3.5, shape = 21, aes(fill = factor(Strain))) + scale_fill_manual(values=pt.colors) + theme_bw(base_size = 16) + theme(legend.position = "none", axis.text.x = element_text(angle=90, vjust = 0.5, hjust = 1), axis.title.x = element_blank()) + ylab("% Motile Sperm") p pdf(file = "Figure6c.pdf", height = 5, width = 5) p <- ggplot(sorted.sp.mob, aes(x = avg, y = abs(0.5-sex_ratio))) + geom_smooth(method = "lm", se = FALSE, linetype = "dashed", size = 0.5, color = "black") + geom_point(size = 3.5, shape = 21, aes(fill = factor(sig))) + scale_fill_manual(values=c("gray75", colors)) + theme_bw(base_size = 16) + theme(legend.position = "none") + xlab("% Motile Sperm") + ylab("Deviation from Sex Ratio Parity") + geom_text_repel(aes(label = shortname), size = 3) p dev.off() #look at correlation between sperm motility and SRD cor.test(sorted.sp.mob$avg, sorted.sp.mob$sex_ratio, method = "spearman") # Spearman's rank correlation rho # # data: sorted.sp.mob$avg and sorted.sp.mob$sex_ratio # S = 866, p-value = 0.6743 # alternative hypothesis: true rho is not equal to 0 # sample estimates: # rho # 0.1062951 cor.test(sorted.sp.mob$avg, abs(0.5-sorted.sp.mob$sex_ratio), method = "spearman") # Spearman's rank correlation rho # # data: sorted.sp.mob$avg and abs(0.5 - sorted.sp.mob$sex_ratio) # S = 948, p-value = 0.9345 # alternative hypothesis: true rho is not equal to 0 # sample estimates: # rho # 0.02167183 ################################################################################ #Sperm morphology ################################################################################ #summarize strain variation in sperm morphology sp.morph = filter(phenos, ! is.na(`% normal sperm`)) %>% group_by(Strain) %>% summarize(avg = mean(`% normal sperm`), sd = sd(`% normal sperm`), count = n(), sex_ratio = mean(sex_ratio), sig = mean(sig)) shortname=unlist(strsplit(sp.morph$Strain, split = "/")) shortname=shortname[seq(1,length(shortname),2)] sp.morph <- sp.morph %>% mutate(shortname=shortname) #define colors pt.colors = rep("gray75", times = length(sp.morph$Strain)) pt.colors[which(sp.morph$Strain %in% founder.strains)] = "white" pt.colors[which(sp.morph$Strain %in% female.biased.strains)] = colors[2] pt.colors[which(sp.morph$Strain %in% male.biased.strains)] = colors[1] sorted.sp.morph = arrange(sp.morph, avg) sorted.sp.morph <- mutate(sorted.sp.morph, StrainF = factor(Strain, Strain)) p <- ggplot(data = sorted.sp.morph, aes(x = StrainF, y = avg)) + geom_errorbar(aes(x = StrainF, ymin = avg-sd, ymax = avg+sd), width = 0.2, size = 0.5) + geom_point(size = 3.5, shape = 21, aes(fill = factor(Strain))) + scale_fill_manual(values=pt.colors) + theme_bw(base_size = 16) + theme(legend.position = "none", axis.text.x = element_text(angle=90, vjust = 0.5, hjust = 1), axis.title.x = element_blank()) + ylab("% Morphologically Normal Sperm") p pdf(file = "Figure6d.pdf", height = 5, width = 5) p <- ggplot(sorted.sp.morph, aes(x = avg, y = abs(0.5-sex_ratio))) + geom_smooth(method = "lm", se = FALSE, linetype = "dashed", size = 0.5, color = "black") + geom_point(size = 3.5, shape = 21, aes(fill = factor(sig))) + scale_fill_manual(values=c("gray75", colors)) + theme_bw(base_size = 16) + theme(legend.position = "none") + xlab("% Morphologically Normal Sperm") + ylab("Deviation from Sex Ratio Parity") + geom_text_repel(aes(label = shortname), size = 3) p dev.off() #look at correlation with SRD cor.test(sorted.sp.morph$avg, sorted.sp.morph$sex_ratio, method = "spearman") # Spearman's rank correlation rho # # data: sorted.sp.morph$avg and sorted.sp.morph$sex_ratio # S = 94, p-value = 0.5809 # alternative hypothesis: true rho is not equal to 0 # sample estimates: # rho # 0.2166667 cor.test(sorted.sp.morph$avg, abs(0.5-sorted.sp.morph$sex_ratio), method = "spearman") # Spearman's rank correlation rho # # data: sorted.sp.morph$avg and abs(0.5 - sorted.sp.morph$sex_ratio) # S = 92, p-value = 0.5517 # alternative hypothesis: true rho is not equal to 0 # sample estimates: # rho # 0.2333333