############################################################################# # # # Extraction of BLUPs and estimation of broad-sense heritability # # # ############################################################################# ### Extraction of BLUPs library(lme4) data<-read.table('Quality.csv',h=T,sep=";", dec=".") data$Years<-as.factor(data$Years) data$Lots<-as.factor(data$Lots) #F.Weight modWeight<-lmer(F.Weight~(1|Ind)+(1|Ind:Years)+Years+Lots, data=data) summary(modWeight) VarCorr(modWeight) pblup = ranef(modWeight) str(pblup) pblupInd<-pblup$Ind str(pblupInd) write.csv(pblupInd,file="F.Weight.csv") #Hueg modHueg<-lmer(Hue.g~(1|Ind)+(1|Ind:Years)+Years+Lots, data=data) summary(modHueg) VarCorr(modHueg) Huegblup = ranef(modHueg) str(Huegblup) HuegblupInd<-Huegblup$Ind str(HuegblupInd) write.csv(HuegblupInd,file="Hueg.csv") #Ethylene modEthylene<-lmer(Ethylene~(1|Ind)+(1|Ind:Years)+Years+Lots, data=data) summary(modEthylene) VarCorr(modEthylene) Ethyleneblup = ranef(modEthylene) str(Ethyleneblup) EthyleneblupInd<-Ethyleneblup$Ind str(EthyleneblupInd) write.csv(EthyleneblupInd,file="Ethylene.csv") #RI modRI<-lmer(RI~(1|Ind)+(1|Ind:Years)+Years+Lots, data=data) summary(modRI) VarCorr(modRI) RIblup = ranef(modRI) str(RIblup) RIblupInd<-RIblup$Ind str(RIblupInd) write.csv(RIblupInd,file="RI.csv") #TA modTA<-lmer(TA~(1|Ind)+(1|Ind:Years)+Years+Lots, data=data) summary(modTA) VarCorr(modTA) TAblup = ranef(modTA) str(TAblup) TAblupInd<-TAblup$Ind str(TAblupInd) write.csv(TAblupInd,file="TA.csv") #Glucose modGlu<-lmer(Glucose~(1|Ind)+(1|Ind:Years)+Years+Lots, data=data) summary(modGlu) VarCorr(modGlu) Glublup = ranef(modGlu) str(Glublup) GlublupInd<-Glublup$Ind str(GlublupInd) write.csv(GlublupInd,file="Glucose.csv") #Fructose modFru<-lmer(Fructose~(1|Ind)+(1|Ind:Years)+Years+Lots, data=data) summary(modFru) VarCorr(modFru) Frublup = ranef(modFru) str(Frublup) FrublupInd<-Frublup$Ind str(FrublupInd) write.csv(FrublupInd,file="Fructose.csv") #Sucrose modSacch<-lmer(Sucrose~(1|Ind)+(1|Ind:Years)+Years+Lots, data=data) summary(modSacch) VarCorr(modSacch) Sacchblup = ranef(modSacch) str(Sacchblup) SacchblupInd<-Sacchblup$Ind str(SacchblupInd) write.csv(SacchblupInd,file="Sucrose.csv") #Malic.A. modMal<-lmer(Malic.A.~(1|Ind)+(1|Ind:Years)+Years+Lots, data=data) summary(modMal) VarCorr(modMal) Malblup = ranef(modMal) str(Malblup) MalblupInd<-Malblup$Ind str(MalblupInd) write.csv(MalblupInd,file="Malic.A.csv") #Citric.A. modcitr<-lmer(Citric.A.~(1|Ind)+(1|Ind:Years)+Years+Lots, data=data) summary(modcitr) VarCorr(modcitr) citrblup = ranef(modcitr) str(citrblup) citrblupInd<-citrblup$Ind str(citrblupInd) write.csv(citrblupInd,file="Citric.A.csv") ### Estimation of broad-sense heritability #Weight m<-lmer(Weight ~ (1|Ind)+(1|Ind:Years)+Years+Lots, data=data) mu <- summary(m)$coefficients VL <- as.numeric(VarCorr(m)$Ind) VRes <- as.numeric(attr(VarCorr(m), "sc"))^2 hdeux = VL/(VL + VRes) hdeux #Hue.g m<-lmer(Hue.g ~ (1|Ind)+(1|Ind:Years)+Years+Lots, data=data) mu <- summary(m)$coefficients VL <- as.numeric(VarCorr(m)$Ind) VRes <- as.numeric(attr(VarCorr(m), "sc"))^2 hdeux = VL/(VL + VRes) hdeux #Ethylene m<-lmer(Ethylene ~ (1|Ind)+(1|Ind:Years)+Years+Lots, data=data) mu <- summary(m)$coefficients VL <- as.numeric(VarCorr(m)$Ind) VRes <- as.numeric(attr(VarCorr(m), "sc"))^2 hdeux = VL/(VL + VRes) hdeux #RI m<-lmer(RI ~ (1|Ind)+(1|Ind:Years)+Years+Lots, data=data) mu <- summary(m)$coefficients VL <- as.numeric(VarCorr(m)$Ind) VRes <- as.numeric(attr(VarCorr(m), "sc"))^2 hdeux = VL/(VL + VRes) hdeux #TA m<-lmer(TA ~ (1|Ind)+(1|Ind:Years)+Years+Lots, data=data) mu <- summary(m)$coefficients VL <- as.numeric(VarCorr(m)$Ind) VRes <- as.numeric(attr(VarCorr(m), "sc"))^2 hdeux = VL/(VL + VRes) hdeux #Glucose m<-lmer(Glucose ~ (1|Ind)+(1|Ind:Years)+Years+Lots, data=data) mu <- summary(m)$coefficients VL <- as.numeric(VarCorr(m)$Ind) VRes <- as.numeric(attr(VarCorr(m), "sc"))^2 hdeux = VL/(VL + VRes) hdeux #Fructose m<-lmer(Fructose ~ (1|Ind)+(1|Ind:Years)+Years+Lots, data=data) mu <- summary(m)$coefficients VL <- as.numeric(VarCorr(m)$Ind) VRes <- as.numeric(attr(VarCorr(m), "sc"))^2 hdeux = VL/(VL + VRes) hdeux #Sucrose m<-lmer(Sucrose ~ (1|Ind)+(1|Ind:Years)+Years+Lots, data=data) mu <- summary(m)$coefficients VL <- as.numeric(VarCorr(m)$Ind) VRes <- as.numeric(attr(VarCorr(m), "sc"))^2 hdeux = VL/(VL + VRes) hdeux #Citric.A. m<-lmer(Citric.A. ~ (1|Ind)+(1|Ind:Years)+Years+Lots, data=data) mu <- summary(m)$coefficients VL <- as.numeric(VarCorr(m)$Ind) VRes <- as.numeric(attr(VarCorr(m), "sc"))^2 hdeux = VL/(VL + VRes) hdeux #Malic.A. m<-lmer(Malic.A. ~ (1|Ind)+(1|Ind:Years)+Years+Lots, data=data) mu <- summary(m)$coefficients VL <- as.numeric(VarCorr(m)$Ind) VRes <- as.numeric(attr(VarCorr(m), "sc"))^2 hdeux = VL/(VL + VRes) hdeux ############################################################################# # # # Construction of genetic maps # # # ############################################################################# library(ASMap) #Genotyping data data1 <- as.matrix(read.table("4922SNPs_155indiv.txt")) SNP_Id<-rownames(data1) data1<-cbind(SNP_Id,data1) ##Parents' data data2<-data1[,c(1,155,156)] ##Progeny's data data3<-data1[,-c(155,156)] ##Allelic doses for each SNP SNPs_counts<-data.frame(SNP_Id=NULL,count_0=NULL,count_1=NULL,count_2=NULL,count_NA=NULL,Goldrich=NULL, Moniqui=NULL) count_0<-apply(data3,MARGIN = 1, function (x) length (which(x==0))) count_1<-apply(data3,MARGIN = 1, function (x) length (which(x==1))) count_2<-apply(data3,MARGIN = 1, function (x) length (which(x==2))) count_NA<-apply(data3,MARGIN = 1, function (x) length (which(is.na(x)))) Total_counts<-count_NA+count_1+count_2+count_0 Data_counts<-count_1+count_2+count_0 SNPs_counts <- data.frame (cbind(SNP_Id,count_0, count_1, count_2, count_NA, Total_counts, Data_counts)) ##Data for each parent SNPs_counts[,"Goldrich"]<-data2[,"Goldrich"] SNPs_counts[,"Moniqui"]<-data2[,"Moniqui"] #SNPs segregating for Goldrich NAR<-1#Tolerated missing data rate (%) ER<-1#Tolerated error rate (%) ### Goldrich ####List of heterozygous SNPs in Goldrich SNPs_G<-SNPs_counts[SNPs_counts$Goldrich=="1",] SNPs_G <- SNPs_G[!is.na(SNPs_G$SNP_Id), -1] SNP_Id<-rownames(SNPs_G) SNPs_G<-cbind(SNP_Id,SNPs_G) ###Step 1: Heterozygote SNPs for Goldrich and homozygote 2 for Moniqui SNPs_G<-SNPs_G[SNPs_G$Moniqui=="2",] SNPs_G <- SNPs_G[!is.na(SNPs_G$SNP_Id), -1] #Convert characters to numeric SNPs_G$count_0<-as.numeric(as.character(SNPs_G$count_0)) SNPs_G$count_1<-as.numeric(as.character(SNPs_G$count_1)) SNPs_G$count_2<-as.numeric(as.character(SNPs_G$count_2)) SNPs_G$count_NA<-as.numeric(as.character(SNPs_G$count_NA)) SNPs_G$Total_counts<-as.numeric(as.character(SNPs_G$Total_counts)) SNPs_G$Data_counts<-as.numeric(as.character(SNPs_G$Data_counts)) SNPs_G$Goldrich<-as.numeric(as.character(SNPs_G$Goldrich)) SNPs_G$Moniqui<-as.numeric(as.character(SNPs_G$Moniqui)) # % NA rate SNP_Id<-rownames(SNPs_G) SNPs_G<-cbind(SNP_Id,SNPs_G) Genotypes_nb <- 153#without parents SNPs_G<-SNPs_G[SNPs_G$count_NASNPs_G$Temp,] SNPs_G<-SNPs_G[SNPs_G$count_2>SNPs_G$Temp,] SNPs_G<-SNPs_G[SNPs_G$count_00,] Wrongs00<-SNPs_G[SNPs_G$Moniqui=="2"&SNPs_G$count_0>0,"SNP_Id"] length(Wrongs00) ###Step 2: Heterozygote SNPs for Goldrich and homozygote 0 for Moniqui SNPs_G<-SNPs_counts[SNPs_counts$Goldrich=="1",] c<-rownames(SNPs_G) SNPs_G<-SNPs_G[SNPs_G$Moniqui=="0",] SNPs_G <- SNPs_G[!is.na(SNPs_G$SNP_Id), -1] SNP_Id<-rownames(SNPs_G) SNPs_G<-cbind(SNP_Id,SNPs_G) #Convert characters to numeric SNPs_G$count_0<-as.numeric(as.character(SNPs_G$count_0)) SNPs_G$count_1<-as.numeric(as.character(SNPs_G$count_1)) SNPs_G$count_2<-as.numeric(as.character(SNPs_G$count_2)) SNPs_G$count_NA<-as.numeric(as.character(SNPs_G$count_NA)) SNPs_G$Total_counts<-as.numeric(as.character(SNPs_G$Total_counts)) SNPs_G$Data_counts<-as.numeric(as.character(SNPs_G$Data_counts)) SNPs_G$Goldrich<-as.numeric(as.character(SNPs_G$Goldrich)) SNPs_G$Moniqui<-as.numeric(as.character(SNPs_G$Moniqui)) # % NA rate SNPs_G<-SNPs_G[SNPs_G$count_NASNPs_G$Temp,] SNPs_G<-SNPs_G[SNPs_G$count_0>SNPs_G$Temp,] SNPs_G<-SNPs_G[SNPs_G$count_20,] Wrongs11<-SNPs_G[SNPs_G$Moniqui=="0"&SNPs_G$count_2>0,"SNP_Id"] length(Wrongs11) ListSNP_G_2<-SNPs_G$SNP_Id ListSNP_G<-union(ListSNP_G_1,ListSNP_G_2) ###Validated SNPs for Goldrich SNP_Id<-data3[,1] GBS_G<-data3[SNP_Id%in%ListSNP_G,] #Genetic map of Goldrich GBS_G.ASMap<-GBS_G str(GBS_G.ASMap) GBS_G.ASMap<-GBS_G.ASMap[,-1] ncol(GBS_G.ASMap) nrow(GBS_G.ASMap) GBS_G_P2.ASMap<-GBS_G.ASMap rownames(GBS_G_P2.ASMap)<-paste0(rownames(GBS_G.ASMap),"_P2") rownames(GBS_G.ASMap)<-paste0(rownames(GBS_G.ASMap),"_P1") ###Phase 1 for (i in 1:ncol(GBS_G.ASMap)){ GBS_G.ASMap[,i]<-gsub(pattern = "NA","U",fixed=T, GBS_G.ASMap[,i]) GBS_G.ASMap[,i]<-gsub(pattern = "1","A",fixed=T, GBS_G.ASMap[,i]) GBS_G.ASMap[,i]<-gsub(pattern = "0","B",fixed=T, GBS_G.ASMap[,i]) GBS_G.ASMap[,i]<-gsub(pattern = "2","B",fixed=T, GBS_G.ASMap[,i]) GBS_G.ASMap[which(is.na(GBS_G.ASMap[,i])),i]<-"U" } ###Phase 2 for (i in 1:ncol(GBS_G_P2.ASMap)){ GBS_G_P2.ASMap[,i]<-gsub(pattern = "NA","U",fixed=T, GBS_G_P2.ASMap[,i]) GBS_G_P2.ASMap[,i]<-gsub(pattern = "1","B",fixed=T, GBS_G_P2.ASMap[,i]) GBS_G_P2.ASMap[,i]<-gsub(pattern = "0","A",fixed=T, GBS_G_P2.ASMap[,i]) GBS_G_P2.ASMap[,i]<-gsub(pattern = "2","A",fixed=T, GBS_G_P2.ASMap[,i]) GBS_G_P2.ASMap[which(is.na(GBS_G_P2.ASMap[,i])),i]<-"U" } GBS_G.ASMap<-rbind(GBS_G.ASMap,GBS_G_P2.ASMap) rm(GBS_G_P2.ASMap) #GBS_G.ASMap should be a data.frame object containing marker information. #The data.frame must explicitly be arranged with markers in rows and genotypes in columns. #Marker names are obtained from the rownames of the object and genotype names are #obtained from the names component of the object class(GBS_G.ASMap)#matrix #convert the matrix to a data.frame object GBS_G.ASMap<-data.frame(GBS_G.ASMap) rownames(GBS_G.ASMap)#Marker names colnames(GBS_G.ASMap)#genotype names #Convert factors to character objects: Columns of the marker data.frame must be all of type "character" or all of "numeric" for (i in 1:153) { GBS_G.ASMap[,i]<-as.character(GBS_G.ASMap[,i]) } GBS_G.Map<-mstmap.data.frame(GBS_G.ASMap,pop.type="BC",dist.fun="kosambi",p.value=1/1000000) heatMap(GBS_G.Map) summary(GBS_G.Map) GBS_G.Map1<-jittermap(GBS_G.Map) Map.summary<-summary.map(GBS_G.Map1) ###Discarding duplicate linkage groups Map.summary<-Map.summary[order(Map.summary$n.mar,Map.summary$max.spacing),]#Tri des groupes Map.summary LGs_ToRemove<-rownames(Map.summary)[c(seq(1,18,2),seq(2,2,1))] LGs_ToRemove<-LGs_ToRemove[1:10] for (LG in LGs_ToRemove){ ToDrop<-names(pull.map(GBS_G.Map1,chr=LG)[[1]]) GBS_G.Map1<-drop.markers(GBS_G.Map1,ToDrop) } ###Discarding duplicate SNPs (l<-length(findDupMarkers(GBS_G.Map1))) ToDrop<-unlist(findDupMarkers(GBS_G.Map1)) GBS_G.Map1<-drop.markers(GBS_G.Map1,ToDrop) summary(GBS_G.Map1) ###Identification of linkage groups GBS_G.Map1$geno$L1 GBS_G.Map1$geno$L4 GBS_G.Map1$geno$L6 GBS_G.Map1$geno$L7 GBS_G.Map1$geno$L11 GBS_G.Map1$geno$L13 GBS_G.Map1$geno$L16 GBS_G.Map1$geno$L17 names(GBS_G.Map1[[1]])<-c("Chr1","Chr2","Chr3","Chr4","Chr6","Chr7","Chr8","Chr5") ### % ER GBS_G.Map1<-calc.errorlod(GBS_G.Map1,error.prob = 0.001) #Prints those genotypes with error LOD scores above a specified cutoff toperr<-top.errorlod(GBS_G.Map1,cutoff=3) length(toperr$id) ### Convert suspecious data into NA GBS_G.CleanMap<-GBS_G.Map1 for (i in 1:nrow(toperr)) { chr<-toperr$chr[i] id<-toperr$id[i] mar<-toperr$marker[i] GBS_G.CleanMap$geno[[chr]]$data[GBS_G.CleanMap$pheno$id==id,mar]<-NA } ###Imputation of missing data GBS_G.CleanMap<-fill.geno(GBS_G.CleanMap) (l<-length(findDupMarkers(GBS_G.CleanMap))) ToDrop<-unlist(findDupMarkers(GBS_G.CleanMap)) GBS_G.CleanMap<-drop.markers(GBS_G.CleanMap,ToDrop) #Estimate genetic maps NewMap<-est.map(GBS_G.CleanMap,map.function="kosambi",n.cluster=4,maxit=4000) GBS_G.CleanMap<-replace.map(GBS_G.CleanMap,NewMap) summary.map(GBS_G.CleanMap) plot.map(GBS_G.CleanMap, main="Genetic map of Goldrich") #Exporting the file write.cross(GBS_G.CleanMap,format = "csvs",filestem = "GBS_G_1%_4922") ### Moniqui ####List of heterozygous SNPs in Moniqui SNPs_M<-SNPs_counts[SNPs_counts$Moniqui=="1",] ###Step 1: Heterozygote SNPs for Moniqui and homozygote 2 for Goldrich SNPs_M<-SNPs_M[SNPs_M$Goldrich=="2",] SNPs_M <- SNPs_M[!is.na(SNPs_M$SNP_Id), -1] #Convert characters to numeric SNPs_M$count_0<-as.numeric(as.character(SNPs_M$count_0)) SNPs_M$count_1<-as.numeric(as.character(SNPs_M$count_1)) SNPs_M$count_2<-as.numeric(as.character(SNPs_M$count_2)) SNPs_M$count_NA<-as.numeric(as.character(SNPs_M$count_NA)) SNPs_M$Total_counts<-as.numeric(as.character(SNPs_M$Total_counts)) SNPs_M$Data_counts<-as.numeric(as.character(SNPs_M$Data_counts)) SNPs_M$Goldrich<-as.numeric(as.character(SNPs_M$Goldrich)) SNPs_M$Moniqui<-as.numeric(as.character(SNPs_M$Moniqui)) # % NA rate Genotypes_nb<-153 SNPs_M<-SNPs_M[SNPs_M$count_NASNPs_M$Temp,] SNPs_M<-SNPs_M[SNPs_M$count_2>SNPs_M$Temp,] SNPs_M<-SNPs_M[SNPs_M$count_00,] Wrongs00<-SNPs_M[SNPs_M$Moniqui=="2"&SNPs_M$count_0>0,"SNP_Id"] length(Wrongs00) ###Step 2: Heterozygote SNPs for Moniqui et homozyogote 0 for Goldrich SNPs_M<-SNPs_counts[SNPs_counts$M=="1",] SNPs_M<-SNPs_M[SNPs_M$Goldrich=="0",] SNPs_M <- SNPs_M[!is.na(SNPs_M$SNP_Id), -1] SNP_Id<-rownames(SNPs_M) SNPs_M<-cbind(SNP_Id,SNPs_M) #Convert characters to numeric SNPs_M$count_0<-as.numeric(as.character(SNPs_M$count_0)) SNPs_M$count_1<-as.numeric(as.character(SNPs_M$count_1)) SNPs_M$count_2<-as.numeric(as.character(SNPs_M$count_2)) SNPs_M$count_NA<-as.numeric(as.character(SNPs_M$count_NA)) SNPs_M$Total_counts<-as.numeric(as.character(SNPs_M$Total_counts)) SNPs_M$Data_counts<-as.numeric(as.character(SNPs_M$Data_counts)) SNPs_M$Goldrich<-as.numeric(as.character(SNPs_M$Goldrich)) SNPs_M$Moniqui<-as.numeric(as.character(SNPs_M$Moniqui)) # % NA rate Genotypes_nb<-153#Without parents SNPs_M<-SNPs_M[SNPs_M$count_NASNPs_M$Temp,] SNPs_M<-SNPs_M[SNPs_M$count_0>SNPs_M$Temp,] SNPs_M<-SNPs_M[SNPs_M$count_20,] Wrongs11<-SNPs_M[SNPs_M$Moniqui=="0"&SNPs_M$count_2>0,"SNP_Id"] length(Wrongs11) ListSNP_M_2<-SNPs_M$SNP_Id ListSNP_M<-union(ListSNP_M_1,ListSNP_M_2) ###Validated SNPs for Moniqui SNP_Id<-data3[,1] GBS_M<-data3[SNP_Id%in%ListSNP_M,] #Genetic map of Moniqui SNP_Id<-rownames(GBS_M) GBS_M.ASMap<-GBS_M[,-1] GBS_M_P2.ASMap<-GBS_M.ASMap rownames(GBS_M_P2.ASMap)<-paste0(rownames(GBS_M.ASMap),"_P2") rownames(GBS_M.ASMap)<-paste0(rownames(GBS_M.ASMap),"_P1") #Phase 1 for (i in 1:ncol(GBS_M.ASMap)){ GBS_M.ASMap[,i]<-gsub(pattern = "NA","U",fixed=T, GBS_M.ASMap[,i]) GBS_M.ASMap[,i]<-gsub(pattern = "1","A",fixed=T, GBS_M.ASMap[,i]) GBS_M.ASMap[,i]<-gsub(pattern = "0","B",fixed=T, GBS_M.ASMap[,i]) GBS_M.ASMap[,i]<-gsub(pattern = "2","B",fixed=T, GBS_M.ASMap[,i]) GBS_M.ASMap[which(is.na(GBS_M.ASMap[,i])),i]<-"U" } #Phase 2 for (i in 1:ncol(GBS_M_P2.ASMap)){ GBS_M_P2.ASMap[,i]<-gsub(pattern = "NA","U",fixed=T, GBS_M_P2.ASMap[,i]) GBS_M_P2.ASMap[,i]<-gsub(pattern = "1","B",fixed=T, GBS_M_P2.ASMap[,i]) GBS_M_P2.ASMap[,i]<-gsub(pattern = "0","A",fixed=T, GBS_M_P2.ASMap[,i]) GBS_M_P2.ASMap[,i]<-gsub(pattern = "2","A",fixed=T, GBS_M_P2.ASMap[,i]) GBS_M_P2.ASMap[which(is.na(GBS_M_P2.ASMap[,i])),i]<-"U" } GBS_M.ASMap<-rbind(GBS_M.ASMap,GBS_M_P2.ASMap) class(GBS_M.ASMap)#matrix GBS_M.ASMap<-data.frame(GBS_M.ASMap) rownames(GBS_M.ASMap)#Marker names colnames(GBS_M.ASMap)#genotype names #Convert factors to character objects: Columns of the marker data.frame must be all of type "character" or all of "numeric" for (i in 1:153) { GBS_M.ASMap[,i]<-as.character(GBS_M.ASMap[,i]) } str(GBS_M.ASMap) GBS_M.Map<-mstmap.data.frame(GBS_M.ASMap,pop.type="BC",dist.fun="kosambi",p.value=1/1000000) heatMap(GBS_M.Map) summary(GBS_M.Map) GBS_M.Map1<-jittermap(GBS_M.Map) Map.summary<-summary.map(GBS_M.Map1) ###Discarding duplicate linkage groups Map.summary<-Map.summary[order(Map.summary$n.mar,Map.summary$max.spacing),] summary(Map.summary) LGs_ToRemove<-rownames(Map.summary)[c(seq(1,20,2),seq(2,2,1))] LGs_ToRemove<-LGs_ToRemove[1:10] length(LGs_ToRemove) for (LG in LGs_ToRemove){ ToDrop<-names(pull.map(GBS_M.Map1,chr=LG)[[1]]) GBS_M.Map1<-drop.markers(GBS_M.Map1,ToDrop) } ###Discarding duplicate SNPs (l<-length(findDupMarkers(GBS_M.Map1))) ToDrop<-unlist(findDupMarkers(GBS_M.Map1)) GBS_M.Map1<-drop.markers(GBS_M.Map1,ToDrop) summary(GBS_M.Map1) ###Identification of linkage groups GBS_M.Map1$geno$L1 GBS_M.Map1$geno$L5 GBS_M.Map1$geno$L7 GBS_M.Map1$geno$L9 GBS_M.Map1$geno$L11 GBS_M.Map1$geno$L13 GBS_M.Map1$geno$L15 GBS_M.Map1$geno$L18 GBS_M.Map1$geno$L19 GBS_M.Map1$geno$L20 names(GBS_M.Map1[[1]])<-c("Chr1a","Chr1b","Chr2","Chr3","Chr4","Chr5","Chr6","Chr8","Chr7a","Chr7b") ### % ER GBS_M.Map1<-calc.errorlod(GBS_M.Map1,error.prob = 0.001) #Prints those genotypes with error LOD scores above a specified cutoff toperr<-top.errorlod(GBS_M.Map1,cutoff=3) length(toperr$id) ### Convert suspecious data into NA GBS_M.CleanMap<-GBS_M.Map1 for (i in 1:nrow(toperr)) { chr<-toperr$chr[i] id<-toperr$id[i] mar<-toperr$marker[i] GBS_M.CleanMap$geno[[chr]]$data[GBS_M.CleanMap$pheno$id==id,mar]<-NA } rm(toperr) ###Imputation of missing data (l<-length(findDupMarkers(GBS_M.CleanMap))) ToDrop<-unlist(findDupMarkers(GBS_M.CleanMap)) GBS_M.CleanMap<-drop.markers(GBS_M.CleanMap,ToDrop) NewMap<-est.map(GBS_M.CleanMap,map.function="kosambi",n.cluster=4,maxit=4000) GBS_M.CleanMap<-replace.map(GBS_M.CleanMap,NewMap) summary.map(GBS_M.CleanMap) plot.map(GBS_M.CleanMap, main="Genetic map of Moniqui") #Exporting the file write.cross(GBS_M.CleanMap,format = "csvs",filestem = "GBS_M_1%_4922") ############################################################################# # # # QTL detection for apricot fruit quality # # # ############################################################################# library(qtl) ############################################################################# ################ Goldrich ################################ ############################################################################# donSNP1<-read.cross("csv", file="GoldQTL1.csv", na.strings = "NA", genotypes=c("A","H"),map.function="kosambi",sep=";",dec=",") Pheno<-read.table("Quality0607_10.csv", header=T, sep=";",dec=".") # calculate the probabilities of the true underlying genotypes given the observed multipoint marker data with possible allowance for genotyping errors ## Simple interval mapping geno<-calc.genoprob(donSNP1, step=7,map.function = "kosambi", stepwidth = "fixed") #F.weight out.hk<-scanone(cross = geno, pheno.col = Pheno[,2], model = "normal",method = "hk") out.hk.permutations <- scanone(cross = geno,pheno.col = Pheno[,2],model = "normal", method = "hk", n.perm = 1000, perm.Xsp = FALSE, verbose = FALSE) summary(out.hk, perms = out.hk.permutations, alpha = 0.01, pvalues=TRUE) donnees.composite <- cim(geno, pheno.col = Pheno[,2], n.marcovar = 1, method = "hk",map.function = "kosambi") #The permutations are carried out to know the value of the threshold donnees.composite.permutations <- cim(geno, pheno.col = Pheno[,2], n.marcovar = 1, method = "hk", map.function = "kosambi", n.perm = 1000) summary(donnees.composite.permutations, alpha = c(0.01, 0.05)) summary(donnees.composite, perms = donnees.composite.permutations, alpha = 0.01, pvalues=TRUE) binom.test(0, 1000)$conf.int plot(out.hk, donnees.composite, col = c("blue", "red"), main="F.weight") legend("topright", legend=c("Simple Interval Mapping", "Composite Interval Mapping"), col=c("blue", "red"), lty=1:2, cex=0.8) add.threshold(out = donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) add.cim.covar(donnees.composite) #Indicate marker covariates from composite interval mapping out.hk.gl1 <- scanone(cross = geno, chr = '1', pheno.col =Pheno[,2], model = "normal",method = "hk") plot(out.hk.gl1, donnees.composite, col = c("blue", "red"), main="F.weight") add.cim.covar(donnees.composite) add.threshold(out = donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) # Interval estimates of QTL location bayesint(out.hk.gl1, chr="1", prob=0.95, lodcolumn=1, expandtomarkers=F) bayesint(donnees.composite, chr="1", prob=0.95, lodcolumn=1, expandtomarkers=FALSE) ####### Draws an effect plot ####### Means of the genotype classes with effectplot par(mfrow=c(1,1)) eff<-effectplot(geno, pheno.col=2, mname1="Pp01:11998309_A/G_P1",add.legend = T) #Allele effect and % variance explained by each QTL #Add pseudomarkers every 2 cM to densify the map on the basis of the observed markers fake.f2 <- sim.geno(geno, n.draws=8, step=2,map.function="kosambi",err=0.001) geno.mark<- makeqtl(fake.f2,chr=1,pos=99.2) summary(geno.mark) my.formula<-y~Q1 geno.mark.Q<- fitqtl(fake.f2, geno.mark,pheno.col=2,formula=my.formula,method="hk", get.ests=TRUE) geno.mark.Q.sum<- summary(geno.mark.Q) geno.mark.Q.sum lodint(out.hk.gl1,chr=1,expandtomarkers=FALSE) lodint(donnees.composite,chr=1,expandtomarkers=FALSE) #Hue.g out.hk<-scanone(cross = geno, pheno.col = Pheno[,3], model = "normal",method = "hk") out.hk.permutations <- scanone(cross = geno,pheno.col = Pheno[,3],model = "normal", method = "hk", n.perm = 1000, perm.Xsp = FALSE, verbose = FALSE) summary(out.hk, perms = out.hk.permutations, alpha = 0.01) donnees.composite <- cim(geno, pheno.col = Pheno[,3], n.marcovar = 1, method = "hk",map.function = "kosambi") #We perform permutations to know the value of the threshold donnees.composite.permutations <- cim(geno, pheno.col = Pheno[,3], n.marcovar = 1,method = "hk", map.function = "kosambi", n.perm = 1000) summary(donnees.composite.permutations, alpha = c(0.01, 0.05)) summary(donnees.composite, perms = donnees.composite.permutations, alpha = 0.01, pvalues=TRUE) binom.test(0, 1000)$conf.int plot(out.hk, donnees.composite, col = c("blue", "red"), main="Hue angle of ground color") legend("topright", legend=c("Simple Interval Mapping", "Composite Interval Mapping"), col=c("blue", "red"), lty=1:2, cex=0.8) add.threshold(out = donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) #Indicate marker covariates from composite interval mapping add.cim.covar(donnees.composite) out.hk.gl3 <- scanone(cross = geno, chr = '3', pheno.col =Pheno[,3], model = "normal",method = "hk") plot(out.hk.gl3, donnees.composite, col = c("blue", "red"), main="Hue angle of ground color") add.cim.covar(donnees.composite) add.threshold(out = donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) # Interval estimates of QTL location bayesint(out.hk.gl3, chr="3", prob=0.95, lodcolumn=1, expandtomarkers=F) bayesint(donnees.composite, chr="3", prob=0.95, lodcolumn=1, expandtomarkers=FALSE) ####### Draws an effect plot eff<-effectplot(geno, pheno.col=3, mname1="Pp03:24812147_C/T_P1",add.legend = T) #Allele effect and % variance explained by each QTL #Add pseudomarkers every 2 cM to densify the map on the basis of the observed markers fake.f2 <- sim.geno(geno, n.draws=8, step=2,map.function="kosambi",err=0.001) geno.mark<- makeqtl(fake.f2,"3", 3.76, what="prob") summary(geno.mark) my.formula<-y~Q1 geno.mark.Q<- fitqtl(fake.f2, geno.mark,pheno.col=3,formula=my.formula,method="hk", get.ests=TRUE) geno.mark.Q.sum<- summary(geno.mark.Q) geno.mark.Q.sum lodint(donnees.composite,chr=3,expandtomarkers=FALSE) #Ethylene out.hk<-scanone(cross = geno, pheno.col = Pheno[,4], model = "normal",method = "hk") out.hk.permutations <- scanone(cross = geno,pheno.col = Pheno[,4],model = "normal", method = "hk", n.perm = 1000, perm.Xsp = FALSE, verbose = FALSE) summary(out.hk, perms = out.hk.permutations, alpha = 0.01) donnees.composite <- cim(geno, pheno.col = Pheno[,4], n.marcovar = 1, method = "hk",map.function = "kosambi") #We perform permutations to know the value of the threshold donnees.composite.permutations <- cim(geno, pheno.col = Pheno[,4], n.marcovar = 1, method = "hk", map.function = "kosambi", n.perm = 1000) summary(donnees.composite.permutations, alpha = c(0.01, 0.05)) summary(donnees.composite, perms = donnees.composite.permutations, alpha = 0.01,pvalues=TRUE) plot(out.hk, donnees.composite, col = c("blue", "red"), main="Ethylene") legend("topright", legend=c("Simple Interval Mapping", "Composite Interval Mapping"), col=c("blue", "red"), lty=1:2, cex=0.8) add.threshold(out = donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) #Indicate marker covariates from composite interval mapping add.cim.covar(donnees.composite) #1 out.hk.gl1 <- scanone(cross = geno, chr = '1', pheno.col =Pheno[,4], model = "normal",method = "hk") plot(out.hk.gl1, donnees.composite, col = c("blue", "red"), main="Ethylene") add.cim.covar(donnees.composite) add.threshold(out = donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) # Interval estimates of QTL location bayesint(out.hk.gl1, chr="1", prob=0.95, lodcolumn=1, expandtomarkers=F) bayesint(donnees.composite, chr="1", prob=0.95, lodcolumn=1, expandtomarkers=FALSE) #2 out.hk.gl2 <- scanone(cross = geno, chr = '2', pheno.col =Pheno[,4], model = "normal",method = "hk") plot(out.hk.gl2, donnees.composite, col = c("blue", "red"), main="Ethylene") add.cim.covar(donnees.composite) add.threshold(out = donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) # Interval estimates of QTL location bayesint(out.hk.gl2, chr="2", prob=0.95, lodcolumn=1, expandtomarkers=F) bayesint(donnees.composite, chr="2", prob=0.95, lodcolumn=1, expandtomarkers=FALSE) ####### Draws an effect plot eff<-effectplot(geno, pheno.col=4, mname1="Pp01:11244186_T/C_P1",add.legend = T) eff<-effectplot(geno, pheno.col=4, mname1="Pp02:21721333_C/T_P2",add.legend = T) #Allele effect and % variance explained by each QTL #Add pseudomarkers every 2 cM to densify the map on the basis of the observed markers #1 fake.f2 <- sim.geno(geno, n.draws=8, step=2,map.function="kosambi",err=0.001) geno.mark<- makeqtl(fake.f2,"1", 99.2, what="prob") summary(geno.mark) my.formula<-y~Q1 geno.mark.Q<- fitqtl(fake.f2, geno.mark,pheno.col=4,formula=my.formula,method="hk", get.ests=TRUE) geno.mark.Q.sum<- summary(geno.mark.Q) geno.mark.Q.sum lodint(donnees.composite,chr=1,expandtomarkers=FALSE) #2 fake.f2 <- sim.geno(geno, n.draws=8, step=2,map.function="kosambi",err=0.001) geno.mark<- makeqtl(fake.f2,"2", 34.3, what="prob") summary(geno.mark) my.formula<-y~Q1 geno.mark.Q<- fitqtl(fake.f2, geno.mark,pheno.col=4,formula=my.formula,method="hk", get.ests=TRUE) geno.mark.Q.sum<- summary(geno.mark.Q) geno.mark.Q.sum lodint(donnees.composite,chr=2,expandtomarkers=FALSE) #RI out.hk<-scanone(cross = geno, pheno.col = Pheno[,5], model = "normal",method = "hk") out.hk.permutations <- scanone(cross = geno,pheno.col = Pheno[,5],model = "normal", method = "hk", n.perm = 1000, perm.Xsp = FALSE, verbose = FALSE) summary(out.hk, perms = out.hk.permutations, alpha = 0.01) donnees.composite <- cim(geno, pheno.col = Pheno[,5], n.marcovar = 1, method = "hk",map.function = "kosambi") #We perform permutations to know the value of the threshold donnees.composite.permutations <- cim(geno, pheno.col = Pheno[,5], n.marcovar = 1,method = "hk", map.function = "kosambi", n.perm = 1000) summary(donnees.composite.permutations, alpha = c(0.01, 0.05)) summary(donnees.composite, perms = donnees.composite.permutations, alpha = 0.01,pvalues=TRUE) plot(out.hk, donnees.composite, col = c("blue", "red"), main="Refraction index") legend("topright", legend=c("Simple Interval Mapping", "Composite Interval Mapping"), col=c("blue", "red"), lty=1:2, cex=0.8) add.threshold(out = donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) #Indicate marker covariates from composite interval mapping add.cim.covar(donnees.composite) out.hk.gl4 <- scanone(cross = geno, chr = '4', pheno.col =Pheno[,5], model = "normal",method = "hk") plot(out.hk.gl4, donnees.composite, col = c("blue", "red"), main="Refraction index") add.cim.covar(donnees.composite) add.threshold(out = donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) # Interval estimates of QTL location bayesint(out.hk.gl4, chr="4", prob=0.95, lodcolumn=1, expandtomarkers=F) bayesint(donnees.composite, chr="4", prob=0.95, lodcolumn=1, expandtomarkers=FALSE) ####### Draws an effect plot eff<-effectplot(geno, pheno.col=5, mname1="Pp04:10534651_G/T_P1",add.legend = T) #Allele effect and % variance explained by each QTL #Add pseudomarkers every 2 cM to densify the map on the basis of the observed markers fake.f2 <- sim.geno(geno, n.draws=8, step=2,map.function="kosambi",err=0.001) geno.mark<- makeqtl(fake.f2,"4", 32.5, what="prob") summary(geno.mark) my.formula<-y~Q1 geno.mark.Q<- fitqtl(fake.f2, geno.mark,pheno.col=5,formula=my.formula,method="hk", get.ests=TRUE) geno.mark.Q.sum<- summary(geno.mark.Q) geno.mark.Q.sum lodint(donnees.composite,chr=4,expandtomarkers=FALSE) #TA out.hk<-scanone(cross = geno, pheno.col = Pheno[,6], model = "normal",method = "hk") out.hk.permutations <- scanone(cross = geno,pheno.col = Pheno[,6],model = "normal", method = "hk", n.perm = 1000, perm.Xsp = FALSE, verbose = FALSE) summary(out.hk, perms = out.hk.permutations, alpha = 0.01) donnees.composite <- cim(geno, pheno.col = Pheno[,6], n.marcovar = 1, method = "hk",map.function = "kosambi") #We perform permutations to know the value of the threshold donnees.composite.permutations <- cim(geno, pheno.col = Pheno[,6], n.marcovar = 1,method = "hk", map.function = "kosambi", n.perm = 1000) summary(donnees.composite.permutations, alpha = c(0.01, 0.05)) summary(donnees.composite, perms = donnees.composite.permutations, alpha = 0.01,pvalues=TRUE) #2 out.hk.gl2 <- scanone(cross = geno, chr = '2', pheno.col =Pheno[,6], model = "normal",method = "hk") plot(out.hk.gl2, donnees.composite, col = c("blue", "red"), main="Glucose") add.cim.covar(donnees.composite) add.threshold(out = donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) # Interval estimates of QTL location bayesint(out.hk.gl2, chr="2", prob=0.95, lodcolumn=1, expandtomarkers=F) bayesint(donnees.composite, chr="2", prob=0.95, lodcolumn=1, expandtomarkers=FALSE) ####### Draws an effect plot eff<-effectplot(geno, pheno.col=6, mname1="Pp06:12455040_G/C_P2",add.legend = T) #Allele effect and % variance explained by each QTL #Add pseudomarkers every 2 cM to densify the map on the basis of the observed markers fake.f2 <- sim.geno(geno, n.draws=8, step=2,map.function="kosambi",err=0.001) geno.mark<- makeqtl(fake.f2,"2", 35, what="prob") summary(geno.mark) my.formula<-y~Q1 geno.mark.Q<- fitqtl(fake.f2, geno.mark,pheno.col=6,formula=my.formula,method="hk", get.ests=TRUE) geno.mark.Q.sum<- summary(geno.mark.Q) geno.mark.Q.sum lodint(donnees.composite,chr=2,expandtomarkers=FALSE) #Pseudomarker #c2.loc35 m<-find.marker(cross= geno, chr=2, pos=35)#Pp02:19781073_G/A_P2 m #6 out.hk.gl6 <- scanone(cross = geno, chr = '6', pheno.col =Pheno[,6], model = "normal",method = "hk") plot(out.hk.gl6, donnees.composite, col = c("blue", "red"), main="Titratable acidity") add.cim.covar(donnees.composite) add.threshold(out = donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) # Interval estimates of QTL location bayesint(out.hk.gl6, chr="6", prob=0.95, lodcolumn=1, expandtomarkers=F) bayesint(donnees.composite, chr="6", prob=0.95, lodcolumn=1, expandtomarkers=FALSE) ####### Draws an effect plot eff<-effectplot(geno, pheno.col=6, mname1="Pp06:12455040_G/C_P2",add.legend = T) #Allele effect and % variance explained by each QTL #Add pseudomarkers every 2 cM to densify the map on the basis of the observed markers fake.f2 <- sim.geno(geno, n.draws=8, step=2,map.function="kosambi",err=0.001) geno.mark<- makeqtl(fake.f2,"6", 21.4, what="prob") summary(geno.mark) my.formula<-y~Q1 geno.mark.Q<- fitqtl(fake.f2, geno.mark,pheno.col=6,formula=my.formula,method="hk", get.ests=TRUE) geno.mark.Q.sum<- summary(geno.mark.Q) geno.mark.Q.sum lodint(donnees.composite,chr=6,expandtomarkers=FALSE) #7 out.hk.gl7 <- scanone(cross = geno, chr = '7', pheno.col =Pheno[,6], model = "normal",method = "hk") plot(out.hk.gl7, donnees.composite, col = c("blue", "red"), main="Glucose") add.cim.covar(donnees.composite) add.threshold(out = donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) # Interval estimates of QTL location bayesint(out.hk.gl7, chr="7", prob=0.95, lodcolumn=1, expandtomarkers=F) bayesint(donnees.composite, chr="7", prob=0.95, lodcolumn=1, expandtomarkers=FALSE) ####### Draws an effect plot eff<-effectplot(geno, pheno.col=6, mname1="Pp07:14468943_C/T_P2",add.legend = T) #Allele effect and % variance explained by each QTL #Add pseudomarkers every 2 cM to densify the map on the basis of the observed markers fake.f2 <- sim.geno(geno, n.draws=8, step=2,map.function="kosambi",err=0.001) geno.mark<- makeqtl(fake.f2,"7", 35, what="prob") summary(geno.mark) my.formula<-y~Q1 geno.mark.Q<- fitqtl(fake.f2, geno.mark,pheno.col=6,formula=my.formula,method="hk", get.ests=TRUE) geno.mark.Q.sum<- summary(geno.mark.Q) geno.mark.Q.sum lodint(donnees.composite,chr=7,expandtomarkers=FALSE) #Glucose out.hk<-scanone(cross = geno, pheno.col = Pheno[,7], model = "normal",method = "hk") out.hk.permutations <- scanone(cross = geno,pheno.col = Pheno[,7],model = "normal", method = "hk", n.perm = 1000, perm.Xsp = FALSE, verbose = FALSE) summary(out.hk, perms = out.hk.permutations, alpha = 0.01) donnees.composite <- cim(geno, pheno.col = Pheno[,7], n.marcovar = 1, method = "hk",map.function = "kosambi") #We perform permutations to know the value of the threshold donnees.composite.permutations <- cim(geno, pheno.col = Pheno[,7], n.marcovar = 1,method = "hk", map.function = "kosambi", n.perm = 1000) summary(donnees.composite.permutations, alpha = c(0.01, 0.05)) summary(donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) plot(out.hk, donnees.composite, col = c("blue", "red"), main="Glucose") legend("topright", legend=c("Simple Interval Mapping", "Composite Interval Mapping"), col=c("blue", "red"), lty=1:2, cex=0.8) add.threshold(out = donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) #Indicate marker covariates from composite interval mapping add.cim.covar(donnees.composite) out.hk.gl6 <- scanone(cross = geno, chr = '6', pheno.col =Pheno[,7], model = "normal",method = "hk") plot(out.hk.gl6, donnees.composite, col = c("blue", "red"), main="Glucose") add.cim.covar(donnees.composite) add.threshold(out = donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) # Interval estimates of QTL location bayesint(out.hk.gl6, chr="6", prob=0.95, lodcolumn=1, expandtomarkers=F) bayesint(donnees.composite, chr="6", prob=0.95, lodcolumn=1, expandtomarkers=FALSE) ####### Draws an effect plot eff<-effectplot(geno, pheno.col=7, mname1="Pp06:12860854_G/C_P2",add.legend = T) #Allele effect and % variance explained by each QTL #Add pseudomarkers every 2 cM to densify the map on the basis of the observed markers fake.f2 <- sim.geno(geno, n.draws=8, step=2,map.function="kosambi",err=0.001) geno.mark<- makeqtl(fake.f2,"6", 22.1, what="prob") summary(geno.mark) my.formula<-y~Q1 geno.mark.Q<- fitqtl(fake.f2, geno.mark,pheno.col=7,formula=my.formula,method="hk", get.ests=TRUE) geno.mark.Q.sum<- summary(geno.mark.Q) geno.mark.Q.sum lodint(donnees.composite,chr=6,expandtomarkers=FALSE) #Fructose out.hk<-scanone(cross = geno, pheno.col = Pheno[,8], model = "normal",method = "hk") out.hk.permutations <- scanone(cross = geno,pheno.col = Pheno[,8],model = "normal", method = "hk", n.perm = 1000, perm.Xsp = FALSE, verbose = FALSE) summary(out.hk, perms = out.hk.permutations, alpha = 0.01) donnees.composite <- cim(geno, pheno.col = Pheno[,8], n.marcovar = 1, method = "hk",map.function = "kosambi") #We perform permutations to know the value of the threshold donnees.composite.permutations <- cim(geno, pheno.col = Pheno[,8], n.marcovar = 1, method = "hk", map.function = "kosambi", n.perm = 1000) summary(donnees.composite.permutations, alpha = c(0.01, 0.05)) summary(donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) plot(out.hk, donnees.composite, col = c("blue", "red"), main="Fructose") legend("topright", legend=c("Simple Interval Mapping", "Composite Interval Mapping"), col=c("blue", "red"), lty=1:2, cex=0.8) add.threshold(out = donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) #Indicate marker covariates from composite interval mapping add.cim.covar(donnees.composite) #Sucrose out.hk<-scanone(cross = geno, pheno.col = Pheno[,9], model = "normal",method = "hk") out.hk.permutations <- scanone(cross = geno,pheno.col = Pheno[,9],model = "normal", method = "hk", n.perm = 1000, perm.Xsp = FALSE, verbose = FALSE) summary(out.hk, perms = out.hk.permutations, alpha = 0.01) donnees.composite <- cim(geno, pheno.col = Pheno[,9], n.marcovar = 1, method = "hk",map.function = "kosambi") #We perform permutations to know the value of the threshold donnees.composite.permutations <- cim(geno, pheno.col = Pheno[,9], n.marcovar = 1, method = "hk", map.function = "kosambi", n.perm = 1000) summary(donnees.composite.permutations, alpha = c(0.01, 0.05)) summary(donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) plot(out.hk, donnees.composite, col = c("blue", "red"), main="Sucrose") legend("topright", legend=c("Simple Interval Mapping", "Composite Interval Mapping"), col=c("blue", "red"), lty=1:2, cex=0.8) add.threshold(out = donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) #Indicate marker covariates from composite interval mapping add.cim.covar(donnees.composite) out.hk.gl4 <- scanone(cross = geno, chr = '4', pheno.col =Pheno[,9], model = "normal",method = "hk") plot(out.hk.gl4, donnees.composite, col = c("blue", "red"), main="Sucrose") add.cim.covar(donnees.composite) add.threshold(out = donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) # Interval estimates of QTL location bayesint(out.hk.gl4, chr="4", prob=0.95, lodcolumn=1, expandtomarkers=F) bayesint(donnees.composite, chr="4", prob=0.95, lodcolumn=1, expandtomarkers=FALSE) ####### Draws an effect plot eff<-effectplot(geno, pheno.col=9, mname1="Pp04:6576886_T/C_P1",add.legend = T) #Allele effect and % variance explained by each QTL #Add pseudomarkers every 2 cM to densify the map on the basis of the observed markers fake.f2 <- sim.geno(geno, n.draws=8, step=2,map.function="kosambi",err=0.001) geno.mark<- makeqtl(fake.f2,"4", 70, what="prob") summary(geno.mark) my.formula<-y~Q1 geno.mark.Q<- fitqtl(fake.f2, geno.mark,pheno.col=9,formula=my.formula,method="hk", get.ests=TRUE) geno.mark.Q.sum<- summary(geno.mark.Q) geno.mark.Q.sum lodint(donnees.composite,chr=4,expandtomarkers=FALSE) #Citric.A. out.hk<-scanone(cross = geno, pheno.col = Pheno[,10], model = "normal",method = "hk") out.hk.permutations <- scanone(cross = geno,pheno.col = Pheno[,10],model = "normal", method = "hk", n.perm = 1000, perm.Xsp = FALSE, verbose = FALSE) summary(out.hk, perms = out.hk.permutations, alpha = 0.01) donnees.composite <- cim(geno, pheno.col = Pheno[,10], n.marcovar = 1, method = "hk",map.function = "kosambi") #We perform permutations to know the value of the threshold donnees.composite.permutations <- cim(geno, pheno.col = Pheno[,10], n.marcovar = 1,method = "hk", map.function = "kosambi", n.perm = 1000) summary(donnees.composite.permutations, alpha = c(0.01,0.05)) summary(donnees.composite, perms = donnees.composite.permutations, alpha = 0.01,pvalues=TRUE) binom.test(0, 1000)$conf.int plot(out.hk, donnees.composite, col = c("blue", "red"), main="Citric.A.") legend("topright", legend=c("Simple Interval Mapping", "Composite Interval Mapping"), col=c("blue", "red"), lty=1:2, cex=0.8) add.threshold(out = donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) #Indicate marker covariates from composite interval mapping add.cim.covar(donnees.composite) #2 out.hk.gl2 <- scanone(cross = geno, chr = '2', pheno.col =Pheno[,10], model = "normal",method = "hk") plot(out.hk.gl2, donnees.composite, col = c("blue", "red"), main="Citric.A.") add.cim.covar(donnees.composite) add.threshold(out = donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) # Interval estimates of QTL location bayesint(out.hk.gl2, chr="2", prob=0.95, lodcolumn=1, expandtomarkers=F) bayesint(donnees.composite, chr="2", prob=0.95, lodcolumn=1, expandtomarkers=FALSE) ####### Draws an effect plot eff<-effectplot(geno, pheno.col=10, mname1="Pp02:21721333_C/T_P2",add.legend = T) #Allele effect and % variance explained by each QTL #Add pseudomarkers every 2 cM to densify the map on the basis of the observed markers fake.f2 <- sim.geno(geno, n.draws=8, step=2,map.function="kosambi",err=0.001) geno.mark<- makeqtl(fake.f2,"2", 34.3, what="prob") summary(geno.mark) my.formula<-y~Q1 geno.mark.Q<- fitqtl(fake.f2, geno.mark,pheno.col=10,formula=my.formula,method="hk", get.ests=TRUE) geno.mark.Q.sum<- summary(geno.mark.Q) geno.mark.Q.sum lodint(donnees.composite,chr=2,expandtomarkers=FALSE) #7 out.hk.gl7 <- scanone(cross = geno, chr = '7', pheno.col =Pheno[,10], model = "normal",method = "hk") plot(out.hk.gl7, donnees.composite, col = c("blue", "red"), main="Citric.A.") add.cim.covar(donnees.composite) add.threshold(out = donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) # Interval estimates of QTL location bayesint(out.hk.gl7, chr="7", prob=0.95, lodcolumn=1, expandtomarkers=F) bayesint(donnees.composite, chr="7", prob=0.95, lodcolumn=1, expandtomarkers=FALSE) ####### Draws an effect plot eff<-effectplot(geno, pheno.col=10, mname1="Pp07:14468943_C/T_P2",add.legend = T) #Allele effect and % variance explained by each QTL #Add pseudomarkers every 2 cM to densify the map on the basis of the observed markers fake.f2 <- sim.geno(geno, n.draws=8, step=2,map.function="kosambi",err=0.001) geno.mark<- makeqtl(fake.f2,"7", 35, what="prob") summary(geno.mark) my.formula<-y~Q1 geno.mark.Q<- fitqtl(fake.f2, geno.mark,pheno.col=10,formula=my.formula,method="hk", get.ests=TRUE) geno.mark.Q.sum<- summary(geno.mark.Q) geno.mark.Q.sum lodint(donnees.composite,chr=7,expandtomarkers=FALSE) #Malic.A. out.hk<-scanone(cross = geno, pheno.col = Pheno[,11], model = "normal",method = "hk") out.hk.permutations <- scanone(cross = geno,pheno.col = Pheno[,11],model = "normal", method = "hk", n.perm = 1000, perm.Xsp = FALSE, verbose = FALSE) summary(out.hk, perms = out.hk.permutations, alpha = 0.01) donnees.composite <- cim(geno, pheno.col = Pheno[,11], n.marcovar = 1, method = "hk",map.function = "kosambi") #We perform permutations to know the value of the threshold donnees.composite.permutations <- cim(geno, pheno.col = Pheno[,11], n.marcovar = 1,method = "hk", map.function = "kosambi", n.perm = 1000) summary(donnees.composite.permutations, alpha = c(0.01,0.05)) summary(donnees.composite, perms = donnees.composite.permutations, alpha = 0.01,pvalues=TRUE) binom.test(0, 1000)$conf.int plot(out.hk, donnees.composite, col = c("blue", "red"), main="Malic.A.") legend("topright", legend=c("Simple Interval Mapping", "Composite Interval Mapping"), col=c("blue", "red"), lty=1:2, cex=0.8) add.threshold(out = donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) #Indicate marker covariates from composite interval mapping add.cim.covar(donnees.composite) out.hk.gl2 <- scanone(cross = geno, chr = '2', pheno.col =Pheno[,11], model = "normal",method = "hk") plot(out.hk.gl2, donnees.composite, col = c("blue", "red"), main="Malic.A.") add.cim.covar(donnees.composite) add.threshold(out = donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) # Interval estimates of QTL location bayesint(out.hk.gl2, chr="2", prob=0.95, lodcolumn=1, expandtomarkers=F) bayesint(donnees.composite, chr="2", prob=0.95, lodcolumn=1, expandtomarkers=FALSE) ####### Draws an effect plot eff<-effectplot(geno, pheno.col=11, mname1="Pp02:26342242_T/A_P2",add.legend = T) #Allele effect and % variance explained by each QTL #Add pseudomarkers every 2 cM to densify the map on the basis of the observed markers fake.f2 <- sim.geno(geno, n.draws=8, step=2,map.function="kosambi",err=0.001) geno.mark<- makeqtl(fake.f2,"2", 27.3, what="prob") summary(geno.mark) my.formula<-y~Q1 geno.mark.Q<- fitqtl(fake.f2, geno.mark,pheno.col=11,formula=my.formula,method="hk", get.ests=TRUE) geno.mark.Q.sum<- summary(geno.mark.Q) geno.mark.Q.sum lodint(donnees.composite,chr=2,expandtomarkers=FALSE) ############################################################################# ################ Moniqui ################################ ############################################################################# donSNP2<-read.cross("csv", file="MonQTL1.csv", na.strings = "NA", genotypes=c("A","H"),map.function="kosambi",sep=";",dec=",") donSNP2<-jittermap(donSNP2) geno<-calc.genoprob(donSNP2, step=7,map.function = "kosambi", stepwidth = "fixed") #F.weight out.hk<-scanone(cross = geno, pheno.col = Pheno[,2], model = "normal",method = "hk") out.hk.permutations <- scanone(cross = geno,pheno.col = Pheno[,2],model = "normal", method = "hk", n.perm = 1000, perm.Xsp = FALSE, verbose = FALSE) summary(out.hk, perms = out.hk.permutations, alpha = 0.01, pvalues=TRUE) donnees.composite <- cim(geno, pheno.col = Pheno[,2], n.marcovar = 1, method = "hk",map.function = "kosambi") #The permutations are carried out to know the value of the threshold donnees.composite.permutations <- cim(geno, pheno.col = Pheno[,2], n.marcovar = 1, method = "hk", map.function = "kosambi", n.perm = 1000) summary(donnees.composite.permutations, alpha = c(0.01, 0.05)) summary(donnees.composite, perms = donnees.composite.permutations, alpha = 0.01, pvalues=TRUE) #Hue.g out.hk<-scanone(cross = geno, pheno.col = Pheno[,3], model = "normal",method = "hk") out.hk.permutations <- scanone(cross = geno,pheno.col = Pheno[,3],model = "normal", method = "hk", n.perm = 1000, perm.Xsp = FALSE, verbose = FALSE) summary(out.hk, perms = out.hk.permutations, alpha = 0.01) donnees.composite <- cim(geno, pheno.col = Pheno[,3], n.marcovar = 1, method = "hk",map.function = "kosambi") #We perform permutations to know the value of the threshold donnees.composite.permutations <- cim(geno, pheno.col = Pheno[,3], n.marcovar = 1,method = "hk", map.function = "kosambi", n.perm = 1000) summary(donnees.composite.permutations, alpha = c(0.01, 0.05)) summary(donnees.composite, perms = donnees.composite.permutations, alpha = 0.01, pvalues=TRUE) binom.test(0, 1000)$conf.int plot(out.hk, donnees.composite, col = c("blue", "red"), main="Hue angle of ground color") legend("topright", legend=c("Simple Interval Mapping", "Composite Interval Mapping"), col=c("blue", "red"), lty=1:2, cex=0.8) add.threshold(out = donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) #Indicate marker covariates from composite interval mapping add.cim.covar(donnees.composite) #Ethylene out.hk<-scanone(cross = geno, pheno.col = Pheno[,4], model = "normal",method = "hk") out.hk.permutations <- scanone(cross = geno,pheno.col = Pheno[,4],model = "normal", method = "hk", n.perm = 1000, perm.Xsp = FALSE, verbose = FALSE) summary(out.hk, perms = out.hk.permutations, alpha = 0.01) donnees.composite <- cim(geno, pheno.col = Pheno[,4], n.marcovar = 1, method = "hk",map.function = "kosambi") #We perform permutations to know the value of the threshold donnees.composite.permutations <- cim(geno, pheno.col = Pheno[,4], n.marcovar = 1, method = "hk", map.function = "kosambi", n.perm = 1000) summary(donnees.composite.permutations, alpha = c(0.01, 0.05)) summary(donnees.composite, perms = donnees.composite.permutations, alpha = 0.01,pvalues=TRUE) plot(out.hk, donnees.composite, col = c("blue", "red"), main="Ethylene") legend("topright", legend=c("Simple Interval Mapping", "Composite Interval Mapping"), col=c("blue", "red"), lty=1:2, cex=0.8) add.threshold(out = donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) #Indicate marker covariates from composite interval mapping add.cim.covar(donnees.composite) #2 out.hk.gl2 <- scanone(cross = geno, chr = '2', pheno.col =Pheno[,4], model = "normal",method = "hk") plot(out.hk.gl2, donnees.composite, col = c("blue", "red"), main="Ethylene") add.cim.covar(donnees.composite) add.threshold(out = donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) # Interval estimates of QTL location bayesint(out.hk.gl2, chr="2", prob=0.95, lodcolumn=1, expandtomarkers=F) bayesint(donnees.composite, chr="2", prob=0.95, lodcolumn=1, expandtomarkers=FALSE) ####### Draws an effect plot eff<-effectplot(geno, pheno.col=4, mname1="Pp02:22232517_A/C_P2",add.legend = T) #Allele effect and % variance explained by each QTL #Add pseudomarkers every 2 cM to densify the map on the basis of the observed markers #2 fake.f2 <- sim.geno(geno, n.draws=8, step=2,map.function="kosambi",err=0.001) geno.mark<- makeqtl(fake.f2,"2", 59.9, what="prob") summary(geno.mark) my.formula<-y~Q1 geno.mark.Q<- fitqtl(fake.f2, geno.mark,pheno.col=4,formula=my.formula,method="hk", get.ests=TRUE) geno.mark.Q.sum<- summary(geno.mark.Q) geno.mark.Q.sum lodint(donnees.composite,chr=2,expandtomarkers=FALSE) #RI out.hk<-scanone(cross = geno, pheno.col = Pheno[,5], model = "normal",method = "hk") out.hk.permutations <- scanone(cross = geno,pheno.col = Pheno[,5],model = "normal", method = "hk", n.perm = 1000, perm.Xsp = FALSE, verbose = FALSE) summary(out.hk, perms = out.hk.permutations, alpha = 0.01) donnees.composite <- cim(geno, pheno.col = Pheno[,5], n.marcovar = 1, method = "hk",map.function = "kosambi") #We perform permutations to know the value of the threshold donnees.composite.permutations <- cim(geno, pheno.col = Pheno[,5], n.marcovar = 1,method = "hk", map.function = "kosambi", n.perm = 1000) summary(donnees.composite.permutations, alpha = c(0.01, 0.05)) summary(donnees.composite, perms = donnees.composite.permutations, alpha = 0.01,pvalues=TRUE) plot(out.hk, donnees.composite, col = c("blue", "red"), main="Refraction index") legend("topright", legend=c("Simple Interval Mapping", "Composite Interval Mapping"), col=c("blue", "red"), lty=1:2, cex=0.8) add.threshold(out = donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) #Indicate marker covariates from composite interval mapping add.cim.covar(donnees.composite) #2 out.hk.gl2 <- scanone(cross = geno, chr = '2', pheno.col =Pheno[,5], model = "normal",method = "hk") plot(out.hk.gl2, donnees.composite, col = c("blue", "red"), main="Refraction index") add.cim.covar(donnees.composite) add.threshold(out = donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) # Interval estimates of QTL location #2 bayesint(out.hk.gl2, chr="2", prob=0.95, lodcolumn=1, expandtomarkers=F) bayesint(donnees.composite, chr="2", prob=0.95, lodcolumn=1, expandtomarkers=FALSE) #Allele effect and % variance explained by each QTL #Add pseudomarkers every 2 cM to densify the map on the basis of the observed markers #2 fake.f2 <- sim.geno(geno, n.draws=8, step=2,map.function="kosambi",err=0.001) geno.mark<- makeqtl(fake.f2,"2", 56, what="prob") summary(geno.mark) my.formula<-y~Q1 geno.mark.Q<- fitqtl(fake.f2, geno.mark,pheno.col=5,formula=my.formula,method="hk", get.ests=TRUE) geno.mark.Q.sum<- summary(geno.mark.Q) geno.mark.Q.sum lodint(donnees.composite,chr=2,expandtomarkers=FALSE) #TA out.hk<-scanone(cross = geno, pheno.col = Pheno[,6], model = "normal",method = "hk") out.hk.permutations <- scanone(cross = geno,pheno.col = Pheno[,6],model = "normal", method = "hk", n.perm = 1000, perm.Xsp = FALSE, verbose = FALSE) summary(out.hk, perms = out.hk.permutations, alpha = 0.01) donnees.composite <- cim(geno, pheno.col = Pheno[,6], n.marcovar = 1, method = "hk",map.function = "kosambi") #We perform permutations to know the value of the threshold donnees.composite.permutations <- cim(geno, pheno.col = Pheno[,6], n.marcovar = 1,method = "hk", map.function = "kosambi", n.perm = 1000) summary(donnees.composite.permutations, alpha = c(0.01, 0.05)) summary(donnees.composite, perms = donnees.composite.permutations, alpha = 0.01,pvalues=TRUE) #Glucose out.hk<-scanone(cross = geno, pheno.col = Pheno[,7], model = "normal",method = "hk") out.hk.permutations <- scanone(cross = geno,pheno.col = Pheno[,7],model = "normal", method = "hk", n.perm = 1000, perm.Xsp = FALSE, verbose = FALSE) summary(out.hk, perms = out.hk.permutations, alpha = 0.01) donnees.composite <- cim(geno, pheno.col = Pheno[,7], n.marcovar = 1, method = "hk",map.function = "kosambi") #We perform permutations to know the value of the threshold donnees.composite.permutations <- cim(geno, pheno.col = Pheno[,7], n.marcovar = 1,method = "hk", map.function = "kosambi", n.perm = 1000) summary(donnees.composite.permutations, alpha = c(0.01, 0.05)) summary(donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) plot(out.hk, donnees.composite, col = c("blue", "red"), main="Glucose") legend("topright", legend=c("Simple Interval Mapping", "Composite Interval Mapping"), col=c("blue", "red"), lty=1:2, cex=0.8) add.threshold(out = donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) #Indicate marker covariates from composite interval mapping add.cim.covar(donnees.composite) #Fructose out.hk<-scanone(cross = geno, pheno.col = Pheno[,8], model = "normal",method = "hk") out.hk.permutations <- scanone(cross = geno,pheno.col = Pheno[,8],model = "normal", method = "hk", n.perm = 1000, perm.Xsp = FALSE, verbose = FALSE) summary(out.hk, perms = out.hk.permutations, alpha = 0.01) donnees.composite <- cim(geno, pheno.col = Pheno[,8], n.marcovar = 1, method = "hk",map.function = "kosambi") #We perform permutations to know the value of the threshold donnees.composite.permutations <- cim(geno, pheno.col = Pheno[,8], n.marcovar = 1,method = "hk", map.function = "kosambi", n.perm = 1000) summary(donnees.composite.permutations, alpha = c(0.01, 0.05)) summary(donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) plot(out.hk, donnees.composite, col = c("blue", "red"), main="Fructose") legend("topright", legend=c("Simple Interval Mapping", "Composite Interval Mapping"), col=c("blue", "red"), lty=1:2, cex=0.8) add.threshold(out = donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) #Indicate marker covariates from composite interval mapping add.cim.covar(donnees.composite) out.hk.gl1 <- scanone(cross = geno, chr = '1a', pheno.col =Pheno[,8], model = "normal",method = "hk") plot(out.hk.gl1, donnees.composite, col = c("blue", "red"), main="Fructose") add.cim.covar(donnees.composite) add.threshold(out = donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) # Interval estimates of QTL location bayesint(out.hk.gl1, chr="1a", prob=0.95, lodcolumn=1, expandtomarkers=F) bayesint(donnees.composite, chr="1a", prob=0.95, lodcolumn=1, expandtomarkers=FALSE) ####### Draws an effect plot eff<-effectplot(geno, pheno.col=8, mname1="Pp01:10479938_A/C_P1",add.legend = T) #Allele effect and % variance explained by each QTL #Add pseudomarkers every 2 cM to densify the map on the basis of the observed markers fake.f2 <- sim.geno(geno, n.draws=8, step=2,map.function="kosambi",err=0.001) geno.mark<- makeqtl(fake.f2,"1a", 44.1, what="prob") summary(geno.mark) my.formula<-y~Q1 geno.mark.Q<- fitqtl(fake.f2, geno.mark,pheno.col=8,formula=my.formula,method="hk", get.ests=TRUE) geno.mark.Q.sum<- summary(geno.mark.Q) geno.mark.Q.sum lodint(donnees.composite,chr="1a",expandtomarkers=FALSE) #2 out.hk.gl2 <- scanone(cross = geno, chr = '2', pheno.col =Pheno[,8], model = "normal",method = "hk") plot(out.hk.gl2, donnees.composite, col = c("blue", "red"), main="Fructose") add.cim.covar(donnees.composite) add.threshold(out = donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) # Interval estimates of QTL location bayesint(out.hk.gl2, chr="2", prob=0.95, lodcolumn=1, expandtomarkers=F) bayesint(donnees.composite, chr="2", prob=0.95, lodcolumn=1, expandtomarkers=FALSE) #Allele effect and % variance explained by each QTL #Add pseudomarkers every 2 cM to densify the map on the basis of the observed markers fake.f2 <- sim.geno(geno, n.draws=8, step=2,map.function="kosambi",err=0.001) geno.mark<- makeqtl(fake.f2,"2", 63, what="prob") summary(geno.mark) my.formula<-y~Q1 geno.mark.Q<- fitqtl(fake.f2, geno.mark,pheno.col=8,formula=my.formula,method="hk", get.ests=TRUE) geno.mark.Q.sum<- summary(geno.mark.Q) geno.mark.Q.sum lodint(donnees.composite,chr="2",expandtomarkers=FALSE) #Pseudomarker #c2.loc63 m<-find.marker(cross= geno, chr=2, pos=63)#Pp02:21837529_A/G_P1 m #Sucrose out.hk<-scanone(cross = geno, pheno.col = Pheno[,9], model = "normal",method = "hk") out.hk.permutations <- scanone(cross = geno,pheno.col = Pheno[,9],model = "normal", method = "hk", n.perm = 1000, perm.Xsp = FALSE, verbose = FALSE) summary(out.hk, perms = out.hk.permutations, alpha = 0.01) donnees.composite <- cim(geno, pheno.col = Pheno[,9], n.marcovar = 1, method = "hk",map.function = "kosambi") #We perform permutations to know the value of the threshold donnees.composite.permutations <- cim(geno, pheno.col = Pheno[,9], n.marcovar = 1,method = "hk", map.function = "kosambi", n.perm = 1000) summary(donnees.composite.permutations, alpha = c(0.01, 0.05)) summary(donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) plot(out.hk, donnees.composite, col = c("blue", "red"), main="Sucrose") legend("topright", legend=c("Simple Interval Mapping", "Composite Interval Mapping"), col=c("blue", "red"), lty=1:2, cex=0.8) add.threshold(out = donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) #Indicate marker covariates from composite interval mapping add.cim.covar(donnees.composite) #2 out.hk.gl2 <- scanone(cross = geno, chr = '2', pheno.col =Pheno[,9], model = "normal",method = "hk") plot(out.hk.gl2, donnees.composite, col = c("blue", "red"), main="Sucrose") add.cim.covar(donnees.composite) add.threshold(out = donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) # Interval estimates of QTL location #2 bayesint(out.hk.gl2, chr="2", prob=0.95, lodcolumn=1, expandtomarkers=F) bayesint(donnees.composite, chr="2", prob=0.95, lodcolumn=1, expandtomarkers=FALSE) #Allele effect and % variance explained by each QTL #Add pseudomarkers every 2 cM to densify the map on the basis of the observed markers #2 fake.f2 <- sim.geno(geno, n.draws=8, step=2,map.function="kosambi",err=0.001) geno.mark<- makeqtl(fake.f2,"2", 49, what="prob") summary(geno.mark) my.formula<-y~Q1 geno.mark.Q<- fitqtl(fake.f2, geno.mark,pheno.col=9,formula=my.formula,method="hk", get.ests=TRUE) geno.mark.Q.sum<- summary(geno.mark.Q) geno.mark.Q.sum lodint(donnees.composite,chr=2,expandtomarkers=FALSE) #Pseudomarker #c2.loc49 m<-find.marker(cross= geno, chr=2, pos=49) m #Pp02:24095980_G/A_P2 fake.f2 <- sim.geno(geno, n.draws=8, step=2,map.function="kosambi",err=0.001) geno.mark<- makeqtl(fake.f2,"2", 49, what="prob") summary(geno.mark) my.formula<-y~Q1 geno.mark.Q<- fitqtl(fake.f2, geno.mark,pheno.col=9,formula=my.formula,method="hk", get.ests=TRUE) geno.mark.Q.sum<- summary(geno.mark.Q) geno.mark.Q.sum bayesint(donnees.composite, chr="2", prob=0.95, lodcolumn=1, expandtomarkers=FALSE) lodint(donnees.composite,chr=2,expandtomarkers=FALSE) #Citric.A. out.hk<-scanone(cross = geno, pheno.col = Pheno[,10], model = "normal",method = "hk") out.hk.permutations <- scanone(cross = geno,pheno.col = Pheno[,10],model = "normal", method = "hk", n.perm = 1000, perm.Xsp = FALSE, verbose = FALSE) summary(out.hk, perms = out.hk.permutations, alpha = 0.01) donnees.composite <- cim(geno, pheno.col = Pheno[,10], n.marcovar = 1, method = "hk",map.function = "kosambi") #We perform permutations to know the value of the threshold donnees.composite.permutations <- cim(geno, pheno.col = Pheno[,10], n.marcovar = 1,method = "hk", map.function = "kosambi", n.perm = 1000) summary(donnees.composite.permutations, alpha = c(0.01,0.05)) summary(donnees.composite, perms = donnees.composite.permutations, alpha = 0.01,pvalues=TRUE) #Malic.A. out.hk<-scanone(cross = geno, pheno.col = Pheno[,11], model = "normal",method = "hk") out.hk.permutations <- scanone(cross = geno,pheno.col = Pheno[,11],model = "normal", method = "hk", n.perm = 1000, perm.Xsp = FALSE, verbose = FALSE) summary(out.hk, perms = out.hk.permutations, alpha = 0.01) donnees.composite <- cim(geno, pheno.col = Pheno[,11], n.marcovar = 1, method = "hk",map.function = "kosambi") #We perform permutations to know the value of the threshold donnees.composite.permutations <- cim(geno, pheno.col = Pheno[,11], n.marcovar = 1,method = "hk", map.function = "kosambi", n.perm = 1000) summary(donnees.composite.permutations, alpha = c(0.01,0.05)) summary(donnees.composite, perms = donnees.composite.permutations, alpha = 0.01,pvalues=TRUE) binom.test(0, 1000)$conf.int plot(out.hk, donnees.composite, col = c("blue", "red"), main="Malic.A.") legend("topright", legend=c("Simple Interval Mapping", "Composite Interval Mapping"), col=c("blue", "red"), lty=1:2, cex=0.8) add.threshold(out = donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) #Indicate marker covariates from composite interval mapping add.cim.covar(donnees.composite) #2 out.hk.gl2 <- scanone(cross = geno, chr = '2', pheno.col =Pheno[,11], model = "normal",method = "hk") plot(out.hk.gl2, donnees.composite, col = c("blue", "red"), main="Malic.A.") add.cim.covar(donnees.composite) add.threshold(out = donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) #3 out.hk.gl3 <- scanone(cross = geno, chr = '3', pheno.col =Pheno[,11], model = "normal",method = "hk") plot(out.hk.gl3, donnees.composite, col = c("blue", "red"), main="Malic.A.") add.cim.covar(donnees.composite) add.threshold(out = donnees.composite, perms = donnees.composite.permutations, alpha = 0.01) # Interval estimates of QTL location #2 bayesint(out.hk.gl2, chr="2", prob=0.95, lodcolumn=1, expandtomarkers=F) bayesint(donnees.composite, chr="2", prob=0.95, lodcolumn=1, expandtomarkers=FALSE) #3 bayesint(out.hk.gl3, chr="3", prob=0.95, lodcolumn=1, expandtomarkers=F) bayesint(donnees.composite, chr="3", prob=0.95, lodcolumn=1, expandtomarkers=FALSE) ####### Draws an effect plot eff<-effectplot(geno, pheno.col=11, mname1="Pp02:26917301_C/G_P2",add.legend = T) eff<-effectplot(geno, pheno.col=11, mname1="Pp03:6961578_G/A_P1",add.legend = T) #Allele effect and % variance explained by each QTL #Add pseudomarkers every 2 cM to densify the map on the basis of the observed markers #2 fake.f2 <- sim.geno(geno, n.draws=8, step=2,map.function="kosambi",err=0.001) geno.mark<- makeqtl(fake.f2,"2", 22.9, what="prob") summary(geno.mark) my.formula<-y~Q1 geno.mark.Q<- fitqtl(fake.f2, geno.mark,pheno.col=11,formula=my.formula,method="hk", get.ests=TRUE) geno.mark.Q.sum<- summary(geno.mark.Q) geno.mark.Q.sum lodint(donnees.composite,chr=2,expandtomarkers=FALSE) #3 fake.f2 <- sim.geno(geno, n.draws=8, step=2,map.function="kosambi",err=0.001) geno.mark<- makeqtl(fake.f2,"3", 64.4, what="prob") summary(geno.mark) my.formula<-y~Q1 geno.mark.Q<- fitqtl(fake.f2, geno.mark,pheno.col=11,formula=my.formula,method="hk", get.ests=TRUE) geno.mark.Q.sum<- summary(geno.mark.Q) geno.mark.Q.sum lodint(donnees.composite,chr=3,expandtomarkers=FALSE) ############################################################################# # # # Factors affecting prediction accuracy of RR-BLUP model # # # ############################################################################# #Genotyping data data1 <- as.matrix(read.table("FruitSelGen-apricot-PRPER-Lovell-v2-avignon_GoxMo_raw-snps_gatk-filter_153indiv.txt")) data1<-data1[,-c(154,155)] geno <- data1 meanimputed <- function(X){ colimputed <- function(x){ x[is.na(x)] <- mean(!is.na(x)) return(x) } Ximp <- apply(X,2,colimputed) return(as.matrix(Ximp)) } genoimputed <- meanimputed(geno) geno <- genoimputed data1<-geno data2<-replace(data1,is.na(data1),5) #Matrix transposition data2<-t(data2) #Phenotypic data Pheno<-read.table("Quality0607.csv", header=T, sep=";",dec=".") Pheno <- Pheno[,-1] Pheno <- as.matrix(Pheno) ############################################################################# # # # Effect of statistical models on prediction accuracy # # # ############################################################################# library(BGLR) #Genotyping data data1 <- as.matrix(read.table("FruitSelGen-apricot-PRPER-Lovell-v2-avignon_GoxMo_raw-snps_gatk-filter_153indiv.txt")) data1<-data1[,-c(154,155)] geno <- data1 meanimputed <- function(X){ colimputed <- function(x){ x[is.na(x)] <- mean(!is.na(x)) return(x) } Ximp <- apply(X,2,colimputed) return(as.matrix(Ximp)) } genoimputed <- meanimputed(geno) geno <- genoimputed data1<-geno data2<-replace(data1,is.na(data1),5) #Matrix transposition data2<-t(data2) #Phenotypic data Pheno<-read.table("Quality0607.csv", header=T, sep=";",dec=".") Pheno <- Pheno[,-1] Pheno <- as.matrix(Pheno) #################################################################### ######################## Bayes A ################################# #################################################################### #################################################################### ################ GOMO =153_TP=75%_VP=25% ####################### #################################################################### traits=10 cycles=100 accuracy = matrix(nrow=cycles, ncol=traits) effectsmarkers = matrix(nrow=61030, ncol=traits) for(n in c(1:traits)){ bHat_pheno<-c(1:61030) for(r in c(1: cycles)){ #2# Creating a Testing set y<-Pheno[,n] yNA<-y tst<-sample(1:153,size=38,replace=FALSE) yNA[tst]<-NA #3# Setting the linear predictor ETA<-list(list(X=data2, model='BayesA')) #4# Fitting the model fm<-BGLR(y=yNA,ETA=ETA) #2# Predictions phenotype yHat<-fm$yHat[tst] cor(fm$yHat[tst],y[tst]) #TST #3# Predictions effet des markers bHat<- fm$ETA[[1]]$b bHat_pheno<-cbind(bHat_pheno,bHat) dim(bHat_pheno) head(bHat_pheno) accuracy[r, n] <- cor(fm$yHat[tst],y[tst]) } } write.table(accuracy, "BayesA_100_75_10.txt", sep=" ", quote=F, col.names=F, row.names=T) ###### Plotting prediction accuracy colnames(accuracy)<-c("F.Weight", "Hue.g", "Ethylene", "RI", "TA", "Glucose", "Fructose", "Sucrose", "Citric.A.", "Malic.A.") boxplot(accuracy, xlab="Phenotypes", ylab="Prediction accuracy", col="salmon", las=2, cex.axis=.7) #################################################################### ######################## Bayes B ################################# #################################################################### #################################################################### ################ GOMO =153_TP=75%_VP=25% ####################### #################################################################### traits=10 cycles=100 accuracy = matrix(nrow=cycles, ncol=traits) effectsmarkers = matrix(nrow=61030, ncol=traits) for(n in c(1:traits)){ bHat_pheno<-c(1:61030) for(r in c(1: cycles)){ #2# Creating a Testing set y<-Pheno[,n] yNA<-y tst<-sample(1:153,size=38,replace=FALSE) yNA[tst]<-NA #3# Setting the linear predictor ETA<-list(list(X=data2, model='BayesB')) #4# Fitting the model fm<-BGLR(y=yNA,ETA=ETA) #2# Predictions phenotype yHat<-fm$yHat[tst] cor(fm$yHat[tst],y[tst]) #TST #3# Predictions effet des markers bHat<- fm$ETA[[1]]$b bHat_pheno<-cbind(bHat_pheno,bHat) dim(bHat_pheno) head(bHat_pheno) accuracy[r, n] <- cor(fm$yHat[tst],y[tst]) } } write.table(accuracy, "BayesB_100_75_10.txt", sep=" ", quote=F, col.names=F, row.names=T) ###### Plotting prediction accuracy colnames(accuracy)<-c("F.Weight", "Hue.g", "Ethylene", "RI", "TA", "Glucose", "Fructose", "Sucrose", "Citric.A.", "Malic.A.") boxplot(accuracy, xlab="Phenotypes", ylab="Prediction accuracy", col="salmon", las=2, cex.axis=.7) #################################################################### ######################## Bayes C ################################# #################################################################### #################################################################### ################ GOMO =153_TP=75%_VP=25% ####################### #################################################################### traits=10 cycles=100 accuracy = matrix(nrow=cycles, ncol=traits) effectsmarkers = matrix(nrow=61030, ncol=traits) for(n in c(1:traits)){ bHat_pheno<-c(1:61030) for(r in c(1: cycles)){ #2# Creating a Testing set y<-Pheno[,n] yNA<-y tst<-sample(1:153,size=38,replace=FALSE) yNA[tst]<-NA #3# Setting the linear predictor ETA<-list(list(X=data2, model='BayesC')) #4# Fitting the model fm<-BGLR(y=yNA,ETA=ETA) #2# Predictions phenotype yHat<-fm$yHat[tst] cor(fm$yHat[tst],y[tst]) #TST #3# Predictions effet des markers bHat<- fm$ETA[[1]]$b bHat_pheno<-cbind(bHat_pheno,bHat) dim(bHat_pheno) head(bHat_pheno) accuracy[r, n] <- cor(fm$yHat[tst],y[tst]) } } write.table(accuracy, "BayesC_100_75_10.txt", sep=" ", quote=F, col.names=F, row.names=T) ###### Plotting prediction accuracy colnames(accuracy)<-c("F.Weight", "Hue.g", "Ethylene", "RI", "TA", "Glucose", "Fructose", "Sucrose", "Citric.A.", "Malic.A.") boxplot(accuracy, xlab="Phenotypes", ylab="Prediction accuracy", col="salmon", las=2, cex.axis=.7) #################################################################### ######################## BRR ################################# #################################################################### #################################################################### ################ GOMO =153_TP=75%_VP=25% ####################### #################################################################### traits=10 cycles=100 accuracy = matrix(nrow=cycles, ncol=traits) effectsmarkers = matrix(nrow=61030, ncol=traits) for(n in c(1:traits)){ bHat_pheno<-c(1:61030) for(r in c(1: cycles)){ #2# Creating a Testing set y<-Pheno[,n] yNA<-y tst<-sample(1:153,size=38,replace=FALSE) yNA[tst]<-NA #3# Setting the linear predictor ETA<-list(list(X=data2, model='BRR')) #4# Fitting the model fm<-BGLR(y=yNA,ETA=ETA) #2# Predictions phenotype yHat<-fm$yHat[tst] cor(fm$yHat[tst],y[tst]) #TST #3# Predictions effet des markers bHat<- fm$ETA[[1]]$b bHat_pheno<-cbind(bHat_pheno,bHat) dim(bHat_pheno) head(bHat_pheno) accuracy[r, n] <- cor(fm$yHat[tst],y[tst]) } } write.table(accuracy, "BayesBRR_100_75_10.txt", sep=" ", quote=F, col.names=F, row.names=T) ###### Plotting prediction accuracy colnames(accuracy)<-c("F.Weight", "Hue.g", "Ethylene", "RI", "TA", "Glucose", "Fructose", "Sucrose", "Citric.A.", "Malic.A.") boxplot(accuracy, xlab="Phenotypes", ylab="Prediction accuracy", col="salmon", las=2, cex.axis=.7) #################################################################### ######################## BL ################################# #################################################################### #################################################################### ################ GOMO =153_TP=75%_VP=25% ####################### #################################################################### traits=10 cycles=100 accuracy = matrix(nrow=cycles, ncol=traits) effectsmarkers = matrix(nrow=61030, ncol=traits) for(n in c(1:traits)){ bHat_pheno<-c(1:61030) for(r in c(1: cycles)){ #2# Creating a Testing set y<-Pheno[,n] yNA<-y tst<-sample(1:153,size=38,replace=FALSE) yNA[tst]<-NA #3# Setting the linear predictor ETA<-list(list(X=data2, model='BL')) #4# Fitting the model fm<-BGLR(y=yNA,ETA=ETA) #2# Predictions phenotype yHat<-fm$yHat[tst] cor(fm$yHat[tst],y[tst]) #TST #3# Predictions effet des markers bHat<- fm$ETA[[1]]$b bHat_pheno<-cbind(bHat_pheno,bHat) dim(bHat_pheno) head(bHat_pheno) accuracy[r, n] <- cor(fm$yHat[tst],y[tst]) } } write.table(accuracy, "BayesBL_100_75_10.txt", sep=" ", quote=F, col.names=F, row.names=T) ###### Plotting prediction accuracy colnames(accuracy)<-c("F.Weight", "Hue.g", "Ethylene", "RI", "TA", "Glucose", "Fructose", "Sucrose", "Citric.A.", "Malic.A.") boxplot(accuracy, xlab="Phenotypes", ylab="Prediction accuracy", col="salmon", las=2, cex.axis=.7) #################################################################### ##################### RR-BLUP ############################### #################################################################### #################################################################### ################ GOMO =153_TP=75%_VP=25% ####################### #################################################################### library(rrBLUP) Markers<-t(data1) ## Calculates the realized additive relationship matrix. #impute missing markers with A.mat impute=A.mat(Markers,impute.method="mean",return.imputed=T) #Imputed markers matrix Markers_impute=impute$imputed #define the training and test populations #training-75% validation-25% traits=10 cycles=100 accuracy = matrix(nrow=cycles, ncol=traits) for(r in 1:cycles) { for(n in c(1:traits)) { train= as.matrix(sample(1:153, 115)) test<-setdiff(1:153,train) Pheno_train=Pheno[train,] m_train=Markers_impute[train,] Pheno_valid=Pheno[test,] m_valid=Markers_impute[test,] y= Pheno_train[,n] y_answer<-mixed.solve(y, Z=m_train, K=NULL, SE = FALSE, return.Hinv=FALSE) p = y_answer$u e = as.matrix(p) pred_y_valid = m_valid %*% e pred_y=(pred_y_valid[,1]) + y_answer$beta y_valid = Pheno_valid[,n] accuracy[r,n] <-cor(pred_y_valid, y_valid, use="complete" ) } } write.table(accuracy, "RRBLUP_100_75_10.txt", sep=" ", quote=F, col.names=F, row.names=T) ###### Plotting prediction accuracy colnames(accuracy)<-c("F.Weight", "Hue.g", "Ethylene", "RI", "TA", "Glucose", "Fructose", "Sucrose", "Citric.A.", "Malic.A.") boxplot(accuracy, xlab="Phenotypes", ylab="Prediction accuracy", col="salmon", las=2, cex.axis=.7) ############################################################################# # # # Effect of TP size on prediction accuracy # # # ############################################################################# #################################################################### ################ GOMO =153_TP=50%_VP=50% ####################### #################################################################### traits=10 cycles=100 accuracy = matrix(nrow=cycles, ncol=traits) for(r in 1:cycles) { for(n in c(1:traits)) { train= as.matrix(sample(1:153, 76)) test<-setdiff(1:153,train) Pheno_train=Pheno[train,] m_train=Markers_impute[train,] Pheno_valid=Pheno[test,] m_valid=Markers_impute[test,] y= Pheno_train[,n] y_answer<-mixed.solve(y, Z=m_train, K=NULL, SE = FALSE, return.Hinv=FALSE) p = y_answer$u e = as.matrix(p) pred_y_valid = m_valid %*% e pred_y=(pred_y_valid[,1]) + y_answer$beta y_valid = Pheno_valid[,n] accuracy[r,n] <-cor(pred_y_valid, y_valid, use="complete" ) } } write.table(accuracy, "RRBLUP_100_TP50%_10.txt", sep=" ", quote=F, col.names=F, row.names=T) #################################################################### ################ GOMO =153_TP=25%_VP=75% ####################### #################################################################### traits=10 cycles=100 accuracy = matrix(nrow=cycles, ncol=traits) for(r in 1:cycles) { for(n in c(1:traits)) { train= as.matrix(sample(1:153, 38)) test<-setdiff(1:153,train) Pheno_train=Pheno[train,] m_train=Markers_impute[train,] Pheno_valid=Pheno[test,] m_valid=Markers_impute[test,] y= Pheno_train[,n] y_answer<-mixed.solve(y, Z=m_train, K=NULL, SE = FALSE, return.Hinv=FALSE) p = y_answer$u e = as.matrix(p) pred_y_valid = m_valid %*% e pred_y=(pred_y_valid[,1]) + y_answer$beta y_valid = Pheno_valid[,n] accuracy[r,n] <-cor(pred_y_valid, y_valid, use="complete" ) } } write.table(accuracy, "RRBLUP_100_TP25%_10.txt", sep=" ", quote=F, col.names=F, row.names=T) ############################################################################# # # # Effect of SNPs number on prediction accuracy # # # ############################################################################# traits=10 cycles=100 AccurrBLUP = matrix(nrow=cycles, ncol=traits) out_marker_density = NULL for(i in c(50,100,610,6103,12206,18309,24412,30515,36618,42721,48824,54927,61030)){ for(n in c(1: traits)){ for(r in c(1: cycles)){ pick <- as.matrix(sample(1:61030,i)) pick_M = Markers[,pick] impute=A.mat(pick_M,impute.method="mean",return.imputed=T) Markers_impute=impute$imputed train= as.matrix(sample(1:153,115)) test<-setdiff(1:153,train) Pheno_train=Pheno[train,] m_train=Markers_impute[train,] Pheno_valid=Pheno[test,] m_valid=Markers_impute[test,] phe01=(Pheno_train[,n]) phe01_answer<-mixed.solve(phe01, Z=m_train, K=NULL, SE = FALSE, return.Hinv=FALSE) phe01a = phe01_answer$u e = as.matrix(phe01a) pred_phe01_valid = m_valid %*% e pred_phe01=(pred_phe01_valid[,1])+ phe01_answer$beta pred_phe01 phe01_valid = Pheno_valid[,n] AccurrBLUP[r,n] <-cor(pred_phe01_valid, phe01_valid, use="complete" ) } } out <- data.frame(rep(i,cycles),AccurrBLUP) names(out) <- c("MarkerDensity","F.Weight", "Hue.g", "Ethylene", "RI", "TA", "Glucose", "Fructose", "Sucrose", "Citric.A.", "Malic.A.") out_marker_density <- rbind(out_marker_density,out) } write.table(out_marker_density,"RRBLUP_100_SNPsnumber_10.txt", sep=" ", quote=F, col.names=F, row.names=T) ############################################################################# # # # Optimization of prediction accuracy # # # ############################################################################# library(qtl) library(dplyr) library(rrBLUP) Pheno<-read.table("QTL_pheno.csv", header=T, sep=";",dec=".") pheno_data=Pheno[-c(1:2),] ####################################################################################### ############# Mapping QTLs in the training set ##################### ####################################################################################### traits=10 cycles=100 accuracy = matrix(nrow=cycles, ncol=traits) for(n in c(1:traits)){ for(r in c(1: cycles)){ p=0.25 ## Size of validation population y <- rownames(pheno_data) # Removing 38 individuals (Training set of 75%) rem_indiv_NAs= sample(1:length(y),size=p*length(y)) pheno_remv<-data.frame(pheno_data[-c(rem_indiv_NAs),]) genot<-data.frame(Pos=1:nrow(pheno_remv),names=pheno_remv$Ind) ####################################################################################### ############################# Goldrich ############################# ####################################################################################### Geno1<-read.table("GoldQTL1_geno.csv", header=T, sep=";",dec=".") data <- t(Geno1) data <- Geno1[-c(1,2),] zeros=pheno_data[1:2,] pheno_comp1=cbind(Pheno, Geno1) pheno_comp1 <- pheno_comp1[,-12] ap(pheno_comp1) write.csv2(pheno_comp1, file = "Data1.csv",row.names=FALSE, na = "") # calculate the probabilities of the true underlying genotypes given the observed multipoint marker data with possible allowance for genotyping errors ## Simple interval mapping donSNP1<-read.cross("csv", file="Data1.csv", na.strings = "NA", genotypes=c("A","H"),map.function="kosambi",sep=";",dec=",") Geno1<-calc.genoprob(donSNP1, step=7,map.function = "kosambi", stepwidth = "fixed") out.hk1<-scanone(cross = Geno1, pheno.col = n+1, model = "normal",method = "hk") out.hk.permutations1 <- scanone(cross = Geno1,pheno.col = n+1,model = "normal", method = "hk", n.perm = 1000, perm.Xsp = FALSE, verbose = FALSE) donnees.composite1 <- cim(Geno1, pheno.col = n+1, n.marcovar = 1, method = "hk",map.function = "kosambi") #The permutations are carried out to know the value of the threshold donnees.composite.permutations1 <- cim(Geno1, pheno.col = n+1, n.marcovar = 1, method = "hk", map.function = "kosambi", n.perm = 1000) out_t1 <- summary(donnees.composite1, perms = donnees.composite.permutations1, alpha = 0.01, pvalues=TRUE) ########################################################################################## ############################# Moniqui ################################# ########################################################################################## Geno2<-read.table("MonQTL1_geno.csv", header=T, sep=";",dec=".") data <- t(Geno2) data <- Geno2[-c(1,2),] zeros=pheno_data[1:2,] pheno_comp2=cbind(Pheno, Geno2) pheno_comp2 <- pheno_comp2[,-12] ap(pheno_comp2) write.csv2(pheno_comp2, file = "Data2.csv",row.names=FALSE, na = "") # calculate the probabilities of the true underlying genotypes given the observed multipoint marker data with possible allowance for genotyping errors ## Simple interval mapping donSNP2<-read.cross("csv", file="Data2.csv", na.strings = "NA", genotypes=c("A","H"),map.function="kosambi",sep=";",dec=",") Geno2<-calc.genoprob(donSNP2, step=7,map.function = "kosambi", stepwidth = "fixed") out.hk2<-scanone(cross = Geno2, pheno.col = n+1, model = "normal",method = "hk") out.hk.permutations2 <- scanone(cross = Geno2,pheno.col = n+1,model = "normal", method = "hk", n.perm = 1000, perm.Xsp = FALSE, verbose = FALSE) donnees.composite2 <- cim(Geno2, pheno.col = n+1, n.marcovar = 1, method = "hk",map.function = "kosambi") #The permutations are carried out to know the value of the threshold donnees.composite.permutations2 <- cim(Geno2, pheno.col = n+1, n.marcovar = 1, method = "hk", map.function = "kosambi", n.perm = 1000) out_t2 <- summary(donnees.composite2, perms = donnees.composite.permutations2, alpha = 0.01, pvalues=TRUE) out_t <- rbind(out_t1,out_t2) Row_old <- rownames(out_t) Row_new1 <- gsub(pattern = "_P1",replacement = "", x = Row_old) Row_new2 <- gsub(pattern = "_P2",replacement = "", x = Row_new1) rownames(out_t) <- Row_new2 sort=arrange(out_t, desc(lod)) chr=sort %>% distinct(chr, .keep_all = TRUE) #2# Creating a Testing set ## Take the significant SNP (higher value) look.for=chr$pos data1 <- as.matrix(read.table("FruitSelGen-apricot-PRPER-Lovell-v2-avignon_GoxMo_raw-snps_gatk-filter_153indiv.txt")) data1[data1==0] <- -1 data1[data1==1] <- 0 data1[data1==2] <- 1 data1<-data1[,-c(154,155)] geno <- data1 meanimputed <- function(X){ colimputed <- function(x){ x[is.na(x)] <- mean(!is.na(x)) return(x) } Ximp <- apply(X,2,colimputed) return(as.matrix(Ximp)) } genoimputed <- meanimputed(geno) geno <- genoimputed data1<-geno cols_snp=data1[rownames(data1) %in% rownames(out_t), ] snp=as.matrix(cols_snp) dim(snp)#153 1 if (nrow(snp) > ncol(snp)){ snp <- snp } if(nrow(snp) < ncol(snp)){ snp <- t(snp) } QTL_cov=snp #How to test when condition returns numeric(0) in R isEmpty <- function(x) { return(length(x)==0) } #There were no LOD peaks above the threshold. if (!rownames(out_t1) %in% rownames(data1) && nrow(out_t1)!=0){ QTL_cov <- out_t1 m<-find.marker(cross= Geno1, chr=out_t1[,1], pos=out_t1[,2]) rownames(out_t1)<- m } if (!rownames(out_t2) %in% rownames(data1) && nrow(out_t2)!=0){ QTL_cov <- out_t2 m<-find.marker(cross= Geno2, chr=out_t2[,1], pos=out_t2[,2]) rownames(out_t2)<- m } out_t <- rbind(out_t1,out_t2) Row_old <- rownames(out_t) Row_new1 <- gsub(pattern = "_P1",replacement = "", x = Row_old) Row_new2 <- gsub(pattern = "_P2",replacement = "", x = Row_new1) rownames(out_t) <- Row_new2 cols_snp=data1[rownames(data1) %in% rownames(out_t), ] snp=as.matrix(cols_snp) if (nrow(snp) > ncol(snp)){ snp <- snp } if(nrow(snp) < ncol(snp)){ snp <- t(snp) } QTL_cov=snp colnames(QTL_cov) <- rownames(out_t) SNPs=data1[!(rownames(data1) %in% colnames(QTL_cov)),] length_snp = nrow(out_t) if(length_snp > 0){ #significant SNPs identified for target trait SNPs <- t(SNPs) Pheno_train=pheno_data[!(rownames(pheno_data) %in% rem_indiv_NAs),] m_train=SNPs[!(rownames(SNPs) %in% rem_indiv_NAs),] Pheno_valid=pheno_data[rem_indiv_NAs,] m_valid=SNPs[rem_indiv_NAs,] m_train=SNPs[!(rownames(SNPs) %in% rownames(m_valid)),] y= Pheno_train[,n+1] QTL_cov <- QTL_cov [rem_indiv_NAs,] QTL_cov <- as.matrix(QTL_cov) y_answer<-mixed.solve(y, X=QTL_cov,Z=m_train, K=NULL, SE = FALSE, return.Hinv=FALSE) p = y_answer$u b = y_answer$beta p = as.matrix(p) b = as.matrix(b) pred_y_valid = m_valid %*% p + SNPs[rem_indiv_NAs,]%*%b pred_y=(pred_y_valid[,1]) + mean(y_answer$beta) y_valid = Pheno_valid[,n+1] } else { #No detected QTLs for target trait y_answer<-mixed.solve(y,Z=m_train, K=NULL, SE = FALSE, return.Hinv=FALSE) p = y_answer$u e = as.matrix(p) pred_y_valid = m_valid %*% e pred_y=(pred_y_valid[,1]) + y_answer$beta y_valid = Pheno_valid[,n+1] } accuracy[r,n] <-cor(pred_y_valid, y_valid, use="complete" ) } } write.table(accuracy, "Opti_QTL_TP_quality_RRBLUP.txt", sep=" ", quote=F, col.names=F, row.names=T) ############################################################################# # # # Multivariate genomic selection # # # ############################################################################# library(sommer) #Phenotypic data Pheno<-read.table("Quality0607.csv", header=T, sep=";",dec=".") rownames(Pheno)<-Pheno[,1] #Genotyping data data1 <- as.matrix(read.table("FruitSelGen-apricot-PRPER-Lovell-v2-avignon_GoxMo_raw-snps_gatk-filter_153indiv.txt")) data1[data1==0] <- -1 data1[data1==1] <- 0 data1[data1==2] <- 1 data1<-data1[,-c(154,155)] geno <- data1 meanimputed <- function(X){ colimputed <- function(x){ x[is.na(x)] <- mean(!is.na(x)) return(x) } Ximp <- apply(X,2,colimputed) return(as.matrix(Ximp)) } genoimputed <- meanimputed(geno) geno <- genoimputed data1<-geno G.MATRIX=read.table("relationshipmatrix153ind.txt",header=T,sep = "\t") G.MATRIX <- as.matrix(G.MATRIX) rownames(G.MATRIX) <- colnames(G.MATRIX) <- colnames(data1) <- rownames(Pheno) Markers<-t(data1) ## Calculates the realized additive relationship matrix. #impute missing markers with A.mat impute=A.mat(Markers,impute.method="mean",return.imputed=T) #Imputed markers matrix Markers_impute=impute$imputed ############################################################################# # # # RI as proxy predictand # # # ############################################################################# #################################################################### ######################## Ethylene ########################## #################################################################### #################################################################### ######################## 10% ########################## #################################################################### accuracy = matrix(nrow=cycles, ncol=1) cycles=100 for(r in 1:cycles) { train= as.matrix(sample(1:153, 115)) test<-setdiff(1:153,train) Pheno_train=Pheno[train,] m_train=Markers_impute[train,] Pheno_valid=Pheno[test,] m_valid=Markers_impute[test,] RI= Pheno_train[,5] Ethylene=Pheno_train[,4] n_rows <- nrow(Pheno_train) row_missing <- sample(1:n_rows, round(0.1 * n_rows,0)) col_missing <- 4 # define column DATA.MODEL <- rbind(Pheno_train,Pheno_valid) DATA.MODEL$Ethylene[which(DATA.MODEL$Ind %in% Pheno_valid$Ind)] <- NA DATA.MODEL[row_missing, col_missing] <- NA # assign missing values MODEL <- mmer(fixed=cbind(Ethylene,RI)~1,random=~vs(Ind,Gu=G.MATRIX,Gtc = unsm(2)),rcov=~vs(units,Gtc=unsm(2)),na.method.Y="include2",data=DATA.MODEL,method="NR") GEBVs <- data.frame(Ind=names(MODEL$U$`u:Ind`$Ethylene),gebv.ethy=as.numeric(MODEL$U$`u:Ind`$Ethylene)) valid<-DATA.MODEL[is.na(DATA.MODEL$Ethylene),1] Pheno_test <- subset(Pheno,row.names(Pheno)%in% valid) VALIDATION <- droplevels(merge(Pheno_test[,c(1,4)],GEBVs,by.x="Ind",by.y="Ind",all.x=F,all.y=F)) accuracy[r,] <- cor(VALIDATION$Ethylene,VALIDATION$gebv.ethy) } write.table(accuracy, "Multi-trait_Ethylene_RI_10%.txt", sep=" ", quote=F, col.names=F, row.names=T) #################################################################### ######################## 20% ########################## #################################################################### accuracy = matrix(nrow=cycles, ncol=1) cycles=100 for(r in 1:cycles) { train= as.matrix(sample(1:153, 115)) test<-setdiff(1:153,train) Pheno_train=Pheno[train,] m_train=Markers_impute[train,] Pheno_valid=Pheno[test,] m_valid=Markers_impute[test,] RI= Pheno_train[,5] Ethylene=Pheno_train[,4] n_rows <- nrow(Pheno_train) row_missing <- sample(1:n_rows, round(0.2 * n_rows,0)) col_missing <- 4 # define column DATA.MODEL <- rbind(Pheno_train,Pheno_valid) DATA.MODEL$Ethylene[which(DATA.MODEL$Ind %in% Pheno_valid$Ind)] <- NA DATA.MODEL[row_missing, col_missing] <- NA # assign missing values MODEL <- mmer(fixed=cbind(Ethylene,RI)~1,random=~vs(Ind,Gu=G.MATRIX,Gtc = unsm(2)),rcov=~vs(units,Gtc=unsm(2)),na.method.Y="include2",data=DATA.MODEL,method="NR") GEBVs <- data.frame(Ind=names(MODEL$U$`u:Ind`$Ethylene),gebv.ethy=as.numeric(MODEL$U$`u:Ind`$Ethylene)) valid<-DATA.MODEL[is.na(DATA.MODEL$Ethylene),1] Pheno_test <- subset(Pheno,row.names(Pheno)%in% valid) VALIDATION <- droplevels(merge(Pheno_test[,c(1,4)],GEBVs,by.x="Ind",by.y="Ind",all.x=F,all.y=F)) accuracy[r,] <- cor(VALIDATION$Ethylene,VALIDATION$gebv.ethy) } cov2cor(MODEL$sigma$`u:Ind`) write.table(accuracy, "Multi-trait_Ethylene_RI_20%.txt", sep=" ", quote=F, col.names=F, row.names=T) #################################################################### ######################## 30% ########################## #################################################################### accuracy = matrix(nrow=cycles, ncol=1) cycles=100 for(r in 1:cycles) { train= as.matrix(sample(1:153, 115)) test<-setdiff(1:153,train) Pheno_train=Pheno[train,] m_train=Markers_impute[train,] Pheno_valid=Pheno[test,] m_valid=Markers_impute[test,] RI= Pheno_train[,5] Ethylene=Pheno_train[,4] n_rows <- nrow(Pheno_train) row_missing <- sample(1:n_rows, round(0.3 * n_rows,0)) col_missing <- 4 # define column DATA.MODEL <- rbind(Pheno_train,Pheno_valid) DATA.MODEL$Ethylene[which(DATA.MODEL$Ind %in% Pheno_valid$Ind)] <- NA DATA.MODEL[row_missing, col_missing] <- NA # assign missing values MODEL <- mmer(fixed=cbind(Ethylene,RI)~1,random=~vs(Ind,Gu=G.MATRIX,Gtc = unsm(2)),rcov=~vs(units,Gtc=unsm(2)),na.method.Y="include2",data=DATA.MODEL,method="NR") GEBVs <- data.frame(Ind=names(MODEL$U$`u:Ind`$Ethylene),gebv.ethy=as.numeric(MODEL$U$`u:Ind`$Ethylene)) valid<-DATA.MODEL[is.na(DATA.MODEL$Ethylene),1] Pheno_test <- subset(Pheno,row.names(Pheno)%in% valid) VALIDATION <- droplevels(merge(Pheno_test[,c(1,4)],GEBVs,by.x="Ind",by.y="Ind",all.x=F,all.y=F)) accuracy[r,] <- cor(VALIDATION$Ethylene,VALIDATION$gebv.ethy) } cov2cor(MODEL$sigma$`u:Ind`) write.table(accuracy, "Multi-trait_Ethylene_RI_30%.txt", sep=" ", quote=F, col.names=F, row.names=T) #################################################################### ######################## 40% ########################## #################################################################### accuracy = matrix(nrow=cycles, ncol=1) cycles=100 for(r in 1:cycles) { train= as.matrix(sample(1:153, 115)) test<-setdiff(1:153,train) Pheno_train=Pheno[train,] m_train=Markers_impute[train,] Pheno_valid=Pheno[test,] m_valid=Markers_impute[test,] RI= Pheno_train[,5] Ethylene=Pheno_train[,4] n_rows <- nrow(Pheno_train) row_missing <- sample(1:n_rows, round(0.4 * n_rows,0)) col_missing <- 4 # define column DATA.MODEL <- rbind(Pheno_train,Pheno_valid) DATA.MODEL$Ethylene[which(DATA.MODEL$Ind %in% Pheno_valid$Ind)] <- NA DATA.MODEL[row_missing, col_missing] <- NA # assign missing values MODEL <- mmer(fixed=cbind(Ethylene,RI)~1,random=~vs(Ind,Gu=G.MATRIX,Gtc = unsm(2)),rcov=~vs(units,Gtc=unsm(2)),na.method.Y="include2",data=DATA.MODEL,method="NR") GEBVs <- data.frame(Ind=names(MODEL$U$`u:Ind`$Ethylene),gebv.ethy=as.numeric(MODEL$U$`u:Ind`$Ethylene)) valid<-DATA.MODEL[is.na(DATA.MODEL$Ethylene),1] Pheno_test <- subset(Pheno,row.names(Pheno)%in% valid) VALIDATION <- droplevels(merge(Pheno_test[,c(1,4)],GEBVs,by.x="Ind",by.y="Ind",all.x=F,all.y=F)) accuracy[r,] <- cor(VALIDATION$Ethylene,VALIDATION$gebv.ethy) } cov2cor(MODEL$sigma$`u:Ind`) write.table(accuracy, "Multi-trait_Ethylene_RI_40%.txt", sep=" ", quote=F, col.names=F, row.names=T) #################################################################### ######################## 50% ########################## #################################################################### accuracy = matrix(nrow=cycles, ncol=1) cycles=100 for(r in 1:cycles) { train= as.matrix(sample(1:153, 115)) test<-setdiff(1:153,train) Pheno_train=Pheno[train,] m_train=Markers_impute[train,] Pheno_valid=Pheno[test,] m_valid=Markers_impute[test,] RI= Pheno_train[,5] Ethylene=Pheno_train[,4] n_rows <- nrow(Pheno_train) row_missing <- sample(1:n_rows, round(0.5 * n_rows,0)) col_missing <- 4 # define column DATA.MODEL <- rbind(Pheno_train,Pheno_valid) DATA.MODEL$Ethylene[which(DATA.MODEL$Ind %in% Pheno_valid$Ind)] <- NA DATA.MODEL[row_missing, col_missing] <- NA # assign missing values MODEL <- mmer(fixed=cbind(Ethylene,RI)~1,random=~vs(Ind,Gu=G.MATRIX,Gtc = unsm(2)),rcov=~vs(units,Gtc=unsm(2)),na.method.Y="include2",data=DATA.MODEL,method="NR") GEBVs <- data.frame(Ind=names(MODEL$U$`u:Ind`$Ethylene),gebv.ethy=as.numeric(MODEL$U$`u:Ind`$Ethylene)) valid<-DATA.MODEL[is.na(DATA.MODEL$Ethylene),1] Pheno_test <- subset(Pheno,row.names(Pheno)%in% valid) VALIDATION <- droplevels(merge(Pheno_test[,c(1,4)],GEBVs,by.x="Ind",by.y="Ind",all.x=F,all.y=F)) accuracy[r,] <- cor(VALIDATION$Ethylene,VALIDATION$gebv.ethy) } cov2cor(MODEL$sigma$`u:Ind`) write.table(accuracy, "Multi-trait_Ethylene_RI_50%.txt", sep=" ", quote=F, col.names=F, row.names=T) #################################################################### ######################## 60% ########################## #################################################################### accuracy = matrix(nrow=cycles, ncol=1) cycles=100 for(r in 1:cycles) { train= as.matrix(sample(1:153, 115)) test<-setdiff(1:153,train) Pheno_train=Pheno[train,] m_train=Markers_impute[train,] Pheno_valid=Pheno[test,] m_valid=Markers_impute[test,] RI= Pheno_train[,5] Ethylene=Pheno_train[,4] n_rows <- nrow(Pheno_train) row_missing <- sample(1:n_rows, round(0.6 * n_rows,0)) col_missing <- 4 # define column DATA.MODEL <- rbind(Pheno_train,Pheno_valid) DATA.MODEL$Ethylene[which(DATA.MODEL$Ind %in% Pheno_valid$Ind)] <- NA DATA.MODEL[row_missing, col_missing] <- NA # assign missing values MODEL <- mmer(fixed=cbind(Ethylene,RI)~1,random=~vs(Ind,Gu=G.MATRIX,Gtc = unsm(2)),rcov=~vs(units,Gtc=unsm(2)),na.method.Y="include2",data=DATA.MODEL,method="NR") GEBVs <- data.frame(Ind=names(MODEL$U$`u:Ind`$Ethylene),gebv.ethy=as.numeric(MODEL$U$`u:Ind`$Ethylene)) valid<-DATA.MODEL[is.na(DATA.MODEL$Ethylene),1] Pheno_test <- subset(Pheno,row.names(Pheno)%in% valid) VALIDATION <- droplevels(merge(Pheno_test[,c(1,4)],GEBVs,by.x="Ind",by.y="Ind",all.x=F,all.y=F)) accuracy[r,] <- cor(VALIDATION$Ethylene,VALIDATION$gebv.ethy) } cov2cor(MODEL$sigma$`u:Ind`) write.table(accuracy, "Multi-trait_Ethylene_RI_60%.txt", sep=" ", quote=F, col.names=F, row.names=T) #################################################################### ######################## 70% ########################## #################################################################### accuracy = matrix(nrow=cycles, ncol=1) cycles=100 for(r in 1:cycles) { train= as.matrix(sample(1:153, 115)) test<-setdiff(1:153,train) Pheno_train=Pheno[train,] m_train=Markers_impute[train,] Pheno_valid=Pheno[test,] m_valid=Markers_impute[test,] RI= Pheno_train[,5] Ethylene=Pheno_train[,4] n_rows <- nrow(Pheno_train) row_missing <- sample(1:n_rows, round(0.7 * n_rows,0)) col_missing <- 4 # define column DATA.MODEL <- rbind(Pheno_train,Pheno_valid) DATA.MODEL$Ethylene[which(DATA.MODEL$Ind %in% Pheno_valid$Ind)] <- NA DATA.MODEL[row_missing, col_missing] <- NA # assign missing values MODEL <- mmer(fixed=cbind(Ethylene,RI)~1,random=~vs(Ind,Gu=G.MATRIX,Gtc = unsm(2)),rcov=~vs(units,Gtc=unsm(2)),na.method.Y="include2",data=DATA.MODEL,method="NR") GEBVs <- data.frame(Ind=names(MODEL$U$`u:Ind`$Ethylene),gebv.ethy=as.numeric(MODEL$U$`u:Ind`$Ethylene)) valid<-DATA.MODEL[is.na(DATA.MODEL$Ethylene),1] Pheno_test <- subset(Pheno,row.names(Pheno)%in% valid) VALIDATION <- droplevels(merge(Pheno_test[,c(1,4)],GEBVs,by.x="Ind",by.y="Ind",all.x=F,all.y=F)) accuracy[r,] <- cor(VALIDATION$Ethylene,VALIDATION$gebv.ethy) } cov2cor(MODEL$sigma$`u:Ind`) write.table(accuracy, "Multi-trait_Ethylene_RI_70%.txt", sep=" ", quote=F, col.names=F, row.names=T) #################################################################### ######################## 80% ########################## #################################################################### accuracy = matrix(nrow=cycles, ncol=1) cycles=100 for(r in 1:cycles) { train= as.matrix(sample(1:153, 115)) test<-setdiff(1:153,train) Pheno_train=Pheno[train,] m_train=Markers_impute[train,] Pheno_valid=Pheno[test,] m_valid=Markers_impute[test,] RI= Pheno_train[,5] Ethylene=Pheno_train[,4] n_rows <- nrow(Pheno_train) row_missing <- sample(1:n_rows, round(0.8 * n_rows,0)) col_missing <- 4 # define column DATA.MODEL <- rbind(Pheno_train,Pheno_valid) DATA.MODEL$Ethylene[which(DATA.MODEL$Ind %in% Pheno_valid$Ind)] <- NA DATA.MODEL[row_missing, col_missing] <- NA # assign missing values MODEL <- mmer(fixed=cbind(Ethylene,RI)~1,random=~vs(Ind,Gu=G.MATRIX,Gtc = unsm(2)),rcov=~vs(units,Gtc=unsm(2)),na.method.Y="include2",data=DATA.MODEL,method="NR") GEBVs <- data.frame(Ind=names(MODEL$U$`u:Ind`$Ethylene),gebv.ethy=as.numeric(MODEL$U$`u:Ind`$Ethylene)) valid<-DATA.MODEL[is.na(DATA.MODEL$Ethylene),1] Pheno_test <- subset(Pheno,row.names(Pheno)%in% valid) VALIDATION <- droplevels(merge(Pheno_test[,c(1,4)],GEBVs,by.x="Ind",by.y="Ind",all.x=F,all.y=F)) accuracy[r,] <- cor(VALIDATION$Ethylene,VALIDATION$gebv.ethy) } cov2cor(MODEL$sigma$`u:Ind`) write.table(accuracy, "Multi-trait_Ethylene_RI_80%.txt", sep=" ", quote=F, col.names=F, row.names=T) #################################################################### ######################## 90% ########################## #################################################################### accuracy = matrix(nrow=cycles, ncol=1) cycles=100 for(r in 1:cycles) { train= as.matrix(sample(1:153, 115)) test<-setdiff(1:153,train) Pheno_train=Pheno[train,] m_train=Markers_impute[train,] Pheno_valid=Pheno[test,] m_valid=Markers_impute[test,] RI= Pheno_train[,5] Ethylene=Pheno_train[,4] n_rows <- nrow(Pheno_train) row_missing <- sample(1:n_rows, round(0.9 * n_rows,0)) col_missing <- 4 # define column DATA.MODEL <- rbind(Pheno_train,Pheno_valid) DATA.MODEL$Ethylene[which(DATA.MODEL$Ind %in% Pheno_valid$Ind)] <- NA DATA.MODEL[row_missing, col_missing] <- NA # assign missing values MODEL <- mmer(fixed=cbind(Ethylene,RI)~1,random=~vs(Ind,Gu=G.MATRIX,Gtc = unsm(2)),rcov=~vs(units,Gtc=unsm(2)),na.method.Y="include2",data=DATA.MODEL,method="NR") GEBVs <- data.frame(Ind=names(MODEL$U$`u:Ind`$Ethylene),gebv.ethy=as.numeric(MODEL$U$`u:Ind`$Ethylene)) valid<-DATA.MODEL[is.na(DATA.MODEL$Ethylene),1] Pheno_test <- subset(Pheno,row.names(Pheno)%in% valid) VALIDATION <- droplevels(merge(Pheno_test[,c(1,4)],GEBVs,by.x="Ind",by.y="Ind",all.x=F,all.y=F)) accuracy[r,] <- cor(VALIDATION$Ethylene,VALIDATION$gebv.ethy) } cov2cor(MODEL$sigma$`u:Ind`) write.table(accuracy, "Multi-trait_Ethylene_RI_90%.txt", sep=" ", quote=F, col.names=F, row.names=T)