###################################################################################################### ###################################################################################################### #0. Beagle Imputation: impute F34 Array/GBS, F39-43 GBS, and F34-F3943 GBS to LG/SM reference panels## ###################################################################################################### ###################################################################################################### ####################### #! /bin/bash #PBS -S /bin/bash #PBS -N ImputetoRef_Beagle #PBS -l walltime=8:00:00 #PBS -j oe #PBS -l nodes=1:ppn=8 #PBS -t 1-19 chrom=`head -$PBS_ARRAYID auto_chr.list | tail -1` /usr/lib/jvm/jre-1.8.0/bin/java -Xms12g -Xmx12g -jar beagle.22Feb16.8ef.jar \ gt=${chrom}.recode.vcf.gz \ out=${chrom}.refImp \ chrom=${chrom} \ ref=LGSM.${chrom}.onlyPoly.phased.vcf.gz \ map=Beag4_Map.lgsm_angsd.allChr.map \ impute=true \ window=70000 \ overlap=32000 \ niterations=10 \ err=0.03 \ nthreads=4 echo "Done running command." ######################################################################################## ######################################################################################## #1. GWAS dosage prep ######################################################################################## ######################################################################################## ################# #a. DR2 filtering ################# #grep columns number 1-9 zcat chrALL.vcf.gz | grep 'chr' | cut -f 1-9 > snpDR2filter_chrALL.txt #remove INFO rows sed '/^##/d' snpDR2filter_chrALL.txt > snpD2filter_removeINFO.txt #replace every semicolon with a tab, replace AR2=number with only the number, extract useful columns sed 's/;/\t/g' snpD2filter_removeINFO.txt | sed 's/DR2=//g' | sed 's/AR2=//g' | sed 's/AF=//g' | cut -f 1-3,8-10 > snpDR2filter_chrALL_trimmed.txt #threshold=0.9 awk -v threshold="0.9" '$5 >= threshold' snpDR2filter_chrALL_trimmed.txt > DR2_0.9_chrALL_SNPs.txt #concatenate field 1 and filed 2 to create snp annotation for extracting SNPs from original files cat DR2_0.9_chrALL_SNPs.txt | awk '{print $1 "." $2}' > DR2_0.9_chrALL_snpanno.txt #filter on DR2 0.9 vcftools --gzvcf chrALL.vcf.gz --snps DR2_0.9_chrALL_snpanno.txt --recode --out DR2_chrALL #################################### #b. Filter on heterozygosity and MAF #################################### #filter on maf and hwe in autosomes vcftools --vcf DR2_0.9_chrALL.recode.vcf --maf 0.1 --hwe 0.000001 --recode --out DR2_0.9_MAF_HWE_chrALL ################################## #c. Make dosage files for each chr ################################## bgzip DR2_0.9_MAF_HWE_chrALL.recode.vcf tabix -p vcf DR2_0.9_MAF_HWE_chrALL.recode.vcf.gz ######################### #! /bin/bash #PBS -N bcftools_LOCO_auto_dosages #PBS -l walltime=6:00:00 #PBS -j oe #PBS -l nodes=1:ppn=1 #PBS -t 1-19 chrom=`head -$PBS_ARRAYID mice_autoChr.list | tail -1` bcftools query -f '%CHROM\.%POS %REF %ALT[ %DS]\n' -r ${chrom} DR2_0.9_MAF_HWE_chrALL.recode.vcf.gz -o ${chrom}.AIL.dosages bcftools query -f '%CHROM\.%POS %POS %CHROM\n' -r ${chrom} DR2_0.9_MAF_HWE_chrALL.recode.vcf.gz -o ${chrom}.AIL.snpinfo #concatenate all chromosomes into one .dosage and one .snpinfo file cat chr*.AIL.dosages >> chrALL.AIL.dosages cat chr*.AIL.snpinfo >> chrALL.AIL.snpinfo ####################### #d. Omit one chromosome ####################### #! /bin/bash #PBS -N Omit_chr #PBS -l walltime=6:00:00 #PBS -j oe #PBS -l nodes=1:ppn=1 #PBS -t 1-19 chrom=`head -$PBS_ARRAYID mice_autoChr.list | tail -1` grep -vw ${chrom} chrALL.AIL.dosages > autoExcept${chrom}.AIL.dosages grep -vw ${chrom} chrALL.AIL.snpinfo > autoExcept${chrom}.AIL.snpinfo ######################################################################################## ######################################################################################## #2. Estimate kinship matrix ######################################################################################## ######################################################################################## #! /bin/bash #PBS -N Estimate_relatedness_matrix_gk1 #PBS -l walltime=2:00:00 #PBS -j oe #PBS -l nodes=1:ppn=1 #PBS -t 1-19 #####ESTIMATE RELATEDNESS MATRIX FROM GENOTYPES##### chrom=`head -$PBS_ARRAYID auto_chr.list | tail -1` gemma-0.98.1-linux-static -g autoExcept${chrom}.AIL.dosages -p pheno.txt -gk 1 -o autoExcept${chrom}.AIL.dosages ####################################################### #! /bin/bash #PBS -N Estimate_relatedness_matrix_gk2 #PBS -l walltime=2:00:00 #PBS -j oe #PBS -l nodes=1:ppn=1 #PBS -t 1-19 #####ESTIMATE RELATEDNESS MATRIX FROM GENOTYPES##### chrom=`head -$PBS_ARRAYID auto_chr.list | tail -1` gemma-0.98.1-linux-static -g autoExcept${chrom}.AIL.dosages -p pheno.txt -gk 2 -o autoExcept${chrom}.AIL.dosages ######################################################################################## ######################################################################################## #3. RUN GWAS on GEMMA ######################################################################################## ######################################################################################## #Example command line in file GWAS.CMDS: gemma-0.98.1-linux-static -g chr1.AIL.dosages -p pheno.txt -n 1 -k autoExceptchr1.F34_array_SI.AIL.dosages.cXX.txt -a chr1.AIL.snpinfo -c cov.txt -lmm 4 -o chr1.pheno.1.AIL #################################### ##!/bin/bash #PBS -N F34 #PBS -S /bin/bash #PBS -l walltime=8:00:00 #PBS -l nodes=1:ppn=2 #PBS -j oe #PBS -t 1-120 echo "Running command:" `tail -n +$PBS_ARRAYID GWAS.CMDS | head -1` cat GWAS.CMDS | xargs -P 8 -n 1 -I CMD bash -c CMD echo "Done running GEMMA." ######################################################################################## ######################################################################################## #4. Clumping ######################################################################################## ######################################################################################## ~/plink-1.90/plink --vcf DR2_0.9_MAF_HWE_chrALL.recode.vcf.gz --make-bed --recode --out DR2_0.9_MAF_HWE_chrALL #convert GEMMA association file to PLINK format in R. module load R R setwd("~/GWAS_results") Pheno_lu <- read.table("Pheno_lookup.txt", header=FALSE, row.names=1, check.names = FALSE, stringsAsFactors = FALSE) library(qqman) file.names <- dir("~/GWAS_results", pattern ="*.assoc.txt.sorted.nochr") for (i in 1: length(file.names)){ pheno_name <- paste(read.table(text = file.names[i], sep = ".", as.is = TRUE)$V2, read.table(text = file.names[i], sep = ".", as.is = TRUE)$V4, sep=".") Pheno_char <- Pheno_lu[pheno_name, 1] Gfile <- read.table(file.names.MA[i],header=FALSE, check.names = FALSE, stringsAsFactors=FALSE) colnames(Gfile) <- c("chr", "rs", "ps", "n_miss", "allele1", "allele0", "af", "beta", "se", "logl_H1", "l_remle", "l_mle", "p_wald", "p_lrt", "p_score") Pfile <- data.frame(CHR=Gfile$chr, SNP=paste("chr", Gfile$rs, sep=""), BP=Gfile$ps, P=Gfile$p_lrt, A1=Gfile$allele1, A2=Gfile$allele0, FRQ=Gfile$af, BETA=Gfile$beta, SE=Gfile$se) write.table(Pfile, paste(Pheno_char, ".assoc.PLINK", sep=""), row.names=FALSE, sep="\t") } #r2 0.1, 12150kb for file in *.assoc.PLINK; do cat ${file} | sed 's/"//g' > ${file}.txt ~/plink-1.90/plink --file DR2_0.9_MAF_HWE_chrALL --clump-p1 1.28E-05 --clump-r2 0.1 --clump-kb 12150 --clump ${file}.txt --out ${file}.R0.1.12150kb done for file in *.assoc.PLINK.R0.1.12150kb.clumped;do cat ${file} >> pheno_r0.1_12150kb_clumped_SNPs.txt done for file in *.assoc.PLINK.R0.1.12150kb.clumped;do cat ${file} | awk '{print $1, $3, $4, $5}' | sed '/^ /d' | sed 's/.*/& '${file}'/' | sed '/^CHR/d' | sed 's/ /\t/g' >> pheno_r0.1_12150kb_clumped_SNPs_w_pheno.txt done ######################################################################################## ######################################################################################## #5. Credible Set analysis ######################################################################################## ######################################################################################## ###################################### #a. make metal format association file ###################################### #Generate metal formatted files: The file must have 2 columns: markers (SNPs), and p-values. cat chr1.pheno.1.AIL.assoc.txt.sorted.nochr >> chrALL.pheno.1.AIL.assoc.txt.sorted.nochr #make metal files for file in chrALL.*.AIL.assoc.txt.sorted.nochr;do cat ${file} | awk '{print "chr"$2, $14}' > ${file}_locuszoom_metal.txt done for file in chrALL.*.AIL.assoc.txt.sorted.nochr_locuszoom_metal.txt;do sed -i '1i MarkerName P-value' ${file} done ###################### #b. calculate ld files ###################### #Example ld calculation ~/plink-1.90/plink --bfile DR2_0.9_MAF_HWE_chrALL --chr 4 --nonfounders --r2 --ld-snp chr4.66414508 --ld-window 100000 -ld-window-kb 6000 --ld-window-r2 0 --out pheno.assoc.PLINK.R0.1.12150kb.clumped.chr4.66414508 for file in pheno.assoc.PLINK.R0.1.12150kb.clumped.*.ld;do cat ${file} | awk '{print $6, $3, $7}' | awk '{print $0, "NA"}' | sed -e '1s/SNP_B/snp1/' -e '1s/SNP_A/snp2/' -e '1s/R2/rsquare/' -e '1s/NA/dprime/' > ${file}_locuszoom_ld.txt done ############################# #c. Run credible set analysis ############################# #example credible set analysis for F34GBS module load R R library(data.table) F34GBS<-read.table("pheno_r0.1_12150kb_clumped_SNPs_w_pheno.txt",header=F, stringsAsFactors = FALSE) F34GBS.new <- data.frame(trait=F34GBS$V5, topsnp=F34GBS$V2, topsnp_P=F34GBS$V4, chr=F34GBS$V1) for(i in 1:nrow(F34GBS.new)){ F34GBS.new$pos[i]<-strsplit(as.character(F34GBS.new$topsnp),split="[.]")[[i]][2] } #import clumpled and metal file matching table CMetal <- read.table("F34GBS_clumpled_metal_filematch.txt", header=T, stringsAsFactors=FALSE) #append metal file names to data frame # metal file names format morder <- c() for (i in 1: nrow(F34GBS.new)){ idx <- which(CMetal$Clumped_Significant_SNP_File == F34GBS.new$trait[i]) morder <- c(morder, idx) } F34GBS.new$metal <- CMetal$Metal_format_file_for_all_SNPs[morder] #append ld file names to data frame F34GBS.new$ld <- paste0(F34GBS.new$trait,".",F34GBS.new$topsnp, ".ld") #all the original metal files are for SNPs across all CHR. Subset SNPs # where top SNP resides all_metal_chr <- list() for (i in 1:nrow(F34GBS.new)){ data<-fread(F34GBS.new[i,6],header=T,stringsAsFactors = F) data.1 <- data[data$MarkerName %like% paste0("chr", F34GBS.new[i,4], "."), ] all_metal_chr[[i]] <- data.1 } names(all_metal_chr)<-F34GBS.new$metal for(i in seq_along(names(all_metal_chr))){ colnames(all_metal_chr[[i]])<-c("SNP","P") } #LD files are ld R2 per chr read_file<-function(x){ data<-fread(x,header=T,stringsAsFactors = F) return(data) } LD<-lapply(F34GBS.new$ld,read_file) #remove extra columns from LD file #CHR_A BP_A SNP_A CHR_B BP_B SNP_B R2 for (i in seq_along(1:length(LD))){ LD[[i]]$CHR_A<-NULL LD[[i]]$BP_A<-NULL LD[[i]]$SNP_A<-NULL LD[[i]]$CHR_B<-NULL LD[[i]]$BP_B<-NULL } for (i in seq_along(1:length(LD))){ colnames(LD[[i]])<-c("SNP","R2") } ##change the "." in SNP name to ":" #for (i in seq_along(1:length(LD))){ # LD[[i]]$SNP<-gsub("\\.",":",LD[[i]]$SNP) # colnames(LD[[i]])<-c("SNP","R2") #} summary<-replicate(nrow(F34GBS.new),data.frame()) F34GBS.new$trait_snp <- paste0(F34GBS.new$trait, ".", F34GBS.new$topsnp) names(summary)<- F34GBS.new$trait_snp for (i in seq_along(names(summary))){ summary[[i]]<-merge(all_metal_chr[[i]],LD[[i]],by="SNP",all=F) } #credible set function getCredibleSNP <- function(snp, logProb, threshold=y){ prob <- exp(logProb) prob_normed <- prob/sum(prob) prob_cumsum <- cumsum(sort(prob_normed, decreasing=TRUE)) nSNP <- as.numeric(which.max(prob_cumsum>threshold )) if(nSNP<=0){ nSNP=1 } credible_set <- snp[order(prob_normed, decreasing=TRUE)[1:nSNP]] select <- rep(FALSE, length(snp)) select[order(prob_normed, decreasing=TRUE)[1:nSNP]] <- T ret <- list(nSNP = nSNP, prob_normed = prob_normed, prob_cumsum = prob_cumsum[rank(-prob_normed, ties.method="random")], credible_set=credible_set, select=select ) } R2_threshold <- 0.8 y=0.99 for (i in seq_along(names(summary))){ dat<-summary[[i]] select <- !is.na(dat$R2) & dat$R2 > R2_threshold max_SCZ_P <- min(dat$P[select]) max_SCZ_snp <- as.character(dat$SNP[select][which.min(dat$P[select])]) ret <- getCredibleSNP(as.character(dat$SNP[select]), qchisq(as.numeric(dat$P[select]), 1, low=F)/2) inCredible <- rep(NA, length(dat$P)) inCredible[match( dat$SNP[select], dat$SNP )] <- 0 inCredible[match( dat$SNP[select][ret$select], dat$SNP )] <- 1 prob_norm <- rep(NA, length(dat$P)) prob_norm[match(dat$SNP[select], dat$SNP )] <- ret$prob_normed prob_cumsum <- rep(NA, length(dat$P)) prob_cumsum[match(dat$SNP[select], dat$SNP )] <- ret$prob_cumsum result <- cbind(dat, inCredible=inCredible, probNorm=prob_norm, cumSum=prob_cumsum) summary[[i]] <- result } #write out credible set summary tables for (i in seq_along(names(summary))){ summary[[i]]$log10P <- -log10(as.numeric(summary[[i]]$P)) } for (i in seq_along(names(summary))){ summary[[i]]= as.data.table(summary[[i]]) temp=summary[[i]] temp[, c("CHR", "BP") := tstrsplit(SNP, ":")] summary[[i]] <- temp } CS <-summary for(i in seq_along(names(CS))){ df<-CS[[i]] colnames(df)[which(colnames(df)=="SNP")]<-"snp" colnames(df)[which(colnames(df)=="CHR")]<-"chr" colnames(df)[which(colnames(df)=="BP")]<-"pos" colnames(df)[which(colnames(df)=="probNorm")]<-"pp" df$group<-NA df$color<-NA idx<-which(df$inCredible==1) df$group[idx]<-"Credible Set" df$color[idx]<-"purple" df$chr <- gsub("chr","",df$chr) write.table(df[,c("snp","chr","pos","pp","group","color")],names(CS)[i],"_R20.8_credibleSet.txt"),row.names=F,quote=F,sep = "\t" ) } ######################################################################################## ######################################################################################## #6. Calculating thresholds using MultiTrans ######################################################################################## ######################################################################################## ############################################# #a. Estimate variance components using GEMMMA ############################################# #!/bin/bash #PBS -N GEMMA_VC_est #PBS -S /bin/bash #PBS -l walltime=12:00:00 #PBS -l nodes=1:ppn=2 #PBS -j oe #PBS -t 1-3 echo "Running command:" `tail -n +$PBS_ARRAYID VC_est_F343943.CMDS | head -1` cat VC_est_F343943.CMDS | xargs -P 8 -n 1 -I CMD bash -c CMD echo "Done running GEMMA Variance Componence Esimtation." #Extract Sigma2 values from variance component .log output files for file in *.AIL.VC2.*; do grep '## sigma2 estimates' ${file} | sed 's/## sigma2 estimates = //g' >> pheno_VC2.txt done ################################### #b. Dosage file prep for MultiTrans ################################### for file in chr*.AIL.dosages; do cat ${file} | cut -f 1,2,3 --complement -d" " > ${file}.4MultiTrans done wget http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/rowsToCols #!/bin/bash #PBS -N rowtocol #PBS -S /bin/bash #PBS -l walltime=12:00:00 #PBS -l nodes=1:ppn=4 #PBS -j oe #PBS -t 1-20 chrom=`head -$PBS_ARRAYID all_auto_X_chr.list | tail -1` cat ${chrom}.AIL.dosages.4MultiTrans | $HOME/rowsToCols stdin ${chrom}.AIL.dosages.4MultiTrans.transpose ##################################################### #c. Generate correlation matrix r in the rotate space ##################################################### #!/bin/bash #PBS -N MultiTrans_corr #PBS -S /bin/bash #PBS -l walltime=12:00:00 #PBS -l nodes=1:ppn=4 #PBS -j oe #PBS -t 1-19 chrom=`head -$PBS_ARRAYID auto_chr.list | tail -1` mkdir genR_autoK_CR_${chrom} module load R R CMD BATCH --args \ -Xpath="~/${chrom}.AIL.dosages.4MultiTrans.transpose" \ -Kpath="~/autoExcept${chrom}.AIL.dosages.cXX.txt" \ -VCpath="~/pheno_VC2.txt" \ -outputPath="~/genR_autoK_CR_${chrom}" \ ~/MultiTrans/generateR.R ~/generateR_autoK_CR_${chrom}.log ###################################### #d. Generate correlation band matrix c ###################################### #!/bin/bash #PBS -N MultiTrans_matrix_C #PBS -S /bin/bash #PBS -l walltime=12:00:00 #PBS -l nodes=1:ppn=4 #PBS -j oe #PBS -t 1-20 chrom=`head -$PBS_ARRAYID all_auto_X_chr.list | tail -1` ~/java -jar ~/MultiTrans/generateC/generateC.jar 1000 ~/genR_autoK_CR_${chrom}/r.txt ~/genR_autoK_CR_${chrom}/c.txt ######### #e. SLIDE ######### #!/bin/bash #PBS -N MultiTrans_SLIDE #PBS -S /bin/bash #PBS -l walltime=12:00:00 #PBS -l nodes=1:ppn=4 #PBS -j oe #PBS -t 1-20 chrom=`head -$PBS_ARRAYID all_auto_X_chr.list | tail -1` ####e.1. slide_1prep ~/slide.1.0/slide_1prep -C ~/genR_autoK_CR_${chrom}/c.txt 1000 ~/genR_autoK_CR_${chrom}/prep ####e.2. slide_2run ~/slide.1.0/slide_2run ~/genR_autoK_CR_${chrom}/prep ~/genR_autoK_CR_${chrom}/maxstat 10000000 1234 #####e.3. slide_3sort ~/slide.1.0/slide_3sort ~/sorted \ ~/genR_autoK_CR_chr1/maxstat \ ~/genR_autoK_CR_chr2/maxstat \ ~/genR_autoK_CR_chr3/maxstat \ ~/genR_autoK_CR_chr4/maxstat \ ~/genR_autoK_CR_chr5/maxstat \ ~/genR_autoK_CR_chr6/maxstat \ ~/genR_autoK_CR_chr7/maxstat \ ~/genR_autoK_CR_chr8/maxstat \ ~/genR_autoK_CR_chr9/maxstat \ ~/genR_autoK_CR_chr10/maxstat \ ~/genR_autoK_CR_chr11/maxstat \ ~/genR_autoK_CR_chr12/maxstat \ ~/genR_autoK_CR_chr13/maxstat \ ~/genR_autoK_CR_chr14/maxstat \ ~/genR_autoK_CR_chr15/maxstat \ ~/genR_autoK_CR_chr16/maxstat \ ~/genR_autoK_CR_chr17/maxstat \ ~/genR_autoK_CR_chr18/maxstat \ ~/genR_autoK_CR_chr19/maxstat \ ~/genR_autoK_CR_chrX/maxstat ####e.4. slide_4correct ~/slide.1.0/slide_4correct -t ~/sorted ~/threshold.txt MultiTrans.output ######################################################################################## ######################################################################################## #7. Power analysis ######################################################################################## ######################################################################################## ##################################### #a. Example power simulation in F34 ##################################### #****************************************************** # Copyright (c) 2019 Palmer Lab * # * # This is free software WITHOUT ANY WARRANTY. You can * # use or distribute it within Palmer lab * #****************************************************** # # This program is to simulation power over QTL effects # using GEMMA # ################ # prepare data # ################ if(FALSE){ #---------------------------------- SNPs<- c( "chr4.66414508", "chr6.81405109", "chr14.79312393", "chr7.87642045", "chr2.154464466", "chr19.21812298", "chr8.17410225", "chr14.82586326", "chr1.89192209", "chr7.87255156", "chr2.155091628", "chr15.67627183" ) Chrs<- strsplit(SNPs,"\\.") Pos<- sapply(Chrs, function(x)as.numeric(x[[2]])) Pos<- as.numeric(Pos) Chrs<- sapply(Chrs, function(x)as.numeric(substring(x[[1]],4))) QTL<- data.frame( SNP = SNPs, Chr = Chrs, Pos = Pos, Pop = c(rep("F39-43",7), rep("F34",5)), Pheno = c("body_weight", "body_weight", "body_weight", "coat_color_albino", "coat_color_agouti", "MA_D1_T30", "MA_D2_T30", "body_weight", "body_weight", "coat_color_albino", "coat_color_agouti", "MA_D2_T30"), check.names = FALSE, stringsAsFactors = FALSE ) idx<- order(Chrs, Pos) QTL<- QTL[idx,] table(QTL$Chr) rm(list=setdiff(ls(),"QTL")) dr<- "/projects/ps-palmer/xinxin/AIL_F34F3943_Feb5_2018_copy/AIL_F34F3943/F34to43_genos/Power_Analysis/" K<- read.table(paste(dr, "F34GBS/allExceptchrX.F34.AIL.dosages.sXX.txt", sep=""), header=FALSE) K<- as.matrix(K) GRMs<- vector("list", length(unique(QTL$Chr))) names(GRMs)<- unique(QTL$Chr) cnt<- 0 for(ch in unique(QTL$Chr)){ cnt<- cnt + 1 cat("Chr ", ch, "...", sep="") grmFl<- paste(dr,"F34GBS/autoExceptchr", ch, ".F34.AIL.dosages.sXX.txt", sep="") GRMs[[cnt]]<- as.matrix(read.table(grmFl, header=FALSE)) } rm(cnt, ch, grmFl) dsgFl<- paste(dr, "F34GBS/F34_allExceptchrX.F34.AIL.dosages", sep="") dsg<- read.table(dsgFl, header=FALSE, stringsAsFactors=FALSE) idx<- match(QTL$SNP, dsg[,1]) dsg<- dsg[idx,] rm(dsgFl, idx) pheno<- read.csv(paste(dr, "F34GBS/F34GBS_original.csv", sep=""), header=TRUE, na.string="NA", check.names=FALSE) colnames(pheno) # check if renaming is needed colnames(pheno)[c(7,4:6,3,2)]<- c("body_weight", "MA_D1_T30", "MA_D2_T30", "MA_D3_T30", "coat_color_agouti", "coat_color_albino") rm(dr) save.image("Rdata/pwr34.RData") #---------------------------------- } #################### ## basic functions # #################### gemmaScanf<- function(tid, tn, b1, pheno, SNPs, Chrs, dsg, K, GRMs, test=2, clean=TRUE){ # tid: task ID; QTL ffect = 0.1*tid # pheno: animal x phenotype # dsg: genotype dosage (SNP x Animal) # K: kinship matrix # GRMS: list of LOCO type G matrices # Chrs: chrosomes on which some SNPs to scan # tn: name of the trait to analyze # test: GEMMA -lmm option # clean: clean temporary files cwd<- getwd() on.exit({ setwd(cwd) if(clean) unlink(dr, recursive=TRUE, force=TRUE) }) dr<- paste(".TmpF34_", tn, "_", sprintf("%02d", tid), sep="") if(!dir.exists(dr)){ dir.create(dr) } setwd(dr) ## remove NA's and exteme outliers iqr<- IQR(pheno[,tn],na.rm=T) m<- median(pheno[,tn],na.rm=T) ii<- !is.na(pheno[,tn]) if(iqr > 0){ ii<- ii & pheno[,tn] > m - iqr*2.5 & pheno[,tn] < m + iqr*2.5 } ii<- (1:length(ii))[ii] if(FALSE){ ## change FALSE to TRUE to run GEMMA for variance components write.table(pheno[ii,tn], file="pdt", quote=FALSE, row.names=FALSE, col.names=FALSE) write.table(K[ii,ii], file="ksp", quote=FALSE, row.names=FALSE, col.names=FALSE) str<- paste("-o vc_", tn, sep="") str<- paste("-p pdt -k ksp -vc 2 ", str) str<- paste("../gemma-0.98.1 ", str) system(str, ignore.stdout=TRUE) vc<- system(paste("grep \"sigma2 estimates\" output/vc_", tn, ".log.txt", sep=""), intern=TRUE) vc<- strsplit(trimws(vc), "\\s+") vc<- as.numeric(vc[[1]][-(1:4)]) vc<- c(mean(pheno[ii,tn]), vc) }else{ vc<- estVC(y=pheno[ii,tn], v=K[ii,ii])$par } yf<- dsg[,ii+3]*b1*tid yf<- apply(yf,2,sum) if(FALSE){ ## if run original data write.table(pheno[ii,tn]+yf, file="pdt", quote=FALSE, row.names=FALSE, col.names=FALSE) }else{ ## if run similation sigma<- K[ii,ii]*vc[2] + diag(vc[3],length(ii)) yr<- rmvnorm(1, mean=rep(vc[1], length(ii)), sigma=sigma) yr<- as.vector(yr) write.table(yr+yf, file="pdt", quote=FALSE, row.names=FALSE, col.names=FALSE) } pv<- rep(NA, length(Chrs)) for(ch in unique(Chrs)){ idx<- Chrs == ch write.table(dsg[idx, c(1:3,ii+3)], file="gdt", quote=FALSE, row.names=FALSE, col.names=FALSE, sep=",") write.table(GRMs[[match(ch,names(GRMs))]][ii,ii], file="grm", quote=FALSE, row.names=FALSE, col.names=FALSE) str<- paste("-o out", sprintf("%02d",ch), sep="") str<- paste("-p pdt -g gdt -k grm -lmm ", test, str) str<- paste("../gemma-0.98.1 ", str) system(str, ignore.stdout=TRUE) p<- read.table(paste("output/out",sprintf("%02d",ch),".assoc.txt",sep=""),header=TRUE) pv[idx]<- p[match(SNPs[idx],sapply(p$rs,as.character)), ncol(p)] } setwd(cwd) pv } ################ ## simulations # ################ args<- commandArgs(TRUE) args if(length(args)==0){ tid<- 1 }else for(i in 1:length(args)){ eval(parse(text=args[[i]])) rm(i) } if(exists(".Random.seed")) rm(.Random.seed) rsd<- runif(100000,-1,1)*2^31 rsd<- floor(rsd) set.seed(rsd[tid]) cat("TID", tid,":",round(rnorm(5),5), "\n", sep=" ") library(QTLRel) library(mvtnorm) load("Rdata/pwr34.RData") nsim<- 2500 # number of simulations date() ##---------------------------------------------------- tn<- "body_weight" # trait to analyze idx<- QTL$Pop == "F34" & QTL$Pheno == tn # c(8,9) # which of SNPs to consider b1<- 0.02 # (1:50)*b1 effects to simulate pv<- NULL for(i in 1:nsim){ cat("...............", i, "...............") pv<- rbind(pv, gemmaScanf(tid, tn=tn, b1=b1, pheno=pheno, SNPs=QTL$SNP[idx], Chrs=QTL$Chr[idx], dsg=dsg[idx,], K=K, GRMs=GRMs, test=2, clean=TRUE) ) } colnames(pv)<- QTL$SNP[idx] write.csv(pv, file=paste("results/pv34_", tn, "_", sprintf("%02d", tid), ".csv",sep=""), row.names=FALSE, quote=FALSE) date() ##---------------------------------------------------- tn<- "MA_D2_T30" idx<- QTL$Pop == "F34" & QTL$Pheno == tn # c(12) b1<- 0.02 pv<- NULL for(i in 1:nsim){ cat("..........", i, "...............") pv<- rbind(pv, gemmaScanf(tid, tn=tn, b1=b1, pheno=pheno, SNPs=QTL$SNP[idx], Chrs=QTL$Chr[idx], dsg=dsg[idx,], K=K, GRMs=GRMs, test=2, clean=TRUE) ) } colnames(pv)<- QTL$SNP[idx] write.csv(pv, file=paste("results/pv34_", tn, "_", sprintf("%02d", tid), ".csv",sep=""), row.names=FALSE, quote=FALSE) date() ##---------------------------------------------------- tn<- "coat_color_agouti" idx<- QTL$Pop == "F34" & QTL$Pheno == tn # c(11) b1<- 0.0065 pv<- NULL for(i in 1:nsim){ cat("...............", i, "...............") pv<- rbind(pv, gemmaScanf(tid, tn=tn, b1=b1, pheno=pheno, SNPs=QTL$SNP[idx], Chrs=QTL$Chr[idx], dsg=dsg[idx,], K=K, GRMs=GRMs, test=2, clean=TRUE) ) } colnames(pv)<- QTL$SNP[idx] write.csv(pv, file=paste("results/pv34_", tn, "_", sprintf("%02d", tid), ".csv",sep=""), row.names=FALSE, quote=FALSE) date() ##---------------------------------------------------- tn<- "coat_color_albino" idx<- QTL$Pop == "F34" & QTL$Pheno == tn # c(10) b1<- 0.0065 pv<- NULL for(i in 1:nsim){ cat("...............", i, "...............") pv<- rbind(pv, gemmaScanf(tid, tn=tn, b1=b1, pheno=pheno, SNPs=QTL$SNP[idx], Chrs=QTL$Chr[idx], dsg=dsg[idx,], K=K, GRMs=GRMs, test=2, clean=TRUE) ) } colnames(pv)<- QTL$SNP[idx] write.csv(pv, file=paste("results/pv34_", tn, "_", sprintf("%02d", tid), ".csv",sep=""), row.names=FALSE, quote=FALSE) date() q("no") ############ ## summary # ############ tn<- 1 if(tn ==1){ tn<- "body_weight" # trait to analyze b1<- 0.02 # (1:50)*b1 effects to simulate }else if(tn == 2){ stop("no MA_D1!") }else if(tn == 3){ tn<- "MA_D2_T30" b1<- 0.02 }else if(tn == 4){ stop("no MA_D1!") }else if(tn == 5){ tn<- "coat_color_agouti" b1<- 0.0065 }else if(tn == 6){ tn<- "coat_color_albino" b1<- 0.0065 } nsim<- 2500 opt<- 1 if(opt ==1 ){ str<- "" }else if(opt == 2){ str<- "_0.05" } pwr<- sd<- pv<- NULL for(tid in 1:50){ pvTmp<- read.csv(paste("results/pv34_", tn, "_", sprintf("%02d", tid), ".csv",sep=""), header=TRUE) pv<- rbind(pv,pvTmp) if(opt == 1) lpv<- -log10(pvTmp) > 4.852154 else lpv<- -log10(pvTmp) > -log10(0.05) pwr<- rbind(pwr, apply(lpv, 2, mean)) sd<- rbind(sd, apply(lpv, 2, sd)) } b<- (1:50)*b1 rownames(pwr)<- rownames(sd)<- as.character(b) # save(b, pwr, sd, nsim, tn, file=paste("Rdata/pwr34_", tn, str, ".RData",sep="")) ## run the following in local # load(file=paste("Rdata/pwr34_", tn, str, ".RData",sep="")) pdf(paste("graphs/pwr34_", tn, str, ".pdf",sep="")) matplot(b, pwr, type="b", cex=0.5, xlab="QTL Effect", ylab="Simulated Power", main=tn) legend("topleft", legend=colnames(pwr), pch=as.character(1:ncol(pwr)), col=1:ncol(pwr), lty=1:ncol(pwr)) dev.off() #################### # the end # ########### sef<- function(pheno, dsg, Chrs, tn){ # pheno: animal x phenotype # dsg: genotype dosage (SNP x Animal) # K: kinship matrix # Chrs: chrosomes on which some SNPs to scan # tn: name of the trait to analyze ## remove NA's and exteme outliers iqr<- IQR(pheno[,tn],na.rm=T) m<- median(pheno[,tn],na.rm=T) ii<- !is.na(pheno[,tn]) if(iqr > 0){ ii<- ii & pheno[,tn] > m - iqr*2.5 & pheno[,tn] < m + iqr*2.5 } ii<- (1:length(ii))[ii] se<- matrix(NA, nrow=length(Chrs), ncol=2) for(ch in unique(Chrs)){ idx<- match(ch,names(GRMs)) vc<- estVC(y=pheno[ii,tn], v=GRMs[[idx]][ii,ii]) sigma<- GRMs[[idx]][ii,ii]*vc$par[2] + diag(vc$par[3],length(ii)) idx<- Chrs == ch dtf<- data.frame(y=pheno[ii,tn], t(dsg[idx, ii+3])) g<- gls(y ~ ., vc=vc, data=dtf) se[idx,]<- g[-1,1:2] } colnames(se)<- c("Estimate", "Std. Error") se } library(QTLRel) se<- NULL load("Rdata/pwr34.RData") tn<- "body_weight" idx<- c(11) Chrs<- Chrs[idx] Pos<- Pos[idx] SNPs<- SNPs[idx] dsg<- dsg[idx,] tse<- sef(pheno, dsg,Chrs, tn) rownames(tse)<- SNPs se<- rbind(se, tse) load("Rdata/pwr34.RData") tn<- "MA_D2_Total_30" idx<- c(12) Chrs<- Chrs[idx] Pos<- Pos[idx] SNPs<- SNPs[idx] dsg<- dsg[idx,] tse<- sef(pheno, dsg,Chrs, tn) rownames(tse)<- SNPs se<- rbind(se, tse) load("Rdata/pwr34.RData") tn<- "MA_D3_Total_30" idx<- c(8) Chrs<- Chrs[idx] Pos<- Pos[idx] SNPs<- SNPs[idx] dsg<- dsg[idx,] tse<- sef(pheno, dsg, Chrs, tn) rownames(tse)<- SNPs se<- rbind(se, tse) load("Rdata/pwr34.RData") tn<- "coat_color_agouti" idx<- c(2) Chrs<- Chrs[idx] Pos<- Pos[idx] SNPs<- SNPs[idx] dsg<- dsg[idx,] tse<- sef(pheno, dsg, Chrs, tn) rownames(tse)<- SNPs se<- rbind(se, tse) load("Rdata/pwr34.RData") tn<- "coat_color_albino" idx<- c(6) Chrs<- Chrs[idx] Pos<- Pos[idx] SNPs<- SNPs[idx] dsg<- dsg[idx,] tse<- sef(pheno, dsg, Chrs, tn) rownames(tse)<- SNPs se<- rbind(se, tse) se # simulate F34 # Estimate Std. Error #chr14.82586326 -0.1598899 0.06663027 #chr15.67235072 -94.4524563 124.84243867 #chr7.113250866 -91.5336565 545.88800945 #chr2.155091628 0.6328329 0.02525096 #chr7.87255156 -0.5764215 0.02058022 load("Rdata/pwr34_body_weight.RData") b<- as.numeric(rownames(pwr)) b<- c(-b[length(b):1], 0, b) z<- b/0.06663027 p<- rbind(as.matrix(pwr[nrow(pwr):1,]), 0, pwr) b34<- -qnorm(1-0.5*2.87e-5) b39<- -qnorm(1-0.5*2.63e-6) w<- qnorm(1-0.05/4) plot(z, p, type="b", xlab="z", ylab="Power", main="F39 MA D1") abline(v=b34, col=2); abline(v=b34-w, col=2, lty=2); abline(v=b34+w, col=2, lty=2) abline(v=b39, col=1); abline(v=b39-w, col=1, lty=2); abline(v=b39+w, col=1, lty=2) ################################### #b. Power simulations job submission ################################### #! /bin/bash #PBS -N pwr ##PBS -l mem=239Gb #PBS -l walltime=98:00:00 #PBS -j oe #PBS -o pwr_${PBS_ARRAYID}.log #PBS -l nodes=1:ppn=6 #PBS -q home #PBS -t 1-50%8 cd $PBS_O_WORKDIR NPROCS=`wc -l < $PBS_NODEFILE` NNODES=`uniq $PBS_NODEFILE | wc -l` ### log info echo -n ' Started: ' ; date echo "Environment: $PBS_ENVIRONMENT" echo " Queue: $PBS_QUEUE" echo " Job ID: $PBS_JOBID" echo "Master node: $PBS_O_HOST" echo " No. CPU(s): $NPROCS" echo "No. node(s): $NNODES node(s):" echo "-----------------------" uniq $PBS_NODEFILE | sort echo "-----------------------" ### end of log info module load R R CMD BATCH "--args tid=$PBS_ARRAYID" pwr34.R Rout/pwr34_$(printf "%02d" $PBS_ARRAYID)-$(date +%y%m%d%H%M%S).Rout ######################################################################################## ######################################################################################## #8. Heritability ######################################################################################## ######################################################################################## ####################################### #Estimate SNP heritability using GEMMA ######################################3 #kinship is calculated using dosages from all chromosomes; heritability is in GEMMA output .log file for file in *.AIL.log.txt; do grep '## pve estimate in the null model ' ${file} | sed 's/## pve estimate in the null model = //g' >> AllPheno_heritability.txt done ##################################### #Estimate SNP heritability using GCTA ##################################### ~/gcta/gcta64 --reml --pheno pheno.file --grm chrALL.grm --covar covar.file --qcovar qcovar.file --mpheno ${n} --out output.${n} ######################################################################################## ######################################################################################## #9. Genetic correlations ######################################################################################## ######################################################################################## ~/gcta/gcta64 --reml-bivar --grm chrALL.grm --pheno pheno.file --reml-bivar-lrt-rg 1 --out output.file