######################################################################################################
######################################################################################################
#0. Beagle Imputation: impute F34 Array/GBS, F39-43 GBS, and F34-F3943 GBS to LG/SM reference panels##
######################################################################################################
######################################################################################################

#######################
#! /bin/bash

#PBS -S /bin/bash
#PBS -N ImputetoRef_Beagle
#PBS -l walltime=8:00:00
#PBS -j oe
#PBS -l nodes=1:ppn=8
#PBS -t 1-19


chrom=`head -$PBS_ARRAYID auto_chr.list | tail -1`

/usr/lib/jvm/jre-1.8.0/bin/java -Xms12g -Xmx12g -jar beagle.22Feb16.8ef.jar \
	gt=${chrom}.recode.vcf.gz \
	out=${chrom}.refImp \
	chrom=${chrom} \
        ref=LGSM.${chrom}.onlyPoly.phased.vcf.gz \
	map=Beag4_Map.lgsm_angsd.allChr.map \
        impute=true \
	window=70000 \
	overlap=32000 \
	niterations=10 \
	err=0.03 \
	nthreads=4


echo "Done running command."


########################################################################################
########################################################################################
#1. GWAS dosage prep
########################################################################################
########################################################################################

#################
#a. DR2 filtering
#################

#grep columns number 1-9
zcat chrALL.vcf.gz | grep 'chr' | cut -f 1-9 > snpDR2filter_chrALL.txt

#remove INFO rows
sed '/^##/d' snpDR2filter_chrALL.txt > snpD2filter_removeINFO.txt

#replace every semicolon with a tab, replace AR2=number with only the number, extract useful columns
sed 's/;/\t/g' snpD2filter_removeINFO.txt | sed 's/DR2=//g' | sed 's/AR2=//g' | sed 's/AF=//g' | cut -f 1-3,8-10 > snpDR2filter_chrALL_trimmed.txt

#threshold=0.9
awk -v threshold="0.9" '$5 >= threshold' snpDR2filter_chrALL_trimmed.txt > DR2_0.9_chrALL_SNPs.txt

#concatenate field 1 and filed 2 to create snp annotation for extracting SNPs from original files
cat DR2_0.9_chrALL_SNPs.txt | awk '{print $1 "." $2}' > DR2_0.9_chrALL_snpanno.txt


#filter on DR2 0.9
vcftools --gzvcf chrALL.vcf.gz --snps DR2_0.9_chrALL_snpanno.txt --recode --out DR2_chrALL

####################################
#b. Filter on heterozygosity and MAF 
####################################

#filter on maf and hwe in autosomes

vcftools --vcf DR2_0.9_chrALL.recode.vcf --maf 0.1 --hwe 0.000001  --recode --out  DR2_0.9_MAF_HWE_chrALL

##################################
#c. Make dosage files for each chr
##################################

bgzip DR2_0.9_MAF_HWE_chrALL.recode.vcf

tabix -p vcf DR2_0.9_MAF_HWE_chrALL.recode.vcf.gz

#########################
#! /bin/bash

#PBS -N bcftools_LOCO_auto_dosages
#PBS -l walltime=6:00:00
#PBS -j oe
#PBS -l nodes=1:ppn=1
#PBS -t 1-19

chrom=`head -$PBS_ARRAYID mice_autoChr.list | tail -1`

bcftools query -f '%CHROM\.%POS %REF %ALT[ %DS]\n' -r ${chrom} DR2_0.9_MAF_HWE_chrALL.recode.vcf.gz -o ${chrom}.AIL.dosages

bcftools query -f '%CHROM\.%POS %POS %CHROM\n' -r ${chrom} DR2_0.9_MAF_HWE_chrALL.recode.vcf.gz -o ${chrom}.AIL.snpinfo

#concatenate all chromosomes into one .dosage and one .snpinfo file
cat chr*.AIL.dosages >> chrALL.AIL.dosages
cat chr*.AIL.snpinfo >> chrALL.AIL.snpinfo

#######################
#d. Omit one chromosome
#######################

#! /bin/bash

#PBS -N Omit_chr
#PBS -l walltime=6:00:00
#PBS -j oe
#PBS -l nodes=1:ppn=1
#PBS -t 1-19


chrom=`head -$PBS_ARRAYID mice_autoChr.list | tail -1`

grep -vw ${chrom} chrALL.AIL.dosages > autoExcept${chrom}.AIL.dosages
grep -vw ${chrom} chrALL.AIL.snpinfo > autoExcept${chrom}.AIL.snpinfo


########################################################################################
########################################################################################
#2. Estimate kinship matrix
########################################################################################
########################################################################################

#! /bin/bash

#PBS -N Estimate_relatedness_matrix_gk1
#PBS -l walltime=2:00:00
#PBS -j oe
#PBS -l nodes=1:ppn=1
#PBS -t 1-19

#####ESTIMATE RELATEDNESS MATRIX FROM GENOTYPES#####

chrom=`head -$PBS_ARRAYID auto_chr.list | tail -1`

gemma-0.98.1-linux-static -g autoExcept${chrom}.AIL.dosages -p pheno.txt  -gk 1 -o autoExcept${chrom}.AIL.dosages

#######################################################


#! /bin/bash

#PBS -N Estimate_relatedness_matrix_gk2
#PBS -l walltime=2:00:00
#PBS -j oe
#PBS -l nodes=1:ppn=1
#PBS -t 1-19

#####ESTIMATE RELATEDNESS MATRIX FROM GENOTYPES#####

chrom=`head -$PBS_ARRAYID auto_chr.list | tail -1`

gemma-0.98.1-linux-static -g autoExcept${chrom}.AIL.dosages -p pheno.txt  -gk 2 -o autoExcept${chrom}.AIL.dosages


########################################################################################
########################################################################################
#3. RUN GWAS on GEMMA
########################################################################################
########################################################################################

#Example command line in file GWAS.CMDS:

gemma-0.98.1-linux-static -g chr1.AIL.dosages -p pheno.txt -n 1	-k autoExceptchr1.F34_array_SI.AIL.dosages.cXX.txt -a chr1.AIL.snpinfo -c cov.txt -lmm 4 -o chr1.pheno.1.AIL

####################################
##!/bin/bash

#PBS -N F34
#PBS -S /bin/bash
#PBS -l walltime=8:00:00
#PBS -l nodes=1:ppn=2
#PBS -j oe
#PBS -t 1-120


echo "Running command:" `tail -n +$PBS_ARRAYID GWAS.CMDS | head -1`
cat GWAS.CMDS  | xargs -P 8 -n 1 -I CMD bash -c CMD
echo "Done running GEMMA."


########################################################################################
########################################################################################
#4. Clumping
########################################################################################
########################################################################################

~/plink-1.90/plink --vcf DR2_0.9_MAF_HWE_chrALL.recode.vcf.gz --make-bed --recode --out DR2_0.9_MAF_HWE_chrALL


#convert GEMMA association file to PLINK format in R.

module load R
R

setwd("~/GWAS_results")

Pheno_lu <- read.table("Pheno_lookup.txt", header=FALSE, row.names=1, check.names = FALSE, stringsAsFactors = FALSE)

library(qqman)
file.names <- dir("~/GWAS_results", pattern ="*.assoc.txt.sorted.nochr")

for (i in 1: length(file.names)){
  pheno_name <- paste(read.table(text = file.names[i], sep = ".", as.is = TRUE)$V2, read.table(text = file.names[i], sep = ".", as.is = TRUE)$V4, sep=".")
  Pheno_char <- Pheno_lu[pheno_name, 1]
  Gfile <- read.table(file.names.MA[i],header=FALSE, check.names = FALSE, stringsAsFactors=FALSE)
  colnames(Gfile) <- c("chr",	"rs",	"ps",	"n_miss",	"allele1",	"allele0",	"af",	"beta",	"se",	"logl_H1",	"l_remle",	"l_mle",	"p_wald",	"p_lrt",	"p_score")
  Pfile <- data.frame(CHR=Gfile$chr, SNP=paste("chr", Gfile$rs, sep=""), BP=Gfile$ps, P=Gfile$p_lrt, A1=Gfile$allele1, A2=Gfile$allele0, FRQ=Gfile$af, BETA=Gfile$beta, SE=Gfile$se)
  write.table(Pfile, paste(Pheno_char, ".assoc.PLINK", sep=""), row.names=FALSE, sep="\t")
}

#r2 0.1, 12150kb
for file in *.assoc.PLINK; do
cat ${file} | sed 's/"//g' > ${file}.txt
~/plink-1.90/plink --file DR2_0.9_MAF_HWE_chrALL --clump-p1 1.28E-05 --clump-r2 0.1 --clump-kb 12150 --clump ${file}.txt --out ${file}.R0.1.12150kb
done

for file in *.assoc.PLINK.R0.1.12150kb.clumped;do
cat ${file} >> pheno_r0.1_12150kb_clumped_SNPs.txt
done

for file in *.assoc.PLINK.R0.1.12150kb.clumped;do
cat ${file} |  awk '{print $1, $3, $4, $5}' | sed '/^   /d' | sed 's/.*/& '${file}'/' | sed '/^CHR/d' | sed 's/ /\t/g' >> pheno_r0.1_12150kb_clumped_SNPs_w_pheno.txt
done

########################################################################################
########################################################################################
#5. Credible Set analysis
########################################################################################
########################################################################################

######################################
#a. make metal format association file
######################################

#Generate metal formatted files: The file must have 2 columns: markers (SNPs), and p-values.
cat chr1.pheno.1.AIL.assoc.txt.sorted.nochr >> chrALL.pheno.1.AIL.assoc.txt.sorted.nochr

#make metal files

for file in chrALL.*.AIL.assoc.txt.sorted.nochr;do
cat ${file} | awk '{print "chr"$2, $14}'  > ${file}_locuszoom_metal.txt
done

for file in chrALL.*.AIL.assoc.txt.sorted.nochr_locuszoom_metal.txt;do
sed  -i '1i MarkerName P-value' ${file}
done

######################
#b. calculate ld files
######################

#Example ld calculation
~/plink-1.90/plink --bfile DR2_0.9_MAF_HWE_chrALL  --chr 4 --nonfounders --r2  --ld-snp chr4.66414508 --ld-window 100000 -ld-window-kb 6000 --ld-window-r2 0 --out pheno.assoc.PLINK.R0.1.12150kb.clumped.chr4.66414508

for file in pheno.assoc.PLINK.R0.1.12150kb.clumped.*.ld;do
cat ${file} | awk '{print $6, $3, $7}' | awk '{print $0, "NA"}' |  sed -e '1s/SNP_B/snp1/' -e '1s/SNP_A/snp2/' -e '1s/R2/rsquare/' -e '1s/NA/dprime/' > ${file}_locuszoom_ld.txt
done

#############################
#c. Run credible set analysis
#############################

#example credible set analysis for F34GBS

module load R
R

library(data.table)

F34GBS<-read.table("pheno_r0.1_12150kb_clumped_SNPs_w_pheno.txt",header=F, stringsAsFactors = FALSE)
F34GBS.new <- data.frame(trait=F34GBS$V5, topsnp=F34GBS$V2, topsnp_P=F34GBS$V4, chr=F34GBS$V1)

for(i in 1:nrow(F34GBS.new)){
  
  F34GBS.new$pos[i]<-strsplit(as.character(F34GBS.new$topsnp),split="[.]")[[i]][2]
  
}

#import clumpled and metal file matching table
CMetal <- read.table("F34GBS_clumpled_metal_filematch.txt", header=T, stringsAsFactors=FALSE)

#append metal file names to data frame

# metal file names format

morder <- c()
for (i in 1: nrow(F34GBS.new)){
  idx <- which(CMetal$Clumped_Significant_SNP_File == F34GBS.new$trait[i])
  morder <- c(morder, idx)
}

F34GBS.new$metal <- CMetal$Metal_format_file_for_all_SNPs[morder]

#append ld file names to data frame

F34GBS.new$ld <- paste0(F34GBS.new$trait,".",F34GBS.new$topsnp, ".ld")

#all the original metal files are for SNPs across all CHR. Subset SNPs 
# where top SNP resides 

all_metal_chr <- list()
for (i in 1:nrow(F34GBS.new)){
  data<-fread(F34GBS.new[i,6],header=T,stringsAsFactors = F)
  data.1 <- data[data$MarkerName %like% paste0("chr", F34GBS.new[i,4], "."), ]
  all_metal_chr[[i]] <- data.1
}

names(all_metal_chr)<-F34GBS.new$metal

for(i in seq_along(names(all_metal_chr))){
  colnames(all_metal_chr[[i]])<-c("SNP","P")
}

#LD files are ld R2 per chr
read_file<-function(x){
  data<-fread(x,header=T,stringsAsFactors = F)
  return(data)
}

LD<-lapply(F34GBS.new$ld,read_file)

#remove extra columns from LD file
#CHR_A      BP_A          SNP_A CHR_B      BP_B          SNP_B         R2
for (i in seq_along(1:length(LD))){
  
  LD[[i]]$CHR_A<-NULL
  LD[[i]]$BP_A<-NULL
  LD[[i]]$SNP_A<-NULL
  LD[[i]]$CHR_B<-NULL
  LD[[i]]$BP_B<-NULL
  
}

for (i in seq_along(1:length(LD))){
  
  colnames(LD[[i]])<-c("SNP","R2")
}

##change the "." in SNP name to ":"
#for (i in seq_along(1:length(LD))){
#  LD[[i]]$SNP<-gsub("\\.",":",LD[[i]]$SNP)
#  colnames(LD[[i]])<-c("SNP","R2")
#}

summary<-replicate(nrow(F34GBS.new),data.frame())
F34GBS.new$trait_snp <- paste0(F34GBS.new$trait, ".", F34GBS.new$topsnp)
names(summary)<- F34GBS.new$trait_snp


for (i in seq_along(names(summary))){
  
  summary[[i]]<-merge(all_metal_chr[[i]],LD[[i]],by="SNP",all=F)
}

#credible set function
getCredibleSNP <- function(snp, logProb, threshold=y){
  prob <- exp(logProb) 
  prob_normed <- prob/sum(prob)
  prob_cumsum <- cumsum(sort(prob_normed, decreasing=TRUE))
  nSNP <- as.numeric(which.max(prob_cumsum>threshold ))
  if(nSNP<=0){ nSNP=1 }
  credible_set <- snp[order(prob_normed, decreasing=TRUE)[1:nSNP]]
  select <- rep(FALSE, length(snp))
  select[order(prob_normed, decreasing=TRUE)[1:nSNP]] <- T 
  ret <- list(nSNP = nSNP, 
              prob_normed = prob_normed, 
              prob_cumsum = prob_cumsum[rank(-prob_normed, ties.method="random")], 
              credible_set=credible_set, 
              select=select 
  )
}

R2_threshold <- 0.8
y=0.99
for (i in seq_along(names(summary))){
  dat<-summary[[i]]
  select <- !is.na(dat$R2) & dat$R2 > R2_threshold
  max_SCZ_P <- min(dat$P[select])
  max_SCZ_snp <- as.character(dat$SNP[select][which.min(dat$P[select])])
  ret <- getCredibleSNP(as.character(dat$SNP[select]), qchisq(as.numeric(dat$P[select]), 1, low=F)/2)
  inCredible <- rep(NA, length(dat$P))
  inCredible[match( dat$SNP[select], dat$SNP )] <- 0
  inCredible[match( dat$SNP[select][ret$select], dat$SNP )] <- 1
  prob_norm <- rep(NA, length(dat$P))
  prob_norm[match(dat$SNP[select], dat$SNP )] <- ret$prob_normed
  prob_cumsum <- rep(NA, length(dat$P))
  prob_cumsum[match(dat$SNP[select], dat$SNP )] <- ret$prob_cumsum
  result <- cbind(dat, inCredible=inCredible, probNorm=prob_norm, cumSum=prob_cumsum)
  summary[[i]]  <- result
}

#write out credible set summary tables
for (i in seq_along(names(summary))){
  summary[[i]]$log10P <- -log10(as.numeric(summary[[i]]$P))
}

for (i in seq_along(names(summary))){
  summary[[i]]= as.data.table(summary[[i]])
  temp=summary[[i]]
  temp[, c("CHR", "BP") := tstrsplit(SNP, ":")]
  summary[[i]] <- temp
}

CS <-summary
for(i in seq_along(names(CS))){
  
  df<-CS[[i]]
  colnames(df)[which(colnames(df)=="SNP")]<-"snp"
  colnames(df)[which(colnames(df)=="CHR")]<-"chr"
  colnames(df)[which(colnames(df)=="BP")]<-"pos"
  colnames(df)[which(colnames(df)=="probNorm")]<-"pp"
  df$group<-NA
  df$color<-NA
  idx<-which(df$inCredible==1)
  df$group[idx]<-"Credible Set"
  df$color[idx]<-"purple"
  df$chr <- gsub("chr","",df$chr)
  write.table(df[,c("snp","chr","pos","pp","group","color")],names(CS)[i],"_R20.8_credibleSet.txt"),row.names=F,quote=F,sep = "\t" )
}


########################################################################################
########################################################################################
#6. Calculating thresholds using MultiTrans 
########################################################################################
########################################################################################


#############################################
#a. Estimate variance components using GEMMMA
#############################################


#!/bin/bash

#PBS -N GEMMA_VC_est
#PBS -S /bin/bash
#PBS -l walltime=12:00:00
#PBS -l nodes=1:ppn=2
#PBS -j oe
#PBS -t 1-3

echo "Running command:" `tail -n +$PBS_ARRAYID VC_est_F343943.CMDS | head -1`
cat VC_est_F343943.CMDS  | xargs -P 8 -n 1 -I CMD bash -c CMD
echo "Done running GEMMA Variance Componence Esimtation."


#Extract Sigma2 values from variance component .log output files

for file in *.AIL.VC2.*; do
grep '## sigma2 estimates' ${file} | sed 's/## sigma2 estimates =   //g' >> pheno_VC2.txt
done


###################################
#b. Dosage file prep for MultiTrans
###################################

for file in chr*.AIL.dosages; do
cat ${file} | cut -f 1,2,3 --complement -d" " > ${file}.4MultiTrans
done

wget http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/rowsToCols

#!/bin/bash

#PBS -N rowtocol
#PBS -S /bin/bash
#PBS -l walltime=12:00:00
#PBS -l nodes=1:ppn=4
#PBS -j oe
#PBS -t 1-20

chrom=`head -$PBS_ARRAYID all_auto_X_chr.list | tail -1`

cat ${chrom}.AIL.dosages.4MultiTrans | $HOME/rowsToCols stdin ${chrom}.AIL.dosages.4MultiTrans.transpose


#####################################################
#c. Generate correlation matrix r in the rotate space
#####################################################

#!/bin/bash

#PBS -N MultiTrans_corr
#PBS -S /bin/bash
#PBS -l walltime=12:00:00
#PBS -l nodes=1:ppn=4
#PBS -j oe
#PBS -t 1-19

chrom=`head -$PBS_ARRAYID auto_chr.list | tail -1`

mkdir genR_autoK_CR_${chrom}

module load R

R CMD BATCH --args \
-Xpath="~/${chrom}.AIL.dosages.4MultiTrans.transpose" \
-Kpath="~/autoExcept${chrom}.AIL.dosages.cXX.txt" \
-VCpath="~/pheno_VC2.txt" \
-outputPath="~/genR_autoK_CR_${chrom}" \
~/MultiTrans/generateR.R ~/generateR_autoK_CR_${chrom}.log

######################################
#d. Generate correlation band matrix c
######################################

#!/bin/bash

#PBS -N MultiTrans_matrix_C
#PBS -S /bin/bash
#PBS -l walltime=12:00:00
#PBS -l nodes=1:ppn=4
#PBS -j oe
#PBS -t 1-20

chrom=`head -$PBS_ARRAYID all_auto_X_chr.list | tail -1`

~/java -jar ~/MultiTrans/generateC/generateC.jar 1000 ~/genR_autoK_CR_${chrom}/r.txt ~/genR_autoK_CR_${chrom}/c.txt

#########
#e. SLIDE
#########

#!/bin/bash

#PBS -N MultiTrans_SLIDE
#PBS -S /bin/bash
#PBS -l walltime=12:00:00
#PBS -l nodes=1:ppn=4
#PBS -j oe
#PBS -t 1-20


chrom=`head -$PBS_ARRAYID all_auto_X_chr.list | tail -1`

####e.1. slide_1prep
~/slide.1.0/slide_1prep -C ~/genR_autoK_CR_${chrom}/c.txt 1000 ~/genR_autoK_CR_${chrom}/prep

####e.2. slide_2run
~/slide.1.0/slide_2run ~/genR_autoK_CR_${chrom}/prep ~/genR_autoK_CR_${chrom}/maxstat 10000000 1234

#####e.3. slide_3sort
~/slide.1.0/slide_3sort ~/sorted \
~/genR_autoK_CR_chr1/maxstat \
~/genR_autoK_CR_chr2/maxstat \
~/genR_autoK_CR_chr3/maxstat \
~/genR_autoK_CR_chr4/maxstat \
~/genR_autoK_CR_chr5/maxstat \
~/genR_autoK_CR_chr6/maxstat \
~/genR_autoK_CR_chr7/maxstat \
~/genR_autoK_CR_chr8/maxstat \
~/genR_autoK_CR_chr9/maxstat \
~/genR_autoK_CR_chr10/maxstat \
~/genR_autoK_CR_chr11/maxstat \
~/genR_autoK_CR_chr12/maxstat \
~/genR_autoK_CR_chr13/maxstat \
~/genR_autoK_CR_chr14/maxstat \
~/genR_autoK_CR_chr15/maxstat \
~/genR_autoK_CR_chr16/maxstat \
~/genR_autoK_CR_chr17/maxstat \
~/genR_autoK_CR_chr18/maxstat \
~/genR_autoK_CR_chr19/maxstat \
~/genR_autoK_CR_chrX/maxstat

####e.4. slide_4correct
~/slide.1.0/slide_4correct -t ~/sorted ~/threshold.txt MultiTrans.output

########################################################################################
########################################################################################
#7. Power analysis
########################################################################################
########################################################################################

#####################################
#a. Example power simulation in F34
#####################################

#******************************************************
# Copyright (c) 2019 Palmer Lab                       *
#                                                     *
# This is free software WITHOUT ANY WARRANTY. You can *
# use or distribute it within Palmer lab              *
#******************************************************

#
# This program is to simulation power over QTL effects
#    using GEMMA
#

################
# prepare data #
################
if(FALSE){
#----------------------------------
SNPs<- c(
   "chr4.66414508",
   "chr6.81405109",
   "chr14.79312393",
   "chr7.87642045",
   "chr2.154464466",
   "chr19.21812298",
   "chr8.17410225",
   "chr14.82586326",
   "chr1.89192209",
   "chr7.87255156",
   "chr2.155091628",
   "chr15.67627183"
)
Chrs<- strsplit(SNPs,"\\.")
Pos<- sapply(Chrs, function(x)as.numeric(x[[2]]))
   Pos<- as.numeric(Pos)
Chrs<- sapply(Chrs, function(x)as.numeric(substring(x[[1]],4)))
QTL<- data.frame(
                SNP = SNPs,
                Chr = Chrs,
                Pos = Pos,
                Pop = c(rep("F39-43",7), rep("F34",5)),
              Pheno = c("body_weight", "body_weight", "body_weight", "coat_color_albino", "coat_color_agouti", "MA_D1_T30",  "MA_D2_T30", "body_weight", "body_weight", "coat_color_albino", "coat_color_agouti", "MA_D2_T30"),
        check.names = FALSE,
   stringsAsFactors = FALSE
)

idx<- order(Chrs, Pos)
QTL<- QTL[idx,]

table(QTL$Chr)

rm(list=setdiff(ls(),"QTL"))

dr<- "/projects/ps-palmer/xinxin/AIL_F34F3943_Feb5_2018_copy/AIL_F34F3943/F34to43_genos/Power_Analysis/"
K<- read.table(paste(dr, "F34GBS/allExceptchrX.F34.AIL.dosages.sXX.txt", sep=""), header=FALSE)
   K<- as.matrix(K)
GRMs<- vector("list", length(unique(QTL$Chr)))
   names(GRMs)<- unique(QTL$Chr)
cnt<- 0
for(ch in unique(QTL$Chr)){
   cnt<- cnt + 1
   cat("Chr ", ch, "...", sep="")
   grmFl<- paste(dr,"F34GBS/autoExceptchr", ch, ".F34.AIL.dosages.sXX.txt", sep="")
   GRMs[[cnt]]<- as.matrix(read.table(grmFl, header=FALSE))
}
rm(cnt, ch, grmFl)

dsgFl<- paste(dr, "F34GBS/F34_allExceptchrX.F34.AIL.dosages", sep="")
dsg<- read.table(dsgFl, header=FALSE, stringsAsFactors=FALSE)
   idx<- match(QTL$SNP, dsg[,1])
   dsg<- dsg[idx,]
rm(dsgFl, idx)

pheno<- read.csv(paste(dr, "F34GBS/F34GBS_original.csv", sep=""), header=TRUE, na.string="NA", check.names=FALSE)
   colnames(pheno) # check if renaming is needed
   colnames(pheno)[c(7,4:6,3,2)]<- c("body_weight", "MA_D1_T30", "MA_D2_T30", "MA_D3_T30", "coat_color_agouti", "coat_color_albino")

rm(dr)

save.image("Rdata/pwr34.RData")

#----------------------------------
}

####################
## basic functions #
####################

gemmaScanf<- function(tid, tn, b1, pheno, SNPs, Chrs, dsg, K, GRMs, test=2, clean=TRUE){
# tid: task ID; QTL ffect = 0.1*tid
# pheno: animal x phenotype
# dsg: genotype dosage (SNP x Animal)
# K: kinship matrix
# GRMS: list of LOCO type G matrices
# Chrs: chrosomes on which some SNPs to scan
# tn: name of the trait to analyze
# test: GEMMA -lmm option
# clean: clean temporary files
   cwd<- getwd()
   on.exit({
      setwd(cwd)
      if(clean) unlink(dr, recursive=TRUE, force=TRUE)
   })

   dr<- paste(".TmpF34_", tn, "_", sprintf("%02d", tid), sep="")
   if(!dir.exists(dr)){
      dir.create(dr)
   }
   setwd(dr)

   ## remove NA's and exteme outliers
   iqr<- IQR(pheno[,tn],na.rm=T)
   m<- median(pheno[,tn],na.rm=T)
   ii<- !is.na(pheno[,tn])
   if(iqr > 0){
      ii<- ii & pheno[,tn] > m - iqr*2.5 & pheno[,tn] < m + iqr*2.5
   }
      ii<- (1:length(ii))[ii]
if(FALSE){
## change FALSE to TRUE to run GEMMA for variance components
   write.table(pheno[ii,tn], file="pdt", quote=FALSE, row.names=FALSE, col.names=FALSE)
   write.table(K[ii,ii], file="ksp", quote=FALSE, row.names=FALSE, col.names=FALSE)

   str<- paste("-o vc_", tn, sep="")
      str<- paste("-p pdt -k ksp -vc 2 ", str)
      str<- paste("../gemma-0.98.1 ", str)
   system(str, ignore.stdout=TRUE)

   vc<- system(paste("grep \"sigma2 estimates\" output/vc_", tn, ".log.txt", sep=""), intern=TRUE)
      vc<- strsplit(trimws(vc), "\\s+")
      vc<- as.numeric(vc[[1]][-(1:4)])
   vc<- c(mean(pheno[ii,tn]), vc)
}else{
   vc<- estVC(y=pheno[ii,tn], v=K[ii,ii])$par
}

   yf<- dsg[,ii+3]*b1*tid
      yf<- apply(yf,2,sum)
if(FALSE){
   ## if run original data
   write.table(pheno[ii,tn]+yf, file="pdt", quote=FALSE, row.names=FALSE, col.names=FALSE)
}else{
   ## if run similation
   sigma<- K[ii,ii]*vc[2] + diag(vc[3],length(ii))
   yr<- rmvnorm(1, mean=rep(vc[1], length(ii)), sigma=sigma)
      yr<- as.vector(yr)
   write.table(yr+yf, file="pdt", quote=FALSE, row.names=FALSE, col.names=FALSE)
}

   pv<- rep(NA, length(Chrs))
   for(ch in unique(Chrs)){
      idx<- Chrs == ch
      write.table(dsg[idx, c(1:3,ii+3)], file="gdt", quote=FALSE, row.names=FALSE, col.names=FALSE, sep=",")

      write.table(GRMs[[match(ch,names(GRMs))]][ii,ii], file="grm", quote=FALSE, row.names=FALSE, col.names=FALSE)

      str<- paste("-o out", sprintf("%02d",ch), sep="")
         str<- paste("-p pdt -g gdt -k grm -lmm ", test, str)
         str<- paste("../gemma-0.98.1 ", str)
      system(str, ignore.stdout=TRUE)

      p<- read.table(paste("output/out",sprintf("%02d",ch),".assoc.txt",sep=""),header=TRUE)
      pv[idx]<- p[match(SNPs[idx],sapply(p$rs,as.character)), ncol(p)]
   }
   setwd(cwd)
   pv
}

################
## simulations #
################

args<- commandArgs(TRUE)
args
if(length(args)==0){
   tid<- 1
}else for(i in 1:length(args)){
   eval(parse(text=args[[i]]))
   rm(i)
}
if(exists(".Random.seed")) rm(.Random.seed)
rsd<- runif(100000,-1,1)*2^31
   rsd<- floor(rsd)
set.seed(rsd[tid])
cat("TID", tid,":",round(rnorm(5),5), "\n", sep=" ")

library(QTLRel)
library(mvtnorm)

load("Rdata/pwr34.RData")

nsim<- 2500 # number of simulations

date()
##----------------------------------------------------
   tn<- "body_weight" # trait to analyze
   idx<- QTL$Pop == "F34" & QTL$Pheno == tn # c(8,9) # which of SNPs to consider
   b1<- 0.02 # (1:50)*b1 effects to simulate

   pv<- NULL
   for(i in 1:nsim){
      cat("...............", i, "...............")
      pv<- rbind(pv,
         gemmaScanf(tid, tn=tn, b1=b1, pheno=pheno, SNPs=QTL$SNP[idx], Chrs=QTL$Chr[idx], dsg=dsg[idx,],
            K=K, GRMs=GRMs, test=2, clean=TRUE)
      )
   }
   colnames(pv)<- QTL$SNP[idx]

   write.csv(pv, file=paste("results/pv34_", tn, "_", sprintf("%02d", tid), ".csv",sep=""), row.names=FALSE, quote=FALSE)

date()
##----------------------------------------------------
   tn<- "MA_D2_T30"
   idx<- QTL$Pop == "F34" & QTL$Pheno == tn # c(12)
   b1<- 0.02

   pv<- NULL
   for(i in 1:nsim){
      cat("..........", i, "...............")
      pv<- rbind(pv,
         gemmaScanf(tid, tn=tn, b1=b1, pheno=pheno, SNPs=QTL$SNP[idx], Chrs=QTL$Chr[idx], dsg=dsg[idx,],
            K=K, GRMs=GRMs, test=2, clean=TRUE)
      )
   }
   colnames(pv)<- QTL$SNP[idx]

   write.csv(pv, file=paste("results/pv34_", tn, "_", sprintf("%02d", tid), ".csv",sep=""), row.names=FALSE, quote=FALSE)

date()
##----------------------------------------------------
   tn<- "coat_color_agouti"
   idx<- QTL$Pop == "F34" & QTL$Pheno == tn # c(11)
   b1<- 0.0065

   pv<- NULL
   for(i in 1:nsim){
      cat("...............", i, "...............")
      pv<- rbind(pv,
         gemmaScanf(tid, tn=tn, b1=b1, pheno=pheno, SNPs=QTL$SNP[idx], Chrs=QTL$Chr[idx], dsg=dsg[idx,],
            K=K, GRMs=GRMs, test=2, clean=TRUE)
      )
   }
   colnames(pv)<- QTL$SNP[idx]

   write.csv(pv, file=paste("results/pv34_", tn, "_", sprintf("%02d", tid), ".csv",sep=""), row.names=FALSE, quote=FALSE)

date()
##----------------------------------------------------
   tn<- "coat_color_albino"
   idx<- QTL$Pop == "F34" & QTL$Pheno == tn # c(10)
   b1<- 0.0065

   pv<- NULL
   for(i in 1:nsim){
      cat("...............", i, "...............")
      pv<- rbind(pv,
         gemmaScanf(tid, tn=tn, b1=b1, pheno=pheno, SNPs=QTL$SNP[idx], Chrs=QTL$Chr[idx], dsg=dsg[idx,],
            K=K, GRMs=GRMs, test=2, clean=TRUE)
      )
   }
   colnames(pv)<- QTL$SNP[idx]

   write.csv(pv, file=paste("results/pv34_", tn, "_", sprintf("%02d", tid), ".csv",sep=""), row.names=FALSE, quote=FALSE)

date()

q("no")

############
## summary #
############

tn<- 1
if(tn ==1){
   tn<- "body_weight" # trait to analyze
   b1<- 0.02 # (1:50)*b1 effects to simulate
}else if(tn == 2){
   stop("no MA_D1!")
}else if(tn == 3){
   tn<- "MA_D2_T30"
   b1<- 0.02
}else if(tn == 4){
   stop("no MA_D1!")
}else if(tn == 5){
   tn<- "coat_color_agouti"
   b1<- 0.0065
}else if(tn == 6){
   tn<- "coat_color_albino"
   b1<- 0.0065
}

nsim<- 2500

opt<- 1
if(opt ==1 ){
   str<- ""
}else if(opt == 2){
   str<- "_0.05"
}

pwr<- sd<- pv<- NULL
for(tid in 1:50){
   pvTmp<- read.csv(paste("results/pv34_", tn, "_", sprintf("%02d", tid), ".csv",sep=""), header=TRUE)
   pv<- rbind(pv,pvTmp)
   if(opt == 1) lpv<- -log10(pvTmp) > 4.852154 else
      lpv<- -log10(pvTmp) > -log10(0.05)
   pwr<- rbind(pwr, apply(lpv, 2, mean))
   sd<- rbind(sd, apply(lpv, 2, sd))
}
b<- (1:50)*b1
rownames(pwr)<- rownames(sd)<- as.character(b)

# save(b, pwr, sd, nsim, tn, file=paste("Rdata/pwr34_", tn, str, ".RData",sep=""))

## run the following in local
# load(file=paste("Rdata/pwr34_", tn, str, ".RData",sep=""))

pdf(paste("graphs/pwr34_", tn, str, ".pdf",sep=""))
   matplot(b, pwr, type="b", cex=0.5, xlab="QTL Effect", ylab="Simulated Power", main=tn)
   legend("topleft", legend=colnames(pwr), pch=as.character(1:ncol(pwr)), col=1:ncol(pwr), lty=1:ncol(pwr))
dev.off()

####################
# the end #
###########

sef<- function(pheno, dsg, Chrs, tn){
# pheno: animal x phenotype
# dsg: genotype dosage (SNP x Animal)
# K: kinship matrix
# Chrs: chrosomes on which some SNPs to scan
# tn: name of the trait to analyze
   ## remove NA's and exteme outliers
   iqr<- IQR(pheno[,tn],na.rm=T)
   m<- median(pheno[,tn],na.rm=T)
   ii<- !is.na(pheno[,tn])
   if(iqr > 0){
      ii<- ii & pheno[,tn] > m - iqr*2.5 & pheno[,tn] < m + iqr*2.5
   }
      ii<- (1:length(ii))[ii]

   se<- matrix(NA, nrow=length(Chrs), ncol=2)
   for(ch in unique(Chrs)){
      idx<- match(ch,names(GRMs))
      vc<- estVC(y=pheno[ii,tn], v=GRMs[[idx]][ii,ii])
      sigma<- GRMs[[idx]][ii,ii]*vc$par[2] + diag(vc$par[3],length(ii))
      idx<- Chrs == ch
      dtf<- data.frame(y=pheno[ii,tn], t(dsg[idx, ii+3]))
      g<- gls(y ~ ., vc=vc, data=dtf)
      se[idx,]<- g[-1,1:2]
   }
   colnames(se)<- c("Estimate", "Std. Error")

   se
}

library(QTLRel)

se<- NULL

load("Rdata/pwr34.RData")

tn<- "body_weight"
idx<- c(11)
Chrs<- Chrs[idx]
Pos<- Pos[idx]
SNPs<- SNPs[idx]
dsg<- dsg[idx,]
tse<- sef(pheno, dsg,Chrs, tn)
rownames(tse)<- SNPs
se<- rbind(se, tse)

load("Rdata/pwr34.RData")

tn<- "MA_D2_Total_30"
idx<- c(12)
Chrs<- Chrs[idx]
Pos<- Pos[idx]
SNPs<- SNPs[idx]
dsg<- dsg[idx,]
tse<- sef(pheno, dsg,Chrs, tn)
rownames(tse)<- SNPs
se<- rbind(se, tse)

load("Rdata/pwr34.RData")

tn<- "MA_D3_Total_30"
idx<- c(8)
Chrs<- Chrs[idx]
Pos<- Pos[idx]
SNPs<- SNPs[idx]
dsg<- dsg[idx,]
tse<- sef(pheno, dsg, Chrs, tn)
rownames(tse)<- SNPs
se<- rbind(se, tse)

load("Rdata/pwr34.RData")

tn<- "coat_color_agouti"
idx<- c(2)
Chrs<- Chrs[idx]
Pos<- Pos[idx]
SNPs<- SNPs[idx]
dsg<- dsg[idx,]
tse<- sef(pheno, dsg, Chrs, tn)
rownames(tse)<- SNPs
se<- rbind(se, tse)

load("Rdata/pwr34.RData")

tn<- "coat_color_albino"
idx<- c(6)
Chrs<- Chrs[idx]
Pos<- Pos[idx]
SNPs<- SNPs[idx]
dsg<- dsg[idx,]
tse<- sef(pheno, dsg, Chrs, tn)
rownames(tse)<- SNPs
se<- rbind(se, tse)

se # simulate F34
#                  Estimate   Std. Error
#chr14.82586326  -0.1598899   0.06663027
#chr15.67235072 -94.4524563 124.84243867
#chr7.113250866 -91.5336565 545.88800945
#chr2.155091628   0.6328329   0.02525096
#chr7.87255156   -0.5764215   0.02058022

load("Rdata/pwr34_body_weight.RData")

b<- as.numeric(rownames(pwr))
   b<- c(-b[length(b):1], 0, b)
z<- b/0.06663027
p<- rbind(as.matrix(pwr[nrow(pwr):1,]), 0, pwr)

b34<- -qnorm(1-0.5*2.87e-5)
b39<- -qnorm(1-0.5*2.63e-6)
w<- qnorm(1-0.05/4)

plot(z, p, type="b", xlab="z", ylab="Power", main="F39 MA D1")
abline(v=b34, col=2); abline(v=b34-w, col=2, lty=2); abline(v=b34+w, col=2, lty=2)
abline(v=b39, col=1); abline(v=b39-w, col=1, lty=2); abline(v=b39+w, col=1, lty=2)

###################################
#b. Power simulations job submission
###################################

#! /bin/bash

#PBS -N pwr
##PBS -l mem=239Gb
#PBS -l walltime=98:00:00
#PBS -j oe
#PBS -o pwr_${PBS_ARRAYID}.log
#PBS -l nodes=1:ppn=6
#PBS -q home
#PBS -t 1-50%8

cd $PBS_O_WORKDIR

NPROCS=`wc -l < $PBS_NODEFILE`
NNODES=`uniq $PBS_NODEFILE | wc -l`

### log info
echo -n '    Started: ' ; date
echo "Environment: $PBS_ENVIRONMENT"
echo "      Queue: $PBS_QUEUE"
echo "     Job ID: $PBS_JOBID"
echo "Master node: $PBS_O_HOST"
echo " No. CPU(s): $NPROCS"
echo "No. node(s): $NNODES node(s):"
echo "-----------------------"
uniq $PBS_NODEFILE | sort
echo "-----------------------"
### end of log info

module load R
R CMD BATCH "--args tid=$PBS_ARRAYID" pwr34.R Rout/pwr34_$(printf "%02d" $PBS_ARRAYID)-$(date +%y%m%d%H%M%S).Rout


########################################################################################
########################################################################################
#8. Heritability
########################################################################################
########################################################################################

#######################################
#Estimate SNP heritability using GEMMA
######################################3

#kinship is calculated using dosages from all chromosomes; heritability is in GEMMA output .log file

for file in *.AIL.log.txt; do
grep '## pve estimate in the null model ' ${file} | sed 's/## pve estimate in the null model = //g' >> AllPheno_heritability.txt
done

#####################################
#Estimate SNP heritability using GCTA
#####################################

~/gcta/gcta64 --reml --pheno pheno.file --grm chrALL.grm --covar covar.file --qcovar qcovar.file --mpheno ${n} --out output.${n}


########################################################################################
########################################################################################
#9. Genetic correlations
########################################################################################
########################################################################################

~/gcta/gcta64  --reml-bivar  --grm chrALL.grm --pheno pheno.file  --reml-bivar-lrt-rg 1 --out output.file