#--------------------------------------------------------------------------------------- # Supplementary File S2 for the manuscirpt # # "On the use of GBLUP and its extension for GWAS with additive and epistatic effects" #--------------------------------------------------------------------------------------- # R Code implementing the GWAS for epistatic effects using the Q+2K model # # (with the "P3D" approaximation) #--------------------------------------------------------------------------------------- rm(list=ls()) library(BGLR) snp.file <- "File_S5.txt" phe.file <- "File_S4.txt" out.pro <- "sample_data" #Function of MKLMM model. #Y: a vector of phenotypic data. #GNdata: a matrix of genotypic data, which coding as 0, 1, 2. #K: a n x n kinship matrix. MKLMM <- function(Y, GNdata, K){ nmar <- ncol(GNdata) ngen <- nrow(GNdata) alf <- apply(GNdata,2,mean)/2 P <- matrix(rep(2*alf, ngen), nrow=ngen, byrow=TRUE) W <- GNdata-P #Matrix operation for epistasis. X <- matrix(1,ngen,1) I <- diag(1,ngen) svdX <- svd(X) U1 <- svdX$u U2 <- qr.Q(qr(U1), complete=TRUE)[,-1] L3 <- chol(diag(1,ngen-ncol(U1))+t(U2)%*%K3%*%U2) L3tinv <- t(solve(L3)) T3 <- L3tinv%*%t(U2) y <- T3%*%Y tGNdata3 <- T3%*%W #Run EpGWAS with all marker pairs. res_test <- matrix(0, nmar, nmar) rownames(res_test) <- colnames(GNdata) colnames(res_test) <- colnames(GNdata) for (i in 1:(nmar-1)) { for (j in (i+1):nmar) { subdata <- data.frame(Y=y, mu=T3%*%rep(1,ngen), A1=tGNdata3[,i], A2=tGNdata3[,j], Ep=T3%*%(W[,i]*W[,j])) fit.model <- lm(as.formula("Y ~ -1 + mu + A1 + A2 + Ep"), data=subdata) coef_mat <- summary(fit.model)$coefficients if ("Ep"%in%rownames(coef_mat)) { res_test[i,j] <- round(coef_mat["Ep","Pr(>|t|)"],digits=3) }else{ res_test[i,j] <- 1 } } if(i %% 100 == 0){ print(paste0("The scan of the epistatic effects with the ", i, "th SNP has been completed.")) } } return(res_test) } #Import the data sets. #Phenotype. phdata <- read.table(phe.file, header=T, stringsAsFactors=F) row.names(phdata) <- as.vector(phdata[,1]) PHdata <- phdata[,2,drop=FALSE] colnames(PHdata) <- c("phe") PHdata <- as.matrix(PHdata) # dim(phdata) # head(phdata) #Genotype. GNdata <- read.table(snp.file, header=TRUE) alfreq <- apply(GNdata,2,mean)/2 GNdata <- as.matrix(GNdata) alf <- alfreq ngen <- nrow(GNdata) nmar <- ncol(GNdata) #Calculate the kinship matrices. P <- matrix(rep(2*alf, ngen), nrow=ngen, byrow=TRUE) W <- GNdata-P c <- 2 * sum(alf*(1-alf)) Z <- W / sqrt(c) G <- Z %*% t(Z) H <- 0.5 * (G*G - (Z*Z) %*% (t(Z)*t(Z))) #Making the kinship matirces positive-definite. eigG <- eigen(G) d <- eigG$values U <- eigG$vectors zeropos1 <- which(d < 0.00001) d0 <- d if (length(zeropos1) > 0) { d0[zeropos1] <- 10^(floor(log10(min(d[-zeropos1])))) } G0 <- U%*%diag(d0)%*%t(U) eigH <- eigen(H) c <- eigH$values V <- eigH$vectors zeropos2 <- which(c < 0.00001) c0 <- c if (length(zeropos2) > 0) { c0[zeropos1] <- 10^(floor(log10(min(c[-zeropos2])))) } H0 <- V%*%diag(c0)%*%t(V) #Estimating the variance components and producing a combined kinship matrix from G + H. Y <- PHdata[,1] ETA <- list(list(K1=G0, model="RKHS"), list(K2=H0, model="RKHS")) fmA <- BGLR(y=Y, ETA=ETA, nIter=10000, burnIn=1000, saveAt=paste0(out.pro, "_MKLMM_"), verbose=FALSE) v1_null <- fmA$ETA[[1]]$varU v2_null <- fmA$ETA[[2]]$varU ve_null <- fmA$varE K3 <- G0*v1_null/ve_null+H0*v2_null/ve_null #Output the variance components. var.frame <- data.frame(A=v1_null, AxA=v2_null, Residual=ve_null) write.table(var.frame, paste0(out.pro, ".MKLMM.VarComp.txt"), quote=F, sep="\t", append=F, col.names=T, row.names=F) #Running MKLMM model. EpGWAS.out <- MKLMM(Y, GNdata, K3) write.table(EpGWAS.out, paste0(out.pro, ".MKLMM.res.txt"), quote=F, sep="\t", append=F, col.names=T, row.names=T)