# GBSv2 Discovery/Production Pipeline Overview
#
# The commands below are meant to be run in order. All paths are relative, so
# run them from the directory that contains both tassel-5-standalone/ and
# Tassel5GBSv2Pipeline/.

# 1) GBSSeqToTagDBPlugin
# Reads the sequence files in Tassel5GBSv2Pipeline (-i) together with the
# keyfile (-k) and stores the tags in the GooseGBS.db database (-db).
./tassel-5-standalone/run_pipeline.pl -Xms100G -Xmx128G -fork1 -GBSSeqToTagDBPlugin \
    -e EcoT22I \
    -i Tassel5GBSv2Pipeline \
    -db Tassel5GBSv2Pipeline/GooseGBS.db \
    -k Tassel5GBSv2Pipeline/Goose_Keyfile_Tassel5GBSv2Pipeline.txt \
    -kmerLength 64 \
    -minKmerL 20 \
    -mnQS 20 \
    -mxKmerNum 100000000 \
    -endPlugin -runfork1

# 2) TagExportToFastqPlugin
# Exports the tags from the database to a file that the aligner can read.
./tassel-5-standalone/run_pipeline.pl -fork1 -TagExportToFastqPlugin \
    -db Tassel5GBSv2Pipeline/GooseGBS.db \
    -o Tassel5GBSv2Pipeline/GooseTags.fa.gz \
    -c 1 \
    -endPlugin -runfork1

# 3) Run Alignment programs
# The sequencing data was aligned to the reference genome with the
# Burrows-Wheeler Aligner.
# NOTE: no indexing step is listed here; bwa expects the reference to have been
# indexed beforehand (bwa index Chinese_Goose_genome.fna) -- confirm this was
# done before running the two commands below.
bwa aln -t 4 -n 0.04 \
    Tassel5GBSv2Pipeline/referenceSequence/Chinese_Goose_genome.fna \
    Tassel5GBSv2Pipeline/GooseTags.fa.gz \
    > Tassel5GBSv2Pipeline/GooseTagsForAlign.sai

bwa samse \
    Tassel5GBSv2Pipeline/referenceSequence/Chinese_Goose_genome.fna \
    Tassel5GBSv2Pipeline/GooseTagsForAlign.sai \
    Tassel5GBSv2Pipeline/GooseTags.fa.gz \
    > Tassel5GBSv2Pipeline/GooseTagsForAlign.sam

# 4) SAMToGBSdbPlugin
# Loads the alignment (SAM) produced in step 3 into the database.
./tassel-5-standalone/run_pipeline.pl -fork1 -SAMToGBSdbPlugin \
    -i Tassel5GBSv2Pipeline/GooseTagsForAlign.sam \
    -db Tassel5GBSv2Pipeline/GooseGBS.db \
    -endPlugin -runfork1

# 5) DiscoverySNPCallerPluginV2
./tassel-5-standalone/run_pipeline.pl -fork1 -DiscoverySNPCallerPluginV2 \
    -db Tassel5GBSv2Pipeline/GooseGBS.db \
    -sC 1 \
    -mnLCov 0.8 \
    -mnMAF 0.01 \
    -deleteOldData true \
    -endPlugin -runfork1

# 6) SNPQualityProfilerPlugin
# Writes the quality statistics to OutputGooseStats.txt (-statFile).
./tassel-5-standalone/run_pipeline.pl -fork1 -SNPQualityProfilerPlugin \
    -db ./Tassel5GBSv2Pipeline/GooseGBS.db \
    -taxa ./Tassel5GBSv2Pipeline/GBSGooseTaxaNames.txt \
    -tname "Goose" \
    -statFile "OutputGooseStats.txt" \
    -deleteOldData true \
    -endPlugin -runfork1

# 7) ProductionSNPCallerPluginV2
# Writes the genotype calls to Tassel5GBSv2Pipeline/GooseGBS.vcf (-o).
./tassel-5-standalone/run_pipeline.pl -fork1 -ProductionSNPCallerPluginV2 \
    -db Tassel5GBSv2Pipeline/GooseGBS.db \
    -e EcoT22I \
    -mnQS 20 \
    -i ./Tassel5GBSv2Pipeline \
    -k ./Tassel5GBSv2Pipeline/Goose_Keyfile_Tassel5GBSv2Pipeline.txt \
    -kmerLength 64 \
    -o ./Tassel5GBSv2Pipeline/GooseGBS.vcf \
    -endPlugin -runfork1

# Filtering with vcftools
# Each filter reads the *.recode.vcf written by the previous one; the filtered
# files are written to the current directory.

# remove indels
# The input is the VCF written by step 7, so it is read from
# Tassel5GBSv2Pipeline/ rather than from the current directory.
vcftools --vcf Tassel5GBSv2Pipeline/GooseGBS.vcf \
    --remove-indels \
    --recode --recode-INFO-all \
    --out SNPs_only

# keep only biallelic SNPs
vcftools --vcf SNPs_only.recode.vcf \
    --min-alleles 2 --max-alleles 2 \
    --recode --recode-INFO-all \
    --out BiAllelic_SNPs_only

# keep SNPs with MAF 0.05 or higher
vcftools --vcf BiAllelic_SNPs_only.recode.vcf \
    --maf 0.05 \
    --recode --recode-INFO-all \
    --out Filtered_Goose_SNPs