# Command log for sample 1112: genome assembly, polishing, quality checks,
# rRNA/tRNA/ncRNA and repeat discovery, gene annotation, and functional
# annotation. Every step calls an external tool and reads/writes files in the
# current directory.
# NOTE(review): input names alternate between 1112.fa and 1112.fasta, and no
# step below writes either file -- confirm which file each step should read.

# Assemble the genome with SPAdes (paired short reads plus PacBio subreads).
spades.py --isolate \
  -1 1112_1shortreads.fq \
  -2 1112_2shortreads.fq \
  --pacbio 1112.subreads.fastq \
  -o 1112

# Assemble the genome with Canu.
canu -d 12 -p canu_output genomeSize=14m -pacbio-raw 1112.fastq.gz

# Polish the genome with NextPolish; run.cfg is generated from $genome.
genome=1112.fa
# printf instead of `echo -e`: same bytes written, portable escape handling.
printf 'task = best\ngenome = %s\nsgs_fofn = sgs.fofn\n' "$genome" > run.cfg
nextPolish run.cfg

# Quality: assembly statistics.
stats.sh in=1112.fasta gc=./1112.gc gchist=./1112.gchist shist=./1112.shist \
  > ./1112.stats

# Quality: genome completeness.
busco -m geno -i 1112.fasta -l ./ascomycota_odb10 -o 1112 -c 16 \
  --offline --auto-lineage-euk -f

# Search the genome for ITS regions.
ITSx --cpu 16 -t F -i 1112.fasta -o 1112.its

# Search for tRNA genes.
# NOTE(review): -o is given "./" (a directory) and "tNRA.ss" looks like a typo
# for "tRNA.ss"; both are kept as recorded -- confirm the intended file names.
tRNAscan-SE -E -o ./ -f tNRA.ss -m tRNAstats 1112.fasta

# Predict the location of ribosomal RNA genes.
barrnap --kingdom euk --threads 8 --outseq FASTA 1112.fasta

# Search for structural RNA families with Infernal against Rfam.
# cmscan takes exactly two positionals (<cmdb> <seqfile>); the stray extra
# "1112.fasta" that followed the -Z value has been removed.
cmscan -Z 27.073154 --cut_ga --rfam --nohmmonly \
  --tblout my-genome.tblout --fmt 2 \
  --clanin Rfam12.2.claninfo \
  Rfam.cm 1112.fasta > 1112.cmscan

# Drop every line containing "=" from the hit table.
grep -v '=' my-genome.tblout >my-genome.deoverlapped.tblout

# Build a tab-separated summary: a header row is printed when the first input
# line is read, then fields 2,3,4,10,11,12,17,18 of every line after the
# second that is not a "#" comment and whose field 20 is not "=".
awk 'BEGIN{OFS="\t";}{
  if(FNR==1) print "target_name\taccession\tquery_name\tquery_start\tquery_end\tstrand\tscore\tEvalue";
  if(FNR>2 && $20!="=" && $0!~/^#/) print $2,$3,$4,$10,$11,$12,$17,$18;
}' my-genome.deoverlapped.tblout >my-genome.tblout.final.xls

# Find and mask repeat elements in the genome.
RepeatMasker -pa 16 -s -lib ./consensi.fa -poly -html -gff \
  -dir 1112/RepeatMasker_output/ ./1112.fasta

# Annotate the genome with MAKER using the three control files.
maker maker_bopts.ctl maker_exe.ctl maker_opts.ctl

# Functional annotation: InterProScan, then BLASTP against nr, Swiss-Prot and
# TrEMBL databases (XML output, -outfmt 5).
interproscan.sh -iprlookup -goterms -dp -i 1112.fa -f xml -b 12

blastp -query ./1112.pep.fa -db nr -evalue 1e-5 \
  -out 12.augustus.nr.xml -outfmt "5" \
  -num_alignments 5 -num_threads 10

blastp -query ./1112.pep.fa -db swiss_database -evalue 1e-5 \
  -out 12.maker.xml -outfmt "5" \
  -num_alignments 5 -num_threads 12

# The trailing options were previously split onto a separate physical line,
# where the shell would have tried to run "-num_alignments" as a command.
blastp -query ./1112.pep.fa -db trembl -evalue 1e-3 \
  -out 20_C.albicans.xml -outfmt "5" \
  -num_alignments 10 -num_threads 16