##### Scripts associated with bioinformatic analysis of Jauregui-Lozano,2021a


################## LINUX-based analysis
#### Programs used
# trimmomatic [v0.39]
# bowtie2 [v2.3.5.1]
# STAR [v1.3]
# samtools [v1.8] 
# deeptools [v3.1.1]
# macs [v2]


##### Trimming 
#RNA-seq
trimmomatic PE file_1.fastq.gz file_2.fastq.gz file_IP.fq file_IU.fq file_IIP.fq file_IIU.fq HEADCROP:8 ILLUMINACLIP:TruSeq3-PE-2.fa:2:30:10 SLIDINGWINDOW:4:20 MINLEN:75
#ATAC-seq
trimmomatic PE file_1.fq.gz file_2_2.fq.gz file.IP.fq file.IU.fq file.IIP.fq file.IIU.fq HEADCROP:15 CROP:35


#### Mapping 
#RNA-seq
STAR --runThreadN 64 --genomeDir . --readFilesIn file_IP.fq file_IIP.fq --outSAMtype BAM SortedByCoordinate --quantMode GeneCounts --outFileNamePrefix file
#ATAC-seq, ChIP-seq and CUT&Tag
bowtie2 --sensitive -x dm6 -1 file.IP.fq -2 file.IIP.fq | samtools view -@ 20 -bSo file.bam

####SAM processing
#RNA-seq, ATAC-seq, ChIP-seq and CUT&Tag
samtools sort -o file.sorted.bam file.bam
samtools index file.sorted.bam

#### Bigwig generation
bamCoverage -b file.sorted.bam -bs 10 --normalizeUsing CPM -o file.bw

#### Bigwig processing
## TSS-centered matrix
computeMatrix reference-point --referencePoint TSS --beforeRegionStartLength 1000 --afterRegionStartLength 1000 -R gene.annotation.gtf -S file.bw -o output.matrix.gz --transcriptID gene --transcript_id_designator gene_id
## gene body matrix
computeMatrix scale-regions --startLabel TSS -R gene.annotation.gtf --transcriptID gene --transcript_id_designator gene_id --regionBodyLength 15000 -bs 500 --missingDataAsZero --skipZeros -S file.bw -o output.gz 
## Correlation matrix
multiBigwigSummary bins -b file.bw -o corr.matrix.npz


################## R-based analysis
#### RNA-seq
## packages used:
# R [v4.0.3]
# RStudio [v1.3.1056]
# DEseq2 


# Differential gene expression (DGE) analysis
sampletable <- read.table("sampletable.txt", header=T)
rownames(sampletable) <- sampletable$SampleName
head(sampletable)
count_matrix <- read.delim("rawcounts.txt", header=T, sep="\t", row.names=1)
se_star <- DESeqDataSetFromMatrix(countData = count_matrix,
                                  colData = sampletable,
                                  design = ~ Condition)

nrow(se_star)
se_star <- se_star[rowSums(counts(se_star)) > 10, ]
nrow(se_star)
se_star2 <- DESeq(se_star)
norm_counts <- log2(counts(se_star2, normalized = TRUE)+1)
norm_counts_symbols <- merge(unique(tx2gene[,2:3]), data.frame(ID=rownames(norm_counts), norm_counts), by=1, all=F)
write.table(norm_counts_symbols, "AvsB_normalized_counts.txt", quote=F, col.names=T, row.names=F, sep="\t")
vsd <- vst(se_star2)
resultsNames(se_star2)
> Condition_A_vs_B
de <- results(object = se_star2, 
              name="Condition_A_vs_B")
de_shrink <- lfcShrink(dds = se_star2,
                       coef="Condition_A_vs_B",
                       type="apeglm")
head(de)
head(de_shrink)
de_symbols <- merge(unique(tx2gene[,2:3]), data.frame(ID=rownames(de_shrink), de_shrink), by=1, all=F)
write.table(de_symbols, "AvsB_DESeq2.results.txt", quote=F, col.names=T, row.names=F, sep="\t")