##### Scripts associated with bioinformatic analysis of Jauregui-Lozano,2021a ################## LINUX-based analysis #### Programs used # trimmomatic [v0.39] # bowtie2 [v2.3.5.1] # STAR [v1.3] # samtools [v1.8] # deeptools [v3.1.1] # macs [v2] ##### Trimming #RNA-seq trimmomatic PE file_1.fastq.gz file_2.fastq.gz file_IP.fq file_IU.fq file_IIP.fq file_IIU.fq HEADCROP:8 ILLUMINACLIP:TruSeq3-PE-2.fa:2:30:10 SLIDINGWINDOW:4:20 MINLEN:75 #ATAC-seq trimmomatic PE file_1.fq.gz file_2_2.fq.gz file.IP.fq file.IU.fq file.IIP.fq file.IIU.fq HEADCROP:15 CROP:35 #### Mapping #RNA-seq STAR --runThreadN 64 --genomeDir . --readFilesIn file_IP.fq file_IIP.fq --outSAMtype BAM SortedByCoordinate --quantMode GeneCounts --outFileNamePrefix file #ATAC-seq, ChIP-seq and CUT&Tag bowtie2 --sensitive -x dm6 -1 file.IP.fq -2 file.IIP.fq | samtools view -@ 20 -bSo file.bam ####SAM processing #RNA-seq, ATAC-seq, ChIP-seq and CUT&Tag samtools sort -o file.sorted.bam file.bam samtools index file.sorted.bam #### Bigwig generation bamCoverage -b file.sorted.bam -bs 10 --normalizeUsing CPM -o file.bw #### Bigwig processing ## TSS-centered matrix computeMatrix reference-point --referencePoint TSS --beforeRegionStartLength 1000 --afterRegionStartLength 1000 -R gene.annotation.gtf -S file.bw -o output.matrix.gz --transcriptID gene --transcript_id_designator gene_id ## gene body matrix computeMatrix scale-regions --startLabel TSS -R gene.annotation.gtf --transcriptID gene --transcript_id_designator gene_id --regionBodyLength 15000 -bs 500 --missingDataAsZero --skipZeros -S file.bw -o output.gz ## Correlation matrix multiBigwigSummary bins -b file.bw -o corr.matrix.npz ################## R-based analysis #### RNA-seq ## packages used: # R [v4.0.3] # RStudio [v1.3.1056] # DEseq2 # Differential gene expression (DGE) analysis sampletable <- read.table("sampletable.txt", header=T) rownames(sampletable) <- sampletable$SampleName head(sampletable) count_matrix <- read.delim("rawcounts.txt", header=T, sep="\t", row.names=1) se_star <- DESeqDataSetFromMatrix(countData = count_matrix, colData = sampletable, design = ~ Condition) nrow(se_star) se_star <- se_star[rowSums(counts(se_star)) > 10, ] nrow(se_star) se_star2 <- DESeq(se_star) norm_counts <- log2(counts(se_star2, normalized = TRUE)+1) norm_counts_symbols <- merge(unique(tx2gene[,2:3]), data.frame(ID=rownames(norm_counts), norm_counts), by=1, all=F) write.table(norm_counts_symbols, "AvsB_normalized_counts.txt", quote=F, col.names=T, row.names=F, sep="\t") vsd <- vst(se_star2) resultsNames(se_star2) > Condition_A_vs_B de <- results(object = se_star2, name="Condition_A_vs_B") de_shrink <- lfcShrink(dds = se_star2, coef="Condition_A_vs_B", type="apeglm") head(de) head(de_shrink) de_symbols <- merge(unique(tx2gene[,2:3]), data.frame(ID=rownames(de_shrink), de_shrink), by=1, all=F) write.table(de_symbols, "AvsB_DESeq2.results.txt", quote=F, col.names=T, row.names=F, sep="\t")