# File S3 for Rounds et al., 2021, Genetics.
# Below are custom scripts, written in either R or PRISM, generated to produce, analyze, or visualize data for
# the above article and associated figures, tables, supplemental material, and database accessions. Scripts are
# titled and bracketed by lines of hyphens. Many are well-annotated with informative in-line comments. Notably,
# all scripts are included as-is, and may contain unnecessary code chunks.
# --------------------------------------------------------------------------------
# R SCRIPTS R SCRIPTS R SCRIPTS R SCRIPTS R SCRIPTS
# --------------------------------------------------------------------------------
# Script Title: Combining ANOVA Output Files from PRISM
# Script Type: R Script
# Script Purpose: To iterate through and combine information from .txt files containing PRISM results of
# gene-by-gene one-way ANOVA analyses of the 5,760 genes in the RIP-Seq testable set. These ANOVAs produced two
# .txt files per gene, or 11,520 .txt files in total; this script combines data from all of these into a single
# table to enable further analysis.
# Original File Name: “Combing ANOVA Output Files from PRISM - for '20-8-20, ALL12WFX Inps, ForXRIP Cnts Gr 10' - Genes 1-5760.R”
# Script:

library(dplyr)
# dplyr supplies bind_rows(), which stacks two data frames vertically even when
# their column counts differ (cells missing from either input become NA).
# library(gtools)
# gtools::mixedsort() lists files in alphanumeric order ("Drug 50mg" before
# "Drug 100mg") without respect to case. It is not needed here because files
# are paired by name rather than by their position in a sorted listing.

setwd("D:/Lab PC/R Scripts/Combine PRISM ANOVA Output/20-8-20, ALL12WFX Inps, ForXRIP Cnts Gr 10")
getwd()

# ---- List the PRISM export files ----
# Only .txt files are considered. The dot is escaped so that only a literal
# ".txt" extension is stripped.
FileList_With_Extensions <- list.files(pattern = "\\.txt$")
FileList_Without_Extensions <- sub("\\.txt$", "", FileList_With_Extensions)
FileList <- sort(FileList_Without_Extensions)
FileList
FileListLength <- length(FileList)

# ---- Helpers ----

# Read one PRISM export as text. Every column is read as character so that the
# Summary and Multiple Comparisons tables can always be stacked by bind_rows(),
# whatever mixture of numbers and labels each column holds.
ReadPrismTable <- function(Path) {
  read.delim(Path, header = FALSE, sep = "\t", colClasses = "character")
}

# Pull the statistics of interest for one gene out of its pair of export files.
#
# SummaryPath   : path of the ANOVA Summary tab export.
# MultiCompPath : path of the Multiple Comparisons tab export.
# Returns a one-row data frame whose column names match Tally_df below.
#
# The [row, column] positions are fixed offsets into the stacked table
# (Summary rows first, Multiple Comparisons rows appended underneath).
# NOTE(review): they assume every export has the same layout as the Ephrin
# proof-of-principle files used below - confirm if the PRISM analysis changes.
ExtractAnovaStats <- function(SummaryPath, MultiCompPath) {
  df_ANOVA_Combo <- bind_rows(ReadPrismTable(SummaryPath), ReadPrismTable(MultiCompPath))
  CellAsNumber <- function(Row, Col) {
    as.numeric(as.character(df_ANOVA_Combo[Row, Col]))
  }
  data.frame(
    gene_name = as.character(df_ANOVA_Combo[2, 2]),
    ANOVA_F = CellAsNumber(6, 2),
    ANOVA_p = CellAsNumber(7, 2),
    total_num_values = CellAsNumber(31, 2),
    FvsW_DunnettVal = CellAsNumber(42, 8),
    FvsW_DunAdjp = CellAsNumber(38, 6),
    XvsW_DunnettVal = CellAsNumber(43, 8),
    # Column name kept without the second underscore so the header of the
    # output file is unchanged from earlier runs of this script.
    XvsWDunAdjp = CellAsNumber(39, 6),
    stringsAsFactors = FALSE
  )
}

# ---- Proof of principle (PoP) on a single gene ----
PoP_Stats <- ExtractAnovaStats(
  "_Ordinary one-way ANOVA of Ephrin.txt",
  "_Ordinary one-way ANOVA of Ephrin-1.txt"
)
PoP_Stats

# ---- Pair the files gene by gene ----
# PRISM exports "<name>.txt" for the Summary tab and "<name>-1.txt" for the
# Multiple Comparisons tab (see the Ephrin pair above). A file is therefore
# treated as a Summary file exactly when its "-1" partner is also present.
# Pairing by name instead of by odd/even position means a stray file, or a
# locale that sorts hyphens unexpectedly, cannot shift every later pair.
HasMultiCompPartner <- paste0(FileList, "-1") %in% FileList
SummaryFiles <- FileList[HasMultiCompPartner]
MultiCompFiles <- paste0(SummaryFiles, "-1")

AmbiguousFiles <- intersect(SummaryFiles, MultiCompFiles)
if (length(AmbiguousFiles) > 0) {
  stop(
    "These files could be either a Summary or a Multiple Comparisons export: ",
    paste(AmbiguousFiles, collapse = ", "),
    call. = FALSE
  )
}

UnpairedFiles <- setdiff(FileList, c(SummaryFiles, MultiCompFiles))
if (length(UnpairedFiles) > 0) {
  warning(
    "Ignoring files without a Summary/Multiple Comparisons partner: ",
    paste(UnpairedFiles, collapse = ", "),
    call. = FALSE
  )
}

# ---- Build Tally_df ----
# Empty, correctly typed template; it is also the result when no file pairs exist.
Tally_df <- data.frame(
  gene_name = character(),
  ANOVA_F = numeric(),
  ANOVA_p = numeric(),
  total_num_values = numeric(),
  FvsW_DunnettVal = numeric(),
  FvsW_DunAdjp = numeric(),
  XvsW_DunnettVal = numeric(),
  XvsWDunAdjp = numeric(),
  stringsAsFactors = FALSE
)
str(Tally_df)

# One row per gene, stacked once at the end. Building each row as a data frame
# (rather than a c() vector) keeps the statistics numeric instead of coercing
# every column to character.
PerGeneRows <- Map(
  ExtractAnovaStats,
  paste0(SummaryFiles, ".txt"),
  paste0(MultiCompFiles, ".txt")
)
Tally_df <- bind_rows(c(list(Tally_df), unname(PerGeneRows)))
Tally_df_rownum <- nrow(Tally_df)

# ---- Write the combined table ----
# Step up in the file hierarchy and write Tally_df to a tab-delimited text file.
# Numeric columns are now written unquoted; gene names remain quoted.
setwd("D:/Lab PC/R Scripts/Combine PRISM ANOVA Output")
getwd()
list.files()
write.table(
  Tally_df,
  file = "ANOVA Output Stats for '20-8-20, ALL12WFX Inps, ForXRIP Cnts Gr 10' - Genes 1-5760.txt",
  sep = "\t",
  row.names = FALSE
)

# Goal = output a data frame with gene names and stats of interest (esp. p-values
# for the overall ANOVA and the multiple comparisons) to then combine manually in
# Excel with the Norm%Inp sheets. (There are plenty of ways to do this
# automatically, but doing it manually, quickly, and in clear view was preferred
# over reading that particular Norm%Inp Excel sheet into R, combining, and
# writing it out again.)
# Note added JCR 20-8-6

# --------------------------------------------------------------------------------
# Script Title: Hypergeometric Test
# Script Type: R Script
# Script Purpose: To test for statistical significance of the overlap between lists of Nab2-associated
# transcripts and Atx2-associated transcripts.
# Original File Name: “hypergeometric_test_calc_Updated w RIP-Seq, SignifEnr ANOVA Gene Lists 20-9-14.R”
# Script:

library(stats)
# source = https://stats.stackexchange.com/questions/16247/calculating-the-probability-of-gene-list-overlap-between-an-rna-seq-and-a-chip-c?rq=1

# Upper-tail hypergeometric p-value for the overlap between two gene lists.
#
# num_successes_sample : number of genes shared by the two lists (the overlap).
# sample_size          : size of the list being tested.
# num_successes_pop    : size of the list it is compared against.
# pop_size             : number of genes in the testable population.
#
# Returns P(overlap >= num_successes_sample) when sample_size genes are drawn
# at random from the population. phyper() with lower.tail = FALSE gives
# P(X > q), hence the "- 1" to include the observed overlap itself.
overlap_p_value <- function(num_successes_sample, sample_size,
                            num_successes_pop, pop_size) {
  stopifnot(
    num_successes_sample >= 0,
    num_successes_sample <= sample_size,
    sample_size <= pop_size,
    num_successes_pop <= pop_size
  )
  num_fails_pop <- pop_size - num_successes_pop
  phyper(
    num_successes_sample - 1,
    num_successes_pop,
    num_fails_pop,
    sample_size,
    lower.tail = FALSE
  )
}

# For Atx2 targets to overlap with Nab2 targets as defined by "SignifEnr, ANOVADun" Lists, as of 20-9-14
overlap_p_value(
  num_successes_sample = 28, sample_size = 103,
  num_successes_pop = 141, pop_size = 5760
)

# For Nab2 targets to overlap with Atx2 targets as defined by "SignifEnr, ANOVADun" Lists, as of 20-9-14
overlap_p_value(
  num_successes_sample = 28, sample_size = 141,
  num_successes_pop = 103, pop_size = 5760
)

# For Atx2 targets to overlap with dNab2 targets
overlap_p_value(
  num_successes_sample = 711, sample_size = 1253,
  num_successes_pop = 1393, pop_size = 9311
)

# For dNab2 targets to overlap with Atx2 targets
overlap_p_value(
  num_successes_sample = 711, sample_size = 1411,
  num_successes_pop = 1244, pop_size = 9296
)

# --------------------------------------------------------------------------------
# Script Title: Prep “PRISM RenameDataTables”
# Script Type: R Script
# Script Purpose: Due to memory constraints, PRISM 8 cannot in a single file hold and analyze via one-way ANOVA
# the 5,760 individual data tables represented by the RIP-Seq testable set. To overcome this limitation, the
# testable set was analyzed in batches, separating the single set into twelve files of 480 genes each, listed
# alphabetically.
In each of twelve PRISM files, an original single data table of 480 genes was separated into 480 individual data tables for ANOVA analysis (see PRISM script “Loop to Split RIP-Seq Genes into Individual Data Tables” below). Once generated, each data table needed to be renamed with the gene symbol it contained, such that later ANOVA output would be associated with the appropriate gene symbol (instead of, for example, “Data Table 125”). This renaming was accomplished with a PRISM script, also included in this file, that iterated from table to table, renaming appropriately. However, PRISM scripts are limited and cannot read and use variable information within a data table for this renaming purpose. Instead, each of the 12 PRISM files needed a unique script for table renaming that lists all gene names explicitly; the text of each of these scripts was generated by one of twelve nearly identical R scripts. The script below is representative of those twelve R scripts. To reproduce the remaining eleven, replace references in this script to genes 1-480 (e.g. loop parameters, file names) with those for another gene set of interest (e.g. genes 481-960 or genes 961-1440). 
# Original File Name: “Prep PRISM - RenameDataTables - for 'ALL12 Genes w Detected Expression in all 12 Inps, ForXRIP Cnts Gr 10 - 20-8-17' - Genes 1-480.R”
# Script:

setwd("D:/Lab PC/R Scripts/Prepare PRISM Script for many One-way ANOVAs")
getwd()
list.files()

# ---- Batch parameters ----
# Change these two values to generate the script for another batch
# (e.g. 481 and 960, or 961 and 1440).
FirstGeneRow <- 1
LastGeneRow <- 480
# The first renamed sheet in each PRISM file is data table 4; the tables
# before it are not renamed by the generated script.
FirstDataTableNum <- 4

# ---- Load and alphabetise the gene list ----
PRISMGenes <- read.delim(
  "Norm_counts_RIP_CY+PUT_CY -(G267, p + G231, p) - Sheets and FBgns for ALL12 Genes w Detected Expression in all 12 Inps, ForXRIP Cnts Gr 10 - 20-8-17.txt",
  header = TRUE,
  sep = "\t"
)
str(PRISMGenes)
PRISMGenes$X
# Gene symbols are in column X; convert in case they were imported as a factor.
PRISMGenes$Gene.Name.As.Char <- as.character(PRISMGenes$X)
head(PRISMGenes)

AlphabeticalPRISMGenes <- PRISMGenes[order(PRISMGenes$Gene.Name.As.Char), ]
str(AlphabeticalPRISMGenes)
head(AlphabeticalPRISMGenes)
tail(AlphabeticalPRISMGenes)
numrows <- nrow(AlphabeticalPRISMGenes)

# Fail loudly rather than writing "SetSheetTitle NA" lines when the requested
# batch runs past the end of the gene table.
stopifnot(
  FirstGeneRow >= 1,
  FirstGeneRow <= LastGeneRow,
  LastGeneRow <= numrows
)

# ---- Build the PRISM script text ----
# Two PRISM commands per gene: jump to the gene's data table, then title the
# sheet with the gene symbol. paste0() is vectorised over the batch and
# collapse = "" joins everything into one string.
BatchRows <- FirstGeneRow:LastGeneRow
DataTableNum <- FirstDataTableNum + seq_along(BatchRows) - 1
PRISMScriptVector <- paste0(
  "GoTo D ", DataTableNum, "\n",
  "SetSheetTitle ", AlphabeticalPRISMGenes$Gene.Name.As.Char[BatchRows], "\n",
  collapse = ""
)
print(PRISMScriptVector)

# The output file name carries the batch range; with the default parameters
# it is identical to the name used by the original script.
write(
  PRISMScriptVector,
  file = paste0(
    "RenameDataTables PRISM Script for 'OneWay ANOVAs for 'ALL12 Genes w Detected Expression in all 12 Inps, ForXRIP Cnts Gr 10 - 20-8-17' - Genes ",
    FirstGeneRow, "-", LastGeneRow, ".txt"
  )
)

# --------------------------------------------------------------------------------
# Script Title: RIP-Seq Fold Enrichment Scatter Plot
# Script Type: R Script
# Script Purpose: To generate scatter plots visualizing the degree and statistical significance of transcript
# enrichment (and thus RBP-association) by Nab2 and Atx2 RIP-Seqs. All points above a statistical significance
# threshold are color-coded, and points corresponding to specific transcripts of interest are labeled. The
# x-axis displays log2(fold enrichment) (i.e. normalized IP/Input values) and the y-axis displays
# -log10(Dunnett Adjusted p) (i.e. the p-value resulting from our statistical significance testing pipeline).
# Notably, only transcripts with a positive fold enrichment value are displayed, as the x-axis minimum is set
# to 0. Thus, not all 5,760 transcripts included in the testable set are displayed on the plots generated by
# this script; this can be changed by changing the x-axis minimum. The script below specifically generates the
# scatter plot included in the article as Figure 4D and displays Nab2 RIP-Seq results with five points labeled.
# Reproduce the scripts to generate other scatter plots in the article by making minor changes as appropriate
# (e.g. to axis text, point color, and point labels).
# Original File Name: “Volcano Plot - W vs F, Norm PercInp from 'Norm_counts_RIP+PUT...267...231...ALL12...ForXRIP Cnts Gr 10...20824 OneWayANOVAs...' - 20-9-29-Fig5Tscrpts.R”
# Script:

setwd("D:/Lab PC/R Scripts/RIP-Seq Enrichment, Volcano Plots")
getwd()
list.files()

# ENSURE CITATION of the authors of ggplot2 AND ggrepel. ggrepel is not part of
# base ggplot2; it keeps data labels from overlapping and draws call-out lines
# when a label is placed far from its point.
library(ggplot2)
library(ggrepel)

# ---- Load data ----
# "PGenes" = RIP-Seq, IP and InPut Genes, DESeq2 output with one-way ANOVAs for each gene.
# In the column names, F is the Nab2 IP (see the x-axis label below).
# NOTE(review): W and X presumably denote the control and Atx2 IPs, going by
# the plot subtitle - confirm against the input sheet.
PGenes <- read.delim(
  "Norm_counts_RIP_CY+PUT_CY -(G267, p + G231, p) - Sheets and FBgns for ALL12 Genes w Detected Expression in all 12 Inps, ForXRIP Cnts Gr 10 - 20-8-17 - 20824 OneWay ANOVAs Added_Edited,ParedDown.txt",
  header = TRUE,
  sep = "\t"
)
head(PGenes)
tail(PGenes)
str(PGenes)

# ---- Coerce imported columns ----
# Columns imported as factors (or text) are converted to character/numeric
# copies. Non-numeric entries become NA.
FactorToNumeric <- function(x) {
  as.numeric(as.character(x))
}

PGenes$gene_nameAsChar <- as.character(PGenes$Gene.Name)
PGenes$PercentInpW_AsNumeric <- FactorToNumeric(PGenes$Percent.Input.W.AVG)
PGenes$PercentInpF_AsNumeric <- FactorToNumeric(PGenes$Percent.Input.F.AVG)
PGenes$PercentInpX_AsNumeric <- FactorToNumeric(PGenes$Percent.Input.X.AVG)
PGenes$Log2PercentInpW_AsNumeric <- log(PGenes$PercentInpW_AsNumeric, base = 2)
PGenes$Log2PercentInpF_AsNumeric <- log(PGenes$PercentInpF_AsNumeric, base = 2)
PGenes$Log2PercentInpX_AsNumeric <- log(PGenes$PercentInpX_AsNumeric, base = 2)
PGenes$NormPercentInpW_AsNumeric <- FactorToNumeric(PGenes$Percent.Input.W.Norm.AVG)
PGenes$NormPercentInpF_AsNumeric <- FactorToNumeric(PGenes$Percent.Input.F.Norm.AVG)
PGenes$NormPercentInpX_AsNumeric <- FactorToNumeric(PGenes$Percent.Input.X.Norm.AVG)
str(PGenes)
PGenes[PGenes$gene_nameAsChar == "me31B", ]

# ---- Classify enrichment ----
# A gene counts as enriched in an IP when its normalized fold enrichment is
# > 1 AND its Dunnett-adjusted p-value is < 0.05. A gene with a missing fold
# value or p-value is treated as NOT enriched in that IP (`%in% TRUE` turns NA
# into FALSE), so every gene lands in exactly one of the four categories and
# the categories always agree with the NumEnrInALLF / NumEnrInALLX counts.
EnrichedInF <- (PGenes$NormPercentInpF_AsNumeric > 1 & PGenes$FvsW.DunAdjp < 0.05) %in% TRUE
EnrichedInX <- (PGenes$NormPercentInpX_AsNumeric > 1 & PGenes$XvsW.DunAdjp < 0.05) %in% TRUE

# Number of genes enriched in F ("ALLF") and in X ("ALLX"), as characters for
# use in plot annotations.
NumEnrInALLF <- as.character(sum(EnrichedInF))
NumEnrInALLX <- as.character(sum(EnrichedInX))

# Enrichment Type:
# 1) BOTH    - enriched in F and in X
# 2) ONLYF   - enriched in F; X fails one or both criteria or has missing values
# 3) ONLYX   - enriched in X; F fails one or both criteria or has missing values
# 4) NEITHER - enriched in neither
PGenes$EnrType <- "NEITHER"
PGenes$EnrType[EnrichedInF & EnrichedInX] <- "BOTH"
PGenes$EnrType[EnrichedInF & !EnrichedInX] <- "ONLYF"
PGenes$EnrType[!EnrichedInF & EnrichedInX] <- "ONLYX"

NumEnrInBOTHFandX <- as.character(sum(PGenes$EnrType == "BOTH"))
NumEnrInONLYF <- as.character(sum(PGenes$EnrType == "ONLYF"))
NumEnrInONLYX <- as.character(sum(PGenes$EnrType == "ONLYX"))
NumEnrInNEITHER <- as.character(sum(PGenes$EnrType == "NEITHER"))

# Test EnrType contents
PGenes[PGenes$EnrType == "ONLYF", 1]
str(PGenes$EnrType)
write.table(PGenes[PGenes$EnrType == "BOTH", ], file = "PGenes EnrinBOTH 20-8-24", sep = "\t")
write.table(PGenes[PGenes$EnrType == "ONLYF", ], file = "PGenes EnrinONLYF 20-8-24", sep = "\t")

# ---- Point colour and drawing order ----
# Colour is set per gene from EnrType. For this Nab2 (F) plot, every gene
# enriched in F is highlighted, whether or not it is also enriched in X.
# Other colour options tried: "midnightblue", "maroon", "tan4", "forestgreen",
# "darkorange1", the violetred / slateblue / darkorchid series, "navy".
ColorDEFAULT <- "gray"
ColorBOTH <- "lightseagreen"
ColorONLYF <- "lightseagreen"
ColorONLYX <- "gray"

PGenes$PtColor <- ColorDEFAULT
PGenes$GraphOrder <- 1
PGenes$PtColor[PGenes$EnrType == "BOTH"] <- ColorBOTH
PGenes$GraphOrder[PGenes$EnrType == "BOTH"] <- 4
PGenes$PtColor[PGenes$EnrType == "ONLYF"] <- ColorONLYF
PGenes$GraphOrder[PGenes$EnrType == "ONLYF"] <- 3
PGenes$PtColor[PGenes$EnrType == "ONLYX"] <- ColorONLYX
PGenes$GraphOrder[PGenes$EnrType == "ONLYX"] <- 2
str(PGenes)
PGenes[PGenes$PtColor == ColorBOTH, ]
nrow(PGenes[PGenes$PtColor == ColorBOTH, ])

# ---- Point labels ----
# PlotLabel is "" (not NA) for unlabeled genes, so every point carries an
# unseen label and geom_text_repel pads around all points rather than only
# around the visibly labeled ones. See ?geom_text_repel.
# PlotLabel must stay the LAST column added to PGenes: the subset() call below
# selects the column range Gene.Name:PlotLabel.
GenesToLabel <- c("Arpc2", "side-II", "Cpsf160", "drk", "me31B")
PGenes$PlotLabel <- ""
IsLabeledGene <- PGenes$gene_nameAsChar %in% GenesToLabel
PGenes$PlotLabel[IsLabeledGene] <- PGenes$gene_nameAsChar[IsLabeledGene]

# ---- "Clean" data frame ----
# Keep only genes that were assigned a normalized Percent Input W (rather than
# a DIV/0! or NA). ggplot2 would drop points with NA coordinates anyway, but
# such genes should not be called enriched or non-enriched without manual
# inspection, so removing them up front is safer for any later calculation
# that does not drop NAs automatically.
PGenesClean <- subset(PGenes, !is.na(NormPercentInpW_AsNumeric), select = Gene.Name:PlotLabel)
str(PGenesClean)
head(PGenesClean, 20)
tail(PGenesClean)
str(PGenesClean$PlotLabel)
length(PGenesClean$PlotLabel)

# ggplot2 draws points in data-frame row order, so ordering by GraphOrder
# (then Gene.Name) draws the coloured points and their labels last, on top.
OrderedPGenesClean <- PGenesClean[order(PGenesClean$GraphOrder, PGenesClean$Gene.Name), ]
str(OrderedPGenesClean)
head(OrderedPGenesClean)
tail(OrderedPGenesClean)
OrderedPGenesClean[OrderedPGenesClean$Gene.Name == "me31B", ]

# ---- Counts reported on the plot ----
# NumGenesInFinalDataFrame    : genes in the final data frame.
# NumGenesMetGraphingCriteria : of those, genes with both a W and an F percent
#                               input value.
HasWandF <- !is.na(OrderedPGenesClean$PercentInpW_AsNumeric) &
  !is.na(OrderedPGenesClean$PercentInpF_AsNumeric)
NumGenesInFinalDataFrame <- prettyNum(nrow(OrderedPGenesClean), big.mark = ",")
NumGenesMetGraphingCriteria <- prettyNum(sum(HasWandF), big.mark = ",")

# ---- Linear regression (F vs W percent input) ----
# Retained from earlier versions of this plot; only R_Squared_Char is
# referenced, by an annotation that is currently switched off. lm() drops rows
# with NA in either variable, which are exactly the rows excluded by HasWandF.
# JCR 20-5-1
cor(
  OrderedPGenesClean$PercentInpW_AsNumeric,
  OrderedPGenesClean$PercentInpF_AsNumeric,
  use = "pairwise.complete.obs"
)
# lm formula format = y ~ x
LinearRegModel <- lm(PercentInpF_AsNumeric ~ PercentInpW_AsNumeric, OrderedPGenesClean)
print(LinearRegModel)
summary(LinearRegModel)
R_Squared_Char <- as.character(round(summary(LinearRegModel)$r.squared, digits = 2))
# Create the full-length column first so the assignment cannot fail when the
# last rows of the data frame are among those lm() dropped.
OrderedPGenesClean$LinearRegPredictedVals <- NA_real_
OrderedPGenesClean$LinearRegPredictedVals[HasWandF] <- predict(LinearRegModel)
str(OrderedPGenesClean)

# ---- Plotted quantities ----
OrderedPGenesClean$Log2NormPercentInpF <- log(OrderedPGenesClean$NormPercentInpF_AsNumeric, base = 2)
OrderedPGenesClean$Log2NormPercentInpX <- log(OrderedPGenesClean$NormPercentInpX_AsNumeric, base = 2)
OrderedPGenesClean$NegLog10.FvsW.DunAdjp <- -log(OrderedPGenesClean$FvsW.DunAdjp, base = 10)
OrderedPGenesClean$NegLog10.XvsW.DunAdjp <- -log(OrderedPGenesClean$XvsW.DunAdjp, base = 10)
str(OrderedPGenesClean)

# Plotting variables: upper limits of the x- and y-axes.
xmaxVar <- 3
ymaxVar <- 4

# ---- Scatter plot ----
# x = log2(normalized fold enrichment in F), y = -log10(Dunnett adjusted p, F vs W).
# The x-axis starts at 0, so only genes with positive log2 fold enrichment are
# visible. PtColor already holds literal colour names, hence the identity scale.
# Options that were tried and switched off: a geom_smooth(method = "lm") line,
# threshold lines (e.g. geom_hline(yintercept = 1.30102999566398), i.e.
# p = 0.05), and separate "Enriched in F ONLY / X ONLY / BOTH" and R-squared
# annotations.
ggplot(OrderedPGenesClean, aes(x = Log2NormPercentInpF, y = NegLog10.FvsW.DunAdjp)) +
  geom_point(aes(colour = PtColor), size = 4, shape = 19) +
  scale_colour_identity() +
  geom_text_repel(
    aes(label = PlotLabel),
    fontface = "bold",
    size = 5,
    box.padding = 0.75,
    point.padding = 0.3,
    min.segment.length = 0,
    nudge_x = 0.1,
    nudge_y = 0.1,
    seed = 0
  ) +
  labs(
    x = bquote(bold("Log2(Nab2 Fold Enrichment (Norm. IP/Input))")),
    y = bquote(bold("-log10(DunAdjp)")),
    title = "RIP-Seq IP/Input",
    subtitle = "Heads, Pan-neuronal Control vs. Nab2-FLAG vs. Atx2-3xFLAG"
  ) +
  theme(
    text = element_text(family = "sans"),
    panel.background = element_rect(fill = "white"),
    axis.line = element_line(size = 2),
    axis.ticks = element_line(size = 2),
    axis.ticks.length = unit(15, "points"),
    axis.text = element_text(face = "bold", size = 40, color = "black"),
    axis.text.x = element_text(margin = margin(10, 0, 0, 0)),
    axis.text.y = element_text(margin = margin(0, 5, 0, 0)),
    axis.title = element_text(face = "bold", size = 40),
    plot.title = element_text(size = 16)
  ) +
  annotate(
    "text",
    x = 0.8 * xmaxVar, y = 0.9 * ymaxVar,
    fontface = "bold", size = 7, col = ColorONLYF,
    label = paste("Enriched in F", NumEnrInALLF, sep = "\n")
  ) +
  annotate(
    "text",
    x = 0.8 * xmaxVar, y = 0.8 * ymaxVar,
    fontface = "bold", size = 7, col = ColorDEFAULT,
    label = paste("Total Genes Tested for Enrichment", NumGenesMetGraphingCriteria, sep = "\n")
  ) +
  scale_x_continuous(breaks = c(-3, -2, -1, 0, 1, 2, 3)) +
  scale_y_continuous(
    breaks = c(0, 0.25 * ymaxVar, 0.5 * ymaxVar, 0.75 * ymaxVar, ymaxVar),
    expand = c(0, 0)
  ) +
  coord_cartesian(
    xlim = c(0, 1.05 * xmaxVar),
    ylim = c(0, ymaxVar + (ymaxVar * 0.05)),
    expand = FALSE
  )

# Axis limits used for earlier figures:
# - tiffs labeled "Longer Axes" (6-5-18): xlim = c(-7.5, 7.5), ylim = c(0, 300)
# - tiffs without reference to axis size: xlim = c(-10, 10), ylim = c(0, 120)
#   (note added 9-4-19, from the "ALL DEGs Counted" version of this code)
# - tiffs labeled "Small Axes": xlim = c(-3, 3), ylim = c(-2, 20)
#   (note added 9-4-19, from the 5-16-19 version of this code)

# ---- Reference notes (not run) ----
# The snippets below were carried over from an earlier script. They refer to
# objects (SGenes, SGenesClean) and a file that this script never creates, so
# they are kept as comments only; run as code they would stop the script with
# "object not found".
#
# Printing specific row(s) of a data frame with a logical vector; only rows at
# the positions of TRUE are printed. The third line is the concise form.
#   Atx2Print = SGenes$test_idAsChar == "Atx2"
#   SGenes[Atx2Print, ]
#   SGenes[SGenes$test_idAsChar == "Atx2", ]
#
# is.na() returns TRUE for NA values (the value, not the string "NA") and
# FALSE otherwise; !is.na() is the reverse.
#   AdjPValIsNA = is.na(SGenes$Adj..p.value..FDR.controlled.signif.)
#   AdjPValIsNotNA = !is.na(SGenes$Adj..p.value..FDR.controlled.signif.)
#
# Writing a data frame to a tab-delimited text file, especially useful for
# debugging, and counting the fields per line of a text file:
#   write.table(SGenesClean, file = "SGenes Clean Output DEBUG 6-4-18", sep = "\t")
#   x = count.fields("FE_P4 v FE_X3 - gene_exp_diff - 2-23-18.txt", sep = "\t")

# --------------------------------------------------------------------------------
# PRISM SCRIPTS PRISM SCRIPTS PRISM SCRIPTS PRISM SCRIPTS PRISM SCRIPTS
# --------------------------------------------------------------------------------
# Script Title: Loop to Split RIP-Seq Genes into Individual Data Tables
# Script Type: PRISM Script
# Script Purpose: Following RIP-Seq, normalized IP/Input (i.e.
Fold Enrichment) values were calculated for all 5,760 genes in the testable set and stored in a single table. However, PRISM 8 cannot perform one-way ANOVAs line-by-line in a single data table. Instead, an individual data table was required for each gene; each table would include a gene symbol and its associated normalized IP/Input values. This script generates these individual data tables one-by-one, at each iteration copying a new row from a single source data table containing values for 480 genes in the testable set. The testable set was subsetted in this way because, due to memory constraints, PRISM 8 cannot in a single file hold and analyze via one-way ANOVA the 5,760 individual data tables represented by the RIP-Seq testable set. To overcome this memory limitation, the testable set was analyzed in batches, separating the single testable set into twelve files of 480 genes each, listed alphabetically. Original File Name: “Loop - Duplicate Template Table, Copy One Row Into It.pzc” Script: ForEach 480 GoTo D 2 Copy 1 10 %N 0 GoTo D 3 DuplicateDataTable Paste 1 0 Next -------------------------------------------------------------------------------- Script Title: Change All One-Way ANOVA Analysis Tabs, Summary Tab Script Type: PRISM Script Script Purpose: To set all PRISM ANOVA results tables to the Summary tab. PRISM 8 outputs results of one-way ANOVAs with multiple comparisons as two separate tabs, the Summary tab and the Multiple Comparisons tab. For further analysis of our ANOVA results, output data from both tabs for all genes needed to be collected, searched through, and combined (see “Combining ANOVA Output Files from PRISM” R script above). PRISM readily exports analysis results to .txt files in bulk, enabling this process of collection and combination. However, PRISM only exports in bulk tabs currently set as primary/viewed. Thus, we required short scripts to change the primary/viewed analysis tab for all 480 ANOVA results tables in each PRISM file. 
The script to change each results table to Summary tab view is shown below. Original File Name: “Change All One-Way ANOVA Analysis Tabs to ANOVA Summary Tab.pzc” Script: GoTo R, 1 ForEach 480 GoTo V1 GoTo + Next -------------------------------------------------------------------------------- Script Title: Change All One-Way ANOVA Analysis Tabs, Multiple Comparisons Tab Script Type: PRISM Script Script Purpose: To set all PRISM ANOVA results tables to the Multiple Comparisons tab. PRISM 8 outputs results of one-way ANOVAs with multiple comparisons as two separate tabs, the Summary tab and the Multiple Comparisons tab. For further analysis of our ANOVA results, output data from both tabs for all genes needed to be collected, searched through, and combined (see “Combining ANOVA Output Files from PRISM” R script above). PRISM readily exports analysis results to .txt files in bulk, enabling this process of collection and combination. However, PRISM only exports in bulk tabs currently set as primary/viewed. Thus, we required short scripts to change the primary/viewed analysis tab for all 480 ANOVA results tables in each PRISM file. The script to change each results table to Multiple Comparisons tab view is shown below. Original File Name: “Change All One-Way ANOVA Analysis Tabs to MultiComp Tab.pzc” Script: GoTo R, 1 ForEach 480 GoTo V2 GoTo + Next -------------------------------------------------------------------------------- Script Title: PRISM RenameDataTables Script Type: PRISM Script Script Purpose: Due to memory constraints, PRISM 8 cannot in a single file hold and analyze via one-way ANOVA the 5,760 individual data tables represented by the RIP-Seq testable set. To overcome this limitation, the testable set was analyzed in batches, separating the single set into twelve files of 480 genes each, listed alphabetically. 
In each of the twelve PRISM files, an original single data table of 480 genes was separated into 480 individual data tables for ANOVA (see PRISM script “Loop to Split RIP-Seq Genes into Individual Data Tables” above). Once generated, each data table needed to be renamed with the gene symbol it contained, such that later ANOVA output would be associated with the appropriate gene symbol (instead of, for example, “Data Table 125”). This renaming was accomplished with this PRISM script, which iterates from table to table, renaming appropriately. Notably, PRISM scripts are limited and cannot read and use variable information within a data table for this renaming purpose. Hence, each of the twelve PRISM files needed a unique script for table renaming that lists all gene names explicitly. The script below is one of those twelve PRISM scripts and is representative of the rest. To reproduce the remaining eleven, replace the gene symbols for testable set genes 1-480 in this script with the symbols for genes in another batch of the testable set (e.g. genes 481-960 or genes 961-1440). Importantly, the text of each of these twelve PRISM scripts was generated by one of twelve nearly identical R scripts (see “Prep “PRISM RenameDataTables”” above). Reviewer note: the sheet titles 43709, 43710, and 43712 in the script below (D 19-21) look like spreadsheet date-serial numbers rather than gene symbols; they may be date-converted symbols (e.g. Sep1, Sep2, Sep4 - to be confirmed against the source data table) and are reproduced here as-is. 
Original File Name: “Genes 1-480 - RenameDataTables PRISM Script for 'OneWay ANOVAs for ALL12, ForXRIP Cnts Gr 10 - 20-8-17'.pzc” Script: GoTo D 4 SetSheetTitle 128up GoTo D 5 SetSheetTitle 14-3-3epsilon GoTo D 6 SetSheetTitle 14-3-3zeta GoTo D 7 SetSheetTitle 18SrRNA-Psi:CR41602 GoTo D 8 SetSheetTitle 18w GoTo D 9 SetSheetTitle 26-29-p GoTo D 10 SetSheetTitle 28SrRNA-Psi:CR40596 GoTo D 11 SetSheetTitle 28SrRNA-Psi:CR40741 GoTo D 12 SetSheetTitle 28SrRNA-Psi:CR41609 GoTo D 13 SetSheetTitle 28SrRNA-Psi:CR45848 GoTo D 14 SetSheetTitle 28SrRNA-Psi:CR45851 GoTo D 15 SetSheetTitle 28SrRNA-Psi:CR45855 GoTo D 16 SetSheetTitle 28SrRNA-Psi:CR45859 GoTo D 17 SetSheetTitle 28SrRNA-Psi:CR45860 GoTo D 18 SetSheetTitle 2mit GoTo D 19 SetSheetTitle 43709 GoTo D 20 SetSheetTitle 43710 GoTo D 21 SetSheetTitle 43712 GoTo D 22 SetSheetTitle 4E-T GoTo D 23 SetSheetTitle 5-HT1A GoTo D 24 SetSheetTitle 5-HT1B GoTo D 25 SetSheetTitle 5-HT2A GoTo D 26 SetSheetTitle 5-HT2B GoTo D 27 SetSheetTitle 5-HT7 GoTo D 28 SetSheetTitle 5PtaseI GoTo D 29 SetSheetTitle 7B2 GoTo D 30 SetSheetTitle 7SLRNA:CR32864 GoTo D 31 SetSheetTitle 7SLRNA:CR42652 GoTo D 32 SetSheetTitle a GoTo D 33 SetSheetTitle A16 GoTo D 34 SetSheetTitle a6 GoTo D 35 SetSheetTitle Aac11 GoTo D 36 SetSheetTitle AANAT1 GoTo D 37 SetSheetTitle AANATL2 GoTo D 38 SetSheetTitle Aatf GoTo D 39 SetSheetTitle aay GoTo D 40 SetSheetTitle ab GoTo D 41 SetSheetTitle ABCA GoTo D 42 SetSheetTitle ABCB7 GoTo D 43 SetSheetTitle Abi GoTo D 44 SetSheetTitle Abl GoTo D 45 SetSheetTitle Abp1 GoTo D 46 SetSheetTitle Ac13E GoTo D 47 SetSheetTitle Ac3 GoTo D 48 SetSheetTitle Ac76E GoTo D 49 SetSheetTitle Ac78C GoTo D 50 SetSheetTitle ACC GoTo D 51 SetSheetTitle AcCoAS GoTo D 52 SetSheetTitle Ace GoTo D 53 SetSheetTitle Acer GoTo D 54 SetSheetTitle Acf GoTo D 55 SetSheetTitle Achl GoTo D 56 SetSheetTitle Ack GoTo D 57 SetSheetTitle Ack-like GoTo D 58 SetSheetTitle Acn GoTo D 59 SetSheetTitle Acon GoTo D 60 SetSheetTitle Acox57D-d GoTo D 61 SetSheetTitle 
Acox57D-p GoTo D 62 SetSheetTitle Acp1 GoTo D 63 SetSheetTitle Acph-1 GoTo D 64 SetSheetTitle Acsl GoTo D 65 SetSheetTitle Act42A GoTo D 66 SetSheetTitle Act57B GoTo D 67 SetSheetTitle Act5C GoTo D 68 SetSheetTitle Act79B GoTo D 69 SetSheetTitle Act87E GoTo D 70 SetSheetTitle Actbeta GoTo D 71 SetSheetTitle Actn GoTo D 72 SetSheetTitle Ada2b GoTo D 73 SetSheetTitle Ada3 GoTo D 74 SetSheetTitle AdamTS-A GoTo D 75 SetSheetTitle Adar GoTo D 76 SetSheetTitle ADD1 GoTo D 77 SetSheetTitle AdenoK GoTo D 78 SetSheetTitle Adf1 GoTo D 79 SetSheetTitle Adgf-A GoTo D 80 SetSheetTitle Adgf-D GoTo D 81 SetSheetTitle Adi1 GoTo D 82 SetSheetTitle AdipoR GoTo D 83 SetSheetTitle Adk1 GoTo D 84 SetSheetTitle Adk2 GoTo D 85 SetSheetTitle adp GoTo D 86 SetSheetTitle AdSL GoTo D 87 SetSheetTitle AdSS GoTo D 88 SetSheetTitle Aduk GoTo D 89 SetSheetTitle Ady43A GoTo D 90 SetSheetTitle Aef1 GoTo D 91 SetSheetTitle aft GoTo D 92 SetSheetTitle Afti GoTo D 93 SetSheetTitle AGBE GoTo D 94 SetSheetTitle ago GoTo D 95 SetSheetTitle AGO1 GoTo D 96 SetSheetTitle AGO2 GoTo D 97 SetSheetTitle Ahcy GoTo D 98 SetSheetTitle AhcyL1 GoTo D 99 SetSheetTitle AhcyL2 GoTo D 100 SetSheetTitle AIF GoTo D 101 SetSheetTitle AIMP2 GoTo D 102 SetSheetTitle AIMP3 GoTo D 103 SetSheetTitle Akap200 GoTo D 104 SetSheetTitle AkhR GoTo D 105 SetSheetTitle akirin GoTo D 106 SetSheetTitle Akt1 GoTo D 107 SetSheetTitle AlaRS GoTo D 108 SetSheetTitle AlaRS-m GoTo D 109 SetSheetTitle Alas GoTo D 110 SetSheetTitle alc GoTo D 111 SetSheetTitle Ald GoTo D 112 SetSheetTitle Aldh GoTo D 113 SetSheetTitle Aldh-III GoTo D 114 SetSheetTitle Alg1 GoTo D 115 SetSheetTitle Alg11 GoTo D 116 SetSheetTitle Alg2 GoTo D 117 SetSheetTitle Alh GoTo D 118 SetSheetTitle alien GoTo D 119 SetSheetTitle ALiX GoTo D 120 SetSheetTitle Alk GoTo D 121 SetSheetTitle Alp4 GoTo D 122 SetSheetTitle alph GoTo D 123 SetSheetTitle alpha-Cat GoTo D 124 SetSheetTitle alpha-Catr GoTo D 125 SetSheetTitle alpha-Est1 GoTo D 126 SetSheetTitle alpha-Est10 GoTo D 127 
SetSheetTitle alpha-Est2 GoTo D 128 SetSheetTitle alpha-Est3 GoTo D 129 SetSheetTitle alpha-Est7 GoTo D 130 SetSheetTitle alpha-Est8 GoTo D 131 SetSheetTitle alpha-Est9 GoTo D 132 SetSheetTitle alpha-Man-Ia GoTo D 133 SetSheetTitle alpha-Man-Ib GoTo D 134 SetSheetTitle alpha-Man-IIa GoTo D 135 SetSheetTitle alpha-Man-IIb GoTo D 136 SetSheetTitle alpha-PheRS GoTo D 137 SetSheetTitle alpha-Spec GoTo D 138 SetSheetTitle alpha4GT1 GoTo D 139 SetSheetTitle alphaCOP GoTo D 140 SetSheetTitle alphaSnap GoTo D 141 SetSheetTitle alphaTub84B GoTo D 142 SetSheetTitle alphaTub84D GoTo D 143 SetSheetTitle alrm GoTo D 144 SetSheetTitle Als2 GoTo D 145 SetSheetTitle alt GoTo D 146 SetSheetTitle amon GoTo D 147 SetSheetTitle AMPdeam GoTo D 148 SetSheetTitle Amph GoTo D 149 SetSheetTitle AMPKalpha GoTo D 150 SetSheetTitle Amun GoTo D 151 SetSheetTitle Amy-d GoTo D 152 SetSheetTitle ana GoTo D 153 SetSheetTitle ana1 GoTo D 154 SetSheetTitle Ance GoTo D 155 SetSheetTitle Ance-3 GoTo D 156 SetSheetTitle Ance-4 GoTo D 157 SetSheetTitle Ance-5 GoTo D 158 SetSheetTitle Ank GoTo D 159 SetSheetTitle Ank2 GoTo D 160 SetSheetTitle Ankle2 GoTo D 161 SetSheetTitle anne GoTo D 162 SetSheetTitle antdh GoTo D 163 SetSheetTitle AnxB10 GoTo D 164 SetSheetTitle AnxB11 GoTo D 165 SetSheetTitle AnxB9 GoTo D 166 SetSheetTitle aop GoTo D 167 SetSheetTitle aos GoTo D 168 SetSheetTitle AOX1 GoTo D 169 SetSheetTitle AOX3 GoTo D 170 SetSheetTitle ap GoTo D 171 SetSheetTitle AP-1-2beta GoTo D 172 SetSheetTitle AP-1gamma GoTo D 173 SetSheetTitle AP-1mu GoTo D 174 SetSheetTitle AP-1sigma GoTo D 175 SetSheetTitle AP-2alpha GoTo D 176 SetSheetTitle AP-2mu GoTo D 177 SetSheetTitle Apc GoTo D 178 SetSheetTitle aPKC GoTo D 179 SetSheetTitle Aplip1 GoTo D 180 SetSheetTitle apolpp GoTo D 181 SetSheetTitle Apoltp GoTo D 182 SetSheetTitle app GoTo D 183 SetSheetTitle APP-BP1 GoTo D 184 SetSheetTitle Appl GoTo D 185 SetSheetTitle Aps GoTo D 186 SetSheetTitle apt GoTo D 187 SetSheetTitle aralar1 GoTo D 188 SetSheetTitle 
Arc1 GoTo D 189 SetSheetTitle Arc42 GoTo D 190 SetSheetTitle Arf51F GoTo D 191 SetSheetTitle Arf79F GoTo D 192 SetSheetTitle ArfGAP1 GoTo D 193 SetSheetTitle ArfGAP3 GoTo D 194 SetSheetTitle Arfip GoTo D 195 SetSheetTitle Argk GoTo D 196 SetSheetTitle ari-1 GoTo D 197 SetSheetTitle ari-2 GoTo D 198 SetSheetTitle Arl4 GoTo D 199 SetSheetTitle Arl5 GoTo D 200 SetSheetTitle Arl6IP1 GoTo D 201 SetSheetTitle arm GoTo D 202 SetSheetTitle Arp1 GoTo D 203 SetSheetTitle Arp10 GoTo D 204 SetSheetTitle Arp2 GoTo D 205 SetSheetTitle Arp3 GoTo D 206 SetSheetTitle Arpc1 GoTo D 207 SetSheetTitle Arpc2 GoTo D 208 SetSheetTitle arr GoTo D 209 SetSheetTitle Arr1 GoTo D 210 SetSheetTitle Arr2 GoTo D 211 SetSheetTitle Ars2 GoTo D 212 SetSheetTitle Art1 GoTo D 213 SetSheetTitle Art4 GoTo D 214 SetSheetTitle aru GoTo D 215 SetSheetTitle Asap GoTo D 216 SetSheetTitle Asator GoTo D 217 SetSheetTitle ash1 GoTo D 218 SetSheetTitle ash2 GoTo D 219 SetSheetTitle Ask1 GoTo D 220 SetSheetTitle AsnRS GoTo D 221 SetSheetTitle AsnS GoTo D 222 SetSheetTitle Asph GoTo D 223 SetSheetTitle ASPP GoTo D 224 SetSheetTitle AspRS GoTo D 225 SetSheetTitle AspRS-m GoTo D 226 SetSheetTitle asrij GoTo D 227 SetSheetTitle asRNA:CR43480 GoTo D 228 SetSheetTitle asRNA:CR45601 GoTo D 229 SetSheetTitle Asx GoTo D 230 SetSheetTitle Ate1 GoTo D 231 SetSheetTitle Atet GoTo D 232 SetSheetTitle Atf3 GoTo D 233 SetSheetTitle Atf6 GoTo D 234 SetSheetTitle Atg1 GoTo D 235 SetSheetTitle Atg101 GoTo D 236 SetSheetTitle Atg13 GoTo D 237 SetSheetTitle Atg14 GoTo D 238 SetSheetTitle Atg17 GoTo D 239 SetSheetTitle Atg18a GoTo D 240 SetSheetTitle Atg18b GoTo D 241 SetSheetTitle Atg2 GoTo D 242 SetSheetTitle Atg3 GoTo D 243 SetSheetTitle Atg4a GoTo D 244 SetSheetTitle Atg4b GoTo D 245 SetSheetTitle Atg8a GoTo D 246 SetSheetTitle Atg9 GoTo D 247 SetSheetTitle atilla GoTo D 248 SetSheetTitle atk GoTo D 249 SetSheetTitle atl GoTo D 250 SetSheetTitle atms GoTo D 251 SetSheetTitle Atox1 GoTo D 252 SetSheetTitle ATP6AP2 GoTo D 253 
SetSheetTitle ATP7 GoTo D 254 SetSheetTitle ATP8B GoTo D 255 SetSheetTitle Atpalpha GoTo D 256 SetSheetTitle ATPCL GoTo D 257 SetSheetTitle ATPsynB GoTo D 258 SetSheetTitle ATPsynbeta GoTo D 259 SetSheetTitle ATPsynC GoTo D 260 SetSheetTitle ATPsynCF6 GoTo D 261 SetSheetTitle ATPsynD GoTo D 262 SetSheetTitle ATPsyndelta GoTo D 263 SetSheetTitle ATPsynE GoTo D 264 SetSheetTitle ATPsynF GoTo D 265 SetSheetTitle ATPsynG GoTo D 266 SetSheetTitle ATPsyngamma GoTo D 267 SetSheetTitle ATPsynO GoTo D 268 SetSheetTitle AttA GoTo D 269 SetSheetTitle AttB GoTo D 270 SetSheetTitle AttC GoTo D 271 SetSheetTitle Atu GoTo D 272 SetSheetTitle Atx2 GoTo D 273 SetSheetTitle Atxn7 GoTo D 274 SetSheetTitle aux GoTo D 275 SetSheetTitle awd GoTo D 276 SetSheetTitle axed GoTo D 277 SetSheetTitle Axn GoTo D 278 SetSheetTitle axo GoTo D 279 SetSheetTitle Axs GoTo D 280 SetSheetTitle Axud1 GoTo D 281 SetSheetTitle b GoTo D 282 SetSheetTitle B4 GoTo D 283 SetSheetTitle B52 GoTo D 284 SetSheetTitle bab2 GoTo D 285 SetSheetTitle babo GoTo D 286 SetSheetTitle babos GoTo D 287 SetSheetTitle Bacc GoTo D 288 SetSheetTitle bai GoTo D 289 SetSheetTitle Baldspot GoTo D 290 SetSheetTitle Bap111 GoTo D 291 SetSheetTitle Bap170 GoTo D 292 SetSheetTitle Bap60 GoTo D 293 SetSheetTitle barc GoTo D 294 SetSheetTitle bark GoTo D 295 SetSheetTitle baz GoTo D 296 SetSheetTitle bbc GoTo D 297 SetSheetTitle bbg GoTo D 298 SetSheetTitle bc10 GoTo D 299 SetSheetTitle bchs GoTo D 300 SetSheetTitle bdg GoTo D 301 SetSheetTitle bdl GoTo D 302 SetSheetTitle Bdp1 GoTo D 303 SetSheetTitle be GoTo D 304 SetSheetTitle BEAF-32 GoTo D 305 SetSheetTitle beat-Ic GoTo D 306 SetSheetTitle beat-IIa GoTo D 307 SetSheetTitle beat-IIb GoTo D 308 SetSheetTitle beat-IIIc GoTo D 309 SetSheetTitle beat-IV GoTo D 310 SetSheetTitle beat-Va GoTo D 311 SetSheetTitle beat-VI GoTo D 312 SetSheetTitle beat-VII GoTo D 313 SetSheetTitle bel GoTo D 314 SetSheetTitle Bem46 GoTo D 315 SetSheetTitle ben GoTo D 316 SetSheetTitle Best1 GoTo D 317 
SetSheetTitle Best2 GoTo D 318 SetSheetTitle beta'COP GoTo D 319 SetSheetTitle beta-Man GoTo D 320 SetSheetTitle beta-PheRS GoTo D 321 SetSheetTitle beta-Spec GoTo D 322 SetSheetTitle beta4GalNAcTA GoTo D 323 SetSheetTitle betaCOP GoTo D 324 SetSheetTitle betaggt-I GoTo D 325 SetSheetTitle betaggt-II GoTo D 326 SetSheetTitle betaTub56D GoTo D 327 SetSheetTitle betaTub60D GoTo D 328 SetSheetTitle betaTub97EF GoTo D 329 SetSheetTitle bgm GoTo D 330 SetSheetTitle bi GoTo D 331 SetSheetTitle BI-1 GoTo D 332 SetSheetTitle bic GoTo D 333 SetSheetTitle BicD GoTo D 334 SetSheetTitle bif GoTo D 335 SetSheetTitle bigmax GoTo D 336 SetSheetTitle Bili GoTo D 337 SetSheetTitle bin3 GoTo D 338 SetSheetTitle bip1 GoTo D 339 SetSheetTitle bip2 GoTo D 340 SetSheetTitle Blimp-1 GoTo D 341 SetSheetTitle blot GoTo D 342 SetSheetTitle blow GoTo D 343 SetSheetTitle blw GoTo D 344 SetSheetTitle bma GoTo D 345 SetSheetTitle Bmcp GoTo D 346 SetSheetTitle bmm GoTo D 347 SetSheetTitle bnb GoTo D 348 SetSheetTitle bnl GoTo D 349 SetSheetTitle boca GoTo D 350 SetSheetTitle bocks GoTo D 351 SetSheetTitle BOD1 GoTo D 352 SetSheetTitle bol GoTo D 353 SetSheetTitle bon GoTo D 354 SetSheetTitle bond GoTo D 355 SetSheetTitle bor GoTo D 356 SetSheetTitle boss GoTo D 357 SetSheetTitle botv GoTo D 358 SetSheetTitle bowl GoTo D 359 SetSheetTitle br GoTo D 360 SetSheetTitle Br140 GoTo D 361 SetSheetTitle brat GoTo D 362 SetSheetTitle Brca2 GoTo D 363 SetSheetTitle Bre1 GoTo D 364 SetSheetTitle Brf GoTo D 365 SetSheetTitle brm GoTo D 366 SetSheetTitle Brms1 GoTo D 367 SetSheetTitle brp GoTo D 368 SetSheetTitle bru1 GoTo D 369 SetSheetTitle bru2 GoTo D 370 SetSheetTitle bru3 GoTo D 371 SetSheetTitle Bruce GoTo D 372 SetSheetTitle brun GoTo D 373 SetSheetTitle BRWD3 GoTo D 374 SetSheetTitle bs GoTo D 375 SetSheetTitle bsf GoTo D 376 SetSheetTitle Bsg GoTo D 377 SetSheetTitle Bsg25D GoTo D 378 SetSheetTitle bsk GoTo D 379 SetSheetTitle bt GoTo D 380 SetSheetTitle BTBD9 GoTo D 381 SetSheetTitle BtbVII GoTo D 
382 SetSheetTitle Btk29A GoTo D 383 SetSheetTitle btsz GoTo D 384 SetSheetTitle btz GoTo D 385 SetSheetTitle Bub1 GoTo D 386 SetSheetTitle Buffy GoTo D 387 SetSheetTitle BuGZ GoTo D 388 SetSheetTitle bun GoTo D 389 SetSheetTitle bur GoTo D 390 SetSheetTitle bves GoTo D 391 SetSheetTitle bw GoTo D 392 SetSheetTitle bwa GoTo D 393 SetSheetTitle Bx GoTo D 394 SetSheetTitle Bx42 GoTo D 395 SetSheetTitle by GoTo D 396 SetSheetTitle c11.1 GoTo D 397 SetSheetTitle c12.2 GoTo D 398 SetSheetTitle C1GalTA GoTo D 399 SetSheetTitle C3G GoTo D 400 SetSheetTitle ca GoTo D 401 SetSheetTitle Ca-alpha1D GoTo D 402 SetSheetTitle Ca-alpha1T GoTo D 403 SetSheetTitle Ca-beta GoTo D 404 SetSheetTitle CaBP1 GoTo D 405 SetSheetTitle cac GoTo D 406 SetSheetTitle cact GoTo D 407 SetSheetTitle Cad74A GoTo D 408 SetSheetTitle Cad86C GoTo D 409 SetSheetTitle Cad87A GoTo D 410 SetSheetTitle Cad96Ca GoTo D 411 SetSheetTitle Cad99C GoTo D 412 SetSheetTitle CadN GoTo D 413 SetSheetTitle CadN2 GoTo D 414 SetSheetTitle Cadps GoTo D 415 SetSheetTitle Caf1-55 GoTo D 416 SetSheetTitle CAH1 GoTo D 417 SetSheetTitle CAH2 GoTo D 418 SetSheetTitle cal1 GoTo D 419 SetSheetTitle CalpA GoTo D 420 SetSheetTitle CalpB GoTo D 421 SetSheetTitle CalpC GoTo D 422 SetSheetTitle Calr GoTo D 423 SetSheetTitle Cals GoTo D 424 SetSheetTitle Calx GoTo D 425 SetSheetTitle Cam GoTo D 426 SetSheetTitle CaMKI GoTo D 427 SetSheetTitle CaMKII GoTo D 428 SetSheetTitle Camta GoTo D 429 SetSheetTitle cana GoTo D 430 SetSheetTitle CanA-14F GoTo D 431 SetSheetTitle CanB GoTo D 432 SetSheetTitle CanB2 GoTo D 433 SetSheetTitle Cand1 GoTo D 434 SetSheetTitle CAP GoTo D 435 SetSheetTitle Cap-G GoTo D 436 SetSheetTitle Cap-H2 GoTo D 437 SetSheetTitle Caper GoTo D 438 SetSheetTitle Capr GoTo D 439 SetSheetTitle caps GoTo D 440 SetSheetTitle capt GoTo D 441 SetSheetTitle capu GoTo D 442 SetSheetTitle car GoTo D 443 SetSheetTitle CarT GoTo D 444 SetSheetTitle CASK GoTo D 445 SetSheetTitle casp GoTo D 446 SetSheetTitle Cat GoTo D 447 
SetSheetTitle cathD GoTo D 448 SetSheetTitle Catsup GoTo D 449 SetSheetTitle Cbl GoTo D 450 SetSheetTitle Cbp53E GoTo D 451 SetSheetTitle Cbp80 GoTo D 452 SetSheetTitle cbs GoTo D 453 SetSheetTitle Cbs GoTo D 454 SetSheetTitle cbt GoTo D 455 SetSheetTitle CCHa2 GoTo D 456 SetSheetTitle Cchl GoTo D 457 SetSheetTitle Ccm3 GoTo D 458 SetSheetTitle Ccn GoTo D 459 SetSheetTitle CCT1 GoTo D 460 SetSheetTitle CCT2 GoTo D 461 SetSheetTitle CCT3 GoTo D 462 SetSheetTitle CCT4 GoTo D 463 SetSheetTitle CCT5 GoTo D 464 SetSheetTitle CCT6 GoTo D 465 SetSheetTitle CCT7 GoTo D 466 SetSheetTitle CCT8 GoTo D 467 SetSheetTitle Ccz1 GoTo D 468 SetSheetTitle CD98hc GoTo D 469 SetSheetTitle Cda5 GoTo D 470 SetSheetTitle CDase GoTo D 471 SetSheetTitle cdc14 GoTo D 472 SetSheetTitle Cdc27 GoTo D 473 SetSheetTitle Cdc42 GoTo D 474 SetSheetTitle Cdc5 GoTo D 475 SetSheetTitle Cdc7 GoTo D 476 SetSheetTitle Cdep GoTo D 477 SetSheetTitle CdGAPr GoTo D 478 SetSheetTitle cdi GoTo D 479 SetSheetTitle cDIP GoTo D 480 SetSheetTitle Cdk12 GoTo D 481 SetSheetTitle Cdk4 GoTo D 482 SetSheetTitle Cdk5 GoTo D 483 SetSheetTitle Cdk5alpha --------------------------------------------------------------------------------