setwd(file.path("/", "local", "groux", "scATAC-seq")) # libraries library(RColorBrewer) # functions source(file.path("scripts", "functions.R")) # the number of classes searched n.classes = c(10, 20, 30) # path to the images for the logo path.a = file.path("res/A.png") path.c = file.path("res/C.png") path.g = file.path("res/G.png") path.t = file.path("res/T.png") ################## sequence patterns around ctcf motifs ################## for(k in n.classes) { # sequence data = read.sequence.models(file.path("results", "10xgenomics_PBMC_5k_peaks_classification_2", sprintf("peaks_rmsk_sampled_sequences_1kb_%dclass_model.mat", k))) model.seq = data$models model.prob = data$prob data = NULL # open chromatin model.open = read.read.models(file.path("results", "10xgenomics_PBMC_5k_peaks_classification_2", sprintf("peaks_rmsk_sampled_openchromatin_1kb_read_atac_%dclass_model.mat", k)))$models # nucleosomes model.nucl = read.read.models(file.path("results", "10xgenomics_PBMC_5k_peaks_classification_2", sprintf("peaks_rmsk_sampled_nucleosomes_1kb_fragment_center_%dclass_model.mat", k)))$models # plot classes col = brewer.pal(3, "Set1") # X11(width=26, height=12) png(filename=file.path("results", "10xgenomics_PBMC_5k_peaks_classification_2", sprintf("peaks_rmsk_sampled_sequences_%dclass.png", k)), units="in", res=720, width=18, height=12) m = matrix(1:30, nrow=6, ncol=5, byrow=F) layout(m) # order from most to least probable class ord = order(model.prob, decreasing=T) ref.open = model.open[ord,, drop=F] ref.nucl = model.nucl[ord,, drop=F] ref.seq = model.seq[,,ord, drop=F] prob = model.prob[ord] class = c(1:nrow(ref.open))[ord] for(i in 1:nrow(ref.open)) { # plot logo plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t, main=sprintf("class %d (p=%.2f)", class[i], prob[i])) # x-axis x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2, length.out=3) x.at = seq(1, ncol(ref.open), length.out=length(x.lab)) axis(1, at=x.at, labels=x.lab) # y-axis is [0,1] for min/max signal y.at = seq(0, 2, length.out=2) y.lab = c("min", "max") axis(2, at=y.at, labels=y.lab) # plot signal (multiplies by 2 because the y-axis goes to 2 bits) lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=1, col=col[1]) lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=1, col=col[2]) } # inlets with center # row_n = 1 # row counter # col_n = 1 # column counter # for(i in 1:nrow(ref.open)) # { # plot logo center # right = 0.5*col_n - 0.01 # left = right - 0.2 # bottom = 1-(row_n*(0.2))+0.05 # top = bottom + 0.15 # par(fig=c(left, right, bottom, top), new=T) # idx = (391-1-20):(391+1+20) # plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t) # # plot signal (multiplies by 2 because the y-axis goes to 2 bits) # lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=1, col=col[1]) # lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=1, col=col[2]) # # xaxis # x.at = seq(1, length(idx), length.out = 3) # x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2)[idx][x.at] # axis(1, at=x.at, labels=x.lab) # # yaxis # axis(2, at=y.at, labels=y.lab) # row_n = row_n + 1 # if(i %% 5 == 0) # { col_n = col_n + 1 # row_n = 1 # } # } dev.off() }