setwd(file.path("/", "local", "groux", "scATAC-seq")) # libraries library(RColorBrewer) library(seqLogo) # functions source(file.path("scripts", "functions.R")) # the minimum number of classes searched k.min = 1 # the maximum number of classes searched k.max = 10 # path to the images for the logo path.a = file.path("res/A.png") path.c = file.path("res/C.png") path.g = file.path("res/G.png") path.t = file.path("res/T.png") ################## sequence patterns around ctcf motifs ################## for(k in k.min:k.max) { # sequence data = read.sequence.models(file.path("results", "10xgenomics_PBMC_5k_motifs_classification_5", sprintf("ctcf_motifs_10e-6_sequences_%dclass_model.mat", k))) model.seq = data$models model.prob = data$prob data = NULL # plot classes col = brewer.pal(3, "Set1") # X11(width=17, height=10) png(filename=file.path("results", "10xgenomics_PBMC_5k_motifs_classification_5", sprintf("ctcf_motifs_10e-6_classification_sequences_%dclass.png", k)), units="in", res=720, width=18, height=12) m = matrix(1:10, nrow=5, ncol=2, byrow=F) layout(m) # order from most to least probable class ord = order(model.prob, decreasing=T) ref.seq = model.seq[,,ord, drop=F] prob = model.prob[ord] class = c(1:dim(ref.seq)[3])[ord] for(i in 1:(dim(ref.seq)[3])) { # plot logo plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t, main=sprintf("class %d (p=%.2f)", class[i], prob[i])) # x-axis x.lab = seq(-floor(ncol(ref.seq)/2), floor(ncol(ref.seq)/2), length.out=3) x.at = seq(1, ncol(ref.seq), length.out=3) axis(1, at=x.at, labels=x.lab) # y-axis is [0,1] for min/max signal x.at = seq(0, 1, 0.5) axis(2, at=x.at, labels=x.at) } dev.off() } ################## sequence patterns around ctcf motifs repeat masked ################## for(k in k.min:k.max) { # sequence data = read.sequence.models(file.path("results", "10xgenomics_PBMC_5k_motifs_classification_5", sprintf("ctcf_motifs_10e-6_sequences_rmsk_%dclass_model.mat", k))) model.seq = data$models model.prob = data$prob data = NULL # plot classes col = brewer.pal(3, "Set1") # X11(width=17, height=10) png(filename=file.path("results", "10xgenomics_PBMC_5k_motifs_classification_5", sprintf("ctcf_motifs_10e-6_classification_sequences_rmsk_%dclass.png", k)), units="in", res=720, width=18, height=12) m = matrix(1:10, nrow=5, ncol=2, byrow=F) layout(m) # order from most to least probable class ord = order(model.prob, decreasing=T) ref.seq = model.seq[,,ord, drop=F] prob = model.prob[ord] class = c(1:dim(ref.seq)[3])[ord] for(i in 1:(dim(ref.seq)[3])) { # plot logo plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t, main=sprintf("class %d (p=%.2f)", class[i], prob[i])) # x-axis x.lab = seq(-floor(ncol(ref.seq)/2), floor(ncol(ref.seq)/2), length.out=3) x.at = seq(1, ncol(ref.seq), length.out=3) axis(1, at=x.at, labels=x.lab) # y-axis is [0,1] for min/max signal x.at = seq(0, 1, 0.5) axis(2, at=x.at, labels=x.at) } dev.off() }