setwd(file.path("/", "local", "groux", "scATAC-seq")) # libraries library(RColorBrewer) # functions source(file.path("scripts", "functions.R")) ################## open chromatin patterns around sp1 motifs ################## # open chromatin data = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_open_bin1bp_read_atac_1class_ref.mat")) open.1.mod = data$models open.1.prob = data$prob open.1.mod.nucl = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_open_bin1bp_read_atac_1class_1nucl_fragment_center_ref.mat"))$models # open.1.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_1", # "sp1_motifs_10e-7_open_bin1bp_read_atac_1class_aic.txt"))) data = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_open_bin1bp_read_atac_2class_ref.mat")) open.2.mod = data$models open.2.prob = data$prob open.2.mod.nucl = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_open_bin1bp_read_atac_2class_1nucl_fragment_center_ref.mat"))$models # open.2.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_1", # "sp1_motifs_10e-7_open_bin1bp_read_atac_2class_aic.txt"))) data = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_open_bin1bp_read_atac_3class_ref.mat")) open.3.mod = data$models open.3.prob = data$prob open.3.mod.nucl = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_open_bin1bp_read_atac_3class_1nucl_fragment_center_ref.mat"))$models # open.3.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_1", # "sp1_motifs_10e-7_open_bin1bp_read_atac_3class_aic.txt"))) data = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_open_bin1bp_read_atac_4class_ref.mat")) open.4.mod = data$models open.4.prob = data$prob open.4.mod.nucl = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_open_bin1bp_read_atac_4class_1nucl_fragment_center_ref.mat"))$models # open.4.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_1", # "sp1_motifs_10e-7_open_bin1bp_read_atac_4class_aic.txt"))) data = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_open_bin1bp_read_atac_4class_ref.mat")) open.5.mod = data$models open.5.prob = data$prob open.5.mod.nucl = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_open_bin1bp_read_atac_5class_1nucl_fragment_center_ref.mat"))$models # open.5.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_1", # "sp1_motifs_10e-7_open_bin1bp_read_atac_5class_aic.txt"))) data = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_open_bin1bp_read_atac_6class_ref.mat")) open.6.mod = data$models open.6.prob = data$prob open.6.mod.nucl = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_open_bin1bp_read_atac_6class_1nucl_fragment_center_ref.mat"))$models # open.6.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_1", # "sp1_motifs_10e-7_open_bin1bp_read_atac_6class_aic.txt"))) data = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_open_bin1bp_read_atac_7class_ref.mat")) open.7.mod = data$models open.7.prob = data$prob open.7.mod.nucl = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_open_bin1bp_read_atac_7class_1nucl_fragment_center_ref.mat"))$models # open.7.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_1", # "sp1_motifs_10e-7_open_bin1bp_read_atac_7class_aic.txt"))) data = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_open_bin1bp_read_atac_8class_ref.mat")) open.8.mod = data$models open.8.prob = data$prob open.8.mod.nucl = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_open_bin1bp_read_atac_8class_1nucl_fragment_center_ref.mat"))$models # open.8.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_1", # "sp1_motifs_10e-7_open_bin1bp_read_atac_8class_aic.txt"))) data = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_open_bin1bp_read_atac_9class_ref.mat")) open.9.mod = data$models open.9.prob = data$prob open.9.mod.nucl = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_open_bin1bp_read_atac_9class_1nucl_fragment_center_ref.mat"))$models # open.9.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_1", # "sp1_motifs_10e-7_open_bin1bp_read_atac_9class_aic.txt"))) data = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_open_bin1bp_read_atac_10class_ref.mat")) open.10.mod = data$models open.10.prob = data$prob open.10.mod.nucl = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_open_bin1bp_read_atac_10class_1nucl_fragment_center_ref.mat"))$models # open.10.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_1", # "sp1_motifs_10e-7_open_bin1bp_read_atac_10class_aic.txt"))) data = NULL # plot 10 classes col = brewer.pal(3, "Set1") # X11(width=18, height=12) png(filename=file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_classification_open_bin1bp_10class.png"), units="in", res=720, width=18, height=12) m = matrix(1:10, nrow=5, ncol=2, byrow=F) layout(m) # order from most to least probable class ord = order(open.7.prob, decreasing=T) ref.open = open.7.mod[ord,] ref.nucl = open.7.mod.nucl[ord,] prob = open.7.prob[ord] class = c(1:nrow(ref.open))[ord] for(i in 1:nrow(ref.open)) { plot(ref.open[i,] / max(ref.open[i,]), type='l', lwd=2, ylim=c(0,1), main=sprintf("class %d (p=%.2f)", class[i], prob[i]), col=col[1]) lines(ref.nucl[i,] / max(ref.nucl[i,]), lwd=2, col=col[2]) } dev.off() # plot all classes ref = list(open.10.mod, open.9.mod, open.8.mod, open.7.mod, open.6.mod, open.5.mod, open.4.mod, open.3.mod, open.2.mod, open.1.mod) prob = list(open.10.prob, open.9.prob, open.8.prob, open.7.prob, open.6.prob, open.5.prob, open.4.prob, open.3.prob, open.2.prob, open.1.prob) aic = c(open.10.aic, open.9.aic, open.8.aic, open.7.aic, open.6.aic, open.5.aic, open.4.aic, open.3.aic, open.2.aic, open.1.aic) # number of runs n_run = length(ref) # number of different classes overall n_class_tot = sum(unlist(lapply(ref, nrow))) # max value of K n_class_max = max(unlist(lapply(ref, nrow))) # some colors colors = rep(brewer.pal(9, "Set1")[1], n_class_max) # construct a matrix with all discovered class models on the rows models = matrix(nrow=n_class_tot, ncol=ncol(ref[[1]])) run_value = vector(length=n_class_tot) k_value = vector(length=n_class_tot) probabilities = vector(length=n_class_tot) k = 1 for(i in 1:n_run) { for(j in 1:nrow(ref[[i]])) { models[k,] = ref[[i]][j,] probabilities[k] = prob[[i]][j] run_value[k] = i k_value[k] = j k = k + 1 } } # distance matrix between all models distances = distance.mod(models) rownames(distances) = 1:nrow(distances) colnames(distances) = 1:ncol(distances) # plot plot.moderences(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_classification_open_bin1bp_classes.png"), models, probabilities, colors, aic, distances, n_run, run_value, n_class_max) ################## nucleosome patterns around sp1 motifs ################## # nucleosomes data = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_1class_ref.mat")) nucl.1.mod = data$models nucl.1.prob = data$prob nucl.1.mod.open = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_1class_open_read_atac_ref.mat"))$models nucl.1.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_1class_aic.txt"))) data = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_2class_ref.mat")) nucl.2.mod = data$models nucl.2.prob = data$prob nucl.2.mod.open = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_2class_open_read_atac_ref.mat"))$models nucl.2.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_2class_aic.txt"))) data = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_3class_ref.mat")) nucl.3.mod = data$models nucl.3.prob = data$prob nucl.3.mod.open = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_3class_open_read_atac_ref.mat"))$models nucl.3.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_3class_aic.txt"))) data = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_4class_ref.mat")) nucl.4.mod = data$models nucl.4.prob = data$prob nucl.4.mod.open = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_4class_open_read_atac_ref.mat"))$models nucl.4.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_4class_aic.txt"))) data = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_5class_ref.mat")) nucl.5.mod = data$models nucl.5.prob = data$prob nucl.5.mod.open = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_5class_open_read_atac_ref.mat"))$models nucl.5.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_5class_aic.txt"))) data = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_6class_ref.mat")) nucl.6.mod = data$models nucl.6.prob = data$prob nucl.6.mod.open = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_6class_open_read_atac_ref.mat"))$models nucl.6.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_6class_aic.txt"))) data = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_7class_ref.mat")) nucl.7.mod = data$models nucl.7.prob = data$prob nucl.7.mod.open = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_7class_open_read_atac_ref.mat"))$models nucl.7.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_7class_aic.txt"))) data = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_8class_ref.mat")) nucl.8.mod = data$models nucl.8.prob = data$prob nucl.8.mod.open = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_8class_open_read_atac_ref.mat"))$models nucl.8.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_8class_aic.txt"))) data = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_9class_ref.mat")) nucl.9.mod = data$models nucl.9.prob = data$prob nucl.9.mod.open = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_9class_open_read_atac_ref.mat"))$models nucl.9.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_9class_aic.txt"))) data = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_10class_ref.mat")) nucl.10.mod = data$models nucl.10.prob = data$prob nucl.10.mod.open = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_10class_open_read_atac_ref.mat"))$models nucl.10.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_10class_aic.txt"))) data = NULL # plot 10 classes col = brewer.pal(3, "Set1") # X11(width=18, height=12) png(filename=file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_classification_1nucl_bin1bp_10class.png"), units="in", res=720, width=18, height=12) m = matrix(1:10, nrow=5, ncol=2, byrow=F) layout(m) # order from most to least probable class ord = order(nucl.10.prob, decreasing=T) ref.nucl = nucl.10.mod[ord,] ref.open = nucl.10.mod.open[ord,] prob = nucl.10.prob[ord] class = c(1:nrow(ref.nucl))[ord] for(i in 1:nrow(ref.nucl)) { plot(ref.nucl[i,] / max(ref.nucl[i,]), type='l', lwd=2, ylim=c(0,1), main=sprintf("class %d (p=%.2f)", class[i], prob[i]), col=col[2]) lines(ref.open[i,] / max(ref.open[i,]), lwd=2, col=col[1]) } dev.off() # plot all classes ref = list(nucl.10.mod, nucl.9.mod, nucl.8.mod, nucl.7.mod, nucl.6.mod, nucl.5.mod, nucl.4.mod, nucl.3.mod, nucl.2.mod, nucl.1.mod) prob = list(nucl.10.prob, nucl.9.prob, nucl.8.prob, nucl.7.prob, nucl.6.prob, nucl.5.prob, nucl.4.prob, nucl.3.prob, nucl.2.prob, nucl.1.prob) aic = c(nucl.10.aic, nucl.9.aic, nucl.8.aic, nucl.7.aic, nucl.6.aic, nucl.5.aic, nucl.4.aic, nucl.3.aic, nucl.2.aic, nucl.1.aic) # number of runs n_run = length(ref) # number of different classes overall n_class_tot = sum(unlist(lapply(ref, nrow))) # max value of K n_class_max = max(unlist(lapply(ref, nrow))) # some colors colors = rep(brewer.pal(9, "Set1")[1], n_class_max) # construct a matrix with all discovered class models on the rows models = matrix(nrow=n_class_tot, ncol=ncol(ref[[1]])) run_value = vector(length=n_class_tot) k_value = vector(length=n_class_tot) probabilities = vector(length=n_class_tot) k = 1 for(i in 1:n_run) { for(j in 1:nrow(ref[[i]])) { models[k,] = ref[[i]][j,] probabilities[k] = prob[[i]][j] run_value[k] = i k_value[k] = j k = k + 1 } } # distance matrix between all models distances = distance.mod(models) rownames(distances) = 1:nrow(distances) colnames(distances) = 1:ncol(distances) # plot plot.moderences(file.path("results", "10xgenomics_PBMC_5k_classification_1", "sp1_motifs_10e-7_classification_1nucl_bin1bp_classes.png"), models, probabilities, colors, aic, distances, n_run, run_value, n_class_max)