setwd(file.path("/", "local", "groux", "scATAC-seq")) # libraries library(RColorBrewer) # functions source(file.path("scripts", "functions.R")) ################## open chromatin patterns around ebf1 motifs ################## # open chromatin data = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_open_bin2bp_read_atac_1class_ref.mat")) open.1.ref = data$references open.1.prob = data$prob open.1.ref.nucl = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_open_bin2bp_read_atac_1class_1nucl_fragment_center_ref.mat"))$ref open.1.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_open_bin2bp_read_atac_1class_aic.txt"))) data = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_open_bin2bp_read_atac_2class_ref.mat")) open.2.ref = data$references open.2.prob = data$prob open.2.ref.nucl = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_open_bin2bp_read_atac_2class_1nucl_fragment_center_ref.mat"))$ref open.2.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_open_bin2bp_read_atac_2class_aic.txt"))) data = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_open_bin2bp_read_atac_3class_ref.mat")) open.3.ref = data$references open.3.prob = data$prob open.3.ref.nucl = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_open_bin2bp_read_atac_3class_1nucl_fragment_center_ref.mat"))$ref open.3.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_open_bin2bp_read_atac_3class_aic.txt"))) data = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_open_bin2bp_read_atac_4class_ref.mat")) open.4.ref = data$references open.4.prob = data$prob open.4.ref.nucl = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_open_bin2bp_read_atac_4class_1nucl_fragment_center_ref.mat"))$ref open.4.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_open_bin2bp_read_atac_4class_aic.txt"))) data = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_open_bin2bp_read_atac_4class_ref.mat")) open.5.ref = data$references open.5.prob = data$prob open.5.ref.nucl = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_open_bin2bp_read_atac_5class_1nucl_fragment_center_ref.mat"))$ref open.5.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_open_bin2bp_read_atac_5class_aic.txt"))) data = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_open_bin2bp_read_atac_6class_ref.mat")) open.6.ref = data$references open.6.prob = data$prob open.6.ref.nucl = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_open_bin2bp_read_atac_6class_1nucl_fragment_center_ref.mat"))$ref open.6.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_open_bin2bp_read_atac_6class_aic.txt"))) data = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_open_bin2bp_read_atac_7class_ref.mat")) open.7.ref = data$references open.7.prob = data$prob open.7.ref.nucl = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_open_bin2bp_read_atac_7class_1nucl_fragment_center_ref.mat"))$ref open.7.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_open_bin2bp_read_atac_7class_aic.txt"))) data = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_open_bin2bp_read_atac_8class_ref.mat")) open.8.ref = data$references open.8.prob = data$prob open.8.ref.nucl = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_open_bin2bp_read_atac_8class_1nucl_fragment_center_ref.mat"))$ref open.8.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_open_bin2bp_read_atac_8class_aic.txt"))) data = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_open_bin2bp_read_atac_9class_ref.mat")) open.9.ref = data$references open.9.prob = data$prob open.9.ref.nucl = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_open_bin2bp_read_atac_9class_1nucl_fragment_center_ref.mat"))$ref open.9.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_open_bin2bp_read_atac_9class_aic.txt"))) data = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_open_bin2bp_read_atac_10class_ref.mat")) open.10.ref = data$references open.10.prob = data$prob open.10.ref.nucl = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_open_bin2bp_read_atac_10class_1nucl_fragment_center_ref.mat"))$ref open.10.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_open_bin2bp_read_atac_10class_aic.txt"))) data = NULL # plot 10 classes col = brewer.pal(3, "Set1") # X11(width=8, height=12) png(filename=file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_classification_open_bin2bp_10class.png"), units="in", res=720, width=8, height=12) m = matrix(1:10, nrow=5, ncol=2, byrow=F) layout(m) # order from most to least probable class ord = order(open.10.prob, decreasing=T) ref.open = open.10.ref[ord,] ref.nucl = open.10.ref.nucl[ord,] prob = open.10.prob[ord] class = c(1:nrow(ref.open))[ord] for(i in 1:nrow(ref.open)) { plot(ref.open[i,] / max(ref.open[i,]), type='l', lwd=2, ylim=c(0,1), main=sprintf("class %d (p=%.2f)", class[i], prob[i]), col=col[1]) lines(ref.nucl[i,] / max(ref.nucl[i,]), lwd=2, col=col[2]) } dev.off() # plot all classes ref = list(open.10.ref, open.9.ref, open.8.ref, open.7.ref, open.6.ref, open.5.ref, open.4.ref, open.3.ref, open.2.ref, open.1.ref) prob = list(open.10.prob, open.9.prob, open.8.prob, open.7.prob, open.6.prob, open.5.prob, open.4.prob, open.3.prob, open.2.prob, open.1.prob) aic = c(open.10.aic, open.9.aic, open.8.aic, open.7.aic, open.6.aic, open.5.aic, open.4.aic, open.3.aic, open.2.aic, open.1.aic) # number of runs n_run = length(ref) # number of different classes overall n_class_tot = sum(unlist(lapply(ref, nrow))) # max value of K n_class_max = max(unlist(lapply(ref, nrow))) # some colors colors = rep(brewer.pal(9, "Set1")[1], n_class_max) # construct a matrix with all discovered references on the rows references = matrix(nrow=n_class_tot, ncol=ncol(ref[[1]])) run_value = vector(length=n_class_tot) k_value = vector(length=n_class_tot) probabilities = vector(length=n_class_tot) k = 1 for(i in 1:n_run) { for(j in 1:nrow(ref[[i]])) { references[k,] = ref[[i]][j,] probabilities[k] = prob[[i]][j] run_value[k] = i k_value[k] = j k = k + 1 } } # distance matrix between all references distances = distance.ref(references) rownames(distances) = 1:nrow(distances) colnames(distances) = 1:ncol(distances) # plot plot.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_classification_open_bin2bp_classes.png"), references, probabilities, colors, aic, distances, n_run, run_value, n_class_max) ################## nucleosome patterns around ebf1 motifs ################## # nucleosomes data = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_1nucl_bin2bp_fragment_center_1class_ref.mat")) nucl.1.ref = data$references nucl.1.prob = data$prob nucl.1.ref.open = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_1nucl_bin2bp_fragment_center_1class_open_read_atac_ref.mat"))$ref nucl.1.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_1nucl_bin2bp_fragment_center_1class_aic.txt"))) data = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_1nucl_bin2bp_fragment_center_2class_ref.mat")) nucl.2.ref = data$references nucl.2.prob = data$prob nucl.2.ref.open = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_1nucl_bin2bp_fragment_center_2class_open_read_atac_ref.mat"))$ref nucl.2.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_1nucl_bin2bp_fragment_center_2class_aic.txt"))) data = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_1nucl_bin2bp_fragment_center_3class_ref.mat")) nucl.3.ref = data$references nucl.3.prob = data$prob nucl.3.ref.open = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_1nucl_bin2bp_fragment_center_3class_open_read_atac_ref.mat"))$ref nucl.3.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_1nucl_bin2bp_fragment_center_3class_aic.txt"))) data = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_1nucl_bin2bp_fragment_center_4class_ref.mat")) nucl.4.ref = data$references nucl.4.prob = data$prob nucl.4.ref.open = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_1nucl_bin2bp_fragment_center_4class_open_read_atac_ref.mat"))$ref nucl.4.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_1nucl_bin2bp_fragment_center_4class_aic.txt"))) data = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_1nucl_bin2bp_fragment_center_5class_ref.mat")) nucl.5.ref = data$references nucl.5.prob = data$prob nucl.5.ref.open = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_1nucl_bin2bp_fragment_center_5class_open_read_atac_ref.mat"))$ref nucl.5.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_1nucl_bin2bp_fragment_center_5class_aic.txt"))) data = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_1nucl_bin2bp_fragment_center_6class_ref.mat")) nucl.6.ref = data$references nucl.6.prob = data$prob nucl.6.ref.open = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_1nucl_bin2bp_fragment_center_6class_open_read_atac_ref.mat"))$ref nucl.6.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_1nucl_bin2bp_fragment_center_6class_aic.txt"))) data = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_1nucl_bin2bp_fragment_center_7class_ref.mat")) nucl.7.ref = data$references nucl.7.prob = data$prob nucl.7.ref.open = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_1nucl_bin2bp_fragment_center_7class_open_read_atac_ref.mat"))$ref nucl.7.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_1nucl_bin2bp_fragment_center_7class_aic.txt"))) data = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_1nucl_bin2bp_fragment_center_8class_ref.mat")) nucl.8.ref = data$references nucl.8.prob = data$prob nucl.8.ref.open = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_1nucl_bin2bp_fragment_center_8class_open_read_atac_ref.mat"))$ref nucl.8.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_1nucl_bin2bp_fragment_center_8class_aic.txt"))) data = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_1nucl_bin2bp_fragment_center_9class_ref.mat")) nucl.9.ref = data$references nucl.9.prob = data$prob nucl.9.ref.open = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_1nucl_bin2bp_fragment_center_9class_open_read_atac_ref.mat"))$ref nucl.9.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_1nucl_bin2bp_fragment_center_9class_aic.txt"))) data = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_1nucl_bin2bp_fragment_center_10class_ref.mat")) nucl.10.ref = data$references nucl.10.prob = data$prob nucl.10.ref.open = read.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_1nucl_bin2bp_fragment_center_10class_open_read_atac_ref.mat"))$ref nucl.10.aic = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_1nucl_bin2bp_fragment_center_10class_aic.txt"))) data = NULL # plot 10 classes col = brewer.pal(3, "Set1") # X11(width=8, height=12) png(filename=file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_classification_1nucl_bin2bp_10class.png"), units="in", res=720, width=8, height=12) m = matrix(1:10, nrow=5, ncol=2, byrow=F) layout(m) # order from most to least probable class ord = order(nucl.10.prob, decreasing=T) ref.nucl = nucl.10.ref[ord,] ref.open = nucl.10.ref.open[ord,] prob = nucl.10.prob[ord] class = c(1:nrow(ref.nucl))[ord] for(i in 1:nrow(ref.nucl)) { plot(ref.nucl[i,] / max(ref.nucl[i,]), type='l', lwd=2, ylim=c(0,1), main=sprintf("class %d (p=%.2f)", class[i], prob[i]), col=col[2]) lines(ref.open[i,] / max(ref.open[i,]), lwd=2, col=col[1]) } dev.off() # plot all classes ref = list(nucl.10.ref, nucl.9.ref, nucl.8.ref, nucl.7.ref, nucl.6.ref, nucl.5.ref, nucl.4.ref, nucl.3.ref, nucl.2.ref, nucl.1.ref) prob = list(nucl.10.prob, nucl.9.prob, nucl.8.prob, nucl.7.prob, nucl.6.prob, nucl.5.prob, nucl.4.prob, nucl.3.prob, nucl.2.prob, nucl.1.prob) aic = c(nucl.10.aic, nucl.9.aic, nucl.8.aic, nucl.7.aic, nucl.6.aic, nucl.5.aic, nucl.4.aic, nucl.3.aic, nucl.2.aic, nucl.1.aic) # number of runs n_run = length(ref) # number of different classes overall n_class_tot = sum(unlist(lapply(ref, nrow))) # max value of K n_class_max = max(unlist(lapply(ref, nrow))) # some colors colors = rep(brewer.pal(9, "Set1")[1], n_class_max) # construct a matrix with all discovered references on the rows references = matrix(nrow=n_class_tot, ncol=ncol(ref[[1]])) run_value = vector(length=n_class_tot) k_value = vector(length=n_class_tot) probabilities = vector(length=n_class_tot) k = 1 for(i in 1:n_run) { for(j in 1:nrow(ref[[i]])) { references[k,] = ref[[i]][j,] probabilities[k] = prob[[i]][j] run_value[k] = i k_value[k] = j k = k + 1 } } # distance matrix between all references distances = distance.ref(references) rownames(distances) = 1:nrow(distances) colnames(distances) = 1:ncol(distances) # plot plot.references(file.path("results", "10xgenomics_PBMC_5k_classification_2", "ebf1_motifs_10e-6_classification_1nucl_bin2bp_classes.png"), references, probabilities, colors, aic, distances, n_run, run_value, n_class_max)