setwd(file.path("/", "local", "groux", "scATAC-seq")) # libraries library(RColorBrewer) library(clues) # functions source(file.path("scripts", "functions.R")) #' Performs a hard assigment. #' Each region is assigned to the #' class for which it has been classified #' with the highest probability, over #' all shift and flip states. #' \param prob a 4D array containing #' the posterior probabilities. It has #' the following dimensions : #' 1st number of regions #' 2nd number of classes #' 3rd number of shifts #' 4th number of flips #' \return a vector of labels, 1 per #' region. #' \author Romain Groux hard.assign = function(prob) { prob.per.region = apply(prob, c(1,2), sum) cluster = apply(prob.per.region, 1, which.max) return(cluster) } # path to the images for the logo path.a = file.path("res/A.png") path.c = file.path("res/C.png") path.g = file.path("res/G.png") path.t = file.path("res/T.png") # the true labels 5000 CTCF sites and 5000 SP1 sites true.labels = c(rep(1,1000), rep(2,1000)) # the expected dimensionality of the prob array to read # -1 indicate values that will change (class and shift) dim = c(2000, -1, -1, 2) # the parameters used to run the EM n.shifts = c(90) n.classes = 1:2 #number of time a classification was repeated n.repeat = 50 # where the results are dir.results = file.path("results", "10xgenomics_PBMC_5k_motifs_classification_7") # ari values ari = array(dim=c(length(n.classes), length(n.shifts), n.repeat)) for(i in 1:length(n.classes)) { n.class = n.classes[i] for(j in 1:length(n.shifts)) { n.shift = n.shifts[j] # update dimensions dim[2] = n.class dim[3] = n.shift # go over each repetition for(k in 1:n.repeat) { file.prob = file.path(dir.results, sprintf("ctcf_motifs_10e-6_myc_motifs_10e-6_sequences_rmsk_%dclass_%dshift_prob_%d.txt", n.class, n.shift, k)) file.motif = file.path(dir.results, sprintf("ctcf_motifs_10e-6_myc_motifs_10e-6_sequences_rmsk_%dclass_%dshift_model_%d.mat", n.class, n.shift, k)) # prob = read.arraytxt(file.prob, dim) # print(apply(prob, 2, sum)/sum(prob)) # cluster = hard.assign(prob) # ari[i,j,k] = adjustedRand(cluster, true.labels)["HA"] X11(width=10, height=12) par(mfrow=c(2,1)) motif = read.sequence.models(file.motif)$models plot.logo(motif[,,1], path.a, path.c, path.g, path.t) plot.logo(motif[,,2], path.a, path.c, path.g, path.t) } } }