diff --git a/scripts/10xgenomics_PBMC_5k/analyse_fragment_lengths.R b/scripts/10xgenomics_PBMC_5k/analyse_fragment_lengths.R
index 3c33c58..40d90ae 100644
--- a/scripts/10xgenomics_PBMC_5k/analyse_fragment_lengths.R
+++ b/scripts/10xgenomics_PBMC_5k/analyse_fragment_lengths.R
@@ -1,151 +1,239 @@
 
 setwd(file.path("", "local", "groux", "scATAC-seq"))
 
 if(!file.exists(file.path("results", "10xgenomics_PBMC_5k")))
 {	dir.create(file.path("results", "10xgenomics_PBMC_5k")) }
 
 # library
 library("RColorBrewer")
 
 ############# data ############# 
 
 data = read.table(file.path("data", "10xgenomics_PBMC_5k", 
                             "atac_v1_pbmc_5k_possorted_filtered_fragment_lengths.txt"), header=F)
 colnames(data) = c("nb", "size")
 
 ############# fit to gaussian mixture ############# 
 
 set.seed(20190604) # d-day - 2 (les sanglots long de l'automne...)
 # fit data to gaussian mixture model
 size = data$size[1:1000]
 dens = data$nb[1:1000] / sum(data$nb[1:1000])
 # model parameters, 1st guess by looking at plot
 m1 = 50   ; s1 = 10 ; a1 = 1
 m2 = 200  ; s2 = 10 ; a2 = 1 
 m3 = 380  ; s3 = 30 ; a3 = 1 
 # fit
 init = c(m1=m1, s1=s1, a1=a1,
          m2=m2, s2=s2, a2=a2,
          m3=m3, s3=s3, a3=a3)
 f = nls(dens ~ a1 * exp(-((size-m1)**2)/(2*s1)) + 
                a2 * exp(-((size-m2)**2)/(2*s2)) +
                a3 * exp(-((size-m3)**2)/(2*s3)),
         start=init)
 # parameter estimates
 param = matrix(nrow=3, ncol=3)
 colnames(param) = c("m", "s", "a")
 rownames(param) = c("class1", "class2", "class3")
 param[1,] = c(coef(f)["m1"], coef(f)["s1"], coef(f)["a1"])
 param[2,] = c(coef(f)["m2"], coef(f)["s2"], coef(f)["a2"])
 param[3,] = c(coef(f)["m3"], coef(f)["s3"], coef(f)["a3"])
 # plot
 png(filename=file.path("results", "10xgenomics_PBMC_5k", "fragment_lengths_classes.png"), width=10, height=8, units="in", res=720)
   p = par(mar=c(5.1, 5.1, 4.1, 2.1))
   plot(size, dens, type='l', lwd=2, 
        main="Fragment lengths", xlab="length (bp)", ylab="density",
        cex.main=3, cex.axis=1.5, cex.lab=2.5)
   col = brewer.pal(4, "Set1")
   lines(size, param[1,3] * exp(-((size-param[1,1])**2)/(2*param[1,2])), col=col[1], lwd=4, lty=2)
   lines(size, param[2,3] * exp(-((size-param[2,1])**2)/(2*param[2,2])), col=col[2], lwd=4, lty=2)
   lines(size, param[3,3] * exp(-((size-param[3,1])**2)/(2*param[3,2])), col=col[3], lwd=4, lty=2)
   lines(size, param[1,3] * exp(-((size-param[1,1])**2)/(2*param[1,2])) +
               param[2,3] * exp(-((size-param[2,1])**2)/(2*param[2,2])) +
               param[3,3] * exp(-((size-param[3,1])**2)/(2*param[3,2])), col=col[4], lwd=4)
   legend("topright",
          legend=c("open chromatin",
                   "mono-nucl.",
                   "di-nucl.",
                   "all"),
          col=col, lwd=c(4,4,4,4), lty=c(2,2,2,1),
          bty='n', cex=2)
 dev.off()
 # assign probabilities to fragment length
 prob = matrix(nrow=1000, ncol=3)
 rownames(prob) = size
 for(i in 1:nrow(prob))
 { for(j in 1:ncol(prob))
   { prob[i,j] = param[j,3] * exp(-((size[i]-param[j,1])**2)/(2*param[j,2])) }
     prob[i,]  = prob[i,] / sum(prob[i,])
 }
 # plot
 png(filename=file.path("results", "10xgenomics_PBMC_5k", "fragment_lengths_class_prob.png"), width=10, height=8, units="in", res=720)
   p = par(mar=c(5.1, 5.1, 4.1, 2.1))
   plot(size, prob[,1], ylim=c(0, max(prob)), type='l',
-       main="Fragment classes", xlab="length (bp)", ylab="p(class)",
+       main="Fragment class probability", xlab="length (bp)", ylab="p(class)",
        cex.main=3, cex.axis=1.5, cex.lab=2.5, lwd=4, col=col[1])
   lines(size, prob[,2], lwd=4, col=col[2])
   lines(size, prob[,3], lwd=4, col=col[3])
   
   # set limits at min 90 assignment to a class
   abline(v=30, lwd=2, lty=2)  # class 1 lower limit (size limit)
   abline(v=84, lwd=2, lty=2)  # class 1 upper limit
   abline(v=133, lwd=2, lty=2) # class 2 lower limit
   abline(v=266, lwd=2, lty=2) # class 2 upper limit
   abline(v=341, lwd=2, lty=2) # class 3 lower limit
   abline(v=500, lwd=2, lty=2) # class 3 upper limit (size limit)
 dev.off()
 
 ############# break dataset into classes ############# 
 
 # size limits
 i_cl1_1 = which(size == 30)
 i_cl1_2 = which(size == 84)
 i_cl2_1 = which(size == 133)
 i_cl2_2 = which(size == 266)
 i_cl3_1 = which(size == 341)
 i_cl3_2 = which(size == 500)
 
 # nb of reads per class
 nb_all   = sum(data$nb)
 nb_cl1   = sum(data$nb[i_cl1_1:i_cl1_2])
 nb_cl2   = sum(data$nb[i_cl2_1:i_cl2_2])
 nb_cl3   = sum(data$nb[i_cl3_1:i_cl3_2])
 # nb of reads not assigned at the boundaries of classes
 nb_left1 = sum(data$nb[(i_cl1_2+1):(i_cl2_1-1)]) +
            sum(data$nb[(i_cl2_2+1):(i_cl3_1-1)])
 # nb of reads > 500bp
 nb_left2 = sum(data$nb[(i_cl3_2+1):length(data$nb)])
 nb_left = nb_left1 + nb_left2
 
 # plot classes
 png(filename=file.path("results", "10xgenomics_PBMC_5k", "fragment_lengths_groups.png"), width=10, height=8, units="in", res=720)
 
   p = par(mar=c(5.1, 5.1, 4.1, 2.1))
   plot(y=data$nb[1:1000], x=data$size[1:1000], type='l', lwd=4,
-       main="Fragment lengths", xlab="length (bp)", ylab="frequency",
+       main="Fragment classes", xlab="length (bp)", ylab="frequency",
        cex.main=3, cex.axis=1.5, cex.lab=2.5)
   # show limits
   abline(v=data$size[i_cl1_1], lwd=3, lty=2, col=col[1])
   abline(v=data$size[i_cl1_2], lwd=3, lty=2, col=col[1])
   abline(v=data$size[i_cl2_1], lwd=3, lty=2, col=col[2])
   abline(v=data$size[i_cl2_2], lwd=3, lty=2, col=col[2])
   abline(v=data$size[i_cl3_1], lwd=3, lty=2, col=col[3])
   abline(v=data$size[i_cl3_2], lwd=3, lty=2, col=col[3])
   # nb of reads in groups
-  text(x=550, y=0.85*max(data[,1]), labels=sprintf("%.2f mio reads",         nb_all/1e6),  cex=1.8, pos=4)
-  text(x=550, y=0.80*max(data[,1]), labels=sprintf("%.2f mio reads class 1", nb_cl1/1e6),  cex=1.8, pos=4, col=col[1])
-  text(x=550, y=0.75*max(data[,1]), labels=sprintf("%.2f mio reads class 2", nb_cl2/1e6),  cex=1.8, pos=4, col=col[2])
-  text(x=550, y=0.70*max(data[,1]), labels=sprintf("%.2f mio reads class 3", nb_cl3/1e6),  cex=1.8, pos=4, col=col[3])
-  text(x=550, y=0.65*max(data[,1]), labels=sprintf("%.2f mio reads left",    nb_left/1e6), cex=1.8, pos=4)
+  text(x=550, y=0.85*max(data[,1]), labels=sprintf("%.2f mio reads",            nb_all/1e6),  cex=1.8, pos=4)
+  text(x=550, y=0.80*max(data[,1]), labels=sprintf("%.2f mio reads open",       nb_cl1/1e6),  cex=1.8, pos=4, col=col[1])
+  text(x=550, y=0.75*max(data[,1]), labels=sprintf("%.2f mio reads mono nucl.", nb_cl2/1e6),  cex=1.8, pos=4, col=col[2])
+  text(x=550, y=0.70*max(data[,1]), labels=sprintf("%.2f mio reads di.nucl.",   nb_cl3/1e6),  cex=1.8, pos=4, col=col[3])
+  text(x=550, y=0.65*max(data[,1]), labels=sprintf("%.2f mio reads left",       nb_left/1e6), cex=1.8, pos=4)
   # shade the class areas
   # class 1
   rect(size[i_cl1_1],
        0,
        size[i_cl1_2],
        max(data$nb),
        col=rgb(red=1, green=0, blue=0, alpha=0.1), border="transparent")
   # class 2  
   rect(size[i_cl2_1],
        0,
        size[i_cl2_2],
        max(data$nb),
        col=rgb(red=0, green=0, blue=1, alpha=0.1), border="transparent")
   # class 3
   rect(size[i_cl3_1],
        0,
        size[i_cl3_2],
        max(data$nb),
        col=rgb(red=0, green=1, blue=0, alpha=0.1), border="transparent")
   
 dev.off()
 
+
+############# combined figure #############
+# Single PNG gathering the three plots above side by side (mfrow=c(1,3)):
+#   A) fragment length density with the three fitted gaussians and their sum
+#   B) probability of each class for every fragment length
+#   C) fragment length counts with the class limits and the reads per class
+# Reuses size, dens, param, prob, i_cl*_* and nb_* computed above.
+# The par() settings saved in p are not restored; the device is closed at the end.
+# X11(width=20, height=6)
+png(filename=file.path("results", "10xgenomics_PBMC_5k", "fragment_lengths.png"), width=20, height=6, units="in", res=720)
+  p = par(mar=c(5.1, 5.1, 5.1, 2.1),
+          mfrow=c(1,3))
+  # each panel ends with mtext() writing its letter in the top margin (side 3, line 0) at x=-80
+  # A) fragment sizes and gaussians (dashed: one per class, solid: their sum)
+  plot(size, dens, type='l', lwd=2, 
+       main="Fragment lengths", xlab="length (bp)", ylab="density",
+       cex.main=3, cex.axis=1.5, cex.lab=2.5)
+  col = brewer.pal(4, "Set1")
+  lines(size, param[1,3] * exp(-((size-param[1,1])**2)/(2*param[1,2])), col=col[1], lwd=4, lty=2)
+  lines(size, param[2,3] * exp(-((size-param[2,1])**2)/(2*param[2,2])), col=col[2], lwd=4, lty=2)
+  lines(size, param[3,3] * exp(-((size-param[3,1])**2)/(2*param[3,2])), col=col[3], lwd=4, lty=2)
+  lines(size, param[1,3] * exp(-((size-param[1,1])**2)/(2*param[1,2])) +
+          param[2,3] * exp(-((size-param[2,1])**2)/(2*param[2,2])) +
+          param[3,3] * exp(-((size-param[3,1])**2)/(2*param[3,2])), col=col[4], lwd=4)
+  legend("topright",
+         legend=c("open chromatin",
+                  "mono-nucl.",
+                  "di-nucl.",
+                  "all"),
+         col=col, lwd=c(4,4,4,4), lty=c(2,2,2,1),
+         bty='n', cex=2)
+  mtext('A', 3, 0, cex=4.5, at=-80)
+
+  # B) class probability for each fragment length (each row of prob sums to 1)
+  plot(size, prob[,1], ylim=c(0, max(prob)), type='l',
+       main="Fragment class probability", xlab="length (bp)", ylab="p(class)",
+       cex.main=3, cex.axis=1.5, cex.lab=2.5, lwd=4, col=col[1])
+  lines(size, prob[,2], lwd=4, col=col[2])
+  lines(size, prob[,3], lwd=4, col=col[3])
+  
+  # hard-coded class limits in bp; original note: "min 90 assignment to a class" (presumably p(class) >= 0.9, TODO confirm)
+  abline(v=30, lwd=2, lty=2)  # class 1 lower limit (size limit)
+  abline(v=84, lwd=2, lty=2)  # class 1 upper limit
+  abline(v=133, lwd=2, lty=2) # class 2 lower limit
+  abline(v=266, lwd=2, lty=2) # class 2 upper limit
+  abline(v=341, lwd=2, lty=2) # class 3 lower limit
+  abline(v=500, lwd=2, lty=2) # class 3 upper limit (size limit)
+  mtext('B', 3, 0, cex=4.5, at=-80)
+  
+  
+  # C) final categories, drawn on the raw counts of the first 1000 rows of data
+  plot(y=data$nb[1:1000], x=data$size[1:1000], type='l', lwd=4,
+       main="Fragment classes", xlab="length (bp)", ylab="frequency",
+       cex.main=3, cex.axis=1.5, cex.lab=2.5)
+  # show limits (dashed vertical lines, one colour per class)
+  abline(v=data$size[i_cl1_1], lwd=3, lty=2, col=col[1])
+  abline(v=data$size[i_cl1_2], lwd=3, lty=2, col=col[1])
+  abline(v=data$size[i_cl2_1], lwd=3, lty=2, col=col[2])
+  abline(v=data$size[i_cl2_2], lwd=3, lty=2, col=col[2])
+  abline(v=data$size[i_cl3_1], lwd=3, lty=2, col=col[3])
+  abline(v=data$size[i_cl3_2], lwd=3, lty=2, col=col[3])
+  # nb of reads in groups, divided by 1e6 ("mio"), written to the right of x=550
+  text(x=550, y=0.85*max(data[,1]), labels=sprintf("%.2f mio reads",            nb_all/1e6),  cex=1.8, pos=4)
+  text(x=550, y=0.80*max(data[,1]), labels=sprintf("%.2f mio reads open",       nb_cl1/1e6),  cex=1.8, pos=4, col=col[1])
+  text(x=550, y=0.75*max(data[,1]), labels=sprintf("%.2f mio reads mono nucl.", nb_cl2/1e6),  cex=1.8, pos=4, col=col[2])
+  text(x=550, y=0.70*max(data[,1]), labels=sprintf("%.2f mio reads di.nucl.",   nb_cl3/1e6),  cex=1.8, pos=4, col=col[3])
+  text(x=550, y=0.65*max(data[,1]), labels=sprintf("%.2f mio reads left",       nb_left/1e6), cex=1.8, pos=4)
+  # shade the class areas (translucent, alpha=0.1) from 0 up to the highest count
+  # class 1 (red)
+  rect(size[i_cl1_1],
+       0,
+       size[i_cl1_2],
+       max(data$nb),
+       col=rgb(red=1, green=0, blue=0, alpha=0.1), border="transparent")
+  # class 2 (blue)
+  rect(size[i_cl2_1],
+       0,
+       size[i_cl2_2],
+       max(data$nb),
+       col=rgb(red=0, green=0, blue=1, alpha=0.1), border="transparent")
+  # class 3 (green)
+  rect(size[i_cl3_1],
+       0,
+       size[i_cl3_2],
+       max(data$nb),
+       col=rgb(red=0, green=1, blue=0, alpha=0.1), border="transparent")
+  mtext('C', 3, 0, cex=4.5, at=-80)
+dev.off()
diff --git a/scripts/10xgenomics_PBMC_5k_motifs/analysis_ctcf_motif.R b/scripts/10xgenomics_PBMC_5k_motifs/analysis_ctcf_motif.R
index a67e09a..fca9588 100644
--- a/scripts/10xgenomics_PBMC_5k_motifs/analysis_ctcf_motif.R
+++ b/scripts/10xgenomics_PBMC_5k_motifs/analysis_ctcf_motif.R
@@ -1,277 +1,277 @@
 setwd(file.path("/", "local", "groux", "scATAC-seq"))
 
 # libraries
 library(RColorBrewer)
 
 # functions
 source(file.path("scripts", "functions.R"))
 
 
 ################## aggregations around CTCF motifs ################## 
 
 # data
 # open chromatin
-data.open.1.frag  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_open_bin1bp_fragment.mat")))
-data.open.2.frag  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_open_bin2bp_fragment.mat")))
-data.open.10.frag = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_open_bin10bp_fragment.mat")))
+data.open.1.frag  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ctcf_motifs_10e-6_open_bin1bp_fragment.mat")))
+data.open.2.frag  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ctcf_motifs_10e-6_open_bin2bp_fragment.mat")))
+data.open.10.frag = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ctcf_motifs_10e-6_open_bin10bp_fragment.mat")))
 
-data.open.1.read  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_open_bin1bp_read.mat")))
-data.open.2.read  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_open_bin2bp_read.mat")))
-data.open.10.read = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_open_bin10bp_read.mat")))
+data.open.1.read  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ctcf_motifs_10e-6_open_bin1bp_read.mat")))
+data.open.2.read  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ctcf_motifs_10e-6_open_bin2bp_read.mat")))
+data.open.10.read = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ctcf_motifs_10e-6_open_bin10bp_read.mat")))
 
-data.open.1.atac  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_open_bin1bp_read_atac.mat")))
-data.open.2.atac  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_open_bin2bp_read_atac.mat")))
-data.open.10.atac = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_open_bin10bp_read_atac.mat")))
+data.open.1.atac  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ctcf_motifs_10e-6_open_bin1bp_read_atac.mat")))
+data.open.2.atac  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ctcf_motifs_10e-6_open_bin2bp_read_atac.mat")))
+data.open.10.atac = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ctcf_motifs_10e-6_open_bin10bp_read_atac.mat")))
 
 # mono-nucleosomes
-data.1nucl.1.frag  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_1nucl_bin1bp_fragment.mat")))
-data.1nucl.2.frag  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_1nucl_bin2bp_fragment.mat")))
-data.1nucl.10.frag = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_1nucl_bin10bp_fragment.mat")))
+data.1nucl.1.frag  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ctcf_motifs_10e-6_1nucl_bin1bp_fragment.mat")))
+data.1nucl.2.frag  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ctcf_motifs_10e-6_1nucl_bin2bp_fragment.mat")))
+data.1nucl.10.frag = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ctcf_motifs_10e-6_1nucl_bin10bp_fragment.mat")))
 
-data.1nucl.1.read  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_1nucl_bin1bp_read.mat")))
-data.1nucl.2.read  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_1nucl_bin2bp_read.mat")))
-data.1nucl.10.read = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_1nucl_bin10bp_read.mat")))
+data.1nucl.1.read  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ctcf_motifs_10e-6_1nucl_bin1bp_read.mat")))
+data.1nucl.2.read  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ctcf_motifs_10e-6_1nucl_bin2bp_read.mat")))
+data.1nucl.10.read = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ctcf_motifs_10e-6_1nucl_bin10bp_read.mat")))
 
-data.1nucl.1.cent  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center.mat")))
-data.1nucl.2.cent  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_1nucl_bin2bp_fragment_center.mat")))
-data.1nucl.10.cent = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_1nucl_bin10bp_fragment_center.mat")))
+data.1nucl.1.cent  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center.mat")))
+data.1nucl.2.cent  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ctcf_motifs_10e-6_1nucl_bin2bp_fragment_center.mat")))
+data.1nucl.10.cent = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ctcf_motifs_10e-6_1nucl_bin10bp_fragment_center.mat")))
 
 # di-nucleosomes
-data.2nucl.1.frag  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_2nucl_bin1bp_fragment.mat")))
-data.2nucl.2.frag  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_2nucl_bin2bp_fragment.mat")))
-data.2nucl.10.frag = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_2nucl_bin10bp_fragment.mat")))
+data.2nucl.1.frag  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ctcf_motifs_10e-6_2nucl_bin1bp_fragment.mat")))
+data.2nucl.2.frag  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ctcf_motifs_10e-6_2nucl_bin2bp_fragment.mat")))
+data.2nucl.10.frag = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ctcf_motifs_10e-6_2nucl_bin10bp_fragment.mat")))
 
-data.2nucl.1.read  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_2nucl_bin1bp_read.mat")))
-data.2nucl.2.read  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_2nucl_bin2bp_read.mat")))
-data.2nucl.10.read = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_2nucl_bin10bp_read.mat")))
+data.2nucl.1.read  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ctcf_motifs_10e-6_2nucl_bin1bp_read.mat")))
+data.2nucl.2.read  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ctcf_motifs_10e-6_2nucl_bin2bp_read.mat")))
+data.2nucl.10.read = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ctcf_motifs_10e-6_2nucl_bin10bp_read.mat")))
 
-data.2nucl.1.cent  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_2nucl_bin1bp_fragment_center.mat")))
-data.2nucl.2.cent  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_2nucl_bin2bp_fragment_center.mat")))
-data.2nucl.10.cent = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_2nucl_bin10bp_fragment_center.mat")))
+data.2nucl.1.cent  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ctcf_motifs_10e-6_2nucl_bin1bp_fragment_center.mat")))
+data.2nucl.2.cent  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ctcf_motifs_10e-6_2nucl_bin2bp_fragment_center.mat")))
+data.2nucl.10.cent = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ctcf_motifs_10e-6_2nucl_bin10bp_fragment_center.mat")))
 
 # mono-nucleosomes from di-nucleosome data
-data.nucls.1.frag  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_2nuclsplitintwo_bin1bp_fragment.mat")))
-data.nucls.2.frag  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_2nuclsplitintwo_bin2bp_fragment.mat")))
-data.nucls.10.frag = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_2nuclsplitintwo_bin10bp_fragment.mat")))
+data.nucls.1.frag  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ctcf_motifs_10e-6_2nuclsplitintwo_bin1bp_fragment.mat")))
+data.nucls.2.frag  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ctcf_motifs_10e-6_2nuclsplitintwo_bin2bp_fragment.mat")))
+data.nucls.10.frag = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ctcf_motifs_10e-6_2nuclsplitintwo_bin10bp_fragment.mat")))
 
-data.nucls.1.read  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_2nuclsplitintwo_bin1bp_read.mat")))
-data.nucls.2.read  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_2nuclsplitintwo_bin2bp_read.mat")))
-data.nucls.10.read = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_2nuclsplitintwo_bin10bp_read.mat")))
+data.nucls.1.read  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ctcf_motifs_10e-6_2nuclsplitintwo_bin1bp_read.mat")))
+data.nucls.2.read  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ctcf_motifs_10e-6_2nuclsplitintwo_bin2bp_read.mat")))
+data.nucls.10.read = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ctcf_motifs_10e-6_2nuclsplitintwo_bin10bp_read.mat")))
 
-data.nucls.1.cent  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_2nuclsplitintwo_bin1bp_fragment_center.mat")))
-data.nucls.2.cent  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_2nuclsplitintwo_bin2bp_fragment_center.mat")))
-data.nucls.10.cent = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_2nuclsplitintwo_bin10bp_fragment_center.mat")))
+data.nucls.1.cent  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ctcf_motifs_10e-6_2nuclsplitintwo_bin1bp_fragment_center.mat")))
+data.nucls.2.cent  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ctcf_motifs_10e-6_2nuclsplitintwo_bin2bp_fragment_center.mat")))
+data.nucls.10.cent = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ctcf_motifs_10e-6_2nuclsplitintwo_bin10bp_fragment_center.mat")))
 
 
 # colors
 col = brewer.pal(4, "Set1")
 
 # x-axis
 axis.at.1   = seq(0, ncol(data.open.1.frag), length.out =5)
 axis.lab.1  = seq(-400,   400, by=200)
 axis.at.2   = seq(0, ncol(data.open.2.frag), length.out =5)
 axis.lab.2  = seq(-400,   400, by=200)
 axis.at.10  = seq(0, ncol(data.open.10.frag), length.out=5)
 axis.lab.10 = seq(-1000, 1000, by=500)
 
 # X11(width=12, height=12)
 png(filename=file.path("results/10xgenomics_PBMC_5k/ctcf_motifs_10e-6_aggregations.png"),
     units="in", res=720, width=12, height=9)
   m = matrix(nrow=4, ncol=4,
              data=c(16,13,14,15,
                     10, 1, 4, 7,
                     11, 2, 5, 8,
                     12, 3, 6, 9), byrow=T)
   l = layout(mat=m, widths=c(0.2, 1, 1, 1), heights=c(0.2, 1, 1, 1))
   layout.show(l)
   
   p = par(mar=c(5.1, 5.1, 4.1, 2.1))
   
   # 1bp resolution
   ## entire fragments
   plot(colMeans(data.open.1.frag),   col=col[1], lwd=3, type='l',
        main="", xlab="pos[bp]", ylab="Nb of reads", xaxt='n',
        cex.axis=2, cex.lab=2)
   lines(colMeans(data.open.1.frag),  col=col[1], lwd=3)
   lines(colMeans(data.1nucl.1.frag), col=col[2], lwd=3)
   lines(colMeans(data.2nucl.1.frag), col=col[3], lwd=3)
   lines(colMeans(data.nucls.1.frag), col=col[4], lwd=3)
   axis(side=1, at=axis.at.1, labels=axis.lab.1, cex.axis=1.8)
   ## entire reads
   plot(colMeans(data.open.1.read),   col=col[1], lwd=3, type='l',
        main="", xlab="pos[bp]", ylab="Nb of reads", xaxt='n',
        cex.axis=2, cex.lab=2)
   lines(colMeans(data.1nucl.1.read), col=col[2], lwd=3)
   lines(colMeans(data.2nucl.1.read), col=col[3], lwd=3)
   lines(colMeans(data.nucls.1.read), col=col[4], lwd=3)
   axis(side=1, at=axis.at.1, labels=axis.lab.1, cex.axis=1.8)
   ## atac reads and centers
   plot(colMeans(data.open.1.atac)/max(colMeans(data.open.1.atac)),
        col=col[1], lwd=3, type='l', xaxt='n',
        main="", xlab="pos[bp]", ylab="Prop max signal",
        cex.axis=2, cex.lab=2)
   lines(colMeans(data.1nucl.1.cent)/max(colMeans(data.1nucl.1.cent)), 
         col=col[2], lwd=3)
   lines(colMeans(data.2nucl.1.cent)/max(colMeans(data.2nucl.1.cent)),
         col=col[3], lwd=3)
   lines(colMeans(data.nucls.1.cent)/max(colMeans(data.nucls.1.cent)),
         col=col[4], lwd=3)
   axis(side=1, at=axis.at.1, labels=axis.lab.1, cex.axis=1.8)
   
   # 2bp resolution
   ## entire fragments
   plot(colMeans(data.open.2.frag),   col=col[1], lwd=3, type='l',
        main="", xlab="pos[bp]", ylab="Nb of reads", xaxt='n',
        cex.axis=2, cex.lab=2)
   lines(colMeans(data.1nucl.2.frag), col=col[2], lwd=3)
   lines(colMeans(data.2nucl.2.frag), col=col[3], lwd=3)
   lines(colMeans(data.nucls.2.frag), col=col[4], lwd=3)
   axis(side=1, at=axis.at.2, labels=axis.lab.2, cex.axis=1.8)
   ## entire reads
   plot(colMeans(data.open.2.read),   col=col[1], lwd=3, type='l',
        main="", xlab="pos[bp]", ylab="Nb of reads", xaxt='n',
        cex.axis=2, cex.lab=2)
   lines(colMeans(data.1nucl.2.read), col=col[2], lwd=3)
   lines(colMeans(data.2nucl.2.read), col=col[3], lwd=3)
   lines(colMeans(data.nucls.2.read), col=col[4], lwd=3)
   axis(side=1, at=axis.at.2, labels=axis.lab.2, cex.axis=1.8)
   ## atac reads and centers
   plot(colMeans(data.open.2.atac)/max(colMeans(data.open.2.atac)),
        col=col[1], lwd=3, type='l', xaxt='n',
        main="", xlab="pos[bp]", ylab="Prop max signal",
        cex.axis=2, cex.lab=2)
   lines(colMeans(data.1nucl.2.cent)/max(colMeans(data.1nucl.2.cent)), 
         col=col[2], lwd=3)
   lines(colMeans(data.2nucl.2.cent)/max(colMeans(data.2nucl.2.cent)),
         col=col[3], lwd=3)
   lines(colMeans(data.nucls.2.cent)/max(colMeans(data.nucls.2.cent)),
         col=col[4], lwd=3)
   axis(side=1, at=axis.at.2, labels=axis.lab.2, cex.axis=1.8)
   
   # 10bp resolution
   ## entire fragments
   plot(colMeans(data.open.10.frag),   col=col[1], lwd=3, type='l',
        main="", xlab="pos[bp]", ylab="Nb of reads", xaxt='n',
        cex.axis=2, cex.lab=2)
   lines(colMeans(data.1nucl.10.frag), col=col[2], lwd=3)
   lines(colMeans(data.2nucl.10.frag), col=col[3], lwd=3)
   lines(colMeans(data.nucls.10.frag), col=col[4], lwd=3)
   axis(side=1, at=axis.at.10, labels=axis.lab.10, cex.axis=1.8)
   ## entire reads
   plot(colMeans(data.open.10.read),   col=col[1], lwd=3, type='l',
        main="", xlab="pos[bp]", ylab="Nb of reads", xaxt='n',
        cex.axis=2, cex.lab=2)
   lines(colMeans(data.1nucl.10.read), col=col[2], lwd=3)
   lines(colMeans(data.2nucl.10.read), col=col[3], lwd=3)
   lines(colMeans(data.nucls.10.read), col=col[4], lwd=3)
   axis(side=1, at=axis.at.10, labels=axis.lab.10, cex.axis=1.8)
   ## atac reads and centers
   plot(colMeans(data.open.10.atac)/max(colMeans(data.open.10.atac)),
        col=col[1], lwd=3, type='l', xaxt='n',
        main="", xlab="pos[bp]", ylab="Prop max signal",
        cex.axis=2, cex.lab=2)
   lines(colMeans(data.1nucl.10.cent)/max(colMeans(data.1nucl.10.cent)), 
         col=col[2], lwd=3)
   lines(colMeans(data.2nucl.10.cent)/max(colMeans(data.2nucl.10.cent)),
         col=col[3], lwd=3)
   lines(colMeans(data.nucls.10.cent)/max(colMeans(data.nucls.10.cent)),
         col=col[4], lwd=3)
   axis(side=1, at=axis.at.10, labels=axis.lab.10, cex.axis=1.8)
   
   # some legends over the rows and columns
   p = par(mar=c(0,0,0,0))
   plot(0, 0, col=0, main="", xlab="", ylab="", xaxt='n', yaxt='n')
   text(0, 0, labels="FRAGMENTS", cex=2, srt=90)
   
   plot(0, 0, col=0, main="", xlab="", ylab="", xaxt='n', yaxt='n')
   text(0, 0, labels="READS", cex=2, srt=90)
   
   plot(0, 0, col=0, main="", xlab="", ylab="", xaxt='n', yaxt='n')
   text(0, 0, labels="EDGES/CENTERS", cex=2, srt=90)
   
   plot(0, 0, col=0, main="", xlab="", ylab="", xaxt='n', yaxt='n')
   text(0, 0, labels="+/-400bp by 1bp", cex=2)
   
   plot(0, 0, col=0, main="", xlab="", ylab="", xaxt='n', yaxt='n')
   text(0, 0, labels="+/-400bp by 2bp", cex=2)
   
   plot(0, 0, col=0, main="", xlab="", ylab="", xaxt='n', yaxt='n')
   text(0, 0, labels="+/-1kp by 10bp", cex=2)
   
   par(p)
 dev.off()
 
 
 
 # footprint
 # x-axis
 axis.lab.1 = seq(-200, 200, by=100)
 axis.at.1  = seq(0,    400, length.out=length(axis.lab.1))
 
 axis.lab.2 = seq(-200, 200, by=100)
 axis.at.2  = seq(0,    200, length.out=length(axis.lab.2))
 
 axis.lab.10 = seq(-200, 200, by=100)
 axis.at.10  = seq(0,    41, length.out=length(axis.lab.10))
 
 
 # X11(width=10, height=12)
 png(filename=file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_footprint.png"),
     units="in", res=720, width=10, height=12)
   p = par(mfrow=c(3,1),
           mar=c(5.1, 5.1, 4.1, 2.1))
   # 1bp resolution
   index = 200:600
   x     = 1:length(index)
   plot(x,
        colMeans(data.open.1.atac[,index])/max(colMeans(data.open.1.atac[,index])),
        type='l', lwd=3, col=col[1],
        main="CTCF motif 1bp", xlab="pos[bp]", ylab="Prop max signal", xaxt='n',
        cex.axis=2, cex.lab=2, cex.main=2)
   lines(x,
         colMeans(data.1nucl.1.cent[,index])/max(colMeans(data.1nucl.1.cent[,index])), 
         lwd=3, col=col[2])
   lines(x,
         colMeans(data.nucls.1.cent[,index])/max(colMeans(data.nucls.1.cent[,index])),
         lwd=3, col=col[4])
   abline(v=191, lwd=3, lty=2)
   abline(v=211, lwd=3, lty=2)
   axis(side=1, at=axis.at.1, labels=axis.lab.1, cex.axis=1.8)
   
   # 2bp resolution
   index = 100:300
   x     = 1:length(index)
   plot(x,
        colMeans(data.open.2.atac[,index])/max(colMeans(data.open.2.atac[,index])),
        type='l', lwd=3, col=col[1],
        main="CTCF motif 2bp", xlab="pos[bp]", ylab="Prop max signal", xaxt='n',
        cex.axis=2, cex.lab=2, cex.main=2)
   lines(x,
         colMeans(data.1nucl.2.cent[,index])/max(colMeans(data.1nucl.2.cent[,index])), 
         lwd=3, col=col[2])
   lines(x,
         colMeans(data.nucls.2.cent[,index])/max(colMeans(data.nucls.2.cent[,index])),
         lwd=3, col=col[4])
   abline(v=96, lwd=3, lty=2)
   abline(v=106, lwd=3, lty=2)
   axis(side=1, at=axis.at.1, labels=axis.lab.1, cex.axis=1.8)
   
   # 10bp resolution
   index = 80:120
   x     = 1:length(index)
   plot(x,
        colMeans(data.open.10.atac[,index])/max(colMeans(data.open.10.atac[,index])),
        type='l', lwd=3, col=col[1],
        main="CTCF motif 10bp", xlab="pos[bp]", ylab="Prop max signal", xaxt='n',
        cex.axis=2, cex.lab=2, cex.main=2)
   lines(x,
         colMeans(data.1nucl.10.cent[,index])/max(colMeans(data.1nucl.10.cent[,index])), 
         lwd=3, col=col[2])
   lines(x,
         colMeans(data.nucls.10.cent[,index])/max(colMeans(data.nucls.10.cent[,index])),
         lwd=3, col=col[4])
   abline(v=20, lwd=3, lty=2)
   abline(v=22, lwd=3, lty=2)
   axis(side=1, at=axis.at.10, labels=axis.lab.10, cex.axis=1.8)
   par(p)
 dev.off()
diff --git a/scripts/10xgenomics_PBMC_5k_motifs/analysis_ctcf_motif.sh b/scripts/10xgenomics_PBMC_5k_motifs/analysis_ctcf_motif.sh
index 32860f2..b103ae9 100755
--- a/scripts/10xgenomics_PBMC_5k_motifs/analysis_ctcf_motif.sh
+++ b/scripts/10xgenomics_PBMC_5k_motifs/analysis_ctcf_motif.sh
@@ -1,89 +1,96 @@
 # some paths
 ## directories
 results_dir='data/10xgenomics_PBMC_5k_motifs'
 read_dir="data/10xgenomics_PBMC_5k"
 seq_dir="data/genomes"
 ## input1
 file_bed=$read_dir'/ctcf_motifs_10e-6.bed'
+file_bed_rmsk=$read_dir'/ctcf_motifs_10e-6_rmsk.bed'
 file_bam_open="$read_dir/atac_v1_pbmc_5k_possorted_filtered_30-84bp.bam"
 file_bai_open="$read_dir/atac_v1_pbmc_5k_possorted_filtered_30-84bp.bam.bai"
 file_bam_1nucl="$read_dir/atac_v1_pbmc_5k_possorted_filtered_133-266bp.bam"
 file_bai_1nucl="$read_dir/atac_v1_pbmc_5k_possorted_filtered_133-266bp.bam.bai"
 file_bam_2nucl="$read_dir/atac_v1_pbmc_5k_possorted_filtered_341-500bp.bam"
 file_bai_2nucl="$read_dir/atac_v1_pbmc_5k_possorted_filtered_341-500bp.bam.bai"
 file_bam_1nucl2="$read_dir/atac_v1_pbmc_5k_possorted_filtered_341-500bp_splitintwo.bam"
 file_bai_1nucl2="$read_dir/atac_v1_pbmc_5k_possorted_filtered_341-500bp_splitintwo.bam.bai"
 file_bam_nucl="$read_dir/atac_v1_pbmc_5k_possorted_filtered_nucleosomes.bam"
 file_bai_nucl="$read_dir/atac_v1_pbmc_5k_possorted_filtered_nucleosomes.bam.bai"
 file_hg19="$seq_dir/hg19.fasta"
+file_rmsk="$seq_dir/hg19_rmsk.bed"
 
 mkdir -p $results_dir
 
+# drop motifs overlapping repeat-masked regions: -A removes the whole motif, -f 0.3 asks for an overlap of >=30% of the motif
+bin/bedtools/bedtools subtract -A -f 0.3 -a $file_bed -b $file_rmsk > $file_bed_rmsk
+
 # matrix creation
-## sequences
+## sequences around all motifs and around the repeat-filtered subset
 file_mat_seq="$results_dir/ctcf_motifs_10e-6_sequences.mat"
-bin/SequenceMatrixCreator --bed $file_bed --fasta $file_hg19 --from -400 --to 400 > $file_mat_seq
+file_mat_seq_rmsk="$results_dir/ctcf_motifs_10e-6_sequences_rmsk.mat"
+bin/SequenceMatrixCreator --bed $file_bed      --fasta $file_hg19 --from -400 --to 400 > $file_mat_seq
+bin/SequenceMatrixCreator --bed $file_bed_rmsk --fasta $file_hg19 --from -400 --to 400 > $file_mat_seq_rmsk
 
 ## open chromatin around CTCF motif
 for method in 'read' 'read_atac' 'fragment'
 do
 	file_mat_open_1="$results_dir/ctcf_motifs_10e-6_open_bin1bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_open --bai $file_bai_open --from -400 --to 400   --binSize 1  --method $method > $file_mat_open_1
 	file_mat_open_2="$results_dir/ctcf_motifs_10e-6_open_bin2bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_open --bai $file_bai_open --from -400 --to 400   --binSize 2  --method $method > $file_mat_open_2
 	file_mat_open_10="$results_dir/ctcf_motifs_10e-6_open_bin10bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_open --bai $file_bai_open --from -1000 --to 1000 --binSize 10 --method $method > $file_mat_open_10
 done
 
 ## mono around CTCF motif
 for method in 'read' 'fragment' 'fragment_center'
 do
 	### mono nucleosomes
 	file_mat_1nucl_1="$results_dir/ctcf_motifs_10e-6_1nucl_bin1bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_1nucl --bai $file_bai_1nucl --from -400 --to 400   --binSize 1  --method $method > $file_mat_1nucl_1
 	file_mat_1nucl_2="$results_dir/ctcf_motifs_10e-6_1nucl_bin2bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_1nucl --bai $file_bai_1nucl --from -400 --to 400   --binSize 2  --method $method > $file_mat_1nucl_2
 	file_mat_1nucl_10="$results_dir/ctcf_motifs_10e-6_1nucl_bin10bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_1nucl --bai $file_bai_1nucl --from -1000 --to 1000 --binSize 10 --method $method > $file_mat_1nucl_10
 done
 
 
 ## di nucleosomes around CTCF motif
 for method in 'read' 'fragment' 'fragment_center'
 do
 	### di nucleosomes
 	file_mat_2nucl_1="$results_dir/ctcf_motifs_10e-6_2nucl_bin1bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_2nucl --bai $file_bai_2nucl --from -400 --to 400   --binSize 1  --method $method > $file_mat_2nucl_1
 	file_mat_2nucl_2="$results_dir/ctcf_motifs_10e-6_2nucl_bin2bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_2nucl --bai $file_bai_2nucl --from -400 --to 400   --binSize 2  --method $method > $file_mat_2nucl_2
 	file_mat_2nucl_10="$results_dir/ctcf_motifs_10e-6_2nucl_bin10bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_2nucl --bai $file_bai_2nucl --from -1000 --to 1000 --binSize 10 --method $method > $file_mat_2nucl_10
 done
 
 
 ## mono nucleosomes from processed di-nucleosome data around CTCF motif
 for method in 'read' 'fragment' 'fragment_center'
 do
 	### mono nucleosomes
 	file_mat_1nucl_1="$results_dir/ctcf_motifs_10e-6_2nuclsplitintwo_bin1bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_1nucl2 --bai $file_bai_1nucl2 --from -400 --to 400   --binSize 1  --method $method > $file_mat_1nucl_1
 	file_mat_1nucl_2="$results_dir/ctcf_motifs_10e-6_2nuclsplitintwo_bin2bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_1nucl2 --bai $file_bai_1nucl2 --from -400 --to 400   --binSize 2  --method $method > $file_mat_1nucl_2
 	file_mat_1nucl_10="$results_dir/ctcf_motifs_10e-6_2nuclsplitintwo_bin10bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_1nucl2 --bai $file_bai_1nucl2 --from -1000 --to 1000 --binSize 10 --method $method > $file_mat_1nucl_10
 done
 
 
 ## all nucleosomes around CTCF motif
 for method in 'read' 'fragment' 'fragment_center'
 do
-	### mono nucleosomes
+	### all nucleosomes
 	file_mat_nucl_1="$results_dir/ctcf_motifs_10e-6_nucleosomes_bin1bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_nucl --bai $file_bai_nucl --from -400 --to 400   --binSize 1  --method $method > $file_mat_nucl_1
 	file_mat_nucl_2="$results_dir/ctcf_motifs_10e-6_nucleosomes_bin2bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_nucl --bai $file_bai_nucl --from -400 --to 400   --binSize 2  --method $method > $file_mat_nucl_2
 	file_mat_nucl_10="$results_dir/ctcf_motifs_10e-6_nucleosomes_bin10bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_nucl --bai $file_bai_nucl --from -1000 --to 1000 --binSize 10 --method $method > $file_mat_nucl_10
 done
 
 
diff --git a/scripts/10xgenomics_PBMC_5k_motifs/analysis_ctcf_myc_motif.sh b/scripts/10xgenomics_PBMC_5k_motifs/analysis_ctcf_myc_motif.sh
new file mode 100755
index 0000000..f43146f
--- /dev/null
+++ b/scripts/10xgenomics_PBMC_5k_motifs/analysis_ctcf_myc_motif.sh
@@ -0,0 +1,44 @@
+# some paths
+## directories
+results_dir='data/10xgenomics_PBMC_5k_motifs'
+read_dir="data/10xgenomics_PBMC_5k"
+seq_dir="data/genomes"
+## input
+file_bed_ctcf="$read_dir/ctcf_motifs_10e-6_rmsk.bed"
+file_bed_myc="$read_dir/myc_motifs_10e-6_rmsk.bed"
+file_bed="$read_dir/ctcf_motifs_10e-6_myc_motifs_10e-6_rmsk.bed"
+file_bam_open="$read_dir/atac_v1_pbmc_5k_possorted_filtered_30-84bp.bam"
+file_bai_open="$read_dir/atac_v1_pbmc_5k_possorted_filtered_30-84bp.bam.bai"
+file_bam_nucl="$read_dir/atac_v1_pbmc_5k_possorted_filtered_nucleosomes.bam"
+file_bai_nucl="$read_dir/atac_v1_pbmc_5k_possorted_filtered_nucleosomes.bam.bai"
+file_hg19="$seq_dir/hg19.fasta"
+file_rmsk="$seq_dir/hg19_rmsk.bed"
+
+mkdir -p "$results_dir"
+
+# merge 2000 randomly drawn CTCF motifs and 2000 randomly drawn myc motifs (NOTE(review): this comment used to say 5000 CTCF - confirm the intended count)
+# the first write uses '>' so the merged file is rebuilt from scratch; with touch + '>>' every rerun appended another 4000 rows
+shuf "$file_bed_ctcf" | head -n 2000 >  "$file_bed"
+shuf "$file_bed_myc"  | head -n 2000 >> "$file_bed"
+
+# matrix creation
+## sequences around the merged, repeat-filtered motifs
+file_mat_seq="$results_dir/ctcf_motifs_10e-6_myc_motifs_10e-6_sequences_rmsk.mat"
+bin/SequenceMatrixCreator --bed "$file_bed" --fasta "$file_hg19" --from -400 --to 400 > "$file_mat_seq"
+
+## open chromatin around motifs
+for method in 'read_atac'
+do
+	file_mat_open_1="$results_dir/ctcf_motifs_10e-6_myc_motifs_10e-6_open_bin1bp_${method}_rmsk.mat"
+	bin/CorrelationMatrixCreator --bed "$file_bed" --bam "$file_bam_open" --bai "$file_bai_open" --from -400 --to 400   --binSize 1  --method "$method" > "$file_mat_open_1"
+done
+
+## all nucleosomes around motifs
+for method in 'fragment_center'
+do
+	### all nucleosomes (reads from the *_nucleosomes.bam file)
+	file_mat_nucl_1="$results_dir/ctcf_motifs_10e-6_myc_motifs_10e-6_nucleosomes_bin1bp_${method}_rmsk.mat"
+	bin/CorrelationMatrixCreator --bed "$file_bed" --bam "$file_bam_nucl" --bai "$file_bai_nucl" --from -400 --to 400   --binSize 1  --method "$method" > "$file_mat_nucl_1"
+done
+
+
diff --git a/scripts/10xgenomics_PBMC_5k_motifs/analysis_ctcf_sp1_motif.sh b/scripts/10xgenomics_PBMC_5k_motifs/analysis_ctcf_sp1_motif.sh
new file mode 100755
index 0000000..2c1aaed
--- /dev/null
+++ b/scripts/10xgenomics_PBMC_5k_motifs/analysis_ctcf_sp1_motif.sh
@@ -0,0 +1,44 @@
+# some paths
+## directories
+results_dir='data/10xgenomics_PBMC_5k_motifs'
+read_dir="data/10xgenomics_PBMC_5k"
+seq_dir="data/genomes"
+## input
+file_bed_ctcf="$read_dir/ctcf_motifs_10e-6_rmsk.bed"
+file_bed_sp1="$read_dir/sp1_motifs_10e-7_rmsk.bed"
+file_bed="$read_dir/ctcf_motifs_10e-6_sp1_motifs_10e-7_rmsk.bed"
+file_bam_open="$read_dir/atac_v1_pbmc_5k_possorted_filtered_30-84bp.bam"
+file_bai_open="$read_dir/atac_v1_pbmc_5k_possorted_filtered_30-84bp.bam.bai"
+file_bam_nucl="$read_dir/atac_v1_pbmc_5k_possorted_filtered_nucleosomes.bam"
+file_bai_nucl="$read_dir/atac_v1_pbmc_5k_possorted_filtered_nucleosomes.bam.bai"
+file_hg19="$seq_dir/hg19.fasta"
+file_rmsk="$seq_dir/hg19_rmsk.bed"
+
+mkdir -p "$results_dir"
+
+# merge 5000 randomly drawn CTCF motifs and 5000 randomly drawn SP1 motifs
+# the first write uses '>' so the merged file is rebuilt from scratch; with touch + '>>' every rerun appended another 10000 rows
+shuf "$file_bed_ctcf" | head -n 5000 >  "$file_bed"
+shuf "$file_bed_sp1"  | head -n 5000 >> "$file_bed"
+
+# matrix creation
+## sequences around the merged, repeat-filtered motifs
+file_mat_seq="$results_dir/ctcf_motifs_10e-6_sp1_motifs_10e-7_sequences_rmsk.mat"
+bin/SequenceMatrixCreator --bed "$file_bed" --fasta "$file_hg19" --from -400 --to 400 > "$file_mat_seq"
+
+## open chromatin around motifs
+for method in 'read_atac'
+do
+	file_mat_open_1="$results_dir/ctcf_motifs_10e-6_sp1_motifs_10e-7_open_bin1bp_${method}_rmsk.mat"
+	bin/CorrelationMatrixCreator --bed "$file_bed" --bam "$file_bam_open" --bai "$file_bai_open" --from -400 --to 400   --binSize 1  --method "$method" > "$file_mat_open_1"
+done
+
+## all nucleosomes around motifs
+for method in 'fragment_center'
+do
+	### all nucleosomes (reads from the *_nucleosomes.bam file)
+	file_mat_nucl_1="$results_dir/ctcf_motifs_10e-6_sp1_motifs_10e-7_nucleosomes_bin1bp_${method}_rmsk.mat"
+	bin/CorrelationMatrixCreator --bed "$file_bed" --bam "$file_bam_nucl" --bai "$file_bai_nucl" --from -400 --to 400   --binSize 1  --method "$method" > "$file_mat_nucl_1"
+done
+
+
diff --git a/scripts/10xgenomics_PBMC_5k_motifs/analysis_ebf1_motif.R b/scripts/10xgenomics_PBMC_5k_motifs/analysis_ebf1_motif.R
index 2504612..de580b7 100644
--- a/scripts/10xgenomics_PBMC_5k_motifs/analysis_ebf1_motif.R
+++ b/scripts/10xgenomics_PBMC_5k_motifs/analysis_ebf1_motif.R
@@ -1,307 +1,307 @@
 setwd(file.path("/", "local", "groux", "scATAC-seq"))
 
 # libraries
 library(RColorBrewer)
 
 # functions
 source(file.path("scripts", "functions.R"))
 
 
 ################## aggregations around ebf1 motifs ################## 
 
 # data
 # open chromatin
-data.open.1.frag  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_open_bin1bp_fragment.mat")))
-data.open.2.frag  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_open_bin2bp_fragment.mat")))
-data.open.10.frag = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_open_bin10bp_fragment.mat")))
+data.open.1.frag  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ebf1_motifs_10e-6_open_bin1bp_fragment.mat")))
+data.open.2.frag  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ebf1_motifs_10e-6_open_bin2bp_fragment.mat")))
+data.open.10.frag = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ebf1_motifs_10e-6_open_bin10bp_fragment.mat")))
 
-data.open.1.read  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_open_bin1bp_read.mat")))
-data.open.2.read  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_open_bin2bp_read.mat")))
-data.open.10.read = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_open_bin10bp_read.mat")))
+data.open.1.read  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ebf1_motifs_10e-6_open_bin1bp_read.mat")))
+data.open.2.read  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ebf1_motifs_10e-6_open_bin2bp_read.mat")))
+data.open.10.read = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ebf1_motifs_10e-6_open_bin10bp_read.mat")))
 
-data.open.1.atac  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_open_bin1bp_read_atac.mat")))
-data.open.2.atac  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_open_bin2bp_read_atac.mat")))
-data.open.10.atac = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_open_bin10bp_read_atac.mat")))
+data.open.1.atac  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ebf1_motifs_10e-6_open_bin1bp_read_atac.mat")))
+data.open.2.atac  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ebf1_motifs_10e-6_open_bin2bp_read_atac.mat")))
+data.open.10.atac = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ebf1_motifs_10e-6_open_bin10bp_read_atac.mat")))
 
 # mono-nucleosomes
-data.1nucl.1.frag  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_1nucl_bin1bp_fragment.mat")))
-data.1nucl.2.frag  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_1nucl_bin2bp_fragment.mat")))
-data.1nucl.10.frag = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_1nucl_bin10bp_fragment.mat")))
+data.1nucl.1.frag  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ebf1_motifs_10e-6_1nucl_bin1bp_fragment.mat")))
+data.1nucl.2.frag  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ebf1_motifs_10e-6_1nucl_bin2bp_fragment.mat")))
+data.1nucl.10.frag = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ebf1_motifs_10e-6_1nucl_bin10bp_fragment.mat")))
 
-data.1nucl.1.read  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_1nucl_bin1bp_read.mat")))
-data.1nucl.2.read  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_1nucl_bin2bp_read.mat")))
-data.1nucl.10.read = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_1nucl_bin10bp_read.mat")))
+data.1nucl.1.read  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ebf1_motifs_10e-6_1nucl_bin1bp_read.mat")))
+data.1nucl.2.read  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ebf1_motifs_10e-6_1nucl_bin2bp_read.mat")))
+data.1nucl.10.read = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ebf1_motifs_10e-6_1nucl_bin10bp_read.mat")))
 
-data.1nucl.1.cent  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_1nucl_bin1bp_fragment_center.mat")))
-data.1nucl.2.cent  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_1nucl_bin2bp_fragment_center.mat")))
-data.1nucl.10.cent = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_1nucl_bin10bp_fragment_center.mat")))
+data.1nucl.1.cent  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ebf1_motifs_10e-6_1nucl_bin1bp_fragment_center.mat")))
+data.1nucl.2.cent  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ebf1_motifs_10e-6_1nucl_bin2bp_fragment_center.mat")))
+data.1nucl.10.cent = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ebf1_motifs_10e-6_1nucl_bin10bp_fragment_center.mat")))
 
 # di-nucleosomes
-data.2nucl.1.frag  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_2nucl_bin1bp_fragment.mat")))
-data.2nucl.2.frag  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_2nucl_bin2bp_fragment.mat")))
-data.2nucl.10.frag = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_2nucl_bin10bp_fragment.mat")))
+data.2nucl.1.frag  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ebf1_motifs_10e-6_2nucl_bin1bp_fragment.mat")))
+data.2nucl.2.frag  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ebf1_motifs_10e-6_2nucl_bin2bp_fragment.mat")))
+data.2nucl.10.frag = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ebf1_motifs_10e-6_2nucl_bin10bp_fragment.mat")))
 
-data.2nucl.1.read  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_2nucl_bin1bp_read.mat")))
-data.2nucl.2.read  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_2nucl_bin2bp_read.mat")))
-data.2nucl.10.read = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_2nucl_bin10bp_read.mat")))
+data.2nucl.1.read  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ebf1_motifs_10e-6_2nucl_bin1bp_read.mat")))
+data.2nucl.2.read  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ebf1_motifs_10e-6_2nucl_bin2bp_read.mat")))
+data.2nucl.10.read = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ebf1_motifs_10e-6_2nucl_bin10bp_read.mat")))
 
-data.2nucl.1.cent  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_2nucl_bin1bp_fragment_center.mat")))
-data.2nucl.2.cent  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_2nucl_bin2bp_fragment_center.mat")))
-data.2nucl.10.cent = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_2nucl_bin10bp_fragment_center.mat")))
+data.2nucl.1.cent  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ebf1_motifs_10e-6_2nucl_bin1bp_fragment_center.mat")))
+data.2nucl.2.cent  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ebf1_motifs_10e-6_2nucl_bin2bp_fragment_center.mat")))
+data.2nucl.10.cent = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ebf1_motifs_10e-6_2nucl_bin10bp_fragment_center.mat")))
 
 # mono-nucleosomes from di-nucleosome data
-data.nucls.1.frag  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_2nuclsplitintwo_bin1bp_fragment.mat")))
-data.nucls.2.frag  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_2nuclsplitintwo_bin2bp_fragment.mat")))
-data.nucls.10.frag = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_2nuclsplitintwo_bin10bp_fragment.mat")))
+data.nucls.1.frag  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ebf1_motifs_10e-6_2nuclsplitintwo_bin1bp_fragment.mat")))
+data.nucls.2.frag  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ebf1_motifs_10e-6_2nuclsplitintwo_bin2bp_fragment.mat")))
+data.nucls.10.frag = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ebf1_motifs_10e-6_2nuclsplitintwo_bin10bp_fragment.mat")))
 
-data.nucls.1.read  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_2nuclsplitintwo_bin1bp_read.mat")))
-data.nucls.2.read  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_2nuclsplitintwo_bin2bp_read.mat")))
-data.nucls.10.read = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_2nuclsplitintwo_bin10bp_read.mat")))
+data.nucls.1.read  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ebf1_motifs_10e-6_2nuclsplitintwo_bin1bp_read.mat")))
+data.nucls.2.read  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ebf1_motifs_10e-6_2nuclsplitintwo_bin2bp_read.mat")))
+data.nucls.10.read = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ebf1_motifs_10e-6_2nuclsplitintwo_bin10bp_read.mat")))
 
-data.nucls.1.cent  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_2nuclsplitintwo_bin1bp_fragment_center.mat")))
-data.nucls.2.cent  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_2nuclsplitintwo_bin2bp_fragment_center.mat")))
-data.nucls.10.cent = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_2nuclsplitintwo_bin10bp_fragment_center.mat")))
+data.nucls.1.cent  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ebf1_motifs_10e-6_2nuclsplitintwo_bin1bp_fragment_center.mat")))
+data.nucls.2.cent  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ebf1_motifs_10e-6_2nuclsplitintwo_bin2bp_fragment_center.mat")))
+data.nucls.10.cent = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "ebf1_motifs_10e-6_2nuclsplitintwo_bin10bp_fragment_center.mat")))
 
 
 # colors
 col = brewer.pal(4, "Set1")
 
 # x-axis
 axis.at.1   = seq(0, ncol(data.open.1.frag), length.out =5)
 axis.lab.1  = seq(-400,   400, by=200)
 axis.at.2   = seq(0, ncol(data.open.2.frag), length.out =5)
 axis.lab.2  = seq(-400,   400, by=200)
 axis.at.10  = seq(0, ncol(data.open.10.frag), length.out=5)
 axis.lab.10 = seq(-1000, 1000, by=500)
 
 # X11(width=12, height=12)
 png(filename=file.path("results/10xgenomics_PBMC_5k/ebf1_motifs_10e-6_aggregations.png"),
     units="in", res=720, width=12, height=9)
   m = matrix(nrow=4, ncol=4,
              data=c(16,13,14,15,
                     10, 1, 4, 7,
                     11, 2, 5, 8,
                     12, 3, 6, 9), byrow=T)
   l = layout(mat=m, widths=c(0.2, 1, 1, 1), heights=c(0.2, 1, 1, 1))
   layout.show(l)
   
   p = par(mar=c(5.1, 5.1, 4.1, 2.1))
   
   # 1bp resolution
   ## entire fragments
   ylim = c(0,max(max(colMeans(data.open.1.frag)),
                  max(colMeans(data.open.1.frag)),
                  max(colMeans(data.1nucl.1.frag)),
                  max(colMeans(data.2nucl.1.frag)),
                  max(colMeans(data.nucls.1.frag))))
   plot(colMeans(data.open.1.frag),   col=col[1], lwd=3, type='l',
        main="", xlab="pos[bp]", ylab="Nb of reads", xaxt='n',
        ylim=ylim, cex.axis=2, cex.lab=2)
   lines(colMeans(data.open.1.frag),  col=col[1], lwd=3)
   lines(colMeans(data.1nucl.1.frag), col=col[2], lwd=3)
   lines(colMeans(data.2nucl.1.frag), col=col[3], lwd=3)
   lines(colMeans(data.nucls.1.frag), col=col[4], lwd=3)
   axis(side=1, at=axis.at.1, labels=axis.lab.1, cex.axis=1.8)
   ## entire reads
   ylim = c(0,max(max(colMeans(data.open.1.read)),
                  max(colMeans(data.open.1.read)),
                  max(colMeans(data.1nucl.1.read)),
                  max(colMeans(data.2nucl.1.read)),
                  max(colMeans(data.nucls.1.read))))
   plot(colMeans(data.open.1.read),   col=col[1], lwd=3, type='l',
        main="", xlab="pos[bp]", ylab="Nb of reads", xaxt='n',
        ylim=ylim, cex.axis=2, cex.lab=2)
   lines(colMeans(data.1nucl.1.read), col=col[2], lwd=3)
   lines(colMeans(data.2nucl.1.read), col=col[3], lwd=3)
   lines(colMeans(data.nucls.1.read), col=col[4], lwd=3)
   axis(side=1, at=axis.at.1, labels=axis.lab.1, cex.axis=1.8)
   ## atac reads and centers
   plot(colMeans(data.open.1.atac)/max(colMeans(data.open.1.atac)),
        col=col[1], lwd=3, type='l', xaxt='n',
        main="", xlab="pos[bp]", ylab="Prop max signal",
        cex.axis=2, cex.lab=2)
   lines(colMeans(data.1nucl.1.cent)/max(colMeans(data.1nucl.1.cent)), 
         col=col[2], lwd=3)
   lines(colMeans(data.2nucl.1.cent)/max(colMeans(data.2nucl.1.cent)),
         col=col[3], lwd=3)
   lines(colMeans(data.nucls.1.cent)/max(colMeans(data.nucls.1.cent)),
         col=col[4], lwd=3)
   axis(side=1, at=axis.at.1, labels=axis.lab.1, cex.axis=1.8)
   
   # 2bp resolution
   ## entire fragments
   ylim = c(0,max(max(colMeans(data.open.2.frag)),
                  max(colMeans(data.open.2.frag)),
                  max(colMeans(data.1nucl.2.frag)),
                  max(colMeans(data.2nucl.2.frag)),
                  max(colMeans(data.nucls.2.frag))))
   plot(colMeans(data.open.2.frag),   col=col[1], lwd=3, type='l',
        main="", xlab="pos[bp]", ylab="Nb of reads", xaxt='n',
        ylim=ylim, cex.axis=2, cex.lab=2)
   lines(colMeans(data.1nucl.2.frag), col=col[2], lwd=3)
   lines(colMeans(data.2nucl.2.frag), col=col[3], lwd=3)
   lines(colMeans(data.nucls.2.frag), col=col[4], lwd=3)
   axis(side=1, at=axis.at.2, labels=axis.lab.2, cex.axis=1.8)
   ## entire reads
   ylim = c(0,max(max(colMeans(data.open.2.read)),
                  max(colMeans(data.open.2.read)),
                  max(colMeans(data.1nucl.2.read)),
                  max(colMeans(data.2nucl.2.read)),
                  max(colMeans(data.nucls.2.read))))
   plot(colMeans(data.open.2.read),   col=col[1], lwd=3, type='l',
        main="", xlab="pos[bp]", ylab="Nb of reads", xaxt='n',
        ylim=ylim, cex.axis=2, cex.lab=2)
   lines(colMeans(data.1nucl.2.read), col=col[2], lwd=3)
   lines(colMeans(data.2nucl.2.read), col=col[3], lwd=3)
   lines(colMeans(data.nucls.2.read), col=col[4], lwd=3)
   axis(side=1, at=axis.at.2, labels=axis.lab.2, cex.axis=1.8)
   ## atac reads and centers
   plot(colMeans(data.open.2.atac)/max(colMeans(data.open.2.atac)),
        col=col[1], lwd=3, type='l', xaxt='n',
        main="", xlab="pos[bp]", ylab="Prop max signal",
        cex.axis=2, cex.lab=2)
   lines(colMeans(data.1nucl.2.cent)/max(colMeans(data.1nucl.2.cent)), 
         col=col[2], lwd=3)
   lines(colMeans(data.2nucl.2.cent)/max(colMeans(data.2nucl.2.cent)),
         col=col[3], lwd=3)
   lines(colMeans(data.nucls.2.cent)/max(colMeans(data.nucls.2.cent)),
         col=col[4], lwd=3)
   axis(side=1, at=axis.at.2, labels=axis.lab.2, cex.axis=1.8)
   
   # 10bp resolution
   ## entire fragments
   ylim = c(0,max(max(colMeans(data.open.10.frag)),
                  max(colMeans(data.open.10.frag)),
                  max(colMeans(data.1nucl.10.frag)),
                  max(colMeans(data.2nucl.10.frag)),
                  max(colMeans(data.nucls.10.frag))))
   plot(colMeans(data.open.10.frag),   col=col[1], lwd=3, type='l',
        main="", xlab="pos[bp]", ylab="Nb of reads", xaxt='n',
        ylim=ylim, cex.axis=2, cex.lab=2)
   lines(colMeans(data.1nucl.10.frag), col=col[2], lwd=3)
   lines(colMeans(data.2nucl.10.frag), col=col[3], lwd=3)
   lines(colMeans(data.nucls.10.frag), col=col[4], lwd=3)
   axis(side=1, at=axis.at.10, labels=axis.lab.10, cex.axis=1.8)
   ## entire reads
   ylim = c(0,max(max(colMeans(data.open.10.read)),
                  max(colMeans(data.open.10.read)),
                  max(colMeans(data.1nucl.10.read)),
                  max(colMeans(data.2nucl.10.read)),
                  max(colMeans(data.nucls.10.read))))
   plot(colMeans(data.open.10.read),   col=col[1], lwd=3, type='l',
        main="", xlab="pos[bp]", ylab="Nb of reads", xaxt='n',
        ylim=ylim, cex.axis=2, cex.lab=2)
   lines(colMeans(data.1nucl.10.read), col=col[2], lwd=3)
   lines(colMeans(data.2nucl.10.read), col=col[3], lwd=3)
   lines(colMeans(data.nucls.10.read), col=col[4], lwd=3)
   axis(side=1, at=axis.at.10, labels=axis.lab.10, cex.axis=1.8)
   ## atac reads and centers
   plot(colMeans(data.open.10.atac)/max(colMeans(data.open.10.atac)),
        col=col[1], lwd=3, type='l', xaxt='n',
        main="", xlab="pos[bp]", ylab="Prop max signal",
        cex.axis=2, cex.lab=2)
   lines(colMeans(data.1nucl.10.cent)/max(colMeans(data.1nucl.10.cent)), 
         col=col[2], lwd=3)
   lines(colMeans(data.2nucl.10.cent)/max(colMeans(data.2nucl.10.cent)),
         col=col[3], lwd=3)
   lines(colMeans(data.nucls.10.cent)/max(colMeans(data.nucls.10.cent)),
         col=col[4], lwd=3)
   axis(side=1, at=axis.at.10, labels=axis.lab.10, cex.axis=1.8)
   
   # some legends over the rows and columns
   p = par(mar=c(0,0,0,0))
   plot(0, 0, col=0, main="", xlab="", ylab="", xaxt='n', yaxt='n')
   text(0, 0, labels="FRAGMENTS", cex=2, srt=90)
   
   plot(0, 0, col=0, main="", xlab="", ylab="", xaxt='n', yaxt='n')
   text(0, 0, labels="READS", cex=2, srt=90)
   
   plot(0, 0, col=0, main="", xlab="", ylab="", xaxt='n', yaxt='n')
   text(0, 0, labels="EDGES/CENTERS", cex=2, srt=90)
   
   plot(0, 0, col=0, main="", xlab="", ylab="", xaxt='n', yaxt='n')
   text(0, 0, labels="+/-400bp by 1bp", cex=2)
   
   plot(0, 0, col=0, main="", xlab="", ylab="", xaxt='n', yaxt='n')
   text(0, 0, labels="+/-400bp by 2bp", cex=2)
   
   plot(0, 0, col=0, main="", xlab="", ylab="", xaxt='n', yaxt='n')
   text(0, 0, labels="+/-1kp by 10bp", cex=2)
   
   par(p)
 dev.off()
 
 
 
 # footprint
 # x-axis
 axis.lab.1 = seq(-200, 200, by=100)
 axis.at.1  = seq(0,    400, length.out=length(axis.lab.1))
 
 axis.lab.2 = seq(-200, 200, by=100)
 axis.at.2  = seq(0,    200, length.out=length(axis.lab.2))
 
 axis.lab.10 = seq(-200, 200, by=100)
 axis.at.10  = seq(0,    41, length.out=length(axis.lab.10))
 
 
 # X11(width=10, height=12)
 png(filename=file.path("results", "10xgenomics_PBMC_5k", "ebf1_motifs_10e-6_footprint.png"),
     units="in", res=720, width=10, height=12)
   p = par(mfrow=c(3,1),
           mar=c(5.1, 5.1, 4.1, 2.1))
   # 1bp resolution
   index = 200:600
   x     = 1:length(index)
   plot(x,
        colMeans(data.open.1.atac[,index])/max(colMeans(data.open.1.atac[,index])),
        type='l', lwd=3, col=col[1],
        main="EBF1 motif 1bp", xlab="pos[bp]", ylab="Prop max signal", xaxt='n',
        cex.axis=2, cex.lab=2, cex.main=2)
   lines(x,
         colMeans(data.1nucl.1.cent[,index])/max(colMeans(data.1nucl.1.cent[,index])), 
         lwd=3, col=col[2])
   lines(x,
         colMeans(data.nucls.1.cent[,index])/max(colMeans(data.nucls.1.cent[,index])),
         lwd=3, col=col[4])
   abline(v=191, lwd=3, lty=2)
   abline(v=211, lwd=3, lty=2)
   axis(side=1, at=axis.at.1, labels=axis.lab.1, cex.axis=1.8)
   
   # 2bp resolution
   index = 100:300
   x     = 1:length(index)
   plot(x,
        colMeans(data.open.2.atac[,index])/max(colMeans(data.open.2.atac[,index])),
        type='l', lwd=3, col=col[1],
        main="EBF1 motif 2bp", xlab="pos[bp]", ylab="Prop max signal", xaxt='n',
        cex.axis=2, cex.lab=2, cex.main=2)
   lines(x,
         colMeans(data.1nucl.2.cent[,index])/max(colMeans(data.1nucl.2.cent[,index])), 
         lwd=3, col=col[2])
   lines(x,
         colMeans(data.nucls.2.cent[,index])/max(colMeans(data.nucls.2.cent[,index])),
         lwd=3, col=col[4])
   abline(v=96, lwd=3, lty=2)
   abline(v=106, lwd=3, lty=2)
-  axis(side=1, at=axis.at.1, labels=axis.lab.1, cex.axis=1.8)
+  axis(side=1, at=axis.at.2, labels=axis.lab.2, cex.axis=1.8) # x spans 1..201 here: use the 2bp ticks (0..200) defined above, not the 1bp ones (0..400)
   
   # 10bp resolution
   index = 80:120
   x     = 1:length(index)
   plot(x,
        colMeans(data.open.10.atac[,index])/max(colMeans(data.open.10.atac[,index])),
        type='l', lwd=3, col=col[1],
        main="EBF1 motif 10bp", xlab="pos[bp]", ylab="Prop max signal", xaxt='n',
        cex.axis=2, cex.lab=2, cex.main=2)
   lines(x,
         colMeans(data.1nucl.10.cent[,index])/max(colMeans(data.1nucl.10.cent[,index])), 
         lwd=3, col=col[2])
   lines(x,
         colMeans(data.nucls.10.cent[,index])/max(colMeans(data.nucls.10.cent[,index])),
         lwd=3, col=col[4])
   abline(v=20, lwd=3, lty=2)
   abline(v=22, lwd=3, lty=2)
   axis(side=1, at=axis.at.10, labels=axis.lab.10, cex.axis=1.8)
   par(p)
 dev.off()
diff --git a/scripts/10xgenomics_PBMC_5k_motifs/analysis_myc_motif.R b/scripts/10xgenomics_PBMC_5k_motifs/analysis_myc_motif.R
index 6fcdefb..1145a83 100644
--- a/scripts/10xgenomics_PBMC_5k_motifs/analysis_myc_motif.R
+++ b/scripts/10xgenomics_PBMC_5k_motifs/analysis_myc_motif.R
@@ -1,307 +1,307 @@
 setwd(file.path("/", "local", "groux", "scATAC-seq"))
 
 # libraries
 library(RColorBrewer)
 
 # functions
 source(file.path("scripts", "functions.R"))
 
 
 ################## aggregations around myc motifs ################## 
 
 # data
 # open chromatin
-data.open.1.frag  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_open_bin1bp_fragment.mat")))
-data.open.2.frag  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_open_bin2bp_fragment.mat")))
-data.open.10.frag = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_open_bin10bp_fragment.mat")))
+data.open.1.frag  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "myc_motifs_10e-6_open_bin1bp_fragment.mat")))
+data.open.2.frag  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "myc_motifs_10e-6_open_bin2bp_fragment.mat")))
+data.open.10.frag = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "myc_motifs_10e-6_open_bin10bp_fragment.mat")))
 
-data.open.1.read  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_open_bin1bp_read.mat")))
-data.open.2.read  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_open_bin2bp_read.mat")))
-data.open.10.read = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_open_bin10bp_read.mat")))
+data.open.1.read  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "myc_motifs_10e-6_open_bin1bp_read.mat")))
+data.open.2.read  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "myc_motifs_10e-6_open_bin2bp_read.mat")))
+data.open.10.read = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "myc_motifs_10e-6_open_bin10bp_read.mat")))
 
-data.open.1.atac  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_open_bin1bp_read_atac.mat")))
-data.open.2.atac  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_open_bin2bp_read_atac.mat")))
-data.open.10.atac = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_open_bin10bp_read_atac.mat")))
+data.open.1.atac  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "myc_motifs_10e-6_open_bin1bp_read_atac.mat")))
+data.open.2.atac  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "myc_motifs_10e-6_open_bin2bp_read_atac.mat")))
+data.open.10.atac = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "myc_motifs_10e-6_open_bin10bp_read_atac.mat")))
 
 # mono-nucleosomes
-data.1nucl.1.frag  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_1nucl_bin1bp_fragment.mat")))
-data.1nucl.2.frag  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_1nucl_bin2bp_fragment.mat")))
-data.1nucl.10.frag = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_1nucl_bin10bp_fragment.mat")))
+data.1nucl.1.frag  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "myc_motifs_10e-6_1nucl_bin1bp_fragment.mat")))
+data.1nucl.2.frag  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "myc_motifs_10e-6_1nucl_bin2bp_fragment.mat")))
+data.1nucl.10.frag = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "myc_motifs_10e-6_1nucl_bin10bp_fragment.mat")))
 
-data.1nucl.1.read  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_1nucl_bin1bp_read.mat")))
-data.1nucl.2.read  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_1nucl_bin2bp_read.mat")))
-data.1nucl.10.read = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_1nucl_bin10bp_read.mat")))
+data.1nucl.1.read  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "myc_motifs_10e-6_1nucl_bin1bp_read.mat")))
+data.1nucl.2.read  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "myc_motifs_10e-6_1nucl_bin2bp_read.mat")))
+data.1nucl.10.read = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "myc_motifs_10e-6_1nucl_bin10bp_read.mat")))
 
-data.1nucl.1.cent  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_1nucl_bin1bp_fragment_center.mat")))
-data.1nucl.2.cent  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_1nucl_bin2bp_fragment_center.mat")))
-data.1nucl.10.cent = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_1nucl_bin10bp_fragment_center.mat")))
+data.1nucl.1.cent  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "myc_motifs_10e-6_1nucl_bin1bp_fragment_center.mat")))
+data.1nucl.2.cent  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "myc_motifs_10e-6_1nucl_bin2bp_fragment_center.mat")))
+data.1nucl.10.cent = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "myc_motifs_10e-6_1nucl_bin10bp_fragment_center.mat")))
 
 # di-nucleosomes
-data.2nucl.1.frag  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_2nucl_bin1bp_fragment.mat")))
-data.2nucl.2.frag  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_2nucl_bin2bp_fragment.mat")))
-data.2nucl.10.frag = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_2nucl_bin10bp_fragment.mat")))
+data.2nucl.1.frag  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "myc_motifs_10e-6_2nucl_bin1bp_fragment.mat")))
+data.2nucl.2.frag  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "myc_motifs_10e-6_2nucl_bin2bp_fragment.mat")))
+data.2nucl.10.frag = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "myc_motifs_10e-6_2nucl_bin10bp_fragment.mat")))
 
-data.2nucl.1.read  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_2nucl_bin1bp_read.mat")))
-data.2nucl.2.read  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_2nucl_bin2bp_read.mat")))
-data.2nucl.10.read = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_2nucl_bin10bp_read.mat")))
+data.2nucl.1.read  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "myc_motifs_10e-6_2nucl_bin1bp_read.mat")))
+data.2nucl.2.read  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "myc_motifs_10e-6_2nucl_bin2bp_read.mat")))
+data.2nucl.10.read = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "myc_motifs_10e-6_2nucl_bin10bp_read.mat")))
 
-data.2nucl.1.cent  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_2nucl_bin1bp_fragment_center.mat")))
-data.2nucl.2.cent  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_2nucl_bin2bp_fragment_center.mat")))
-data.2nucl.10.cent = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_2nucl_bin10bp_fragment_center.mat")))
+data.2nucl.1.cent  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "myc_motifs_10e-6_2nucl_bin1bp_fragment_center.mat")))
+data.2nucl.2.cent  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "myc_motifs_10e-6_2nucl_bin2bp_fragment_center.mat")))
+data.2nucl.10.cent = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "myc_motifs_10e-6_2nucl_bin10bp_fragment_center.mat")))
 
 # mono-nucleosomes from di-nucleosome data
-data.nucls.1.frag  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_2nuclsplitintwo_bin1bp_fragment.mat")))
-data.nucls.2.frag  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_2nuclsplitintwo_bin2bp_fragment.mat")))
-data.nucls.10.frag = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_2nuclsplitintwo_bin10bp_fragment.mat")))
+data.nucls.1.frag  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "myc_motifs_10e-6_2nuclsplitintwo_bin1bp_fragment.mat")))
+data.nucls.2.frag  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "myc_motifs_10e-6_2nuclsplitintwo_bin2bp_fragment.mat")))
+data.nucls.10.frag = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "myc_motifs_10e-6_2nuclsplitintwo_bin10bp_fragment.mat")))
 
-data.nucls.1.read  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_2nuclsplitintwo_bin1bp_read.mat")))
-data.nucls.2.read  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_2nuclsplitintwo_bin2bp_read.mat")))
-data.nucls.10.read = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_2nuclsplitintwo_bin10bp_read.mat")))
+data.nucls.1.read  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "myc_motifs_10e-6_2nuclsplitintwo_bin1bp_read.mat")))
+data.nucls.2.read  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "myc_motifs_10e-6_2nuclsplitintwo_bin2bp_read.mat")))
+data.nucls.10.read = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "myc_motifs_10e-6_2nuclsplitintwo_bin10bp_read.mat")))
 
-data.nucls.1.cent  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_2nuclsplitintwo_bin1bp_fragment_center.mat")))
-data.nucls.2.cent  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_2nuclsplitintwo_bin2bp_fragment_center.mat")))
-data.nucls.10.cent = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_2nuclsplitintwo_bin10bp_fragment_center.mat")))
+data.nucls.1.cent  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "myc_motifs_10e-6_2nuclsplitintwo_bin1bp_fragment_center.mat")))
+data.nucls.2.cent  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "myc_motifs_10e-6_2nuclsplitintwo_bin2bp_fragment_center.mat")))
+data.nucls.10.cent = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "myc_motifs_10e-6_2nuclsplitintwo_bin10bp_fragment_center.mat")))
 
 
 # colors
 col = brewer.pal(4, "Set1")
 
 # x-axis
 axis.at.1   = seq(0, ncol(data.open.1.frag), length.out =5)
 axis.lab.1  = seq(-400,   400, by=200)
 axis.at.2   = seq(0, ncol(data.open.2.frag), length.out =5)
 axis.lab.2  = seq(-400,   400, by=200)
 axis.at.10  = seq(0, ncol(data.open.10.frag), length.out=5)
 axis.lab.10 = seq(-1000, 1000, by=500)
 
 # X11(width=12, height=12)
 png(filename=file.path("results/10xgenomics_PBMC_5k/myc_motifs_10e-6_aggregations.png"),
     units="in", res=720, width=12, height=9)
   m = matrix(nrow=4, ncol=4,
              data=c(16,13,14,15,
                     10, 1, 4, 7,
                     11, 2, 5, 8,
                     12, 3, 6, 9), byrow=T)
   l = layout(mat=m, widths=c(0.2, 1, 1, 1), heights=c(0.2, 1, 1, 1))
   layout.show(l)
   
   p = par(mar=c(5.1, 5.1, 4.1, 2.1))
   
   # 1bp resolution
   ## entire fragments
   ylim = c(0,max(max(colMeans(data.open.1.frag)),
                  max(colMeans(data.open.1.frag)),
                  max(colMeans(data.1nucl.1.frag)),
                  max(colMeans(data.2nucl.1.frag)),
                  max(colMeans(data.nucls.1.frag))))
   plot(colMeans(data.open.1.frag),   col=col[1], lwd=3, type='l',
        main="", xlab="pos[bp]", ylab="Nb of reads", xaxt='n',
        ylim=ylim, cex.axis=2, cex.lab=2)
   lines(colMeans(data.open.1.frag),  col=col[1], lwd=3)
   lines(colMeans(data.1nucl.1.frag), col=col[2], lwd=3)
   lines(colMeans(data.2nucl.1.frag), col=col[3], lwd=3)
   lines(colMeans(data.nucls.1.frag), col=col[4], lwd=3)
   axis(side=1, at=axis.at.1, labels=axis.lab.1, cex.axis=1.8)
   ## entire reads
   ylim = c(0,max(max(colMeans(data.open.1.read)),
                  max(colMeans(data.open.1.read)),
                  max(colMeans(data.1nucl.1.read)),
                  max(colMeans(data.2nucl.1.read)),
                  max(colMeans(data.nucls.1.read))))
   plot(colMeans(data.open.1.read),   col=col[1], lwd=3, type='l',
        main="", xlab="pos[bp]", ylab="Nb of reads", xaxt='n',
        ylim=ylim, cex.axis=2, cex.lab=2)
   lines(colMeans(data.1nucl.1.read), col=col[2], lwd=3)
   lines(colMeans(data.2nucl.1.read), col=col[3], lwd=3)
   lines(colMeans(data.nucls.1.read), col=col[4], lwd=3)
   axis(side=1, at=axis.at.1, labels=axis.lab.1, cex.axis=1.8)
   ## atac reads and centers
   plot(colMeans(data.open.1.atac)/max(colMeans(data.open.1.atac)),
        col=col[1], lwd=3, type='l', xaxt='n',
        main="", xlab="pos[bp]", ylab="Prop max signal",
        cex.axis=2, cex.lab=2)
   lines(colMeans(data.1nucl.1.cent)/max(colMeans(data.1nucl.1.cent)), 
         col=col[2], lwd=3)
   lines(colMeans(data.2nucl.1.cent)/max(colMeans(data.2nucl.1.cent)),
         col=col[3], lwd=3)
   lines(colMeans(data.nucls.1.cent)/max(colMeans(data.nucls.1.cent)),
         col=col[4], lwd=3)
   axis(side=1, at=axis.at.1, labels=axis.lab.1, cex.axis=1.8)
   
   # 2bp resolution
   ## entire fragments
   ylim = c(0,max(max(colMeans(data.open.2.frag)),
                  max(colMeans(data.open.2.frag)),
                  max(colMeans(data.1nucl.2.frag)),
                  max(colMeans(data.2nucl.2.frag)),
                  max(colMeans(data.nucls.2.frag))))
   plot(colMeans(data.open.2.frag),   col=col[1], lwd=3, type='l',
        main="", xlab="pos[bp]", ylab="Nb of reads", xaxt='n',
        ylim=ylim, cex.axis=2, cex.lab=2)
   lines(colMeans(data.1nucl.2.frag), col=col[2], lwd=3)
   lines(colMeans(data.2nucl.2.frag), col=col[3], lwd=3)
   lines(colMeans(data.nucls.2.frag), col=col[4], lwd=3)
   axis(side=1, at=axis.at.2, labels=axis.lab.2, cex.axis=1.8)
   ## entire reads
   ylim = c(0,max(max(colMeans(data.open.2.read)),
                  max(colMeans(data.open.2.read)),
                  max(colMeans(data.1nucl.2.read)),
                  max(colMeans(data.2nucl.2.read)),
                  max(colMeans(data.nucls.2.read))))
   plot(colMeans(data.open.2.read),   col=col[1], lwd=3, type='l',
        main="", xlab="pos[bp]", ylab="Nb of reads", xaxt='n',
        ylim=ylim, cex.axis=2, cex.lab=2)
   lines(colMeans(data.1nucl.2.read), col=col[2], lwd=3)
   lines(colMeans(data.2nucl.2.read), col=col[3], lwd=3)
   lines(colMeans(data.nucls.2.read), col=col[4], lwd=3)
   axis(side=1, at=axis.at.2, labels=axis.lab.2, cex.axis=1.8)
   ## atac reads and centers
   plot(colMeans(data.open.2.atac)/max(colMeans(data.open.2.atac)),
        col=col[1], lwd=3, type='l', xaxt='n',
        main="", xlab="pos[bp]", ylab="Prop max signal",
        cex.axis=2, cex.lab=2)
   lines(colMeans(data.1nucl.2.cent)/max(colMeans(data.1nucl.2.cent)), 
         col=col[2], lwd=3)
   lines(colMeans(data.2nucl.2.cent)/max(colMeans(data.2nucl.2.cent)),
         col=col[3], lwd=3)
   lines(colMeans(data.nucls.2.cent)/max(colMeans(data.nucls.2.cent)),
         col=col[4], lwd=3)
   axis(side=1, at=axis.at.2, labels=axis.lab.2, cex.axis=1.8)
   
   # 10bp resolution
   ## entire fragments
   ylim = c(0,max(max(colMeans(data.open.10.frag)),
                  max(colMeans(data.open.10.frag)),
                  max(colMeans(data.1nucl.10.frag)),
                  max(colMeans(data.2nucl.10.frag)),
                  max(colMeans(data.nucls.10.frag))))
   plot(colMeans(data.open.10.frag),   col=col[1], lwd=3, type='l',
        main="", xlab="pos[bp]", ylab="Nb of reads", xaxt='n',
        ylim=ylim, cex.axis=2, cex.lab=2)
   lines(colMeans(data.1nucl.10.frag), col=col[2], lwd=3)
   lines(colMeans(data.2nucl.10.frag), col=col[3], lwd=3)
   lines(colMeans(data.nucls.10.frag), col=col[4], lwd=3)
   axis(side=1, at=axis.at.10, labels=axis.lab.10, cex.axis=1.8)
   ## entire reads
   ylim = c(0,max(max(colMeans(data.open.10.read)),
                  max(colMeans(data.open.10.read)),
                  max(colMeans(data.1nucl.10.read)),
                  max(colMeans(data.2nucl.10.read)),
                  max(colMeans(data.nucls.10.read))))
   plot(colMeans(data.open.10.read),   col=col[1], lwd=3, type='l',
        main="", xlab="pos[bp]", ylab="Nb of reads", xaxt='n',
        ylim=ylim, cex.axis=2, cex.lab=2)
   lines(colMeans(data.1nucl.10.read), col=col[2], lwd=3)
   lines(colMeans(data.2nucl.10.read), col=col[3], lwd=3)
   lines(colMeans(data.nucls.10.read), col=col[4], lwd=3)
   axis(side=1, at=axis.at.10, labels=axis.lab.10, cex.axis=1.8)
   ## atac reads and centers
   plot(colMeans(data.open.10.atac)/max(colMeans(data.open.10.atac)),
        col=col[1], lwd=3, type='l', xaxt='n',
        main="", xlab="pos[bp]", ylab="Prop max signal",
        cex.axis=2, cex.lab=2)
   lines(colMeans(data.1nucl.10.cent)/max(colMeans(data.1nucl.10.cent)), 
         col=col[2], lwd=3)
   lines(colMeans(data.2nucl.10.cent)/max(colMeans(data.2nucl.10.cent)),
         col=col[3], lwd=3)
   lines(colMeans(data.nucls.10.cent)/max(colMeans(data.nucls.10.cent)),
         col=col[4], lwd=3)
   axis(side=1, at=axis.at.10, labels=axis.lab.10, cex.axis=1.8)
   
   # some legends over the rows and columns
   p = par(mar=c(0,0,0,0))
   plot(0, 0, col=0, main="", xlab="", ylab="", xaxt='n', yaxt='n')
   text(0, 0, labels="FRAGMENTS", cex=2, srt=90)
   
   plot(0, 0, col=0, main="", xlab="", ylab="", xaxt='n', yaxt='n')
   text(0, 0, labels="READS", cex=2, srt=90)
   
   plot(0, 0, col=0, main="", xlab="", ylab="", xaxt='n', yaxt='n')
   text(0, 0, labels="EDGES/CENTERS", cex=2, srt=90)
   
   plot(0, 0, col=0, main="", xlab="", ylab="", xaxt='n', yaxt='n')
   text(0, 0, labels="+/-400bp by 1bp", cex=2)
   
   plot(0, 0, col=0, main="", xlab="", ylab="", xaxt='n', yaxt='n')
   text(0, 0, labels="+/-400bp by 2bp", cex=2)
   
   plot(0, 0, col=0, main="", xlab="", ylab="", xaxt='n', yaxt='n')
-  text(0, 0, labels="+/-1kp by 10bp", cex=2)
+  text(0, 0, labels="+/-1kb by 10bp", cex=2)
   
   par(p)
 dev.off()
 
 
 
 # footprint
 # x-axis
 axis.lab.1 = seq(-200, 200, by=100)
 axis.at.1  = seq(0,    400, length.out=length(axis.lab.1))
 
 axis.lab.2 = seq(-200, 200, by=100)
 axis.at.2  = seq(0,    200, length.out=length(axis.lab.2))
 
 axis.lab.10 = seq(-200, 200, by=100)
 axis.at.10  = seq(0,    41, length.out=length(axis.lab.10))
 
 
 # X11(width=10, height=12)
 png(filename=file.path("results", "10xgenomics_PBMC_5k", "myc_motifs_10e-6_footprint.png"),
     units="in", res=720, width=10, height=12)
   p = par(mfrow=c(3,1),
           mar=c(5.1, 5.1, 4.1, 2.1))
   # 1bp resolution
   index = 200:600
   x     = 1:length(index)
   plot(x,
        colMeans(data.open.1.atac[,index])/max(colMeans(data.open.1.atac[,index])),
        type='l', lwd=3, col=col[1],
        main="myc motif 1bp", xlab="pos[bp]", ylab="Prop max signal", xaxt='n',
        cex.axis=2, cex.lab=2, cex.main=2)
   lines(x,
         colMeans(data.1nucl.1.cent[,index])/max(colMeans(data.1nucl.1.cent[,index])), 
         lwd=3, col=col[2])
   lines(x,
         colMeans(data.nucls.1.cent[,index])/max(colMeans(data.nucls.1.cent[,index])),
         lwd=3, col=col[4])
   abline(v=191, lwd=3, lty=2)
   abline(v=211, lwd=3, lty=2)
   axis(side=1, at=axis.at.1, labels=axis.lab.1, cex.axis=1.8)
   
   # 2bp resolution
   index = 100:300
   x     = 1:length(index)
   plot(x,
        colMeans(data.open.2.atac[,index])/max(colMeans(data.open.2.atac[,index])),
        type='l', lwd=3, col=col[1],
        main="myc motif 2bp", xlab="pos[bp]", ylab="Prop max signal", xaxt='n',
        cex.axis=2, cex.lab=2, cex.main=2)
   lines(x,
         colMeans(data.1nucl.2.cent[,index])/max(colMeans(data.1nucl.2.cent[,index])), 
         lwd=3, col=col[2])
   lines(x,
         colMeans(data.nucls.2.cent[,index])/max(colMeans(data.nucls.2.cent[,index])),
         lwd=3, col=col[4])
   abline(v=96, lwd=3, lty=2)
   abline(v=106, lwd=3, lty=2)
   axis(side=1, at=axis.at.1, labels=axis.lab.1, cex.axis=1.8)
   
   # 10bp resolution
   index = 80:120
   x     = 1:length(index)
   plot(x,
        colMeans(data.open.10.atac[,index])/max(colMeans(data.open.10.atac[,index])),
        type='l', lwd=3, col=col[1],
        main="myc motif 10bp", xlab="pos[bp]", ylab="Prop max signal", xaxt='n',
        cex.axis=2, cex.lab=2, cex.main=2)
   lines(x,
         colMeans(data.1nucl.10.cent[,index])/max(colMeans(data.1nucl.10.cent[,index])), 
         lwd=3, col=col[2])
   lines(x,
         colMeans(data.nucls.10.cent[,index])/max(colMeans(data.nucls.10.cent[,index])),
         lwd=3, col=col[4])
   abline(v=20, lwd=3, lty=2)
   abline(v=22, lwd=3, lty=2)
   axis(side=1, at=axis.at.10, labels=axis.lab.10, cex.axis=1.8)
   par(p)
 dev.off()
\ No newline at end of file
diff --git a/scripts/10xgenomics_PBMC_5k_motifs/analysis_myc_motif.sh b/scripts/10xgenomics_PBMC_5k_motifs/analysis_myc_motif.sh
index d927a88..64a7ec2 100755
--- a/scripts/10xgenomics_PBMC_5k_motifs/analysis_myc_motif.sh
+++ b/scripts/10xgenomics_PBMC_5k_motifs/analysis_myc_motif.sh
@@ -1,179 +1,186 @@
 # some paths
 ## directories
 results_dir='results/10xgenomics_PBMC_5k'
 data_dir='data'
 read_dir="$data_dir/10xgenomics_PBMC_5k"
 seq_dir="$data_dir/genomes"
 ## input1
 file_bed=$read_dir'/myc_motifs_10e-6.bed'
+file_bed_rmsk=$read_dir'/myc_motifs_10e-6_rmsk.bed'
 file_bam_open="$read_dir/atac_v1_pbmc_5k_possorted_filtered_30-84bp.bam"
 file_bai_open="$read_dir/atac_v1_pbmc_5k_possorted_filtered_30-84bp.bam.bai"
 file_bam_1nucl="$read_dir/atac_v1_pbmc_5k_possorted_filtered_133-266bp.bam"
 file_bai_1nucl="$read_dir/atac_v1_pbmc_5k_possorted_filtered_133-266bp.bam.bai"
 file_bam_2nucl="$read_dir/atac_v1_pbmc_5k_possorted_filtered_341-500bp.bam"
 file_bai_2nucl="$read_dir/atac_v1_pbmc_5k_possorted_filtered_341-500bp.bam.bai"
 file_bam_1nucl2="$read_dir/atac_v1_pbmc_5k_possorted_filtered_341-500bp_splitintwo.bam"
 file_bai_1nucl2="$read_dir/atac_v1_pbmc_5k_possorted_filtered_341-500bp_splitintwo.bam.bai"
 file_bam_nucl="$read_dir/atac_v1_pbmc_5k_possorted_filtered_nucleosomes.bam"
 file_bai_nucl="$read_dir/atac_v1_pbmc_5k_possorted_filtered_nucleosomes.bam.bai"
 file_hg19="$seq_dir/hg19.fasta"
+file_rmsk="$seq_dir/hg19_rmsk.bed"
 
 mkdir -p $results_dir
 
+# filter out motifs with >=30% repeated region inside
+bin/bedtools/bedtools subtract -A -f 0.3 -a $file_bed -b $file_rmsk > $file_bed_rmsk
+
 # matrix creation
 ## sequences
 file_mat_seq="$results_dir/myc_motifs_10e-6_sequences.mat"
-# bin/SequenceMatrixCreator --bed $file_bed --fasta $file_hg19 --from -400 --to 400 > $file_mat_seq
+file_mat_seq_rmsk="$results_dir/myc_motifs_10e-6_sequences_rmsk.mat"
+bin/SequenceMatrixCreator --bed $file_bed --fasta $file_hg19 --from -400 --to 400 > $file_mat_seq
+bin/SequenceMatrixCreator --bed $file_bed_rmsk --fasta $file_hg19 --from -400 --to 400 > $file_mat_seq_rmsk
 
 ## open chromatin around myc motif
 for method in 'read' 'read_atac' 'fragment'
 do
 	file_mat_open_1="$results_dir/myc_motifs_10e-6_open_bin1bp_$method.mat"
 	# bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_open --bai $file_bai_open --from -400 --to 400   --binSize 1  --method $method > $file_mat_open_1
 	file_mat_open_2="$results_dir/myc_motifs_10e-6_open_bin2bp_$method.mat"
 	# bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_open --bai $file_bai_open --from -400 --to 400   --binSize 2  --method $method > $file_mat_open_2
 	file_mat_open_10="$results_dir/myc_motifs_10e-6_open_bin10bp_$method.mat"
 	# bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_open --bai $file_bai_open --from -1000 --to 1000 --binSize 10 --method $method > $file_mat_open_10
 done
 
 ## mono around myc motif
 for method in 'read' 'fragment' 'fragment_center'
 do
 	### mono nucleosomes
 	file_mat_1nucl_1="$results_dir/myc_motifs_10e-6_1nucl_bin1bp_$method.mat"
 	# bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_1nucl --bai $file_bai_1nucl --from -400 --to 400   --binSize 1  --method $method > $file_mat_1nucl_1
 	file_mat_1nucl_2="$results_dir/myc_motifs_10e-6_1nucl_bin2bp_$method.mat"
 	# bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_1nucl --bai $file_bai_1nucl --from -400 --to 400   --binSize 2  --method $method > $file_mat_1nucl_2
 	file_mat_1nucl_10="$results_dir/myc_motifs_10e-6_1nucl_bin10bp_$method.mat"
 	# bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_1nucl --bai $file_bai_1nucl --from -1000 --to 1000 --binSize 10 --method $method > $file_mat_1nucl_10
 done
 
 
 ## di nucleosomes around myc motif
 for method in 'read' 'fragment' 'fragment_center'
 do
 	### di nucleosomes
 	file_mat_2nucl_1="$results_dir/myc_motifs_10e-6_2nucl_bin1bp_$method.mat"
 	# bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_2nucl --bai $file_bai_2nucl --from -400 --to 400   --binSize 1  --method $method > $file_mat_2nucl_1
 	file_mat_2nucl_2="$results_dir/myc_motifs_10e-6_2nucl_bin2bp_$method.mat"
 	# bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_2nucl --bai $file_bai_2nucl --from -400 --to 400   --binSize 2  --method $method > $file_mat_2nucl_2
 	file_mat_2nucl_10="$results_dir/myc_motifs_10e-6_2nucl_bin10bp_$method.mat"
 	# bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_2nucl --bai $file_bai_2nucl --from -1000 --to 1000 --binSize 10 --method $method > $file_mat_2nucl_10
 done
 
 
 ## mono nucleosomes from processed di-nucleosome data around myc motif
 for method in 'read' 'fragment' 'fragment_center'
 do
 	### mono nucleosomes
 	file_mat_1nucl_1="$results_dir/myc_motifs_10e-6_2nuclsplitintwo_bin1bp_$method.mat"
 	# bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_1nucl2 --bai $file_bai_1nucl2 --from -400 --to 400   --binSize 1  --method $method > $file_mat_1nucl_1
 	file_mat_1nucl_2="$results_dir/myc_motifs_10e-6_2nuclsplitintwo_bin2bp_$method.mat"
 	# bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_1nucl2 --bai $file_bai_1nucl2 --from -400 --to 400   --binSize 2  --method $method > $file_mat_1nucl_2
 	file_mat_1nucl_10="$results_dir/myc_motifs_10e-6_2nuclsplitintwo_bin10bp_$method.mat"
 	# bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_1nucl2 --bai $file_bai_1nucl2 --from -1000 --to 1000 --binSize 10 --method $method > $file_mat_1nucl_10
 done
 
 
 ## all nucleosomes around myc motif
 for method in 'read' 'fragment' 'fragment_center'
 do
 	### mono nucleosomes
 	file_mat_nucl_1="$results_dir/myc_motifs_10e-6_nucleosomes_bin1bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_nucl --bai $file_bai_nucl --from -400 --to 400   --binSize 1  --method $method > $file_mat_nucl_1
 	file_mat_nucl_2="$results_dir/myc_motifs_10e-6_nucleosomes_bin2bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_nucl --bai $file_bai_nucl --from -400 --to 400   --binSize 2  --method $method > $file_mat_nucl_2
 	file_mat_nucl_10="$results_dir/myc_motifs_10e-6_nucleosomes_bin10bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_nucl --bai $file_bai_nucl --from -1000 --to 1000 --binSize 10 --method $method > $file_mat_nucl_10
 done
 
 # some paths
 ## directories
 results_dir='results/10xgenomics_PBMC_5k'
 data_dir='data'
 read_dir="$data_dir/10xgenomics_PBMC_5k"
 seq_dir="$data_dir/genomes"
 ## input1
 file_bed=$read_dir'/myc_motifs_10e-6.bed'
 file_bam_open="$read_dir/atac_v1_pbmc_5k_possorted_filtered_30-84bp.bam"
 file_bai_open="$read_dir/atac_v1_pbmc_5k_possorted_filtered_30-84bp.bam.bai"
 file_bam_1nucl="$read_dir/atac_v1_pbmc_5k_possorted_filtered_133-266bp.bam"
 file_bai_1nucl="$read_dir/atac_v1_pbmc_5k_possorted_filtered_133-266bp.bam.bai"
 file_bam_2nucl="$read_dir/atac_v1_pbmc_5k_possorted_filtered_341-500bp.bam"
 file_bai_2nucl="$read_dir/atac_v1_pbmc_5k_possorted_filtered_341-500bp.bam.bai"
 file_bam_1nucl2="$read_dir/atac_v1_pbmc_5k_possorted_filtered_341-500bp_splitintwo.bam"
 file_bai_1nucl2="$read_dir/atac_v1_pbmc_5k_possorted_filtered_341-500bp_splitintwo.bam.bai"
 file_bam_nucl="$read_dir/atac_v1_pbmc_5k_possorted_filtered_nucleosomes.bam"
 file_bai_nucl="$read_dir/atac_v1_pbmc_5k_possorted_filtered_nucleosomes.bam.bai"
 file_hg19="$seq_dir/hg19.fasta"
 
 mkdir -p $results_dir
 
 # matrix creation
 ## sequences
 file_mat_seq="$results_dir/myc_motifs_10e-6_sequences.mat"
 bin/SequenceMatrixCreator --bed $file_bed --fasta $file_hg19 --from -400 --to 400 > $file_mat_seq
 
 ## open chromatin around myc motif
 for method in 'read' 'read_atac' 'fragment'
 do
 	file_mat_open_1="$results_dir/myc_motifs_10e-6_open_bin1bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_open --bai $file_bai_open --from -400 --to 400   --binSize 1  --method $method > $file_mat_open_1
 	file_mat_open_2="$results_dir/myc_motifs_10e-6_open_bin2bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_open --bai $file_bai_open --from -400 --to 400   --binSize 2  --method $method > $file_mat_open_2
 	file_mat_open_10="$results_dir/myc_motifs_10e-6_open_bin10bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_open --bai $file_bai_open --from -1000 --to 1000 --binSize 10 --method $method > $file_mat_open_10
 done
 
 ## mono around myc motif
 for method in 'read' 'fragment' 'fragment_center'
 do
 	### mono nucleosomes
 	file_mat_1nucl_1="$results_dir/myc_motifs_10e-6_1nucl_bin1bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_1nucl --bai $file_bai_1nucl --from -400 --to 400   --binSize 1  --method $method > $file_mat_1nucl_1
 	file_mat_1nucl_2="$results_dir/myc_motifs_10e-6_1nucl_bin2bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_1nucl --bai $file_bai_1nucl --from -400 --to 400   --binSize 2  --method $method > $file_mat_1nucl_2
 	file_mat_1nucl_10="$results_dir/myc_motifs_10e-6_1nucl_bin10bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_1nucl --bai $file_bai_1nucl --from -1000 --to 1000 --binSize 10 --method $method > $file_mat_1nucl_10
 done
 
 
 ## di nucleosomes around myc motif
 for method in 'read' 'fragment' 'fragment_center'
 do
 	### di nucleosomes
 	file_mat_2nucl_1="$results_dir/myc_motifs_10e-6_2nucl_bin1bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_2nucl --bai $file_bai_2nucl --from -400 --to 400   --binSize 1  --method $method > $file_mat_2nucl_1
 	file_mat_2nucl_2="$results_dir/myc_motifs_10e-6_2nucl_bin2bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_2nucl --bai $file_bai_2nucl --from -400 --to 400   --binSize 2  --method $method > $file_mat_2nucl_2
 	file_mat_2nucl_10="$results_dir/myc_motifs_10e-6_2nucl_bin10bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_2nucl --bai $file_bai_2nucl --from -1000 --to 1000 --binSize 10 --method $method > $file_mat_2nucl_10
 done
 
 
 ## mono nucleosomes from processed di-nucleosome data around myc motif
 for method in 'read' 'fragment' 'fragment_center'
 do
 	### mono nucleosomes
 	file_mat_1nucl_1="$results_dir/myc_motifs_10e-6_2nuclsplitintwo_bin1bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_1nucl2 --bai $file_bai_1nucl2 --from -400 --to 400   --binSize 1  --method $method > $file_mat_1nucl_1
 	file_mat_1nucl_2="$results_dir/myc_motifs_10e-6_2nuclsplitintwo_bin2bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_1nucl2 --bai $file_bai_1nucl2 --from -400 --to 400   --binSize 2  --method $method > $file_mat_1nucl_2
 	file_mat_1nucl_10="$results_dir/myc_motifs_10e-6_2nuclsplitintwo_bin10bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_1nucl2 --bai $file_bai_1nucl2 --from -1000 --to 1000 --binSize 10 --method $method > $file_mat_1nucl_10
 done
 
 
 ## all nucleosomes around myc motif
 for method in 'read' 'fragment' 'fragment_center'
 do
 	### mono nucleosomes
 	file_mat_nucl_1="$results_dir/myc_motifs_10e-6_nucleosomes_bin1bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_nucl --bai $file_bai_nucl --from -400 --to 400   --binSize 1  --method $method > $file_mat_nucl_1
 	file_mat_nucl_2="$results_dir/myc_motifs_10e-6_nucleosomes_bin2bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_nucl --bai $file_bai_nucl --from -400 --to 400   --binSize 2  --method $method > $file_mat_nucl_2
 	file_mat_nucl_10="$results_dir/myc_motifs_10e-6_nucleosomes_bin10bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_nucl --bai $file_bai_nucl --from -1000 --to 1000 --binSize 10 --method $method > $file_mat_nucl_10
 done
 
 
diff --git a/scripts/10xgenomics_PBMC_5k_motifs/analysis_sp1_motif.R b/scripts/10xgenomics_PBMC_5k_motifs/analysis_sp1_motif.R
index 5011e4b..267a246 100644
--- a/scripts/10xgenomics_PBMC_5k_motifs/analysis_sp1_motif.R
+++ b/scripts/10xgenomics_PBMC_5k_motifs/analysis_sp1_motif.R
@@ -1,307 +1,307 @@
 setwd(file.path("/", "local", "groux", "scATAC-seq"))
 
 # libraries
 library(RColorBrewer)
 
 # functions
 source(file.path("scripts", "functions.R"))
 
 
-################## aggregations around myc motifs ################## 
+################## aggregations around sp1 motifs ################## 
 
 # data
 # open chromatin
-data.open.1.frag  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_open_bin1bp_fragment.mat")))
-data.open.2.frag  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_open_bin2bp_fragment.mat")))
-data.open.10.frag = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_open_bin10bp_fragment.mat")))
+data.open.1.frag  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "sp1_motifs_10e-7_open_bin1bp_fragment.mat")))
+data.open.2.frag  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "sp1_motifs_10e-7_open_bin2bp_fragment.mat")))
+data.open.10.frag = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "sp1_motifs_10e-7_open_bin10bp_fragment.mat")))
 
-data.open.1.read  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_open_bin1bp_read.mat")))
-data.open.2.read  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_open_bin2bp_read.mat")))
-data.open.10.read = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_open_bin10bp_read.mat")))
+data.open.1.read  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "sp1_motifs_10e-7_open_bin1bp_read.mat")))
+data.open.2.read  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "sp1_motifs_10e-7_open_bin2bp_read.mat")))
+data.open.10.read = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "sp1_motifs_10e-7_open_bin10bp_read.mat")))
 
-data.open.1.atac  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_open_bin1bp_read_atac.mat")))
-data.open.2.atac  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_open_bin2bp_read_atac.mat")))
-data.open.10.atac = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_open_bin10bp_read_atac.mat")))
+data.open.1.atac  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "sp1_motifs_10e-7_open_bin1bp_read_atac.mat")))
+data.open.2.atac  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "sp1_motifs_10e-7_open_bin2bp_read_atac.mat")))
+data.open.10.atac = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "sp1_motifs_10e-7_open_bin10bp_read_atac.mat")))
 
 # mono-nucleosomes
-data.1nucl.1.frag  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_1nucl_bin1bp_fragment.mat")))
-data.1nucl.2.frag  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_1nucl_bin2bp_fragment.mat")))
-data.1nucl.10.frag = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_1nucl_bin10bp_fragment.mat")))
+data.1nucl.1.frag  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "sp1_motifs_10e-7_1nucl_bin1bp_fragment.mat")))
+data.1nucl.2.frag  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "sp1_motifs_10e-7_1nucl_bin2bp_fragment.mat")))
+data.1nucl.10.frag = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "sp1_motifs_10e-7_1nucl_bin10bp_fragment.mat")))
 
-data.1nucl.1.read  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_1nucl_bin1bp_read.mat")))
-data.1nucl.2.read  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_1nucl_bin2bp_read.mat")))
-data.1nucl.10.read = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_1nucl_bin10bp_read.mat")))
+data.1nucl.1.read  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "sp1_motifs_10e-7_1nucl_bin1bp_read.mat")))
+data.1nucl.2.read  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "sp1_motifs_10e-7_1nucl_bin2bp_read.mat")))
+data.1nucl.10.read = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "sp1_motifs_10e-7_1nucl_bin10bp_read.mat")))
 
-data.1nucl.1.cent  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_1nucl_bin1bp_fragment_center.mat")))
-data.1nucl.2.cent  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_1nucl_bin2bp_fragment_center.mat")))
-data.1nucl.10.cent = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_1nucl_bin10bp_fragment_center.mat")))
+data.1nucl.1.cent  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "sp1_motifs_10e-7_1nucl_bin1bp_fragment_center.mat")))
+data.1nucl.2.cent  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "sp1_motifs_10e-7_1nucl_bin2bp_fragment_center.mat")))
+data.1nucl.10.cent = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "sp1_motifs_10e-7_1nucl_bin10bp_fragment_center.mat")))
 
 # di-nucleosomes
-data.2nucl.1.frag  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_2nucl_bin1bp_fragment.mat")))
-data.2nucl.2.frag  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_2nucl_bin2bp_fragment.mat")))
-data.2nucl.10.frag = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_2nucl_bin10bp_fragment.mat")))
+data.2nucl.1.frag  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "sp1_motifs_10e-7_2nucl_bin1bp_fragment.mat")))
+data.2nucl.2.frag  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "sp1_motifs_10e-7_2nucl_bin2bp_fragment.mat")))
+data.2nucl.10.frag = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "sp1_motifs_10e-7_2nucl_bin10bp_fragment.mat")))
 
-data.2nucl.1.read  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_2nucl_bin1bp_read.mat")))
-data.2nucl.2.read  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_2nucl_bin2bp_read.mat")))
-data.2nucl.10.read = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_2nucl_bin10bp_read.mat")))
+data.2nucl.1.read  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "sp1_motifs_10e-7_2nucl_bin1bp_read.mat")))
+data.2nucl.2.read  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "sp1_motifs_10e-7_2nucl_bin2bp_read.mat")))
+data.2nucl.10.read = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "sp1_motifs_10e-7_2nucl_bin10bp_read.mat")))
 
-data.2nucl.1.cent  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_2nucl_bin1bp_fragment_center.mat")))
-data.2nucl.2.cent  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_2nucl_bin2bp_fragment_center.mat")))
-data.2nucl.10.cent = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_2nucl_bin10bp_fragment_center.mat")))
+data.2nucl.1.cent  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "sp1_motifs_10e-7_2nucl_bin1bp_fragment_center.mat")))
+data.2nucl.2.cent  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "sp1_motifs_10e-7_2nucl_bin2bp_fragment_center.mat")))
+data.2nucl.10.cent = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "sp1_motifs_10e-7_2nucl_bin10bp_fragment_center.mat")))
 
 # mono-nucleosomes from di-nucleosome data
-data.nucls.1.frag  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_2nuclsplitintwo_bin1bp_fragment.mat")))
-data.nucls.2.frag  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_2nuclsplitintwo_bin2bp_fragment.mat")))
-data.nucls.10.frag = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_2nuclsplitintwo_bin10bp_fragment.mat")))
+data.nucls.1.frag  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "sp1_motifs_10e-7_2nuclsplitintwo_bin1bp_fragment.mat")))
+data.nucls.2.frag  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "sp1_motifs_10e-7_2nuclsplitintwo_bin2bp_fragment.mat")))
+data.nucls.10.frag = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "sp1_motifs_10e-7_2nuclsplitintwo_bin10bp_fragment.mat")))
 
-data.nucls.1.read  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_2nuclsplitintwo_bin1bp_read.mat")))
-data.nucls.2.read  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_2nuclsplitintwo_bin2bp_read.mat")))
-data.nucls.10.read = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_2nuclsplitintwo_bin10bp_read.mat")))
+data.nucls.1.read  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "sp1_motifs_10e-7_2nuclsplitintwo_bin1bp_read.mat")))
+data.nucls.2.read  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "sp1_motifs_10e-7_2nuclsplitintwo_bin2bp_read.mat")))
+data.nucls.10.read = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "sp1_motifs_10e-7_2nuclsplitintwo_bin10bp_read.mat")))
 
-data.nucls.1.cent  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_2nuclsplitintwo_bin1bp_fragment_center.mat")))
-data.nucls.2.cent  = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_2nuclsplitintwo_bin2bp_fragment_center.mat")))
-data.nucls.10.cent = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_2nuclsplitintwo_bin10bp_fragment_center.mat")))
+data.nucls.1.cent  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "sp1_motifs_10e-7_2nuclsplitintwo_bin1bp_fragment_center.mat")))
+data.nucls.2.cent  = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "sp1_motifs_10e-7_2nuclsplitintwo_bin2bp_fragment_center.mat")))
+data.nucls.10.cent = as.matrix(read.table(file.path("data", "10xgenomics_PBMC_5k_motifs", "sp1_motifs_10e-7_2nuclsplitintwo_bin10bp_fragment_center.mat")))
 
 
 # colors
 col = brewer.pal(4, "Set1")
 
 # x-axis
 axis.at.1   = seq(0, ncol(data.open.1.frag), length.out =5)
 axis.lab.1  = seq(-400,   400, by=200)
 axis.at.2   = seq(0, ncol(data.open.2.frag), length.out =5)
 axis.lab.2  = seq(-400,   400, by=200)
 axis.at.10  = seq(0, ncol(data.open.10.frag), length.out=5)
 axis.lab.10 = seq(-1000, 1000, by=500)
 
 # X11(width=12, height=12)
 png(filename=file.path("results/10xgenomics_PBMC_5k/sp1_motifs_10e-7_aggregations.png"),
     units="in", res=720, width=12, height=9)
   m = matrix(nrow=4, ncol=4,
              data=c(16,13,14,15,
                     10, 1, 4, 7,
                     11, 2, 5, 8,
                     12, 3, 6, 9), byrow=T)
   l = layout(mat=m, widths=c(0.2, 1, 1, 1), heights=c(0.2, 1, 1, 1))
   layout.show(l)
   
   p = par(mar=c(5.1, 5.1, 4.1, 2.1))
   
   # 1bp resolution
   ## entire fragments
   ylim = c(0,max(max(colMeans(data.open.1.frag)),
                  max(colMeans(data.open.1.frag)),
                  max(colMeans(data.1nucl.1.frag)),
                  max(colMeans(data.2nucl.1.frag)),
                  max(colMeans(data.nucls.1.frag))))
   plot(colMeans(data.open.1.frag),   col=col[1], lwd=3, type='l',
        main="", xlab="pos[bp]", ylab="Nb of reads", xaxt='n',
        ylim=ylim, cex.axis=2, cex.lab=2)
   lines(colMeans(data.open.1.frag),  col=col[1], lwd=3)
   lines(colMeans(data.1nucl.1.frag), col=col[2], lwd=3)
   lines(colMeans(data.2nucl.1.frag), col=col[3], lwd=3)
   lines(colMeans(data.nucls.1.frag), col=col[4], lwd=3)
   axis(side=1, at=axis.at.1, labels=axis.lab.1, cex.axis=1.8)
   ## entire reads
   ylim = c(0,max(max(colMeans(data.open.1.read)),
                  max(colMeans(data.open.1.read)),
                  max(colMeans(data.1nucl.1.read)),
                  max(colMeans(data.2nucl.1.read)),
                  max(colMeans(data.nucls.1.read))))
   plot(colMeans(data.open.1.read),   col=col[1], lwd=3, type='l',
        main="", xlab="pos[bp]", ylab="Nb of reads", xaxt='n',
        ylim=ylim, cex.axis=2, cex.lab=2)
   lines(colMeans(data.1nucl.1.read), col=col[2], lwd=3)
   lines(colMeans(data.2nucl.1.read), col=col[3], lwd=3)
   lines(colMeans(data.nucls.1.read), col=col[4], lwd=3)
   axis(side=1, at=axis.at.1, labels=axis.lab.1, cex.axis=1.8)
   ## atac reads and centers
   plot(colMeans(data.open.1.atac)/max(colMeans(data.open.1.atac)),
        col=col[1], lwd=3, type='l', xaxt='n',
        main="", xlab="pos[bp]", ylab="Prop max signal",
        cex.axis=2, cex.lab=2)
   lines(colMeans(data.1nucl.1.cent)/max(colMeans(data.1nucl.1.cent)), 
         col=col[2], lwd=3)
   lines(colMeans(data.2nucl.1.cent)/max(colMeans(data.2nucl.1.cent)),
         col=col[3], lwd=3)
   lines(colMeans(data.nucls.1.cent)/max(colMeans(data.nucls.1.cent)),
         col=col[4], lwd=3)
   axis(side=1, at=axis.at.1, labels=axis.lab.1, cex.axis=1.8)
   
   # 2bp resolution
   ## entire fragments
   ylim = c(0,max(max(colMeans(data.open.2.frag)),
                  max(colMeans(data.open.2.frag)),
                  max(colMeans(data.1nucl.2.frag)),
                  max(colMeans(data.2nucl.2.frag)),
                  max(colMeans(data.nucls.2.frag))))
   plot(colMeans(data.open.2.frag),   col=col[1], lwd=3, type='l',
        main="", xlab="pos[bp]", ylab="Nb of reads", xaxt='n',
        ylim=ylim, cex.axis=2, cex.lab=2)
   lines(colMeans(data.1nucl.2.frag), col=col[2], lwd=3)
   lines(colMeans(data.2nucl.2.frag), col=col[3], lwd=3)
   lines(colMeans(data.nucls.2.frag), col=col[4], lwd=3)
   axis(side=1, at=axis.at.2, labels=axis.lab.2, cex.axis=1.8)
   ## entire reads
   ylim = c(0,max(max(colMeans(data.open.2.read)),
                  max(colMeans(data.open.2.read)),
                  max(colMeans(data.1nucl.2.read)),
                  max(colMeans(data.2nucl.2.read)),
                  max(colMeans(data.nucls.2.read))))
   plot(colMeans(data.open.2.read),   col=col[1], lwd=3, type='l',
        main="", xlab="pos[bp]", ylab="Nb of reads", xaxt='n',
        ylim=ylim, cex.axis=2, cex.lab=2)
   lines(colMeans(data.1nucl.2.read), col=col[2], lwd=3)
   lines(colMeans(data.2nucl.2.read), col=col[3], lwd=3)
   lines(colMeans(data.nucls.2.read), col=col[4], lwd=3)
   axis(side=1, at=axis.at.2, labels=axis.lab.2, cex.axis=1.8)
   ## atac reads and centers
   plot(colMeans(data.open.2.atac)/max(colMeans(data.open.2.atac)),
        col=col[1], lwd=3, type='l', xaxt='n',
        main="", xlab="pos[bp]", ylab="Prop max signal",
        cex.axis=2, cex.lab=2)
   lines(colMeans(data.1nucl.2.cent)/max(colMeans(data.1nucl.2.cent)), 
         col=col[2], lwd=3)
   lines(colMeans(data.2nucl.2.cent)/max(colMeans(data.2nucl.2.cent)),
         col=col[3], lwd=3)
   lines(colMeans(data.nucls.2.cent)/max(colMeans(data.nucls.2.cent)),
         col=col[4], lwd=3)
   axis(side=1, at=axis.at.2, labels=axis.lab.2, cex.axis=1.8)
   
   # 10bp resolution
   ## entire fragments
   ylim = c(0,max(max(colMeans(data.open.10.frag)),
                  max(colMeans(data.open.10.frag)),
                  max(colMeans(data.1nucl.10.frag)),
                  max(colMeans(data.2nucl.10.frag)),
                  max(colMeans(data.nucls.10.frag))))
   plot(colMeans(data.open.10.frag),   col=col[1], lwd=3, type='l',
        main="", xlab="pos[bp]", ylab="Nb of reads", xaxt='n',
        ylim=ylim, cex.axis=2, cex.lab=2)
   lines(colMeans(data.1nucl.10.frag), col=col[2], lwd=3)
   lines(colMeans(data.2nucl.10.frag), col=col[3], lwd=3)
   lines(colMeans(data.nucls.10.frag), col=col[4], lwd=3)
   axis(side=1, at=axis.at.10, labels=axis.lab.10, cex.axis=1.8)
   ## entire reads
   ylim = c(0,max(max(colMeans(data.open.10.read)),
                  max(colMeans(data.open.10.read)),
                  max(colMeans(data.1nucl.10.read)),
                  max(colMeans(data.2nucl.10.read)),
                  max(colMeans(data.nucls.10.read))))
   plot(colMeans(data.open.10.read),   col=col[1], lwd=3, type='l',
        main="", xlab="pos[bp]", ylab="Nb of reads", xaxt='n',
        ylim=ylim, cex.axis=2, cex.lab=2)
   lines(colMeans(data.1nucl.10.read), col=col[2], lwd=3)
   lines(colMeans(data.2nucl.10.read), col=col[3], lwd=3)
   lines(colMeans(data.nucls.10.read), col=col[4], lwd=3)
   axis(side=1, at=axis.at.10, labels=axis.lab.10, cex.axis=1.8)
   ## atac reads and centers
   plot(colMeans(data.open.10.atac)/max(colMeans(data.open.10.atac)),
        col=col[1], lwd=3, type='l', xaxt='n',
        main="", xlab="pos[bp]", ylab="Prop max signal",
        cex.axis=2, cex.lab=2)
   lines(colMeans(data.1nucl.10.cent)/max(colMeans(data.1nucl.10.cent)), 
         col=col[2], lwd=3)
   lines(colMeans(data.2nucl.10.cent)/max(colMeans(data.2nucl.10.cent)),
         col=col[3], lwd=3)
   lines(colMeans(data.nucls.10.cent)/max(colMeans(data.nucls.10.cent)),
         col=col[4], lwd=3)
   axis(side=1, at=axis.at.10, labels=axis.lab.10, cex.axis=1.8)
   
   # some legends over the rows and columns
   p = par(mar=c(0,0,0,0))
   plot(0, 0, col=0, main="", xlab="", ylab="", xaxt='n', yaxt='n')
   text(0, 0, labels="FRAGMENTS", cex=2, srt=90)
   
   plot(0, 0, col=0, main="", xlab="", ylab="", xaxt='n', yaxt='n')
   text(0, 0, labels="READS", cex=2, srt=90)
   
   plot(0, 0, col=0, main="", xlab="", ylab="", xaxt='n', yaxt='n')
   text(0, 0, labels="EDGES/CENTERS", cex=2, srt=90)
   
   plot(0, 0, col=0, main="", xlab="", ylab="", xaxt='n', yaxt='n')
   text(0, 0, labels="+/-400bp by 1bp", cex=2)
   
   plot(0, 0, col=0, main="", xlab="", ylab="", xaxt='n', yaxt='n')
   text(0, 0, labels="+/-400bp by 2bp", cex=2)
   
   plot(0, 0, col=0, main="", xlab="", ylab="", xaxt='n', yaxt='n')
   text(0, 0, labels="+/-1kp by 10bp", cex=2)
   
   par(p)
 dev.off()
 
 
 
 # x-axis
 axis.lab.1 = seq(-200, 200, by=100)
 axis.at.1  = seq(0,    400, length.out=length(axis.lab.1))
 
 axis.lab.2 = seq(-200, 200, by=100)
 axis.at.2  = seq(0,    200, length.out=length(axis.lab.2))
 
 axis.lab.10 = seq(-200, 200, by=100)
 axis.at.10  = seq(0,    41, length.out=length(axis.lab.10))
 
 
 # X11(width=10, height=12)
 png(filename=file.path("results", "10xgenomics_PBMC_5k", "sp1_motifs_10e-7_footprint.png"),
     units="in", res=720, width=10, height=12)
   p = par(mfrow=c(3,1),
           mar=c(5.1, 5.1, 4.1, 2.1))
   # 1bp resolution
   index = 200:600
   x     = 1:length(index)
   plot(x,
        colMeans(data.open.1.atac[,index])/max(colMeans(data.open.1.atac[,index])),
        type='l', lwd=3, col=col[1],
        main="SP1 motif 1bp", xlab="pos[bp]", ylab="Prop max signal", xaxt='n',
        cex.axis=2, cex.lab=2, cex.main=2)
   lines(x,
         colMeans(data.1nucl.1.cent[,index])/max(colMeans(data.1nucl.1.cent[,index])), 
         lwd=3, col=col[2])
   lines(x,
         colMeans(data.nucls.1.cent[,index])/max(colMeans(data.nucls.1.cent[,index])),
         lwd=3, col=col[4])
   abline(v=191, lwd=3, lty=2)
   abline(v=211, lwd=3, lty=2)
   axis(side=1, at=axis.at.1, labels=axis.lab.1, cex.axis=1.8)
   
   # 2bp resolution
   index = 100:300
   x     = 1:length(index)
   plot(x,
        colMeans(data.open.2.atac[,index])/max(colMeans(data.open.2.atac[,index])),
        type='l', lwd=3, col=col[1],
        main="SP1 motif 2bp", xlab="pos[bp]", ylab="Prop max signal", xaxt='n',
        cex.axis=2, cex.lab=2, cex.main=2)
   lines(x,
         colMeans(data.1nucl.2.cent[,index])/max(colMeans(data.1nucl.2.cent[,index])), 
         lwd=3, col=col[2])
   lines(x,
         colMeans(data.nucls.2.cent[,index])/max(colMeans(data.nucls.2.cent[,index])),
         lwd=3, col=col[4])
   abline(v=96, lwd=3, lty=2)
   abline(v=106, lwd=3, lty=2)
-  axis(side=1, at=axis.at.1, labels=axis.lab.1, cex.axis=1.8)
+  axis(side=1, at=axis.at.2, labels=axis.lab.2, cex.axis=1.8)  # 2bp ticks: x spans 1..201 here, axis.at.1 (0..400) would misplace the labels
   
   # 10bp resolution
   index = 80:120
   x     = 1:length(index)
   plot(x,
        colMeans(data.open.10.atac[,index])/max(colMeans(data.open.10.atac[,index])),
        type='l', lwd=3, col=col[1],
        main="SP1 motif 10bp", xlab="pos[bp]", ylab="Prop max signal", xaxt='n',
        cex.axis=2, cex.lab=2, cex.main=2)
   lines(x,
         colMeans(data.1nucl.10.cent[,index])/max(colMeans(data.1nucl.10.cent[,index])), 
         lwd=3, col=col[2])
   lines(x,
         colMeans(data.nucls.10.cent[,index])/max(colMeans(data.nucls.10.cent[,index])),
         lwd=3, col=col[4])
   abline(v=20, lwd=3, lty=2)
   abline(v=22, lwd=3, lty=2)
   axis(side=1, at=axis.at.10, labels=axis.lab.10, cex.axis=1.8)
   par(p)
 dev.off()
 
diff --git a/scripts/10xgenomics_PBMC_5k_motifs/analysis_sp1_motif.sh b/scripts/10xgenomics_PBMC_5k_motifs/analysis_sp1_motif.sh
index 7c2012e..8904a67 100755
--- a/scripts/10xgenomics_PBMC_5k_motifs/analysis_sp1_motif.sh
+++ b/scripts/10xgenomics_PBMC_5k_motifs/analysis_sp1_motif.sh
@@ -1,90 +1,96 @@
 # some paths
 ## directories
-results_dir='results/10xgenomics_PBMC_5k'
-data_dir='data'
-read_dir="$data_dir/10xgenomics_PBMC_5k"
-seq_dir="$data_dir/genomes"
+results_dir='data/10xgenomics_PBMC_5k_motifs'
+read_dir="data/10xgenomics_PBMC_5k"
+seq_dir="data/genomes"
 ## input1
 file_bed=$read_dir'/sp1_motifs_10e-7.bed'
+file_bed_rmsk=$read_dir'/sp1_motifs_10e-7_rmsk.bed'
 file_bam_open="$read_dir/atac_v1_pbmc_5k_possorted_filtered_30-84bp.bam"
 file_bai_open="$read_dir/atac_v1_pbmc_5k_possorted_filtered_30-84bp.bam.bai"
 file_bam_1nucl="$read_dir/atac_v1_pbmc_5k_possorted_filtered_133-266bp.bam"
 file_bai_1nucl="$read_dir/atac_v1_pbmc_5k_possorted_filtered_133-266bp.bam.bai"
 file_bam_2nucl="$read_dir/atac_v1_pbmc_5k_possorted_filtered_341-500bp.bam"
 file_bai_2nucl="$read_dir/atac_v1_pbmc_5k_possorted_filtered_341-500bp.bam.bai"
 file_bam_1nucl2="$read_dir/atac_v1_pbmc_5k_possorted_filtered_341-500bp_splitintwo.bam"
 file_bai_1nucl2="$read_dir/atac_v1_pbmc_5k_possorted_filtered_341-500bp_splitintwo.bam.bai"
 file_bam_nucl="$read_dir/atac_v1_pbmc_5k_possorted_filtered_nucleosomes.bam"
 file_bai_nucl="$read_dir/atac_v1_pbmc_5k_possorted_filtered_nucleosomes.bam.bai"
 file_hg19="$seq_dir/hg19.fasta"
+file_rmsk="$seq_dir/hg19_rmsk.bed"
 
 mkdir -p $results_dir
 
+# drop motif entries of which >=30% of the length overlaps a repeat (rmsk) region
+bin/bedtools/bedtools subtract -A -f 0.3 -a $file_bed -b $file_rmsk > $file_bed_rmsk
+
 # matrix creation
-## sequences
+## sequences, for all motifs and for the repeat-filtered motifs
 file_mat_seq="$results_dir/sp1_motifs_10e-7_sequences.mat"
-bin/SequenceMatrixCreator --bed $file_bed --fasta $file_hg19 --from -400 --to 400 > $file_mat_seq
+file_mat_seq_rmsk="$results_dir/sp1_motifs_10e-7_sequences_rmsk.mat"
+bin/SequenceMatrixCreator --bed $file_bed      --fasta $file_hg19 --from -400 --to 400 > $file_mat_seq
+bin/SequenceMatrixCreator --bed $file_bed_rmsk --fasta $file_hg19 --from -400 --to 400 > $file_mat_seq_rmsk
 
 ## open chromatin around sp1 motif
 for method in 'read' 'read_atac' 'fragment'
 do
 	file_mat_open_1="$results_dir/sp1_motifs_10e-7_open_bin1bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_open --bai $file_bai_open --from -400 --to 400   --binSize 1  --method $method > $file_mat_open_1
 	file_mat_open_2="$results_dir/sp1_motifs_10e-7_open_bin2bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_open --bai $file_bai_open --from -400 --to 400   --binSize 2  --method $method > $file_mat_open_2
 	file_mat_open_10="$results_dir/sp1_motifs_10e-7_open_bin10bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_open --bai $file_bai_open --from -1000 --to 1000 --binSize 10 --method $method > $file_mat_open_10
 done
 
 ## mono around sp1 motif
 for method in 'read' 'fragment' 'fragment_center'
 do
 	### mono nucleosomes
 	file_mat_1nucl_1="$results_dir/sp1_motifs_10e-7_1nucl_bin1bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_1nucl --bai $file_bai_1nucl --from -400 --to 400   --binSize 1  --method $method > $file_mat_1nucl_1
 	file_mat_1nucl_2="$results_dir/sp1_motifs_10e-7_1nucl_bin2bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_1nucl --bai $file_bai_1nucl --from -400 --to 400   --binSize 2  --method $method > $file_mat_1nucl_2
 	file_mat_1nucl_10="$results_dir/sp1_motifs_10e-7_1nucl_bin10bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_1nucl --bai $file_bai_1nucl --from -1000 --to 1000 --binSize 10 --method $method > $file_mat_1nucl_10
 done
 
 
 ## di nucleosomes around sp1 motif
 for method in 'read' 'fragment' 'fragment_center'
 do
 	### di nucleosomes
 	file_mat_2nucl_1="$results_dir/sp1_motifs_10e-7_2nucl_bin1bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_2nucl --bai $file_bai_2nucl --from -400 --to 400   --binSize 1  --method $method > $file_mat_2nucl_1
 	file_mat_2nucl_2="$results_dir/sp1_motifs_10e-7_2nucl_bin2bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_2nucl --bai $file_bai_2nucl --from -400 --to 400   --binSize 2  --method $method > $file_mat_2nucl_2
 	file_mat_2nucl_10="$results_dir/sp1_motifs_10e-7_2nucl_bin10bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_2nucl --bai $file_bai_2nucl --from -1000 --to 1000 --binSize 10 --method $method > $file_mat_2nucl_10
 done
 
 
 ## mono nucleosomes from processed di-nucleosome data around sp1 motif
 for method in 'read' 'fragment' 'fragment_center'
 do
 	### mono nucleosomes
 	file_mat_1nucl_1="$results_dir/sp1_motifs_10e-7_2nuclsplitintwo_bin1bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_1nucl2 --bai $file_bai_1nucl2 --from -400 --to 400   --binSize 1  --method $method > $file_mat_1nucl_1
 	file_mat_1nucl_2="$results_dir/sp1_motifs_10e-7_2nuclsplitintwo_bin2bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_1nucl2 --bai $file_bai_1nucl2 --from -400 --to 400   --binSize 2  --method $method > $file_mat_1nucl_2
 	file_mat_1nucl_10="$results_dir/sp1_motifs_10e-7_2nuclsplitintwo_bin10bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_1nucl2 --bai $file_bai_1nucl2 --from -1000 --to 1000 --binSize 10 --method $method > $file_mat_1nucl_10
 done
 
 
 ## all nucleosomes around sp1 motif
 for method in 'read' 'fragment' 'fragment_center'
 do
-	### mono nucleosomes
+	### all nucleosomes
 	file_mat_nucl_1="$results_dir/sp1_motifs_10e-7_nucleosomes_bin1bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_nucl --bai $file_bai_nucl --from -400 --to 400   --binSize 1  --method $method > $file_mat_nucl_1
 	file_mat_nucl_2="$results_dir/sp1_motifs_10e-7_nucleosomes_bin2bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_nucl --bai $file_bai_nucl --from -400 --to 400   --binSize 2  --method $method > $file_mat_nucl_2
 	file_mat_nucl_10="$results_dir/sp1_motifs_10e-7_nucleosomes_bin10bp_$method.mat"
 	bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_nucl --bai $file_bai_nucl --from -1000 --to 1000 --binSize 10 --method $method > $file_mat_nucl_10
 done
 
 
diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_0/classification_ctcf_motif.R b/scripts/10xgenomics_PBMC_5k_motifs_classification_0/classification_ctcf_motif.R
new file mode 100644
index 0000000..17992c1
--- /dev/null
+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_0/classification_ctcf_motif.R
@@ -0,0 +1,171 @@
+setwd(file.path("/", "local", "groux", "scATAC-seq"))  # every path below is relative to this project root
+
+# libraries (brewer.pal() is used below for the curve colors)
+library(RColorBrewer)
+
+# project helpers; presumably defines read.read.models(), read.sequence.models() and plot.logo() used below -- TODO confirm
+source(file.path("scripts", "functions.R"))
+
+# smallest number of classes (k) for which the model files are read and plotted
+k.min = 1
+# largest number of classes (k) for which the model files are read and plotted
+k.max = 6
+
+# paths to the letter images (A, C, G, T) handed to plot.logo() when drawing the sequence logos
+path.a = file.path("res/A.png")
+path.c = file.path("res/C.png")
+path.g = file.path("res/G.png")
+path.t = file.path("res/T.png") 
+
+################## open chromatin patterns around ctcf motifs with flip ##################
+# For each number of classes k: read the k-class models (with flip) and write one PNG with one panel per class.
+for(k in k.min:k.max)
+{
+  # open chromatin: class models (one row per class, as indexed below) and class probabilities
+  data       = read.read.models(file.path("results", "10xgenomics_PBMC_5k_motifs_classification_0",
+                                          sprintf("ctcf_motifs_10e-6_open_bin1bp_read_atac_%dclass_model.mat", k)))
+  model.open = data$models
+  model.prob = data$prob
+  data = NULL  # the list is not needed once its two fields are extracted
+  # nucleosomes (fragment centers): only the models are kept
+  model.nucl = read.read.models(file.path("results", "10xgenomics_PBMC_5k_motifs_classification_0",
+                                          sprintf("ctcf_motifs_10e-6_open_bin1bp_read_atac_%dclass_nucleosomes_fragment_center_model.mat", k)))$models
+  # sequence: only the models are kept (indexed [,,class] below)
+  model.seq  = read.sequence.models(file.path("results", "10xgenomics_PBMC_5k_motifs_classification_0",
+                                              sprintf("ctcf_motifs_10e-6_open_bin1bp_read_atac_%dclass_sequences_model.mat", k)))$models
+
+  # plot classes
+  col = brewer.pal(3, "Set1")
+  # X11(width=17, height=10)
+  png(filename=file.path("results", "10xgenomics_PBMC_5k_motifs_classification_0",
+                         sprintf("ctcf_motifs_10e-6_classification_open_bin1bp_%dclass.png", k)),
+      units="in", res=720, width=18, height=12)
+    m = matrix(1:10, nrow=5, ncol=2, byrow=FALSE)  # 10 panels, filled column by column (5 per column)
+    layout(m)
+    # order from most to least probable class
+    ord      = order(model.prob, decreasing=TRUE)
+    ref.open = model.open[ord,, drop=FALSE]
+    ref.nucl = model.nucl[ord,, drop=FALSE]
+    ref.seq  = model.seq[,,ord, drop=FALSE]
+    prob     = model.prob[ord]
+    class    = seq_len(nrow(ref.open))[ord]  # original class index of each reordered row
+    for(i in seq_len(nrow(ref.open)))  # seq_len() so that an empty model gives no iteration
+    { # plot logo
+      plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t,
+                main=sprintf("class %d (p=%.2f)", class[i], prob[i]))
+      # x-axis; NOTE(review): labels run from -ncol to +ncol over ncol columns, presumably twice the real span in bp -- confirm
+      x.lab = seq(-ncol(ref.open), ncol(ref.open), length.out=3)
+      x.at  = (x.lab + ncol(ref.open)) / 2
+      axis(1, at=x.at, labels=x.lab)
+      # y-axis is [0,1] for min/max signal; NOTE(review): ticks sit at 0..1 while the curves below are scaled to 0..2 -- confirm
+      x.at = seq(0, 1, 0.5)
+      axis(2, at=x.at, labels=x.at)
+      # plot signal (multiplies by 2 because the y-axis goes to 2 bits)
+      lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=1, col=col[1])
+      lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=1, col=col[2])
+    }
+    row_n = 1 # row counter
+    col_n = 1 # column counter
+    for(i in seq_len(nrow(ref.open)))
+    { # plot logo center: inset positioned in device fractions for layout row row_n, column col_n
+      right  = 0.5*col_n - 0.01
+      left   = right - 0.2
+      bottom = 1-(row_n*(0.2))+0.05
+      top    = bottom + 0.15
+      par(fig=c(left, right, bottom, top), new=TRUE)
+      idx = 380:420
+      plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t)
+      # plot signal (multiplies by 2 because the y-axis goes to 2 bits); scaled by the max of the whole row, not of idx
+      lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=1, col=col[1])
+      lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=1, col=col[2])
+      # xaxis
+      x.at = seq_along(idx)
+      axis(1, at=x.at, labels=x.at)
+      # yaxis
+      x.at = seq(0, 2, by=1)
+      axis(2, at=x.at, labels=x.at)
+      row_n = row_n + 1
+      if(i %% 5 == 0)  # 5 panels per layout column: continue at the top of the next column
+      { col_n = col_n + 1
+        row_n = 1
+      }
+    }
+  dev.off()
+}
+
+
+################## open chromatin patterns around ctcf motifs without flip ##################
+# For each number of classes k: read the k-class models (without flip) and write one PNG with one panel per class.
+for(k in k.min:k.max)
+{
+  # open chromatin: class models (one row per class, as indexed below) and class probabilities
+  data       = read.read.models(file.path("results", "10xgenomics_PBMC_5k_motifs_classification_0",
+                                          sprintf("ctcf_motifs_10e-6_open_bin1bp_read_atac_%dclass_noflip_model.mat", k)))
+  model.open = data$models
+  model.prob = data$prob
+  data = NULL  # the list is not needed once its two fields are extracted
+  # nucleosomes (fragment centers): only the models are kept
+  model.nucl = read.read.models(file.path("results", "10xgenomics_PBMC_5k_motifs_classification_0",
+                                          sprintf("ctcf_motifs_10e-6_open_bin1bp_read_atac_%dclass_noflip_nucleosomes_fragment_center_model.mat", k)))$models
+  # sequence: only the models are kept (indexed [,,class] below)
+  model.seq  = read.sequence.models(file.path("results", "10xgenomics_PBMC_5k_motifs_classification_0",
+                                              sprintf("ctcf_motifs_10e-6_open_bin1bp_read_atac_%dclass_noflip_sequences_model.mat", k)))$models
+
+  # plot classes
+  col = brewer.pal(3, "Set1")
+  # X11(width=17, height=10)
+  png(filename=file.path("results", "10xgenomics_PBMC_5k_motifs_classification_0",
+                         sprintf("ctcf_motifs_10e-6_classification_open_bin1bp_%dclass_noflip.png", k)),
+      units="in", res=720, width=18, height=12)
+  m = matrix(1:10, nrow=5, ncol=2, byrow=FALSE)  # 10 panels, filled column by column (5 per column)
+  layout(m)
+  # order from most to least probable class
+  ord      = order(model.prob, decreasing=TRUE)
+  ref.open = model.open[ord,, drop=FALSE]
+  ref.nucl = model.nucl[ord,, drop=FALSE]
+  ref.seq  = model.seq[,,ord, drop=FALSE]
+  prob     = model.prob[ord]
+  class    = seq_len(nrow(ref.open))[ord]  # original class index of each reordered row
+  for(i in seq_len(nrow(ref.open)))  # seq_len() so that an empty model gives no iteration
+  { # plot logo
+    plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t,
+              main=sprintf("class %d (p=%.2f)", class[i], prob[i]))
+    # x-axis; NOTE(review): labels run from -ncol to +ncol over ncol columns, presumably twice the real span in bp -- confirm
+    x.lab = seq(-ncol(ref.open), ncol(ref.open), length.out=3)
+    x.at  = (x.lab + ncol(ref.open)) / 2
+    axis(1, at=x.at, labels=x.lab)
+    # y-axis is [0,1] for min/max signal; NOTE(review): ticks sit at 0..1 while the curves below are scaled to 0..2 -- confirm
+    x.at = seq(0, 1, 0.5)
+    axis(2, at=x.at, labels=x.at)
+    # plot signal (multiplies by 2 because the y-axis goes to 2 bits)
+    lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=1, col=col[1])
+    lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=1, col=col[2])
+  }
+  row_n = 1 # row counter
+  col_n = 1 # column counter
+  for(i in seq_len(nrow(ref.open)))
+  { # plot logo center: inset positioned in device fractions for layout row row_n, column col_n
+    right  = 0.5*col_n - 0.01
+    left   = right - 0.2
+    bottom = 1-(row_n*(0.2))+0.05
+    top    = bottom + 0.15
+    par(fig=c(left, right, bottom, top), new=TRUE)
+    idx = 380:420
+    plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t)
+    # plot signal (multiplies by 2 because the y-axis goes to 2 bits); scaled by the max of the whole row, not of idx
+    lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=1, col=col[1])
+    lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=1, col=col[2])
+    # xaxis
+    x.at = seq_along(idx)
+    axis(1, at=x.at, labels=x.at)
+    # yaxis
+    x.at = seq(0, 2, by=1)
+    axis(2, at=x.at, labels=x.at)
+    row_n = row_n + 1
+    if(i %% 5 == 0)  # 5 panels per layout column: continue at the top of the next column
+    { col_n = col_n + 1
+      row_n = 1
+    }
+  }
+  dev.off()
+}
diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_0/classification_ctcf_motif.sh b/scripts/10xgenomics_PBMC_5k_motifs_classification_0/classification_ctcf_motif.sh
new file mode 100755
index 0000000..0306079
--- /dev/null
+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_0/classification_ctcf_motif.sh
@@ -0,0 +1,49 @@
+# Settings shared by the two classification loops below; all paths are relative to the working directory.
+## directories
+results_dir="results/10xgenomics_PBMC_5k_motifs_classification_0"
+data_dir="data/10xgenomics_PBMC_5k_motifs"
+## input matrices
+file_mat_open="${data_dir}/ctcf_motifs_10e-6_open_bin1bp_read_atac.mat"
+file_mat_nucl="${data_dir}/ctcf_motifs_10e-6_nucleosomes_bin1bp_fragment_center.mat"
+file_mat_seq="${data_dir}/ctcf_motifs_10e-6_sequences.mat"
+
+## file to which each run appends "<probability file> <seed>"
+file_seed="${results_dir}/ctcf_motifs_10e-6_seed.txt"
+# make sure the output directory and the seed file exist
+mkdir -p "${results_dir}"
+touch "${file_seed}"
+
+# parameters handed to the programs in the loops below
+n_iter=20    # --iter
+n_shift=1    # --shift
+n_core=6     # --thread
+
+# open chromatin with flip
+for k in 1 2 3 4 5 6
+do
+	# seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
+	file_prob=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_prob.mat4d'
+	file_mod1=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_model.mat'
+	file_mod2=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_nucleosomes_fragment_center_model.mat'
+	file_mod3=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_sequences_model.mat'
+	echo "$file_prob $seed" >> $file_seed
+	bin/EMRead      --read $file_mat_open  --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
+	bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
+	bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2
+	bin/ProbToModel --seq  $file_mat_seq  --prob $file_prob --thread $n_core 1> $file_mod3
+done
+
+# open chromatin without flip
+for k in 1 2 3 4 5 6
+do
+	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
+	file_prob=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_noflip_prob.mat4d'
+	file_mod1=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_noflip_model.mat'
+	file_mod2=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_noflip_nucleosomes_fragment_center_model.mat'
+	file_mod3=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_noflip_sequences_model.mat'
+	echo "$file_prob $seed" >> $file_seed
+	bin/EMRead      --read $file_mat_open  --class $k --shift $n_shift --iter $n_iter --seed $seed --thread $n_core --out $file_prob
+	bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
+	bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2
+	bin/ProbToModel --seq  $file_mat_seq  --prob $file_prob --thread $n_core 1> $file_mod3
+done
diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_0/classification_ebf1_motif.R b/scripts/10xgenomics_PBMC_5k_motifs_classification_0/classification_ebf1_motif.R
new file mode 100644
index 0000000..c15695e
--- /dev/null
+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_0/classification_ebf1_motif.R
@@ -0,0 +1,171 @@
+# work from the project directory; every relative path below is resolved from it
+setwd(file.path("", "local", "groux", "scATAC-seq"))  # "/local/groux/scATAC-seq"
+# libraries
+library(RColorBrewer)
+
+# project helpers; presumably defines read.read.models(), read.sequence.models() and plot.logo() used below -- TODO confirm
+source(file.path("scripts", "functions.R"))
+
+# the minimum number of classes searched
+k.min = 1
+# the maximum number of classes searched
+k.max = 6
+
+# path to the images for the logo
+path.a = file.path("res", "A.png")
+path.c = file.path("res", "C.png")
+path.g = file.path("res", "G.png")
+path.t = file.path("res", "T.png")
+
+################## open chromatin patterns around ebf1 motifs with flip ##################
+# For each number of classes k, reads the class models and writes one PNG with one panel per class.
+for(k in k.min:k.max)
+{
+  res.dir = file.path("results", "10xgenomics_PBMC_5k_motifs_classification_0")
+  # open chromatin models and class probabilities
+  data       = read.read.models(file.path(res.dir,
+                                          sprintf("ebf1_motifs_10e-6_open_bin1bp_read_atac_%dclass_model.mat", k)))
+  model.open = data$models
+  model.prob = data$prob
+  data = NULL
+  # nucleosomes
+  model.nucl = read.read.models(file.path(res.dir,
+                                          sprintf("ebf1_motifs_10e-6_open_bin1bp_read_atac_%dclass_nucleosomes_fragment_center_model.mat", k)))$models
+  # sequence
+  model.seq  = read.sequence.models(file.path(res.dir,
+                                              sprintf("ebf1_motifs_10e-6_open_bin1bp_read_atac_%dclass_sequences_model.mat", k)))$models
+
+  # plot classes on a 5 x 2 grid of panels, filled column by column
+  col = brewer.pal(3, "Set1")
+  png(filename=file.path(res.dir,
+                         sprintf("ebf1_motifs_10e-6_classification_open_bin1bp_%dclass.png", k)),
+      units="in", res=720, width=18, height=12)
+  layout(matrix(1:10, nrow=5, ncol=2, byrow=FALSE))
+  # order from most to least probable class
+  ord      = order(model.prob, decreasing=TRUE)
+  ref.open = model.open[ord,, drop=FALSE]
+  ref.nucl = model.nucl[ord,, drop=FALSE]
+  ref.seq  = model.seq[,,ord, drop=FALSE]
+  prob     = model.prob[ord]
+  n.class  = nrow(ref.open)
+  n.pos    = ncol(ref.open)
+  for(i in seq_len(n.class))
+  { # plot logo, titled with the index the class had before sorting and its probability
+    plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t,
+              main=sprintf("class %d (p=%.2f)", ord[i], prob[i]))
+    # x-axis: ticks at 0, n.pos/2 and n.pos, labelled -n.pos, 0 and n.pos
+    x.lab = seq(-n.pos, n.pos, length.out=3)
+    x.at  = (x.lab + n.pos) / 2
+    axis(1, at=x.at, labels=x.lab)
+    # y-axis: ticks at 0, 0.5 and 1
+    # NOTE(review): these ticks sit at 0, 0.5 and 1 whereas the curves below are rescaled to [0,2] -- confirm this is intended
+    y.at = seq(0, 1, 0.5)
+    axis(2, at=y.at, labels=y.at)
+    # plot signal, each curve divided by its own maximum (multiplies by 2 because the y-axis goes to 2 bits)
+    lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=1, col=col[1])
+    lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=1, col=col[2])
+  }
+  # zoom on columns 380 to 420, drawn as an inset on top of each panel
+  idx   = 380:420
+  row_n = 1 # row counter
+  col_n = 1 # column counter
+  for(i in seq_len(n.class))
+  { # inset position (NDC coordinates of the figure region); new=TRUE draws without starting a new page
+    right  = 0.5*col_n - 0.01
+    left   = right - 0.2
+    bottom = 1-(row_n*(0.2))+0.05
+    top    = bottom + 0.15
+    par(fig=c(left, right, bottom, top), new=TRUE)
+    plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t)
+    # plot signal, still divided by the maximum of the whole row (multiplies by 2 because the y-axis goes to 2 bits)
+    lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=1, col=col[1])
+    lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=1, col=col[2])
+    x.at = seq_along(idx) # x-axis: one tick per column of the zoom window
+    axis(1, at=x.at, labels=x.at)
+    y.at = seq(0, 2, by=1) # y-axis
+    axis(2, at=y.at, labels=y.at)
+    row_n = row_n + 1
+    if(i %% 5 == 0) # a column holds 5 panels: go to the top of the next column
+    { col_n = col_n + 1
+      row_n = 1
+    }
+  }
+  dev.off()
+}
+
+
+################## open chromatin patterns around ebf1 motifs without flip ##################
+# For each number of classes k, reads the class models and writes one PNG with one panel per class.
+for(k in k.min:k.max)
+{
+  res.dir = file.path("results", "10xgenomics_PBMC_5k_motifs_classification_0")
+  # open chromatin models and class probabilities
+  data       = read.read.models(file.path(res.dir,
+                                          sprintf("ebf1_motifs_10e-6_open_bin1bp_read_atac_%dclass_noflip_model.mat", k)))
+  model.open = data$models
+  model.prob = data$prob
+  data = NULL
+  # nucleosomes
+  model.nucl = read.read.models(file.path(res.dir,
+                                          sprintf("ebf1_motifs_10e-6_open_bin1bp_read_atac_%dclass_noflip_nucleosomes_fragment_center_model.mat", k)))$models
+  # sequence
+  model.seq  = read.sequence.models(file.path(res.dir,
+                                              sprintf("ebf1_motifs_10e-6_open_bin1bp_read_atac_%dclass_noflip_sequences_model.mat", k)))$models
+
+  # plot classes on a 5 x 2 grid of panels, filled column by column
+  col = brewer.pal(3, "Set1")
+  png(filename=file.path(res.dir,
+                         sprintf("ebf1_motifs_10e-6_classification_open_bin1bp_%dclass_noflip.png", k)),
+      units="in", res=720, width=18, height=12)
+  layout(matrix(1:10, nrow=5, ncol=2, byrow=FALSE))
+  # order from most to least probable class
+  ord      = order(model.prob, decreasing=TRUE)
+  ref.open = model.open[ord,, drop=FALSE]
+  ref.nucl = model.nucl[ord,, drop=FALSE]
+  ref.seq  = model.seq[,,ord, drop=FALSE]
+  prob     = model.prob[ord]
+  n.class  = nrow(ref.open)
+  n.pos    = ncol(ref.open)
+  for(i in seq_len(n.class))
+  { # plot logo, titled with the index the class had before sorting and its probability
+    plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t,
+              main=sprintf("class %d (p=%.2f)", ord[i], prob[i]))
+    # x-axis: ticks at 0, n.pos/2 and n.pos, labelled -n.pos, 0 and n.pos
+    x.lab = seq(-n.pos, n.pos, length.out=3)
+    x.at  = (x.lab + n.pos) / 2
+    axis(1, at=x.at, labels=x.lab)
+    # y-axis: ticks at 0, 0.5 and 1
+    # NOTE(review): these ticks sit at 0, 0.5 and 1 whereas the curves below are rescaled to [0,2] -- confirm this is intended
+    y.at = seq(0, 1, 0.5)
+    axis(2, at=y.at, labels=y.at)
+    # plot signal, each curve divided by its own maximum (multiplies by 2 because the y-axis goes to 2 bits)
+    lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=1, col=col[1])
+    lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=1, col=col[2])
+  }
+  # zoom on columns 380 to 420, drawn as an inset on top of each panel
+  idx   = 380:420
+  row_n = 1 # row counter
+  col_n = 1 # column counter
+  for(i in seq_len(n.class))
+  { # inset position (NDC coordinates of the figure region); new=TRUE draws without starting a new page
+    right  = 0.5*col_n - 0.01
+    left   = right - 0.2
+    bottom = 1-(row_n*(0.2))+0.05
+    top    = bottom + 0.15
+    par(fig=c(left, right, bottom, top), new=TRUE)
+    plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t)
+    # plot signal, still divided by the maximum of the whole row (multiplies by 2 because the y-axis goes to 2 bits)
+    lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=1, col=col[1])
+    lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=1, col=col[2])
+    x.at = seq_along(idx) # x-axis: one tick per column of the zoom window
+    axis(1, at=x.at, labels=x.at)
+    y.at = seq(0, 2, by=1) # y-axis
+    axis(2, at=y.at, labels=y.at)
+    row_n = row_n + 1
+    if(i %% 5 == 0) # a column holds 5 panels: go to the top of the next column
+    { col_n = col_n + 1
+      row_n = 1
+    }
+  }
+  dev.off()
+}
diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_0/classification_ebf1_motif.sh b/scripts/10xgenomics_PBMC_5k_motifs_classification_0/classification_ebf1_motif.sh
new file mode 100755
index 0000000..2dbcbda
--- /dev/null
+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_0/classification_ebf1_motif.sh
@@ -0,0 +1,49 @@
+# Settings shared by the two classification loops below; all paths are relative to the working directory.
+## directories
+results_dir="results/10xgenomics_PBMC_5k_motifs_classification_0"
+data_dir="data/10xgenomics_PBMC_5k_motifs"
+## input matrices
+file_mat_open="${data_dir}/ebf1_motifs_10e-6_open_bin1bp_read_atac.mat"
+file_mat_nucl="${data_dir}/ebf1_motifs_10e-6_nucleosomes_bin1bp_fragment_center.mat"
+file_mat_seq="${data_dir}/ebf1_motifs_10e-6_sequences.mat"
+
+## file to which each run appends "<probability file> <seed>"
+file_seed="${results_dir}/ebf1_motifs_10e-6_seed.txt"
+# make sure the output directory and the seed file exist
+mkdir -p "${results_dir}"
+touch "${file_seed}"
+
+# parameters handed to the programs in the loops below
+n_iter=20    # --iter
+n_shift=1    # --shift
+n_core=6     # --thread
+
+# open chromatin with flip
+for k in 1 2 3 4 5 6
+do
+	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
+	file_prob=$results_dir/'ebf1_motifs_10e-6_open_bin1bp_read_atac_'$k'class_prob.mat4d'
+	file_mod1=$results_dir/'ebf1_motifs_10e-6_open_bin1bp_read_atac_'$k'class_model.mat'
+	file_mod2=$results_dir/'ebf1_motifs_10e-6_open_bin1bp_read_atac_'$k'class_nucleosomes_fragment_center_model.mat'
+	file_mod3=$results_dir/'ebf1_motifs_10e-6_open_bin1bp_read_atac_'$k'class_sequences_model.mat'
+	echo "$file_prob $seed" >> $file_seed
+	bin/EMRead      --read $file_mat_open  --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
+	bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
+	bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2
+	bin/ProbToModel --seq  $file_mat_seq  --prob $file_prob --thread $n_core 1> $file_mod3
+done
+
+# open chromatin without flip
+for k in 1 2 3 4 5 6
+do
+	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
+	file_prob=$results_dir/'ebf1_motifs_10e-6_open_bin1bp_read_atac_'$k'class_noflip_prob.mat4d'
+	file_mod1=$results_dir/'ebf1_motifs_10e-6_open_bin1bp_read_atac_'$k'class_noflip_model.mat'
+	file_mod2=$results_dir/'ebf1_motifs_10e-6_open_bin1bp_read_atac_'$k'class_noflip_nucleosomes_fragment_center_model.mat'
+	file_mod3=$results_dir/'ebf1_motifs_10e-6_open_bin1bp_read_atac_'$k'class_noflip_sequences_model.mat'
+	echo "$file_prob $seed" >> $file_seed
+	bin/EMRead      --read $file_mat_open  --class $k --shift $n_shift --iter $n_iter --seed $seed --thread $n_core --out $file_prob
+	bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
+	bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2
+	bin/ProbToModel --seq  $file_mat_seq  --prob $file_prob --thread $n_core 1> $file_mod3
+done
diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_0/classification_myc_motif.R b/scripts/10xgenomics_PBMC_5k_motifs_classification_0/classification_myc_motif.R
new file mode 100644
index 0000000..a4d12ad
--- /dev/null
+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_0/classification_myc_motif.R
@@ -0,0 +1,171 @@
+# work from the project directory; every relative path below is resolved from it
+setwd(file.path("", "local", "groux", "scATAC-seq"))  # "/local/groux/scATAC-seq"
+# libraries
+library(RColorBrewer)
+
+# project helpers; presumably defines read.read.models(), read.sequence.models() and plot.logo() used below -- TODO confirm
+source(file.path("scripts", "functions.R"))
+
+# the minimum number of classes searched
+k.min = 1
+# the maximum number of classes searched
+k.max = 6
+
+# path to the images for the logo
+path.a = file.path("res", "A.png")
+path.c = file.path("res", "C.png")
+path.g = file.path("res", "G.png")
+path.t = file.path("res", "T.png")
+
+################## open chromatin patterns around myc motifs with flip ##################
+# For each number of classes k, reads the class models and writes one PNG with one panel per class.
+for(k in k.min:k.max)
+{
+  res.dir = file.path("results", "10xgenomics_PBMC_5k_motifs_classification_0")
+  # open chromatin models and class probabilities
+  data       = read.read.models(file.path(res.dir,
+                                          sprintf("myc_motifs_10e-6_open_bin1bp_read_atac_%dclass_model.mat", k)))
+  model.open = data$models
+  model.prob = data$prob
+  data = NULL
+  # nucleosomes
+  model.nucl = read.read.models(file.path(res.dir,
+                                          sprintf("myc_motifs_10e-6_open_bin1bp_read_atac_%dclass_nucleosomes_fragment_center_model.mat", k)))$models
+  # sequence
+  model.seq  = read.sequence.models(file.path(res.dir,
+                                              sprintf("myc_motifs_10e-6_open_bin1bp_read_atac_%dclass_sequences_model.mat", k)))$models
+
+  # plot classes on a 5 x 2 grid of panels, filled column by column
+  col = brewer.pal(3, "Set1")
+  png(filename=file.path(res.dir,
+                         sprintf("myc_motifs_10e-6_classification_open_bin1bp_%dclass.png", k)),
+      units="in", res=720, width=18, height=12)
+  layout(matrix(1:10, nrow=5, ncol=2, byrow=FALSE))
+  # order from most to least probable class
+  ord      = order(model.prob, decreasing=TRUE)
+  ref.open = model.open[ord,, drop=FALSE]
+  ref.nucl = model.nucl[ord,, drop=FALSE]
+  ref.seq  = model.seq[,,ord, drop=FALSE]
+  prob     = model.prob[ord]
+  n.class  = nrow(ref.open)
+  n.pos    = ncol(ref.open)
+  for(i in seq_len(n.class))
+  { # plot logo, titled with the index the class had before sorting and its probability
+    plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t,
+              main=sprintf("class %d (p=%.2f)", ord[i], prob[i]))
+    # x-axis: ticks at 0, n.pos/2 and n.pos, labelled -n.pos, 0 and n.pos
+    x.lab = seq(-n.pos, n.pos, length.out=3)
+    x.at  = (x.lab + n.pos) / 2
+    axis(1, at=x.at, labels=x.lab)
+    # y-axis: ticks at 0, 0.5 and 1
+    # NOTE(review): these ticks sit at 0, 0.5 and 1 whereas the curves below are rescaled to [0,2] -- confirm this is intended
+    y.at = seq(0, 1, 0.5)
+    axis(2, at=y.at, labels=y.at)
+    # plot signal, each curve divided by its own maximum (multiplies by 2 because the y-axis goes to 2 bits)
+    lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=1, col=col[1])
+    lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=1, col=col[2])
+  }
+  # zoom on columns 380 to 420, drawn as an inset on top of each panel
+  idx   = 380:420
+  row_n = 1 # row counter
+  col_n = 1 # column counter
+  for(i in seq_len(n.class))
+  { # inset position (NDC coordinates of the figure region); new=TRUE draws without starting a new page
+    right  = 0.5*col_n - 0.01
+    left   = right - 0.2
+    bottom = 1-(row_n*(0.2))+0.05
+    top    = bottom + 0.15
+    par(fig=c(left, right, bottom, top), new=TRUE)
+    plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t)
+    # plot signal, still divided by the maximum of the whole row (multiplies by 2 because the y-axis goes to 2 bits)
+    lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=1, col=col[1])
+    lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=1, col=col[2])
+    x.at = seq_along(idx) # x-axis: one tick per column of the zoom window
+    axis(1, at=x.at, labels=x.at)
+    y.at = seq(0, 2, by=1) # y-axis
+    axis(2, at=y.at, labels=y.at)
+    row_n = row_n + 1
+    if(i %% 5 == 0) # a column holds 5 panels: go to the top of the next column
+    { col_n = col_n + 1
+      row_n = 1
+    }
+  }
+  dev.off()
+}
+
+
+################## open chromatin patterns around myc motifs without flip ##################
+# For each number of classes k, reads the class models and writes one PNG with one panel per class.
+for(k in k.min:k.max)
+{
+  res.dir = file.path("results", "10xgenomics_PBMC_5k_motifs_classification_0")
+  # open chromatin models and class probabilities
+  data       = read.read.models(file.path(res.dir,
+                                          sprintf("myc_motifs_10e-6_open_bin1bp_read_atac_%dclass_noflip_model.mat", k)))
+  model.open = data$models
+  model.prob = data$prob
+  data = NULL
+  # nucleosomes
+  model.nucl = read.read.models(file.path(res.dir,
+                                          sprintf("myc_motifs_10e-6_open_bin1bp_read_atac_%dclass_noflip_nucleosomes_fragment_center_model.mat", k)))$models
+  # sequence
+  model.seq  = read.sequence.models(file.path(res.dir,
+                                              sprintf("myc_motifs_10e-6_open_bin1bp_read_atac_%dclass_noflip_sequences_model.mat", k)))$models
+
+  # plot classes on a 5 x 2 grid of panels, filled column by column
+  col = brewer.pal(3, "Set1")
+  png(filename=file.path(res.dir,
+                         sprintf("myc_motifs_10e-6_classification_open_bin1bp_%dclass_noflip.png", k)),
+      units="in", res=720, width=18, height=12)
+  layout(matrix(1:10, nrow=5, ncol=2, byrow=FALSE))
+  # order from most to least probable class
+  ord      = order(model.prob, decreasing=TRUE)
+  ref.open = model.open[ord,, drop=FALSE]
+  ref.nucl = model.nucl[ord,, drop=FALSE]
+  ref.seq  = model.seq[,,ord, drop=FALSE]
+  prob     = model.prob[ord]
+  n.class  = nrow(ref.open)
+  n.pos    = ncol(ref.open)
+  for(i in seq_len(n.class))
+  { # plot logo, titled with the index the class had before sorting and its probability
+    plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t,
+              main=sprintf("class %d (p=%.2f)", ord[i], prob[i]))
+    # x-axis: ticks at 0, n.pos/2 and n.pos, labelled -n.pos, 0 and n.pos
+    x.lab = seq(-n.pos, n.pos, length.out=3)
+    x.at  = (x.lab + n.pos) / 2
+    axis(1, at=x.at, labels=x.lab)
+    # y-axis: ticks at 0, 0.5 and 1
+    # NOTE(review): these ticks sit at 0, 0.5 and 1 whereas the curves below are rescaled to [0,2] -- confirm this is intended
+    y.at = seq(0, 1, 0.5)
+    axis(2, at=y.at, labels=y.at)
+    # plot signal, each curve divided by its own maximum (multiplies by 2 because the y-axis goes to 2 bits)
+    lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=1, col=col[1])
+    lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=1, col=col[2])
+  }
+  # zoom on columns 380 to 420, drawn as an inset on top of each panel
+  idx   = 380:420
+  row_n = 1 # row counter
+  col_n = 1 # column counter
+  for(i in seq_len(n.class))
+  { # inset position (NDC coordinates of the figure region); new=TRUE draws without starting a new page
+    right  = 0.5*col_n - 0.01
+    left   = right - 0.2
+    bottom = 1-(row_n*(0.2))+0.05
+    top    = bottom + 0.15
+    par(fig=c(left, right, bottom, top), new=TRUE)
+    plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t)
+    # plot signal, still divided by the maximum of the whole row (multiplies by 2 because the y-axis goes to 2 bits)
+    lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=1, col=col[1])
+    lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=1, col=col[2])
+    x.at = seq_along(idx) # x-axis: one tick per column of the zoom window
+    axis(1, at=x.at, labels=x.at)
+    y.at = seq(0, 2, by=1) # y-axis
+    axis(2, at=y.at, labels=y.at)
+    row_n = row_n + 1
+    if(i %% 5 == 0) # a column holds 5 panels: go to the top of the next column
+    { col_n = col_n + 1
+      row_n = 1
+    }
+  }
+  dev.off()
+}
diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_0/classification_myc_motif.sh b/scripts/10xgenomics_PBMC_5k_motifs_classification_0/classification_myc_motif.sh
new file mode 100755
index 0000000..062819e
--- /dev/null
+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_0/classification_myc_motif.sh
@@ -0,0 +1,49 @@
+# Settings shared by the two classification loops below; all paths are relative to the working directory.
+## directories
+results_dir="results/10xgenomics_PBMC_5k_motifs_classification_0"
+data_dir="data/10xgenomics_PBMC_5k_motifs"
+## input matrices
+file_mat_open="${data_dir}/myc_motifs_10e-6_open_bin1bp_read_atac.mat"
+file_mat_nucl="${data_dir}/myc_motifs_10e-6_nucleosomes_bin1bp_fragment_center.mat"
+file_mat_seq="${data_dir}/myc_motifs_10e-6_sequences.mat"
+
+## file to which each run appends "<probability file> <seed>"
+file_seed="${results_dir}/myc_motifs_10e-6_seed.txt"
+# make sure the output directory and the seed file exist
+mkdir -p "${results_dir}"
+touch "${file_seed}"
+
+# parameters handed to the programs in the loops below
+n_iter=20    # --iter
+n_shift=1    # --shift
+n_core=6     # --thread
+
+# open chromatin with flip
+for k in 1 2 3 4 5 6
+do
+	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
+	file_prob=$results_dir/'myc_motifs_10e-6_open_bin1bp_read_atac_'$k'class_prob.mat4d'
+	file_mod1=$results_dir/'myc_motifs_10e-6_open_bin1bp_read_atac_'$k'class_model.mat'
+	file_mod2=$results_dir/'myc_motifs_10e-6_open_bin1bp_read_atac_'$k'class_nucleosomes_fragment_center_model.mat'
+	file_mod3=$results_dir/'myc_motifs_10e-6_open_bin1bp_read_atac_'$k'class_sequences_model.mat'
+	echo "$file_prob $seed" >> $file_seed
+	bin/EMRead      --read $file_mat_open  --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
+	bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
+	bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2
+	bin/ProbToModel --seq  $file_mat_seq  --prob $file_prob --thread $n_core 1> $file_mod3
+done
+
+# open chromatin without flip
+for k in 1 2 3 4 5 6
+do
+	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
+	file_prob=$results_dir/'myc_motifs_10e-6_open_bin1bp_read_atac_'$k'class_noflip_prob.mat4d'
+	file_mod1=$results_dir/'myc_motifs_10e-6_open_bin1bp_read_atac_'$k'class_noflip_model.mat'
+	file_mod2=$results_dir/'myc_motifs_10e-6_open_bin1bp_read_atac_'$k'class_noflip_nucleosomes_fragment_center_model.mat'
+	file_mod3=$results_dir/'myc_motifs_10e-6_open_bin1bp_read_atac_'$k'class_noflip_sequences_model.mat'
+	echo "$file_prob $seed" >> $file_seed
+	bin/EMRead      --read $file_mat_open  --class $k --shift $n_shift --iter $n_iter --seed $seed --thread $n_core --out $file_prob
+	bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
+	bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2
+	bin/ProbToModel --seq  $file_mat_seq  --prob $file_prob --thread $n_core 1> $file_mod3
+done
diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_0/classification_sp1_motif.R b/scripts/10xgenomics_PBMC_5k_motifs_classification_0/classification_sp1_motif.R
new file mode 100644
index 0000000..272cf1f
--- /dev/null
+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_0/classification_sp1_motif.R
@@ -0,0 +1,171 @@
+# work from the project directory; every relative path below is resolved from it
+setwd(file.path("", "local", "groux", "scATAC-seq"))  # "/local/groux/scATAC-seq"
+# libraries
+library(RColorBrewer)
+
+# project helpers; presumably defines read.read.models(), read.sequence.models() and plot.logo() used below -- TODO confirm
+source(file.path("scripts", "functions.R"))
+
+# the minimum number of classes searched
+k.min = 1
+# the maximum number of classes searched
+k.max = 6
+
+# path to the images for the logo
+path.a = file.path("res", "A.png")
+path.c = file.path("res", "C.png")
+path.g = file.path("res", "G.png")
+path.t = file.path("res", "T.png")
+
+################## open chromatin patterns around sp1 motifs with flip ##################
+# For each number of classes k, reads the class models and writes one PNG with one panel per class.
+for(k in k.min:k.max)
+{
+  res.dir = file.path("results", "10xgenomics_PBMC_5k_motifs_classification_0")
+  # open chromatin models and class probabilities
+  data       = read.read.models(file.path(res.dir,
+                                          sprintf("sp1_motifs_10e-7_open_bin1bp_read_atac_%dclass_model.mat", k)))
+  model.open = data$models
+  model.prob = data$prob
+  data = NULL
+  # nucleosomes
+  model.nucl = read.read.models(file.path(res.dir,
+                                          sprintf("sp1_motifs_10e-7_open_bin1bp_read_atac_%dclass_nucleosomes_fragment_center_model.mat", k)))$models
+  # sequence
+  model.seq  = read.sequence.models(file.path(res.dir,
+                                              sprintf("sp1_motifs_10e-7_open_bin1bp_read_atac_%dclass_sequences_model.mat", k)))$models
+
+  # plot classes on a 5 x 2 grid of panels, filled column by column
+  col = brewer.pal(3, "Set1")
+  png(filename=file.path(res.dir,
+                         sprintf("sp1_motifs_10e-7_classification_open_bin1bp_%dclass.png", k)),
+      units="in", res=720, width=18, height=12)
+  layout(matrix(1:10, nrow=5, ncol=2, byrow=FALSE))
+  # order from most to least probable class
+  ord      = order(model.prob, decreasing=TRUE)
+  ref.open = model.open[ord,, drop=FALSE]
+  ref.nucl = model.nucl[ord,, drop=FALSE]
+  ref.seq  = model.seq[,,ord, drop=FALSE]
+  prob     = model.prob[ord]
+  n.class  = nrow(ref.open)
+  n.pos    = ncol(ref.open)
+  for(i in seq_len(n.class))
+  { # plot logo, titled with the index the class had before sorting and its probability
+    plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t,
+              main=sprintf("class %d (p=%.2f)", ord[i], prob[i]))
+    # x-axis: ticks at 0, n.pos/2 and n.pos, labelled -n.pos, 0 and n.pos
+    x.lab = seq(-n.pos, n.pos, length.out=3)
+    x.at  = (x.lab + n.pos) / 2
+    axis(1, at=x.at, labels=x.lab)
+    # y-axis: ticks at 0, 0.5 and 1
+    # NOTE(review): these ticks sit at 0, 0.5 and 1 whereas the curves below are rescaled to [0,2] -- confirm this is intended
+    y.at = seq(0, 1, 0.5)
+    axis(2, at=y.at, labels=y.at)
+    # plot signal, each curve divided by its own maximum (multiplies by 2 because the y-axis goes to 2 bits)
+    lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=1, col=col[1])
+    lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=1, col=col[2])
+  }
+  # zoom on columns 380 to 420, drawn as an inset on top of each panel
+  idx   = 380:420
+  row_n = 1 # row counter
+  col_n = 1 # column counter
+  for(i in seq_len(n.class))
+  { # inset position (NDC coordinates of the figure region); new=TRUE draws without starting a new page
+    right  = 0.5*col_n - 0.01
+    left   = right - 0.2
+    bottom = 1-(row_n*(0.2))+0.05
+    top    = bottom + 0.15
+    par(fig=c(left, right, bottom, top), new=TRUE)
+    plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t)
+    # plot signal, still divided by the maximum of the whole row (multiplies by 2 because the y-axis goes to 2 bits)
+    lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=1, col=col[1])
+    lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=1, col=col[2])
+    x.at = seq_along(idx) # x-axis: one tick per column of the zoom window
+    axis(1, at=x.at, labels=x.at)
+    y.at = seq(0, 2, by=1) # y-axis
+    axis(2, at=y.at, labels=y.at)
+    row_n = row_n + 1
+    if(i %% 5 == 0) # a column holds 5 panels: go to the top of the next column
+    { col_n = col_n + 1
+      row_n = 1
+    }
+  }
+  dev.off()
+}
+
+
+################## open chromatin patterns around sp1 motifs without flip ##################
+# For each number of classes k, reads the class models and writes one PNG with one panel per class.
+for(k in k.min:k.max)
+{
+  res.dir = file.path("results", "10xgenomics_PBMC_5k_motifs_classification_0")
+  # open chromatin models and class probabilities
+  data       = read.read.models(file.path(res.dir,
+                                          sprintf("sp1_motifs_10e-7_open_bin1bp_read_atac_%dclass_noflip_model.mat", k)))
+  model.open = data$models
+  model.prob = data$prob
+  data = NULL
+  # nucleosomes
+  model.nucl = read.read.models(file.path(res.dir,
+                                          sprintf("sp1_motifs_10e-7_open_bin1bp_read_atac_%dclass_noflip_nucleosomes_fragment_center_model.mat", k)))$models
+  # sequence
+  model.seq  = read.sequence.models(file.path(res.dir,
+                                              sprintf("sp1_motifs_10e-7_open_bin1bp_read_atac_%dclass_noflip_sequences_model.mat", k)))$models
+
+  # plot classes on a 5 x 2 grid of panels, filled column by column
+  col = brewer.pal(3, "Set1")
+  png(filename=file.path(res.dir,
+                         sprintf("sp1_motifs_10e-7_classification_open_bin1bp_%dclass_noflip.png", k)),
+      units="in", res=720, width=18, height=12)
+  layout(matrix(1:10, nrow=5, ncol=2, byrow=FALSE))
+  # order from most to least probable class
+  ord      = order(model.prob, decreasing=TRUE)
+  ref.open = model.open[ord,, drop=FALSE]
+  ref.nucl = model.nucl[ord,, drop=FALSE]
+  ref.seq  = model.seq[,,ord, drop=FALSE]
+  prob     = model.prob[ord]
+  n.class  = nrow(ref.open)
+  n.pos    = ncol(ref.open)
+  for(i in seq_len(n.class))
+  { # plot logo, titled with the index the class had before sorting and its probability
+    plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t,
+              main=sprintf("class %d (p=%.2f)", ord[i], prob[i]))
+    # x-axis: ticks at 0, n.pos/2 and n.pos, labelled -n.pos, 0 and n.pos
+    x.lab = seq(-n.pos, n.pos, length.out=3)
+    x.at  = (x.lab + n.pos) / 2
+    axis(1, at=x.at, labels=x.lab)
+    # y-axis: ticks at 0, 0.5 and 1
+    # NOTE(review): these ticks sit at 0, 0.5 and 1 whereas the curves below are rescaled to [0,2] -- confirm this is intended
+    y.at = seq(0, 1, 0.5)
+    axis(2, at=y.at, labels=y.at)
+    # plot signal, each curve divided by its own maximum (multiplies by 2 because the y-axis goes to 2 bits)
+    lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=1, col=col[1])
+    lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=1, col=col[2])
+  }
+  # zoom on columns 380 to 420, drawn as an inset on top of each panel
+  idx   = 380:420
+  row_n = 1 # row counter
+  col_n = 1 # column counter
+  for(i in seq_len(n.class))
+  { # inset position (NDC coordinates of the figure region); new=TRUE draws without starting a new page
+    right  = 0.5*col_n - 0.01
+    left   = right - 0.2
+    bottom = 1-(row_n*(0.2))+0.05
+    top    = bottom + 0.15
+    par(fig=c(left, right, bottom, top), new=TRUE)
+    plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t)
+    # plot signal, still divided by the maximum of the whole row (multiplies by 2 because the y-axis goes to 2 bits)
+    lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=1, col=col[1])
+    lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=1, col=col[2])
+    x.at = seq_along(idx) # x-axis: one tick per column of the zoom window
+    axis(1, at=x.at, labels=x.at)
+    y.at = seq(0, 2, by=1) # y-axis
+    axis(2, at=y.at, labels=y.at)
+    row_n = row_n + 1
+    if(i %% 5 == 0) # a column holds 5 panels: go to the top of the next column
+    { col_n = col_n + 1
+      row_n = 1
+    }
+  }
+  dev.off()
+}
diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_0/classification_sp1_motif.sh b/scripts/10xgenomics_PBMC_5k_motifs_classification_0/classification_sp1_motif.sh
new file mode 100755
index 0000000..7c74512
--- /dev/null
+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_0/classification_sp1_motif.sh
@@ -0,0 +1,49 @@
+# Settings shared by the two classification loops below; all paths are relative to the working directory.
+## directories
+results_dir="results/10xgenomics_PBMC_5k_motifs_classification_0"
+data_dir="data/10xgenomics_PBMC_5k_motifs"
+## input matrices
+file_mat_open="${data_dir}/sp1_motifs_10e-7_open_bin1bp_read_atac.mat"
+file_mat_nucl="${data_dir}/sp1_motifs_10e-7_nucleosomes_bin1bp_fragment_center.mat"
+file_mat_seq="${data_dir}/sp1_motifs_10e-7_sequences.mat"
+
+## file to which each run appends "<probability file> <seed>"
+file_seed="${results_dir}/sp1_motifs_10e-7_seed.txt"
+# make sure the output directory and the seed file exist
+mkdir -p "${results_dir}"
+touch "${file_seed}"
+
+# parameters handed to the programs in the loops below
+n_iter=20    # --iter
+n_shift=1    # --shift
+n_core=6     # --thread
+
+# open chromatin with flip
+for k in 1 2 3 4 5 6
+do
+	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
+	file_prob=$results_dir/'sp1_motifs_10e-7_open_bin1bp_read_atac_'$k'class_prob.mat4d'
+	file_mod1=$results_dir/'sp1_motifs_10e-7_open_bin1bp_read_atac_'$k'class_model.mat'
+	file_mod2=$results_dir/'sp1_motifs_10e-7_open_bin1bp_read_atac_'$k'class_nucleosomes_fragment_center_model.mat'
+	file_mod3=$results_dir/'sp1_motifs_10e-7_open_bin1bp_read_atac_'$k'class_sequences_model.mat'
+	echo "$file_prob $seed" >> $file_seed
+	bin/EMRead      --read $file_mat_open  --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
+	bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
+	bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2
+	bin/ProbToModel --seq  $file_mat_seq  --prob $file_prob --thread $n_core 1> $file_mod3
+done
+
+# open chromatin without flip
+for k in 1 2 3 4 5 6
+do
+	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
+	file_prob=$results_dir/'sp1_motifs_10e-7_open_bin1bp_read_atac_'$k'class_noflip_prob.mat4d'
+	file_mod1=$results_dir/'sp1_motifs_10e-7_open_bin1bp_read_atac_'$k'class_noflip_model.mat'
+	file_mod2=$results_dir/'sp1_motifs_10e-7_open_bin1bp_read_atac_'$k'class_noflip_nucleosomes_fragment_center_model.mat'
+	file_mod3=$results_dir/'sp1_motifs_10e-7_open_bin1bp_read_atac_'$k'class_noflip_sequences_model.mat'
+	echo "$file_prob $seed" >> $file_seed
+	bin/EMRead      --read $file_mat_open  --class $k --shift $n_shift --iter $n_iter --seed $seed --thread $n_core --out $file_prob
+	bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
+	bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2
+	bin/ProbToModel --seq  $file_mat_seq  --prob $file_prob --thread $n_core 1> $file_mod3
+done
diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_0/run_all.sh b/scripts/10xgenomics_PBMC_5k_motifs_classification_0/run_all.sh
new file mode 100755
index 0000000..11b7a41
--- /dev/null
+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_0/run_all.sh
@@ -0,0 +1,14 @@
+# runs every classification of this directory, then the R analyses (paths are relative to the project root)
+dir='scripts/10xgenomics_PBMC_5k_motifs_classification_0'
+
+# classification, one script per motif
+$dir/classification_ctcf_motif.sh
+$dir/classification_myc_motif.sh
+$dir/classification_ebf1_motif.sh
+$dir/classification_sp1_motif.sh
+
+# analysis of classification results
+Rscript $dir/classification_ctcf_motif.R
+Rscript $dir/classification_myc_motif.R
+Rscript $dir/classification_ebf1_motif.R
+Rscript $dir/classification_sp1_motif.R
diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_2/classification_ctcf_motif.sh b/scripts/10xgenomics_PBMC_5k_motifs_classification_2/classification_ctcf_motif.sh
index 013fa9f..7ae6bef 100755
--- a/scripts/10xgenomics_PBMC_5k_motifs_classification_2/classification_ctcf_motif.sh
+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_2/classification_ctcf_motif.sh
@@ -1,36 +1,36 @@
 # some paths
 ## directories
 results_dir='results/10xgenomics_PBMC_5k_motifs_classification_2'
 data_dir='data/10xgenomics_PBMC_5k_motifs'
 ## input
 file_mat_open="$data_dir/ctcf_motifs_10e-6_open_bin1bp_read_atac.mat"
 file_mat_1nucl="$data_dir/ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center.mat"
 file_mat_seq="$data_dir/ctcf_motifs_10e-6_sequences.mat"
 
 ## file with seeds
 file_seed=$results_dir'/ctcf_motifs_10e-6_seed.txt'
 
 mkdir -p $results_dir
 touch $file_seed
 
 # parameters
 n_iter='20'
 n_shift='21'
-n_core=28
+n_core=32
 
 # open chromatin and nucleosomes
 for k in 1 2 3 4 5 6 7 8 9 10
 do
 	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
 	file_prob=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_1nucl_bin1bp_fragment_center_'$k'class_prob.mat4d'
 	file_mod1=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_model.mat'
 	file_mod2=$results_dir/'ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_model.mat'
 	file_mod3=$results_dir/'ctcf_motifs_10e-6_sequences_'$k'class_model.mat'
 	file_aic=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_aic.txt'
 	echo "$file_prob $seed" >> $file_seed
 	bin/EMJoint     --read $file_mat_open  --seq  $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
 	bin/ProbToModel --read $file_mat_open  --prob $file_prob --thread $n_core 1> $file_mod1
 	bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2
 	bin/ProbToModel --seq  $file_mat_seq   --prob $file_prob --thread $n_core 1> $file_mod3
 	
 done
diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_2/classification_ebf1_motif.sh b/scripts/10xgenomics_PBMC_5k_motifs_classification_2/classification_ebf1_motif.sh
index e938ca5..870ae2d 100755
--- a/scripts/10xgenomics_PBMC_5k_motifs_classification_2/classification_ebf1_motif.sh
+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_2/classification_ebf1_motif.sh
@@ -1,36 +1,36 @@
 # some paths
 ## directories
 results_dir='results/10xgenomics_PBMC_5k_motifs_classification_2'
 data_dir='data/10xgenomics_PBMC_5k_motifs'
 ## input
 file_mat_open="$data_dir/ebf1_motifs_10e-6_open_bin1bp_read_atac.mat"
 file_mat_1nucl="$data_dir/ebf1_motifs_10e-6_1nucl_bin1bp_fragment_center.mat"
 file_mat_seq="$data_dir/ebf1_motifs_10e-6_sequences.mat"
 
 ## file with seeds
 file_seed=$results_dir'/ebf1_motifs_10e-6_seed.txt'
 
 mkdir -p $results_dir
 touch $file_seed
 
 # parameters
 n_iter='20'
 n_shift='21'
-n_core=28
+n_core=32
 
 # open chromatin and nucleosomes
 for k in 1 2 3 4 5 6 7 8 9 10
 do
 	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
 	file_prob=$results_dir/'ebf1_motifs_10e-6_open_bin1bp_read_atac_1nucl_bin1bp_fragment_center_'$k'class_prob.mat4d'
 	file_mod1=$results_dir/'ebf1_motifs_10e-6_open_bin1bp_read_atac_'$k'class_model.mat'
 	file_mod2=$results_dir/'ebf1_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_model.mat'
 	file_mod3=$results_dir/'ebf1_motifs_10e-6_sequences_'$k'class_model.mat'
 	file_aic=$results_dir/'ebf1_motifs_10e-6_open_bin1bp_read_atac_'$k'class_aic.txt'
 	echo "$file_prob $seed" >> $file_seed
 	bin/EMJoint     --read $file_mat_open  --seq  $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
 	bin/ProbToModel --read $file_mat_open  --prob $file_prob --thread $n_core 1> $file_mod1
 	bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2
 	bin/ProbToModel --seq  $file_mat_seq   --prob $file_prob --thread $n_core 1> $file_mod3
 	
 done
diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_2/classification_myc_motif.sh b/scripts/10xgenomics_PBMC_5k_motifs_classification_2/classification_myc_motif.sh
index 345c0cd..408aa18 100755
--- a/scripts/10xgenomics_PBMC_5k_motifs_classification_2/classification_myc_motif.sh
+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_2/classification_myc_motif.sh
@@ -1,36 +1,36 @@
 # some paths
 ## directories
 results_dir='results/10xgenomics_PBMC_5k_motifs_classification_2'
 data_dir='data/10xgenomics_PBMC_5k_motifs'
 ## input
 file_mat_open="$data_dir/myc_motifs_10e-6_open_bin1bp_read_atac.mat"
 file_mat_1nucl="$data_dir/myc_motifs_10e-6_1nucl_bin1bp_fragment_center.mat"
 file_mat_seq="$data_dir/myc_motifs_10e-6_sequences.mat"
 
 ## file with seeds
 file_seed=$results_dir'/myc_motifs_10e-6_seed.txt'
 
 mkdir -p $results_dir
 touch $file_seed
 
 # parameters
 n_iter='20'
 n_shift='21'
-n_core=28
+n_core=32
 
 # open chromatin and nucleosomes
 for k in 1 2 3 4 5 6 7 8 9 10
 do
 	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
 	file_prob=$results_dir/'myc_motifs_10e-6_open_bin1bp_read_atac_1nucl_bin1bp_fragment_center_'$k'class_prob.mat4d'
 	file_mod1=$results_dir/'myc_motifs_10e-6_open_bin1bp_read_atac_'$k'class_model.mat'
 	file_mod2=$results_dir/'myc_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_model.mat'
 	file_mod3=$results_dir/'myc_motifs_10e-6_sequences_'$k'class_model.mat'
 	file_aic=$results_dir/'myc_motifs_10e-6_open_bin1bp_read_atac_'$k'class_aic.txt'
 	echo "$file_prob $seed" >> $file_seed
 	bin/EMJoint     --read $file_mat_open  --seq  $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
 	bin/ProbToModel --read $file_mat_open  --prob $file_prob --thread $n_core 1> $file_mod1
 	bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2
 	bin/ProbToModel --seq  $file_mat_seq   --prob $file_prob --thread $n_core 1> $file_mod3
 	
 done
diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_3/classification_ctcf_motif.sh b/scripts/10xgenomics_PBMC_5k_motifs_classification_3/classification_ctcf_motif.sh
index bcaa4f4..8be1be7 100755
--- a/scripts/10xgenomics_PBMC_5k_motifs_classification_3/classification_ctcf_motif.sh
+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_3/classification_ctcf_motif.sh
@@ -1,38 +1,38 @@
 # some paths
 ## directories
 results_dir='results/10xgenomics_PBMC_5k_motifs_classification_3'
 data_dir='data/10xgenomics_PBMC_5k_motifs'
 ## input
 file_mat_open="$data_dir/ctcf_motifs_10e-6_open_bin1bp_read_atac.mat"
 file_mat_1nucl="$data_dir/ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center.mat"
 file_mat_nucl="$data_dir/ctcf_motifs_10e-6_nucleosomes_bin1bp_fragment_center.mat"
 file_mat_seq="$data_dir/ctcf_motifs_10e-6_sequences.mat"
 
 ## file with seeds
 file_seed=$results_dir'/ctcf_motifs_10e-6_seed.txt'
 
 mkdir -p $results_dir
 touch $file_seed
 
 # parameters
 n_iter='20'
 n_shift='21'
-n_core=28
+n_core=32
 
 # sequences
 for k in 1 2 3 4 5 6 7 8 9 10
 do
 	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
 	file_prob=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_sequences_'$k'class_prob.mat4d'
 	file_mod1=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_model.mat'
 	file_mod2=$results_dir/'ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_model.mat'
 	file_mod3=$results_dir/'ctcf_motifs_10e-6_nucleosomes_bin1bp_fragment_center_'$k'class_model.mat'
 	file_mod4=$results_dir/'ctcf_motifs_10e-6_sequences_'$k'class_model.mat'
 	file_aic=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_aic.txt'
 	echo "$file_prob $seed" >> $file_seed
 	bin/EMSequence  --seq  $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
 	bin/ProbToModel --read $file_mat_open  --prob $file_prob --thread $n_core 1> $file_mod1
 	bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2
 	bin/ProbToModel --read $file_mat_nucl  --prob $file_prob --thread $n_core 1> $file_mod3
 	bin/ProbToModel --seq  $file_mat_seq   --prob $file_prob --thread $n_core 1> $file_mod4
 done
diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_4/classification_ctcf_motif.sh b/scripts/10xgenomics_PBMC_5k_motifs_classification_4/classification_ctcf_motif.sh
index f9a59cf..920f9d3 100755
--- a/scripts/10xgenomics_PBMC_5k_motifs_classification_4/classification_ctcf_motif.sh
+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_4/classification_ctcf_motif.sh
@@ -1,38 +1,38 @@
 # some paths
 ## directories
 results_dir='results/10xgenomics_PBMC_5k_motifs_classification_4'
 data_dir='data/10xgenomics_PBMC_5k_motifs'
 ## input
 file_mat_open="$data_dir/ctcf_motifs_10e-6_open_bin1bp_read_atac.mat"
 file_mat_1nucl="$data_dir/ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center.mat"
 file_mat_nucl="$data_dir/ctcf_motifs_10e-6_nucleosomes_bin1bp_fragment_center.mat"
 file_mat_seq="$data_dir/ctcf_motifs_10e-6_sequences.mat"
 
 ## file with seeds
 file_seed=$results_dir'/ctcf_motifs_10e-6_seed.txt'
 
 mkdir -p $results_dir
 touch $file_seed
 
 # parameters
 n_iter='20'
 n_shift='1'
-n_core=28
+n_core=32
 
 # sequences
 for k in 1 2 3 4 5 6 7 8 9 10
 do
 	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
 	file_prob=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_sequences_'$k'class_prob.mat4d'
 	file_mod1=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_model.mat'
 	file_mod2=$results_dir/'ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_model.mat'
 	file_mod3=$results_dir/'ctcf_motifs_10e-6_nucleosomes_bin1bp_fragment_center_'$k'class_model.mat'
 	file_mod4=$results_dir/'ctcf_motifs_10e-6_sequences_'$k'class_model.mat'
 	file_aic=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_aic.txt'
 	echo "$file_prob $seed" >> $file_seed
 	bin/EMSequence  --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
 	bin/ProbToModel --read $file_mat_open  --prob $file_prob --thread $n_core 1> $file_mod1
 	bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2
 	bin/ProbToModel --read $file_mat_nucl  --prob $file_prob --thread $n_core 1> $file_mod3
 	bin/ProbToModel --seq  $file_mat_seq   --prob $file_prob --thread $n_core 1> $file_mod4
 done
diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_5/classification_ctcf_motif.R b/scripts/10xgenomics_PBMC_5k_motifs_classification_5/classification_ctcf_motif.R
new file mode 100644
index 0000000..23f7561
--- /dev/null
+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_5/classification_ctcf_motif.R
@@ -0,0 +1,100 @@
+setwd(file.path("/", "local", "groux", "scATAC-seq"))
+
+# libraries
+library(RColorBrewer)
+library(seqLogo)
+
+# functions
+source(file.path("scripts", "functions.R"))
+
+# the minimum number of classes searched
+k.min = 1
+# the maximum number of classes searched
+k.max = 10
+
+# path to the images for the logo
+path.a = file.path("res/A.png")
+path.c = file.path("res/C.png")
+path.g = file.path("res/G.png")
+path.t = file.path("res/T.png") 
+
+################## sequence patterns around ctcf motifs ##################
+
+for(k in k.min:k.max)
+{ # one PNG per number of classes, showing one sequence logo per class
+  # sequence models and class probabilities of the k-class run
+  data  = read.sequence.models(file.path("results", "10xgenomics_PBMC_5k_motifs_classification_5",
+                                         sprintf("ctcf_motifs_10e-6_sequences_%dclass_model.mat", k)))
+  model.seq = data$models
+  model.prob = data$prob
+  
+  data = NULL
+  
+  # plot classes
+  col = brewer.pal(3, "Set1")
+  # X11(width=17, height=10)
+  png(filename=file.path("results", "10xgenomics_PBMC_5k_motifs_classification_5",
+                         sprintf("ctcf_motifs_10e-6_classification_sequences_%dclass.png", k)),
+      units="in", res=720, width=18, height=12)
+    m = matrix(1:10, nrow=5, ncol=2, byrow=FALSE) # 5x2 grid of panels, filled column by column
+    layout(m)
+    # order from most to least probable class
+    ord      = order(model.prob, decreasing=TRUE)
+    ref.seq  = model.seq[,,ord, drop=FALSE]
+    prob     = model.prob[ord]
+    class    = seq_len(dim(ref.seq)[3])[ord] # index of each class before the reordering
+    for(i in seq_len(dim(ref.seq)[3]))
+    { # plot logo
+      plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t,
+                main=sprintf("class %d (p=%.2f)", class[i], prob[i]))
+      # x-axis, labelled with the position relative to the central column
+      x.lab = seq(-floor(ncol(ref.seq)/2), floor(ncol(ref.seq)/2), length.out=3)
+      x.at  = seq(1, ncol(ref.seq), length.out=3)
+      axis(1, at=x.at, labels=x.lab)
+      # y-axis is [0,1] for min/max signal
+      x.at = seq(0, 1, 0.5)
+      axis(2, at=x.at, labels=x.at)
+    }
+  dev.off()
+}
+
+
+################## sequence patterns around ctcf motifs repeat masked ##################
+
+for(k in k.min:k.max)
+{ # one PNG per number of classes, showing one sequence logo per class
+  # sequence models and class probabilities of the k-class run (repeat masked sequences)
+  data  = read.sequence.models(file.path("results", "10xgenomics_PBMC_5k_motifs_classification_5",
+                                         sprintf("ctcf_motifs_10e-6_sequences_rmsk_%dclass_model.mat", k)))
+  model.seq = data$models
+  model.prob = data$prob
+  
+  data = NULL
+  
+  # plot classes
+  col = brewer.pal(3, "Set1")
+  # X11(width=17, height=10)
+  png(filename=file.path("results", "10xgenomics_PBMC_5k_motifs_classification_5",
+                         sprintf("ctcf_motifs_10e-6_classification_sequences_rmsk_%dclass.png", k)),
+      units="in", res=720, width=18, height=12)
+    m = matrix(1:10, nrow=5, ncol=2, byrow=FALSE) # 5x2 grid of panels, filled column by column
+    layout(m)
+    # order from most to least probable class
+    ord      = order(model.prob, decreasing=TRUE)
+    ref.seq  = model.seq[,,ord, drop=FALSE]
+    prob     = model.prob[ord]
+    class    = seq_len(dim(ref.seq)[3])[ord] # index of each class before the reordering
+    for(i in seq_len(dim(ref.seq)[3]))
+    { # plot logo
+      plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t,
+                main=sprintf("class %d (p=%.2f)", class[i], prob[i]))
+      # x-axis, labelled with the position relative to the central column
+      x.lab = seq(-floor(ncol(ref.seq)/2), floor(ncol(ref.seq)/2), length.out=3)
+      x.at  = seq(1, ncol(ref.seq), length.out=3)
+      axis(1, at=x.at, labels=x.lab)
+      # y-axis is [0,1] for min/max signal
+      x.at = seq(0, 1, 0.5)
+      axis(2, at=x.at, labels=x.at)
+    }
+  dev.off()
+}
diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_5/classification_ctcf_motif.sh b/scripts/10xgenomics_PBMC_5k_motifs_classification_5/classification_ctcf_motif.sh
new file mode 100755
index 0000000..41efee5
--- /dev/null
+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_5/classification_ctcf_motif.sh
@@ -0,0 +1,40 @@
+# classifies the sequences around the CTCF motifs (all, then repeat masked) with 1 to 10 classes
+## directories
+results_dir='results/10xgenomics_PBMC_5k_motifs_classification_5'
+data_dir='data/10xgenomics_PBMC_5k_motifs'
+## input
+file_mat_seq="$data_dir/ctcf_motifs_10e-6_sequences.mat"
+file_mat_seq_rmsk="$data_dir/ctcf_motifs_10e-6_sequences_rmsk.mat"
+
+## files logging the seed of each run (one for all sequences, one for repeat masked sequences)
+file_seed=$results_dir'/ctcf_motifs_10e-6_seed.txt'
+file_seed_rmsk=$results_dir'/ctcf_motifs_10e-6_seed_rmsk.txt'
+
+mkdir -p $results_dir
+touch $file_seed
+touch $file_seed_rmsk
+
+# parameters
+n_iter='20'
+n_shift='771'
+n_core=32
+
+# sequences
+for k in 1 2 3 4 5 6 7 8 9 10
+do
+	# all sequences; the seed is 15 random characters (or $1 characters if the script gets an argument)
+	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
+	file_prob=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_sequences_'$k'class_prob.mat4d'
+	file_mod=$results_dir/'ctcf_motifs_10e-6_sequences_'$k'class_model.mat'
+	echo "$file_prob $seed" >> $file_seed
+	bin/EMSequence  --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
+	bin/ProbToModel --seq  $file_mat_seq   --prob $file_prob --thread $n_core 1> $file_mod
+
+	# repeat masked sequences, their seeds go to the dedicated rmsk seed file
+	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
+	file_prob=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_sequences_rmsk_'$k'class_prob.mat4d'
+	file_mod=$results_dir/'ctcf_motifs_10e-6_sequences_rmsk_'$k'class_model.mat'
+	echo "$file_prob $seed" >> $file_seed_rmsk
+	bin/EMSequence  --seq $file_mat_seq_rmsk --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
+	bin/ProbToModel --seq $file_mat_seq_rmsk --prob $file_prob --thread $n_core 1> $file_mod
+done
diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_5/classification_sp1_motif.R b/scripts/10xgenomics_PBMC_5k_motifs_classification_5/classification_sp1_motif.R
new file mode 100644
index 0000000..d405144
--- /dev/null
+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_5/classification_sp1_motif.R
@@ -0,0 +1,100 @@
+setwd(file.path("/", "local", "groux", "scATAC-seq"))
+
+# libraries
+library(RColorBrewer)
+library(seqLogo)
+
+# functions
+source(file.path("scripts", "functions.R"))
+
+# the minimum number of classes searched
+k.min = 1
+# the maximum number of classes searched
+k.max = 10
+
+# path to the images for the logo
+path.a = file.path("res/A.png")
+path.c = file.path("res/C.png")
+path.g = file.path("res/G.png")
+path.t = file.path("res/T.png") 
+
+################## sequence patterns around sp1 motifs ##################
+
+for(k in k.min:k.max)
+{ # one PNG per number of classes, showing one sequence logo per class
+  # sequence models and class probabilities of the k-class run
+  data  = read.sequence.models(file.path("results", "10xgenomics_PBMC_5k_motifs_classification_5",
+                                         sprintf("sp1_motifs_10e-7_sequences_%dclass_model.mat", k)))
+  model.seq = data$models
+  model.prob = data$prob
+  
+  data = NULL
+  
+  # plot classes
+  col = brewer.pal(3, "Set1")
+  # X11(width=17, height=10)
+  png(filename=file.path("results", "10xgenomics_PBMC_5k_motifs_classification_5",
+                         sprintf("sp1_motifs_10e-7_classification_sequences_%dclass.png", k)),
+      units="in", res=720, width=18, height=12)
+    m = matrix(1:10, nrow=5, ncol=2, byrow=FALSE) # 5x2 grid of panels, filled column by column
+    layout(m)
+    # order from most to least probable class
+    ord      = order(model.prob, decreasing=TRUE)
+    ref.seq  = model.seq[,,ord, drop=FALSE]
+    prob     = model.prob[ord]
+    class    = seq_len(dim(ref.seq)[3])[ord] # index of each class before the reordering
+    for(i in seq_len(dim(ref.seq)[3]))
+    { # plot logo
+      plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t,
+                main=sprintf("class %d (p=%.2f)", class[i], prob[i]))
+      # x-axis, labelled with the position relative to the central column
+      x.lab = seq(-floor(ncol(ref.seq)/2), floor(ncol(ref.seq)/2), length.out=3)
+      x.at  = seq(1, ncol(ref.seq), length.out=3)
+      axis(1, at=x.at, labels=x.lab)
+      # y-axis is [0,1] for min/max signal
+      x.at = seq(0, 1, 0.5)
+      axis(2, at=x.at, labels=x.at)
+    }
+  dev.off()
+}
+
+
+################## sequence patterns around sp1 motifs repeat masked ##################
+
+for(k in k.min:k.max)
+{ # one PNG per number of classes, showing one sequence logo per class
+  # sequence models and class probabilities of the k-class run (repeat masked sequences)
+  data  = read.sequence.models(file.path("results", "10xgenomics_PBMC_5k_motifs_classification_5",
+                                         sprintf("sp1_motifs_10e-7_sequences_rmsk_%dclass_model.mat", k)))
+  model.seq = data$models
+  model.prob = data$prob
+  
+  data = NULL
+  
+  # plot classes
+  col = brewer.pal(3, "Set1")
+  # X11(width=17, height=10)
+  png(filename=file.path("results", "10xgenomics_PBMC_5k_motifs_classification_5",
+                         sprintf("sp1_motifs_10e-7_classification_sequences_rmsk_%dclass.png", k)),
+      units="in", res=720, width=18, height=12)
+    m = matrix(1:10, nrow=5, ncol=2, byrow=FALSE) # 5x2 grid of panels, filled column by column
+    layout(m)
+    # order from most to least probable class
+    ord      = order(model.prob, decreasing=TRUE)
+    ref.seq  = model.seq[,,ord, drop=FALSE]
+    prob     = model.prob[ord]
+    class    = seq_len(dim(ref.seq)[3])[ord] # index of each class before the reordering
+    for(i in seq_len(dim(ref.seq)[3]))
+    { # plot logo
+      plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t,
+                main=sprintf("class %d (p=%.2f)", class[i], prob[i]))
+      # x-axis, labelled with the position relative to the central column
+      x.lab = seq(-floor(ncol(ref.seq)/2), floor(ncol(ref.seq)/2), length.out=3)
+      x.at  = seq(1, ncol(ref.seq), length.out=3)
+      axis(1, at=x.at, labels=x.lab)
+      # y-axis is [0,1] for min/max signal
+      x.at = seq(0, 1, 0.5)
+      axis(2, at=x.at, labels=x.at)
+    }
+  dev.off()
+}
diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_5/classification_sp1_motif.sh b/scripts/10xgenomics_PBMC_5k_motifs_classification_5/classification_sp1_motif.sh
new file mode 100755
index 0000000..8674f3f
--- /dev/null
+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_5/classification_sp1_motif.sh
@@ -0,0 +1,40 @@
+# classifies the sequences around the SP1 motifs (all, then repeat masked) with 1 to 10 classes
+## directories
+results_dir='results/10xgenomics_PBMC_5k_motifs_classification_5'
+data_dir='data/10xgenomics_PBMC_5k_motifs'
+## input
+file_mat_seq="$data_dir/sp1_motifs_10e-7_sequences.mat"
+file_mat_seq_rmsk="$data_dir/sp1_motifs_10e-7_sequences_rmsk.mat"
+
+## files logging the seed of each run (one for all sequences, one for repeat masked sequences)
+file_seed=$results_dir'/sp1_motifs_10e-7_seed.txt'
+file_seed_rmsk=$results_dir'/sp1_motifs_10e-7_seed_rmsk.txt'
+
+mkdir -p $results_dir
+touch $file_seed
+touch $file_seed_rmsk
+
+# parameters
+n_iter='20'
+n_shift='771'
+n_core=32
+
+# sequences
+for k in 1 2 3 4 5 6 7 8 9 10
+do
+	# all sequences; the seed is 15 random characters (or $1 characters if the script gets an argument)
+	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
+	file_prob=$results_dir/'sp1_motifs_10e-7_open_bin1bp_sequences_'$k'class_prob.mat4d'
+	file_mod=$results_dir/'sp1_motifs_10e-7_sequences_'$k'class_model.mat'
+	echo "$file_prob $seed" >> $file_seed
+	bin/EMSequence  --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
+	bin/ProbToModel --seq  $file_mat_seq   --prob $file_prob --thread $n_core 1> $file_mod
+
+	# repeat masked sequences, their seeds go to the dedicated rmsk seed file
+	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
+	file_prob=$results_dir/'sp1_motifs_10e-7_open_bin1bp_sequences_rmsk_'$k'class_prob.mat4d'
+	file_mod=$results_dir/'sp1_motifs_10e-7_sequences_rmsk_'$k'class_model.mat'
+	echo "$file_prob $seed" >> $file_seed_rmsk
+	bin/EMSequence  --seq $file_mat_seq_rmsk --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
+	bin/ProbToModel --seq $file_mat_seq_rmsk --prob $file_prob --thread $n_core 1> $file_mod
+done
diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_5/run_all.sh b/scripts/10xgenomics_PBMC_5k_motifs_classification_5/run_all.sh
new file mode 100755
index 0000000..4b406e0
--- /dev/null
+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_5/run_all.sh
@@ -0,0 +1,10 @@
+# runs every classification of this directory, then the R analyses (paths are relative to the project root)
+dir='scripts/10xgenomics_PBMC_5k_motifs_classification_5'
+
+# classification, one script per motif
+$dir/classification_ctcf_motif.sh
+$dir/classification_sp1_motif.sh
+
+# analysis of classification results
+Rscript $dir/classification_ctcf_motif.R
+Rscript $dir/classification_sp1_motif.R
diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_6/analyse_motifs.R b/scripts/10xgenomics_PBMC_5k_motifs_classification_6/analyse_motifs.R
new file mode 100644
index 0000000..e8a6f3f
--- /dev/null
+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_6/analyse_motifs.R
@@ -0,0 +1,140 @@
+setwd(file.path("/", "local", "groux", "scATAC-seq"))
+
+# libraries
+library(RColorBrewer)
+library(clues)
+
+# functions
+source(file.path("scripts", "functions.R"))
+
+#' Hard assignment of regions to classes.
+#' The posterior probabilities of each region
+#' are summed over all shift and flip states
+#' and the region receives the index of the
+#' class with the largest total.
+#' @param prob a 4D array containing
+#' the posterior probabilities. Its
+#' dimensions are :
+#' 1st number of regions
+#' 2nd number of classes
+#' 3rd number of shifts
+#' 4th number of flips
+#' @return a vector of class indices, 1 per
+#' region (the first class wins on ties).
+#' @author Romain Groux
+hard.assign = function(prob)
+{
+  # regions x classes matrix, shift and flip states summed out
+  class.weight = apply(prob, c(1,2), sum)
+  apply(class.weight, 1, which.max)
+}
+
+#' Given a set of n labels corresponding to the cluster/class 
+#' assignment of n data point, this function creates a 
+#' co-occurence matrix where the element [i,j] indicates whether 
+#' the i-th and the j-th data points are in the same cluster 
+#' (=1) or not (=0).
+#' @param clusters a vector of labels (without NA) indicating the cluster
+#' assignment for a set of data points. It may be empty.
+#' @return the n x n co-occurence matrix. Only the lower triangle, diagonal
+#' included, is filled. 0 means that both points are not assigned the same
+#' label, whereas 1 means that they are. The upper triangle is always 0.
+#' @author Romain Groux
+construct.cooccurence.matrix = function(clusters)
+{ # NA labels cannot be compared, fail early
+  stopifnot(!anyNA(clusters))
+  # number of data points
+  n = length(clusters)
+  # square matrix
+  matrix.cooc = matrix(data=0, nrow=n, ncol=n)
+  # only fill the lower triangle of the matrix (diagonal included)
+  # seq_len() gives an empty loop, and a 0x0 matrix, when there is no label
+  for(i in seq_len(n))
+  { # compare label i with labels 1..i in one vectorised step
+    upto = seq_len(i)
+    matrix.cooc[i, upto] = as.numeric(clusters[i] == clusters[upto])
+  }
+  return(matrix.cooc)
+}
+
+#' Hubert Gamma statistic between two clusterings of the same data points,
+#' computed as the Pearson correlation of their co-occurence matrices.
+#' Element i of one vector must describe the same point as element i of the other.
+#' NOTE(review): the whole matrices are correlated, so the diagonal and the
+#' always-zero upper triangle contribute as well -- confirm this is intended.
+#' @param labels.true a vector of size <n> with the reference cluster labels.
+#' @param labels.cand a vector of size <n> with the cluster labels to compare.
+#' @return the Hubert Gamma statistic.
+#' @seealso construct.cooccurence.matrix()
+#' @author Romain Groux
+gamma.stat = function(labels.true, labels.cand)
+{ cooc.true = as.vector(construct.cooccurence.matrix(labels.true))
+  cooc.cand = as.vector(construct.cooccurence.matrix(labels.cand))
+  cor(cooc.true, cooc.cand)
+}
+
+# path to the images for the logo
+path.a = file.path("res/A.png")
+path.c = file.path("res/C.png")
+path.g = file.path("res/G.png")
+path.t = file.path("res/T.png") 
+
+# the true labels 5000 CTCF sites and 5000 SP1 sites
+true.labels = c(rep(1,5000),
+                rep(2,5000))
+
+# the expected dimensionality of the prob array to read
+# -1 indicate values that will change (class and shift)
+dim  = c(10000, -1, -1, 2)
+
+#  the parameters used to run the EM
+n.shifts   = c(1, 771)
+n.classes = 2:6
+
+# number of times each classification was repeated
+n.repeat  = 10
+
+# where the results are
+dir.results = file.path("results",
+                        "10xgenomics_PBMC_5k_motifs_classification_6")
+
+# ari values, indexed [class, shift, repetition]
+ari = array(dim=c(length(n.classes),
+                  length(n.shifts),
+                  n.repeat))
+
+for(i in seq_along(n.shifts))
+{
+  n.shift = n.shifts[i]
+  
+  for(j in seq_along(n.classes))
+  {
+    n.class = n.classes[j]
+    
+    # update dimensions
+    dim[2] = n.class
+    dim[3] = n.shift
+    
+    # go over each repetition
+    for(k in seq_len(n.repeat))
+    { print(sprintf("%d shift   %d class   %d repeat", n.shift, n.class, k))
+      file.prob = file.path(dir.results,
+                            sprintf("ctcf_motifs_10e-6_sp1_motifs_10e-7_sequences_rmsk_%dclass_%dshift_prob_%d.txt", 
+                                    n.class, n.shift, k))
+      file.motif = file.path(dir.results,
+                            sprintf("ctcf_motifs_10e-6_sp1_motifs_10e-7_sequences_rmsk_%dclass_%dshift_model_%d.mat", 
+                                    n.class, n.shift, k))
+      # overall class weights, hard assignment and agreement with the true labels
+      prob = read.arraytxt(file.prob, dim)
+      print(apply(prob, 2, sum)/sum(prob))
+      cluster = hard.assign(prob)
+      ari[j,i,k] = adjustedRand(cluster, true.labels)["HA"] # j = class index (1st dim), i = shift index (2nd dim)
+      
+      # motif = read.sequence.models(file.motif)$models
+      # plot.logo(motif[,390:410,1], path.a, path.c, path.g, path.t)
+      # plot.logo(motif[,390:410,2], path.a, path.c, path.g, path.t)
+    }
+  }
+}
+
+
diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_6/analyse_reads.R b/scripts/10xgenomics_PBMC_5k_motifs_classification_6/analyse_reads.R
new file mode 100644
index 0000000..e69de29
diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_6/classification_motifs.sh b/scripts/10xgenomics_PBMC_5k_motifs_classification_6/classification_motifs.sh
new file mode 100755
index 0000000..f6fe2fb
--- /dev/null
+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_6/classification_motifs.sh
@@ -0,0 +1,45 @@
+# classifies the pooled CTCF and SP1 repeat masked sequences, 10 runs per number of classes, without and with shift
+## directories
+results_dir='results/10xgenomics_PBMC_5k_motifs_classification_6'
+data_dir='data/10xgenomics_PBMC_5k_motifs'
+## input
+file_mat_seq="$data_dir/ctcf_motifs_10e-6_sp1_motifs_10e-7_sequences_rmsk.mat"
+
+## file logging the seed of each run
+file_seed=$results_dir'/ctcf_motifs_10e-6_sp1_motifs_10e-7_sequences_rmsk_seed.txt'
+
+mkdir -p $results_dir
+touch $file_seed
+
+# parameters
+n_iter='200'
+n_shift1='1'
+n_shift2='771'
+n_core=32
+
+# sequences, i is the repetition and k the number of classes
+for i in {1..10}
+do
+	for k in 2 3 4 5 6
+	do
+		# without shift; the seed is 15 random characters (or $1 characters if the script gets an argument)
+		seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
+		file_prob=$results_dir/'ctcf_motifs_10e-6_sp1_motifs_10e-7_sequences_rmsk_'$k'class_'$n_shift1'shift_prob_'$i'.mat4d'
+		file_prob2=$results_dir/'ctcf_motifs_10e-6_sp1_motifs_10e-7_sequences_rmsk_'$k'class_'$n_shift1'shift_prob_'$i'.txt'
+		file_mod=$results_dir/'ctcf_motifs_10e-6_sp1_motifs_10e-7_sequences_rmsk_'$k'class_'$n_shift1'shift_model_'$i'.mat'
+		echo "$file_prob $seed" >> $file_seed
+		bin/EMSequence  --seq $file_mat_seq --class $k --shift $n_shift1 --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
+		bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod
+		bin/MatrixBinToTxt --file $file_prob --type double --ndim 4 > $file_prob2
+
+		# with shift
+		seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
+		file_prob=$results_dir/'ctcf_motifs_10e-6_sp1_motifs_10e-7_sequences_rmsk_'$k'class_'$n_shift2'shift_prob_'$i'.mat4d'
+		file_prob2=$results_dir/'ctcf_motifs_10e-6_sp1_motifs_10e-7_sequences_rmsk_'$k'class_'$n_shift2'shift_prob_'$i'.txt'
+		file_mod=$results_dir/'ctcf_motifs_10e-6_sp1_motifs_10e-7_sequences_rmsk_'$k'class_'$n_shift2'shift_model_'$i'.mat'
+		echo "$file_prob $seed" >> $file_seed
+		bin/EMSequence  --seq $file_mat_seq --class $k --shift $n_shift2 --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
+		bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod
+		bin/MatrixBinToTxt --file $file_prob --type double --ndim 4 > $file_prob2
+	done
+done
diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_6/classification_reads.sh b/scripts/10xgenomics_PBMC_5k_motifs_classification_6/classification_reads.sh
new file mode 100755
index 0000000..fe864dc
--- /dev/null
+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_6/classification_reads.sh
@@ -0,0 +1,55 @@
+# classifies the pooled CTCF and SP1 regions on their open chromatin read densities, 10 runs per number of classes
+## directories
+results_dir='results/10xgenomics_PBMC_5k_motifs_classification_6'
+data_dir='data/10xgenomics_PBMC_5k_motifs'
+## input
+file_mat_seq="$data_dir/ctcf_motifs_10e-6_sp1_motifs_10e-7_sequences_rmsk.mat"
+file_mat_open="$data_dir/ctcf_motifs_10e-6_sp1_motifs_10e-7_open_bin1bp_read_atac_rmsk.mat"
+file_mat_nucl="$data_dir/ctcf_motifs_10e-6_sp1_motifs_10e-7_nucleosomes_bin1bp_fragment_center_rmsk.mat"
+
+## file logging the seed of each run
+file_seed=$results_dir'/ctcf_motifs_10e-6_sp1_motifs_10e-7_open_bin1bp_read_atac_rmsk_seed.txt'
+
+mkdir -p $results_dir
+touch $file_seed
+
+# parameters
+n_iter='20'
+n_shift1='1'
+n_shift2='21'
+n_core=32
+
+# open chromatin, i is the repetition and k the number of classes
+for i in {1..10}
+do
+	for k in 2 3 4 5 6
+	do
+		# without shift -- NOTE(review): file_mod3 below has the same path as the 1-shift model written by classification_motifs.sh and overwrites it
+		seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
+		file_prob=$results_dir/'ctcf_motifs_10e-6_sp1_motifs_10e-7_open_bin1bp_read_atac_rmsk_'$k'class_'$n_shift1'shift_prob_'$i'.mat4d'
+		file_prob2=$results_dir/'ctcf_motifs_10e-6_sp1_motifs_10e-7_open_bin1bp_read_atac_rmsk_'$k'class_'$n_shift1'shift_prob_'$i'.txt'
+		file_mod1=$results_dir/'ctcf_motifs_10e-6_sp1_motifs_10e-7_open_bin1bp_read_atac_rmsk_'$k'class_'$n_shift1'shift_model_'$i'.mat'
+		file_mod2=$results_dir/'ctcf_motifs_10e-6_sp1_motifs_10e-7_nucleosomes_bin1bp_fragment_center_rmsk_'$k'class_'$n_shift1'shift_model_'$i'.mat'
+		file_mod3=$results_dir/'ctcf_motifs_10e-6_sp1_motifs_10e-7_sequences_rmsk_'$k'class_'$n_shift1'shift_model_'$i'.mat'
+		echo "$file_prob $seed" >> $file_seed
+		bin/EMRead      --read $file_mat_open --class $k --shift $n_shift1 --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
+		bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
+		bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2
+		bin/ProbToModel --seq  $file_mat_seq  --prob $file_prob --thread $n_core 1> $file_mod3
+		bin/MatrixBinToTxt --file $file_prob --type double --ndim 4 > $file_prob2
+
+		# with shift
+		seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
+		file_prob=$results_dir/'ctcf_motifs_10e-6_sp1_motifs_10e-7_open_bin1bp_read_atac_rmsk_'$k'class_'$n_shift2'shift_prob_'$i'.mat4d'
+		file_prob2=$results_dir/'ctcf_motifs_10e-6_sp1_motifs_10e-7_open_bin1bp_read_atac_rmsk_'$k'class_'$n_shift2'shift_prob_'$i'.txt'
+		file_mod1=$results_dir/'ctcf_motifs_10e-6_sp1_motifs_10e-7_open_bin1bp_read_atac_rmsk_'$k'class_'$n_shift2'shift_model_'$i'.mat'
+		file_mod2=$results_dir/'ctcf_motifs_10e-6_sp1_motifs_10e-7_nucleosomes_bin1bp_fragment_center_rmsk_'$k'class_'$n_shift2'shift_model_'$i'.mat'
+		file_mod3=$results_dir/'ctcf_motifs_10e-6_sp1_motifs_10e-7_sequences_rmsk_'$k'class_'$n_shift2'shift_model_'$i'.mat'
+		echo "$file_prob $seed" >> $file_seed
+		bin/EMRead      --read $file_mat_open --class $k --shift $n_shift2 --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
+		bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
+		bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2
+		bin/ProbToModel --seq  $file_mat_seq  --prob $file_prob --thread $n_core 1> $file_mod3
+		bin/MatrixBinToTxt --file $file_prob --type double --ndim 4 > $file_prob2
+	done
+done
diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_6/run_all.sh b/scripts/10xgenomics_PBMC_5k_motifs_classification_6/run_all.sh
new file mode 100755
index 0000000..25f35b5
--- /dev/null
+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_6/run_all.sh
@@ -0,0 +1,10 @@
+# runs the classification_6 pipeline: both classifications, then their R analyses (all paths are relative)
+dir='scripts/10xgenomics_PBMC_5k_motifs_classification_6'
+
+# classification
+$dir/classification_motifs.sh
+$dir/classification_reads.sh
+
+# analysis of classification results
+Rscript $dir/analysis_motifs.R
+Rscript $dir/analysis_reads.R
diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_7/analyse_motifs.R b/scripts/10xgenomics_PBMC_5k_motifs_classification_7/analyse_motifs.R
new file mode 100644
index 0000000..38fa96f
--- /dev/null
+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_7/analyse_motifs.R
@@ -0,0 +1,98 @@
+setwd(file.path("/", "local", "groux", "scATAC-seq"))
+
+# libraries
+library(RColorBrewer)
+library(clues)
+
+# functions
+source(file.path("scripts", "functions.R"))
+
+#' Performs a hard assignment.
+#' Every region receives the label of the
+#' class with the largest posterior
+#' probability once the probabilities are
+#' summed over all shift and flip states.
+#' \param prob a 4D array containing
+#' the posterior probabilities. Its
+#' dimensions are, in order :
+#' 1st number of regions
+#' 2nd number of classes
+#' 3rd number of shifts
+#' 4th number of flips
+#' \return a vector of class indices,
+#' 1 per region.
+#' \author Romain Groux
+hard.assign = function(prob)
+{
+  # sum out the shift and flip dimensions -> regions x classes matrix
+  class.prob = apply(prob, c(1,2), sum)
+  return(apply(class.prob, 1, which.max))
+}
+
+# path to the images for the logo
+path.a = file.path("res/A.png")
+path.c = file.path("res/C.png")
+path.g = file.path("res/G.png")
+path.t = file.path("res/T.png") 
+
+# the true labels: 1000 sites labelled 1 followed by 1000 sites labelled 2 (this comment used to say 5000/5000; which TF each label stands for is to be confirmed)
+true.labels = c(rep(1,1000),
+                rep(2,1000))
+
+# the expected dimensionality of the prob array to read (regions, classes, shifts, flips)
+# -1 marks the values that are overwritten in the loops below (class and shift)
+dim  = c(2000, -1, -1, 2)
+
+# the parameters used to run the EM
+n.shifts   = c(90) # NOTE(review): classification_motifs.sh of this directory runs --shift 1 and 771 -- confirm
+n.classes = 1:2 # NOTE(review): classification_motifs.sh runs 2 to 6 classes and the loop below plots motif[,,2] -- confirm the 1 class case
+
+# number of times a classification was repeated
+n.repeat  = 50 # NOTE(review): classification_motifs.sh loops i in {1..10} -- confirm
+
+# where the results are
+dir.results = file.path("results",
+                        "10xgenomics_PBMC_5k_motifs_classification_7")
+
+# ari values
+ari = array(dim=c(length(n.classes),
+                  length(n.shifts),
+                  n.repeat))
+
+for(i in 1:length(n.classes))
+{
+  n.class = n.classes[i]
+  
+  for(j in 1:length(n.shifts))
+  {
+    n.shift = n.shifts[j]
+    
+    # update dimensions
+    dim[2] = n.class
+    dim[3] = n.shift
+    
+    
+    # go over each repetition
+    for(k in 1:n.repeat)
+    { file.prob = file.path(dir.results,
+                            sprintf("ctcf_motifs_10e-6_myc_motifs_10e-6_sequences_rmsk_%dclass_%dshift_prob_%d.txt", 
+                                    n.class, n.shift, k))
+      file.motif = file.path(dir.results,
+                            sprintf("ctcf_motifs_10e-6_myc_motifs_10e-6_sequences_rmsk_%dclass_%dshift_model_%d.mat", 
+                                    n.class, n.shift, k))
+      
+      # prob = read.arraytxt(file.prob, dim)
+      # print(apply(prob, 2, sum)/sum(prob))
+      # cluster = hard.assign(prob)
+      # ari[i,j,k] = adjustedRand(cluster, true.labels)["HA"]
+      
+      X11(width=10, height=12)
+      par(mfrow=c(2,1))
+      motif = read.sequence.models(file.motif)$models
+      plot.logo(motif[,,1], path.a, path.c, path.g, path.t)
+      plot.logo(motif[,,2], path.a, path.c, path.g, path.t)
+    }
+  }
+}
+
+
diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_7/classification_motifs.sh b/scripts/10xgenomics_PBMC_5k_motifs_classification_7/classification_motifs.sh
new file mode 100755
index 0000000..a0e5c69
--- /dev/null
+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_7/classification_motifs.sh
@@ -0,0 +1,54 @@
+# paths (all relative to the current working directory)
+## directories
+results_dir='results/10xgenomics_PBMC_5k_motifs_classification_7'
+data_dir='data/10xgenomics_PBMC_5k_motifs'
+## input
+file_mat_seq="$data_dir/ctcf_motifs_10e-6_myc_motifs_10e-6_sequences_rmsk.mat"
+
+## file with seeds (one "<prob file> <seed>" line is appended per seeded run)
+file_seed=$results_dir'/ctcf_motifs_10e-6_myc_motifs_10e-6_sequences_rmsk_seed.txt'
+
+mkdir -p $results_dir
+touch $file_seed
+
+# parameters
+n_iter='200'   # value given to --iter
+n_shift1='1'   # value given to --shift for the runs without shift
+n_shift2='771' # value given to --shift for the runs with shift
+n_core=32      # value given to --thread for the runs with shift
+
+# sequences: 10 repetitions (i) for each number of classes (k), every run with its own seed
+for i in {1..10}
+do
+	for k in 2 3 4 5 6
+	do
+		# without shift (seed = random string read from /dev/urandom, length = 1st script argument, 15 by default)
+		seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
+		file_prob=$results_dir/'ctcf_motifs_10e-6_myc_motifs_10e-6_sequences_rmsk_'$k'class_'$n_shift1'shift_prob_'$i'.mat4d'
+		file_prob2=$results_dir/'ctcf_motifs_10e-6_myc_motifs_10e-6_sequences_rmsk_'$k'class_'$n_shift1'shift_prob_'$i'.txt'
+		file_mod=$results_dir/'ctcf_motifs_10e-6_myc_motifs_10e-6_sequences_rmsk_'$k'class_'$n_shift1'shift_model_'$i'.mat'
+		echo "$file_prob $seed" >> $file_seed
+		bin/EMSequence  --seq $file_mat_seq --class $k --shift $n_shift1 --flip --iter $n_iter --seed $seed --thread 10 --out $file_prob
+		bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread 10 1> $file_mod
+		bin/MatrixBinToTxt --file $file_prob --type double --ndim 4 > $file_prob2
+
+		# with shift (prob file named like its .txt and model siblings, single underscore before "sequences")
+		seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
+		file_prob=$results_dir/'ctcf_motifs_10e-6_myc_motifs_10e-6_sequences_rmsk_'$k'class_'$n_shift2'shift_prob_'$i'.mat4d'
+		file_prob2=$results_dir/'ctcf_motifs_10e-6_myc_motifs_10e-6_sequences_rmsk_'$k'class_'$n_shift2'shift_prob_'$i'.txt'
+		file_mod=$results_dir/'ctcf_motifs_10e-6_myc_motifs_10e-6_sequences_rmsk_'$k'class_'$n_shift2'shift_model_'$i'.mat'
+		echo "$file_prob $seed" >> $file_seed
+		bin/EMSequence  --seq $file_mat_seq --class $k --shift $n_shift2 --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
+		bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod
+		bin/MatrixBinToTxt --file $file_prob --type double --ndim 4 > $file_prob2
+	done
+done
+
+
+# with shift, 10 classes, 100 iterations, single run (no --seed is passed and nothing is written to the seed file)
+file_prob=$results_dir/'ctcf_motifs_10e-6_myc_motifs_10e-6_sequences_rmsk_10class_771shift_prob_kmer.mat4d'
+file_prob2=$results_dir/'ctcf_motifs_10e-6_myc_motifs_10e-6_sequences_rmsk_10class_771shift_prob_kmer.txt'
+file_mod=$results_dir/'ctcf_motifs_10e-6_myc_motifs_10e-6_sequences_rmsk_10class_771shift_model_kmer.mat'
+bin/EMSequence  --seq $file_mat_seq --class 10 --shift 771 --flip --iter 100 --thread $n_core --out $file_prob
+bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod
+bin/MatrixBinToTxt --file $file_prob --type double --ndim 4 > $file_prob2
diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_7/classification_reads.sh b/scripts/10xgenomics_PBMC_5k_motifs_classification_7/classification_reads.sh
new file mode 100755
index 0000000..fe864dc
--- /dev/null
+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_7/classification_reads.sh
@@ -0,0 +1,55 @@
+# paths (all relative to the current working directory)
+## directories (results go to the classification_7 directory, matching the location of this script)
+results_dir='results/10xgenomics_PBMC_5k_motifs_classification_7'
+data_dir='data/10xgenomics_PBMC_5k_motifs'
+## input  NOTE(review): these are ctcf/sp1 matrices while classification_motifs.sh of this directory reads ctcf/myc -- confirm
+file_mat_seq="$data_dir/ctcf_motifs_10e-6_sp1_motifs_10e-7_sequences_rmsk.mat"
+file_mat_open="$data_dir/ctcf_motifs_10e-6_sp1_motifs_10e-7_open_bin1bp_read_atac_rmsk.mat"
+file_mat_nucl="$data_dir/ctcf_motifs_10e-6_sp1_motifs_10e-7_nucleosomes_bin1bp_fragment_center_rmsk.mat"
+
+## file with seeds (one "<prob file> <seed>" line is appended per run)
+file_seed=$results_dir'/ctcf_motifs_10e-6_sp1_motifs_10e-7_open_bin1bp_read_atac_rmsk_seed.txt'
+
+mkdir -p $results_dir
+touch $file_seed
+
+# parameters
+n_iter='20'   # value given to --iter
+n_shift1='1'  # value given to --shift for the runs without shift
+n_shift2='21' # value given to --shift for the runs with shift
+n_core=32     # value given to --thread
+
+# open chromatin: EMRead is run on the open chromatin read matrix, then ProbToModel is run on the open, nucleosome and sequence matrices with the resulting probabilities
+for i in {1..10}
+do
+	for k in 2 3 4 5 6
+	do
+		# without shift (seed = random string read from /dev/urandom, length = 1st script argument, 15 by default)
+		seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
+		file_prob=$results_dir/'ctcf_motifs_10e-6_sp1_motifs_10e-7_open_bin1bp_read_atac_rmsk_'$k'class_'$n_shift1'shift_prob_'$i'.mat4d'
+		file_prob2=$results_dir/'ctcf_motifs_10e-6_sp1_motifs_10e-7_open_bin1bp_read_atac_rmsk_'$k'class_'$n_shift1'shift_prob_'$i'.txt'
+		file_mod1=$results_dir/'ctcf_motifs_10e-6_sp1_motifs_10e-7_open_bin1bp_read_atac_rmsk_'$k'class_'$n_shift1'shift_model_'$i'.mat'
+		file_mod2=$results_dir/'ctcf_motifs_10e-6_sp1_motifs_10e-7_nucleosomes_bin1bp_fragment_center_rmsk_'$k'class_'$n_shift1'shift_model_'$i'.mat'
+		file_mod3=$results_dir/'ctcf_motifs_10e-6_sp1_motifs_10e-7_sequences_rmsk_'$k'class_'$n_shift1'shift_model_'$i'.mat'
+		echo "$file_prob $seed" >> $file_seed
+		bin/EMRead      --read $file_mat_open --class $k --shift $n_shift1 --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
+		bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
+		bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2
+		bin/ProbToModel --seq  $file_mat_seq  --prob $file_prob --thread $n_core 1> $file_mod3
+		bin/MatrixBinToTxt --file $file_prob --type double --ndim 4 > $file_prob2
+
+		# with shift (same steps, --shift $n_shift2)
+		seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
+		file_prob=$results_dir/'ctcf_motifs_10e-6_sp1_motifs_10e-7_open_bin1bp_read_atac_rmsk_'$k'class_'$n_shift2'shift_prob_'$i'.mat4d'
+		file_prob2=$results_dir/'ctcf_motifs_10e-6_sp1_motifs_10e-7_open_bin1bp_read_atac_rmsk_'$k'class_'$n_shift2'shift_prob_'$i'.txt'
+		file_mod1=$results_dir/'ctcf_motifs_10e-6_sp1_motifs_10e-7_open_bin1bp_read_atac_rmsk_'$k'class_'$n_shift2'shift_model_'$i'.mat'
+		file_mod2=$results_dir/'ctcf_motifs_10e-6_sp1_motifs_10e-7_nucleosomes_bin1bp_fragment_center_rmsk_'$k'class_'$n_shift2'shift_model_'$i'.mat'
+		file_mod3=$results_dir/'ctcf_motifs_10e-6_sp1_motifs_10e-7_sequences_rmsk_'$k'class_'$n_shift2'shift_model_'$i'.mat'
+		echo "$file_prob $seed" >> $file_seed
+		bin/EMRead      --read $file_mat_open --class $k --shift $n_shift2 --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
+		bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
+		bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2
+		bin/ProbToModel --seq  $file_mat_seq  --prob $file_prob --thread $n_core 1> $file_mod3
+		bin/MatrixBinToTxt --file $file_prob --type double --ndim 4 > $file_prob2
+	done
+done
diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_7/run_all.sh b/scripts/10xgenomics_PBMC_5k_motifs_classification_7/run_all.sh
new file mode 100755
index 0000000..0967903
--- /dev/null
+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_7/run_all.sh
@@ -0,0 +1,10 @@
+# runs the classification_7 pipeline: both classifications, then their R analyses (all paths are relative)
+dir='scripts/10xgenomics_PBMC_5k_motifs_classification_7'
+
+# classification
+$dir/classification_motifs.sh
+$dir/classification_reads.sh
+
+# analysis of classification results (the motif analysis script of this directory is named analyse_motifs.R)
+Rscript $dir/analyse_motifs.R
+Rscript $dir/analysis_reads.R # NOTE(review): no analysis_reads.R is added to this directory by this change -- confirm it exists
diff --git a/scripts/10xgenomics_PBMC_5k_peaks_classification_0/classification_peaks_sampled.sh b/scripts/10xgenomics_PBMC_5k_peaks_classification_0/classification_peaks_sampled.sh
index 17b2445..788e02a 100755
--- a/scripts/10xgenomics_PBMC_5k_peaks_classification_0/classification_peaks_sampled.sh
+++ b/scripts/10xgenomics_PBMC_5k_peaks_classification_0/classification_peaks_sampled.sh
@@ -1,35 +1,35 @@
 
 # paths
 ## dir
 data_dir="data/10xgenomics_PBMC_5k_peaks"
 results_dir="results/10xgenomics_PBMC_5k_peaks_classification_0"
 ## matrix files
 file_mat_open=$data_dir/'peaks_rmsk_sampled_openchromatin_1kb_read_atac.mat'
 file_mat_nucl=$data_dir/'peaks_rmsk_sampled_nucleosomes_1kb_fragment_center.mat'
 file_mat_seq=$data_dir/'peaks_rmsk_sampled_sequences_1kb.mat'
 ## file with seeds
 file_seed=$results_dir'/peaks_rmsk_sampled_seed.txt'
 
 mkdir -p $results_dir
 touch $file_seed
 
 # EM param
 n_iter='100'
 n_shift='971'
-n_core=8
+n_core=32
 
 # classify
 for k in 10 20 30
 do
 	## results files
 	file_prob=$results_dir/'peaks_rmsk_sampled_sequences_1kb_'$k'class_prob.mat4d'
 	file_mod1=$results_dir/'peaks_rmsk_sampled_openchromatin_1kb_read_atac_'$k'class_model.mat'
 	file_mod2=$results_dir/'peaks_rmsk_sampled_nucleosomes_1kb_fragment_center_'$k'class_model.mat'
 	file_mod3=$results_dir/'peaks_rmsk_sampled_sequences_1kb_'$k'class_model.mat'	
 	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
 	echo "$file_prob $seed" >> $file_seed
 	bin/EMSequence  --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
 	bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
 	bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2
 	bin/ProbToModel --seq  $file_mat_seq  --prob $file_prob --thread $n_core 1> $file_mod3
 done
diff --git a/scripts/10xgenomics_PBMC_5k_peaks_classification_1/classification_peaks_sampled.sh b/scripts/10xgenomics_PBMC_5k_peaks_classification_1/classification_peaks_sampled.sh
index a32bc7e..d317884 100755
--- a/scripts/10xgenomics_PBMC_5k_peaks_classification_1/classification_peaks_sampled.sh
+++ b/scripts/10xgenomics_PBMC_5k_peaks_classification_1/classification_peaks_sampled.sh
@@ -1,35 +1,35 @@
 
 # paths
 ## dir
 data_dir="data/10xgenomics_PBMC_5k_peaks"
 results_dir="results/10xgenomics_PBMC_5k_peaks_classification_1"
 ## matrix files
 file_mat_open=$data_dir/'peaks_rmsk_sampled_openchromatin_1kb_read_atac.mat'
 file_mat_nucl=$data_dir/'peaks_rmsk_sampled_nucleosomes_1kb_fragment_center.mat'
 file_mat_seq=$data_dir/'peaks_rmsk_sampled_sequences_1kb.mat'
 ## file with seeds
 file_seed=$results_dir'/peaks_rmsk_sampled_seed.txt'
 
 mkdir -p $results_dir
 touch $file_seed
 
 # EM param
 n_iter='100'
 n_shift='981'
-n_core=24
+n_core=32
 
 # classify
 for k in 10 20 30
 do
 	## results files
 	file_prob=$results_dir/'peaks_rmsk_sampled_sequences_1kb_'$k'class_prob.mat4d'
 	file_mod1=$results_dir/'peaks_rmsk_sampled_openchromatin_1kb_read_atac_'$k'class_model.mat'
 	file_mod2=$results_dir/'peaks_rmsk_sampled_nucleosomes_1kb_fragment_center_'$k'class_model.mat'
 	file_mod3=$results_dir/'peaks_rmsk_sampled_sequences_1kb_'$k'class_model.mat'	
 	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
 	echo "$file_prob $seed" >> $file_seed
 	bin/EMSequence  --seq $file_mat_seq --class $k --shift $n_shift --flip --bgclass --iter $n_iter --seed $seed --thread $n_core --out $file_prob
 	bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core --bgclass 1> $file_mod1
 	bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core --bgclass 1> $file_mod2
 	bin/ProbToModel --seq  $file_mat_seq  --prob $file_prob --thread $n_core --bgclass 1> $file_mod3
 done
diff --git a/scripts/10xgenomics_PBMC_5k_peaks_classification_2/classification_peaks_sampled.sh b/scripts/10xgenomics_PBMC_5k_peaks_classification_2/classification_peaks_sampled.sh
index 9a59ce4..66660d2 100755
--- a/scripts/10xgenomics_PBMC_5k_peaks_classification_2/classification_peaks_sampled.sh
+++ b/scripts/10xgenomics_PBMC_5k_peaks_classification_2/classification_peaks_sampled.sh
@@ -1,35 +1,35 @@
 
 # paths
 ## dir
 data_dir="data/10xgenomics_PBMC_5k_peaks"
 results_dir="results/10xgenomics_PBMC_5k_peaks_classification_2"
 ## matrix files
 file_mat_open=$data_dir/'peaks_rmsk_sampled_openchromatin_1kb_read_atac.mat'
 file_mat_nucl=$data_dir/'peaks_rmsk_sampled_nucleosomes_1kb_fragment_center.mat'
 file_mat_seq=$data_dir/'peaks_rmsk_sampled_sequences_1kb.mat'
 ## file with seeds
 file_seed=$results_dir'/peaks_rmsk_sampled_seed.txt'
 
 mkdir -p $results_dir
 touch $file_seed
 
 # EM param
 n_iter='100'
 n_shift='981'
-n_core=24
+n_core=32
 
 # classify
 for k in 10 20 30
 do
 	## results files
 	file_prob=$results_dir/'peaks_rmsk_sampled_sequences_1kb_'$k'class_prob.mat4d'
 	file_mod1=$results_dir/'peaks_rmsk_sampled_openchromatin_1kb_read_atac_'$k'class_model.mat'
 	file_mod2=$results_dir/'peaks_rmsk_sampled_nucleosomes_1kb_fragment_center_'$k'class_model.mat'
 	file_mod3=$results_dir/'peaks_rmsk_sampled_sequences_1kb_'$k'class_model.mat'	
 	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
 	echo "$file_prob $seed" >> $file_seed
 	bin/EMSequence  --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
 	bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
 	bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2
 	bin/ProbToModel --seq  $file_mat_seq  --prob $file_prob --thread $n_core 1> $file_mod3
 done
diff --git a/scripts/10xgenomics_PBMC_5k_peaks_classification_3/classification_peaks_sampled.sh b/scripts/10xgenomics_PBMC_5k_peaks_classification_3/classification_peaks_sampled.sh
index 30bbc56..56e6072 100755
--- a/scripts/10xgenomics_PBMC_5k_peaks_classification_3/classification_peaks_sampled.sh
+++ b/scripts/10xgenomics_PBMC_5k_peaks_classification_3/classification_peaks_sampled.sh
@@ -1,35 +1,35 @@
 
 # paths
 ## dir
 data_dir="data/10xgenomics_PBMC_5k_peaks"
 results_dir="results/10xgenomics_PBMC_5k_peaks_classification_3"
 ## matrix files
 file_mat_open=$data_dir/'peaks_rmsk_sampled_openchromatin_1kb_read_atac.mat'
 file_mat_nucl=$data_dir/'peaks_rmsk_sampled_nucleosomes_1kb_fragment_center.mat'
 file_mat_seq=$data_dir/'peaks_rmsk_sampled_sequences_1kb.mat'
 ## file with seeds
 file_seed=$results_dir'/peaks_rmsk_sampled_seed.txt'
 
 mkdir -p $results_dir
 touch $file_seed
 
 # EM param
 n_iter='100'
 n_shift='981'
-n_core=24
+n_core=32
 
 # classify
 for k in 10 20 30
 do
 	## results files
 	file_prob=$results_dir/'peaks_rmsk_sampled_openchromatin-sequences_1kb_'$k'class_prob.mat4d'
 	file_mod1=$results_dir/'peaks_rmsk_sampled_openchromatin_1kb_read_atac_'$k'class_model.mat'
 	file_mod2=$results_dir/'peaks_rmsk_sampled_nucleosomes_1kb_fragment_center_'$k'class_model.mat'
 	file_mod3=$results_dir/'peaks_rmsk_sampled_sequences_1kb_'$k'class_model.mat'	
 	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
 	echo "$file_prob $seed" >> $file_seed
 	bin/EMJoint     --read $file_mat_open --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
 	bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
 	bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2
 	bin/ProbToModel --seq  $file_mat_seq  --prob $file_prob --thread $n_core 1> $file_mod3
 done
diff --git a/scripts/10xgenomics_PBMC_5k_peaks_classification_4/classification_peaks_sampled.sh b/scripts/10xgenomics_PBMC_5k_peaks_classification_4/classification_peaks_sampled.sh
index 383afb1..0283689 100755
--- a/scripts/10xgenomics_PBMC_5k_peaks_classification_4/classification_peaks_sampled.sh
+++ b/scripts/10xgenomics_PBMC_5k_peaks_classification_4/classification_peaks_sampled.sh
@@ -1,55 +1,55 @@
 
 # paths
 ## dir
 data_dir="data/10xgenomics_PBMC_5k_peaks"
 pwm_dir="data/pwm/jaspar_2018_clustering/"
 results_dir="results/10xgenomics_PBMC_5k_peaks_classification_4"
 ## matrix files
 file_mat_open=$data_dir/'peaks_rmsk_sampled_openchromatin_1kb_read_atac.mat'
 file_mat_nucl=$data_dir/'peaks_rmsk_sampled_nucleosomes_1kb_fragment_center.mat'
 file_mat_seq=$data_dir/'peaks_rmsk_sampled_sequences_1kb.mat'
 ## file with seeds
 file_seed=$results_dir'/peaks_rmsk_sampled_seed.txt'
 
 mkdir -p $results_dir
 touch $file_seed
 
 # EM param
 n_iter='100'
 n_shift='971'
-n_core=24
+n_core=32
 ## PWM files
 jun="$pwm_dir/cluster_3_node_23_20_motifs_prob.mat"
 hif1a="$pwm_dir/cluster_4_node_31_3_motifs_prob.mat"
 myc="$pwm_dir/cluster_4_node_22_4_motifs_prob.mat"
 pu1="$pwm_dir/cluster_7_node_13_2_motifs_prob.mat"
 cebpb="$pwm_dir/cluster_5_node_20_5_motifs_prob.mat"
 irf4="$pwm_dir/cluster_31_node_4_5_motifs_prob.mat"
 irf2="$pwm_dir/cluster_31_node_5_2_motifs_prob.mat"
 lhx3="$pwm_dir/cluster_1_node_74_2_motifs_prob.mat"
 foxh1="$pwm_dir/cluster_66_1_motifs_prob.mat"
 sox3="$pwm_dir/cluster_33_node_1_2_motifs_prob.mat"
 mef2c="$pwm_dir/cluster_20_4_motifs_prob.mat"
 elf5="$pwm_dir/cluster_7_node_17_5_motifs_prob.mat"
 stat6="$pwm_dir/cluster_32_node_STAT6_1_motifs_prob.mat"
 nfe2="$pwm_dir/cluster_3_node_24_4_motifs_prob.mat"
 ahr="$pwm_dir/cluster_4_node_30_2_motifs_prob.mat"
 e2f2="$pwm_dir/cluster_39_node_1_2_motifs_prob.mat"
 ctcf="$pwm_dir/cluster_48_node_ctcf_1_motifs_prob.mat"
 
 
 # classify
 for k in 17 20 30
 do
 	## results files
 	file_prob=$results_dir/'peaks_rmsk_sampled_sequences_1kb_'$k'class_prob.mat4d'
 	file_mod1=$results_dir/'peaks_rmsk_sampled_openchromatin_1kb_read_atac_'$k'class_model.mat'
 	file_mod2=$results_dir/'peaks_rmsk_sampled_nucleosomes_1kb_fragment_center_'$k'class_model.mat'
 	file_mod3=$results_dir/'peaks_rmsk_sampled_sequences_1kb_'$k'class_model.mat'	
 	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
 	echo "$file_prob $seed" >> $file_seed
 	bin/EMSequence  --seq  $file_mat_seq --class $k --motifs $jun,$hif1a,$myc,$pu1,$cebpb,$irf4,$irf2,$lhx3,$foxh1,$sox3,$mef2c,$elf5,$stat6,$nfe2,$ahr,$e2f2,$ctcf --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
 	bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
 	bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2
 	bin/ProbToModel --seq  $file_mat_seq  --prob $file_prob --thread $n_core 1> $file_mod3
 done
diff --git a/scripts/10xgenomics_PBMC_5k_peaks_classification_5/classification_peaks_sampled.sh b/scripts/10xgenomics_PBMC_5k_peaks_classification_5/classification_peaks_sampled.sh
index d44282f..ba851a6 100755
--- a/scripts/10xgenomics_PBMC_5k_peaks_classification_5/classification_peaks_sampled.sh
+++ b/scripts/10xgenomics_PBMC_5k_peaks_classification_5/classification_peaks_sampled.sh
@@ -1,35 +1,35 @@
 
 # paths
 ## dir
 data_dir="data/10xgenomics_PBMC_5k_peaks"
 results_dir="results/10xgenomics_PBMC_5k_peaks_classification_5"
 ## matrix files
 file_mat_open=$data_dir/'peaks_rmsk_sampled_openchromatin_1kb_read_atac.mat'
 file_mat_nucl=$data_dir/'peaks_rmsk_sampled_nucleosomes_1kb_fragment_center.mat'
 file_mat_seq=$data_dir/'peaks_rmsk_sampled_sequences_1kb.mat'
 ## file with seeds
 file_seed=$results_dir'/peaks_rmsk_sampled_seed.txt'
 
 mkdir -p $results_dir
 touch $file_seed
 
 # EM param
 n_iter='100'
 n_shift='991'
-n_core=24
+n_core=32
 
 # classify
 for k in 20 30 40
 do
 	## results files
 	file_prob=$results_dir/'peaks_rmsk_sampled_sequences_1kb_'$k'class_prob.mat4d'
 	file_mod1=$results_dir/'peaks_rmsk_sampled_openchromatin_1kb_read_atac_'$k'class_model.mat'
 	file_mod2=$results_dir/'peaks_rmsk_sampled_nucleosomes_1kb_fragment_center_'$k'class_model.mat'
 	file_mod3=$results_dir/'peaks_rmsk_sampled_sequences_1kb_'$k'class_model.mat'	
 	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
 	echo "$file_prob $seed" >> $file_seed
 	bin/EMSequence  --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
 	bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
 	bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2
 	bin/ProbToModel --seq  $file_mat_seq  --prob $file_prob --thread $n_core 1> $file_mod3
 done
diff --git a/scripts/10xgenomics_PBMC_5k_peaks_classification_7/classification_peaks.R b/scripts/10xgenomics_PBMC_5k_peaks_classification_7/classification_peaks.R
index 43375ee..4edf115 100644
--- a/scripts/10xgenomics_PBMC_5k_peaks_classification_7/classification_peaks.R
+++ b/scripts/10xgenomics_PBMC_5k_peaks_classification_7/classification_peaks.R
@@ -1,210 +1,216 @@
 setwd(file.path("/", "local", "groux", "scATAC-seq"))
 
 # libraries
 library(RColorBrewer)
 
 # functions
 source(file.path("scripts", "functions.R"))
 
 # the number of classes searched
 n.classes = c(23)
 
-class.tf = c("jun", "HIF1a", "myc", "PU.1", "CEBPb", "IRF4", "IRF2", "LHX3", "FOXH1",
-             "SOX3", "MEF2c", "ELF5", "STAT6", "NFE2", "AHR", "E2F2", "CTCF", "KLF", 
-             "NR4A1", "EGR", "GATA", "NFAT", "RUNX")
+# class.tf = c("jun", "HIF1a", "myc", "PU.1", "CEBPb", "IRF4", "IRF2", "LHX3", "FOXH1",
+#              "SOX3", "MEF2c", "ELF5", "STAT6", "NFE2", "AHR", "E2F2", "CTCF", "KLF", 
+#              "NR4A1", "EGR", "GATA", "NFAT", "RUNX")
+
+class.tf = c("AP1",   "HIF1a", "myc",  "PU.1",  "CEBP", "IRF4", "IRF2", "LHX3", "FOXH1",
+             "SOX",   "MEF2",  "ELF",  "STAT6", "NFE2", "AHR",  "E2F",  "CTCF", "KLF", 
+             "NR4A1", "EGR",   "GATA", "NFAT",  "RUNX")
 
 # path to the images for the logo
 path.a = file.path("res/A.png")
 path.c = file.path("res/C.png")
 path.g = file.path("res/G.png")
 path.t = file.path("res/T.png") 
 
 ################## plot architecture around TF motifs ##################
 
 for(k in n.classes)
 { 
   # sequence
   data = read.sequence.models(file.path("results", "10xgenomics_PBMC_5k_peaks_classification_7",
                                         sprintf("peaks_rmsk_sequences_1kb_%dclass_model_extended.mat", k)))
   model.seq = data$models
   model.prob = data$prob
   data = NULL
   
   # open chromatin
   model.open = read.read.models(file.path("results", "10xgenomics_PBMC_5k_peaks_classification_7", 
                                           sprintf("peaks_rmsk_openchromatin_1kb_read_atac_%dclass_model_extended.mat", k)))$models
   # nucleosomes
   model.nucl = read.read.models(file.path("results", "10xgenomics_PBMC_5k_peaks_classification_7",
                                           sprintf("peaks_rmsk_nucleosomes_1kb_fragment_center_%dclass_model_extended.mat", k)))$models
   
   # plot classes
   col = brewer.pal(3, "Set1")
   # X11(width=24, height=12)
   png(filename=file.path("results", "10xgenomics_PBMC_5k_peaks_classification_7",
                          sprintf("peaks_rmsk_sampled_sequences_%dclass.png", k)),
-      units="in", res=720, width=18, height=12)
-    m = matrix(1:24, nrow=6, ncol=4, byrow=F)
+      units="in", res=720, width=18, height=16)
+    m = matrix(1:24, nrow=8, ncol=3, byrow=F)
     layout(m)
     # order from most to least probable class
     ord      = order(model.prob, decreasing=T)
     ref.open = model.open[ord,, drop=F][,]
     ref.nucl = model.nucl[ord,, drop=F][,]
     ref.seq  = model.seq[,,ord, drop=F][,,]
     prob     = model.prob[ord]
     class    = c(1:nrow(ref.open))[ord]
     tf       = class.tf[ord]
     for(i in 1:nrow(ref.open))
     { # plot logo
       par(mar=c(2,2,2,0))
       plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t,
                 main=sprintf("%s (p=%.2f)", tf[i], prob[i]))
       # x-axis
       x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2, length.out=3)
       x.at = seq(1, ncol(ref.open), length.out=length(x.lab))
       axis(1, at=x.at, labels=x.lab)
       # y-axis is [0,1] for min/max signal
       y.at  = seq(0, 2, length.out=2)
       y.lab = c("min", "max")
       axis(2, at=y.at, labels=y.lab)
       # plot signal (multiplies by 2 because the y-axis goes to 2 bits)
       lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=1, col=col[1])
       lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=1, col=col[2])
     }
     
     # inlets with center
     row_n   = 1         # row counter
     col_n   = 1         # column counter
     row_h   = 1/nrow(m) # height of row
     col_w   = 1/ncol(m) # width of column
     row_cor = row_h / 3
     col_cor = col_w / 3
     for(i in 1:nrow(ref.open))
     { # plot logo center
       left   = (col_w*col_n) - col_w
       right  = left + col_w
       left   = right - col_cor
-      bottom = 1 - (row_h*row_n)
-      top    = bottom + row_h
-      bottom = top - row_cor
+      bottom = 1 - ((row_h*row_n)-(0.2*row_h))
+      top    = bottom + row_cor
+      # top    = bottom + row_h
+      # bottom = top - row_cor
 
       p= par(fig=c(left, right, bottom, top),
              mar=c(0,0,0,0),
              new=T)
       idx = (ceiling(dim(ref.seq)[2]/2)-1-10):(ceiling(dim(ref.seq)[2]/2)-1+10)
       plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t)
       # plot signal (multiplies by 2 because the y-axis goes to 2 bits)
       lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=1, col=col[1])
       lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=1, col=col[2])
       # xaxis
       # x.at = seq(1, length(idx), length.out = 3)
       # x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2)[idx][x.at]
       x.at  = ceiling(length(idx)/2)
       x.lab = 0
       axis(1, at=x.at, labels=x.lab)
       # yaxis
       axis(2, at=y.at, labels=y.lab)
       row_n = row_n + 1
       if(i %% nrow(m) == 0)
       { col_n = col_n + 1
         row_n = 1
       }
       par(p)
     }
   dev.off()
 }
 
 
 
 ################## zoom in the center ##################
 
 for(k in n.classes)
 { idx = 516 + c(-100:+100)
   # sequence
   data = read.sequence.models(file.path("results", "10xgenomics_PBMC_5k_peaks_classification_7",
                                         sprintf("peaks_rmsk_sequences_1kb_%dclass_model_extended.mat", k)))
   model.seq = data$models[,idx,]
   model.prob = data$prob
   data = NULL
   
   # open chromatin
   model.open = read.read.models(file.path("results", "10xgenomics_PBMC_5k_peaks_classification_7", 
                                           sprintf("peaks_rmsk_openchromatin_1kb_read_atac_%dclass_model_extended.mat", k)))$models[,idx]
   # nucleosomes
   model.nucl = read.read.models(file.path("results", "10xgenomics_PBMC_5k_peaks_classification_7",
                                           sprintf("peaks_rmsk_nucleosomes_1kb_fragment_center_%dclass_model_extended.mat", k)))$models[,idx]
   
   # plot classes
   col = brewer.pal(3, "Set1")
   # X11(width=24, height=12)
   png(filename=file.path("results", "10xgenomics_PBMC_5k_peaks_classification_7",
                          sprintf("peaks_rmsk_sampled_sequences_%dclass_2.png", k)),
-      units="in", res=720, width=18, height=12)
-    m = matrix(1:24, nrow=6, ncol=4, byrow=F)
-    layout(m)
-    # order from most to least probable class
-    ord      = order(model.prob, decreasing=T)
-    ref.open = model.open[ord,, drop=F][,]
-    ref.nucl = model.nucl[ord,, drop=F][,]
-    ref.seq  = model.seq[,,ord, drop=F][,,]
-    prob     = model.prob[ord]
-    class    = c(1:nrow(ref.open))[ord]
-    tf       = class.tf[ord]
-    for(i in 1:nrow(ref.open))
-    { # plot logo
-      par(mar=c(2,2,2,0))
-      plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t,
-                main=sprintf("%s (p=%.2f)", tf[i], prob[i]))
-      # x-axis
-      x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2, length.out=3)
-      x.at = seq(1, ncol(ref.open), length.out=length(x.lab))
-      axis(1, at=x.at, labels=x.lab)
-      # y-axis is [0,1] for min/max signal
-      y.at  = seq(0, 2, length.out=2)
-      y.lab = c("min", "max")
-      axis(2, at=y.at, labels=y.lab)
-      # plot signal (multiplies by 2 because the y-axis goes to 2 bits)
-      lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=1, col=col[1])
-      lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=1, col=col[2])
-    }
+      units="in", res=720, width=18, height=16)
+  m = matrix(1:24, nrow=8, ncol=3, byrow=F)
+  layout(m)
+  # order from most to least probable class
+  ord      = order(model.prob, decreasing=T)
+  ref.open = model.open[ord,, drop=F][,]
+  ref.nucl = model.nucl[ord,, drop=F][,]
+  ref.seq  = model.seq[,,ord, drop=F][,,]
+  prob     = model.prob[ord]
+  class    = c(1:nrow(ref.open))[ord]
+  tf       = class.tf[ord]
+  for(i in 1:nrow(ref.open))
+  { # plot logo
+    par(mar=c(2,2,2,0))
+    plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t,
+              main=sprintf("%s (p=%.2f)", tf[i], prob[i]))
+    # x-axis
+    x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2, length.out=3)
+    x.at = seq(1, ncol(ref.open), length.out=length(x.lab))
+    axis(1, at=x.at, labels=x.lab)
+    # y-axis spans [0,2] (bits); the ticks at 0 and 2 are labelled min/max for the signal
+    y.at  = seq(0, 2, length.out=2)
+    y.lab = c("min", "max")
+    axis(2, at=y.at, labels=y.lab)
+    # plot signal (multiplies by 2 because the y-axis goes to 2 bits)
+    lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=1, col=col[1])
+    lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=1, col=col[2])
+  }
+  
+  # inlets with center
+  row_n   = 1         # row counter
+  col_n   = 1         # column counter
+  row_h   = 1/nrow(m) # height of row
+  col_w   = 1/ncol(m) # width of column
+  row_cor = row_h / 3
+  col_cor = col_w / 3
+  for(i in 1:nrow(ref.open))
+  { # plot logo center
+    left   = (col_w*col_n) - col_w
+    right  = left + col_w
+    left   = right - col_cor
+    bottom = 1 - ((row_h*row_n)-(0.2*row_h))
+    top    = bottom + row_cor
+    # top    = bottom + row_h
+    # bottom = top - row_cor
     
-    # inlets with center
-    row_n   = 1         # row counter
-    col_n   = 1         # column counter
-    row_h   = 1/nrow(m) # height of row
-    col_w   = 1/ncol(m) # width of column
-    row_cor = row_h / 3
-    col_cor = col_w / 3
-    for(i in 1:nrow(ref.open))
-    { # plot logo center
-      left   = (col_w*col_n) - col_w
-      right  = left + col_w
-      left   = right - col_cor
-      bottom = 1 - (row_h*row_n)
-      top    = bottom + row_h
-      bottom = top - row_cor
-      
-      p= par(fig=c(left, right, bottom, top),
-             mar=c(0,0,0,0),
-             new=T)
-      idx = (ceiling(dim(ref.seq)[2]/2)-1-10):(ceiling(dim(ref.seq)[2]/2)-1+10)
-      plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t)
-      # plot signal (multiplies by 2 because the y-axis goes to 2 bits)
-      lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=1, col=col[1])
-      lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=1, col=col[2])
-      # xaxis
-      # x.at = seq(1, length(idx), length.out = 3)
-      # x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2)[idx][x.at]
-      x.at  = ceiling(length(idx)/2)
-      x.lab = 0
-      axis(1, at=x.at, labels=x.lab)
-      # yaxis
-      axis(2, at=y.at, labels=y.lab)
-      row_n = row_n + 1
-      if(i %% nrow(m) == 0)
-      { col_n = col_n + 1
-        row_n = 1
-      }
-      par(p)
+    p= par(fig=c(left, right, bottom, top),
+           mar=c(0,0,0,0),
+           new=T)
+    idx = (ceiling(dim(ref.seq)[2]/2)-1-10):(ceiling(dim(ref.seq)[2]/2)-1+10)
+    plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t)
+    # plot signal (multiplies by 2 because the y-axis goes to 2 bits)
+    lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=1, col=col[1])
+    lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=1, col=col[2])
+    # xaxis
+    # x.at = seq(1, length(idx), length.out = 3)
+    # x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2)[idx][x.at]
+    x.at  = ceiling(length(idx)/2)
+    x.lab = 0
+    axis(1, at=x.at, labels=x.lab)
+    # yaxis
+    axis(2, at=y.at, labels=y.lab)
+    row_n = row_n + 1
+    if(i %% nrow(m) == 0)
+    { col_n = col_n + 1
+    row_n = 1
     }
+    par(p)
+  }
   dev.off()
 }
diff --git a/scripts/10xgenomics_PBMC_5k_peaks_classification_8/analysis_test.R b/scripts/10xgenomics_PBMC_5k_peaks_classification_8/analysis_test.R
new file mode 100644
index 0000000..042553b
--- /dev/null
+++ b/scripts/10xgenomics_PBMC_5k_peaks_classification_8/analysis_test.R
@@ -0,0 +1,121 @@
+setwd(file.path("/", "local", "groux", "scATAC-seq"))
+
+# libraries
+library(RColorBrewer)
+
+# functions (read.sequence.models(), read.read.models() and plot.logo() are used below)
+source(file.path("scripts", "functions.R"))
+
+# paths to the images of the letters A, C, G and T used to draw the logos
+path.a = file.path("res/A.png")
+path.c = file.path("res/C.png")
+path.g = file.path("res/G.png")
+path.t = file.path("res/T.png") 
+
+
+# the TF names, each one has its own result directory
+class.tf = c("jun", "HIF1a", "myc", "PU.1", "CEBPb", "IRF4", "IRF2", "LHX3", "FOXH1",
+             "SOX3", "MEF2c", "ELF5", "STAT6", "NFE2", "AHR", "E2F2", "CTCF", "KLF", 
+             "NR4A1", "EGR", "GATA", "NFAT", "RUNX")
+
+# the number of classes searched for each TF (only the 10 class partitions are plotted)
+# n.classes = 2:10
+n.classes = 10:10
+
+# the methods used for the classification for each TF (sub-directory names)
+# em.methods = c("read", "consensussequence", "read_consensussequence")
+em.methods = c("consensussequence_kmer")
+
+# loop over the TFs
+for(tf in class.tf)
+{ 
+  # loop over the classification methods
+  for(method in em.methods)
+  { 
+    dir.tf = file.path("results", "10xgenomics_PBMC_5k_peaks_classification_8", tf, method)
+  
+    for(k in n.classes)
+    {
+      # sequence models and class probabilities
+      data = read.sequence.models(file.path(dir.tf, sprintf("data_class%s_%dclass_model_sequence.mat2d", tf, k)))
+      model.seq = data$models
+      model.prob = data$prob
+      data = NULL
+      # open chromatin
+      model.open = read.read.models(file.path(dir.tf, sprintf("data_class%s_%dclass_model_open.mat2d", tf, k)))$models
+      # nucleosomes
+      model.nucl = read.read.models(file.path(dir.tf, sprintf("data_class%s_%dclass_model_nucl.mat2d", tf, k)))$models
+      
+      # plot classes : one panel per class, on a grid of 5 rows x 2 columns
+      col = brewer.pal(3, "Set1")
+      # X11(width=20, height=10)
+      png(filename=file.path(dir.tf, sprintf("data_class%s_%dclass.png", tf, k)),
+          units="in", res=720, width=20, height=10)
+        m = matrix(1:10, nrow=5, ncol=2, byrow=F)
+        layout(m)
+        # order from most to least probable class
+        ord      = order(model.prob, decreasing=T)
+        ref.open = model.open[ord,, drop=F][,]
+        ref.nucl = model.nucl[ord,, drop=F][,]
+        ref.seq  = model.seq[,,ord, drop=F][,,]
+        prob     = model.prob[ord]
+        class    = c(1:nrow(ref.open))[ord] # index of each class before the ordering, used in the titles
+        for(i in 1:nrow(ref.open))
+        { # plot the sequence logo of the class over the entire model
+          plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t,
+                    main=sprintf("class %d (p=%.2f)", class[i], prob[i]))
+          # x-axis : 3 ticks, labelled with the position relative to the model center
+          x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2, length.out=3)
+          x.at = seq(1, ncol(ref.open), length.out=length(x.lab))
+          axis(1, at=x.at, labels=x.lab)
+          # y-axis : ticks at 0 and 2 (bottom and top of the logo scale), labelled min/max signal
+          y.at  = seq(0, 2, length.out=2)
+          y.lab = c("min", "max")
+          axis(2, at=y.at, labels=y.lab)
+          # plot signal (multiplies by 2 because the y-axis goes to 2 bits)
+          lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=1, col=col[1])
+          lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=1, col=col[2])
+        }
+        # insets : the 21 central positions of each class, drawn a second time over its panel
+        row_n   = 1         # row counter
+        col_n   = 1         # column counter
+        row_h   = 1/nrow(m) # height of row
+        col_w   = 1/ncol(m) # width of column
+        row_cor = row_h / 3 # height of an inset : a third of a row
+        col_cor = col_w / 3 # width of an inset : a third of a column
+        for(i in 1:nrow(ref.open))
+        { # inset coordinates (fractions of the figure) : the top-right third of the current grid cell
+          left   = (col_w*col_n) - col_w
+          right  = left + col_w
+          left   = right - col_cor
+          bottom = 1 - (row_h*row_n)
+          top    = bottom + row_h
+          bottom = top - row_cor
+          
+          p= par(fig=c(left, right, bottom, top),
+                 mar=c(0,0,0,0),
+                 new=T)
+          idx = (ceiling(dim(ref.seq)[2]/2)-1-10):(ceiling(dim(ref.seq)[2]/2)-1+10)
+          plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t)
+          # plot signal (multiplies by 2 because the y-axis goes to 2 bits), still scaled by the maximum over the entire model
+          lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=1, col=col[1])
+          lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=1, col=col[2])
+          # xaxis : a single tick, labelled 0, in the middle of the window
+          # x.at = seq(1, length(idx), length.out = 3)
+          # x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2)[idx][x.at]
+          x.at  = ceiling(length(idx)/2)
+          x.lab = 0
+          axis(1, at=x.at, labels=x.lab)
+          # yaxis, same ticks as the main panels (y.at and y.lab are set in the loop above)
+          axis(2, at=y.at, labels=y.lab)
+          row_n = row_n + 1
+          if(i %% nrow(m) == 0)
+          { col_n = col_n + 1
+            row_n = 1
+          }
+          par(p)
+        }
+      dev.off()
+    }
+  }
+}
diff --git a/scripts/10xgenomics_PBMC_5k_peaks_classification_8/classification_peaks.sh b/scripts/10xgenomics_PBMC_5k_peaks_classification_8/classification_peaks.sh
new file mode 100755
index 0000000..8c29086
--- /dev/null
+++ b/scripts/10xgenomics_PBMC_5k_peaks_classification_8/classification_peaks.sh
@@ -0,0 +1,119 @@
+#!/usr/bin/env bash
+# For each TF class of the 23-class partition : extracts the class data, then partitions it into n_class_min..n_class_max sub-classes.
+
+file_bed='data/10xgenomics_PBMC_5k/atac_v1_pbmc_5k_peaks_rmsk_sampled.bed'
+file_bam_open='data/10xgenomics_PBMC_5k/atac_v1_pbmc_5k_possorted_filtered_30-84bp.bam'
+file_bai_open='data/10xgenomics_PBMC_5k/atac_v1_pbmc_5k_possorted_filtered_30-84bp.bam.bai'
+file_bam_nucl='data/10xgenomics_PBMC_5k/atac_v1_pbmc_5k_possorted_filtered_nucleosomes.bam'
+file_bai_nucl='data/10xgenomics_PBMC_5k/atac_v1_pbmc_5k_possorted_filtered_nucleosomes.bam.bai'
+file_fasta='data/genomes/hg19.fasta'
+file_prob='results/10xgenomics_PBMC_5k_peaks_classification_6/peaks_rmsk_sampled_sequences_1kb_23class_prob.mat4d'
+
+# the TF associated to each of the 23 classes (in the good order)
+# tfs[0] = class 1
+tfs=('jun' 'HIF1a' 'myc'  'PU.1'  'CEBPb' 'IRF4'  'IRF2'
+    'LHX3' 'FOXH1' 'SOX3' 'MEF2c' 'ELF5'  'STAT6' 'NFE2'
+    'AHR'  'E2F2'  'CTCF' 'KLF'   'NR4A1' 'EGR'   'GATA'
+    'NFAT' 'RUNX')
+
+# EM parameters
+n_class_min=1     # min nb of classes to search
+n_class_max=10    # max nb of classes to search
+n_iter=20         # nb of iter for read pattern search
+n_iter2=100       # nb of iter for sequence pattern search
+n_shift=1         # shift freedom to find diff. footprint on motif of TF of interest
+n_shift2=21       # shift freedom to find diff. other footprint/motif
+n_shift3=981      # shift freedom to find other motifs (=20bp motif)
+n_core=32
+
+# get each class
+for class in $(seq 1 ${#tfs[*]})
+do
+	# TF for that class
+	tf=${tfs[$(($class-1))]}
+	echo "extracting class $tf..."
+
+	# create directories for each type of partitioning for this class
+	dir_class="results/10xgenomics_PBMC_5k_peaks_classification_8/$tf"
+	dir_class_read=$dir_class'/read'
+	# the sequence only partitions are written in 'consensussequence_kmer'
+	dir_class_cons=$dir_class'/consensussequence_kmer'
+	dir_class_joint=$dir_class'/read_consensussequence'
+	mkdir -p "$dir_class"
+	mkdir -p "$dir_class_read"
+	mkdir -p "$dir_class_cons"
+	mkdir -p "$dir_class_joint"
+		
+	# extract class
+	# $file_prob (23-class input partition) must never be reassigned : the per-k outputs below use $file_prob_k
+	file_class_open=$dir_class'/data_class'$tf'_open.mat2d'
+	file_class_nucl=$dir_class'/data_class'$tf'_nucl.mat2d'
+	file_class_consseq=$dir_class'/data_class'$tf'_consensus_sequence.mat3d'
+	bin/ClassReadDataCreator     --bed "$file_bed" --bam "$file_bam_open" --bai "$file_bai_open" --prob "$file_prob" --from -500 --to 500 --binSize 1 --k $class --method "read_atac" > "$file_class_open"
+	bin/ClassReadDataCreator     --bed "$file_bed" --bam "$file_bam_nucl" --bai "$file_bai_nucl" --prob "$file_prob" --from -500 --to 500 --binSize 1 --k $class --method "fragment_center" > "$file_class_nucl"
+	bin/ClassSequenceDataCreator --bed "$file_bed" --fasta "$file_fasta"                         --prob "$file_prob" --from -500 --to 500             --k $class --out "$file_class_consseq"
+
+	# list 0 signal rows in open chromatin
+	# regions will be classified according to open chromatin
+	# profiles -> allow to ignore these regions during
+	# classification process
+	filter_file=$dir_class'/data_class'$tf'_open_emptyrows.mat2d'
+	bin/WhichNullRows --mat "$file_class_open" > "$filter_file"
+
+	# seed file
+	file_seed_open=$dir_class_read'/data_class'$tf'_seed.txt'
+	file_seed_cseq=$dir_class_cons'/data_class'$tf'_seed.txt'
+	file_seed_joint=$dir_class_joint'/data_class'$tf'_seed.txt'
+	touch "$file_seed_open"
+	touch "$file_seed_joint"
+	touch "$file_seed_cseq"
+
+	# partition data
+	echo "classifying class $tf..."
+	for k in $(seq $n_class_min $n_class_max)
+	do
+		# open chromatin only
+		# find different types of footprint for this TF
+		# no flip, motifs are already aligned/oriented in the same orientation
+		file_prob_k=$dir_class_read'/data_class'$tf'_'$k'class_prob.mat4d'
+		file_mod1=$dir_class_read'/data_class'$tf'_'$k'class_model_open.mat2d'
+		file_mod2=$dir_class_read'/data_class'$tf'_'$k'class_model_nucl.mat2d'
+		file_mod3=$dir_class_read'/data_class'$tf'_'$k'class_model_sequence.mat2d'
+		seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
+		echo "$file_prob_k $seed" >> "$file_seed_open"
+		bin/EMRead --read "$file_class_open" --iter $n_iter --class $k --shift $n_shift --filter "$filter_file" --seed "$seed" --thread $n_core --out "$file_prob_k"
+		bin/ProbToModel --read    "$file_class_open"    --prob "$file_prob_k" --filter "$filter_file" --thread $n_core 1> "$file_mod1"
+		bin/ProbToModel --read    "$file_class_nucl"    --prob "$file_prob_k" --filter "$filter_file" --thread $n_core 1> "$file_mod2"
+		bin/ProbToModel --consseq "$file_class_consseq" --prob "$file_prob_k" --filter "$filter_file" --thread $n_core 1> "$file_mod3"
+
+		# open chromatin and sequence
+		# find different footprints for this TF and other TF if there are
+		# use flip because only this TF motifs are aligned/oriented in same orientation
+		file_prob_k=$dir_class_joint'/data_class'$tf'_'$k'class_prob.mat4d'
+		file_mod1=$dir_class_joint'/data_class'$tf'_'$k'class_model_open.mat2d'
+		file_mod2=$dir_class_joint'/data_class'$tf'_'$k'class_model_nucl.mat2d'
+		file_mod3=$dir_class_joint'/data_class'$tf'_'$k'class_model_sequence.mat2d'
+		seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
+		echo "$file_prob_k $seed" >> "$file_seed_joint"
+		bin/EMJoint     --read    "$file_class_open" --consseq "$file_class_consseq" --iter $n_iter --class $k --shift $n_shift2 --flip --filter "$filter_file" --seed "$seed" --thread $n_core --out "$file_prob_k"
+		bin/ProbToModel --read    "$file_class_open"    --prob "$file_prob_k" --filter "$filter_file" --thread $n_core 1> "$file_mod1"
+		bin/ProbToModel --read    "$file_class_nucl"    --prob "$file_prob_k" --filter "$filter_file" --thread $n_core 1> "$file_mod2"
+		bin/ProbToModel --consseq "$file_class_consseq" --prob "$file_prob_k" --filter "$filter_file" --thread $n_core 1> "$file_mod3"
+
+		# sequence
+		# find other motifs
+		# use flip because only this TF motifs are aligned/oriented in same orientation
+		# run only once, with the seed written in the seed file, such that the partition can be reproduced
+		file_prob_k=$dir_class_cons'/data_class'$tf'_'$k'class_prob_seq.mat4d'
+		file_mod1=$dir_class_cons'/data_class'$tf'_'$k'class_model_open.mat2d'
+		file_mod2=$dir_class_cons'/data_class'$tf'_'$k'class_model_nucl.mat2d'
+		file_mod3=$dir_class_cons'/data_class'$tf'_'$k'class_model_sequence.mat2d'
+		seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
+		echo "$file_prob_k $seed" >> "$file_seed_cseq"
+		bin/EMConsensusSequence --consseq "$file_class_consseq" --iter $n_iter2 --class $k --shift $n_shift3 --flip --filter "$filter_file" --seed "$seed" --thread $n_core --out "$file_prob_k"
+		bin/ProbToModel --read    "$file_class_open"    --prob "$file_prob_k" --filter "$filter_file" --thread $n_core 1> "$file_mod1"
+		bin/ProbToModel --read    "$file_class_nucl"    --prob "$file_prob_k" --filter "$filter_file" --thread $n_core 1> "$file_mod2"
+		bin/ProbToModel --consseq "$file_class_consseq" --prob "$file_prob_k" --filter "$filter_file" --thread $n_core 1> "$file_mod3"
+	done
+done
+
diff --git a/scripts/functions.R b/scripts/functions.R
index 0345e93..11fdcb4 100644
--- a/scripts/functions.R
+++ b/scripts/functions.R
@@ -1,422 +1,490 @@
+#' Reads the text version of a matrix file (for instance a mat4d
+#' file) and loads it into an array.
+#' Two kinds of lines are interpreted :
+#' 1) lines containing ',' are coordinate lines. A line with n ','
+#'    sets the index of dimension n+1 to the (0-based) value found
+#'    after the last ',' and resets the indices of dimensions 1..n.
+#' 2) other lines are rows of space separated values, stored along
+#'    the 2nd dimension at the current coordinates.
+#' Empty lines are ignored.
+#' \param path the path to the file to read
+#' \param dim a vector containing the dimensions 
+#' of the matrix stored, for instance c(10,3,1,2).
+#' \return an array of dimensions <dim> (cells not set in the file are NA)
+#' \author Romain Groux
+read.arraytxt = function(path, dim)
+{ con = file(path, "r")
+  # ensures the connection is closed, even if the parsing fails
+  on.exit(close(con), add=TRUE)
+
+  dimensionality = length(dim)
+  values         = array(dim=dim)
+  # current (1-based) coordinates. The row and column start at 1
+  # such that a file without coordinate line (2D matrix) is read
+  # from its 1st row, the other ones are set by the coordinate lines
+  dim_current    = rep(0, dimensionality)
+  dim_current[seq_len(min(2, dimensionality))] = 1
+
+  repeat
+  { line = readLines(con, n = 1)
+
+    # eof
+    if(length(line) == 0)
+    { break }
+
+    # number of ',' in line (base R, stringr is not needed)
+    n_coma = lengths(regmatches(line, gregexpr(",", line, fixed=TRUE)))
+
+    # line contains ',' : indicates a coordinate
+    if(n_coma != 0)
+    { # sets the given coordinate according to what is found in file (correct 0 to 1 based)
+      x                     = as.numeric(unlist(strsplit(line, ','))[n_coma+1]) + 1
+      dim_current[n_coma+1] = x
+      # every coordinate before in the vector should be reset
+      dim_current[seq_len(n_coma)] = 1
+      next
+    }
+
+    # line contains data
+    # na.omit because some extra ' ' in <line> may create NA values in <fields>
+    fields = na.omit(as.numeric(unlist(strsplit(line, split=" "))))
+    # empty line : nothing to store, the row counter is left untouched
+    if(length(fields) == 0)
+    { next }
+    # fill a row
+    for(j in seq_along(fields))
+    { values[t(dim_current)] = fields[j]
+      # column + 1
+      dim_current[2]        = dim_current[2] + 1
+    }
+    # reset column
+    dim_current[2] = 1
+    # row + 1
+    dim_current[1] = dim_current[1] + 1
+  }
+  return(values)
+}
+
+
 #' Reads a read density model file and returns a list 
 #' with the class models and the associated 
 #' class probabilities.
 #' \param file the path to the file of interest.
 #' \return a list of two elements : "models" 
 #' a matrix with the class models on each row 
 #' and "prob" the associated class probabilities.
 #'
 read.read.models = function(file)
 { mod  = as.matrix(read.table(file), drop=F)
   prob = mod[,1]
   mod  = mod[,-1, drop=F]
   rownames(mod) = paste("class", 1:nrow(mod))
   colnames(mod) = 1:ncol(mod)
   return(list(models=mod, prob=prob))
 }
 
 #' Reads a sequence model file and returns a list 
 #' with the class models and the associated 
 #' class probabilities.
 #' \param file the path to the file of interest.
 #' \return a list of two elements : "models" 
 #' an array containing the models as probability
 #' matrices with the following dimensions :
 #' 1) 4 for A,C,G,T
 #' 2) the model length
 #' 3) the numbler of classes
 #' and "prob" the associated class probabilities.
 #'
 read.sequence.models = function(file)
 { data  = as.matrix(read.table(file.path(file)))
 
   # prob  = unique(data[,1])
   prob = data[,1][rep(c(T,F,F,F), rep=nrow(data)/4)]
   
   n_class = length(prob)
   l_model = ncol(data) - 1
   n_row   = 4
   
   models = array(dim=c(n_row, l_model, n_class))
   dimnames(models)[[1]] = c('A', 'C', 'G', 'T')
   dimnames(models)[[2]] = 1:l_model
   dimnames(models)[[3]] = paste("class" , 1:n_class)
   
   i_from = 1
   i_to = i_from + n_row - 1
   for(k in 1:n_class)
   { models[,,k] = data[i_from:i_to,-1]
     i_from = i_to + 1
     i_to = i_from + n_row - 1
   }
   return(list(models=models, prob=prob))
 }
 
 #' Computes the reverse complement of a
 #' DNA motif.
 #' \param the motif of interest with 
 #' A,C,G,T on the rows and the positions 
 #' on the columns.
 #' \return the reverse complement motif.
 #' \author Romain Groux
 reverse.complement = function(motif)
 { n.row = nrow(motif)
   n.col = ncol(motif)
   motif.rev = matrix(nrow=n.row, ncol=n.col)
   
   for(i in 1:n.row)
   { for(j in 1:n.col)
     { i_rev = n.row - i + 1
       j_rev = n.col - j + 1
       motif.rev[i_rev,j_rev] = motif[i,j]
     }
   }
   return(motif.rev)
 }
 
 #' Computes the Kullback-Leibler
 #' divergence of a given distristribution
 #' x to its corresponding uniform 
 #' counterpart.
 #' For instance c(0.7, 0.1, 0.1, 0.1) 
 #' will be compared to 
 #' c(0.25, 0.25, 0.25, 0.25)
 #' \param x a vector containing the 
 #' probability mass function values of
 #' the distribution for all possible
 #' values.
 #' \return the Kullback-Leibler
 #' divergence 
 kl.divergence = function(x)
 {
   kl = 0
   p0 = 1 / length(x)
   for(i in x)
   { kl = kl + (i * log(i/p0)) }
   return(kl)
 }
 
 #' A function to plot a DNA logo of a letter probability 
 #' matrix (pwm). In essence, it does exactly the same 
 #' as seqLogo::seqLogo except that it does not need 
 #' a new display device on its own.
 #' \param pwm the letter probability matrix.
 #' \param path.a the path to a file containing
 #' the image to display for the A character, 
 #' in PNG format.
 #' \param path.c the path to a file containing
 #' the image to display for the C character, 
 #' in PNG format.
 #' \param path.g the path to a file containing
 #' the image to display for the G character, 
 #' in PNG format.
 #' \param path.t the path to a file containing
 #' the image to display for the T character, 
 #' in PNG format.
 #' \param pseudocounts a pseudocounts to add to
 #' the probabilities to avoid 0's.
 #' \param ... additional plotting parameters for
 #' plot().
 #' \author Romain Groux
 plot.logo = function(pwm,
                      path.a,
                      path.c,
                      path.g,
                      path.t,
                      pseudocounts=10e-10,
                      ...)
 { n.row = 4
   n.col = ncol(pwm)
   
   if(nrow(pwm) != n.row)
   { stop("Error! pwm should have 4 rows!") }
   if(length(dim(pwm)) != 2)
   { stop("Error! pwm should be a matrix!") }
   
   # images for nucleotides
   require(png)
   image.a = readPNG(path.a)
   image.c = readPNG(path.c)
   image.g = readPNG(path.g)
   image.t = readPNG(path.t)
   
   # add pseudo-counts to avoid 0's
   pwm = pwm + pseudocounts
   for(j in 1:n.col)
   { pwm[,j] = pwm[,j] / sum(pwm[,j]) }
   
   # entropy
   h = rep(0, n.col)
   for(j in 1:n.col)
   { for(i in 1:n.row)
     { h[j] = h[j] - pwm[i,j] * log2(pwm[i,j]) }
   }
   # information content
   r = -h + log2(4)
   # height
   heights = matrix(nrow=n.row, ncol=n.col, data=0)
   for(i in 1:n.row)
   { for(j in 1:n.col)
     { heights[i,j] = pwm[i,j] * r[j]  }
   }
   
   # compute coordinates
   x.coord = matrix(nrow=2, ncol=n.col, data=0)
   rownames(x.coord) = c("from", "to")
   for(i in 1:n.col)
   { x.coord[1,i] = i - 0.5
     x.coord[2,i] = i + 0.5
   }
   
   # plot
   x.lim = c(1,n.col)
   y.lim = c(0,2)
   x.at  = 1:n.col
   plot(0, 0, col=0, xlim=x.lim, ylim=y.lim, bty='n',
        xaxt='n', yaxt='n', xlab="", ylab="",
        ...)
   # axis(1, at=x.at, labels=x.at)
   for(j in 1:n.col)
   { # highest at top
     ord = order(heights[,j], decreasing=F)
     x_left  = x.coord[1,j]
     x_right = x.coord[2,j]
     y_curr  = 0
     for(i in ord)
     { height   = heights[i,j]
       y_bottom = y_curr
       y_top    = y_bottom + height
       if(i == 1)
       { rasterImage(image.a, x_left, y_bottom, x_right, y_top) }
       if(i == 2)
       { rasterImage(image.c, x_left, y_bottom, x_right, y_top) }
       if(i == 3)
       { rasterImage(image.g, x_left, y_bottom, x_right, y_top) }
       if(i == 4)
       { rasterImage(image.t, x_left, y_bottom, x_right, y_top) }
       y_curr = y_curr + height
     }
   }
 }
 
 
 #' Compute the euclidean distance between two models.
 #' It also check if a reference is in reverse orientation 
 #' and returns the smallest distance value.
 #' \param ref1 a vector containing the first reference.
 #' \param ref2 a vector containing the second reference.
 #' \return the euclidean distance.
 eucl.dist.models = function(mod1, mod2)
 {
   return(min(sqrt(sum(((mod1 -     mod2 ) ^ 2))),
              sqrt(sum(((mod1 - rev(mod2)) ^ 2)))))
 }
 
 
 #' Compute the correlation distance between two models.
 #' It also check if a reference is in reverse orientation 
 #' and returns the smallest distance value.
 #' \param ref1 a vector containing the first reference.
 #' \param ref2 a vector containing the second reference.
 #' \return the euclidean distance.
 cor.dist.models= function(mod1, mod2)
 {
   return(1 - min(cor(mod1,     mod2 ),
                  cor(mod1, rev(mod2))))
 }
 
 
 #' Computes the (eucliden) distance matrix  for all the given 
 #' the models As some models may be in reverse 
 #' orientation compared to others, the distance in both 
 #' orientation is computed, for each pair, and the best is 
 #' returned.
 #' \param models a matrix with the models on each row.
 #' \return a matrix containing the distances between each reference.
 distance.model = function(models)
 { n = nrow(models)
   d = matrix(nrow=n, ncol=n, data=0)
   
   for(i in 1:n)
   { for(j in 1:i)
     { x =  eucl.dist.models(models[i,], models[j,])
       d[i,j] = x
       d[j,i] = x
     }
   }
   return(d)
 }
 
 
 get_matches = function(distances, run_value)
 {
   matches = matrix(nrow=0, ncol=4)
   
   # references of run i on the row -> y coord
   # references of run j on the col -> x coord
   
   # run labels
   run_i = 1
   # run_j = 2
   
   for(run_j in setdiff(unique(run_value), run_i))
   {
     # number of references in each run
     n_i = length(which(run_value == run_i))
     n_j = length(which(run_value == run_j))
     
     index_i = which(run_value == run_i) # rows    of run i
     index_j = which(run_value == run_j) # columns of run j
     
     i_taken = c() # classes of i already matched -> rows    to ignore
     j_taken = c() # classes of j already matched -> columns to ignore
     
     # while not all classes in j have been assigned a best match
     row_n = 1
     while(length(j_taken) < n_j)
     { if(length(i_taken) == 0 &&
          length(j_taken) == 0)
       { distances_tmp = distances[index_i, index_j, drop=F]
         coord   = which(distances_tmp == min(distances_tmp), arr.ind=T)
         coord_i = as.numeric(rownames(distances_tmp)[coord[1]])
         coord_j = as.numeric(colnames(distances_tmp)[coord[2]])
         coord   = c(coord_i, coord_j)
       } else { 
         rows = setdiff(index_i, i_taken)
         cols = setdiff(index_j, j_taken)
         distances_tmp = distances[rows, cols, drop=F]
         coord = which(distances_tmp == min(distances_tmp), arr.ind=T)
         coord_i = as.numeric(rownames(distances_tmp)[coord[1]])
         coord_j = as.numeric(colnames(distances_tmp)[coord[2]])
         coord   = c(coord_i, coord_j)
       }
       coord   = c(coord, row_n, run_j)
       i_taken = c(i_taken, coord[1])
       j_taken = c(j_taken, coord[2])
       matches = rbind(matches, coord)
       row_n = row_n + 1
     }
   }
   return(matches)
 }
 
 
 
 #'Creates a composite figure in which several class references from 
 #'several partitions, with different numbers of classes, are plotted.
 #'The figure is composed of a matrix of <k_max> rows and <n_run> 
 #'columns where <k_max> is the highest number of classes in all 
 #'partitions and <n_run> the number of different partition. T
 #'The first column will contain the references of the 
 #'partition with <k_max> classes. The next columns will contain the 
 #'references of the partition with the second biggest number of 
 #'classes (and so on). In a given column, except the 1st one, 
 #'the references are ordered (over the rows) such that the 
 #'overall similarity (euclidean distance) with the 1st column 
 #'references are maximized.
 #'\param file the file name where the image will be saved.
 #'\param references a matrix with the different references to draw on 
 #'each row.
 #'\param references a vector containing the class probability (or weight) associated
 #'to each corresponding reference (row) in matrix.
 #'\param probabilities a vector of <n_run> values that will be displayed atop of each 
 #'column of plots.
 #'\param colors a vector of colors to draw the class profiles. There should 
 #'be <k_max> colors, they can be the same.
 #'\param distances a distance matrix containing the distance between all 
 #'references. The row and column labels have to be the row and column 
 #'number (1, 2, 3, ...)!
 #'\param n_run the total number of different partitions to which all 
 #'references belong.
 #'\param run_value a vector indicating to which partition each reference 
 #'(row of references) belong to. It should be a simple vector of integers,
 #'for instance 1,1,1,1,2,2,2,3,3
 #'\param n_class_max, the highest number of classes searches in all partitions (<k_max>)
 plot.references = function(file,
                            references,
                            probabilities,
                            colors,
                            col.titles,
                            distances, 
                            n_run,
                            run_value,
                            n_class_max,
                            width=15,
                            height=18)
 { 
   # compute the best matches between all references to 1st run references
   matches       = get_matches(distances, run_value)
   
   # make a matrix for layout with good plot numbers
   plots.lab       = matrix(nrow=n_class_max+1, ncol=n_run) # the 1st row will be filled last with only text (col.titles)
   plots.lab[1,]   = (length(plots.lab) - ncol(plots.lab) + 1) : length(plots.lab) 
   plots.lab[-1,1] = 1:n_class_max # for run with max number of classes
   z = n_class_max + 1
   for(i in 1:nrow(matches))
   { coord = matches[i,]
     # plots.lab[coord[3], coord[4]] = z
     plots.lab[coord[1]+1, coord[4]] = z
     z = z + 1
   }
   # these will be the empty plots
   for(i in 1:nrow(plots.lab))
   { for(j in 1:ncol(plots.lab))
     { if(is.na(plots.lab[i,j]))
       { plots.lab[i,j] = z
         z = z + 1
       }
     }
   }
   
 
   # plot
   if(!is.null(file))
   { png(filename=file, width=width, height=height, unit="in", res=720) }
   else
   { X11(width=width, height=height) }
     # a grid
     m = layout(mat = plots.lab, heights=c(0.3, rep(1, nrow(plots.lab)-1)) )
     layout.show(m)
     x = 1:ncol(references)
     
     # plot references of partition with highest number of classes
     for(i in 1:n_class_max)
     { plot(x=x, y=references[i,], lwd=2, type='l', ylim=c(0, 1.2*max(references[i,])),
            col=colors[i], main="", xlab="pos [bp]", ylab="Nb reads")
       # prob
       x_ = 0.85*length(references[i,])
       y_ = max(references[i,])
       lab = round(probabilities[i],3)
       text(x=x_, y=y_, labels=lab, cex=1.2)
     }
     
     # plot others
     for(i in 1:nrow(matches))
     { 
       ref_index = matches[i,2]
       col_index = matches[i,3]
       
      
       plot(x=x, y=references[ref_index,], lwd=2, type='l', ylim=c(0, 1.2*max(references[ref_index,])),
            col=colors[col_index], main="", xlab="pos [bp]", ylab="Nb reads")
       # prob
       x_ = 0.85*length(references[ref_index,])
       y_ = max(references[ref_index,])
       lab = round(probabilities[ref_index],3)
       text(x=x_, y=y_, labels=lab, cex=1.2)
     }
     
     # empty plots
     for(i in (length(run_value)+1):(n_run*n_class_max))
     { plot(1,1,xlab="", ylab="", main="", col=0, xaxt="n", yaxt="n", bty="n") }
     
     # col titles
     p = par(mar=c(0,0,0,0))
     for(i in 1:length(col.titles))
     { plot(1,1,xlab="", ylab="", main="", col=0, xaxt="n", yaxt="n", bty="n")
       text(1,1, labels=col.titles[i], cex=2)
     }
     par(p)
   if(!is.null(file))
   { dev.off() }
 }
diff --git a/scripts/test.R b/scripts/test.R
deleted file mode 100644
index 68eb808..0000000
--- a/scripts/test.R
+++ /dev/null
@@ -1,284 +0,0 @@
-setwd(file.path("/", "local", "groux", "scATAC-seq"))
-
-# libraries
-library(RColorBrewer)
-
-# functions
-source(file.path("scripts", "functions.R"))
-
-#' Converts a sequence in character format 
-#' to integer format A->0, C->1, N->2, G->3
-#' T->4.
-#' \param seq a vector containing the sequence
-#' in character format.
-#' \return a vector containing the sequence
-#' in integer format.
-#' \author Romain Groux
-char.to.int = function(seq)
-{ seq_int = vector(length=length(seq))
-  for(i in 1:length(seq))
-  { if(seq[i] == 'A') { seq_int[i] = 0 }
-    if(seq[i] == 'C') { seq_int[i] = 1 }
-    if(seq[i] == 'N') { seq_int[i] = 2 }
-    if(seq[i] == 'G') { seq_int[i] = 3 }
-    if(seq[i] == 'T') { seq_int[i] = 4 }
-  }
-  return(seq_int)
-}
-
-#' Generates the reverse complement of a kmer.
-#' \param kmer a vector containing the kmer in
-#' integer format.
-#' \return a vector containing the reverse 
-#' complement kmer
-#' \author Romain Groux
-get_rev_compl = function(kmer)
-{ kmer_rv = vector(length=length(kmer), mode="numeric")
-  i_rv = length(kmer)
-  for(i in 1:length(kmer))
-  { if(kmer[i] == 0) { kmer_rv[i_rv] = 4 } # A
-    if(kmer[i] == 1) { kmer_rv[i_rv] = 3 } # C
-    if(kmer[i] == 2) { kmer_rv[i_rv] = 2 } # N
-    if(kmer[i] == 3) { kmer_rv[i_rv] = 1 } # G
-    if(kmer[i] == 4) { kmer_rv[i_rv] = 0 } # T
-  }
-  return(kmer_rv)
-}
-
-#' Generates a hash given a kmer.
-#' Kmers with a same length are guaranteed
-#' to have different hashes.
-#' AA..AA will generate a hash of 1,
-#' AA..AC will generate a hash of 2,
-#' AA..AN will generate a hash of 3,
-#' AA..AG will generate a hash of 4,
-#' AA..AT will generate a hash of 5,
-#' TT..TG will generate a hash of 5**k - 1,
-#' TT..TT will generate a hash of 5**k
-#' \param seq a vector containing the kmer
-#' in integer format : A->0, C->1, N->2, G->3, 
-#' T->4.
-#' \return the kmer hash
-#' \author Romain Groux
-hash = function(seq)
-{ k = length(seq) ; z = 5
-  h = 0
-  for(i in 0:(length(seq)-1))
-  { if(seq[i+1] == 0) { h = h + (0*(z**(k-i-1))) } # A
-    if(seq[i+1] == 1) { h = h + (1*(z**(k-i-1))) } # C
-    if(seq[i+1] == 2) { h = h + (2*(z**(k-i-1))) } # N
-    if(seq[i+1] == 3) { h = h + (3*(z**(k-i-1))) } # G
-    if(seq[i+1] == 4) { h = h + (4*(z**(k-i-1))) } # T
-  }
-  return(h+1)
-}
-
-#' Computes the hash of a sequence and of
-#' its reverse complement and returns the
-#' smallest one.
-#' \param seq a vector containing the 
-#' sequence in integer format : : A->0, 
-#' C->1, N->2, G->3, T->4.
-#' \author Romain Groux
-hash.min(seq)
-{ seq_r = get_rev_compl(seq)
-  return(min(hash(seq), hash(seq_r)))
-}
-
-#' Generates all kmers for a given value of K 
-#' and return them in lexicographic order.
-#' \param k the kmer length.
-#' \return a matrix with the different kmers
-#' on the rows and k columns. The kmers are
-#' in integer format : A->0, C->1, N->2, G->3,
-#' T->4.
-#' \author Romain Groux
-generate_all_kmers = function(k)
-{ kmers = matrix(nrow=5**k, ncol=k, data=-1)
-  n = k
-  currentWord = rep(1, n)
-  i = 1
-  while(n > 0)
-  { kmers[i,] = currentWord
-    i = i + 1
-    while(n>0 && currentWord[n+1-1] == 5)
-    { currentWord[n] = 1
-      n = n - 1
-    }
-    if(n > 0)
-    { currentWord[n] = currentWord[n] + 1
-    n = k
-    }
-  }
-  return(kmers - 1)
-}
-
-
-data = as.matrix(read.table(file.path("data",
-                                      "10xgenomics_PBMC_5k_peaks",
-                                      "peaks_rmsk_sampled_sequences_1kb.mat")))
-
-data = as.matrix(read.table(file.path("data/toy_data/simulated_sequences_2class_flip.mat")))
-data = apply(data, 1, char.to.int)
-
-k       = 5
-n_kmer  = 5**k
-hmax    = ceiling(n_kmer / 2)
-n_shift = ncol(data) - k + 1
-
-# transitions and counts
-counts = vector(length=n_kmer, mode="numeric")
-kmers  = generate_all_kmers(k)
-counts = vector(length=n_kmer, mode="numeric")
-t_out  = matrix(nrow=n_kmer, ncol=n_kmer, data=0)
-t_in   = t_out
-t_all  = t_out
-for(i in 1:nrow(data))
-{ for(j in 1:n_shift)
-  { # no in transition (1st kmer)
-    if(j == 1)
-    { # kmer1 < kmer2
-      from1 = j   ; to1 = from1 + k - 1 ; kmer1 = data[i,from1:to1] ;
-      from2 = j+1 ; to2 = from2 + k - 1 ; kmer2 = data[i,from2:to2] ;
-      kmer1r = get_rev_compl(kmer2) ; kmer2r = get_rev_compl(kmer1) ;
-      idx1  = hash(kmer1) ; idx1r = hash(kmer1r) ;
-      idx2  = hash(kmer2) ; idx2r = hash(kmer2r) ;
-      # out transition kmer1 -> kmer2
-      t_out[idx1,idx2]   = t_out[idx1,idx2]   + 1
-      t_out[idx1r,idx2r] = t_out[idx1r,idx2r] + 1
-      # number of edges
-      t_all[idx1,idx2]   = t_all[idx1,idx2]   + 1  
-      t_all[idx2,idx1]   = t_all[idx2,idx1]   + 1  
-      t_all[idx1r,idx2r] = t_all[idx1r,idx2r] + 1  
-      t_all[idx2r,idx1r] = t_all[idx2r,idx1r] + 1  
-      # counts
-      counts[idx1]  = counts[idx1]  + 1
-      counts[idx1r] = counts[idx1r] + 1
-    }
-    # no out transition (last kmer)
-    else if(j == n_shift)
-    { # kmer1 < kmer2
-      from1 = j-1 ; to1 = from1 + k - 1 ; kmer1 = data[i,from1:to1] ;
-      from2 = j   ; to2 = from2 + k - 1 ; kmer2 = data[i,from2:to2] ;
-      kmer1r = get_rev_compl(kmer2) ; kmer2r = get_rev_compl(kmer1) ;
-      idx1  = hash(kmer1) ; idx1r = hash(kmer1r) ;
-      idx2  = hash(kmer2) ; idx2r = hash(kmer2r) ;
-      # in transition kmer1 <- kmer2
-      t_in[idx1,idx2]   = t_in[idx1,idx2]   + 1
-      t_in[idx1r,idx2r] = t_in[idx1r,idx2r] + 1
-      # number of edges
-      t_all[idx1,idx2]   = t_all[idx1,idx2]   + 1  
-      t_all[idx2,idx1]   = t_all[idx2,idx1]   + 1  
-      t_all[idx1r,idx2r] = t_all[idx1r,idx2r] + 1  
-      t_all[idx2r,idx1r] = t_all[idx2r,idx1r] + 1  
-      # counts
-      # no need, kmer2 was counted at last iteration as kmer2
-    }
-    # both out and in transitions (middle)
-    else
-    { # kmer0 < kmer1 < kmer2
-      from0 = j   ; to0 = from0 + k - 1 ; kmer0 = data[i,from0:to0] ;
-      from1 = j   ; to1 = from1 + k - 1 ; kmer1 = data[i,from1:to1] ;
-      from2 = j+1 ; to2 = from2 + k - 1 ; kmer2 = data[i,from2:to2] ;
-      kmer0r = get_rev_compl(kmer2) ; kmer1r = get_rev_compl(kmer1) ; kmer2r = get_rev_compl(kmer0) ;
-      idx0  = hash(kmer0) ; idx0r = hash(kmer0r) ;
-      idx1  = hash(kmer1) ; idx1r = hash(kmer1r) ;
-      idx2  = hash(kmer2) ; idx2r = hash(kmer2r) ;
-      # out transition kmer1 -> kmer2
-      t_out[idx1,idx2]   = t_out[idx1,idx2]   + 1
-      t_out[idx1r,idx2r] = t_out[idx1r,idx2r] + 1
-      # in transition kmer0 -> kmer1
-      t_in[idx1,idx0]   = t_in[idx1,idx0]   + 1
-      t_in[idx1r,idx0r] = t_in[idx1r,idx0r] + 1
-      # number of edges
-      t_all[idx0,idx1]   = t_all[idx0,idx1]   + 1  
-      t_all[idx1,idx0]   = t_all[idx1,idx0]   + 1
-      t_all[idx1,idx2]   = t_all[idx1,idx2]   + 1
-      t_all[idx2,idx1]   = t_all[idx2,idx1]   + 1
-      t_all[idx0r,idx1r] = t_all[idx0r,idx1r] + 1
-      t_all[idx1r,idx0r] = t_all[idx1r,idx0r] + 1
-      t_all[idx1r,idx2r] = t_all[idx1r,idx2r] + 1  
-      t_all[idx2r,idx1r] = t_all[idx2r,idx1r] + 1
-      # counts
-      counts[idx1]  = counts[idx1]  + 1
-      counts[idx1r] = counts[idx1r] + 1
-    }
-  }
-}
-
-# spectral clustering
-# t_all is the affinity matrix
-# compute the degree matrix
-d = diag(apply(t_in, 1, sum)) # sum rows
-# unormalized laplacian
-u = d - t_in
-# get eigen values and vectors
-evL = eigen(u, symmetric=TRUE)
-# plot eigen values
-plot(1:20, rev(evL$values)[1:20], type='b')
-# partition
-partitions = list()
-for(n_clust in 2:20)
-{ print(n_clust)
-  # get K biggest eigen values and vectors -> embedding space
-  z   = evL$vectors[,(ncol(evL$vectors)-n_clust+1):ncol(evL$vectors)]
-  partitions[[n_clust]] = kmeans(z, centers=n_clust, iter.max=100, nstart=10)
-}
-
-
-plot(evL$vectors[,3124:3125])
-
-# motif 1 is ACGTTGCA
-kmers_motif1 = matrix(ncol=k,
-                      data=c(0,1,2,3,3,
-                               1,2,3,3,2,
-                                 2,3,3,2,1,
-                                   3,3,2,1,0),
-                      byrow=T)
-# motif 2 is GCGAATTT
-kmers_motif2 = matrix(ncol=k,
-                      data=c(2,1,2,0,0,
-                               1,3,0,0,3,
-                                 3,0,0,3,3,
-                                   0,0,3,3,3),
-                      byrow=T)
-idx1 = apply(kmers_motif1, 1, hash)
-idx2 = apply(kmers_motif2, 1, hash)
-
-partitions[[2]]$size
-
-partitions[[2]]$cluster[idx1]
-partitions[[2]]$cluster[idx2]
-
-
-
-
-c1 = which(partitions[[2]]$cluster == 1)
-c2 = which(partitions[[2]]$cluster == 2)
-
-plot(evL$vectors[,3124:3125], col=partitions[[2]]$cluster+1, cex=0.1)
-
-points(evL$vectors[idx1,3124:3125], col=2)
-points(evL$vectors[idx2,3124:3125], col=3)
-
-par(mfrow=c(3,1))
-plot(t_all[idx1[1],], type='l', ylim=c(0,50)) ; abline(v=idx1, col="red", lwd=0.2) ; abline(v=idx1[1], col="blue")
-plot(t_all[idx1[2],], type='l', ylim=c(0,50)) ; abline(v=idx1, col="red", lwd=0.2)
-plot(t_all[idx1[3],], type='l', ylim=c(0,50)) ; abline(v=idx1, col="red", lwd=0.2)
-
-
-boxplot(counts, counts[idx1], counts[idx2], outline=F)
-
-
-
-
-# reconstruct kmers
-best.k = 2
-partition = partitions[[best.k]]
-clusters  = partition$cluster
-c1 = which(clusters == 1)
-c2 = which(clusters == 2)
-best1 = which.max(counts[c1])
-best2 = which.max(counts[c2])
-
-
diff --git a/scripts/test/analysis_test_sampled.R b/scripts/test/analysis_test_sampled.R
deleted file mode 100644
index afcf023..0000000
--- a/scripts/test/analysis_test_sampled.R
+++ /dev/null
@@ -1,97 +0,0 @@
-setwd(file.path("/", "local", "groux", "scATAC-seq"))
-
-# libraries
-library(RColorBrewer)
-library(seqLogo)
-
-# functions
-source(file.path("scripts", "functions.R"))
-
-# the number of classes searched
-n.classes = c(10, 20, 30)
-
-# path to the images for the logo
-path.a = file.path("res/A.png")
-path.c = file.path("res/C.png")
-path.g = file.path("res/G.png")
-path.t = file.path("res/T.png") 
-
-################## sequence patterns around ctcf motifs ##################
-
-for(k in n.classes)
-{ 
-  # sequence
-  data = read.sequence.models(file.path("results", "test_1kb",
-                                        sprintf("peaks_rmsk_sampled_sequences_%dclass_model.mat", k)))
-  model.seq = data$models
-  model.prob = data$prob
-  data = NULL
-  
-  # open chromatin
-  model.open = read.read.models(file.path("results", "test_1kb", 
-                                          sprintf("peaks_rmsk_sampled_openchromatin_%dclass_model.mat", k)))$models
-  # nucleosomes
-  model.nucl = read.read.models(file.path("results", "test_1kb",
-                                          sprintf("peaks_rmsk_sampled_nucleosomes_%dclass_model.mat", k)))$models
-  
-  # plot classes
-  col = brewer.pal(3, "Set1")
-  # X11(width=26, height=12)
-  png(filename=file.path("results", "test_1kb",
-                         sprintf("peaks_rmsk_sampled_sequences_%dclass.png", k)),
-      units="in", res=720, width=18, height=12)
-    m = matrix(1:30, nrow=6, ncol=5, byrow=F)
-    layout(m)
-    # order from most to least probable class
-    ord      = order(model.prob, decreasing=T)
-    ref.open = model.open[ord,, drop=F]
-    ref.nucl = model.nucl[ord,, drop=F]
-    ref.seq  = model.seq[,,ord, drop=F]
-    prob     = model.prob[ord]
-    class    = c(1:nrow(ref.open))[ord]
-    for(i in 1:nrow(ref.open))
-    { # plot logo
-      plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t,
-                main=sprintf("class %d (p=%.2f)", class[i], prob[i]))
-      # x-axis
-      x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2, length.out=3)
-      x.at = seq(1, ncol(ref.open), length.out=length(x.lab))
-      axis(1, at=x.at, labels=x.lab)
-      # y-axis is [0,1] for min/max signal
-      y.at  = seq(0, 2, length.out=2)
-      y.lab = c("min", "max")
-      axis(2, at=y.at, labels=y.lab)
-      # plot signal (multiplies by 2 because the y-axis goes to 2 bits)
-      lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=1, col=col[1])
-      lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=1, col=col[2])
-    }
-    # inlets with center
-    # row_n = 1 # row counter
-    # col_n = 1 # column counter
-    # for(i in 1:nrow(ref.open))
-    # { # plot logo center
-    #   right  = 0.5*col_n - 0.01
-    #   left   = right - 0.2
-    #   bottom = 1-(row_n*(0.2))+0.05
-    #   top    = bottom + 0.15
-    #   par(fig=c(left, right, bottom, top), new=T)
-    #   idx = (391-1-20):(391+1+20)
-    #   plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t)
-    #   # plot signal (multiplies by 2 because the y-axis goes to 2 bits)
-    #   lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=1, col=col[1])
-    #   lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=1, col=col[2])
-    #   # xaxis
-    #   x.at = seq(1, length(idx), length.out = 3)
-    #   x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2)[idx][x.at]
-    #   axis(1, at=x.at, labels=x.lab)
-    #   # yaxis
-    #   axis(2, at=y.at, labels=y.lab)
-    #   row_n = row_n + 1
-    #   if(i %% 5 == 0)
-    #   { col_n = col_n + 1
-    #     row_n = 1
-    #   }
-    # }
-  dev.off()
-}
-
diff --git a/scripts/test/test_1kb.sh b/scripts/test/test_1kb.sh
deleted file mode 100755
index 3bc3541..0000000
--- a/scripts/test/test_1kb.sh
+++ /dev/null
@@ -1,35 +0,0 @@
-
-# paths
-## dir
-data_dir="results/10xgenomics_PBMC_5k"
-results_dir="results/test_1kb"
-## matrix files
-file_mat_open=$data_dir/'peaks_rmsk_open_bin1bp_1kb_read_atac.mat'
-file_mat_nucl=$data_dir/'peaks_rmsk_nucleosomes_bin1bp_1kb_fragment_center.mat'
-file_mat_seq=$data_dir/'peaks_rmsk_sequences_1kb.mat'
-## file with seeds
-file_seed=$results_dir'/peaks_rmsk_seed.txt'
-
-mkdir -p $results_dir
-touch $file_seed
-
-# EM param
-n_iter='100'
-n_shift='951'
-n_core=12
-
-# classify
-for k in 10 20 30
-do
-	## results files
-	file_prob=$results_dir/'peaks_rmsk_sequences_'$k'class_prob.mat4d'
-	file_mod1=$results_dir/'peaks_rmsk_openchromatin_'$k'class_model.mat'
-	file_mod2=$results_dir/'peaks_rmsk_nucleosomes_'$k'class_model.mat'
-	file_mod3=$results_dir/'peaks_rmsk_sequences_'$k'class_model.mat'	
-	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
-	echo "$file_prob $seed" >> $file_seed
-	bin/EMSequence --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core > $file_prob
-	bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
-	bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2
-	bin/ProbToModel --seq  $file_mat_seq  --prob $file_prob --thread $n_core 1> $file_mod3
-done
diff --git a/scripts/test/test_1kb_pwms.sh b/scripts/test/test_1kb_pwms.sh
deleted file mode 100755
index f7c51f8..0000000
--- a/scripts/test/test_1kb_pwms.sh
+++ /dev/null
@@ -1,49 +0,0 @@
-
-# paths
-## dir
-pwm_dir="data/pwm/jaspar_2018_clustering/"
-data_dir="results/10xgenomics_PBMC_5k"
-results_dir="results/test_1kb_pwms"
-## matrix files
-file_mat_open=$data_dir/'peaks_rmsk_open_bin1bp_1kb_read_atac.mat'
-file_mat_nucl=$data_dir/'peaks_rmsk_nucleosomes_1kb_bin1bp_fragment_center.mat'
-file_mat_seq=$data_dir/'peaks_rmsk_sequences_1kb.mat'
-## PWM files
-jun="$pwm_dir/cluster_3_node_23_20_motifs_prob.mat"
-hif1a="$pwm_dir/cluster_4_node_31_3_motifs_prob.mat"
-myc="$pwm_dir/cluster_4_node_22_4_motifs_prob.mat"
-pu1="$pwm_dir/cluster_7_node_13_2_motifs_prob.mat"
-cebpb="$pwm_dir/cluster_5_node_20_5_motifs_prob.mat"
-irf4="$pwm_dir/cluster_31_node_4_5_motifs_prob.mat"
-irf2="$pwm_dir/cluster_31_node_5_2_motifs_prob.mat"
-lhx3="$pwm_dir/cluster_1_node_74_2_motifs_prob.mat"
-foxh1="$pwm_dir/cluster_66_1_motifs_prob.mat"
-sox3="$pwm_dir/cluster_33_node_1_2_motifs_prob.mat"
-mef2c="$pwm_dir/cluster_20_4_motifs_prob.mat"
-elf5="$pwm_dir/cluster_7_node_17_5_motifs_prob.mat"
-stat6="$pwm_dir/cluster_32_node_STAT6_1_motifs_prob.mat"
-nfe2="$pwm_dir/cluster_3_node_24_4_motifs_prob.mat"
-ahr="$pwm_dir/cluster_4_node_30_2_motifs_prob.mat"
-elf2="$pwm_dir/cluster_39_node_1_2_motifs_prob.mat"
-ctcf="$pwm_dir/cluster_48_node_ctcf_1_motifs_prob.mat"
-
-mkdir -p $results_dir
-
-# EM param
-n_iter='100'
-n_shift='951'
-n_core=12
-
-# classify
-## results files
-file_prob=$results_dir/'peaks_rmsk_sequences_1kb_15class_prob.mat4d'
-file_mod1=$results_dir/'peaks_rmsk_openchromatin_1kb_15class_model.mat'
-file_mod2=$results_dir/'peaks_rmsk_nucleosomes_1kb_15class_model.mat'
-file_mod3=$results_dir/'peaks_rmsk_sequences_1kb_15class_model.mat'
-
-bin/EMSequence --seq $file_mat_seq --motifs $jun,$hif1a,$myc,$pu1,$cebpb,$irf4,$irf2,$lhx3,$foxh1,$sox3,$mef2c,$elf5,$nfe2,$ahr,$elf2 --shift $n_shift --flip --iter $n_iter --thread $n_core > $file_prob
-
-bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
-bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2
-bin/ProbToModel --seq  $file_mat_seq  --prob $file_prob --thread $n_core 1> $file_mod3
-
diff --git a/scripts/test/test_1kb_sampled.sh b/scripts/test/test_1kb_sampled.sh
deleted file mode 100755
index edd22fe..0000000
--- a/scripts/test/test_1kb_sampled.sh
+++ /dev/null
@@ -1,35 +0,0 @@
-
-# paths
-## dir
-data_dir="results/10xgenomics_PBMC_5k"
-results_dir="results/test_1kb"
-## matrix files
-file_mat_open=$data_dir/'peaks_rmsk_sampled_open_bin1bp_1kb_read_atac.mat'
-file_mat_nucl=$data_dir/'peaks_rmsk_sampled_nucleosomes_bin1bp_1kb_fragment_center.mat'
-file_mat_seq=$data_dir/'peaks_rmsk_sampled_sequences_1kb.mat'
-## file with seeds
-file_seed=$results_dir'/peaks_rmsk_sampled_seed.txt'
-
-mkdir -p $results_dir
-touch $file_seed
-
-# EM param
-n_iter='100'
-n_shift='951'
-n_core=12
-
-# classify
-for k in 10 20 30
-do
-	## results files
-	file_prob=$results_dir/'peaks_rmsk_sampled_sequences_'$k'class_prob.mat4d'
-	file_mod1=$results_dir/'peaks_rmsk_sampled_openchromatin_'$k'class_model.mat'
-	file_mod2=$results_dir/'peaks_rmsk_sampled_nucleosomes_'$k'class_model.mat'
-	file_mod3=$results_dir/'peaks_rmsk_sampled_sequences_'$k'class_model.mat'	
-	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
-	echo "$file_prob $seed" >> $file_seed
-	bin/EMSequence --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core > $file_prob
-	bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
-	bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2
-	bin/ProbToModel --seq  $file_mat_seq  --prob $file_prob --thread $n_core 1> $file_mod3
-done
diff --git a/scripts/test/test_1kb_sampled_pwms.sh b/scripts/test/test_1kb_sampled_pwms.sh
deleted file mode 100755
index 454cae9..0000000
--- a/scripts/test/test_1kb_sampled_pwms.sh
+++ /dev/null
@@ -1,49 +0,0 @@
-
-# paths
-## dir
-pwm_dir="data/pwm/jaspar_2018_clustering/"
-data_dir="results/10xgenomics_PBMC_5k"
-results_dir="results/test_1kb_pwms"
-## matrix files
-file_mat_open=$data_dir/'peaks_rmsk_sampled_open_bin1bp_1kb_read_atac.mat'
-file_mat_nucl=$data_dir/'peaks_rmsk_sampled_nucleosomes_1kb_bin1bp_fragment_center.mat'
-file_mat_seq=$data_dir/'peaks_rmsk_sampled_sequences_1kb.mat'
-## PWM files
-jun="$pwm_dir/cluster_3_node_23_20_motifs_prob.mat"
-hif1a="$pwm_dir/cluster_4_node_31_3_motifs_prob.mat"
-myc="$pwm_dir/cluster_4_node_22_4_motifs_prob.mat"
-pu1="$pwm_dir/cluster_7_node_13_2_motifs_prob.mat"
-cebpb="$pwm_dir/cluster_5_node_20_5_motifs_prob.mat"
-irf4="$pwm_dir/cluster_31_node_4_5_motifs_prob.mat"
-irf2="$pwm_dir/cluster_31_node_5_2_motifs_prob.mat"
-lhx3="$pwm_dir/cluster_1_node_74_2_motifs_prob.mat"
-foxh1="$pwm_dir/cluster_66_1_motifs_prob.mat"
-sox3="$pwm_dir/cluster_33_node_1_2_motifs_prob.mat"
-mef2c="$pwm_dir/cluster_20_4_motifs_prob.mat"
-elf5="$pwm_dir/cluster_7_node_17_5_motifs_prob.mat"
-# stat6="$pwm_dir/cluster_32_node_STAT6_1_motifs_prob.mat"
-nfe2="$pwm_dir/cluster_3_node_24_4_motifs_prob.mat"
-ahr="$pwm_dir/cluster_4_node_30_2_motifs_prob.mat"
-elf2="$pwm_dir/cluster_39_node_1_2_motifs_prob.mat"
-# ctcf="$pwm_dir/cluster_48_node_ctcf_1_motifs_prob.mat"
-
-mkdir -p $results_dir
-
-# EM param
-n_iter='100'
-n_shift='951'
-n_core=12
-
-# classify
-## results files
-file_prob=$results_dir/'peaks_rmsk_sampled_sequences_1kb_15class_prob.mat4d'
-file_mod1=$results_dir/'peaks_rmsk_sampled_openchromatin_1kb_15class_model.mat'
-file_mod2=$results_dir/'peaks_rmsk_sampled_nucleosomes_1kb_15class_model.mat'
-file_mod3=$results_dir/'peaks_rmsk_sampled_sequences_1kb_15class_model.mat'
-
-bin/EMSequence --seq $file_mat_seq --motifs $jun,$hif1a,$myc,$pu1,$cebpb,$irf4,$irf2,$lhx3,$foxh1,$sox3,$mef2c,$elf5,$nfe2,$ahr,$elf2 --shift $n_shift --flip --iter $n_iter --thread $n_core > $file_prob
-
-bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
-bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2
-bin/ProbToModel --seq  $file_mat_seq  --prob $file_prob --thread $n_core 1> $file_mod3
-
diff --git a/scripts/test/test_2kb.sh b/scripts/test/test_2kb.sh
deleted file mode 100755
index dbabf08..0000000
--- a/scripts/test/test_2kb.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-
-# paths
-## dir
-data_dir="results/10xgenomics_PBMC_5k"
-results_dir="results/test_2kb"
-## matrix files
-file_mat_open=$data_dir/'peaks_rmsk_open_bin1bp_2kb_read_atac.mat'
-file_mat_nucl=$data_dir/'peaks_rmsk_nucleosomes_bin1bp_2kb_fragment_center.mat'
-file_mat_seq=$data_dir/'peaks_rmsk_sequences_2kb.mat'
-## file with seeds
-file_seed=$results_dir'/peaks_rmsk_seed.txt'
-
-mkdir -p $results_dir
-touch $file_seed
-
-# EM param
-n_iter='100'
-n_shift='201'
-n_core=12
-
-# classify
-for k in 10 20 30
-do
-	## results files
-	file_prob=$results_dir/'peaks_rmsk_sequences_'$k'class_prob.mat4d'
-	file_mod1=$results_dir/'peaks_rmsk_openchromatin_'$k'class_model.mat'
-	file_mod2=$results_dir/'peaks_rmsk_nucleosomes_'$k'class_model.mat'
-	file_mod3=$results_dir/'peaks_rmsk_sequences_'$k'class_model.mat'	
-	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
-	echo "$file_prob $seed" >> $file_seed
-	bin/EMSequence --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core > $file_prob
-	bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
-	bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2
-	bin/ProbToModel --seq  $file_mat_seq  --prob $file_prob --thread $n_core 1> $file_mod3
-done
-
diff --git a/scripts/test/test_2kb_sampled.sh b/scripts/test/test_2kb_sampled.sh
deleted file mode 100755
index 2f3e309..0000000
--- a/scripts/test/test_2kb_sampled.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-
-# paths
-## dir
-data_dir="results/10xgenomics_PBMC_5k"
-results_dir="results/test_2kb"
-## matrix files
-file_mat_open=$data_dir/'peaks_rmsk_sampled_open_bin1bp_2kb_read_atac.mat'
-file_mat_nucl=$data_dir/'peaks_rmsk_sampled_nucleosomes_bin1bp_2kb_fragment_center.mat'
-file_mat_seq=$data_dir/'peaks_rmsk_sampled_sequences_2kb.mat'
-## file with seeds
-file_seed=$results_dir'/peaks_rmsk_sampled_seed.txt'
-
-mkdir -p $results_dir
-touch $file_seed
-
-# EM param
-n_iter='100'
-n_shift='201'
-n_core=12
-
-# classify
-for k in 10 20 30
-do
-	## results files
-	file_prob=$results_dir/'peaks_rmsk_sampled_sequences_'$k'class_prob.mat4d'
-	file_mod1=$results_dir/'peaks_rmsk_sampled_openchromatin_'$k'class_model.mat'
-	file_mod2=$results_dir/'peaks_rmsk_sampled_nucleosomes_'$k'class_model.mat'
-	file_mod3=$results_dir/'peaks_rmsk_sampled_sequences_'$k'class_model.mat'	
-	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
-	echo "$file_prob $seed" >> $file_seed
-	bin/EMSequence --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core > $file_prob
-	bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
-	bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2
-	bin/ProbToModel --seq  $file_mat_seq  --prob $file_prob --thread $n_core 1> $file_mod3
-done
-
diff --git a/scripts/test_2/analysis_test_2_sampled.R b/scripts/test_2/analysis_test_2_sampled.R
deleted file mode 100644
index afcf023..0000000
--- a/scripts/test_2/analysis_test_2_sampled.R
+++ /dev/null
@@ -1,97 +0,0 @@
-setwd(file.path("/", "local", "groux", "scATAC-seq"))
-
-# libraries
-library(RColorBrewer)
-library(seqLogo)
-
-# functions
-source(file.path("scripts", "functions.R"))
-
-# the number of classes searched
-n.classes = c(10, 20, 30)
-
-# path to the images for the logo
-path.a = file.path("res/A.png")
-path.c = file.path("res/C.png")
-path.g = file.path("res/G.png")
-path.t = file.path("res/T.png") 
-
-################## sequence patterns around ctcf motifs ##################
-
-for(k in n.classes)
-{ 
-  # sequence
-  data = read.sequence.models(file.path("results", "test_1kb",
-                                        sprintf("peaks_rmsk_sampled_sequences_%dclass_model.mat", k)))
-  model.seq = data$models
-  model.prob = data$prob
-  data = NULL
-  
-  # open chromatin
-  model.open = read.read.models(file.path("results", "test_1kb", 
-                                          sprintf("peaks_rmsk_sampled_openchromatin_%dclass_model.mat", k)))$models
-  # nucleosomes
-  model.nucl = read.read.models(file.path("results", "test_1kb",
-                                          sprintf("peaks_rmsk_sampled_nucleosomes_%dclass_model.mat", k)))$models
-  
-  # plot classes
-  col = brewer.pal(3, "Set1")
-  # X11(width=26, height=12)
-  png(filename=file.path("results", "test_1kb",
-                         sprintf("peaks_rmsk_sampled_sequences_%dclass.png", k)),
-      units="in", res=720, width=18, height=12)
-    m = matrix(1:30, nrow=6, ncol=5, byrow=F)
-    layout(m)
-    # order from most to least probable class
-    ord      = order(model.prob, decreasing=T)
-    ref.open = model.open[ord,, drop=F]
-    ref.nucl = model.nucl[ord,, drop=F]
-    ref.seq  = model.seq[,,ord, drop=F]
-    prob     = model.prob[ord]
-    class    = c(1:nrow(ref.open))[ord]
-    for(i in 1:nrow(ref.open))
-    { # plot logo
-      plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t,
-                main=sprintf("class %d (p=%.2f)", class[i], prob[i]))
-      # x-axis
-      x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2, length.out=3)
-      x.at = seq(1, ncol(ref.open), length.out=length(x.lab))
-      axis(1, at=x.at, labels=x.lab)
-      # y-axis is [0,1] for min/max signal
-      y.at  = seq(0, 2, length.out=2)
-      y.lab = c("min", "max")
-      axis(2, at=y.at, labels=y.lab)
-      # plot signal (multiplies by 2 because the y-axis goes to 2 bits)
-      lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=1, col=col[1])
-      lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=1, col=col[2])
-    }
-    # inlets with center
-    # row_n = 1 # row counter
-    # col_n = 1 # column counter
-    # for(i in 1:nrow(ref.open))
-    # { # plot logo center
-    #   right  = 0.5*col_n - 0.01
-    #   left   = right - 0.2
-    #   bottom = 1-(row_n*(0.2))+0.05
-    #   top    = bottom + 0.15
-    #   par(fig=c(left, right, bottom, top), new=T)
-    #   idx = (391-1-20):(391+1+20)
-    #   plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t)
-    #   # plot signal (multiplies by 2 because the y-axis goes to 2 bits)
-    #   lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=1, col=col[1])
-    #   lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=1, col=col[2])
-    #   # xaxis
-    #   x.at = seq(1, length(idx), length.out = 3)
-    #   x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2)[idx][x.at]
-    #   axis(1, at=x.at, labels=x.lab)
-    #   # yaxis
-    #   axis(2, at=y.at, labels=y.lab)
-    #   row_n = row_n + 1
-    #   if(i %% 5 == 0)
-    #   { col_n = col_n + 1
-    #     row_n = 1
-    #   }
-    # }
-  dev.off()
-}
-
diff --git a/scripts/test_2/test_1kb_sampled.sh b/scripts/test_2/test_1kb_sampled.sh
deleted file mode 100755
index 2b9e096..0000000
--- a/scripts/test_2/test_1kb_sampled.sh
+++ /dev/null
@@ -1,35 +0,0 @@
-
-# paths
-## dir
-data_dir="results/10xgenomics_PBMC_5k"
-results_dir="results/test_1kb_2"
-## matrix files
-file_mat_open=$data_dir/'peaks_rmsk_sampled_open_bin1bp_1kb_read_atac.mat'
-file_mat_nucl=$data_dir/'peaks_rmsk_sampled_nucleosomes_bin1bp_1kb_fragment_center.mat'
-file_mat_seq=$data_dir/'peaks_rmsk_sampled_sequences_1kb.mat'
-## file with seeds
-file_seed=$results_dir'/peaks_rmsk_sampled_seed.txt'
-
-mkdir -p $results_dir
-touch $file_seed
-
-# EM param
-n_iter='100'
-n_shift='971'
-n_core=14
-
-# classify
-for k in 10 20 30
-do
-	## results files
-	file_prob=$results_dir/'peaks_rmsk_sampled_sequences_'$k'class_prob.mat4d'
-	file_mod1=$results_dir/'peaks_rmsk_sampled_openchromatin_'$k'class_model.mat'
-	file_mod2=$results_dir/'peaks_rmsk_sampled_nucleosomes_'$k'class_model.mat'
-	file_mod3=$results_dir/'peaks_rmsk_sampled_sequences_'$k'class_model.mat'	
-	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
-	echo "$file_prob $seed" >> $file_seed
-	bin/EMSequence --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core > $file_prob
-	bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
-	bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2
-	bin/ProbToModel --seq  $file_mat_seq  --prob $file_prob --thread $n_core 1> $file_mod3
-done
diff --git a/scripts/test_2/test_2kb_sampled.sh b/scripts/test_2/test_2kb_sampled.sh
deleted file mode 100755
index 02af0c9..0000000
--- a/scripts/test_2/test_2kb_sampled.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-
-# paths
-## dir
-data_dir="results/10xgenomics_PBMC_5k"
-results_dir="results/test_2kb_2"
-## matrix files
-file_mat_open=$data_dir/'peaks_rmsk_sampled_open_bin1bp_2kb_read_atac.mat'
-file_mat_nucl=$data_dir/'peaks_rmsk_sampled_nucleosomes_bin1bp_2kb_fragment_center.mat'
-file_mat_seq=$data_dir/'peaks_rmsk_sampled_sequences_2kb.mat'
-## file with seeds
-file_seed=$results_dir'/peaks_rmsk_sampled_seed.txt'
-
-mkdir -p $results_dir
-touch $file_seed
-
-# EM param
-n_iter='100'
-n_shift='971'
-n_core=14
-
-# classify
-for k in 10 20 30
-do
-	## results files
-	file_prob=$results_dir/'peaks_rmsk_sampled_sequences_'$k'class_prob.mat4d'
-	file_mod1=$results_dir/'peaks_rmsk_sampled_openchromatin_'$k'class_model.mat'
-	file_mod2=$results_dir/'peaks_rmsk_sampled_nucleosomes_'$k'class_model.mat'
-	file_mod3=$results_dir/'peaks_rmsk_sampled_sequences_'$k'class_model.mat'	
-	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
-	echo "$file_prob $seed" >> $file_seed
-	bin/EMSequence --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core > $file_prob
-	bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
-	bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2
-	bin/ProbToModel --seq  $file_mat_seq  --prob $file_prob --thread $n_core 1> $file_mod3
-done
-
diff --git a/scripts/toy_data/analyse_data_sequence.R b/scripts/toy_data/analyse_data_sequence.R
new file mode 100644
index 0000000..bccfc2b
--- /dev/null
+++ b/scripts/toy_data/analyse_data_sequence.R
@@ -0,0 +1,360 @@
+setwd(file.path("/", "local", "groux", "scATAC-seq"))
+
+# libraries
+library(RColorBrewer)
+library(motifStack)
+library(TFBSTools)
+
+# functions
+source(file.path("scripts", "functions.R"))
+
+
+#' Scores a sequence, in integer format, given a probability matrix.
+#' Every window of the sequence that fully contains the matrix is
+#' scored as a likelihood ratio (matrix vs prior) and the ratios of
+#' all windows are summed.
+#' @param sequence a vector of integer encoding
+#' the sequence A:1, C:2, G:3, T:4
+#' @param pwm a probability matrix of dimensions :
+#' 1st 4 for A, C, G, T
+#' 2nd the number of positions
+#' @param prior a vector containing the prior 
+#' probabilities about each nucleotide. It may
+#' be the base composition. It should sum up 
+#' to 1.
+#' @return the sequence cumulative score, 0 if the
+#' sequence is shorter than the matrix.
+#' @author Romain Groux
+score.seq = function(sequence, pwm, prior=rep(0.25,4))
+{
+  l_pwm    = ncol(pwm)
+  n_shifts = length(sequence) - l_pwm + 1
+  # no window fits : 1:n_shifts would count backward and index out of the sequence
+  if(n_shifts < 1)
+  { return(0) }
+  
+  # log odds of each base (rows) at each motif position (columns)
+  log_odds = log(pwm) - log(prior)
+  score = 0
+  for(shift in seq_len(n_shifts))
+  { bases = sequence[shift:(shift+l_pwm-1)]
+    score = score + exp(sum(log_odds[cbind(bases, seq_len(l_pwm))]))
+  }
+  return(score)
+}
+
+#' Assigns a class label to each sequence of a set, given two
+#' motifs. Each sequence is scored with both motifs and gets the
+#' label of the best scoring one. On equal scores, the sequence
+#' is assigned to class 1.
+#' @param sequences a matrix containing the sequences
+#' in integer format. It has the following dimensions :
+#' 1st the number of sequences
+#' 2nd the sequence length
+#' @param motif.1 a probability matrix containing the
+#' 1st motif. It has the following dimensions :
+#' 1st 4 for A, C, G, T
+#' 2nd the number of positions
+#' @param motif.2 a probability matrix containing the
+#' 2nd motif. It has the following dimensions :
+#' 1st 4 for A, C, G, T
+#' 2nd the number of positions
+#' @return a list with 2 items
+#' scores : a matrix containing the sequence scores. It
+#' has the following dimensions :
+#' 1st the number of sequences
+#' 2nd 2 for each motif
+#' labels : a vector containing the sequence labels
+#' (1 or 2).
+#' @author Romain Groux
+predict.class = function(sequences, motif.1, motif.2)
+{ # score every sequence (row) with each motif
+  score.with = function(motif)
+  { apply(sequences, 1, score.seq, motif) }
+  scores = cbind(scores.1=score.with(motif.1),
+                 scores.2=score.with(motif.2))
+  # class 1 by default, class 2 where motif 2 strictly wins
+  motif.2.wins = which(scores[,2] > scores[,1])
+  labels = replace(rep(1, nrow(sequences)), motif.2.wins, 2)
+  return(list("scores"=scores, "labels"=labels))
+}
+
+#' Computes the x and y coordinates of the ROC curve
+#' given the sequence scores and their corresponding 
+#' true class labels. Class 1 is the positive class,
+#' any other label is counted as negative.
+#' Both rates are normalized by the size of their own
+#' class, as found in labels.
+#' @param scores a matrix containing the scores for 
+#' each sequence (on the rows), for each class (on 
+#' the columns)
+#' @param labels the true class labels for each 
+#' sequence.
+#' @return a matrix with 2 columns, "y" and "x", and
+#' one row per sequence. The x coordinates correspond
+#' to the false positive rate (1 - specificity) and the
+#' y coordinates to the sensitivity. A rate is NaN if
+#' its class has no member in labels.
+#' @author Romain Groux
+get.roc.coord = function(scores, labels)
+{ # order by descending score for class 1 and then class 2
+  ord    = order(scores[,1], scores[,2], decreasing=T)
+  labels = labels[ord]
+  
+  # class 1 sequences are the positives, all others the negatives
+  is_pos = labels == 1
+  
+  # class sizes, counted from the labels. Assuming that each
+  # class contains half of the sequences is wrong as soon as
+  # the classes are not perfectly balanced : the curve does
+  # not end in (1,1) anymore.
+  n_pos = sum(is_pos)
+  n_neg = sum(!is_pos)
+  
+  # cumulative true and false positive rates, down the ranking
+  true  = cumsum(is_pos)  / n_pos
+  false = cumsum(!is_pos) / n_neg
+  
+  # to draw plot
+  m = cbind(true, false)
+  colnames(m) = c("y", "x")
+  return(m)
+}
+
+
+
+# seed
+set.seed(20191007)
+
+# path to the images for the logo (only passed to the commented-out plot.logo() calls)
+path.a = file.path("res/A.png")
+path.c = file.path("res/C.png")
+path.g = file.path("res/G.png")
+path.t = file.path("res/T.png") 
+
+# number of sequences classified
+n.seq  = 2000
+# number of times the classification was repeated
+n.runs  = 50
+# number of classes searched
+n.classes   = 2
+# the shifting freedom allowed
+n.shifts = 90
+
+# the expected dimensionality of the prob array (kept for reference, nothing below reads it)
+dim  = c(n.seq, n.classes, n.shifts, 2) # 2000 seq, 2 classes, 90 shifts, 2 flips
+
+# where the data are
+dir.data = file.path("data",
+                     "toy_data")
+# where the results are
+dir.results = file.path("results",
+                        "toy_data")
+
+
+
+# sequences, + 1 to get the A:1, C:2, G:3, T:4 encoding that score.seq() expects
+sequences = as.matrix(read.table(file.path(dir.data,
+                                           "simulated_sequences_2class_flip.mat"))) + 1
+
+# true motifs, one file per class
+motif.true.1 = as.matrix(read.table(file.path(dir.data,
+                                               "simulated_sequences_2class_flip_motif1.txt")))
+motif.true.2 = as.matrix(read.table(file.path(dir.data,
+                                               "simulated_sequences_2class_flip_motif2.txt")))
+# true class labels (1st column of the file)
+labels.true = as.matrix(read.table(file.path(dir.data,
+                                             "simulated_sequences_2class_flip_classes.txt")))[,1]
+
+# AUC of the sequence labels predicted with the true motifs ; scores.true ends up holding the score matrix only
+scores.true = predict.class(sequences, motif.true.1, motif.true.2)
+labels      = scores.true$labels
+auc.true    = auc(labels.true, labels)
+scores.true = scores.true$scores
+
+# sequence scores with found motifs, dimensions : run x sequence x motif
+scores.found = array(dim=c(n.runs, n.seq, 2), data=0)
+
+# AUC of each run
+auc.found = rep(0, n.runs)
+
+# go over each run
+for(i in 1:n.runs)
+{ # read the motifs found by run i
+  file.motif = file.path(dir.results,
+                        sprintf("simulated_sequences_2class_flip_class_model_%d.mat", 
+                                i))
+  motifs.found = read.sequence.models(file.motif)$models
+  
+  # display logo (disabled)
+  # par(mfrow=c(2,2))
+  # plot.logo(motifs.found[,,1], path.a, path.c, path.g, path.t, main=sprintf("run %d class1", i))
+  # plot.logo(motifs.found[,,2], path.a, path.c, path.g, path.t, main=sprintf("run %d class2", i))
+  # plot.logo(motif.true.1,  path.a, path.c, path.g, path.t, main=sprintf("true class1", i))
+  # plot.logo(motif.true.2,  path.a, path.c, path.g, path.t, main=sprintf("true class2", i))
+  
+  # get AUC and keep the scores of this run for the ROC curves
+  # class 1 -> label 1, class 2 -> label2
+  scores1 = predict.class(sequences, motifs.found[,,1], motifs.found[,,2])
+  labels1 = scores1$labels
+  auc1    = auc(labels.true, labels1)
+  auc.found[i] = auc1
+  scores.found[i,,] = scores1$scores
+  
+  # class 1 -> label 2, class 2 -> label1
+  # NO NEED ! AUC value is the same. This is simply an anti-classifier :)
+  # scores2 = predict.class(sequences, motifs.found[,,2], motifs.found[,,1])
+  # labels2 = scores2$labels
+  # auc2    = auc(labels.true, labels2)
+  # auc.found[i,2] = auc2
+  # scores.found[i,,2,] = scores2$scores
+  
+  # coord1 = get.roc.coord(scores1$scores, labels.true)
+  # coord2 = get.roc.coord(scores2$scores, labels.true)
+  # coord.true = get.roc.coord(scores.true, labels.true)
+  
+  # plot(coord1[,"x"], coord1[,"y"], lwd=2, type='l')  
+  # lines(coord2[,"x"], coord2[,"y"], lwd=2, col="blue")
+  # lines(coord.true[,"x"], coord.true[,"y"], lwd=2, col="red")
+  # segments(0,0,1,1)
+}
+
+
+  
+# # plot results
+# X11(width=12, height=8)
+# 
+#   m = matrix(nrow=3, ncol=2,
+#              data=1:6, byrow=T)
+# 
+#   layout(m, heights=c(2,1,1))
+#   
+#   # boxplot AUCs
+#   boxplot(auc.found, ylim=c(0,1), main="AUCs", ylab="AUC",
+#           cex.lab=2, cex.main=2)
+#   abline(h=auc.true, col="red", lwd=2, lty=2)
+#   
+#   # plot all ROC
+#   coord.true = get.roc.coord(scores.true, labels.true)
+#   # empty plot
+#   plot(seq(0,1,by=0.2), seq(0,1,by=0.2), col=0,
+#        main="ROCs", xlab="specificity", ylab="sensitivity",
+#        cex.lab=2, cex.main=2)
+#   for(i in 1:n.runs)
+#   { # compute true positive and negative discovery rates
+#     coord.found = get.roc.coord(scores.found[i,,], labels.true)
+#     # plot found motifs ROC
+#     lines(coord.found[,"x"], coord.found[,"y"], lwd=0.5)
+#   }
+#   # plot diagonale line
+#   segments(0,0,1,1, lwd=2, lty=2)
+#   # plot true motifs ROC
+#   lines(coord.true[,"x"], coord.true[,"y"], lwd=3, col="red")
+#   
+#   # true motif class 1
+#   plot.logo(motif.true.1, path.a, path.c, path.g, path.t,
+#             main="True motif class 1", cex.main=2)
+#   # x-axis
+#   axis(1, at=1:ncol(motif.true.1), labels=1:ncol(motif.true.1))
+#   # x-axis
+#   axis(2, at=0:2, labels=0:2)
+#   
+#   # true motif class 2
+#   plot.logo(motif.true.2, path.a, path.c, path.g, path.t,
+#             main="True motif class 2", cex.main=2)
+#   # x-axis
+#   axis(1, at=1:ncol(motif.true.2), labels=1:ncol(motif.true.2))
+#   # x-axis
+#   axis(2, at=0:2, labels=0:2)
+#   
+#   # best motif found
+#   idx.best = which.max(auc.found)
+#   file.motif.best = file.path(dir.results,
+#                               sprintf("simulated_sequences_2class_flip_class_model_%d.mat", 
+#                                       idx.best))
+#   motif.found.best = read.sequence.models(file.motif.best)$models
+#   # best found motif class 1
+#   plot.logo(motif.found.best[,,1], path.a, path.c, path.g, path.t,
+#             main="Best found motif class 1", cex.main=2)
+#   # x-axis
+#   axis(1, at=1:ncol(motif.found.best[,,1]), labels=1:ncol(motif.found.best[,,1]))
+#   # x-axis
+#   axis(2, at=0:2, labels=0:2)
+#   # best found motif class 2
+#   plot.logo(motif.found.best[,,2], path.a, path.c, path.g, path.t,
+#             main="Best found motif class 2", cex.main=2)
+#   # x-axis
+#   axis(1, at=1:ncol(motif.found.best[,,2]), labels=1:ncol(motif.found.best[,,2]))
+#   # x-axis
+#   axis(2, at=0:2, labels=0:2)
+#     
+# dev.off()
+
+
+# plot results : AUC distribution (left) and ROC curves (right)
+png(filename=file.path(dir.results,
+                       "simulated_sequences_2class_flip_auc_roc.png"),
+    units="in", res=720, width=12, height=6)
+# X11(width=12, height=6)
+
+  par(mfrow=c(1,2),
+      mar=c(5.1,5.1,4.1,2.1))
+  
+  # boxplot of the AUCs of all runs, the dashed red line is the
+  # AUC obtained with the true motifs
+  boxplot(auc.found, ylim=c(0,1), main="AUC values", ylab="AUC",
+          cex.lab=2, cex.main=2)
+  abline(h=auc.true, col="red", lwd=2, lty=2)
+  
+  # plot all ROC
+  coord.true = get.roc.coord(scores.true, labels.true)
+  # empty plot ; x is the fraction of class 2 sequences ranked so far, that is 1 - specificity
+  plot(seq(0,1,by=0.2), seq(0,1,by=0.2), col=0,
+       main="ROC curves", xlab="1 - specificity", ylab="sensitivity",
+       cex.lab=2, cex.main=2)
+  for(i in 1:n.runs)
+  { # compute true and false positive rates
+    coord.found = get.roc.coord(scores.found[i,,], labels.true)
+    # plot found motifs ROC
+    lines(coord.found[,"x"], coord.found[,"y"], lwd=0.5)
+  }
+  # plot diagonal line
+  segments(0,0,1,1, lwd=2, lty=2)
+  # plot true motifs ROC
+  lines(coord.true[,"x"], coord.true[,"y"], lwd=3, col="red")
+dev.off()
+
+png(filename=file.path(dir.results,
+                       "simulated_sequences_2class_flip_best_motifs.png"),
+    units="in", res=720, width=7, height=8)
+# X11(width=7, height=10) 
+  # models of the run with the highest AUC (the first such run on ties)
+  idx.best = which.max(auc.found)
+  file.motif.best = file.path(dir.results,
+                              sprintf("simulated_sequences_2class_flip_class_model_%d.mat", 
+                                      idx.best))
+  motif.found.best = read.sequence.models(file.motif.best)$models
+  # NOTE(review): pfm.found.best is not read below, the found motifs are wrapped one by one instead
+  pfm.found.best = get_pfm_list(motif.found.best, "Motif found")
+  rownames(motif.true.1) = rownames(motif.true.2) = c('A', 'C', 'G', 'T')
+  pfm.true.1     = new("pfm",
+                         mat=motif.true.1, 
+                         name="True motif class 1")
+  pfm.true.2     = new("pfm",
+                        mat=motif.true.2, 
+                        name="True motif class 2")
+  pfm.found.best.1 = new("pfm",
+                         mat=motif.found.best[,,1], 
+                         name="Found motif class 1")
+  pfm.found.best.2 = new("pfm",
+                         mat=motif.found.best[,,2], 
+                         name="Found motif class 2")
+  # draw the 2 true and the 2 found motifs in a single figure
+  motifStack(c(pfm.true.1,
+               pfm.true.2,
+               pfm.found.best.1,
+               pfm.found.best.2),
+             layout="treeview")
+dev.off()
+
+
diff --git a/scripts/toy_data/analysis_data_sequence.sh b/scripts/toy_data/analysis_data_sequence.sh
new file mode 100755
index 0000000..063886b
--- /dev/null
+++ b/scripts/toy_data/analysis_data_sequence.sh
@@ -0,0 +1,33 @@
+# some paths
+## directories
+results_dir='results/toy_data'
+seq_dir="data/toy_data"
+
+file_seq_1=$seq_dir'/simulated_sequences_1class_flip.mat'
+file_seq_2=$seq_dir'/simulated_sequences_2class_flip.mat'
+
+file_seed_1=$results_dir'/simulated_sequences_1class_flip_seed.mat'
+file_seed_2=$results_dir'/simulated_sequences_2class_flip_seed.mat'
+
+mkdir -p "$results_dir" # -p : no error on a re-run, when the directory already exists
+
+touch "$file_seed_1"
+touch "$file_seed_2"
+
+# parameters
+n_iter='200'
+n_shift='90'
+n_core=20
+
+for i in {1..50}
+do
+	# 2 classes. The file names carry no class number : $k was never set in this script and expanded to nothing
+	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo)
+	file_prob=$results_dir/'simulated_sequences_2class_flip_class_prob_'$i'.mat4d'
+	file_prob2=$results_dir/'simulated_sequences_2class_flip_class_prob_'$i'.txt'
+	file_mod=$results_dir/'simulated_sequences_2class_flip_class_model_'$i'.mat'
+	echo "$file_prob $seed" >> "$file_seed_2"
+	bin/EMSequence  --seq "$file_seq_2" --class 2 --shift $n_shift --flip --iter $n_iter --seed "$seed" --thread $n_core --out "$file_prob"
+	bin/ProbToModel --seq "$file_seq_2" --prob "$file_prob" --thread $n_core 1> "$file_mod"
+	bin/MatrixBinToTxt --file "$file_prob" --type double --ndim 4 > "$file_prob2"
+done
diff --git a/scripts/toy_data/generate_matrix_data_sequence.R b/scripts/toy_data/generate_matrix_data_sequence.R
index d34cbd7..8f98f3a 100644
--- a/scripts/toy_data/generate_matrix_data_sequence.R
+++ b/scripts/toy_data/generate_matrix_data_sequence.R
@@ -1,255 +1,245 @@
 setwd(file.path("/", "local", "groux", "scATAC-seq"))
 
 # required librairies and functions
 library(abind)
 
 # functions
 
 #' Converts a vector of characters containing a DNA sequence 
 #' into a vector of integers : A->1, C->2, G->3, T->4. Any 
 #' non ACGT character triggers an error.
 #' \param sequence the DNA sequence stored as a vector of 
 #' characters.
 #' \return a vector of integers.
 #' \author Romain Groux
 dna.to.int = function(sequence)
 { seq.len = length(sequence)
   seq.int = vector(length=seq.len, mode="numeric")
   for(i in 1:seq.len)
   { if(sequence[i] == "A")
   { seq.int[i] = 1 }
     else if(sequence[i] == "C")
     { seq.int[i] = 2 }
     else if(sequence[i] == "G")
     { seq.int[i] = 3 }
     else if(sequence[i] == "T")
     { seq.int[i] = 4}
     else
     { stop(sprintf("Error! Unrecognized character in DNA sequence at position %d : %s", i, sequence[i])) }
   }
   return(seq.int)
 }
 
 
 #' The complementary function to dna.to.int().
 #' \param sequence the DNA stored as a vector of int :
-#' A->1, C->2, G->3, T->4
+#' 1->A, 2->C, 3->G, 4->T
 #' \return a vector of characters.
 #' \author Romain Groux
 int.to.dna = function(sequence)
 { seq.len = length(sequence)
   seq.let = vector(length=seq.len, mode="character")
   for(i in 1:seq.len)
   { if(sequence[i] == 1)
   { seq.let[i] = "A" }
     else if(sequence[i] == 2)
     { seq.let[i] = "C" }
     else if(sequence[i] == 3)
     { seq.let[i] = "G" }
     else if(sequence[i] == 4)
     { seq.let[i] = "T"}
     else
     { stop(sprintf("Error! Unrecognized character in int sequence at position %d : %d", i, sequence[i])) }
   }
   return(seq.let)
 }
 
 simulate_data = function(n_seq, l_seq, classes, prob_bg, p_classes, p_flip)
 { 
-  # the alphabet A->1, C->2, G->3, T->4
+  # the alphabet, 1-based in here (A->1, C->2, G->3, T->4) ; the sequences are written to file 0-based (A->0, C->1, G->2, T->3)
   alphabet = c(1, 2, 3, 4)
   l_alpha  = length(alphabet)
   
   # binding site length
   l_bs     = ncol(classes[,,1])
   # number of classes
   n_class  = dim(classes)[3]
   
   # checks
   if(length(p_classes )!= n_class)
   { stop(sprintf("Error! %d classes detected but %d class probability given!", n_class, length(p_classes))) }
   for(k in 1:n_class)
   { if((nrow(classes[,,k]) != 4) || (ncol(classes[,,k]) != l_bs))
     { stop(sprintf("Error! Check the dimensions of class %d motif : %d / %d!", k, nrow(classes[,,k]), ncol(classes[,,k]))) }
   }
   
   # last position (comprised) for a binding site to begin and be entirely in the seq
   last_pos_bs = l_seq - l_bs + 1
   
   # data structures
   sequences   = matrix(data=0, n_seq, l_seq)                 # the sequences
   bs_starts   = vector(length=n_seq, mode="numeric")         # the starting positions of the binding site
   bs_flips    = vector(length=n_seq, mode="numeric")         # the orientation of the binding site
   bs_classes  = vector(length=n_seq, mode="numeric")         # the class from which the binding site was sampled 
   bs_contents = matrix(data=0, nrow=n_seq, ncol=l_bs)        # the binding site sequences
   bs_probs    = array(data=0, dim=c(l_alpha, l_bs, n_class)) # the class binding site probability matrices
   
   for(i in 1:n_seq)
   {
     # sample from a uniform distribution where the binding site should start
     bs_starts[i]  = sample(1:last_pos_bs, 1)
     # sample a class
     class         = sample(1:n_class, 1, prob=p_classes)
     bs_classes[i] = class
     # sample a flip state (0->forward, 1->reverse)
     flip          = rbinom(1, 1, prob=p_flip)
     bs_flips[i]   = flip
     # to store the int seq
     seq           = vector(length=l_seq, mode="numeric")
     seq_bs        = vector(length=l_bs, mode="numeric")
     
     # over the sequence
     j = 1
     while(j <= l_seq)
     { # binding site starts
       if(j == bs_starts[i])
       { for(k in 0:(l_bs-1))
         { # reverse strand
           if(flip)
           { base                                    = sample(alphabet, 1, prob=rev(classes[,,class][,l_bs-k]))
             seq[j+k]                                = base
             seq_bs[k+1]                             = base
             bs_probs[l_alpha-base+1, l_bs-k, class] = bs_probs[l_alpha-base+1, l_bs-k, class] + 1
           }
           # forward strand
           else
           { base                        = sample(alphabet, 1, prob=classes[,,class][,k+1])
             seq[j+k]                    = base
             seq_bs[k+1]                 = base
             bs_probs[base, k+1, class]  = bs_probs[base, k+1, class] + 1
           }
         }
         j = j + k
       # this is background sequence
       } else {
         seq[j] = sample(alphabet, 1, prob=prob_bg)
       }
       j = j + 1
     }
-    sequences[i,]   = int.to.dna(seq)
-    bs_contents[i,] = int.to.dna(seq_bs)
+    sequences[i,]   = seq
+    bs_contents[i,] = seq_bs
   }
   # normalize
   for(i in 1:n_class)
   { bs_probs[,,i] = bs_probs[,,i] / colSums(bs_probs[,,i]) }
   return(list(sequences=sequences, sites=bs_contents, motifs=bs_probs, starts=bs_starts, flips=bs_flips, classes=bs_classes))
 }
 
 
 # some general parameters
-n_seq         = 10  # number of sequences
-l_seq         = 10  # length of sequences
+n_seq         = 2000  # number of sequences
+l_seq         = 100  # length of sequences
 
 # the base probabilities inside the binding site (A,C,G,T)
-motif_class1 = matrix(data=c(1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0,
-                             0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0,
-                             0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0,
-                             0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0), 
+motif_class1 = matrix(data=c(0.9,  .033, .033,  0.1,  0.1, .033, .033,  0.9,
+                             .033,  0.9, .033,  0.1,  0.1, .033,  0.9, .033,
+                             .033, .033,  0.9,  0.1,  0.1,  0.9, .033, .033,
+                             .033, .033, .033,  0.7,  0.7, .033, .033, .033), 
                       nrow=4, ncol=8, byrow=T)
-motif_class2 = matrix(data=c(0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0,
-                             0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-                             1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-                             0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0), 
+motif_class2 = matrix(data=c(.033, .033, .033, 0.7, 0.7, .033, .033, .033,
+                              0.9, .033,  0.9, 0.1, 0.1, .033, .033, .033,
+                             .033,  0.9, .033, 0.1, 0.1, .033, .033, .033,
+                             .033, .033, .033, 0.1, 0.1,  0.9,  0.9,  0.9), 
                       nrow=4, ncol=8, byrow=T)
 
 
 dir.create(file.path("data", "toy_data"), showWarnings=FALSE)
 
-# ------------------------------------------- 1 classes with 1 motif/sequence, no flip, a really toy exemple  -------------------------------------------
-
-data = matrix(nrow=4, ncol=10, byrow=T,
-              data=c('T', 'T', 'C', 'C', 'T', 'T', 'A', 'G', 'C', 'T',
-                     'T', 'T', 'C', 'C', 'T', 'T', 'G', 'C', 'T', 'A',
-                     'T', 'T', 'C', 'C', 'T', 'T', 'C', 'T', 'A', 'G',
-                     'T', 'T', 'C', 'C', 'T', 'T', 'T', 'A', 'G', 'C'))
-write.table(data, file=file.path("data", "toy_data", "simulated_sequences_toy.mat"), quote=F, row.names=F, col.names=F, sep='\t', eol='\n')
-rm(data)
-
 
 # -------------------------------------------------------- 1 classes with 1 motif/sequence, no flip ----------------------------------------------------- 
 
 # seed
 set.seed(20190715)
 
 # the class binding site
 motif_classes = array(data=motif_class1, dim=c(dim(motif_class1), 1))
 # the class probability
 p_classes     = c(1)
 # the probability of having a binding site on the reverse strand
 p_flip        = 0
 # the base probabilities outside the binding site (A,C,G,T)
 prob_bg       = rep(0.25, 4)
 
 # simulate the data
 data = simulate_data(n_seq, l_seq, motif_classes, prob_bg, p_classes, p_flip)
 # save
-write.table(data$sequences, file=file.path("data", "toy_data", "simulated_sequences_1class_noflip.mat"), quote=F, row.names=F, col.names=F, sep='\t', eol='\n')
-write.table(motif_class1,   file=file.path("data", "toy_data", "simulated_sequences_motif.mat"), quote=F, row.names=F, col.names=F, sep='\t', eol='\n')
-write.table(data$sites,       file=file.path("data", "toy_data", "simulated_sequences_1class_noflip_contents.txt"), quote=F, row.names=F, col.names=F, sep='\t', eol='\n')
+write.table(data$sequences-1, file=file.path("data", "toy_data", "simulated_sequences_1class_noflip.mat"), quote=F, row.names=F, col.names=F, sep='\t', eol='\n')
+write.table(motif_class1,     file=file.path("data", "toy_data", "simulated_sequences_motif.mat"), quote=F, row.names=F, col.names=F, sep='\t', eol='\n')
+write.table(data$sites-1,     file=file.path("data", "toy_data", "simulated_sequences_1class_noflip_contents.txt"), quote=F, row.names=F, col.names=F, sep='\t', eol='\n')
 write.table(data$starts,      file=file.path("data", "toy_data", "simulated_sequences_1class_noflip_starts.txt"), quote=F, row.names=F, col.names=F, sep='\t', eol='\n')
 write.table(data$motifs[,,1], file=file.path("data", "toy_data", "simulated_sequences_1class_noflip_motif1.txt"), quote=F, row.names=F, col.names=F, sep='\t', eol='\n')
 write.table(data$flips,       file=file.path("data", "toy_data", "simulated_sequences_1class_noflip_flips.txt"), quote=F, row.names=F, col.names=F, sep='\t', eol='\n')
 write.table(data$classes,     file=file.path("data", "toy_data", "simulated_sequences_1class_noflip_classes.txt"), quote=F, row.names=F, col.names=F, sep='\t', eol='\n')
 # clean
 rm(motif_classes, p_classes, p_flip, prob_bg, data)
 
 
 
 # -------------------------------------------------------- 1 classes with 1 motif/sequence, flip -------------------------------------------------------- 
 
 # seed
 set.seed(201803142)
 
 # the class binding site
 motif_classes = array(data=motif_class1, dim=c(dim(motif_class1), 1))
 # the class probability
 p_classes     = c(1)
 # the probability of having a binding site on the reverse strand
 p_flip        = 0.5
 # the base probabilities outside the binding site (A,C,G,T)
 prob_bg       = rep(0.25, 4)
 
 # simulate the data
 data = simulate_data(n_seq, l_seq, motif_classes, prob_bg, p_classes, p_flip)
 # save
-write.table(data$sequences,   file=file.path("data", "toy_data", "simulated_sequences_1class_flip.mat"), quote=F, row.names=F, col.names=F, sep='\t', eol='\n')
-write.table(data$sites,       file=file.path("data", "toy_data", "simulated_sequences_1class_flip_contents.txt"), quote=F, row.names=F, col.names=F, sep='\t', eol='\n')
+write.table(data$sequences-1, file=file.path("data", "toy_data", "simulated_sequences_1class_flip.mat"), quote=F, row.names=F, col.names=F, sep='\t', eol='\n')
+write.table(data$sites-1,     file=file.path("data", "toy_data", "simulated_sequences_1class_flip_contents.txt"), quote=F, row.names=F, col.names=F, sep='\t', eol='\n')
 write.table(data$starts,      file=file.path("data", "toy_data", "simulated_sequences_1class_flip_starts.txt"), quote=F, row.names=F, col.names=F, sep='\t', eol='\n')
 write.table(data$motifs[,,1], file=file.path("data", "toy_data", "simulated_sequences_1class_flip_motif1.txt"), quote=F, row.names=F, col.names=F, sep='\t', eol='\n')
 write.table(data$flips,       file=file.path("data", "toy_data", "simulated_sequences_1class_flip_flips.txt"), quote=F, row.names=F, col.names=F, sep='\t', eol='\n')
 write.table(data$classes,     file=file.path("data", "toy_data", "simulated_sequences_1class_flip_classes.txt"), quote=F, row.names=F, col.names=F, sep='\t', eol='\n')
 # clean
 rm(motif_classes, p_classes, p_flip, prob_bg, data)
 
 
 
-# -------------------------------------------------------- 2 classes with 1 bs/sequence, flip -------------------------------------------------------- 
+# -------------------------------------------------------- 2 classes with 1 motif /sequence, flip -------------------------------------------------------- 
 
 # seed
 set.seed(201803143)
 
 # the class binding site
 motif_classes = abind(motif_class1, motif_class2, along=3)
 # the class probability
 p_classes     = c(0.5, 0.5)
 # the probability of having a binding site on the reverse strand
 p_flip        = 0.5
 # the base probabilities outside the binding site (A,C,G,T)
 prob_bg       = rep(0.25, 4)
 
 # simulate the data
 data = simulate_data(n_seq, l_seq, motif_classes, prob_bg, p_classes, p_flip)
 # save
-write.table(data$sequences,   file=file.path("data", "toy_data", "simulated_sequences_2class_flip.mat"), quote=F, row.names=F, col.names=F, sep='\t', eol='\n')
-write.table(data$sites,       file=file.path("data", "toy_data", "simulated_sequences_2class_flip_contents.txt"), quote=F, row.names=F, col.names=F, sep='\t', eol='\n')
+write.table(data$sequences-1, file=file.path("data", "toy_data", "simulated_sequences_2class_flip.mat"), quote=F, row.names=F, col.names=F, sep='\t', eol='\n')
+write.table(data$sites-1 ,    file=file.path("data", "toy_data", "simulated_sequences_2class_flip_contents.txt"), quote=F, row.names=F, col.names=F, sep='\t', eol='\n')
 write.table(data$starts,      file=file.path("data", "toy_data", "simulated_sequences_2class_flip_starts.txt"), quote=F, row.names=F, col.names=F, sep='\t', eol='\n')
 write.table(data$motifs[,,1], file=file.path("data", "toy_data", "simulated_sequences_2class_flip_motif1.txt"), quote=F, row.names=F, col.names=F, sep='\t', eol='\n')
 write.table(data$motifs[,,2], file=file.path("data", "toy_data", "simulated_sequences_2class_flip_motif2.txt"), quote=F, row.names=F, col.names=F, sep='\t', eol='\n')
 write.table(data$flips,       file=file.path("data", "toy_data", "simulated_sequences_2class_flip_flips.txt"), quote=F, row.names=F, col.names=F, sep='\t', eol='\n')
 write.table(data$classes,     file=file.path("data", "toy_data", "simulated_sequences_2class_flip_classes.txt"), quote=F, row.names=F, col.names=F, sep='\t', eol='\n')
 # clean
 rm(motif_classes, p_classes, p_flip, prob_bg, data)
 
 
 
 
diff --git a/src/Applications/.ProbToModelApplication.cpp.swp b/src/Applications/.ProbToModelApplication.cpp.swp
deleted file mode 100644
index ace6708..0000000
Binary files a/src/Applications/.ProbToModelApplication.cpp.swp and /dev/null differ
diff --git a/src/Applications/EMConsensusSequenceApplication.cpp b/src/Applications/EMConsensusSequenceApplication.cpp
index 391036a..b18601b 100644
--- a/src/Applications/EMConsensusSequenceApplication.cpp
+++ b/src/Applications/EMConsensusSequenceApplication.cpp
@@ -1,271 +1,272 @@
 
 #include <EMConsensusSequenceApplication.hpp>
 #include <EMConsensusSequence.hpp>
 
 #include <iostream>
 #include <string>
 #include <vector>
 #include <utility>                     // std::move()
 #include <stdexcept>                   // std::invalid_argument
 #include <boost/program_options.hpp>
 
 #include <Matrix2D.hpp>
 #include <Matrix4D.hpp>
 #include <matrix_utility.hpp>
 #include <KmersStatistics.hpp>  // kmer::compute_kmer_pvalue()
 #include <sorting_utility.hpp>  // order()
 
 
 namespace po = boost::program_options ;
 
 
 // Builds the application from the command line.
 // Every field is first set to an empty / zero / false default (runnable
 // starts as true), then parseOptions() overwrites the fields from argv.
 // parseOptions() throws std::invalid_argument on a null argv, on a parsing
 // error or when a mandatory option is missing.
 EMConsensusSequenceApplication::EMConsensusSequenceApplication(int argn, char** argv)
     : file_consseq(""), file_filter(""), file_out(""),
       n_class(0), n_iter(0), n_shift(0), flip(false), bckg_class(false),
       n_threads(0), seed(""), runnable(true)
 {
     // parse command line options and set the fields
     this->parseOptions(argn, argv) ;
 }
 
 // Runs the classification configured by parseOptions().
 // Loads the consensus sequences from file_consseq, optionally filters rows
 // using the indices read from file_filter, builds an EMConsensusSequence
 // (randomly seeded when a seed was given, seeded from enriched kmers
 // otherwise), classifies and saves the posterior probabilities to file_out.
 // Returns EXIT_SUCCESS once the results are saved, EXIT_FAILURE if the
 // application is not runnable (parseOptions() clears runnable for --help).
 int EMConsensusSequenceApplication::run()
 {   if(this->runnable)
     {   EMConsensusSequence* em(nullptr) ;
 
         // row filter
         std::vector<size_t> filter ;
         if(this->file_filter != "")
         {   // it is a column vector, easier to use the Matrix2D interface
             // to read it rather than coding a function for :)
             filter = Matrix2D<size_t>(this->file_filter).get_data() ;
             std::sort(filter.begin(), filter.end()) ;
         }
 
         // data
         Matrix3D<double> data ;
         data.load(this->file_consseq) ;
         // filter out some rows if needed
         if(filter.size())
         {   data = filter_rows(filter, data) ; }
 
         // seeds motifs randomly
         if(this->seed != "")
         {   em = new EMConsensusSequence(std::move(data),
                                          this->n_class,
                                          this->n_iter,
                                          this->n_shift,
                                          this->flip,
                                          this->bckg_class,
                                          this->seed,
                                          this->n_threads) ;
         }
         // seeds from enriched kmers
         else
         {   // model length once the shifting freedom is accounted for
             size_t model_ncol = data.get_dim()[1] - this->n_shift + 1 ;
 
             // the model is built from data, so this must happen before
             // data is moved into the EM object below
             Matrix3D<double> model = this->init_model_kmer(model_ncol,
                                                            data) ;
             em = new EMConsensusSequence(std::move(data),
                                          std::move(model),
                                          this->n_iter,
                                          this->flip,
+                                         this->bckg_class,
                                          this->n_threads) ;
         }
 
         // classify
         em->classify() ;
         em->get_post_prob().save(this->file_out) ;
 
         // clean
         // NOTE(review): em is a raw owning pointer, it is not released if
         // classify() or save() throws.
         delete em ;
         em = nullptr ;
 
         return EXIT_SUCCESS ;
     }
     else
     {   return EXIT_FAILURE ; }
 }
 
 // Parses the command line and sets the application fields accordingly.
 // argn / argv are the values received by main().
 // After parsing, n_iter, n_class and n_shift left at 0 are set to 1, flip
 // and bckg_class are set if --flip / --bgclass were given, and runnable is
 // set to false if --help was given (the help is printed) and true otherwise.
 // Throws std::invalid_argument if argv is null, if the parsing fails or if
 // --consseq or --out is missing while --help was not requested.
 void EMConsensusSequenceApplication::parseOptions(int argn, char** argv)
 {
     // no option to parse
     if(argv == nullptr)
     {   std::string message = "no options to parse!" ;
         throw std::invalid_argument(message) ;
     }
 
     // help messages
     std::string desc_msg =         "\n"
                                    "EMConsensusSequence is a probabilistic partitioning algorithm that \n"
                                    "sofetly assigns consensus sequences to classes given their motif\n"
                                    "content.\n"
                                    "The assignment probabilities are written in binary format as a 4D "
                                    "matrix.\n\n" ;
     std::string opt_help_msg     = "Produces this help message." ;
     std::string opt_thread_msg   = "The number of threads dedicated to parallelize the computations,\n "
                                    "by default 0 (no parallelization)." ;
     std::string opt_consseq_msg  = "The path to the file containing the consensus sequences" ;
     std::string opt_filter_msg   = "Optional. The path to a single column text file containing the 0-based\n"
                                    "indices of rows to filter out in the data." ;
     std::string opt_file_out_msg = "A path to a file in which the assignment probabilities will be saved\n"
                                    "in binary format." ;
     std::string opt_iter_msg     = "The number of iterations." ;
     std::string opt_class_msg    = "The number of classes to find." ;
     std::string opt_shift_msg    = "Enables this number of column of shifting freedom to realign\n"
                                    "the data. By default, shifting is disabled (equivalent to\n"
                                    "--shift 1)." ;
     std::string opt_flip_msg     = "Enables flipping to realign the data.";
     std::string opt_bckg_msg     = "Adds a class to model the sequence background. This class\n"
                                    "contains the sequence background probabilities at each position\n"
                                    "and is never updated." ;
     std::string opt_seed_msg     = "A value to seed the random number generator.";
 
     // option parser
     boost::program_options::variables_map vm ;
     boost::program_options::options_description desc(desc_msg) ;
 
     desc.add_options()
                 ("help,h",  opt_help_msg.c_str())
 
                 ("consseq", po::value<std::string>(&(this->file_consseq)), opt_consseq_msg.c_str())
                 ("filter",   po::value<std::string>(&(this->file_filter)), opt_filter_msg.c_str())
 
                 ("out",     po::value<std::string>(&(this->file_out)),     opt_file_out_msg.c_str())
 
                 ("iter,i",  po::value<size_t>(&(this->n_iter)),            opt_iter_msg.c_str())
                 ("class,c", po::value<size_t>(&(this->n_class)),           opt_class_msg.c_str())
                 ("shift,s", po::value<size_t>(&(this->n_shift)),           opt_shift_msg.c_str())
                 ("flip",    opt_flip_msg.c_str())
                 ("bgclass", opt_bckg_msg.c_str())
 
                 ("seed",    po::value<std::string>(&(this->seed)),         opt_seed_msg.c_str())
                 ("thread",  po::value<std::size_t>(&(this->n_threads)),    opt_thread_msg.c_str()) ;
 
     // parse
     try
     {   po::store(po::parse_command_line(argn, argv, desc), vm) ;
         po::notify(vm) ;
     }
     catch(std::invalid_argument& e)
     {   std::string msg = std::string("Error! Invalid option given!\n") + std::string(e.what()) ;
         throw std::invalid_argument(msg) ;
     }
     catch(...)
     {   throw std::invalid_argument("An unknown error occured while parsing the options") ; }
 
     bool help = vm.count("help") ;
 
     // checks unproper option settings
     // (the data option of this application is --consseq, the message must
     // name the option the user actually has to give)
     if(this->file_consseq == "" and
        (not help))
     {   std::string msg("Error! No data were given (--consseq)!") ;
         throw std::invalid_argument(msg) ;
     }
     if(this->file_out == "" and
        (not help))
     {   std::string msg("Error! No output file given (--out)!") ;
         throw std::invalid_argument(msg) ;
     }
 
     // no iter given -> 1 iter
     if(this->n_iter == 0)
     {   this->n_iter = 1 ; }
     // no class given -> 1 class
     if(this->n_class == 0)
     {   this->n_class = 1 ; }
     // no shift given, value of 1 -> no shift
     if(this->n_shift == 0)
     {   this->n_shift = 1 ; }
     // set flip
     if(vm.count("flip"))
     {   this->flip  = true ; }
     // set background class
     if(vm.count("bgclass"))
     {   this->bckg_class  = true ; }
     // help invoked, run() cannot be invoked
     if(help)
     {   std::cout << desc << std::endl ;
         this->runnable = false ;
         return ;
     }
     // everything fine, run() can be called
     else
     {   this->runnable = true ;
         return ;
     }
 }
 
 // Builds the initial class models from the most significant kmers.
 // l_model is the number of columns of each class model, data the consensus
 // sequences in which the kmers are searched.
 // If l_model > 4 the kmers are 4 columns shorter than the model and are
 // flanked by 2 uniform (0.25) columns on each side, otherwise the kmers
 // span the whole model. The n_class kmers ranked first by p-value each seed
 // one class : at each kmer position the kmer base gets 0.7 and each other
 // base 0.1.
 // Returns a n_class x l_model x 4 matrix.
 // NOTE(review): index[i] is read for every i < n_class, this assumes that
 // compute_kmer_pvalue() returns at least n_class kmers - confirm.
 Matrix3D<double> EMConsensusSequenceApplication::init_model_kmer(size_t l_model,
                                                                  const Matrix3D<double>& data) const
 {
     // leave space for 2N's on each side
     size_t l_kmer  = l_model ;
     size_t n_n     = 0 ; // so far, 0 N's added
     if(l_model > 4)
     {   n_n     = 2 ; // 2 N's on each side
         l_kmer -= (2*n_n) ;
     }
 
     // compute the pvalue associated to each kmer
     auto kmers_pvalues = kmers::compute_kmer_pvalue(data, l_kmer) ;
 
     // sort kmers by ascending pvalue
     std::vector<size_t> index = order(kmers_pvalues.second, true) ;
     // get most significant
     std::vector<std::string> kmers(this->n_class) ;
     for(size_t i=0; i<this->n_class; i++)
     {   size_t idx = index[i] ;
         kmers[i] = kmers_pvalues.first[idx] ;
         std::cerr << kmers_pvalues.first[idx] << "  " << kmers_pvalues.second[idx] << std::endl ;
     }
     // turn to motifs
     double p_base  = 0.7 ;  // the prob of the base matching these of the kmer
     double p_nbase = 0.1 ;  // the prob of the bases not matching these of the kmer
     double p_n     = 0.25 ; // the prob of N
     // only N's for now
     Matrix3D<double> model(this->n_class,
                                 l_model,
                                 4,
                                 p_n) ;
     for(size_t i=0; i<kmers.size(); i++)
     {   // j_model has to advance together with j_kmer, otherwise every kmer
         // position is written in the same model column (n_n) and the other
         // columns stay uniform
         for(size_t j_kmer=0, j_model=n_n; j_kmer<l_kmer; j_kmer++, j_model++)
         {   // A
             if(kmers[i][j_kmer] == '0')
             {   model(i,j_model, 0) = p_base ;
                 model(i,j_model, 1) = p_nbase ;
                 model(i,j_model, 2) = p_nbase ;
                 model(i,j_model, 3) = p_nbase ;
             }
             // C
             else if(kmers[i][j_kmer] == '1')
             {   model(i,j_model, 0) = p_nbase ;
                 model(i,j_model, 1) = p_base ;
                 model(i,j_model, 2) = p_nbase ;
                 model(i,j_model, 3) = p_nbase ;
             }
             // G
             else if(kmers[i][j_kmer] == '2')
             {   model(i,j_model, 0) = p_nbase ;
                 model(i,j_model, 1) = p_nbase ;
                 model(i,j_model, 2) = p_base ;
                 model(i,j_model, 3) = p_nbase ;
             }
             // T
             else if(kmers[i][j_kmer] == '3')
             {   model(i,j_model, 0) = p_nbase ;
                 model(i,j_model, 1) = p_nbase ;
                 model(i,j_model, 2) = p_nbase ;
                 model(i,j_model, 3) = p_base ;
             }
         }
     }
     return model ;
 }
 
 // Program entry point : builds the application from the command line
 // arguments and returns the value returned by run().
 int main(int argn, char** argv)
 {   EMConsensusSequenceApplication app(argn, argv) ;
     return app.run() ;
 }
 
diff --git a/src/Applications/EMSequenceApplication.cpp b/src/Applications/EMSequenceApplication.cpp
index 3d38eb4..cfed4c6 100644
--- a/src/Applications/EMSequenceApplication.cpp
+++ b/src/Applications/EMSequenceApplication.cpp
@@ -1,377 +1,378 @@
 
 #include <EMSequenceApplication.hpp>
 #include <EMSequence.hpp>
 
 #include <iostream>
 #include <string>
 #include <utility>                     // std::move()
 #include <stdexcept>                   // std::invalid_argument
 #include <boost/program_options.hpp>
 #include <boost/algorithm/string.hpp>  // boost::split()
 
 #include <Matrix2D.hpp>
 #include <Matrix4D.hpp>
 #include <matrix_utility.hpp>   // filter()
 #include <KmersStatistics.hpp>  // kmer::compute_kmer_pvalue()
 #include <sorting_utility.hpp>  // order()
 
 namespace po = boost::program_options ;
 
 
 // Builds the application from the command line.
 // Every field is first set to an empty / zero / false default (runnable
 // starts as true), then parseOptions() overwrites the fields from argv.
 // parseOptions() throws std::invalid_argument on a null argv, on a parsing
 // error or when a mandatory option is missing.
 EMSequenceApplication::EMSequenceApplication(int argn, char** argv)
     : file_seq(""), files_motif(""), file_filter(""), file_out(""),
       n_class(0), n_iter(0), n_shift(0), flip(false), bckg_class(false),
       n_threads(0), seed(""), runnable(true)
 {
     // parse command line options and set the fields
     this->parseOptions(argn, argv) ;
 }
 
 // Runs the classification configured by parseOptions().
 // Loads the sequences from file_seq, optionally filters rows using the
 // indices read from file_filter, then builds an EMSequence seeded in one of
 // three ways :
 //   - randomly, if no motif file was given and a seed was given
 //   - from the given motif files, if any were given
 //   - from enriched kmers otherwise
 // The sequences are then classified and the posterior probabilities are
 // saved to file_out.
 // Returns EXIT_SUCCESS once the results are saved, EXIT_FAILURE if the
 // application is not runnable (parseOptions() clears runnable for --help).
 int EMSequenceApplication::run()
 {   if(this->runnable)
     {   EMSequence* em(nullptr) ;
 
         // data
         Matrix2D<int> data(this->file_seq) ;
 
         // filter out some rows if needed
         std::vector<size_t> filter ;
         if(this->file_filter != "")
         {   // it is a column vector, easier to use the Matrix2D interface
             // to read it rather than coding a function for :)
             filter = Matrix2D<size_t>(this->file_filter).get_data() ;
             std::sort(filter.begin(), filter.end()) ;
             data = filter_rows(filter, data) ;
         }
 
         // seeds motifs randomly
         if(this->files_motif == "" and
            this->seed != "")
         {   em = new EMSequence(std::move(data),
                                 this->n_class,
                                 this->n_iter,
                                 this->n_shift,
                                 this->flip,
                                 this->bckg_class,
                                 this->seed,
                                 this->n_threads) ;
         }
         // seeds motifs with the given matrices
         else if(this->files_motif != "")
         {   // model
             std::vector<std::string> motif_paths ;
             boost::split(motif_paths, this->files_motif, [](char c){return c == ',';}) ;
             // this->n_class = motif_paths.size() + this->bckg_class ;
             size_t model_ncol = data.get_ncol() - this->n_shift + 1 ;
 
             // add the given motif, random motifs (if needed) and
             // background class (if needed)
             // (the model is built from data, so this must happen before
             // data is moved into the EM object below)
             Matrix3D<double> model = this->init_model(model_ncol,
                                                       data,
                                                       motif_paths) ;
 
             // flip must still be forwarded, bckg_class is inserted right
             // after it exactly as in the kmer seeded call below. Dropping
             // flip would shift bckg_class and n_threads by one argument.
             em = new EMSequence(std::move(data),
                                 std::move(model),
                                 this->n_iter,
                                 this->flip,
+                                this->bckg_class,
                                 this->n_threads) ;
         }
         // seeds from enriched kmers
         else
         {   size_t model_ncol = data.get_ncol() - this->n_shift + 1 ;
 
             Matrix3D<double> model = this->init_model_kmer(model_ncol,
                                                            data) ;
             em = new EMSequence(std::move(data),
                                 std::move(model),
                                 this->n_iter,
                                 this->flip,
+                                this->bckg_class,
                                 this->n_threads) ;
         }
         // classify
         em->classify() ;
         em->get_post_prob().save(this->file_out) ;
 
         // clean
         delete em ;
         em = nullptr ;
 
         return EXIT_SUCCESS ;
     }
     else
     {   return EXIT_FAILURE ; }
 }
 
 // Parses the command line and sets the application fields accordingly.
 // argn / argv are the values received by main().
 // After parsing, n_iter, n_class and n_shift left at 0 are set to 1, flip
 // and bckg_class are set if --flip / --bgclass were given, and runnable is
 // set to false if --help was given (the help is printed) and true otherwise.
 // Throws std::invalid_argument if argv is null, if the parsing fails or if
 // --seq or --out is missing while --help was not requested.
 void EMSequenceApplication::parseOptions(int argn, char** argv)
 {
     // no option to parse
     if(argv == nullptr)
     {   std::string message = "no options to parse!" ;
         throw std::invalid_argument(message) ;
     }
 
     // help messages
     std::string desc_msg =         "\n"
                                    "EMSequence is a probabilistic partitioning algorithm that \n"
                                    "sofetly assigns sequences to classes given their motif content.\n"
                                    "The assignment probabilities are written in binary format as a 4D "
                                    "matrix.\n\n" ;
     std::string opt_help_msg     = "Produces this help message." ;
     std::string opt_thread_msg   = "The number of threads dedicated to parallelize the computations,\n "
                                    "by default 0 (no parallelization)." ;
     std::string opt_seq_msg      = "The path to the file containing the sequences" ;
     // each line but the last ends with a line break, adjacent literals are
     // concatenated without any separator
     std::string opt_motifs_msg   = "A coma separated list of path to files containing the initial motifs\n"
                                    "values. The motifs should be probability matrices in horizontal format.\n"
                                    "If the motifs are too short after accounting for shifting, extra\n"
                                    "columns with uniform probabilities will be added on each side. The\n"
                                    "given number of classes (--class) should at least be the number of\n"
                                    "initial motifs. If the number of classes is bigger than the number of\n"
                                    "given motifs, the remaining classes are initialised randomly." ;
     std::string opt_filter_msg   = "Optional. The path to a single column text file containing the 0-based\n"
                                    "indices of rows to filter out in the data." ;
     std::string opt_file_out_msg = "A path to a file in which the assignment probabilities will be saved\n"
                                    "in binary format." ;
     std::string opt_iter_msg     = "The number of iterations." ;
     std::string opt_class_msg    = "The number of classes to find." ;
     std::string opt_shift_msg    = "Enables this number of column of shifting freedom to realign\n"
                                    "the data. By default, shifting is disabled (equivalent to\n"
                                    "--shift 1)." ;
     std::string opt_flip_msg     = "Enables flipping to realign the data.";
     std::string opt_bckg_msg     = "Adds a class to model the sequence background. This class\n"
                                    "contains the sequence background probabilities at each position\n"
                                    "and is never updated." ;
     std::string opt_seed_msg     = "A value to seed the random number generator.";
 
     // option parser
     boost::program_options::variables_map vm ;
     boost::program_options::options_description desc(desc_msg) ;
 
     desc.add_options()
                 ("help,h",  opt_help_msg.c_str())
 
                 ("seq",     po::value<std::string>(&(this->file_seq)),     opt_seq_msg.c_str())
                 ("motifs",  po::value<std::string>(&(this->files_motif)),  opt_motifs_msg.c_str())
                 ("filter",   po::value<std::string>(&(this->file_filter)), opt_filter_msg.c_str())
                 ("out",     po::value<std::string>(&(this->file_out)),     opt_file_out_msg.c_str())
 
                 ("iter,i",  po::value<size_t>(&(this->n_iter)),            opt_iter_msg.c_str())
                 ("class,c", po::value<size_t>(&(this->n_class)),           opt_class_msg.c_str())
                 ("shift,s", po::value<size_t>(&(this->n_shift)),           opt_shift_msg.c_str())
                 ("flip",    opt_flip_msg.c_str())
                 ("bgclass", opt_bckg_msg.c_str())
 
                 ("seed",    po::value<std::string>(&(this->seed)),         opt_seed_msg.c_str())
                 ("thread",  po::value<std::size_t>(&(this->n_threads)),    opt_thread_msg.c_str()) ;
 
     // parse
     try
     {   po::store(po::parse_command_line(argn, argv, desc), vm) ;
         po::notify(vm) ;
     }
     catch(std::invalid_argument& e)
     {   std::string msg = std::string("Error! Invalid option given!\n") + std::string(e.what()) ;
         throw std::invalid_argument(msg) ;
     }
     catch(...)
     {   throw std::invalid_argument("An unknown error occured while parsing the options") ; }
 
     bool help = vm.count("help") ;
 
     // checks unproper option settings
     if(this->file_seq == "" and
        (not help))
     {   std::string msg("Error! No data were given (--seq)!") ;
         throw std::invalid_argument(msg) ;
     }
     if(this->file_out == "" and
        (not help))
     {   std::string msg("Error! No output file given (--out)!") ;
         throw std::invalid_argument(msg) ;
     }
 
     // no iter given -> 1 iter
     if(this->n_iter == 0)
     {   this->n_iter = 1 ; }
     // no class given -> 1 class
     if(this->n_class == 0)
     {   this->n_class = 1 ; }
     // no shift given, value of 1 -> no shift
     if(this->n_shift == 0)
     {   this->n_shift = 1 ; }
     // set flip
     if(vm.count("flip"))
     {   this->flip  = true ; }
     // set background class
     if(vm.count("bgclass"))
     {   this->bckg_class  = true ; }
     // help invoked, run() cannot be invoked
     if(help)
     {   std::cout << desc << std::endl ;
         this->runnable = false ;
         return ;
     }
     // everything fine, run() can be called
     else
     {   this->runnable = true ;
         return ;
     }
 }
 
 
 // Builds the initial class models from the given motif files.
 // l_model is the number of columns of each class model, data the sequences
 // and motif_paths the paths to the motif matrices (4 rows, one per base,
 // one column per position).
 // The model starts with every value at 0.25. Each given motif is copied in
 // the middle of the columns of its class (classes 0, 1, ... in the order of
 // motif_paths). If n_class is bigger than the number of given motifs plus
 // the background class, the remaining classes are taken from the models of
 // a randomly initialised EMSequence.
 // Returns a n_class x l_model x 4 matrix.
 // Throws std::invalid_argument if there are more motifs than classes, if
 // no class is left for the background class or if a motif has more columns
 // than the model.
 // NOTE(review): only the n_class_rand random classes are copied from the
 // temporary EMSequence. When bckg_class is set, the last class of the
 // returned model keeps its 0.25 values - confirm that the EMSequence
 // constructor receiving this model fills the background class.
 Matrix3D<double> EMSequenceApplication::init_model(size_t l_model,
                                                    const Matrix2D<int>& data,
                                                    const std::vector<std::string>& motif_paths) const
 {
     int n_class_given = motif_paths.size() ;
     int n_class_bckg  = this->bckg_class ;
     int n_class_rand  = this->n_class - n_class_given - n_class_bckg ;
 
     // number of classes should at least be number of motifs
     if(n_class_given > (int)this->n_class)
     {   char msg[4096] ;
         // bounded write, the message cannot overflow the buffer
         snprintf(msg, sizeof(msg),
                  "Error! number of class given (--class %zu) should at "
                  "least be equal to number of motifs (--motifs %d)",
                  this->n_class, n_class_given) ;
         throw std::invalid_argument(msg) ;
     }
     // check if there is room for a background class
     if((int)this->n_class < n_class_given+this->bckg_class)
     {   char msg[4096] ;
         snprintf(msg, sizeof(msg),
                  "Error! no class left to add a background "
                  "class (--bgclass) with the given motifs (--motifs) (--class %zu)",
                  this->n_class) ;
         throw std::invalid_argument(msg) ;
     }
 
     // init empty model
     Matrix3D<double> model(this->n_class,
                            l_model,
                            4,
                            0.25) ;
     // add given motifs
     for(size_t i=0; i<motif_paths.size(); i++)
     {   Matrix2D<double> matrix(motif_paths[i]) ;
         // motif is too big for this shift
         if(matrix.get_ncol() > l_model)
         {   char msg[4096] ;
             // the path comes from the command line and can be arbitrarily
             // long, the write must be bounded
             snprintf(msg, sizeof(msg),
                      "Error! In %s, motif column number is bigger "
                      "than data column number - shift + 1 "
                      "(%zu > %zu - %zu + 1)",
                      motif_paths[i].c_str(),
                      matrix.get_ncol(),
                      data.get_ncol(),
                      this->n_shift) ;
             throw std::invalid_argument(msg) ;
         }
         // insert motif in middle of matrix
         else
         {   // size_t j_model = this->n_shift / 2 ;
             size_t j_model = (l_model - matrix.get_ncol()) / 2 ;
             for(size_t j_mat=0, j_mod=j_model; j_mat<matrix.get_ncol(); j_mat++, j_mod++)
             {   for(size_t k=0; k<4; k++)
                 {   model(i,j_mod,k) = matrix(k,j_mat) ; }
             }
         }
     }
 
     // add random motifs and background class
     // delegate this to EMSequence constructor
     // (ensure that it is done properly)
     if(n_class_rand > 0)
     {   // initialise randomly
         EMSequence em(data,
                       n_class_rand,
                       this->n_iter,
                       this->n_shift,
                       this->flip,
                       this->bckg_class,
                       this->seed,
                       this->n_threads) ;
         Matrix3D<double> model_rand = em.get_sequence_models() ;
         // copy them into model, right after the given motifs
         for(int i_rand=0, i_mod=n_class_given; i_rand<n_class_rand; i_rand++, i_mod++)
         {   for(int j=0; j<(int)l_model; j++)
             {   for(int k=0; k<4; k++)
                 {   model(i_mod,j,k) = model_rand(i_rand,j,k) ; }
             }
         }
     }
     return model ;
 }
 
 // Builds the initial class models from the most significant kmers.
 // l_model is the number of columns of each class model, data the sequences
 // in which the kmers are searched.
 // If l_model > 4 the kmers are 4 columns shorter than the model and are
 // flanked by 2 uniform (0.25) columns on each side, otherwise the kmers
 // span the whole model. The n_class kmers ranked first by p-value each seed
 // one class : at each kmer position the kmer base gets 0.7 and each other
 // base 0.1.
 // Returns a n_class x l_model x 4 matrix.
 // NOTE(review): index[i] is read for every i < n_class, this assumes that
 // compute_kmer_pvalue() returns at least n_class kmers - confirm.
 Matrix3D<double> EMSequenceApplication::init_model_kmer(size_t l_model,
                                                         const Matrix2D<int>& data) const
 {
     // leave space for 2N's on each side
     size_t l_kmer  = l_model ;
     size_t n_n     = 0 ; // so far, 0 N's added
     if(l_model > 4)
     {   n_n     = 2 ; // 2 N's on each side
         l_kmer -= (2*n_n) ;
     }
 
     // compute the pvalue associated to each kmer
     auto kmers_pvalues = kmers::compute_kmer_pvalue(data, l_kmer) ;
 
     // sort kmers by ascending pvalue
     std::vector<size_t> index = order(kmers_pvalues.second, true) ;
     // get most significant
     std::vector<std::string> kmers(this->n_class) ;
     for(size_t i=0; i<this->n_class; i++)
     {   size_t idx = index[i] ;
         kmers[i] = kmers_pvalues.first[idx] ;
         std::cerr << kmers_pvalues.first[idx] << "  " << kmers_pvalues.second[idx] << std::endl ;
     }
     // turn to motifs
     double p_base  = 0.7 ;  // the prob of the base matching these of the kmer
     double p_nbase = 0.1 ;  // the prob of the bases not matching these of the kmer
     double p_n     = 0.25 ; // the prob of N
     // only N's for now
     Matrix3D<double> model(this->n_class,
                                 l_model,
                                 4,
                                 p_n) ;
     for(size_t i=0; i<kmers.size(); i++)
     {   // j_model has to advance together with j_kmer, otherwise every kmer
         // position is written in the same model column (n_n) and the other
         // columns stay uniform
         for(size_t j_kmer=0, j_model=n_n; j_kmer<l_kmer; j_kmer++, j_model++)
         {   // A
             if(kmers[i][j_kmer] == '0')
             {   model(i,j_model, 0) = p_base ;
                 model(i,j_model, 1) = p_nbase ;
                 model(i,j_model, 2) = p_nbase ;
                 model(i,j_model, 3) = p_nbase ;
             }
             // C
             else if(kmers[i][j_kmer] == '1')
             {   model(i,j_model, 0) = p_nbase ;
                 model(i,j_model, 1) = p_base ;
                 model(i,j_model, 2) = p_nbase ;
                 model(i,j_model, 3) = p_nbase ;
             }
             // G
             else if(kmers[i][j_kmer] == '2')
             {   model(i,j_model, 0) = p_nbase ;
                 model(i,j_model, 1) = p_nbase ;
                 model(i,j_model, 2) = p_base ;
                 model(i,j_model, 3) = p_nbase ;
             }
             // T
             else if(kmers[i][j_kmer] == '3')
             {   model(i,j_model, 0) = p_nbase ;
                 model(i,j_model, 1) = p_nbase ;
                 model(i,j_model, 2) = p_nbase ;
                 model(i,j_model, 3) = p_base ;
             }
         }
     }
     return model ;
 }
 
 // Program entry point : builds the application from the command line
 // arguments and returns the value returned by run().
 int main(int argn, char** argv)
 {   EMSequenceApplication app(argn, argv) ;
     return app.run() ;
 }
 
diff --git a/src/Clustering/EMBase.cpp b/src/Clustering/EMBase.cpp
index 570866c..65d4637 100644
--- a/src/Clustering/EMBase.cpp
+++ b/src/Clustering/EMBase.cpp
@@ -1,305 +1,306 @@
 #include <EMBase.hpp>
 
 #include <vector>
 #include <stdexcept>   // std::invalid_argument
 #include <future>      // std::promise, std::future
 #include <utility>     // std::pair, std::move()
 #include <functional>  // std::bind(), std::ref()
 #include <numeric>     // std::iota()
 #include <random>      // std::mt19937
 
 #include <Matrix3D.hpp>
 #include <Matrix4D.hpp>
 #include <ThreadPool.hpp>
 #include <BetaDistribution.hpp>      // beta_distribution()
 #include <Random.hpp>                // rand_string()
 #include <RandomNumberGenerator.hpp> // getRandomNumberGenerator()
 #include <Statistics.hpp>            // sd(), normal_pmf()
 
 
 // Constructs the part common to the EM classifiers.
 // The dimensions are stored and the probability containers are allocated
 // (filled with 0) in the member-initializer list. The body then validates
 // the shift freedom and creates the thread pool if one is requested.
 // n_row     stored in n_row; 1st dimension of loglikelihood and post_prob.
 // n_col     stored in n_col; the model length is n_col - n_shift + 1.
 // n_class   stored in n_class; number of classes in every container.
 // n_iter    stored in n_iter.
 // n_shift   stored in n_shift; must not exceed n_col.
 // flip      stored in flip; n_flip is set to flip+1.
 // n_threads if non-zero, a ThreadPool of this size is allocated, otherwise
 //           threads remains nullptr.
 // throws std::invalid_argument if n_col is smaller than n_shift.
 EMBase::EMBase(size_t n_row,
                size_t n_col,
                size_t n_class,
                size_t n_iter,
                size_t n_shift,
                bool flip,
                size_t n_threads=0)
     : n_row(n_row),
       n_col(n_col),
       n_class(n_class),
       n_shift(n_shift),
       flip(flip),
       n_flip(flip+1),
       n_iter(n_iter),
       l_model(n_col - n_shift + 1),
       loglikelihood(n_row, n_class, n_shift, n_flip, 0.),
       post_prob(n_row, n_class, n_shift, n_flip, 0.),
       post_state_prob(n_class, n_shift, n_flip, 0.),
       post_class_prob(n_class, 0.),
       post_prob_rowsum(n_row, 0.),
       post_prob_colsum(n_class, 0.),
       post_prob_tot(0.),
       threads(nullptr)
-{   // check n_shift value
+{
+    // check n_shift value
     if(this->n_col < this->n_shift)
     {   char msg[4096] ;
         sprintf(msg, "Error! Shift is bigger than data column number "
                      "(%zu / %zu)!",
                 this->n_shift, this->n_col) ;
         throw std::invalid_argument(msg) ;
     }
     // disabled code : it assigns the same containers that the
     // member-initializer list above already builds
     /*
     // data structures
     this->loglikelihood = Matrix4D<double>(this->n_row,
                                            this->n_class,
                                            this->n_shift,
                                            this->n_flip,
                                            0.) ;
     this->post_prob = Matrix4D<double>(this->n_row,
                                        this->n_class,
                                        this->n_shift,
                                        this->n_flip,
                                        0.) ;
     this->post_state_prob = Matrix3D<double>(this->n_class,
                                              this->n_shift,
                                              this->n_flip,
                                              0.) ;
     this->post_class_prob = vector_d(this->n_class, 0) ;
     this->post_prob_rowsum = vector_d(this->n_row, 0) ;
     this->post_prob_colsum = vector_d(this->n_class, 0) ;
     this->post_prob_tot = 0 ;
     */
     // threads : only allocated when a non-zero thread number is given,
     // the pool is released in the destructor
     if(n_threads)
     {   this->threads = new ThreadPool(n_threads) ; }
 
 }
 
 // Releases the thread pool, if one was created.
 EMBase::~EMBase()
 {
     // no pool, nothing to release
     if(this->threads == nullptr)
     {   return ; }
 
     // join the pool before freeing it
     this->threads->join() ;
     delete this->threads ;
     this->threads = nullptr ;
 }
 
 // Returns a copy of the posterior probability matrix.
 Matrix4D<double> EMBase::get_post_prob() const
 {
     return post_prob ;
 }
 
 // Returns a copy of the per class probability vector.
 vector_d EMBase::get_post_class_prob() const
 {
     return post_class_prob ;
 }
 
 // Gives every (class, shift, flip) state the same probability,
 // 1 / (n_class * n_shift * n_flip).
 void EMBase::set_state_prob_uniform()
 {   // the number of states, as a floating point value
     double n_state = this->n_class * this->n_shift * this->n_flip ;
     for(size_t c=0; c<this->n_class; c++)
     {   for(size_t s=0; s<this->n_shift; s++)
         {   for(size_t f=0; f<this->n_flip; f++)
             {   this->post_state_prob(c,s,f) = 1./n_state ; }
         }
     }
 }
 
 void EMBase::set_post_prob_random(const std::string& seed)
 {   // set random number generator
     // will be used to generate thread private seeds
     getRandomGenerator(seed) ;
 
     // don't parallelize
     if(this->threads == nullptr)
     {   std::promise<vector_d> promise ;
         std::future<vector_d> future = promise.get_future() ;
         this->set_post_prob_random_routine(0, this->n_row, seed, promise) ;
         // compute the sum of post prob and the per class sum of post prob
         // from the partial results computed on each slice
         this->post_prob_tot = 0. ;
         this->post_prob_colsum = future.get() ;
         for(const auto& prob : this->post_prob_colsum)
         {   this->post_prob_tot += prob ; }
     }
     // parallelize
     else
     {    size_t n_threads = this->threads->getNThread() ;
 
         // compute the slices on which each thread will work
         std::vector<std::pair<size_t,size_t>> slices =
                 ThreadPool::split_range(0, this->n_row,n_threads) ;
 
         // get promises and futures
         // the function run by the threads will compute
         // the partial sum per class of post_prob for the given slice
         // this should be used to compute the complete sum of post_prob
         // and the complete sum per class of post_prob
         std::vector<std::promise<vector_d>> promises(n_threads) ;
         std::vector<std::future<vector_d>>  futures(n_threads) ;
         // private seeds
         std::vector<std::string> private_seeds(n_threads) ;
         for(size_t i=0; i<n_threads; i++)
         {   futures[i] = promises[i].get_future() ;
             private_seeds[i] = rand_string(15) ;
         }
 
         // distribute work to threads
         // -------------------------- threads start --------------------------
         for(size_t i=0; i<n_threads; i++)
         {   // generate a private seed to set the random number generator
             // in this thread
             auto slice = slices[i] ;
             this->threads->addJob(std::move(
                                       std::bind(&EMBase::set_post_prob_random_routine,
                                                 this,
                                                 slice.first,
                                                 slice.second,
                                                 private_seeds[i],
                                                 std::ref(promises[i])))) ;
         }
         // wait until all threads are done working
         // compute the sum of post prob and the per class sum of post prob
         // from the partial results computed on each slice
         this->post_prob_tot = 0. ;
         this->post_prob_colsum = vector_d(this->n_class, 0.) ;
         for(auto& future : futures)
         {   auto probs = future.get() ;
             for(size_t i=0; i<this->n_class; i++)
             {   double prob = probs[i] ;
                 this->post_prob_colsum[i] += prob ;
                 this->post_prob_tot       += prob ;
             }
         }
         // -------------------------- threads stop ---------------------------
     }
 
     // compute class and state probs
     this->compute_class_prob() ;
 }
 
 // Fills the rows [from,to) of post_prob with random values and normalizes
 // each of these rows so that its values sum to 1.
 // The values are drawn from a beta_distribution constructed with the
 // parameters (1, n_row), using a std::mt19937 private to this call.
 // Each row is sampled and normalized in one go, so only a scalar row sum
 // is needed instead of a n_row long scratch vector per call. The order of
 // the random draws and of the summations is the same as if all rows were
 // sampled first and normalized afterwards.
 // from              the index of the first row to fill.
 // to                the index of the past the last row to fill.
 // seed              the string seeding the private random generator.
 // post_prob_colsum  a promise fulfilled with the per class sums of the
 //                   normalized values over the rows [from,to).
 void EMBase::set_post_prob_random_routine(size_t from,
                                           size_t to,
                                           const std::string& seed,
                                           std::promise<vector_d>& post_prob_colsum)
 {   // random number generator
     std::mt19937 generator ;
     std::seed_seq seed_sequence(seed.begin(),seed.end()) ;
     generator.seed(seed_sequence) ;
 
     // the per class sums over the slice
     vector_d colsums(this->n_class, 0.) ;
 
     beta_distribution<double> beta(1, this->n_row) ;
     for(size_t i=from; i<to; i++)
     {   // random sampling
         double rowsum = 0. ;
         for(size_t j=0; j<this->n_class; j++)
         {   for(size_t k=0; k<this->n_shift; k++)
             {   for(size_t l=0; l<this->n_flip; l++)
                 {   double p = beta(generator) ;
                     this->post_prob(i,j,k,l) = p ;
                     rowsum += p ;
                 }
             }
         }
 
         // normalization
         for(size_t j=0; j<this->n_class; j++)
         {   for(size_t k=0; k<this->n_shift; k++)
             {   for(size_t l=0; l<this->n_flip; l++)
                 {   double p = this->post_prob(i,j,k,l) / rowsum ;
                     this->post_prob(i,j,k,l) = p ;
                     colsums[j] += p ;
                 }
             }
         }
     }
 
     // hand the partial sums back to the caller
     post_prob_colsum.set_value(colsums) ;
 }
 
 // Recomputes the state and class probabilities from post_prob.
 // The probability of a (class, shift, flip) state is the sum of post_prob
 // over all rows for this state divided by post_prob_tot. The probability
 // of a class is the sum of the probabilities of its states.
 void EMBase::compute_class_prob()
 {
     for(size_t c=0; c<this->n_class; c++)
     {   // running total for this class
         double class_prob = 0. ;
         for(size_t s=0; s<this->n_shift; s++)
         {   for(size_t f=0; f<this->n_flip; f++)
             {   // sum over the rows
                 double state_prob = 0. ;
                 for(size_t row=0; row<this->n_row; row++)
                 {   state_prob += this->post_prob(row,c,s,f) ; }
                 // normalize
                 state_prob /= this->post_prob_tot ;
                 this->post_state_prob(c,s,f) = state_prob ;
                 class_prob += state_prob ;
             }
         }
         this->post_class_prob[c] = class_prob ;
     }
 }
 
 void EMBase::center_post_state_prob()
 {
     if(this->n_shift == 1)
     {   return ; }
 
     // the possible shift states
     vector_d shifts(this->n_shift) ;
     std::iota(shifts.begin(), shifts.end(), 1.) ;
 
     // the shift probabilities and the class probabilies
     // (no need to norm., class_prob sums to 1)
     double shifts_prob_measured_tot = 0. ;
     vector_d shifts_prob_measured(this->n_shift) ;
     for(size_t s=0; s<this->n_shift; s++)
     {   for(size_t k=0; k<this->n_class; k++)
         {   for(size_t f=0; f<this->n_flip; f++)
             {   shifts_prob_measured[s]  += this->post_state_prob(k,s,f) ;
                 shifts_prob_measured_tot += this->post_state_prob(k,s,f) ;
             }
         }
     }
 
 
     // the shift mean and (biased) standard deviation
     double shifts_sd = sd(shifts, shifts_prob_measured, false) ;
 
     // the shift probabilities under the assumption that is
     // distributed as a gaussian centered on
     // the central shift state with sd and mean as in the data
     // sd as the data
     vector_d shifts_prob_centered(shifts.size(), 0.) ;
     double shifts_prob_centered_tot = 0. ;
     for(size_t i=0; i<shifts.size(); i++)
     {   shifts_prob_centered[i]   = normal_pmf(shifts[i],
                                                (this->n_shift/2)+1, shifts_sd) ;
         shifts_prob_centered_tot += shifts_prob_centered[i] ;
     }
 
     for(size_t k=0; k<this->n_class; k++)
     {   for(size_t f=0; f<this->n_flip; f++)
         {   for(size_t s=0; s<this->n_shift; s++)
             {   this->post_state_prob(k,s,f) = this->post_class_prob[k] *
                                                  shifts_prob_centered[s] /
                                                 (this->n_flip * shifts_prob_centered_tot) ;
             }
         }
     }
 
     // shifts_prob_measured_tot = 0. ;
     shifts_prob_measured.clear() ;
     shifts_prob_measured.resize(this->n_shift) ;
     for(size_t s=0; s<this->n_shift; s++)
     {   for(size_t k=0; k<this->n_class; k++)
         {   for(size_t f=0; f<this->n_flip; f++)
             {   shifts_prob_measured[s]  +=
                         this->post_state_prob(k,s,f) ;
             }
         }
     }
 }
diff --git a/src/Clustering/EMConsensusSequence.cpp b/src/Clustering/EMConsensusSequence.cpp
index ea97f85..268cec2 100644
--- a/src/Clustering/EMConsensusSequence.cpp
+++ b/src/Clustering/EMConsensusSequence.cpp
@@ -1,355 +1,353 @@
 #include <EMConsensusSequence.hpp>
 
 #include <string>
 #include <vector>
 #include <future>                    // std::promise, std::future
 #include <utility>                   // std::pair, std::move()
 #include <functional>                // std::bind(), std::ref()
 
 #include <ConsensusSequenceLayer.hpp>  // SequenceLayer
 #include <RandomNumberGenerator.hpp>   // getRandomNumberGenerator()
 #include <ConsoleProgressBar.hpp>      // ConsoleProgressBar
 #include <ThreadPool.hpp>              // ThreadPool
 #include <dna_utility.hpp>             // dna::base_composition()
 
 
 // Constructs a classifier that starts from random posterior probabilities.
 // The dimensions of the problem are read from the sequence matrix
 // (1st dimension : rows, 2nd dimension : columns).
 // seq_matrix  the sequence data, handed to the ConsensusSequenceLayer.
 // n_class     the number of classes.
 // n_iter      the number of iterations run by classify().
 // n_shift     the shift freedom.
 // flip        whether flipping is allowed.
 // bckg_class  forwarded to the ConsensusSequenceLayer constructor.
 // seed        the seed used to draw the random posterior probabilities.
 // n_threads   the number of threads, forwarded to EMBase.
 EMConsensusSequence::EMConsensusSequence(const Matrix3D<double>& seq_matrix,
                                          size_t n_class,
                                          size_t n_iter,
                                          size_t n_shift,
                                          bool flip,
                                          bool bckg_class,
                                          const std::string& seed,
                                          size_t n_threads)
     : EMBase(seq_matrix.get_dim()[0],
              seq_matrix.get_dim()[1],
              n_class,
              n_iter,
              n_shift,
              flip,
              n_threads),
       loglikelihood_max(n_row, 0.),
       cseq_layer(nullptr)
 {
     // loglikelihood_max is already sized and zeroed by the
     // member-initializer list, no need to assign it again
 
     // initialise post prob randomly
     this->set_post_prob_random(seed) ;
 
     // data and models
     this->cseq_layer = new ConsensusSequenceLayer(seq_matrix,
                                                   this->n_class,
                                                   this->n_shift,
                                                   this->flip,
                                                   bckg_class) ;
 
     // initialise the models with the post prob
     this->cseq_layer->update_model(this->post_prob,
                                    this->threads) ;
 }
 
 // Constructs a classifier that starts from random posterior probabilities,
 // taking over the given sequence matrix.
 // The dimensions of the problem are read from the sequence matrix
 // (1st dimension : rows, 2nd dimension : columns) before it is moved
 // into the ConsensusSequenceLayer.
 // seq_matrix  the sequence data, moved into the ConsensusSequenceLayer.
 // n_class     the number of classes.
 // n_iter      the number of iterations run by classify().
 // n_shift     the shift freedom.
 // flip        whether flipping is allowed.
 // bckg_class  forwarded to the ConsensusSequenceLayer constructor.
 // seed        the seed used to draw the random posterior probabilities.
 // n_threads   the number of threads, forwarded to EMBase.
 EMConsensusSequence::EMConsensusSequence(Matrix3D<double>&& seq_matrix,
                                          size_t n_class,
                                          size_t n_iter,
                                          size_t n_shift,
                                          bool flip,
                                          bool bckg_class,
                                          const std::string& seed,
                                          size_t n_threads)
     : EMBase(seq_matrix.get_dim()[0],
              seq_matrix.get_dim()[1],
              n_class,
              n_iter,
              n_shift,
              flip,
              n_threads),
       loglikelihood_max(n_row, 0.),
       cseq_layer(nullptr)
 {
     // loglikelihood_max is already sized and zeroed by the
     // member-initializer list, no need to assign it again
 
     // initialise post prob randomly
     this->set_post_prob_random(seed) ;
 
     // data and models
     this->cseq_layer = new ConsensusSequenceLayer(std::move(seq_matrix),
                                                   this->n_class,
                                                   this->n_shift,
                                                   this->flip,
                                                   bckg_class) ;
 
     // initialise the models with the post prob
     this->cseq_layer->update_model(this->post_prob,
                                    this->threads) ;
 }
 
 // Constructs a classifier that starts from given motifs instead of
 // random posterior probabilities.
 // The number of classes is the 1st dimension of the motifs and the shift
 // freedom is derived from the data and motif lengths
 // (seq_matrix 2nd dimension - motifs 2nd dimension + 1).
 // seq_matrix  the sequence data, handed to the ConsensusSequenceLayer.
 // motifs      the starting motifs, handed to the ConsensusSequenceLayer.
 // n_iter      forwarded to EMBase.
 // flip        forwarded to EMBase and to the ConsensusSequenceLayer.
 // bckg_class  forwarded to the ConsensusSequenceLayer constructor.
 // n_threads   forwarded to EMBase.
 EMConsensusSequence::EMConsensusSequence(const Matrix3D<double>& seq_matrix,
                                          const Matrix3D<double>& motifs,
                                          size_t n_iter,
                                          bool flip,
                                          bool bckg_class,
                                          size_t n_threads)
     : EMBase(seq_matrix.get_dim()[0],
              seq_matrix.get_dim()[1],
              motifs.get_dim()[0],
              n_iter,
              seq_matrix.get_dim()[1] - motifs.get_dim()[1] + 1,
              flip,
              n_threads),
       loglikelihood_max(n_row, 0.),
       cseq_layer(nullptr)
 {
-
     this->loglikelihood_max = vector_d(n_row, 0.) ;
 
     // data and models
     // background motif (if any) is the last of the given motifs
     this->cseq_layer = new ConsensusSequenceLayer(seq_matrix,
                                                   motifs,
                                                   this->flip,
                                                   bckg_class) ;
 
     // initialise the state probabilities uniformly
     this->set_state_prob_uniform() ;
 }
 
 // Constructs a classifier that starts from given motifs instead of
 // random posterior probabilities, taking over both matrices.
 // The number of classes is the 1st dimension of the motifs and the shift
 // freedom is derived from the data and motif lengths
 // (seq_matrix 2nd dimension - motifs 2nd dimension + 1). The dimensions
 // are read in the member-initializer list, before the matrices are moved.
 // seq_matrix  the sequence data, moved into the ConsensusSequenceLayer.
 // motifs      the starting motifs, moved into the ConsensusSequenceLayer.
 // n_iter      forwarded to EMBase.
 // flip        forwarded to EMBase and to the ConsensusSequenceLayer.
 // bckg_class  forwarded to the ConsensusSequenceLayer constructor.
 // n_threads   forwarded to EMBase.
 EMConsensusSequence::EMConsensusSequence(Matrix3D<double>&& seq_matrix,
                                          Matrix3D<double>&& motifs,
                                          size_t n_iter,
                                          bool flip,
                                          bool bckg_class,
                                          size_t n_threads)
     : EMBase(seq_matrix.get_dim()[0],
              seq_matrix.get_dim()[1],
              motifs.get_dim()[0],
              n_iter,
              seq_matrix.get_dim()[1] - motifs.get_dim()[1] + 1,
              flip,
              n_threads),
       loglikelihood_max(n_row, 0.),
       cseq_layer(nullptr)
 {
-
     this->loglikelihood_max = vector_d(n_row, 0.) ;
 
     // data and models
     // background motif (if any) is the last of the given motifs
     this->cseq_layer = new ConsensusSequenceLayer(std::move(seq_matrix),
                                                   std::move(motifs),
                                                   this->flip,
                                                   bckg_class) ;
 
     // initialise the state probabilities uniformly
     this->set_state_prob_uniform() ;
 }
 
 
 // Releases the sequence layer, then the thread pool (if any).
 EMConsensusSequence::~EMConsensusSequence()
 {
     // deleting a null pointer has no effect, no test needed
     delete this->cseq_layer ;
     this->cseq_layer = nullptr ;
 
     // no pool, nothing more to release
     if(this->threads == nullptr)
     {   return ; }
 
     // join the pool before freeing it
     this->threads->join() ;
     delete this->threads ;
     this->threads = nullptr ;
 }
 
 // Returns the model held by the sequence layer.
 Matrix3D<double> EMConsensusSequence::get_sequence_models() const
 {
     return cseq_layer->get_model() ;
 }
 
 // Runs n_iter rounds of expectation-maximization, updating a progress
 // bar on std::cerr after each round.
 // Always returns exit_codes::ITER_MAX.
 EMConsensusSequence::exit_codes EMConsensusSequence::classify()
 {
     // the bar is given the number of iterations as update number
     size_t n_update = this->n_iter ;
     ConsoleProgressBar bar(std::cerr, n_update, 60, "classifying") ;
 
     // optimize the partition
     size_t iter = 0 ;
     while(iter < this->n_iter)
     {   // E-step
         this->compute_loglikelihood() ;
         this->compute_post_prob() ;
         // M-step
         this->compute_class_prob() ;
         this->update_models() ;
         this->center_post_state_prob() ;
         bar.update() ;
         iter++ ;
     }
     // last bar update, then end the bar line
     bar.update() ;
     std::cerr << std::endl ;
     return EMConsensusSequence::exit_codes::ITER_MAX ;
 }
 
 // Computes the loglikelihoods and rescales them.
 // The computation is delegated to the sequence layer, which is handed
 // loglikelihood and loglikelihood_max. The rescaling is done by
 // compute_loglikelihood_routine(), called directly on all the rows when
 // there is no thread pool, or once per slice of rows through the pool
 // otherwise.
 void EMConsensusSequence::compute_loglikelihood()
 {   // compute the loglikelihood
     this->cseq_layer->compute_loglikelihoods(this->loglikelihood,
                                              this->loglikelihood_max,
                                              this->threads) ;
 
     // rescale the values
     if(this->threads != nullptr)
     {   // ------------------------- with a thread pool -------------------------
         const size_t n_worker = this->threads->getNThread() ;
 
         // the slice of rows each worker gets
         std::vector<std::pair<size_t,size_t>> row_slices =
                 ThreadPool::split_range(0, this->n_row, n_worker) ;
 
         // one promise/future pair per worker, to know when it is done
         std::vector<std::promise<bool>> dones(n_worker) ;
         std::vector<std::future<bool>>  pendings(n_worker) ;
         for(size_t w=0; w<n_worker; w++)
         {   pendings[w] = dones[w].get_future() ; }
 
         // queue one job per slice
         for(size_t w=0; w<n_worker; w++)
         {   const std::pair<size_t,size_t>& rows = row_slices[w] ;
             this->threads->addJob(std::move(
                                       std::bind(&EMConsensusSequence::compute_loglikelihood_routine,
                                                 this,
                                                 rows.first,
                                                 rows.second,
                                                 std::ref(dones[w])))) ;
         }
 
         // block on each future in turn
         for(auto& pending : pendings)
         {   pending.get() ; }
     }
     else
     {   // ------------------------ without thread pool -------------------------
         std::promise<bool> done ;
         std::future<bool>  pending = done.get_future() ;
         this->compute_loglikelihood_routine(0,
                                             this->n_row,
                                             done) ;
         pending.get() ;
     }
 }
 
 // Rescales the loglikelihoods of the rows [from,to).
 // Each value has the loglikelihood_max entry of its row subtracted and is
 // then bounded from below by ConsensusSequenceLayer::p_min_log.
 // from  the index of the first row to treat.
 // to    the index of the past the last row to treat.
 // done  a promise set to true once all the rows are treated.
 void EMConsensusSequence::compute_loglikelihood_routine(size_t from,
                                                         size_t to,
                                                         std::promise<bool>& done)
 {
     for(size_t row=from; row<to; row++)
     {   // the value subtracted from every entry of this row
         const double row_max = this->loglikelihood_max[row] ;
         for(size_t c=0; c<this->n_class; c++)
         {   for(size_t s=0; s<this->n_shift; s++)
             {   for(size_t f=0; f<this->n_flip; f++)
                 {   const double rescaled =
                             this->loglikelihood(row,c,s,f) - row_max ;
                     this->loglikelihood(row,c,s,f) =
                             std::max(rescaled,
                                      ConsensusSequenceLayer::p_min_log) ;
                 }
             }
         }
     }
     done.set_value(true) ;
 }
 
 void EMConsensusSequence::compute_post_prob()
 {   // don't parallelize
     if(this->threads == nullptr)
     {   std::promise<vector_d> promise ;
         std::future<vector_d> future = promise.get_future() ;
         this->compute_post_prob_routine(0, this->n_row, promise) ;
         // compute the sum of post prob and the per class sum of post prob
         // from the partial results computed on each slice
         this->post_prob_tot = 0. ;
         this->post_prob_colsum = future.get() ;
         for(const auto& prob : this->post_prob_colsum)
         {   this->post_prob_tot += prob ; }
     }
     // parallelize
     else
     {    size_t n_threads = this->threads->getNThread() ;
 
         // compute the slices on which each thread will work
         std::vector<std::pair<size_t,size_t>> slices =
                 ThreadPool::split_range(0, this->n_row,n_threads) ;
 
         // get promises and futures
         // the function run by the threads will compute
         // the partial sum per class of post_prob for the given slice
         // this should be used to compute the complete sum of post_prob
         // and the complete sum per class of post_prob
         std::vector<std::promise<vector_d>> promises(n_threads) ;
         std::vector<std::future<vector_d>>  futures(n_threads) ;
         for(size_t i=0; i<n_threads; i++)
         {   futures[i] = promises[i].get_future() ; }
 
         // distribute work to threads
         // -------------------------- threads start --------------------------
         for(size_t i=0; i<n_threads; i++)
         {   auto slice = slices[i] ;
             this->threads->addJob(std::move(
                                       std::bind(&EMConsensusSequence::compute_post_prob_routine,
                                                 this,
                                                 slice.first,
                                                 slice.second,
                                                 std::ref(promises[i])))) ;
         }
         // wait until all threads are done working
         // compute the sum of post prob and the per class sum of post prob
         // from the partial results computed on each slice
         this->post_prob_tot = 0. ;
         this->post_prob_colsum = vector_d(this->n_class, 0.) ;
         for(auto& future : futures)
         {   auto probs = future.get() ;
             for(size_t i=0; i<this->n_class; i++)
             {   double prob = probs[i] ;
                 this->post_prob_colsum[i] += prob ;
                 this->post_prob_tot       += prob ;
             }
         }
         // -------------------------- threads stop ---------------------------
     }
 }
 
 
 void EMConsensusSequence::compute_post_prob_routine(size_t from,
                                                     size_t to,
                                                     std::promise<vector_d>& post_prob_colsum)
 {   vector_d colsums(this->n_class, 0.) ;
 
     // reset grand total
     // this->post_prob_tot = 0 ;
     // this->post_prob_colsum = vector_d(n_class, 0) ;
 
     // post prob
     for(size_t i=from; i<to; i++)
     {   // reset row sum to 0
         this->post_prob_rowsum[i] = 0. ;
         for(size_t n_class=0; n_class<this->n_class; n_class++)
         {   for(size_t n_shift=0; n_shift<this->n_shift; n_shift++)
             {   for(size_t n_flip=0; n_flip<this->n_flip; n_flip++)
                 {
                     double p = exp(this->loglikelihood(i,n_class,n_shift,n_flip)) *
                                    this->post_state_prob(n_class,n_shift,n_flip) ;
                     this->post_prob(i,n_class,n_shift,n_flip) = p ;
                     this->post_prob_rowsum[i] += p ;
                 }
             }
         }
         // normalize
         for(size_t n_class=0; n_class<this->n_class; n_class++)
         {   for(size_t n_shift=0; n_shift<this->n_shift; n_shift++)
             {   for(size_t n_flip=0; n_flip<this->n_flip; n_flip++)
                 {
                     double p = std::max(this->post_prob(i,n_class,n_shift,n_flip) /
                                         this->post_prob_rowsum[i],
                                         ConsensusSequenceLayer::p_min) ;
                     this->post_prob(i,n_class,n_shift,n_flip) = p ;
                     colsums[n_class] += p ;
                 }
             }
         }
     }
     post_prob_colsum.set_value(colsums) ;
 }
 
 // Has the sequence layer update its model from the current posterior
 // probabilities, handing it the thread pool (which may be nullptr).
 void EMConsensusSequence::update_models()
 {
     this->cseq_layer->update_model(this->post_prob, this->threads) ;
 }